VectorRewritePatterns.h
//===- VectorRewritePatterns.h - Vector rewrite patterns --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H
#define MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H

#include <optional>
#include <utility>

#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/PatternMatch.h"

#include "mlir/Dialect/Vector/Transforms/VectorTransformsEnums.h.inc"
namespace mlir {
class ConversionTarget;
class RewritePatternSet;
class TypeConverter;

namespace arith {
class AndIOp;
class NarrowTypeEmulationConverter;
class TruncIOp;
} // namespace arith

namespace vector {
struct VectorTransformsOptions;

/// Options that control the vector unrolling.
struct UnrollVectorOptions {
  using FilterConstraintFnType = std::function<LogicalResult(Operation *op)>;
  /// Callback function that indicates whether vector unrolling should be
  /// attempted on the operation.
  FilterConstraintFnType filterConstraint = nullptr;
  UnrollVectorOptions &setFilterConstraint(FilterConstraintFnType constraint) {
    filterConstraint = std::move(constraint);
    return *this;
  }

  using NativeShapeFnType =
      std::function<std::optional<SmallVector<int64_t>>(Operation *op)>;
  /// Function that returns the shape of the vector to unroll to for a given
  /// operation. The unrolling is aborted if the function returns
  /// `std::nullopt`.
  NativeShapeFnType nativeShape = nullptr;
  UnrollVectorOptions &setNativeShapeFn(NativeShapeFnType fn) {
    nativeShape = std::move(fn);
    return *this;
  }

  /// Set the native shape to use for unrolling.
  UnrollVectorOptions &setNativeShape(ArrayRef<int64_t> shape) {
    SmallVector<int64_t> tsShape(shape);
    nativeShape = [=](Operation *) -> std::optional<SmallVector<int64_t>> {
      return tsShape;
    };
    return *this;
  }

  /// Function that returns the traversal order (in terms of "for loop order",
  /// i.e. slowest varying dimension to fastest varying dimension) that should
  /// be used when unrolling the given operation into units of the native
  /// vector size.
  using UnrollTraversalOrderFnType =
      std::function<std::optional<SmallVector<int64_t>>(Operation *op)>;
  UnrollTraversalOrderFnType traversalOrderCallback = nullptr;
  UnrollVectorOptions &
  setUnrollTraversalOrderFn(UnrollTraversalOrderFnType traversalOrderFn) {
    traversalOrderCallback = std::move(traversalOrderFn);
    return *this;
  }
};
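
// Usage sketch (illustrative, not part of the upstream header): configuring
// `UnrollVectorOptions` so that only `vector.contract` ops are unrolled, to an
// assumed 2x2x2 native shape. The shape value is arbitrary and chosen purely
// for illustration; the options object is then consumed by
// populateVectorUnrollPatterns() declared further below.
//
//   UnrollVectorOptions options;
//   options.setNativeShape({2, 2, 2})
//       .setFilterConstraint([](Operation *op) {
//         // Only attempt unrolling on vector.contract ops.
//         return success(isa<vector::ContractionOp>(op));
//       });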

/// Canonicalization of a `vector.contraction %a, %b, %c` with row-major matmul
/// semantics to a contraction with MMT semantics (matrix matrix multiplication
/// with the RHS transposed). This specific form is meant to have the vector
/// operands organized such that the reduction dimension is contiguous.
/// Example:
/// ```
///   vector.contract {indexing_maps = [affine_map<(m, n, k) -> (m, k)>,
///                                      affine_map<(m, n, k) -> (n, k)>,
///                                      affine_map<(m, n, k) -> (m, n)>],
///                    iterator_types = ["parallel", "parallel", "reduction"],
///                    kind = #vector.kind<add>} %a, %b, %c : ...
/// ```
///
/// The `constraint` predicate is used to decide which `vector.contraction` ops
/// to filter out.
void populateVectorContractCanonicalizeMatmulToMMT(
    RewritePatternSet &patterns,
    std::function<LogicalResult(vector::ContractionOp)> constraint =
        [](vector::ContractionOp) { return success(); },
    PatternBenefit = 1);
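
// Usage sketch (illustrative): restricting the MMT canonicalization to
// contractions whose LHS element type is f32. The predicate is a made-up
// example of the `constraint` hook, not a recommendation.
//
//   populateVectorContractCanonicalizeMatmulToMMT(
//       patterns, [](vector::ContractionOp op) {
//         return success(op.getLhsType().getElementType().isF32());
//       });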

/// Collect patterns to convert reduction op to vector.contract and fold
/// transpose/broadcast ops into the contract.
void populateVectorReductionToContractPatterns(RewritePatternSet &patterns,
                                               PatternBenefit benefit = 1);

/// Populate `patterns` with the following patterns.
///
///   - VectorTransferFullPartialRewriter
///
/// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds
/// masking) fast path and a slow path.
///
/// Example (a 2-D vector.transfer_read):
/// ```
///    %1 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
/// ```
/// is transformed into:
/// ```
///    %1:3 = scf.if (%inBounds) {
///      // fast path, direct cast
///      memref.cast %A: memref<A...> to compatibleMemRefType
///      scf.yield %view : compatibleMemRefType, index, index
///    } else {
///      // slow path, not in-bounds vector.transfer or linalg.copy.
///      memref.cast %alloc: memref<B...> to compatibleMemRefType
///      scf.yield %4 : compatibleMemRefType, index, index
///    }
///    %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ... true]}
/// ```
/// where `alloc` is a top-of-function `alloca`'d buffer of one vector.
///
/// Preconditions:
///  1. `xferOp.permutation_map()` must be a minor identity map.
///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
///     must be equal. This will be relaxed in the future but requires
///     rank-reducing subviews.
void populateVectorTransferFullPartialPatterns(
    RewritePatternSet &patterns, const VectorTransformsOptions &options);

/// Collect a set of patterns to reduce the rank of the operands of vector
/// transfer ops so that they operate on the largest contiguous vector.
/// These patterns are useful when lowering to dialects with 1-D vector types
/// such as LLVM, and they result in fewer memory reads.
void populateVectorTransferCollapseInnerMostContiguousDimsPatterns(
    RewritePatternSet &patterns, PatternBenefit benefit = 1);

/// Patterns that remove redundant Vector Ops by re-ordering them with
/// e.g. elementwise Ops:
/// ```
/// %at = vector.transpose %a, [1, 0]: vector<4x2xf32> to vector<2x4xf32>
/// %bt = vector.transpose %b, [1, 0]: vector<4x2xf32> to vector<2x4xf32>
/// %r = arith.addf %at, %bt : vector<2x4xf32>
/// ```
/// gets converted to:
/// ```
/// %0 = arith.addf %a, %b : vector<4x2xf32>
/// %r = vector.transpose %0, [1, 0] : vector<4x2xf32> to vector<2x4xf32>
/// ```
/// At the moment, these patterns are limited to vector.broadcast and
/// vector.transpose.
void populateSinkVectorOpsPatterns(RewritePatternSet &patterns,
                                   PatternBenefit benefit = 1);

/// Patterns that remove redundant Vector Ops by merging them with load/store
/// ops:
/// ```
/// vector.load %arg0[%arg1] : memref<?xf32>, vector<4xf32>
/// vector.extract %0[1] : f32 from vector<4xf32>
/// ```
/// gets converted to:
/// ```
/// %c1 = arith.constant 1 : index
/// %0 = arith.addi %arg1, %c1 overflow<nsw> : index
/// %1 = memref.load %arg0[%0] : memref<?xf32>
/// ```
void populateSinkVectorMemOpsPatterns(RewritePatternSet &patterns,
                                      PatternBenefit benefit = 1);

/// Patterns that fold chained vector reductions. These patterns assume that
/// elementwise operations (e.g., `arith.addf` with vector operands) are
/// cheaper than vector reduction.
/// Note that these patterns change the order of reduction which may not always
/// produce bit-identical results on some floating point inputs.
///
/// Example:
/// ```
/// %a = vector.reduction <add> %x, %acc
/// %b = vector.reduction <add> %y, %a
/// ```
/// is transformed into:
/// ```
/// %a = arith.addf %x, %y
/// %b = vector.reduction <add> %a, %acc
/// ```
void populateChainedVectorReductionFoldingPatterns(RewritePatternSet &patterns,
                                                   PatternBenefit benefit = 1);

/// Patterns to break down vector reductions into a series of arith reductions
/// over vector elements. This is intended to simplify code with reductions
/// over small vector types and to avoid more specialized reduction lowering
/// when possible.
///
/// Example:
/// ```
/// %a = vector.reduction <add> %x : vector<2xf32> into f32
/// ```
/// is transformed into:
/// ```
/// %y = vector.extract %x[0] : f32 from vector<2xf32>
/// %z = vector.extract %x[1] : f32 from vector<2xf32>
/// %a = arith.addf %y, %z : f32
/// ```
void populateBreakDownVectorReductionPatterns(
    RewritePatternSet &patterns, unsigned maxNumElementsToExtract = 2,
    PatternBenefit benefit = 1);
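
// Usage sketch (illustrative): raising the default element limit so that
// reductions over vectors of up to four elements are broken down. The
// threshold is an arbitrary example value.
//
//   populateBreakDownVectorReductionPatterns(patterns,
//                                            /*maxNumElementsToExtract=*/4);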

/// Populate `patterns` with the following patterns.
///
/// [DecomposeDifferentRankInsertStridedSlice]
/// ==========================================
/// RewritePattern for InsertStridedSliceOp where source and destination vectors
/// have different ranks.
///
/// When ranks are different, InsertStridedSlice needs to extract a properly
/// ranked vector from the destination vector into which to insert. This pattern
/// only takes care of this extraction part and forwards the rest to
/// [VectorInsertStridedSliceOpSameRankRewritePattern].
///
/// For a k-D source and n-D destination vector (k < n), we emit:
///   1. ExtractOp to extract the (unique) (n-1)-D subvector into which to
///      insert the k-D source.
///   2. k-D -> (n-1)-D InsertStridedSlice op.
///   3. InsertOp that is the reverse of 1.
///
/// [DecomposeNDExtractStridedSlice]
/// ================================
/// For such cases, we can rewrite it to ExtractOp/ExtractElementOp + lower
/// rank ExtractStridedSliceOp + InsertOp/InsertElementOp for the n-D case.
void populateVectorInsertExtractStridedSliceDecompositionPatterns(
    RewritePatternSet &patterns, PatternBenefit benefit = 1);

/// Populate `patterns` with a pattern to break down 1-D extract_strided_slice
/// ops into a chain of Extract ops to extract each element from the source, and
/// then a chain of Insert ops to insert to the target vector.
///
/// If `controlFn` is not nullptr, the pattern will only be invoked on ops for
/// which `controlFn` returns true. Otherwise it runs on all such ops.
void populateVectorExtractStridedSliceToExtractInsertChainPatterns(
    RewritePatternSet &patterns,
    std::function<bool(ExtractStridedSliceOp)> controlFn = nullptr,
    PatternBenefit benefit = 1);

/// Populate `patterns` with a pattern to break down 1-D vector.bitcast ops
/// based on the destination vector shape. Bitcasts from a lower bitwidth
/// element type to a higher bitwidth one are extracted from the lower bitwidth
/// based on the native destination vector shape and inserted based on the ratio
/// of the bitwidths.
///
/// This acts as a last resort way to break down vector.bitcast ops to smaller
/// vector sizes. Because this pattern composes until it is bitcasting to a
/// single element of the higher bitwidth, there is an optional control
/// function. If `controlFn` is not nullptr, the pattern only applies to ops
/// where `controlFn` returns true; otherwise it applies to all bitcast ops.
void populateBreakDownVectorBitCastOpPatterns(
    RewritePatternSet &patterns,
    std::function<bool(BitCastOp)> controlFn = nullptr,
    PatternBenefit benefit = 1);
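
// Usage sketch (illustrative): only breaking down bitcasts whose source
// vector has more than four elements. The predicate and threshold are
// arbitrary examples of the `controlFn` hook.
//
//   populateBreakDownVectorBitCastOpPatterns(
//       patterns, [](vector::BitCastOp op) {
//         return op.getSourceVectorType().getNumElements() > 4;
//       });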

/// Populate `patterns` with the following patterns.
///
/// Patterns in populateVectorInsertExtractStridedSliceDecompositionPatterns();
///
/// [ConvertSameRankInsertStridedSliceIntoShuffle]
/// ==============================================
/// RewritePattern for InsertStridedSliceOp where source and destination vectors
/// have the same rank. For each outermost index in the slice:
///     begin      end               stride
/// [offset : offset+size*stride : stride]
///   1. ExtractOp one (k-1)-D source subvector and one (n-1)-D dest subvector.
///   2. InsertStridedSlice (k-1)-D into (n-1)-D.
///   3. The destination subvector is inserted back in the proper place.
///   4. InsertOp that is the reverse of 1.
///
/// [Convert1DExtractStridedSliceIntoShuffle]
/// =========================================
/// For such cases, we can lower it to a ShuffleOp.
void populateVectorInsertExtractStridedSliceTransforms(
    RewritePatternSet &patterns, PatternBenefit benefit = 1);

/// Collect a set of patterns to unroll vector operations to smaller shapes.
/// The `options` structure controls which operations are unrolled and the
/// target shape.
/// `op` is unrolled to the `targetShape` as follows, for each of its operands:
///   1. the unrolled type `unrolledVectorType` and number of unrolled instances
///      `numUnrolledInstances` are computed from the `targetShape`. For now it
///      is assumed the unrolling factors divide the vector sizes.
///   2. ExtractStridedSlice are created to break up the vector operands.
///   3. the original op is cloned `numUnrolledInstances` times, once for each
///      result.
///   4. InsertStridedSlice are inserted to re-assemble the slices into the
///      original vector shape.
///
/// Example:
///
///    opA(operand0, operand1)  // numUnrolledInstances = 3
///
///            operand0                   operand1
///               |                          |
///             fork                       fork
///        <----------gather all fork ops --------->
///              /|\                        /|\
///          f00 f01 f02                f10 f11 f12
///        <---------- clone op 3 times --------->
///      opA0(f00, f10), opA1(f01, f11), opA2(f02, f12)
///                 \            |            /
///      <-------------------- join ------------------------->
///
/// Other local patterns then kick in iteratively (including DCE) and compose
/// to combine the ExtractStridedSlice/InsertStridedSlice.
void populateVectorUnrollPatterns(RewritePatternSet &patterns,
                                  const UnrollVectorOptions &options,
                                  PatternBenefit benefit = 1);
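
// Usage sketch (illustrative) of driving the unroll patterns from a pass.
// `MyVectorUnrollPass` is a hypothetical pass, and the greedy-driver entry
// point is assumed to come from mlir/Transforms/GreedyPatternRewriteDriver.h
// (spelled applyPatternsGreedily or applyPatternsAndFoldGreedily depending on
// the MLIR revision).
//
//   void MyVectorUnrollPass::runOnOperation() {
//     RewritePatternSet patterns(&getContext());
//     UnrollVectorOptions options;
//     options.setNativeShape({4}); // example native shape
//     vector::populateVectorUnrollPatterns(patterns, options);
//     if (failed(applyPatternsGreedily(getOperation(), std::move(patterns))))
//       signalPassFailure();
//   }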

/// Collect a set of leading one dimension removal patterns.
///
/// These patterns insert vector.shape_cast to remove leading one dimensions
/// to expose more canonical forms of read/write/insert/extract operations.
/// With them, there are more chances that we can cancel out extract-insert
/// pairs or forward write-read pairs.
void populateCastAwayVectorLeadingOneDimPatterns(RewritePatternSet &patterns,
                                                 PatternBenefit benefit = 1);

/// Collect a set of unit dimension removal patterns.
///
/// These patterns insert rank-reducing memref.subview ops to remove unit
/// dimensions. With them, there are more chances that we can avoid
/// potentially expensive vector.shape_cast operations.
void populateVectorTransferDropUnitDimsPatterns(RewritePatternSet &patterns,
                                                PatternBenefit benefit = 1);

/// Collect a set of patterns that use vector.shape_cast to help fold unit dims.
///
/// These patterns use vector.shape_cast to remove unit dims from e.g.
/// arithmetic operations on Vectors. The newly inserted shape_casts will either
/// cancel each other out or will be folded away when combined with other
/// patterns.
void populateDropUnitDimWithShapeCastPatterns(RewritePatternSet &patterns,
                                              PatternBenefit benefit = 1);

/// Collect a set of patterns to flatten n-D vector transfers on contiguous
/// memref.
///
/// These patterns insert memref.collapse_shape + vector.shape_cast patterns
/// to transform multiple small n-D transfers into a larger 1-D transfer where
/// the memref contiguity properties allow it.
///
/// Flattening is only applied if the bitwidth of the trailing vector dimension
/// is smaller or equal to `targetVectorBitwidth`.
void populateFlattenVectorTransferPatterns(
    RewritePatternSet &patterns,
    unsigned targetVectorBitwidth = std::numeric_limits<unsigned>::max(),
    PatternBenefit benefit = 1);

/// Collect a set of patterns that bubble up/down bitcast ops.
///
/// These patterns move vector.bitcast ops to be before insert ops or after
/// extract ops where suitable. With them, bitcast will happen on smaller
/// vectors and there are more chances to share extract/insert ops.
void populateBubbleVectorBitCastOpPatterns(RewritePatternSet &patterns,
                                           PatternBenefit benefit = 1);

/// These patterns materialize masks for various vector ops such as transfers.
void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns,
                                               bool force32BitVectorIndices,
                                               PatternBenefit benefit = 1);

/// Appends patterns for emulating vector operations over narrow types with ops
/// over wider types. `disableAtomicRMW` indicates whether to use a plain
/// read-modify-write sequence instead of `memref.generic_atomic_rmw` when
/// performing sub-byte stores.
void populateVectorNarrowTypeEmulationPatterns(
    const arith::NarrowTypeEmulationConverter &typeConverter,
    RewritePatternSet &patterns, bool disableAtomicRMW = false);

/// Rewrite a vector `bitcast(trunci)` to use a more efficient sequence of
/// vector operations comprising `shuffle` and `bitwise` ops.
/// Warning: these patterns currently only work for little endian targets.
FailureOr<Value> rewriteBitCastOfTruncI(RewriterBase &rewriter,
                                        vector::BitCastOp bitCastOp,
                                        arith::TruncIOp truncOp,
                                        vector::BroadcastOp maybeBroadcastOp);

/// Rewrite a vector `ext(bitcast)` to use a more efficient sequence of
/// vector operations comprising `shuffle` and `bitwise` ops.
/// Warning: these patterns currently only work for little endian targets.
FailureOr<Value> rewriteExtOfBitCast(RewriterBase &rewriter, Operation *extOp,
                                     vector::BitCastOp bitCastOp,
                                     vector::BroadcastOp maybeBroadcastOp);

/// Appends patterns for rewriting vector operations over narrow types with
/// ops over wider types.
/// Warning: these patterns currently only work for little endian targets.
void populateVectorNarrowTypeRewritePatterns(RewritePatternSet &patterns,
                                             PatternBenefit benefit = 1);

/// Appends patterns for emulating a sub-byte vector transpose.
void populateVectorTransposeNarrowTypeRewritePatterns(
    RewritePatternSet &patterns, PatternBenefit benefit = 1);

/// Populates patterns for ND vectors (N >= 2) linearization and sets up the
/// provided ConversionTarget with the appropriate legality configuration for
/// the ops to get converted properly.
void populateVectorLinearizeTypeConversionsAndLegality(
    TypeConverter &typeConverter, RewritePatternSet &patterns,
    ConversionTarget &target, unsigned targetBitWidth);
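
// Usage sketch (illustrative) of running the linearization as a partial
// dialect conversion from inside a pass's runOnOperation(). The conversion
// driver is assumed to come from mlir/Transforms/DialectConversion.h, and
// `rootOp` and the 128-bit threshold are placeholder values.
//
//   TypeConverter typeConverter;
//   RewritePatternSet patterns(rootOp->getContext());
//   ConversionTarget target(*rootOp->getContext());
//   populateVectorLinearizeTypeConversionsAndLegality(
//       typeConverter, patterns, target, /*targetBitWidth=*/128);
//   if (failed(applyPartialConversion(rootOp, target, std::move(patterns))))
//     signalPassFailure();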

/// Populates patterns for linearizing ND (N >= 2) vector operations to 1D
/// vector shuffle operations.
void populateVectorLinearizeShuffleLikeOpsPatterns(
    const TypeConverter &typeConverter, RewritePatternSet &patterns,
    ConversionTarget &target, unsigned targetBitWidth);

} // namespace vector
} // namespace mlir

#endif // MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H