1 //===- Transforms.h - Linalg transformations as patterns --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
10 #define MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
11 
12 #include <utility>
13 
22 #include "mlir/IR/PatternMatch.h"
26 #include "llvm/ADT/SmallBitVector.h"
27 #include "llvm/ADT/SmallSet.h"
28 
29 namespace mlir {
30 namespace bufferization {
31 class BufferizeTypeConverter;
32 } // namespace bufferization
33 
34 class FrozenRewritePatternSet;
35 
36 namespace linalg {
37 
38 struct LinalgElementwiseFusionOptions;
39 struct LinalgFusionOptions;
40 struct LinalgTilingOptions;
41 
42 //===----------------------------------------------------------------------===//
43 // Transformations exposed as function calls.
44 //===----------------------------------------------------------------------===//
46 
49 
50 /// Populate patterns for splitting a `LinalgOp` with multiple statements within
51 /// its payload into multiple `GenericOp` ops that each have a single statement.
52 /// The option `removeDeadArgsAndResults` adds patterns to remove dead arguments
53 /// and results from the generated decomposed ops. It defaults to `true` since
54 /// the core decomposition patterns rely on these cleanup patterns. It is set
55 /// to `false` only for testing purposes.
56 void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns,
57                                        bool removeDeadArgsAndResults = true);
58 
59 /// Populate patterns that convert non-destination-style ops to destination
60 /// style ops.
61 void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns);
62 
63 /// Populate patterns for vectorizing low-D convolution ops. This is a step in
64 /// progressive lowering for convolution ops; it assumes high-D convolution ops
65 /// were decomposed previously.
66 void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns,
67                                               PatternBenefit benefit = 1);
68 
69 /// Populate patterns that convert `ElementwiseMappable` ops to linalg
70 /// parallel loops.
71 void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns);
72 
73 /// Populate patterns that are only useful in the context of sparse tensors.
74 void populateSparseTensorRewriting(RewritePatternSet &patterns);
75 
76 /// Function type which is used to control when to stop fusion. It is expected
77 /// that OpOperand is not modified in the callback. The OpOperand is not marked
78 /// as const to allow callers to use non-const methods.
79 using ControlFusionFn = std::function<bool(OpOperand *fusedOperand)>;
80 
81 /// Patterns for fusing linalg operations on tensors.
82 
83 /// Pattern to fuse `linalg.generic` -> `linalg.generic` operations
84 /// when both operations are fusable elementwise operations.
85 void populateElementwiseOpsFusionPatterns(
86     RewritePatternSet &patterns,
87     const ControlFusionFn &controlElementwiseOpFusion);
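/// Example (editor's sketch, not part of the upstream header): a typical way
/// to use the fusion entry point above. `ctx` and `funcOp` stand for an
/// MLIRContext and the enclosing function, and the greedy driver lives in
/// "mlir/Transforms/GreedyPatternRewriteDriver.h".
///
/// ```
/// RewritePatternSet fusionPatterns(ctx);
/// // Fuse unconditionally; a real control function would inspect the operand.
/// populateElementwiseOpsFusionPatterns(
///     fusionPatterns, [](OpOperand *fusedOperand) { return true; });
/// (void)applyPatternsAndFoldGreedily(funcOp, std::move(fusionPatterns));
/// ```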
88 
89 /// Patterns to bubble up or down data layout ops across other operations.
90 void populateDataLayoutPropagationPatterns(RewritePatternSet &patterns);
91 
92 /// Pattern to remove dead operands and results of `linalg.generic` operations.
93 /// This is effectively DCE for a linalg op.
94 void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns);
95 
96 /// Patterns to promote inputs to outputs and remove unused inputs of
97 /// `linalg.generic` ops.
98 void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns);
99 
100 /// Function type to control generic op dimension collapsing. It is expected
101 /// to return an array of `ReassociationIndices` representing dimensions that
102 /// should be merged.
103 using GetCollapsableDimensionsFn =
104     std::function<SmallVector<ReassociationIndices>(linalg::GenericOp)>;
105 
106 /// Pattern to collapse dimensions in a linalg.generic op. This will collapse
107 /// tensor operands when needed and expand back the result tensors.
108 void populateCollapseDimensions(
109     RewritePatternSet &patterns,
110     const GetCollapsableDimensionsFn &controlCollapseDimensions);
111 
112 /// Patterns to fold an expanding (collapsing) tensor_reshape operation with its
113 /// producer (consumer) generic operation by expanding the dimensionality of the
114 /// loop in the generic op.
115 void populateFoldReshapeOpsByExpansionPatterns(
116     RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes);
117 
118 /// Patterns to fold an expanding tensor.expand_shape operation with its
119 /// producer generic operation by collapsing the dimensions of the generic op.
120 void populateFoldReshapeOpsByCollapsingPatterns(
121     RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes);
122 
123 /// Patterns to constant fold Linalg operations.
124 void populateConstantFoldLinalgOperations(RewritePatternSet &patterns,
125                                           const ControlFusionFn &controlFn);
126 
127 /// Pattern to fuse a `tensor.pad` operation with the producer of its source,
128 /// if the producer is a `linalg` operation with all parallel iterator types.
129 void populateFuseTensorPadWithProducerLinalgOpPatterns(
130     RewritePatternSet &patterns);
131 
132 /// Patterns to convert from one named op to another. These can be seen as
133 /// canonicalizations of named ops into another named op.
134 void populateLinalgNamedOpConversionPatterns(RewritePatternSet &patterns);
135 
136 /// Patterns to fold unit-extent dimensions in operands/results of linalg ops on
137 /// tensors via reassociative reshape ops.
138 void populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns);
139 
140 /// Patterns to fold unit-extent dimensions in operands/results of linalg ops on
141 /// tensors via rank-reducing slices.
142 void populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns);
143 
144 /// A pattern that converts init operands to input operands.
145 void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns);
146 
147 /// Patterns that are used to inline constant operands into linalg generic ops.
148 void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns);
149 
150 /// Patterns that are used to bubble up extract slice op above linalg op.
151 void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns);
152 
153 /// Adds patterns that swap tensor.extract_slice(linalg.fill(%cst, %init)) into
154 /// linalg.fill(%cst, tensor.extract_slice(%init)).
155 void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns);
156 
157 /// Return true if two `linalg.generic` operations with producer/consumer
158 /// relationship through `fusedOperand` can be fused using elementwise op
159 /// fusion.
160 bool areElementwiseOpsFusable(OpOperand *fusedOperand);
161 
162 /// Fuse two `linalg.generic` operations that have a producer-consumer
163 /// relationship captured through `fusedOperand`. The method expects
164 /// that `areElementwiseOpsFusable` returns true for the given `fusedOperand`.
165 FailureOr<Operation *> fuseElementwiseOps(RewriterBase &rewriter,
166                                           OpOperand *fusedOperand);
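/// A minimal call sketch (names are illustrative, not part of this header):
/// check that a producer-consumer edge is fusable and fuse it with a rewriter
/// positioned at the consumer.
///
/// ```
/// OpOperand *fusedOperand = &consumerGenericOp->getOpOperand(0);
/// if (areElementwiseOpsFusable(fusedOperand)) {
///   FailureOr<Operation *> fused = fuseElementwiseOps(rewriter, fusedOperand);
///   (void)fused;
/// }
/// ```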
167 
168 /// Split the given `op` into two parts along the given iteration space
169 /// `dimension` at the specified `splitPoint`, and return the two parts.
170 /// If the second part is statically known to be empty, do not create it
171 /// and return nullptr instead. Error state is signalled by returning
172 /// a pair of nullptrs.
173 ///
174 /// For example, the following op:
175 ///
176 /// linalg.matmul ins(%0, %1 : tensor<128x32xf32>, tensor<32x64xf32>)
177 /// outs(%2 : tensor<128x64xf32>)
178 ///
179 /// split along the first dimension at position 42 will result in:
180 ///
181 /// %3 = tensor.extract_slice %0[0, 0][42, 32][1, 1]
182 /// %4 = tensor.extract_slice %2[0, 0][42, 64][1, 1]
183 /// %5 = linalg.matmul ins(%3, %1 : tensor<42x32xf32>, tensor<32x64xf32>)
184 /// outs(%4 : tensor<42x64xf32>)
185 /// %6 = tensor.insert_slice %5 into %2[0, 0][42, 64][1, 1]
186 ///
187 /// %7 = tensor.extract_slice %0[42, 0][86, 32][1, 1]
188 /// %8 = tensor.extract_slice %6[42, 0][86, 64][1, 1]
189 /// %9 = linalg.matmul ins(%7, %1 : tensor<86x32xf32>, tensor<32x64xf32>)
190 /// outs(%8 : tensor<86x64xf32>)
191 /// tensor.insert_slice %9 into %6[42, 0][86, 64][1, 1]
192 ///
193 /// Note that there is no simplification other than constant propagation applied
194 /// to slice extraction and insertion.
195 std::pair<TilingInterface, TilingInterface> splitOp(RewriterBase &rewriter,
196  TilingInterface op,
197  unsigned dimension,
198  OpFoldResult splitPoint);
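/// A minimal call sketch (assuming `rewriter` and an `op` implementing
/// TilingInterface exist at the call site):
///
/// ```
/// auto [firstPart, secondPart] =
///     splitOp(rewriter, op, /*dimension=*/0,
///             /*splitPoint=*/rewriter.getIndexAttr(42));
/// if (!firstPart)
///   return; // Both parts are null in the error state.
/// ```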
199 
200 /// Perform standalone tiling of a single LinalgOp by `tileSizes`
201 /// and permute the loop nest according to `interchangeVector`.
202 /// The permutation is expressed as a list of integers that specify
203 /// the new ordering of the loop nest. The length of `interchangeVector`
204 /// must be equal to the length of `tileSizes`.
205 /// An empty vector is interpreted as the identity permutation and the
206 /// transformation returns early.
207 ///
208 /// Return a struct containing the tiled loops in the specified order
209 /// and the cloned op if successful, std::nullopt otherwise.
210 ///
211 /// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed by
212 /// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be
213 /// integers, in the range 0..`tileSizes.size()` without duplications
214 /// (i.e. `[1,1,2]` is an invalid permutation).
215 struct TiledLinalgOp {
216   LinalgOp op;
217   SmallVector<Operation *, 8> loops;
218   SmallVector<Value, 4> tensorResults;
219 };
220 FailureOr<TiledLinalgOp> tileLinalgOp(RewriterBase &b, LinalgOp op,
221                                       const LinalgTilingOptions &options);
222 
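/// Example (editor's sketch, not part of the upstream header): tile a matmul
/// by constant sizes 8x16x32; `rewriter` and `matmulOp` are assumed to exist
/// at the call site, and `LinalgTilingOptions` is documented further below.
///
/// ```
/// LinalgTilingOptions tilingOptions;
/// tilingOptions.setTileSizes({8, 16, 32});
/// FailureOr<TiledLinalgOp> tiled =
///     tileLinalgOp(rewriter, matmulOp, tilingOptions);
/// if (failed(tiled))
///   return;
/// ```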
223 /// Try to peel and canonicalize loop `op` and return the new result.
224 // TODO: Add support for scf.parallel and affine.for loops.
225 SmallVector<Value> peelLoop(RewriterBase &rewriter, Operation *op);
226 /// Peel and canonicalize 'loops'.
227 void peelLoops(RewriterBase &rewriter, ArrayRef<scf::ForOp> loops);
228 
229 /// Interchange the `iterator_types` and `indexing_maps` dimensions and adapt
230 /// the index accesses of `op`. This is an in-place transformation controlled
231 /// by `interchangeVector`. An empty vector is interpreted as the identity
232 /// permutation and the transformation returns early.
233 ///
234 /// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed with
235 /// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be
236 /// integers, in the range 0..`op.rank` without duplications
237 /// (i.e. `[1,1,2]` is an invalid permutation).
238 ///
239 /// Return failure if the permutation is not valid.
240 FailureOr<GenericOp> interchangeGenericOp(RewriterBase &rewriter,
241                                           GenericOp genericOp,
242                                           ArrayRef<unsigned> interchangeVector);
243 
244 /// Create a GenericOp from the given named operation `namedOp` and replace
245 /// namedOp.
246 /// Return failure if `namedOp` is a GenericOp or lacks a region builder.
247 FailureOr<GenericOp> generalizeNamedOp(RewriterBase &rewriter,
248                                        LinalgOp namedOp);
249 
250 /// Callback function type used to perform the allocation for the promoted
251 /// `subView`. In `boundingSubViewsize` a best attempt is made to find the
252 /// smallest constant value for the size of the buffer needed for each
253 /// dimension. If that is not possible, it contains the dynamic size of the
254 /// subview. The callback should return the buffer to use.
255 using AllocBufferCallbackFn = std::function<std::optional<Value>(
256  OpBuilder &b, memref::SubViewOp subView,
257  ArrayRef<Value> boundingSubViewSize, DataLayout &layout)>;
258 
259 /// Callback function type used to deallocate the buffers used to hold the
260 /// promoted subview.
261 using DeallocBufferCallbackFn =
262     std::function<LogicalResult(OpBuilder &b, Value buffer)>;
263 
264 /// Callback function type used to insert copy from original subview to
265 /// subview of the promoted region for the read operands/subview of promoted
266 /// region to original subview for the results. The copy has to happen from
267 /// `src` to `dst`.
268 using CopyCallbackFn =
269     std::function<LogicalResult(OpBuilder &b, Value src, Value dst)>;
270 
271 struct LinalgPromotionOptions {
272   /// Indices of subViews to promote. If `std::nullopt`, try to promote all
273  /// operands.
274  std::optional<DenseSet<unsigned>> operandsToPromote;
277  operandsToPromote->insert(operands.begin(), operands.end());
278  return *this;
279  }
280  /// If ith element of `useFullTiles` is true the full view should be used
281  /// for the promoted buffer of the ith operand in `operandsToPromote`.
282  /// Otherwise the partial view will be used. The decision is defaulted to
283   /// `useFullTileBuffersDefault` when `useFullTileBuffers` is `std::nullopt` and for
284  /// operands missing from `useFullTileBuffers`.
285  std::optional<llvm::SmallBitVector> useFullTileBuffers;
287  unsigned size = useFullTiles.size();
288  llvm::SmallBitVector tmp(size, false);
289  for (unsigned i = 0; i < size; ++i)
290  tmp[i] = useFullTiles[i];
291  useFullTileBuffers = tmp;
292  return *this;
293  }
294  /// If true all operands unspecified by `useFullTileBuffers` will use the
295  /// full view, otherwise the partial view.
299  return *this;
300  }
301  /// Alignment of promoted buffer. If `std::nullopt` do not specify alignment.
302  std::optional<unsigned> alignment;
304  alignment = align;
305  return *this;
306  }
307  /// Use alloca with the default allocation scheme.
308  bool useAlloca = false;
310  useAlloca = use;
311  return *this;
312  }
313  /// Callback function to do the allocation of the promoted buffer. If
314  /// std::nullopt, then the default allocation scheme of allocating a
315  /// memref<?xi8> buffer followed by a view operation is used.
316  std::optional<AllocBufferCallbackFn> allocationFn;
317  std::optional<DeallocBufferCallbackFn> deallocationFn;
320  DeallocBufferCallbackFn const &deallocFn) {
321  allocationFn = allocFn;
322  deallocationFn = deallocFn;
323  return *this;
324  }
325  /// Callback function to do the copy of data to and from the promoted
326  /// subview. If std::nullopt then a memref.copy is used.
327  std::optional<CopyCallbackFn> copyInFn;
328  std::optional<CopyCallbackFn> copyOutFn;
330  CopyCallbackFn const &copyOut) {
331  copyInFn = copyIn;
332  copyOutFn = copyOut;
333  return *this;
334  }
335 };
336 
337 /// Create a new buffer using the `allocationFn` provided. The size of this
338 /// buffer is the smallest constant bounding size along each dimension that
339 /// can be computed for the size of the result of `subView`. Returns the
340 /// allocated buffer as `fullLocalView` and the view that matches the size of
341 /// the result of subview operation as `partialLocalView`.
342 struct PromotionInfo {
343   Value fullLocalView;
344   Value partialLocalView;
345 };
346 FailureOr<PromotionInfo>
347 promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView,
348  const AllocBufferCallbackFn &allocationFn,
349  DataLayout &layout);
350 
351 /// Promote the `subViews` into a new buffer allocated at the insertion point
352 /// `b`. Promotion occurs in 3 steps:
353 /// 1. Create a new buffer for a full tile (i.e. not clipped at the
354 /// boundary).
355 /// 2. Take a full view on the buffer.
356 /// 3. Take a partial slice of the full view in step 2. and copy into it.
357 ///
358 /// Return the modified linalg op (the modification happens in place) as well
359 /// as all the copy ops created.
360 FailureOr<LinalgOp> promoteSubViews(OpBuilder &b, LinalgOp op,
361                                     const LinalgPromotionOptions &options);
362 
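/// Example (editor's sketch): run promotion with default options on a
/// linalg-on-buffers op; `builder` and `linalgOp` are assumed to exist.
///
/// ```
/// LinalgPromotionOptions promotionOptions;
/// if (succeeded(promoteSubviewsPrecondition(linalgOp, promotionOptions))) {
///   FailureOr<LinalgOp> promoted =
///       promoteSubViews(builder, linalgOp, promotionOptions);
///   (void)promoted;
/// }
/// ```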
363 /// Emit a suitable vector form for a Linalg op. If provided, `inputVectorSizes`
364 /// are used to vectorize this operation. `inputVectorSizes` must match the rank
365 /// of the iteration space of the operation and the sizes must be smaller than
366 /// or equal to their counterpart iteration space sizes, if static.
367 /// `inputVectorSizes` also allows the vectorization of operations with dynamic
368 /// shapes.
369 LogicalResult vectorize(RewriterBase &rewriter, LinalgOp linalgOp,
370  ArrayRef<int64_t> inputVectorSizes = {},
371  bool vectorizeNDExtract = false);
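/// Example (editor's sketch): vectorize a statically shaped op in place,
/// reusing the precondition helper declared later in this header; `rewriter`
/// and `linalgOp` are assumed to exist.
///
/// ```
/// if (succeeded(vectorizeLinalgOpPrecondition(linalgOp)))
///   (void)vectorize(rewriter, linalgOp);
/// ```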
372 
373 /// Emit a suitable vector form for a Copy op with fully static shape.
374 LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp);
375 
376 /// Emit a loop nest of `scf.for` with the proper body for `linalgOp`.
377 FailureOr<LinalgLoops> linalgOpToLoops(PatternRewriter &rewriter,
378                                        LinalgOp linalgOp);
379 
380 /// Emit a loop nest of `scf.parallel` with the proper body for `linalgOp`.
381 FailureOr<LinalgLoops> linalgOpToParallelLoops(PatternRewriter &rewriter,
382                                                LinalgOp linalgOp);
383 
384 /// Emit a loop nest of `affine.for` with the proper body for `linalgOp`.
385 FailureOr<LinalgLoops> linalgOpToAffineLoops(PatternRewriter &rewriter,
386                                              LinalgOp linalgOp);
387 
388 //===----------------------------------------------------------------------===//
389 // Preconditions that ensure the corresponding transformation succeeds and can
390 // be applied as a rewrite pattern.
391 //===----------------------------------------------------------------------===//
392 /// Promote memref.subviews feeding linalg-on-buffers operations.
393 LogicalResult promoteSubviewsPrecondition(Operation *op,
394                                           LinalgPromotionOptions options);
395 
396 /// Return success if the operation can be vectorized.
397 LogicalResult
398 vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
399  ArrayRef<int64_t> inputVectorSizes = {},
400  bool vectorizeNDExtract = false);
401 
402 //===----------------------------------------------------------------------===//
403 // Transformations exposed as rewrite patterns.
404 //===----------------------------------------------------------------------===//
405 
406 using TileSizeComputationFunction =
407     std::function<SmallVector<Value, 4>(OpBuilder &, Operation *)>;
408 
409 /// Creates a number of ranges equal to the number of non-zero entries in `tileSizes`.
410 /// One for each loop of the LinalgOp that is tiled. The `tileSizes` argument
411 /// has one entry per surrounding loop. It uses zero as the convention that a
412 /// particular loop is not tiled. This convention simplifies implementations
413 /// by avoiding affine map manipulations. The returned ranges correspond to
414 /// the loop ranges, in the proper order, that are tiled and for which new
415 /// loops will be created. Also the function returns a map from loop indices
416 /// of the LinalgOp to the corresponding non-empty range indices of newly
417 /// created loops.
418 using LoopIndexToRangeIndexMap = DenseMap<int, int>;
419 std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>
420 makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
421                     ArrayRef<OpFoldResult> allShapeSizes,
422                     ArrayRef<OpFoldResult> allTileSizes);
423 
424 namespace detail {
425 template <typename T>
426 struct MultiSizeSpecificationBase {
427   /// Tile sizes.
428   T lowTileSize, highTileSize;
429   /// Number of tiles associated with each size.
430   T lowTripCount, highTripCount;
431 };
432 } // namespace detail
433 
434 /// A description of a multi-size tiling comprising tile sizes and numbers of
435 /// tiles, expressed as Values which may or may not be constant. Multi-size
436 /// currently means two-size.
437 struct MultiSizeSpecification
438     : public detail::MultiSizeSpecificationBase<Value> {};
439 struct StaticMultiSizeSpecification
440     : public detail::MultiSizeSpecificationBase<int64_t> {};
441 
442 /// Emits the IR computing the multi-sized tiling specification with two tile
443 /// sizes not exceeding `targetSize`, each divisible by `sizeDivisor`, such
444 /// that there exist numbers of tiles with these sizes that fully cover the
445 /// given iteration space `dimension` of the structured `op`.
446 ///
447 /// The computation is as follows:
448 ///
449 /// b = originalTripCount floordiv sizeDivisor
450 /// t = (targetSize + sizeDivisor - 1) floordiv sizeDivisor
451 /// d = (b + t - 1) floordiv t
452 /// s = (b floordiv d) * sizeDivisor
453 /// v = b % d
454 /// u = d - v
455 ///
456 /// where the tile sizes are `s` and `s` + `sizeDivisor`, and the numbers of
457 /// the corresponding tiles are `u` and `v`, respectively. Alternatively,
458 ///
459 /// s * u + (s + sizeDivisor) * v == original size,
460 /// where s mod sizeDivisor = 0.
461 ///
462 /// Expects all values to be positive. In some cases with the target tile size
463 /// sufficiently close to the dimension shape and non-unit divisor, it is
464 /// impossible to compute such sizes. If `emitAssertion` is set, also emit the
465 /// assertion that size computation succeeded.
466 ///
467 /// Returns the specification consisting of both tile values and the number of
468 /// tiles of each size.
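/// For instance, with an original trip count of 15, `targetSize` = 8, and
/// `sizeDivisor` = 1 (a worked example added for illustration):
///   b = 15, t = 8, d = ceil(15 / 8) = 2, s = (15 floordiv 2) * 1 = 7,
///   v = 15 mod 2 = 1, u = 2 - 1 = 1,
/// i.e. one tile of size 7 and one tile of size 8, and 7 * 1 + 8 * 1 == 15.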
469 FailureOr<MultiSizeSpecification>
470 computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension,
471  OpFoldResult targetSize, OpFoldResult divisor,
472  bool emitAssertions = true);
473 FailureOr<StaticMultiSizeSpecification>
474 computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize,
475  int64_t divisor);
476 
477 /// Rewrite a TilingInterface `op` to a tiled `scf.foreach_thread`, applying
478 /// tiling by `numThreads`.
479 /// If non-empty, the `mapping` is added as an attribute to the
480 /// resulting `scf.foreach_thread`.
481 /// Zero tile sizes indicate that the dimension is not tiled, and can be
482 /// thought of as tiling by the full size of data. It is the user's
483 /// responsibility to ensure that `numThreads` is a valid tiling specification
484 /// (i.e. that only tiles parallel dimensions, e.g. in the Linalg case).
485 struct ForeachThreadTilingResult {
486   Operation *tileOp;
487   Operation *tiledOp;
488 };
489 FailureOr<ForeachThreadTilingResult>
490 tileToForeachThreadOp(RewriterBase &builder, TilingInterface op,
491  ArrayRef<OpFoldResult> numThreads,
492  std::optional<ArrayAttr> mapping);
493 
494 /// Same as `tileToForeachThreadOp`, but calculate the number of threads
495 /// required using the given tileSizes.
496 FailureOr<ForeachThreadTilingResult>
497 tileToForeachThreadOpUsingTileSizes(RewriterBase &builder, TilingInterface op,
498  ArrayRef<OpFoldResult> tileSizes,
499  std::optional<ArrayAttr> mapping);
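/// Example (editor's sketch): tile to an scf.foreach_thread with 8x16 tile
/// sizes and no thread mapping; `rewriter` and `tilingInterfaceOp` are
/// assumed to exist at the call site.
///
/// ```
/// SmallVector<OpFoldResult> tileSizes = {rewriter.getIndexAttr(8),
///                                        rewriter.getIndexAttr(16)};
/// FailureOr<ForeachThreadTilingResult> tiled =
///     tileToForeachThreadOpUsingTileSizes(rewriter, tilingInterfaceOp,
///                                         tileSizes, /*mapping=*/std::nullopt);
/// ```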
500 
501 /// Transformation information returned after reduction tiling.
502 struct ForeachThreadReductionTilingResult {
503   /// The partial reduction tiled op generated.
504   Operation *parallelTiledOp;
505   /// The final reduction operation merging all the partial reductions.
506   Operation *mergeOp;
507   /// The op initializing the tensor used for partial reductions.
508   Operation *initialOp;
509   /// The `scf.foreach_thread` operation that iterates over the tiles.
510   scf::ForeachThreadOp loops;
511 };
512 
513 /// Method to tile a reduction to parallel iterations computing partial
514 /// reductions. After the loop, all the partial reductions are merged into a final
515 /// reduction. For example for the following sequence
516 ///
517 /// ```mlir
518 /// %0 = linalg.generic %in ["parallel", "reduction"]
519 /// : tensor<7x9xf32> -> tensor<7xf32>
520 /// ```
521 ///
522 /// into:
523 ///
524 /// ```mlir
525 /// %0 = linalg.fill ... : tensor<7x4xf32>
526 /// %1 = scf.foreach_thread (%iv) in (%c4) shared_outs(%arg0 = %0)
527 /// -> (tensor<7x4xf32>) {
528 /// %2 = tensor.extract_slice %arg0 : tensor<7x4xf32> to tensor<7xf32>
529 /// %3 = tensor.extract_slice %in : tensor<7x9xf32> to tensor<7x?xf32>
530 /// %4 = linalg.generic %2, %3 ["parallel", "reduction"]
531 /// : tensor<7x?xf32> -> tensor<7xf32>
532 /// %5 = tensor.insert_slice %4, %arg0[0, %iv] : tensor<7x4xf32>
533 /// }
534 /// %6 = linalg.generic %1 ["parallel", "reduction"]
535 /// : tensor<7x4xf32> -> tensor<7xf32>
536 /// ```
537 FailureOr<ForeachThreadReductionTilingResult> tileReductionUsingForeachThread(
538     RewriterBase &b, PartialReductionOpInterface op,
539  ArrayRef<OpFoldResult> numThreads, ArrayRef<OpFoldResult> tileSizes = {},
540  std::optional<ArrayAttr> mapping = std::nullopt);
541 
542 /// All indices returned by IndexOp should be invariant with respect to
543 /// tiling. Therefore, if an operation is tiled, we have to transform the
544 /// indices accordingly, i.e. offset them by the values of the corresponding
545 /// induction variables that are captured implicitly in the body of the op.
546 ///
547 /// Example. `linalg.generic` before tiling:
548 ///
549 /// #id_2d = (i, j) -> (i, j)
550 /// #pointwise_2d_trait = {
551 /// indexing_maps = [#id_2d, #id_2d],
552 /// iterator_types = ["parallel", "parallel"]
553 /// }
554 /// linalg.generic #pointwise_2d_trait %operand, %result {
555 /// ^bb0(%operand_in: f32, %result_in: f32):
556 /// %i = linalg.index 0 : index
557 /// %j = linalg.index 1 : index
558 /// <some operations that use %i, %j>
559 /// }: memref<50x100xf32>, memref<50x100xf32>
560 ///
561 /// After the tiling pass with tile sizes 10 and 25:
562 ///
563 /// #strided = (i, j)[s0, s1, s2] -> (i * s1 + s0 + j * s2)
564 ///
565 /// %c1 = arith.constant 1 : index
566 /// %c0 = arith.constant 0 : index
567 /// %c25 = arith.constant 25 : index
568 /// %c10 = arith.constant 10 : index
569 /// operand_dim_0 = dim %operand, 0 : memref<50x100xf32>
570 /// operand_dim_1 = dim %operand, 1 : memref<50x100xf32>
571 /// scf.for %k = %c0 to operand_dim_0 step %c10 {
572 /// scf.for %l = %c0 to operand_dim_1 step %c25 {
573 /// %4 = memref.subview %operand[%k, %l][%c10, %c25][%c1, %c1]
574 /// : memref<50x100xf32> to memref<?x?xf32, #strided>
575 /// %5 = memref.subview %result[%k, %l][%c10, %c25][%c1, %c1]
576 /// : memref<50x100xf32> to memref<?x?xf32, #strided>
577 /// linalg.generic pointwise_2d_trait %4, %5 {
578 /// ^bb0(%operand_in: f32, %result_in: f32):
579 /// %i = linalg.index 0 : index
580 /// %j = linalg.index 1 : index
581 /// // Indices `k` and `l` are implicitly captured in the body.
582 /// %transformed_i = arith.addi %i, %k : index // index `i` is offset by %k
583 /// %transformed_j = arith.addi %j, %l : index // index `j` is offset by %l
584 /// // Every use of %i, %j is replaced with %transformed_i, %transformed_j
585 /// <some operations that use %transformed_i, %transformed_j>
588 /// }: memref<?x?xf32, #strided>, memref<?x?xf32, #strided>
589 /// }
590 /// }
591 ///
592 /// TODO: Investigate whether mixing implicit and explicit indices
593 /// does not lead to losing information.
594 void transformIndexOps(RewriterBase &b, LinalgOp op,
595                        SmallVectorImpl<Value> &ivs,
596                        const LoopIndexToRangeIndexMap &loopIndexToRangeIndex);
597 
598 struct LinalgPaddingOptions {
599   /// A padding value for every operand.
602  paddingValues.assign(pv.begin(), pv.end());
603  return *this;
604  }
605  /// A list of iterator dimensions to pad.
608  paddingDimensions.assign(pd.begin(), pd.end());
609  return *this;
610  }
611  /// A flag for every operand to mark the PadOp as nofold which enables
612  /// packing for statically shaped operands.
615  packPaddings.assign(pp.begin(), pp.end());
616  return *this;
617  }
618  /// A number of loops to hoist the PadOp out for every operand.
621  hoistPaddings.assign(hp.begin(), hp.end());
622  return *this;
623  }
624  /// A permutation vector for every operand used to transpose the packed
625  /// PadOp results.
629  transposePaddings.assign(tp.begin(), tp.end());
630  return *this;
631  }
632 };
633 
635  /// Tile sizes used to tile the root operation.
638  tileSizes.assign(ts.begin(), ts.end());
639  return *this;
640  }
641  /// Tile interchange used to permute the tile loops.
643  /// When specified, specifies distribution of generated tile loops to
644  /// processors.
645  std::optional<LinalgLoopDistributionOptions> tileDistribution;
648  tileDistribution = std::move(distributionOptions);
649  return *this;
650  }
651 };
652 
653 struct LinalgTilingOptions {
654   /// Computation function that returns the tile sizes for each operation.
655  /// Delayed construction of constant tile sizes should occur to interoperate
656  /// with folding.
658 
661  tileSizeComputationFunction = std::move(fun);
662  return *this;
663  }
664  /// Set the `tileSizeComputationFunction` to return the values `ts`. The
665  /// values must not fold away when tiling. Otherwise, use a more robust
666  /// `tileSizeComputationFunction`.
668  tileSizeComputationFunction = [=](OpBuilder &, Operation *) { return ts; };
669  return *this;
670  }
671  /// Convenience function to set the `tileSizeComputationFunction` to a
672  /// function that computes tile sizes at the point they are needed. Allows
673  /// proper interaction with folding.
675 
676  /// Tile all dynamic dimensions by 1. I.e., scalarize those dimensions.
677  /// Note: `scalarizeDynamicDims` and `setTileSizes` cannot be used together.
679 
680  /// The interchange vector to reorder the tiled loops.
682 
684  interchangeVector.assign(interchange.begin(), interchange.end());
685  return *this;
686  }
687 
688  /// The type of tile loops to generate.
690 
692  loopType = lt;
693  return *this;
694  }
695 
696  /// When specified, specifies distribution of generated tile loops to
697  /// processors.
698  std::optional<LinalgLoopDistributionOptions> distribution;
699 
702  distribution = std::move(distributionOptions);
703  return *this;
704  }
705 
706  /// Specification markers of how to distribute the `linalg.tiled_loop`.
708 
710  distributionTypes.assign(types.begin(), types.end());
711  return *this;
712  }
713 
714  /// Peel the specified loops.
716 
718  peeledLoops.clear();
719  peeledLoops.append(loops.begin(), loops.end());
720  return *this;
721  }
722 };
723 
724 /// Canonicalization patterns relevant to apply after tiling patterns. These
725 /// are applied automatically by the tiling pass but need to be applied
726 /// manually when tiling is called programmatically.
727 RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx);
728 void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns);
729 
730 ///
731 /// Linalg padding pattern.
732 ///
733 /// Apply the `padding` transformation as a pattern.
734 /// See `padding` for more details.
735 struct LinalgPaddingPattern : public OpInterfaceRewritePattern<LinalgOp> {
736   LinalgPaddingPattern(MLIRContext *context,
737                        LinalgPaddingOptions options = LinalgPaddingOptions(),
738                        PatternBenefit benefit = 1);
739 
740  /// `matchAndRewrite` implementation that returns the significant
741  /// transformed pieces of IR.
743  PatternRewriter &rewriter) const;
744 
746  PatternRewriter &rewriter) const override {
747  return returningMatchAndRewrite(op, rewriter);
748  }
749 
750 private:
751  /// Options to control padding and hoisting.
752  LinalgPaddingOptions options;
753 };
754 
755 /// Rewrites 2-D convolution ops with size-1 window dimensions into 1-D
756 /// convolution ops.
757 template <typename Conv2DOp, typename Conv1DOp>
759  : public OpRewritePattern<Conv2DOp> {
761 
763  PatternRewriter &rewriter) const;
764 
766  PatternRewriter &rewriter) const override {
767  return returningMatchAndRewrite(convOp, rewriter);
768  }
769 };
770 
771 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,
772  Conv1DNwcWcfOp>;
773 extern template struct DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
774  Conv1DNcwFcwOp>;
775 
776 /// Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh)
777 /// dimensions into 1-D depthwise convolution ops.
779  : public OpRewritePattern<DepthwiseConv2DNhwcHwcOp> {
781  PatternBenefit benefit = 1)
782  : OpRewritePattern<DepthwiseConv2DNhwcHwcOp>(context, benefit) {}
783 
785  returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp,
786  PatternRewriter &rewriter) const;
787 
788  LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp,
789  PatternRewriter &rewriter) const override {
790  return returningMatchAndRewrite(convOp, rewriter);
791  }
792 };
793 
794 ///
795 /// Linalg generalization pattern.
796 ///
797 /// Apply the `generalization` transformation as a pattern.
798 /// See `generalization` for more details.
799 //
800 // TODO: Automatic default pattern class that just unwraps a function
801 // returning FailureOr<GenericOp>.
803  : public OpInterfaceRewritePattern<LinalgOp> {
805 
806  /// `matchAndRewrite` implementation that returns the significant
807  /// transformed pieces of IR.
809  returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const {
810  return generalizeNamedOp(rewriter, op);
811  }
812 
814  PatternRewriter &rewriter) const override {
815  return returningMatchAndRewrite(op, rewriter);
816  }
817 };
818 
819 /// Vectorization pattern for memref::CopyOp.
820 struct CopyVectorizationPattern : public OpRewritePattern<memref::CopyOp> {
822 
823  LogicalResult matchAndRewrite(memref::CopyOp copyOp,
824  PatternRewriter &rewriter) const override;
825 };
826 
827 /// Return vector::CombiningKind for the given op.
828 std::optional<vector::CombiningKind> getCombinerOpKind(Operation *combinerOp);
829 
830 //===----------------------------------------------------------------------===//
831 // Transformations exposed as rewrite patterns.
832 //===----------------------------------------------------------------------===//
833 
834 /// Linalg generalization patterns
835 
836 /// Populates `patterns` with patterns to convert spec-generated named ops to
837 /// linalg.generic ops.
838 void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns);
839 
840 /// Linalg decompose convolutions patterns
841 
842 /// Populates patterns to decompose high-D convolution ops into low-D ones.
843 /// This is a step in progressive lowering for convolution ops; afterwards the
844 /// low-D convolution ops can be vectorized.
845 void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
846                                           PatternBenefit benefit = 1);
847 
848 //===----------------------------------------------------------------------===//
849 // Op-specific patterns.
850 //===----------------------------------------------------------------------===//
851 
852 /// tensor::PadOp is not canonicalized away yet, so we provide a
853 /// transformation to `linalg.generic`.
854 struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
856 
857  LogicalResult matchAndRewrite(tensor::PadOp padOp,
858  PatternRewriter &rewriter) const override;
859 };
860 
861 /// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands
862 /// to a static bounding box. Use `paddingValues` and `packPaddings` to set
863 /// padding value and nofold attribute of the created tensor::PadOps,
864 /// respectively. Update `paddedOp` to the cloned operation with statically
865 /// shaped `paddingDimensions` and return the extracted dynamically shaped
866 /// results. If padding fails, return failure.
867 FailureOr<SmallVector<Value>>
868 rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
869  ArrayRef<int64_t> paddingDimensions,
870  ArrayRef<Attribute> paddingValues,
871  ArrayRef<bool> packPaddings, LinalgOp &paddedOp);
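/// Example (editor's sketch): pad all operands of a `linalg.matmul` to a
/// static bounding box with zero padding and no packing. `b` and `matmulOp`
/// are assumed to exist; the f32 zero attribute is one possible padding value.
///
/// ```
/// LinalgOp paddedOp;
/// Attribute zero = b.getZeroAttr(b.getF32Type());
/// SmallVector<Attribute> paddingValues(3, zero);
/// FailureOr<SmallVector<Value>> replacements = rewriteAsPaddedOp(
///     b, matmulOp, /*paddingDimensions=*/{0, 1, 2}, paddingValues,
///     /*packPaddings=*/{false, false, false}, paddedOp);
/// ```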
872 
873 using OptimizeCopyFn =
874     std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
875 
876 /// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
877 /// InsertSliceOp. For now, only constant padding values are supported.
878 /// `OptimizeCopyFn` can be used to customize copying step optimization.
879 struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
881  OptimizeCopyFn optimizeCopyFn = nullptr,
882  PatternBenefit benefit = 1)
883  : OpRewritePattern<tensor::PadOp>(context, benefit),
884  optimizeCopyFn(std::move(optimizeCopyFn)) {}
885  LogicalResult matchAndRewrite(tensor::PadOp padOp,
886  PatternRewriter &rewriter) const override;
887 
888 protected:
890  Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp,
891  Value dest,
892  const SmallVector<Value> &dynSizes) const;
893 };
894 
895 /// Rewrites a tensor::PackOp into a sequence of tensor.pad + linalg.transpose +
896 /// tensor.insert_slice ops, where the tensor::PackOp has all outer dims equal
897 /// to 1.
899  : public OpRewritePattern<tensor::PackOp> {
901  LogicalResult matchAndRewrite(tensor::PackOp packOp,
902  PatternRewriter &rewriter) const override;
903 };
904 
905 /// Rewrites a tensor::UnPackOp into a sequence of rank-reduced extract_slice op
906 /// + transpose op + insert_slice op, where the tensor::UnPackOp has all outer
907 /// dims equal to 1.
909  : public OpRewritePattern<tensor::UnPackOp> {
911  LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp,
912  PatternRewriter &rewriter) const override;
913 };
914 
915 /// Populates `patterns` with patterns that vectorize tensor.pad.
916 /// These patterns are meant to apply in a complementary fashion. Benefits
917 /// are used to encode a certain ordering of pattern application. To avoid
918 /// scattering magic constants throughout the code base, the patterns must be
919 /// added with this function. `baseBenefit` can be used to offset the benefit
920 /// of all tensor::PadOp vectorization patterns by a certain value.
921 void populatePadOpVectorizationPatterns(RewritePatternSet &patterns,
922                                         PatternBenefit baseBenefit = 1);
923 
924 void populateExtractOpVectorizationPatterns(RewritePatternSet &patterns,
925                                             PatternBenefit baseBenefit = 1);
926 
927 /// Match and rewrite for the pattern:
928 /// ```
929 /// %alloc = ...
930 /// [optional] %view = memref.view %alloc ...
931 /// %subView = subview %allocOrView ...
932 /// [optional] linalg.fill(%allocOrView, %cst) ...
933 /// ...
934 /// memref.copy(%in, %subView) ...
935 /// vector.transfer_read %allocOrView[...], %cst ...
936 /// ```
937 /// into
938 /// ```
939 /// [unchanged] %alloc = ...
940 /// [unchanged] [optional] %view = memref.view %alloc ...
941 /// [unchanged] %subView = subview %allocOrView ...
942 /// ...
943 /// vector.transfer_read %in[...], %cst ...
944 /// ```
945 /// Where there is no interleaved use between memref.copy and transfer_read as
946 /// well as no interleaved use between linalg.fill and memref.copy (if
947 /// linalg.fill is specified).
948 /// This is a custom rewrite to forward partial reads (with optional fills) to
949 /// vector.transfer_read.
950 struct LinalgCopyVTRForwardingPattern
951     : public OpRewritePattern<vector::TransferReadOp> {
953 
954  LogicalResult matchAndRewrite(vector::TransferReadOp xferOp,
955  PatternRewriter &rewriter) const override;
956 };
957 
958 /// Match and rewrite for the pattern:
959 /// ```
960 /// %alloc = ...
961 /// [optional] %view = memref.view %alloc ...
962 /// %subView = subview %allocOrView...
963 /// ...
964 /// vector.transfer_write %..., %allocOrView[...]
965 /// memref.copy(%subView, %out)
966 /// ```
967 /// into
968 /// ```
969 /// [unchanged] %alloc = ...
970 /// [unchanged] [optional] %view = memref.view %alloc ...
971 /// [unchanged] %subView = subview %allocOrView...
972 /// ...
973 /// vector.transfer_write %..., %out[...]
974 /// ```
975 /// Where there is no interleaved use between transfer_write and memref.copy.
976 /// This is a custom rewrite to forward partial writes to
977 /// vector.transfer_write.
978 struct LinalgCopyVTWForwardingPattern
979     : public OpRewritePattern<vector::TransferWriteOp> {
981 
982  LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp,
983  PatternRewriter &rewriter) const override;
984 };
985 
986 /// Rewrite extract_slice(tensor.pad(x)) into tensor.pad(extract_slice(x)).
987 struct ExtractSliceOfPadTensorSwapPattern
988     : public OpRewritePattern<tensor::ExtractSliceOp> {
989  /// A function to control pattern application and rewrite logic.
990  ///
991  /// The function will be given the slice op and should return:
992  /// - std::nullopt: to fail the match and not apply the pattern;
993  /// - true: to apply the pattern with zero slice guard;
994  /// - false: to apply the pattern without zero slice guard.
995  ///
996  /// See the documentation for tensor::bubbleUpPadSlice regarding zero slice
997  /// guard.
998  using ControlFn = std::function<std::optional<bool>(tensor::ExtractSliceOp)>;
999 
1000   ExtractSliceOfPadTensorSwapPattern(MLIRContext *context,
1001                                      ControlFn controlFn = nullptr,
1002  PatternBenefit benefit = 1)
1003  : OpRewritePattern(context, benefit), controlFn(std::move(controlFn)) {}
1004 
1005  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
1006  PatternRewriter &rewriter) const override;
1007 
1008 private:
1009  ControlFn controlFn;
1010 };
1011 
1012 /// Split Reduction options.
1013 struct SplitReductionOptions {
1014   // Ratio used to split the reduction dimension. If the ratio is <= 1,
1015  // nothing will be done.
1016  int64_t ratio = 0;
1017  // Index where the extra dimension is added to the intermediate tensor
1018  // shape.
1019  unsigned index = 0;
1020  // If the inner dimension after splitting is parallel or reduction.
1021  bool innerParallel = false;
1022 };
1023 
1024 /// Function signature to control reduction splitting. This returns
1025 /// `SplitReductionOptions`.
1026 // TODO: don't use unsigned unless doing bit manipulation.
1027 using ControlSplitReductionFn =
1028     std::function<SplitReductionOptions(LinalgOp op)>;
1029 
1030 /// Patterns to apply `splitReduction` below.
1031 void populateSplitReductionPattern(
1032     RewritePatternSet &patterns,
1033  const ControlSplitReductionFn &controlSplitReductionFn,
1034  bool useAlloc = false);
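/// Example (editor's sketch): split every reduction by a factor of 4, adding
/// the extra dimension at index 0; `ctx` is an assumed MLIRContext.
///
/// ```
/// ControlSplitReductionFn control = [](LinalgOp op) {
///   return SplitReductionOptions{/*ratio=*/4, /*index=*/0,
///                                /*innerParallel=*/false};
/// };
/// RewritePatternSet patterns(ctx);
/// populateSplitReductionPattern(patterns, control);
/// ```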
1035 
1036 /// Apply transformation to split the single linalg op reduction into a
1037 /// parallel and reduction dimension. Then create a new linalg.generic op
1038 /// doing the rest of the reduction. Return the new linalg op with an extra
1039 /// parallel dimension or failure if the transformation didn't happen.
1040 ///
1041 /// Example:
1042 /// ```
1043 /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
1044 /// affine_map<(d0) -> ()>],
1045 /// iterator_types = ["reduction"]}
1046 /// ins(%in : tensor<32xf32>)
1047 /// outs(%out : tensor<f32>) {
1048 /// ^bb0(%arg1: f32, %arg2: f32):
1049 /// %y = arith.addf %arg1, %arg2 : f32
1050 /// linalg.yield %y : f32
1051 /// } -> tensor<f32>
1052 /// ```
1053 /// To:
1054 /// ```
1055 /// %cst = arith.constant 0.000000e+00 : f32
1056 /// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
1057 /// %1 = tensor.empty [4] : tensor<4xf32>
1058 /// %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
1059 /// %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
1060 /// affine_map<(d0, d1) -> (d0)>],
1061 /// iterator_types = ["parallel", "reduction"]}
1062 /// ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {
1063 /// ^bb0(%arg3: f32, %arg4: f32):
1064 /// %5 = arith.addf %arg3, %arg4 : f32
1065 /// linalg.yield %5 : f32
1066 /// } -> tensor<4xf32>
1067 /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
1068 /// affine_map<(d0) -> ()>],
1069 /// iterator_types = ["reduction"]}
1070 /// ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) {
1071 /// ^bb0(%arg3: f32, %arg4: f32):
1072 /// %5 = arith.addf %arg3, %arg4 : f32
1073 /// linalg.yield %5 : f32
1074 /// } -> tensor<f32>
1075 /// ```
1076 struct SplitReductionResult {
1077   Operation *initOrAlloc;
1078   FillOp fillOp;
1079   LinalgOp splitLinalgOp;
1080   LinalgOp resultCombiningLinalgOp;
1081 };
1082 FailureOr<SplitReductionResult>
1083 splitReduction(PatternRewriter &b, LinalgOp op,
1084  const ControlSplitReductionFn &controlSplitReductionFn,
1085  bool useAlloc = false);
1086 
1087 /// Scaling-based implementation of the split reduction transformation.
1088 /// Instead of introducing an ExpandShapeOp, this rewrites a reduction
1089 /// dimension `k` into `k * scale + kk`.
1090 ///
1091 /// Example:
1092 /// ```
1093 /// %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
1094 /// outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
1095 /// ```
1096 ///
1097 /// Is transformed to:
1098 ///
1099 /// ```
1100 /// #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2 * 4 + d3)>
1101 /// #map1 = affine_map<(d0, d1, d2, d3) -> (d2 * 4 + d3, d1)>
1102 /// #map2 = affine_map<(d0, d1, d2, d3) -> (d2, d3)>
1103 /// #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
1104 /// #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
1105 /// #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
1106 /// %0 = tensor.empty [16, 32, 64] : tensor<16x32x64xf32>
1107 /// %cst = arith.constant 0.000000e+00 : f32
1108 /// %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
1109 /// tensor<16x32x64xf32>
1110 /// %2 = tensor.empty [64, 4] : tensor<64x4xi1>
1111 ///
1112 /// %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
1113 /// iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
1114 /// ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>,
1115 /// tensor<64x4xi1>)
1116 /// outs(%1 : tensor<16x32x64xf32>) {
1117 /// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
1118 /// %5 = arith.mulf %arg3, %arg4 : f32
1119 /// %6 = arith.addf %arg6, %5 : f32
1120 /// linalg.yield %6 : f32
1121 /// } -> tensor<16x32x64xf32>
1122 ///
1123 /// %4 = linalg.generic {indexing_maps = [#map4, #map5],
1124 /// iterator_types = ["parallel", "parallel", "reduction"]}
1125 /// ins(%3 : tensor<16x32x64xf32>)
1126 /// outs(%C : tensor<16x32xf32>) {
1127 /// ^bb0(%arg3: f32, %arg4: f32):
1128 /// %5 = arith.addf %arg3, %arg4 : f32
1129 /// linalg.yield %5 : f32
1130 /// } -> tensor<16x32xf32>
1131 ///
1132 /// return %4 : tensor<16x32xf32>
1133 /// ```
1134 FailureOr<SplitReductionResult>
1135 splitReductionByScaling(PatternRewriter &b, LinalgOp op,
1136                         const ControlSplitReductionFn &controlSplitReductionFn,
1137  bool useAlloc = false);
1138 
1139 /// Collapses dimensions of linalg.generic operation. It also collapses inputs
1140 /// before the op and expands outputs after the op.
1141 FailureOr<SmallVector<Value>> collapseGenericOpIterationDims(
1142     GenericOp genericOp, ArrayRef<ReassociationIndices> foldedIterationDims,
1143  RewriterBase &rewriter);
1144 
1145 /// Implement packing of a single LinalgOp by `packedSizes`.
1146 /// There must be one packedSizes entry per `linalgOp` iterator.
1147 /// Return the packed Linalg op on success, failure otherwise.
1148 FailureOr<linalg::LinalgOp> pack(RewriterBase &rewriter,
1149                                  linalg::LinalgOp linalgOp,
1150  ArrayRef<OpFoldResult> packedSizes);
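/// Example (editor's sketch): pack a matmul with inner tile sizes 8x16x32,
/// one entry per iterator; `rewriter` and `matmulOp` are assumed to exist.
///
/// ```
/// SmallVector<OpFoldResult> packedSizes = {rewriter.getIndexAttr(8),
///                                          rewriter.getIndexAttr(16),
///                                          rewriter.getIndexAttr(32)};
/// FailureOr<linalg::LinalgOp> packed = pack(rewriter, matmulOp, packedSizes);
/// ```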
1151 
1152 /// Struct to hold the result of a `packTranspose` call.
1153 struct PackTransposeResult {
1154   tensor::PackOp transposedPackOp;
1155  linalg::LinalgOp transposedLinalgOp;
1156  tensor::UnPackOp transposedUnPackOp;
1157 };
1158 /// Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the
1159 /// transposed PackOp -> LinalgOp -> UnPackOp chain after replacements.
1160 /// Return failure if either:
1161 /// 1. the `packOp` does not have the `linalgOp` as its unique use.
1162 /// 2. the `maybeUnPackOp`, if specified, must be a consumer of the result tied
1163 /// to the unique `packOp` use.
1164 /// 3. `outerPerm` (resp. `innerPerm`) must be valid permutations of
1165 /// `packOp.getOuterDimsPerm` (resp. `packOp.getInnerDimsPerm`) or empty.
1166 FailureOr<PackTransposeResult>
1167 packTranspose(RewriterBase &rewriter, tensor::PackOp packOp,
1168  linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp,
1169  ArrayRef<int64_t> outerPerm, ArrayRef<int64_t> innerPerm);
1170 
1171 } // namespace linalg
1172 } // namespace mlir
1173 
1174 #endif // MLIR_DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H
static llvm::ManagedStatic< PassManagerOptions > options
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:43
The main mechanism for performing data layout queries.
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:56
This class helps build Operations.
Definition: Builders.h:199
This class represents a single result from folding an operation.
Definition: OpDefinition.h:233
This class represents an operand of an operation.
Definition: Value.h:255
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:75
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
Definition: PatternMatch.h:33
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:621
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:399
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:93
void populateLinalgNamedOpConversionPatterns(RewritePatternSet &patterns)
Patterns to convert from one named op to another.
void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns)
A pattern that converts init operands to input operands.
FailureOr< LinalgLoops > linalgOpToLoops(PatternRewriter &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.for with the proper body for linalgOp.
Definition: Loops.cpp:374
FailureOr< GenericOp > generalizeNamedOp(RewriterBase &rewriter, LinalgOp namedOp)
Create a GenericOp from the given named operation namedOp and replace namedOp.
void populateSplitReductionPattern(RewritePatternSet &patterns, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Patterns to apply splitReduction below.
void populateDataLayoutPropagationPatterns(RewritePatternSet &patterns)
Patterns to bubble up or down data layout ops across other operations.
void populateFuseTensorPadWithProducerLinalgOpPatterns(RewritePatternSet &patterns)
Pattern to fuse a tensor.pad operation with the producer of its source, if the producer is a linalg o...
void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns)
Patterns that are used to bubble up extract slice op above linalg op.
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
Definition: Tiling.cpp:88
std::function< std::optional< Value >(OpBuilder &b, memref::SubViewOp subView, ArrayRef< Value > boundingSubViewSize, DataLayout &layout)> AllocBufferCallbackFn
Callback function type used to perform the allocation for the promoted subView.
Definition: Transforms.h:257
DenseMap< int, int > LoopIndexToRangeIndexMap
Creates a number of ranges equal to the number of non-zero in tileSizes.
Definition: Transforms.h:418
void populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociativ...
std::function< bool(OpOperand *fusedOperand)> ControlFusionFn
Function type which is used to control when to stop fusion.
Definition: Transforms.h:79
LogicalResult vectorize(RewriterBase &rewriter, LinalgOp linalgOp, ArrayRef< int64_t > inputVectorSizes={}, bool vectorizeNDExtract=false)
Emit a suitable vector form for a Linalg op.
void populateFoldReshapeOpsByCollapsingPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding tensor.expand_shape operation with its producer generic operation by co...
LinalgTilingLoopType
The type of loops to be generated during tiling.
Definition: Utils.h:150
FailureOr< Operation * > fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand)
Fuse two linalg.generic operations that have a producer-consumer relationship captured through fusedO...
std::function< LogicalResult(OpBuilder &b, Value buffer)> DeallocBufferCallbackFn
Callback function type used to deallocate the buffers used to hold the promoted subview.
Definition: Transforms.h:262
void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Populates patterns with patterns that vectorize tensor.pad.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
Definition: Tiling.cpp:970
FailureOr< ForeachThreadTilingResult > tileToForeachThreadOp(RewriterBase &builder, TilingInterface op, ArrayRef< OpFoldResult > numThreads, std::optional< ArrayAttr > mapping)
Definition: Tiling.cpp:426
void populateSparseTensorRewriting(RewritePatternSet &patterns)
Populate patterns that are only useful in the context of sparse tensors.
FailureOr< PromotionInfo > promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, const AllocBufferCallbackFn &allocationFn, DataLayout &layout)
Definition: Promotion.cpp:210
void peelLoops(RewriterBase &rewriter, ArrayRef< scf::ForOp > loops)
Peel and canonicalize 'loops'.
Definition: Transforms.cpp:239
FailureOr< SplitReductionResult > splitReduction(PatternRewriter &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
FailureOr< SmallVector< Value > > collapseGenericOpIterationDims(GenericOp genericOp, ArrayRef< ReassociationIndices > foldedIterationDims, RewriterBase &rewriter)
Collapses dimensions of linalg.generic operation.
void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns)
Populate patterns that convert non-destination-style ops to destination style ops.
std::function< SmallVector< Value, 4 >(OpBuilder &, Operation *)> TileSizeComputationFunction
Definition: Transforms.h:407
FailureOr< LinalgLoops > linalgOpToAffineLoops(PatternRewriter &rewriter, LinalgOp linalgOp)
Emit a loop nest of affine.for with the proper body for linalgOp.
Definition: Loops.cpp:368
LogicalResult vectorizeLinalgOpPrecondition(LinalgOp linalgOp, ArrayRef< int64_t > inputVectorSizes={}, bool vectorizeNDExtract=false)
Return success if the operation can be vectorized.
void populateElementwiseToLinalgConversionPatterns(RewritePatternSet &patterns)
Populate patterns that convert ElementwiseMappable ops to linalg parallel loops.
FailureOr< LinalgLoops > linalgOpToParallelLoops(PatternRewriter &rewriter, LinalgOp linalgOp)
Emit a loop nest of scf.parallel with the proper body for linalgOp.
Definition: Loops.cpp:381
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
Definition: Tiling.cpp:60
LogicalResult promoteSubviewsPrecondition(Operation *op, LinalgPromotionOptions options)
Promote memref.subviews feeding linalg-on-buffers operations.
Definition: Promotion.cpp:368
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Linalg decompose convolutions patterns.
Definition: Transforms.cpp:871
void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate patterns for vectorizing low-D convolution ops.
LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp)
Emit a suitable vector form for a Copy op with fully static shape.
FailureOr< GenericOp > interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, ArrayRef< unsigned > interchangeVector)
Interchange the iterator_types and iterator_maps dimensions and adapts the index accesses of op.
Definition: Interchange.cpp:51
std::function< LogicalResult(PatternRewriter &, tensor::PadOp, Value)> OptimizeCopyFn
Definition: Transforms.h:874
void populateCollapseDimensions(RewritePatternSet &patterns, const GetCollapsableDimensionsFn &controlCollapseDimensions)
Pattern to collapse dimensions in a linalg.generic op.
bool areElementwiseOpsFusable(OpOperand *fusedOperand)
Return true if two linalg.generic operations with producer/consumer relationship through fusedOperand...
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
Definition: Tiling.cpp:121
void populateEraseUnusedOperandsAndResultsPatterns(RewritePatternSet &patterns)
Pattern to remove dead operands and results of linalg.generic operations.
void populatePadTensorTilingPatterns(RewritePatternSet &patterns, const LinalgTilingOptions &options)
Definition: Tiling.cpp:998
FailureOr< SmallVector< Value > > rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, ArrayRef< int64_t > paddingDimensions, ArrayRef< Attribute > paddingValues, ArrayRef< bool > packPaddings, LinalgOp &paddedOp)
Pad the iterator dimensions paddingDimensions of all opToPad operands to a static bounding box.
Definition: Transforms.cpp:166
std::function< LogicalResult(OpBuilder &b, Value src, Value dst)> CopyCallbackFn
Callback function type used to insert copy from original subview to subview of the promoted region fo...
Definition: Transforms.h:269
void populateDecomposeLinalgOpsPattern(RewritePatternSet &patterns, bool removeDeadArgsAndResults=true)
Populate patterns for splitting a LinalgOp with multiple statements within its payload into multiple ...
FailureOr< SplitReductionResult > splitReductionByScaling(PatternRewriter &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Scaling-based implementation of the split reduction transformation.
void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns)
Patterns to promote inputs to outputs and remove unused inputs of linalg.generic ops.
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
Definition: Tiling.cpp:843
FailureOr< ForeachThreadTilingResult > tileToForeachThreadOpUsingTileSizes(RewriterBase &builder, TilingInterface op, ArrayRef< OpFoldResult > tileSizes, std::optional< ArrayAttr > mapping)
Same as tileToForeachThreadOp, but calculate the number of threads required using the given tileSizes...
Definition: Tiling.cpp:435
void populateFoldReshapeOpsByExpansionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlFoldingReshapes)
Patterns to fold an expanding (collapsing) tensor_reshape operation with its producer (consumer) gene...
FailureOr< ForeachThreadReductionTilingResult > tileReductionUsingForeachThread(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
Definition: Tiling.cpp:611
void populateSwapExtractSliceWithFillPatterns(RewritePatternSet &patterns)
Adds patterns that waps tensor.extract_slice(linalg.fill(cst, init)) into linalg.fill(cst,...
void populateInlineConstantOperandsPatterns(RewritePatternSet &patterns)
Patterns that are used to inline constant operands into linalg generic ops.
FailureOr< LinalgOp > promoteSubViews(OpBuilder &b, LinalgOp op, const LinalgPromotionOptions &options)
Promote the subViews into a new buffer allocated at the insertion point b.
Definition: Promotion.cpp:390
void populateConstantFoldLinalgOperations(RewritePatternSet &patterns, const ControlFusionFn &controlFn)
Patterns to constant fold Linalg operations.
std::function< SplitReductionOptions(LinalgOp op)> ControlSplitReductionFn
Function signature to control reduction splitting.
Definition: Transforms.h:1028
void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns)
Linalg generalization patterns.
FailureOr< linalg::LinalgOp > pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ArrayRef< OpFoldResult > packedSizes)
Implement packing of a single LinalgOp by packedSizes.
std::function< SmallVector< ReassociationIndices >(linalg::GenericOp)> GetCollapsableDimensionsFn
Function type to control generic op dimension collapsing.
Definition: Transforms.h:104
std::optional< vector::CombiningKind > getCombinerOpKind(Operation *combinerOp)
Return vector::CombiningKind for the given op.
SmallVector< Value > peelLoop(RewriterBase &rewriter, Operation *op)
Try to peel and canonicalize loop op and return the new result.
Definition: Transforms.cpp:224
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
Definition: Tiling.cpp:964
void populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via rank-reducin...
FailureOr< PackTransposeResult > packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp, ArrayRef< int64_t > outerPerm, ArrayRef< int64_t > innerPerm)
Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the transposed PackOp -> LinalgOp ...
std::pair< TilingInterface, TilingInterface > splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension, OpFoldResult splitPoint)
Split the given op into two parts along the given iteration space dimension at the specified splitPoi...
Definition: Split.cpp:67
void populateElementwiseOpsFusionPatterns(RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion)
Patterns for fusing linalg operation on tensors.
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
Definition: Tiling.cpp:147
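A hedged sketch: compute two tile sizes for dimension 0 of `linalgOp`, neither exceeding an illustrative target of 64 and both divisible by 2 (the values are assumptions, not recommendations):

  OpBuilder b(linalgOp);
  FailureOr<linalg::MultiSizeSpecification> spec =
      linalg::computeMultiTileSizes(b, linalgOp, /*dimension=*/0,
                                    /*targetSize=*/b.getIndexAttr(64),
                                    /*divisor=*/b.getIndexAttr(2));
  if (failed(spec))
    return failure();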
void populateExtractOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Include the generated interface declarations.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
OpInterfaceRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting a...
Definition: PatternMatch.h:372
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:357
Vectorization pattern for memref::CopyOp.
Definition: Transforms.h:820
LogicalResult matchAndRewrite(memref::CopyOp copyOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:298
Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh) dimensions into 1-D depthwise c...
Definition: Transforms.h:779
FailureOr< DepthwiseConv1DNwcWcOp > returningMatchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const
Definition: Transforms.cpp:804
LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override
Definition: Transforms.h:788
DownscaleDepthwiseConv2DNhwcHwcOp(MLIRContext *context, PatternBenefit benefit=1)
Definition: Transforms.h:780
Rewrites 2-D convolution ops with size-1 window dimensions into 1-D convolution ops.
Definition: Transforms.h:759
LogicalResult matchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const override
Definition: Transforms.h:765
FailureOr< Conv1DOp > returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const
Definition: Transforms.cpp:681
Rewrite extract_slice(tensor.pad(x)) into tensor.pad(extract_slice(x)).
Definition: Transforms.h:988
std::function< std::optional< bool >(tensor::ExtractSliceOp)> ControlFn
A function to control pattern application and rewrite logic.
Definition: Transforms.h:998
LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:453
ExtractSliceOfPadTensorSwapPattern(MLIRContext *context, ControlFn controlFn=nullptr, PatternBenefit benefit=1)
Definition: Transforms.h:1000
Transformation information returned after reduction tiling.
Definition: Transforms.h:502
scf::ForeachThreadOp loops
The scf.foreach_thread operation that iterates over the tiles.
Definition: Transforms.h:510
Operation * mergeOp
The final reduction operation merging all the partial reductions.
Definition: Transforms.h:506
Operation * initialOp
The op initializing the tensor used for partial reductions.
Definition: Transforms.h:508
Operation * parallelTiledOp
The partial reduction tiled op generated.
Definition: Transforms.h:504
Rewrite a TilingInterface op to a tiled scf.foreach_thread, applying tiling by numThreads.
Definition: Transforms.h:485
Rewrites a tensor::PackOp into a sequence of tensor.pad + linalg.transpose + tensor....
Definition: Transforms.h:899
LogicalResult matchAndRewrite(tensor::PackOp packOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:527
Rewrites a tensor::UnPackOp into a sequence of rank-reduced extract_slice op.
Definition: Transforms.h:909
LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:596
Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and InsertSliceOp.
Definition: Transforms.h:879
LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override
Definition: Transforms.cpp:391
Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp, Value dest, const SmallVector< Value > &dynSizes) const
Fill dest using a FillOp with the constant padding value, if possible.
Definition: Transforms.cpp:374
GeneralizePadOpPattern(MLIRContext *context, OptimizeCopyFn optimizeCopyFn=nullptr, PatternBenefit benefit=1)
Definition: Transforms.h:880
Match and rewrite for the pattern:
Definition: Transforms.h:951
LogicalResult matchAndRewrite(vector::TransferReadOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Match and rewrite for the pattern:
Definition: Transforms.h:979
LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const override
TODO: use interfaces, side-effects and aliasing analysis as appropriate, when available.
Linalg generalization pattern.
Definition: Transforms.h:803
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
Definition: Transforms.h:813
FailureOr< GenericOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
Definition: Transforms.h:809
Options that allow distribution of loops generated in Linalg transforms to processors while generatin...
Definition: Utils.h:371
SmallVector< Attribute > paddingValues
A padding value for every operand.
Definition: Transforms.h:600
SmallVector< bool > packPaddings
A flag for every operand to mark the PadOp as nofold, which enables packing for statically shaped oper...
Definition: Transforms.h:613
LinalgPaddingOptions & setPaddingDimensions(ArrayRef< int64_t > pd)
Definition: Transforms.h:607
LinalgPaddingOptions & setTransposePaddings(ArrayRef< SmallVector< int64_t >> tp)
Definition: Transforms.h:628
SmallVector< SmallVector< int64_t > > transposePaddings
A permutation vector for every operand used to transpose the packed PadOp results.
Definition: Transforms.h:626
LinalgPaddingOptions & setPaddingValues(ArrayRef< Attribute > pv)
Definition: Transforms.h:601
LinalgPaddingOptions & setPackPaddings(ArrayRef< bool > pp)
Definition: Transforms.h:614
LinalgPaddingOptions & setHoistPaddings(ArrayRef< int64_t > hp)
Definition: Transforms.h:620
SmallVector< int64_t > hoistPaddings
The number of loops to hoist the PadOp out of, for every operand.
Definition: Transforms.h:619
SmallVector< int64_t > paddingDimensions
A list of iterator dimensions to pad.
Definition: Transforms.h:606
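A sketch of assembling padding options for a two-operand op, with illustrative values throughout (f32 zero padding, nofold on both operands, hoisting each pad out of two loops); `b` is an assumed OpBuilder:

  Attribute zero = b.getZeroAttr(b.getF32Type());
  linalg::LinalgPaddingOptions paddingOptions;
  paddingOptions.setPaddingDimensions({0, 1})
      .setPaddingValues({zero, zero})
      .setPackPaddings({true, true})
      .setHoistPaddings({2, 2});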
Linalg padding pattern.
Definition: Transforms.h:735
LogicalResult matchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const override
Definition: Transforms.h:745
FailureOr< LinalgOp > returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const
matchAndRewrite implementation that returns the significant transformed pieces of IR.
Definition: Transforms.cpp:252
LinalgPaddingPattern(MLIRContext *context, LinalgPaddingOptions options=LinalgPaddingOptions(), PatternBenefit benefit=1)
Linalg padding pattern.
Definition: Transforms.cpp:246
std::optional< unsigned > alignment
Alignment of the promoted buffer. If std::nullopt, no alignment is specified.
Definition: Transforms.h:302
LinalgPromotionOptions & setUseFullTileBuffersByDefault(bool use)
Definition: Transforms.h:297
bool useAlloca
Use alloca with the default allocation scheme.
Definition: Transforms.h:308
LinalgPromotionOptions & setAlignment(unsigned align)
Definition: Transforms.h:303
std::optional< CopyCallbackFn > copyOutFn
Definition: Transforms.h:328
std::optional< CopyCallbackFn > copyInFn
Callback function to do the copy of data to and from the promoted subview.
Definition: Transforms.h:327
LinalgPromotionOptions & setUseAlloca(bool use)
Definition: Transforms.h:309
std::optional< DenseSet< unsigned > > operandsToPromote
Indices of subViews to promote.
Definition: Transforms.h:274
LinalgPromotionOptions & setCopyInOutFns(CopyCallbackFn const &copyIn, CopyCallbackFn const &copyOut)
Definition: Transforms.h:329
LinalgPromotionOptions & setUseFullTileBuffers(ArrayRef< bool > useFullTiles)
Definition: Transforms.h:286
std::optional< AllocBufferCallbackFn > allocationFn
Callback function to do the allocation of the promoted buffer.
Definition: Transforms.h:316
bool useFullTileBuffersDefault
If true, all operands unspecified by useFullTileBuffers will use the full view; otherwise the partial ...
Definition: Transforms.h:296
std::optional< DeallocBufferCallbackFn > deallocationFn
Definition: Transforms.h:317
LinalgPromotionOptions & setAllocationDeallocationFns(AllocBufferCallbackFn const &allocFn, DeallocBufferCallbackFn const &deallocFn)
Definition: Transforms.h:319
std::optional< llvm::SmallBitVector > useFullTileBuffers
If the ith element of useFullTiles is true, the full view should be used for the promoted buffer of the it...
Definition: Transforms.h:285
LinalgPromotionOptions & setOperandsToPromote(ArrayRef< int64_t > operands)
Definition: Transforms.h:275
std::optional< LinalgLoopDistributionOptions > tileDistribution
When present, specifies the distribution of generated tile loops to processors.
Definition: Transforms.h:645
LinalgTilingAndFusionOptions & setTileSizes(ArrayRef< int64_t > ts)
Definition: Transforms.h:637
SmallVector< int64_t > tileInterchange
Tile interchange used to permute the tile loops.
Definition: Transforms.h:642
LinalgTilingAndFusionOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
Definition: Transforms.h:647
SmallVector< int64_t > tileSizes
Tile sizes used to tile the root operation.
Definition: Transforms.h:636
LinalgTilingOptions & setLoopType(LinalgTilingLoopType lt)
Definition: Transforms.h:691
LinalgTilingOptions & setDistributionTypes(ArrayRef< StringRef > types)
Definition: Transforms.h:709
LinalgTilingOptions & setInterchange(ArrayRef< unsigned > interchange)
Definition: Transforms.h:683
LinalgTilingLoopType loopType
The type of tile loops to generate.
Definition: Transforms.h:689
LinalgTilingOptions & setTileSizeComputationFunction(TileSizeComputationFunction fun)
Definition: Transforms.h:660
LinalgTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
Definition: Transforms.h:667
LinalgTilingOptions & setPeeledLoops(ArrayRef< int64_t > loops)
Definition: Transforms.h:717
SmallVector< int64_t > peeledLoops
Peel the specified loops.
Definition: Transforms.h:715
LinalgTilingOptions & setDistributionOptions(LinalgLoopDistributionOptions distributionOptions)
Definition: Transforms.h:701
SmallVector< unsigned, 4 > interchangeVector
The interchange vector to reorder the tiled loops.
Definition: Transforms.h:681
TileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
Definition: Transforms.h:657
LinalgTilingOptions & scalarizeDynamicDims()
Tile all dynamic dimensions by 1.
std::optional< LinalgLoopDistributionOptions > distribution
When present, specifies the distribution of generated tile loops to processors.
Definition: Transforms.h:698
SmallVector< StringRef, 2 > distributionTypes
Specification markers of how to distribute the linalg.tiled_loop.
Definition: Transforms.h:707
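A sketch of configuring tiling options with illustrative values: 32x32 tiles, interchanged tile loops, scf.for loops, and peeling of loop 0. It assumes the ArrayRef<int64_t> overload of setTileSizes, which materializes the sizes as constants:

  linalg::LinalgTilingOptions tilingOptions;
  tilingOptions.setTileSizes({32, 32})
      .setInterchange({1, 0})
      .setLoopType(linalg::LinalgTilingLoopType::Loops)
      .setPeeledLoops({0});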
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Definition: Transforms.h:438
Struct to hold the result of a packTranspose call.
Definition: Transforms.h:1153
linalg::LinalgOp transposedLinalgOp
Definition: Transforms.h:1155
tensor::UnPackOp transposedUnPackOp
Definition: Transforms.h:1156
tensor::PadOp is not canonicalized away yet, so we provide a transformation to linalg....
Definition: Transforms.h:854
LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override
Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp (to initialize with pad_val) and GenericOp...
Definition: Transforms.cpp:312
Create a new buffer using the allocationFn provided.
Definition: Transforms.h:342
Split Reduction options.
Definition: Transforms.h:1013
Apply transformation to split the single linalg op reduction into a parallel and reduction dimension.
Definition: Transforms.h:1076
Perform standalone tiling of a single LinalgOp by tileSizes.
Definition: Transforms.h:215
SmallVector< Operation *, 8 > loops
Definition: Transforms.h:217
SmallVector< Value, 4 > tensorResults
Definition: Transforms.h:218
T lowTripCount
Number of tiles associated with each size.
Definition: Transforms.h:430