MLIR  18.0.0git
TileUsingInterface.h
Go to the documentation of this file.
1 //===- TileUsingInterface.h - Tiling ops using TilingInterface --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_SCF_TRANSFORMS_TILEUSINGINTERFACE_H
10 #define MLIR_DIALECT_SCF_TRANSFORMS_TILEUSINGINTERFACE_H
11 
14 #include "mlir/IR/PatternMatch.h"
16 
17 #include <deque>
18 
19 namespace mlir {
20 class Operation;
21 class RewriterBase;
22 class TilingInterface;
23 } // namespace mlir
24 
25 namespace mlir {
26 namespace scf {
27 
29  std::function<SmallVector<OpFoldResult>(OpBuilder &, Operation *)>;
30 
31 /// Options to use to control tiling.
33  /// Computation function that returns the tile sizes for each operation.
34  /// Delayed construction of constant tile sizes should occur to interoperate
35  /// with folding.
37 
40  tileSizeComputationFunction = std::move(fun);
41  return *this;
42  }
43  /// Convenience function to set the `tileSizeComputationFunction` to a
44  /// function that computes tile sizes at the point they are needed. Allows
45  /// proper interaction with folding.
47 
48  /// The interchange vector to reorder the tiled loops.
51  interchangeVector = llvm::to_vector(interchange);
52  return *this;
53  }
54 
55  /// Specify mapping of loops to devices. This is only respected when the loop
56  /// constructs support such a mapping (like `scf.forall`). Will be ignored
57  /// when using loop constructs that don't support such a mapping (like
58  /// `scf.for`).
61  mappingVector = llvm::map_to_vector(
62  mapping, [](auto attr) -> Attribute { return attr; });
63  return *this;
64  }
65 };
66 
67 /// Transformation information returned after tiling.
69  /// Tiled operations that are generated during tiling. The order does not
70  /// matter except the last op. The replacements are expected to be the results
71  /// of the last op.
73  /// The `scf.for` operations that iterate over the tiles.
75  /// Values to use as replacements for the untiled op. Is the same size as the
76  /// number of results of the untiled op.
78 };
79 
80 /// Method to tile an op that implements the `TilingInterface` using
81 /// `scf.for` for iterating over the tiles.
83  TilingInterface op,
84  const SCFTilingOptions &options);
85 
86 /// Method to tile an op that implements the `TilingInterface` using
87 /// `scf.forall`.
89 tileUsingSCFForallOp(RewriterBase &rewriter, TilingInterface op,
90  const SCFTilingOptions &options);
91 
92 /// Options used to control tile + fuse.
94  /// The tiling options used to control the tiling of the consumer.
98  return *this;
99  }
100 };
101 
102 /// Fuse the producer of the source of `candidateSliceOp` by computing the
103 /// required slice of the producer in-place. Note that the method
104 /// replaces the uses of `candidateSliceOp` with the tiled and fused producer
105 /// value but does not delete the slice operation.
107  OpResult origProducer; // Original untiled producer.
108  Value tiledAndFusedProducer; // Tile and fused producer value.
110 };
111 std::optional<SCFFuseProducerOfSliceResult>
113  tensor::ExtractSliceOp candidateSliceOp,
115 
116 /// Reconstruct the fused producer from within the tiled-and-fused code. Based
117 /// on the slice of the producer computed in place it is possible that within
118 /// the loop nest the same slice of the producer is computed multiple times. It is
119 /// in general not possible to recompute the value of the fused producer from
120 /// the tiled loop code in such cases. For the cases where no slice of the
121 /// producer is computed in a redundant fashion it is possible to reconstruct
122 /// the value of the original producer from within the tiled loop. It is up to
123 /// the caller to ensure that the producer is not computed redundantly within
124 /// the tiled loop nest. For example, consider
125 ///
126 /// ```mlir
127 /// %0 = linalg.matmul ins(...) outs(...) -> tensor<?x?xf32>
128 /// %1 = linalg.matmul ins(%0, ..) outs(...) -> tensor<?x?xf32>
129 /// ```
130 ///
131 /// If `%1` is tiled in a 2D fashion and `%0` is fused with it, the resulting IR
132 /// is,
133 ///
134 /// ```mlir
135 /// %t1_0 = scf.for .... iter_args(%arg0 = ...) {
136 /// %t1_1 = scf.for ... iter_args(%arg1 = %arg0) {
137 /// ...
138 /// %t1_2 = linalg.matmul ins(...) outs(...) -> tensor<?x?xf32>
139 /// %t1_3 = linalg.matmul ins(%t1_2, ...)
140 /// %t1_4 = tensor.insert_slice %t1_3 into %arg1 ...
141 /// scf.yield %t1_4
142 /// }
143 /// scf.yield %t1_1
144 /// }
145 /// ```
146 ///
147 /// Here `%t1_2` is the same for all iterations of the inner `scf.for`. Instead
148 /// if `%1` were tiled only along the rows, the resultant code would be
149 ///
150 /// ```mlir
151 /// %t2_0 = scf.for .... iter_args(%arg0 = ...) {
152 /// ...
153 /// %t2_1 = linalg.matmul ins(...) outs(...) -> tensor<?x?xf32>
154 /// %t2_2 = linalg.matmul ins(%t2_1, ...)
155 /// %t2_3 = tensor.insert_slice %t2_2 into %arg0 ...
156 /// scf.yield %t2_3
157 /// }
158 /// ```
159 ///
160 /// Here there is no intersection in the different slices of `%t2_1` computed
161 /// across iterations of the `scf.for`. In such cases, the value of the original
162 /// `%0` can be reconstructed from within the loop body. This is useful in cases
163 /// where `%0` had other uses as well. If not reconstructed from within the loop
164 /// body, uses of `%0` could not be replaced, making it still live and the
165 /// fusion immaterial.
167  RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp,
168  scf::SCFFuseProducerOfSliceResult fusedProducerInfo,
170 
171 /// Transformation information returned after tile and fuse.
173  /// List of untiled operations that were fused with the tiled consumer.
175  /// List of tiled and fused operations generated. The first one in this list
176  /// is guaranteed to be the tiled operations generated during tiling of the
177  /// generated operation.
179  /// The `scf.for` operations that iterate over the tiles.
181  /// The replacement values to use for the tiled and fused operations.
183 };
184 
185 /// Method to tile and fuse a sequence of operations, by tiling the consumer
186 /// and fusing its producers. Note that this assumes that it is valid to
187 /// tile+fuse the producer into the innermost tiled loop. It's up to the caller
188 /// to ensure that the tile sizes provided make this fusion valid.
189 ///
190 /// For example, for the following sequence
191 ///
192 /// ```mlir
193 /// %0 =
194 /// %1 = linalg.fill ... outs(%0 : ... )
195 /// %2 = linalg.matmul ... outs(%1 : ...) ...
196 /// ```
197 ///
198 /// it is legal to fuse the fill with the matmul only if the matmul is tiled
199 /// along the parallel dimensions and not the reduction dimension, i.e. the tile
200 /// size for the reduction dimension should be 0. The resulting fused
201 /// transformation is
202 ///
203 /// ```mlir
204 /// %1 = scf.for ... iter_args(%arg0 = %0)
205 /// %2 = tensor.extract_slice %arg0
206 /// %3 = linalg.fill .. outs(%2 : ... )
207 /// %4 = linalg.matmul .. outs(%3 : ...)
208 /// }
209 /// ```
212  RewriterBase &rewriter, TilingInterface consumer,
214 
215 /// Method to lower an `op` that implements the `TilingInterface` to
216 /// loops/scalars.
218 lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op);
219 
220 /// Transformation information returned after reduction tiling.
222  /// The partial reduction tiled op generated.
224  /// The final reduction operation merging all the partial reductions.
226  /// Initial op
228  /// The `scf.for` operations that iterate over the tiles.
230 };
231 
232 /// Method to tile a reduction and generate a parallel op within a serial loop.
233 /// Each of the partial reductions is calculated in parallel. Then after the
234 /// loop all the partial reductions are merged into a final reduction.
235 /// For example, this transforms the following sequence
236 ///
237 /// ```mlir
238 /// %0 = linalg.generic %in ["parallel", "reduction"]
239 /// : tensor<7x9xf32> -> tensor<7xf32>
240 /// ```
241 ///
242 /// into:
243 ///
244 /// ```mlir
245 /// %0 = linalg.fill ... : tensor<7x4xf32>
246 /// %1 = scf.for ... iter_args(%arg0 = %0)
247 /// %2 = tensor.extract_slice %arg0 : tensor<7x4xf32> -> tensor<7x?xf32>
248 /// %3 = tensor.extract_slice %in : tensor<7x9xf32> -> tensor<7x?xf32>
249 /// %4 = linalg.generic %2, %3 ["parallel", "parallel"]
250 /// : tensor<7x?xf32> -> tensor<7x?xf32>
251 /// %5 = tensor.insert_slice %4, %arg0[0, 0] : tensor<7x4xf32>
252 /// }
253 /// %6 = linalg.generic %1 ["parallel", "reduction"]
254 /// : tensor<7x4xf32> -> tensor<7xf32>
255 /// ```
257 tileReductionUsingScf(RewriterBase &b, PartialReductionOpInterface op,
258  ArrayRef<OpFoldResult> tileSize);
259 
260 } // namespace scf
261 } // namespace mlir
262 
263 #endif // MLIR_DIALECT_SCF_TRANSFORMS_TILEUSINGINTERFACE_H
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This class helps build Operations.
Definition: Builders.h:206
This is a value defined by a result of an operation.
Definition: Value.h:453
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:399
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
FailureOr< scf::SCFReductionTilingResult > tileReductionUsingScf(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > tileSize)
Method to tile a reduction and generate a parallel op within a serial loop.
FailureOr< SCFTilingResult > tileUsingSCFForallOp(RewriterBase &rewriter, TilingInterface op, const SCFTilingOptions &options)
Method to tile an op that implements the TilingInterface using scf.forall.
FailureOr< SmallVector< scf::ForOp > > lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op)
Method to lower an op that implements the TilingInterface to loops/scalars.
std::optional< SCFFuseProducerOfSliceResult > tileAndFuseProducerOfSlice(RewriterBase &rewriter, tensor::ExtractSliceOp candidateSliceOp, MutableArrayRef< scf::ForOp > loops)
Implementation of fusing producer of a single slice by computing the slice of the producer in-place.
std::function< SmallVector< OpFoldResult >(OpBuilder &, Operation *)> SCFTileSizeComputationFunction
void yieldReplacementForFusedProducer(RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp, scf::SCFFuseProducerOfSliceResult fusedProducerInfo, MutableArrayRef< scf::ForOp > loops)
Reconstruct the fused producer from within the tiled-and-fused code.
FailureOr< SCFTilingResult > tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, const SCFTilingOptions &options)
Method to tile an op that implements the TilingInterface using scf.for for iterating over the tiles.
FailureOr< SCFTileAndFuseResult > tileConsumerAndFuseProducerGreedilyUsingSCFForOp(RewriterBase &rewriter, TilingInterface consumer, const SCFTileAndFuseOptions &options)
Method to tile and fuse a sequence of operations, by tiling the consumer and fusing its producers.
Include the generated interface declarations.
Fuse the producer of the source of candidateSliceOp by computing the required slice of the producer i...
Transformation information returned after reduction tiling.
Operation * parallelTiledOp
The partial reduction tiled op generated.
Operation * mergeOp
The final reduction operation merging all the partial reductions.
SmallVector< scf::ForOp > loops
The scf.for operations that iterate over the tiles.
Options used to control tile + fuse.
SCFTilingOptions tilingOptions
The tiling options used to control the tiling of the consumer.
SCFTileAndFuseOptions & setTilingOptions(SCFTilingOptions options)
Transformation information returned after tile and fuse.
llvm::SetVector< Operation * > fusedProducers
List of untiled operations that were fused with the tiled consumer.
llvm::DenseMap< Value, Value > replacements
The replacement values to use for the tiled and fused operations.
llvm::SetVector< Operation * > tiledAndFusedOps
List of tiled and fused operations generated.
SmallVector< Operation * > loops
The scf.for operations that iterate over the tiles.
Options to use to control tiling.
SCFTileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
SCFTilingOptions & setTileSizeComputationFunction(SCFTileSizeComputationFunction fun)
SCFTilingOptions & setInterchange(ArrayRef< int64_t > interchange)
SCFTilingOptions & setTileSizes(ArrayRef< OpFoldResult > ts)
Convenience function to set the tileSizeComputationFunction to a function that computes tile sizes at...
SCFTilingOptions & setMapping(ArrayRef< DeviceMappingAttrInterface > mapping)
SmallVector< int64_t > interchangeVector
The interchange vector to reorder the tiled loops.
SmallVector< Attribute > mappingVector
Specify mapping of loops to devices.
Transformation information returned after tiling.
SmallVector< Operation * > tiledOps
Tiled operations that are generated during tiling.
SmallVector< Operation * > loops
The scf.for operations that iterate over the tiles.
SmallVector< Value > replacements
Values to use as replacements for the untiled op.