MLIR  17.0.0git
TileUsingInterface.h
Go to the documentation of this file.
1 //===- TileUsingInterface.h - Tiling ops using TilingInterface --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_SCF_TRANSFORMS_TILEUSINGINTERFACE_H
10 #define MLIR_DIALECT_SCF_TRANSFORMS_TILEUSINGINTERFACE_H
11 
14 #include "mlir/IR/PatternMatch.h"
16 
17 #include <deque>
18 
19 namespace mlir {
20 class Operation;
21 class PatternRewriter;
22 class TilingInterface;
23 } // namespace mlir
24 
25 namespace mlir {
26 namespace scf {
27 
29  std::function<SmallVector<Value>(OpBuilder &, Operation *)>;
30 
31 /// Options to use to control tiling.
33  /// Computation function that returns the tile sizes for each operation.
34  /// Delayed construction of constant tile sizes should occur to interoperate
35  /// with folding.
37 
40  tileSizeComputationFunction = std::move(fun);
41  return *this;
42  }
43  /// Set the `tileSizeComputationFunction` to return the values `ts`. The
44  /// values must not fold away when tiling. Otherwise, use a more robust
45  /// `tileSizeComputationFunction`.
47  tileSizeComputationFunction = [=](OpBuilder &, Operation *) { return ts; };
48  return *this;
49  }
50  /// Convenience function to set the `tileSizeComputationFunction` to a
51  /// function that computes tile sizes at the point they are needed. Allows
52  /// proper interaction with folding.
54 
55  /// The interchange vector to reorder the tiled loops.
58  interchangeVector = llvm::to_vector(interchange);
59  return *this;
60  }
61 };
62 
63 /// Transformation information returned after tiling.
65  /// Tiled operations that are generated during tiling. The order does not
66  /// matter except the last op. The replacements are expected to be the results
67  /// of the last op.
69  /// The `scf.for` operations that iterate over the tiles.
71  /// Values to use as replacements for the untiled op. Is the same size as the
72  /// number of results of the untiled op.
74 };
75 
76 /// Method to tile an op that implements the `TilingInterface` using
77 /// `scf.for` for iterating over the tiles.
79  TilingInterface op,
80  const SCFTilingOptions &options);
81 
82 /// Options used to control tile + fuse.
84  /// The tiling options used to control the tiling of the consumer.
88  return *this;
89  }
90 };
91 
92 /// Fuse the producer of the source of `candidateSliceOp` by computing the
93 /// required slice of the producer in-place. Note that the method
94 /// replaces the uses of `candidateSliceOp` with the tiled and fused producer
95 /// value but does not delete the slice operation.
97  OpResult origProducer; // Original untiled producer.
98  Value tiledAndFusedProducer; // Tile and fused producer value.
99 };
100 std::optional<SCFFuseProducerOfSliceResult>
102  tensor::ExtractSliceOp candidateSliceOp,
104 
105 /// Reconstruct the fused producer from within the tiled-and-fused code. Based
106 /// on the slice of the producer computed in place it is possible that within
107 /// the loop nest the same slice of the producer is computed multiple times. It
108 /// is in general not possible to recompute the value of the fused producer from
109 /// the tiled loop code in such cases. For the cases where no slice of the
110 /// producer is computed in a redundant fashion it is possible to reconstruct
111 /// the value of the original producer from within the tiled loop. It is up to
112 /// the caller to ensure that the producer is not computed redundantly within
113 /// the tiled loop nest. For example, consider
114 ///
115 /// ```mlir
116 /// %0 = linalg.matmul ins(...) outs(...) -> tensor<?x?xf32>
117 /// %1 = linalg.matmul ins(%0, ..) outs(...) -> tensor<?x?xf32>
118 /// ```
119 ///
120 /// If `%1` is tiled in a 2D fashion and `%0` is fused with it, the resulting IR
121 /// is,
122 ///
123 /// ```mlir
124 /// %t1_0 = scf.for .... iter_args(%arg0 = ...) {
125 /// %t1_1 = scf.for ... iter_args(%arg1 = %arg0) {
126 /// ...
127 /// %t1_2 = linalg.matmul ins(...) outs(...) -> tensor<?x?xf32>
128 /// %t1_3 = linalg.matmul ins(%t1_2, ...)
129 /// %t1_4 = tensor.insert_slice %t1_3 into %arg1 ...
130 /// scf.yield %t1_4
131 /// }
132 /// scf.yield %t1_1
133 /// }
134 /// ```
135 ///
136 /// Here `%t1_2` is the same for all iterations of the inner `scf.for`. Instead
137 /// if `%1` were tiled only along the rows, the resultant code would be
138 ///
139 /// ```mlir
140 /// %t2_0 = scf.for .... iter_args(%arg0 = ...) {
141 /// ...
142 /// %t2_1 = linalg.matmul ins(...) outs(...) -> tensor<?x?xf32>
143 /// %t2_2 = linalg.matmul ins(%t2_1, ...)
144 /// %t2_3 = tensor.insert_slice %t2_2 into %arg0 ...
145 /// scf.yield %t2_3
146 /// }
147 /// ```
148 ///
149 /// Here there is no intersection in the different slices of `%t2_1` computed
150 /// across iterations of the `scf.for`. In such cases, the value of the original
151 /// `%0` can be reconstructed from within the loop body. This is useful in cases
152 /// where `%0` had other uses as well. If not reconstructed from within the loop
153 /// body, uses of `%0` could not be replaced, making it still live and the
154 /// fusion immaterial.
156  RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp,
157  scf::SCFFuseProducerOfSliceResult fusedProducerInfo,
159 
160 /// Transformation information returned after tile and fuse.
162  /// List of untiled operations that were fused with the tiled consumer.
164  /// List of tiled and fused operations generated. The first one in this list
165  /// is guaranteed to be the tiled operations generated during tiling of the
166  /// generated operation.
168  /// The `scf.for` operations that iterate over the tiles.
170  /// The replacement values to use for the tiled and fused operations.
172 };
173 
174 /// Method to tile and fuse a sequence of operations, by tiling the consumer
175 /// and fusing its producers. Note that this assumes that it is valid to
176 /// tile+fuse the producer into the innermost tiled loop. It is up to the caller
177 /// to ensure that the tile sizes provided make this fusion valid.
178 ///
179 /// For example, for the following sequence
180 ///
181 /// ```mlir
182 /// %0 =
183 /// %1 = linalg.fill ... outs(%0 : ... )
184 /// %2 = linalg.matmul ... outs(%1 : ...) ...
185 /// ```
186 ///
187 /// it is legal to fuse the fill with the matmul only if the matmul is tiled
188 /// along the parallel dimensions and not the reduction dimension, i.e. the tile
189 /// size for the reduction dimension should be 0. The resulting fused
190 /// transformation is
191 ///
192 /// ```mlir
193 /// %1 = scf.for ... iter_args(%arg0 = %0)
194 /// %2 = tensor.extract_slice %arg0
195 /// %3 = linalg.fill .. outs(%2 : ... )
196 /// %4 = linalg.matmul .. outs(%3 : ...)
197 /// }
198 /// ```
201  RewriterBase &rewriter, TilingInterface consumer,
203 
204 /// Method to lower an `op` that implements the `TilingInterface` to
205 /// loops/scalars.
207 lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op);
208 
209 /// Transformation information returned after reduction tiling.
211  /// The partial reduction tiled op generated.
213  /// The final reduction operation merging all the partial reductions.
215  /// Initial op
217  /// The `scf.for` operations that iterate over the tiles.
219 };
220 
221 /// Method to tile a reduction and generate a parallel op within a serial loop.
222 /// Each of the partial reductions is calculated in parallel. Then, after the
223 /// loop, all the partial reductions are merged into a final reduction.
224 /// For example for the following sequence
225 ///
226 /// ```mlir
227 /// %0 = linalg.generic %in ["parallel", "reduction"]
228 /// : tensor<7x9xf32> -> tensor<7xf32>
229 /// ```
230 ///
231 /// into:
232 ///
233 /// ```mlir
234 /// %0 = linalg.fill ... : tensor<7x4xf32>
235 /// %1 = scf.for ... iter_args(%arg0 = %0)
236 /// %2 = tensor.extract_slice %arg0 : tensor<7x4xf32> -> tensor<7x?xf32>
237 /// %3 = tensor.extract_slice %in : tensor<7x9xf32> -> tensor<7x?xf32>
238 /// %4 = linalg.generic %2, %3 ["parallel", "parallel"]
239 /// : tensor<7x?xf32> -> tensor<7x?xf32>
240 /// %5 = tensor.insert_slice %4, %arg0[0, 0] : tensor<7x4xf32>
241 /// }
242 /// %6 = linalg.generic %1 ["parallel", "reduction"]
243 /// : tensor<7x4xf32> -> tensor<7xf32>
244 /// ```
246 tileReductionUsingScf(PatternRewriter &b, PartialReductionOpInterface op,
247  ArrayRef<OpFoldResult> tileSize);
248 
249 } // namespace scf
250 } // namespace mlir
251 
252 #endif // MLIR_DIALECT_SCF_TRANSFORMS_TILEUSINGINTERFACE_H
static llvm::ManagedStatic< PassManagerOptions > options
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This class helps build Operations.
Definition: Builders.h:199
This is a value defined by a result of an operation.
Definition: Value.h:450
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:75
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:621
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:399
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:93
FailureOr< scf::SCFReductionTilingResult > tileReductionUsingScf(PatternRewriter &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > tileSize)
Method to tile a reduction and generate a parallel op within a serial loop.
FailureOr< SmallVector< scf::ForOp > > lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op)
Method to lower an op that implements the TilingInterface to loops/scalars.
std::optional< SCFFuseProducerOfSliceResult > tileAndFuseProducerOfSlice(RewriterBase &rewriter, tensor::ExtractSliceOp candidateSliceOp, MutableArrayRef< scf::ForOp > loops)
Implementation of fusing producer of a single slice by computing the slice of the producer in-place.
std::function< SmallVector< Value >(OpBuilder &, Operation *)> SCFTileSizeComputationFunction
void yieldReplacementForFusedProducer(RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp, scf::SCFFuseProducerOfSliceResult fusedProducerInfo, MutableArrayRef< scf::ForOp > loops)
Reconstruct the fused producer from within the tiled-and-fused code.
FailureOr< SCFTilingResult > tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, const SCFTilingOptions &options)
Method to tile an op that implements the TilingInterface using scf.for for iterating over the tiles.
FailureOr< SCFTileAndFuseResult > tileConsumerAndFuseProducerGreedilyUsingSCFForOp(RewriterBase &rewriter, TilingInterface consumer, const SCFTileAndFuseOptions &options)
Method to tile and fuse a sequence of operations, by tiling the consumer and fusing its producers.
Include the generated interface declarations.
Fuse the producer of the source of candidateSliceOp by computing the required slice of the producer i...
Transformation information returned after reduction tiling.
Operation * parallelTiledOp
The partial reduction tiled op generated.
Operation * mergeOp
The final reduction operation merging all the partial reductions.
SmallVector< scf::ForOp > loops
The scf.for operations that iterate over the tiles.
Options used to control tile + fuse.
SCFTilingOptions tilingOptions
The tiling options used to control the tiling of the consumer.
SCFTileAndFuseOptions & setTilingOptions(SCFTilingOptions options)
Transformation information returned after tile and fuse.
llvm::SetVector< Operation * > fusedProducers
List of untiled operations that were fused with the tiled consumer.
SmallVector< scf::ForOp > loops
The scf.for operations that iterate over the tiles.
llvm::DenseMap< Value, Value > replacements
The replacement values to use for the tiled and fused operations.
llvm::SetVector< Operation * > tiledAndFusedOps
List of tiled and fused operations generated.
Options to use to control tiling.
SCFTileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
SCFTilingOptions & setTileSizeComputationFunction(SCFTileSizeComputationFunction fun)
SCFTilingOptions & setInterchange(ArrayRef< int64_t > interchange)
SmallVector< int64_t > interchangeVector
The interchange vector to reorder the tiled loops.
SCFTilingOptions & setTileSizes(const SmallVector< Value, 4 > &ts)
Set the tileSizeComputationFunction to return the values ts.
Transformation information returned after tiling.
SmallVector< Operation * > tiledOps
Tiled operations that are generated during tiling.
SmallVector< scf::ForOp > loops
The scf.for operations that iterate over the tiles.
SmallVector< Value > replacements
Values to use as replacements for the untiled op.