MLIR 15.0.0git
Utils.h
//===- Utils.h - Utilities to support the Linalg dialect --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_LINALG_UTILS_UTILS_H
#define MLIR_DIALECT_LINALG_UTILS_UTILS_H

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"

namespace mlir {
class AffineExpr;
class AffineForOp;
class AffineMap;
class PatternRewriter;

namespace tensor {
class ExtractSliceOp;
} // namespace tensor

namespace linalg {
class LinalgDependenceGraph;

//===----------------------------------------------------------------------===//
// General utilities
//===----------------------------------------------------------------------===//

/// Check if `permutation` is a permutation of the range
/// `[0, permutation.size())`.
bool isPermutation(ArrayRef<int64_t> permutation);

/// Helper function that creates a memref::DimOp or tensor::DimOp depending on
/// the type of `source`.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim);

/// Given an operation, retrieves the value of each dynamic dimension by
/// constructing the necessary DimOp operations.
SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);

/// Computes an upper bound for the result `value` of an index computation.
/// Translates AffineMinOps and AffineApplyOps along the use-def chains of the
/// index computation to affine constraints and projects out intermediate
/// values. The method sets `boundMap` to an affine map that, given
/// `boundOperands`, evaluates to an upper bound for the index computation.
///
/// If `constantRequired` is true, only returns the constant bounds (potentially
/// over-approximating) and fails when that is not possible.
///
/// Example:
/// ```
/// %dim0 = dim %tensor, %c0
/// %dim1 = dim %tensor, %c1
/// %0 = affine.min affine.map<(d0) -> (40, d0)> (%dim0)
/// %1 = affine.apply affine.map<(d0, d1) -> (d0 + d1)> (%0, %dim1)
/// ```
/// getUpperBoundForIndex(%1, boundMap, boundOperands)
/// sets the output parameters to:
/// - boundMap = affine.map<(d0) -> (d0 + 40)>
/// - boundOperands = [%dim1]
void getUpperBoundForIndex(Value value, AffineMap &boundMap,
                           SmallVectorImpl<Value> &boundOperands,
                           bool constantRequired = false);

/// Returns a constant upper bound for the result `value` of an index
/// computation. Calls `getUpperBoundForIndex` and returns a constant upper
/// bound if the result of `boundMap` is a constant expression and failure
/// otherwise.
///
/// Example:
/// ```
/// %0 = affine.min affine.map<(d0) -> (40, d0)> (%d0)
/// %1 = affine.apply affine.map<(d0) -> (d0 + 2)> (%0)
/// ```
/// getConstantUpperBoundForIndex(%1) returns 42
/// (boundsMap = affine.map<() -> (42)>)
FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);

/// Create an ExtractSliceOp and, if `source` is defined by an ExtractSliceOp,
/// fold it by adding the offsets.
///
/// Example:
/// ```
/// %0 = tensor.extract_slice %arg0[3, 4][3, 32][1, 1] : tensor<64x64xf32> to
///                                                      tensor<3x32xf32>
/// %1 = tensor.extract_slice %0[0, 5][3, 4][1, 1] : tensor<3x32xf32> to
///                                                  tensor<3x4xf32>
/// ```
/// folds into:
/// ```
/// %1 = tensor.extract_slice %arg0[3, 9][3, 4][1, 1] : tensor<64x64xf32> to
///                                                     tensor<3x4xf32>
/// ```
tensor::ExtractSliceOp makeComposedExtractSliceOp(
    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
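
// A minimal usage sketch, assuming an OpBuilder `b`, a Location `loc`, and a
// Value `source` that is itself an extract_slice of a larger tensor (all
// hypothetical caller-provided names): the helper folds the two slices into a
// single extract_slice of the underlying tensor, as in the example above.
//
//   SmallVector<OpFoldResult> offsets = {b.getIndexAttr(0), b.getIndexAttr(5)};
//   SmallVector<OpFoldResult> sizes = {b.getIndexAttr(3), b.getIndexAttr(4)};
//   SmallVector<OpFoldResult> strides = {b.getIndexAttr(1), b.getIndexAttr(1)};
//   tensor::ExtractSliceOp slice =
//       makeComposedExtractSliceOp(b, loc, source, offsets, sizes, strides);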

/// Create a tensor::PadOp that pads `source` to the size of the statically
/// sized `type` whose static sizes are assumed to be greater than the dynamic
/// `source` size. The padding introduces trailing `pad` values until the
/// target size is met. If `source` is defined by one or more LinalgOps that
/// have been padded with the same value and sizes, return their padded result
/// instead of creating a tensor::PadOp.
///
/// Example:
/// ```
/// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1]
/// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst }
/// %2 = linalg.matmul ins(...) outs(%1)
/// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1]
/// ```
/// makeComposedPadHighOp(source=%3, pad=%cst) returns %2
/// makeComposedPadHighOp(source=%3, pad=%other_cst) returns %4
/// ```
/// %4 = tensor.pad %3 low[0, 0] high[...] { tensor.yield %other_cst }
/// ```
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                            Value source, Value pad, bool nofold);

/// Returns a GenericOp that transposes `inputTensor` into `outputTensor`
/// using `transposeVector` to permute the `inputTensor` dimensions.
GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
                          Value outputTensor,
                          ArrayRef<int64_t> transposeVector);
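
// A minimal usage sketch, assuming an OpBuilder `b`, a Location `loc`, a 2-D
// input tensor value `in`, and an `init` tensor of the transposed shape (all
// hypothetical caller-provided names): swaps the two dimensions.
//
//   GenericOp transpose = makeTransposeOp(b, loc, /*inputTensor=*/in,
//                                         /*outputTensor=*/init,
//                                         /*transposeVector=*/{1, 0});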

/// Returns a GenericOp that copies an n-D memref. Unlike the current
/// implementation of memref::CopyOp, this op can be further tiled, lowered to
/// loops, or vectorized.
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to);
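
// Conceptual illustration: for a 2-D memref the copy is expected to take
// roughly the form of a plain generic op with identity indexing maps and
// parallel iterators, which is why it remains tileable, lowerable to loops,
// and vectorizable.
//
//   linalg.generic {indexing_maps = [#id, #id],
//                   iterator_types = ["parallel", "parallel"]}
//       ins(%from : memref<?x?xf32>) outs(%to : memref<?x?xf32>) {
//   ^bb0(%in: f32, %out: f32):
//     linalg.yield %in : f32
//   }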

//===----------------------------------------------------------------------===//
// Fusion / Tiling utilities
//===----------------------------------------------------------------------===//

/// The type of loops to be generated during tiling.
enum class LinalgTilingLoopType {
  Loops = 0,
  AffineLoops = 1,
  ParallelLoops = 2,
  TiledLoops = 3,
};

/// Checks whether the specific `producer` is the last write to exactly the
/// whole `consumedView`. This checks structural dominance, i.e., that the
/// dependence is a RAW without any interleaved write to any piece of
/// `consumedView`.
bool isProducerLastWriteOfView(const LinalgDependenceGraph &graph,
                               LinalgOp consumer, Value consumedView,
                               LinalgOp producer);

/// Checks whether fusing the specific `producer` of the `consumedView` is
/// feasible. This checks that `producer` is the last write of `consumedView`
/// and that no interleaved dependence would be violated (RAW, WAR or WAW).
bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
                   Value consumedView, LinalgOp producer);

/// Compute tile offsets, given a list of loop `ivs` and `tileSizes`. In case a
/// tile size is zero (i.e., no tiling), the corresponding offset is also zero.
SmallVector<Value> computeTileOffsets(OpBuilder &b, Location loc,
                                      ValueRange ivs, ValueRange tileSizes);

/// Compute tile sizes, given a list of loop `ivs`, `tileSizes` and dimension
/// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the
/// corresponding result size is the corresponding value from `sizeBounds`.
/// Note: The returned tile sizes are closed intervals.
SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs,
                                    ValueRange tileSizes,
                                    ArrayRef<Value> sizeBounds);
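
// Conceptual illustration: with ivs = [%iv0] and tileSizes = [%c32, %c0]
// (i.e., only the first of two loops is tiled), computeTileOffsets yields
// offsets [%iv0, %c0]; the untiled second dimension starts at offset zero and,
// in computeTileSizes, keeps its full size from `sizeBounds`.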

/// Creates an extract_slice/subview op for a single `valueToTile` with
/// `builder`. This new operation extracts a tile of `valueToTile`, starting
/// at offsets `lbs` and with sizes `subShapeSizes`. `omitPartialTileCheck`
/// controls whether to omit the partial/boundary tile condition check in cases
/// where we statically know that it is unnecessary.
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
                     ValueRange tileSizes, AffineMap map, ValueRange lbs,
                     ValueRange ubs, ValueRange subShapeSizes,
                     bool omitPartialTileCheck);

/// Creates extract_slice/subview ops for all `valuesToTile` of the given
/// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop
/// nest for tiling with the given induction variables `ivs` and tile sizes
/// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
/// implicit loops in `linalgOp`. `omitPartialTileCheck` controls whether to
/// omit the partial/boundary tile condition check in cases where we statically
/// know that it is unnecessary.
///
/// Note that a constant zero in `tileSizes` means no tiling at that implicit
/// loop. The number of non-zero values in `tileSizes` should be equal to the
/// number of values in `ivs`.
SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
                                      LinalgOp linalgOp,
                                      ArrayRef<Value> valuesToTile,
                                      ValueRange ivs, ValueRange tileSizes,
                                      ArrayRef<Value> sizeBounds,
                                      bool omitPartialTileCheck);

/// Add the tile loop induction variables `ivs` to the IndexOp results found in
/// the body of the `tiledOp` to account for the tile offset.
void addTileLoopIvsToIndexOpResults(OpBuilder &b, LinalgOp tiledOp,
                                    ArrayRef<Value> ivs);
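
// Conceptual illustration: if the body of `tiledOp` contains
//   %i = linalg.index 0 : index
// the result is conceptually replaced by %i + %iv0, so that the index refers
// to a position in the original, untiled iteration space rather than a
// position within the current tile.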

using FusableOpDependencesTy = llvm::MapVector<
    Operation *,
    SmallVector<LinalgDependenceGraph::LinalgDependenceGraphElem, 1>>;
FusableOpDependencesTy
findAllFusableDependences(ArrayRef<LinalgOp> ops,
                          const LinalgDependenceGraph &dependenceGraph);

/// A struct containing the Linalg producer before and after fusion.
/// When operating on tensors, `fusedProducer` may feed into a `tensor.cast`
/// op before the consumer Linalg op, until enough canonicalizations have been
/// applied.
struct FusionInfo {
  LinalgOp originalProducer;
  LinalgOp fusedProducer;
};

/// Fuses producer into consumer if the producer is structurally feasible and
/// the fusion would not violate dependencies.
/// Implements the fusion part of the "tileAndFuse on buffers" transformation
/// and thus requires the `consumerOpOperand` to be a `subview` op (generally
/// obtained by applying the tiling transformation).
FailureOr<FusionInfo> fuseProducerOfBuffer(OpBuilder &b,
                                           OpOperand &consumerOpOperand,
                                           const LinalgDependenceGraph &graph);
/// Tensor counterpart of `fuseProducerOfBuffer`.
/// This implements the fusion part of the "tileAndFuse on tensors"
/// transformation and thus requires the `consumerOpOperand` to be an
/// `extract_slice` op (generally obtained by applying the tiling
/// transformation).
FailureOr<FusionInfo> fuseProducerOfTensor(OpBuilder &b,
                                           OpOperand &consumerOpOperand);
/// Tensor counterpart of `fuseProducerOfBuffer`.
/// This implements the fusion part of the "tileAndFuse on tensors"
/// transformation and thus requires the `consumerOpOperand` to be an
/// `extract_slice` op (generally obtained by applying the tiling
/// transformation). Assumes `producerOpResult` is the result of a Linalg op
/// that feeds `consumerOpOperand`.
FailureOr<FusionInfo> fuseProducerOfTensor(OpBuilder &b,
                                           OpResult producerOpResult,
                                           OpOperand &consumerOpOperand);

//===----------------------------------------------------------------------===//
// Distribution utilities
//===----------------------------------------------------------------------===//

/// Scheme used to distribute loops to processors.
enum class DistributionMethod {
  /// Cyclic distribution where no assumption is made about the dynamic
  /// relationship between the number of processors and the number of
  /// iterations of the distributed loop. Distributes the following loop
  ///
  /// scf.parallel (%iv) = (%lb) to (%ub) step (%step)
  ///
  /// to
  ///
  /// scf.parallel(%iv)= (%lb + %procId * %step) to (%ub) step (%step * %nprocs)
  Cyclic = 0,

  /// Cyclic distribution where the number of processors can be assumed to be
  /// greater than or equal to the number of iterations of the distributed
  /// loop. In such cases, a simple in-bounds check is enough (instead of
  /// materializing a loop). Distributes the following loop
  ///
  /// scf.parallel (%iv) = (%lb) to (%ub) step (%step)
  ///
  /// to
  ///
  /// %iv = %lb + %procId * %step
  /// %cond = arith.cmpi "slt", %iv, %ub
  /// scf.if %cond {
  ///   ...
  /// }
  CyclicNumProcsGeNumIters = 1,

  /// Cyclic distribution where the number of processors can be assumed to be
  /// equal to the number of iterations of the distributed loop. In such cases,
  /// no bounds check is needed. Distributes the following loop
  ///
  /// scf.parallel (%iv) = (%lb) to (%ub) step (%step)
  ///
  /// to
  ///
  /// %iv = %lb + %procId * %step
  CyclicNumProcsEqNumIters = 2
};

/// Callback function type used to get processor ID, and number of processors
/// used for distribution for all parallel loops generated.
struct ProcInfo {
  Value procId;
  Value nprocs;
};
using ProcInfoCallBackFn = std::function<SmallVector<ProcInfo, 2>(
    OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges)>;
using OneDimProcInfoCallBackFn =
    std::function<ProcInfo(OpBuilder &b, Location loc)>;

/// Options that allow distribution of loops generated in Linalg transforms to
/// processors while generating the loops.
struct LinalgLoopDistributionOptions {
  /// Callback function that returns the Values for processor ID (`procId`),
  /// and number of processors (`nprocs`) used to execute the parallel loops.
  /// The number of `{procId, nprocs}` pairs returned must be equal to the
  /// number of `parallelLoopRanges` passed into the callback, which in turn is
  /// the same as the number of parallel loops for which the
  /// `distributionMethod` is specified below.
  ProcInfoCallBackFn procInfo;
  /// Specification of how to distribute the `scf.parallel` loops that are
  /// generated. As each `scf.parallel` loop is generated, the elements of this
  /// vector are used (from left to right) and the specified distribution is
  /// applied. If the vector has fewer entries than the number of
  /// `scf.parallel` loops generated, then no distribution is applied.
  SmallVector<DistributionMethod, 0> distributionMethod = {};

  /// The map keyed by the distribution type that contains callback functions
  /// that return the Values for processor ID (`procId`), and number of
  /// processors (`nprocs`) used to execute the parallel loops.
  DenseMap<StringRef, OneDimProcInfoCallBackFn> procInfoMap;
};
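
// A minimal sketch of filling in these options. `myProcIds` and `myNumProcs`
// are hypothetical, assumed pre-computed containers of processor-id and
// processor-count Values (e.g., obtained from thread/block id ops), one entry
// per parallel loop.
//
//   LinalgLoopDistributionOptions options;
//   options.distributionMethod = {DistributionMethod::Cyclic,
//                                 DistributionMethod::Cyclic};
//   options.procInfo = [&](OpBuilder &b, Location loc,
//                          ArrayRef<Range> parallelLoopRanges) {
//     SmallVector<ProcInfo, 2> infos;
//     for (unsigned i = 0, e = parallelLoopRanges.size(); i < e; ++i)
//       infos.push_back(ProcInfo{myProcIds[i], myNumProcs[i]});
//     return infos;
//   };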

/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and
/// `step`.
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
                                       Value procId, Value nprocs, Value &lb,
                                       Value &ub, Value &step);

//===----------------------------------------------------------------------===//
// Fusion on tensor utilities
//===----------------------------------------------------------------------===//

/// A class to manage the tile loop nest specific information.
class TileLoopNest {
public:
  TileLoopNest(LinalgOp rootOp) : rootOp(rootOp) {}

  /// Tile the root operation using the given `tileSizes`, `tileInterchange`,
  /// and `tileDistribution`.
  LogicalResult
  tileRootOp(OpBuilder &b, ArrayRef<int64_t> tileSizes,
             ArrayRef<int64_t> tileInterchange,
             Optional<LinalgLoopDistributionOptions> tileDistribution);

  /// Fuse the producer of `consumerOpOperand` into the tile loop nest. Returns
  /// the fused producer or fails if fusion is not possible.
  FailureOr<LinalgOp> fuseProducer(OpBuilder &b, OpOperand *consumerOpOperand);

  /// Returns the replacement results for the original untiled root operation.
  ValueRange getRootOpReplacementResults();

  /// Returns the tiled root operation.
  LinalgOp getRootOp() { return rootOp; }

  /// Returns the tiled root operation and the fused producers.
  SmallVector<LinalgOp> getAllTiledAndFusedOps();

  /// Returns the loop ops generated from tiling.
  ArrayRef<scf::ForOp> getLoopOps() { return tileLoopOps; }

  /// Returns true if the tile loop nest has no tile loops.
  bool isEmpty();

private:
  /// Returns true if the tile loop nest invariants are satisfied:
  /// - The `rootOp` has been tiled at least once.
  /// - The number of tile loop operations and dimensions match.
  /// - The innermost tile loop is the parent of `tiledOp`.
  /// - The tile loops are directly nested.
  // TODO: relax to support additional control flow, e.g., IfOp.
  bool isValid();

  /// Searches the block arguments tied to a block argument `bbArg` of the
  /// innermost tile loop. Returns the block arguments from outermost to
  /// innermost or an empty vector if none are found.
  SmallVector<BlockArgument> getTiedBBArgs(BlockArgument bbArg);

  /// Returns the iteration argument of the outermost tile loop mapped to a
  /// block argument `bbArg` of the innermost tile loop.
  OpOperand *getTiedIterArg(BlockArgument bbArg);

  /// Returns true if `bbArg` has uses other than `sliceOp` and its
  /// dependencies. Only if there are no other uses may the producer output
  /// iteration argument be reused to pass the producer result after fusion.
  bool hasOtherUses(BlockArgument bbArg, tensor::ExtractSliceOp sliceOp);

  LinalgOp rootOp;
  SmallVector<scf::ForOp> tileLoopOps;
  DenseMap<Operation *, SmallVector<int64_t>> tiledRootAndFusedOpsLoops;
};

/// Tiles `consumerOp` and fuses its dependencies if possible. Uses the
/// `tileSizes`, `tileInterchange`, and `tileDistribution` parameters to
/// control the tiling.
FailureOr<TileLoopNest> tileConsumerAndFuseProducers(
    OpBuilder &b, LinalgOp consumerOp, ArrayRef<int64_t> tileSizes,
    ArrayRef<int64_t> tileInterchange,
    const Optional<LinalgLoopDistributionOptions> &tileDistribution);
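
// A minimal usage sketch, assuming a hypothetical PatternRewriter `rewriter`
// and a LinalgOp `consumerOp` with three loops: tile the two outer loops by
// 32, fuse the producers, and replace the original op with the results of the
// tiled loop nest.
//
//   FailureOr<TileLoopNest> tileLoopNest = tileConsumerAndFuseProducers(
//       rewriter, consumerOp, /*tileSizes=*/{32, 32, 0},
//       /*tileInterchange=*/{0, 1, 2}, /*tileDistribution=*/llvm::None);
//   if (failed(tileLoopNest))
//     return failure();
//   rewriter.replaceOp(consumerOp,
//                      tileLoopNest->getRootOpReplacementResults());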

//===----------------------------------------------------------------------===//
// Generic op region utilities
//===----------------------------------------------------------------------===//

/// A struct containing common matchers over linalg op's region.
struct RegionMatcher {
  enum class BinaryOpKind {
    IAdd,
  };

  /// Matches the given linalg op if its body is performing a binary operation
  /// on int or float scalar values and returns the binary op kind.
  ///
  /// The linalg op's region is expected to be
  /// ```
  /// {
  ///   ^bb(%a: <scalar-type>, %b: <scalar-type>):
  ///     %0 = <binary-op> %a, %b: <scalar-type>
  ///     linalg.yield %0: <scalar-type>
  /// }
  /// ```
  static Optional<BinaryOpKind> matchAsScalarBinaryOp(GenericOp op);
};
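
// A minimal usage sketch, assuming a hypothetical GenericOp `genericOp` under
// inspection:
//
//   if (Optional<RegionMatcher::BinaryOpKind> kind =
//           RegionMatcher::matchAsScalarBinaryOp(genericOp)) {
//     if (*kind == RegionMatcher::BinaryOpKind::IAdd) {
//       // The op's body is a scalar integer addition.
//     }
//   }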

//===----------------------------------------------------------------------===//
// Loop nest utilities
//===----------------------------------------------------------------------===//

/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
/// is used to generate the body of the innermost loop. It is passed a range
/// of loop induction variables and a range of operand values to use.
template <typename LoopTy>
struct GenerateLoopNest {
  static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
                   LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
                   function_ref<scf::ValueVector(OpBuilder &, Location,
                                                 ValueRange, ValueRange)>
                       bodyBuilderFn,
                   Optional<LinalgLoopDistributionOptions> = None,
                   ArrayRef<StringRef> distributionTypes = {});
};
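
// A minimal sketch of a callable that could be passed as `bodyBuilderFn`;
// `ivs` are the loop induction variables and `operands` the values threaded
// through the generated loops.
//
//   auto bodyBuilder = [&](OpBuilder &nested, Location nestedLoc,
//                          ValueRange ivs, ValueRange operands)
//       -> scf::ValueVector {
//     // ... create the loop body using `ivs` and `operands` ...
//     return scf::ValueVector(operands.begin(), operands.end());
//   };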

} // namespace linalg
} // namespace mlir

#endif // MLIR_DIALECT_LINALG_UTILS_UTILS_H