MLIR  16.0.0git
Utils.h
Go to the documentation of this file.
1 //===- Utils.h - Utilities to support the Linalg dialect --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_LINALG_UTILS_UTILS_H
10 #define MLIR_DIALECT_LINALG_UTILS_UTILS_H
11 
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/StringSet.h"
17 
18 namespace mlir {
19 class AffineExpr;
20 class AffineForOp;
21 class AffineMap;
22 class PatternRewriter;
23 
24 namespace tensor {
25 class ExtractSliceOp;
26 } // namespace tensor
27 
28 namespace linalg {
29 class LinalgDependenceGraph;
30 
31 //===----------------------------------------------------------------------===//
32 // General utilities
33 //===----------------------------------------------------------------------===//
34 
35 /// Check if all indexing maps are projected permutations.
36 bool allIndexingsAreProjectedPermutation(LinalgOp op);
37 
38 /// Detect whether `r` has only ConstantOp, ElementwiseMappable and YieldOp.
39 bool hasOnlyScalarElementwiseOp(Region &r);
40 
41 /// Check if a LinalgOp is an element-wise operation.
42 bool isElementwise(LinalgOp op);
43 
44 /// Check if `permutation` is a permutation of the range
45 /// `[0, permutation.size())`.
46 bool isPermutation(ArrayRef<int64_t> permutation);
47 
48 /// Check if `attr` has "parallel" iterator type semantics.
49 bool isParallelIterator(Attribute attr);
50 
51 /// Check if `attr` has "reduction" iterator type semantics.
52 bool isReductionIterator(Attribute attr);
53 
54 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
55 /// the type of `source`.
56 Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim);
57 OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source,
58  int64_t dim);
59 
60 /// Given an operation, retrieves the value of each dynamic dimension through
61 /// constructing the necessary DimOp operators.
62 SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);
63 
64 /// Computes an upper bound for the result `value` of an index computation.
65 /// Translates AffineMinOps and AffineApplyOps along the use-def chains of the
66 /// index computation to affine constraints and projects out intermediate
67 /// values. The method sets `boundMap` to an affine map that given
68 /// `boundOperands` evaluates to an upper bound for the index computation.
69 ///
70 /// If constantRequired is true, only returns the constant bounds (potentially
71 /// over-approximating) and fails when not possible.
72 ///
73 /// Example:
74 /// ```
75 /// %dim0 = dim %tensor, %c0
76 /// %dim1 = dim %tensor, %c1
77 /// %0 = affine.min affine.map<(d0) -> (40, d0)> (%dim0)
78 /// %1 = affine.apply affine.map<(d0, d1) -> (d0 + d1)> (%0, %dim1)
79 /// ```
80 /// getUpperBoundForIndex(%1, boundMap, boundOperands)
81 /// set the output parameters to:
82 /// - boundMap = affine.map<(d0) -> (d0 + 40)>
83 /// - boundOperands = [%dim1]
84 void getUpperBoundForIndex(Value value, AffineMap &boundMap,
85  SmallVectorImpl<Value> &boundOperands,
86  bool constantRequired = false);
87 
88 /// Returns a constant upper bound for the result `value` of an index
89 /// computation. Calls `getUpperBoundForIndex` and returns a constant upper
90 /// bound if the result of `boundMap` is a constant expression and failure
91 /// otherwise.
92 ///
93 /// Example:
94 /// ```
95 /// %0 = affine.min affine.map<(d0) -> (40, d0)> (%d0)
96 /// %1 = affine.apply affine.map<(d0) -> (d0 + 2)> (%0)
97 /// ```
98 /// getConstantUpperBoundForIndex(%1) returns 42
99 /// (boundsMap = affine.map<() -> (42)>)
100 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);
101 
102 /// Create a tensor::PadOp that pads `source` to the size of the statically
103 /// sized `type` whose static sizes are assumed to be greater than the dynamic
104 /// `source` size. The padding introduces trailing `pad` values until the target
105 /// size is met. If `source` is defined by one or more LinalgOps that have been
106 /// padded with the same value and sizes, return their padded result instead of
107 /// creating a tensor::PadOp.
108 ///
109 /// Example:
110 /// ```
111 /// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1]
112 /// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst }
113 /// %2 = linalg.matmul ins(...) outs(%1)
114 /// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1]
115 /// ```
116 /// makeComposedPadHighOp(source=%3, pad=%cst) returns %2
117 /// makeComposedPadHighOp(source=%3, pad=%other_cst) returns %4
118 /// ```
119 /// %4 = tensor.pad %3 low[0, 0] high[...] { tensor.yield %other_cst }
120 /// ```
121 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
122  Value source, Value pad, bool nofold);
123 
124 /// Returns a GenericOp that tansposes `inputTensor` into `outputTensor` using
125 /// `transposeVector` to permute the `inputTensor` dimensions.
126 GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
127  Value outputTensor,
128  ArrayRef<int64_t> transposeVector);
129 
130 /// Returns GenericOp that copies an n-D memref. Unlike the current
131 /// implementation of memref::CopyOp, this op can further tile, lower to loops
132 /// or vectorize.
133 GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to);
134 
135 /// Get the reassociation maps to fold the result of a extract_slice (or source
136 /// of a insert_slice) operation with given offsets, and sizes to its
137 /// rank-reduced version. This is only done for the cases where the size is 1
138 /// and offset is 0. Strictly speaking the offset 0 is not required in general,
139 /// but non-zero offsets are not handled by SPIR-V backend at this point (and
140 /// potentially cannot be handled).
141 Optional<SmallVector<ReassociationIndices>>
142 getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes);
143 
144 //===----------------------------------------------------------------------===//
145 // Fusion / Tiling utilities
146 //===----------------------------------------------------------------------===//
147 
148 /// The type of loops to be generated during tiling.
150  Loops = 0,
151  AffineLoops = 1,
152  ParallelLoops = 2
153 };
154 
155 /// Checks whether the specific `producer` is the last write to exactly the
156 /// whole `consumedView`. This checks structural dominance, that the dependence
157 /// is a RAW without any interleaved write to any piece of `consumedView`.
159  LinalgOp consumer, Value consumedView,
160  LinalgOp producer);
161 
162 /// Checks whether fusing the specific `producer` of the `consumedView` is
163 /// feasible. This checks `producer` is the last write of `consumedView` and
164 /// that no interleaved dependence would be violated (RAW, WAR or WAW).
165 bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
166  Value consumedView, LinalgOp producer);
167 
168 /// Computes tile offsets, given a list of loop `ivs` and `tileSizes`. In case a
169 /// tile size is zero (i.e., no tiling), the corresponding offset is also zero.
172  ArrayRef<OpFoldResult> tileSizes);
173 
174 /// Computes tile sizes, given a list of `tileSizes` and dimension
175 /// sizes (`sizeBounds`). In case a tile size is zero (i.e., no tiling), the
176 /// corresponding result size is the corresponding value from `sizeBounds`.
177 /// Note: The returned tile sizes are closed intervals.
179  ArrayRef<OpFoldResult> tileSizes,
180  ArrayRef<OpFoldResult> sizeBounds);
181 
182 /// Returns the list of tensor output types produced when the given structured
183 /// operation `op` is applied to the given `operands`. Note that `operands` are
184 /// not necessarily the actual operands of `op`.
185 SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands);
186 
187 /// Creates `insert_slice` ops that insert `results` back into larger tensors
188 /// they were originally extracted from with `extract_slice` before being passed
189 /// as `operands` to the given structured operation `op` or its clone. Note that
190 /// `operands` are not necessarily the actual operands of `op`, the operation
191 /// serves only as metadata container for operand types and positions.
193  LinalgOp op, ValueRange operands,
194  ValueRange results);
195 
196 /// A struct containg offsets-sizes-strides arguments of the tiled shape.
201 };
202 
203 /// Computes SliceParameters for a single `valueToTile` assuming that its user
204 /// is being tiled with the given loop bounds `lbs` and `ubs` and the tile sizes
205 /// `tileSizes`.
206 ///
207 /// `omitPartialTileCheck` controls whether to omit the partial/boundary tile
208 /// condition check in cases where we statically know that it is unnecessary.
210 computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
211  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
213  ArrayRef<OpFoldResult> subShapeSizes,
214  bool omitPartialTileCheck);
215 
216 /// Computes SliceParamaters for all `valuesToTile` of the given `linalgOp`,
217 /// assuming `linalgOp` is being fused into a loop nest. Calls
218 /// `computeSliceParameters` for every individual value.
219 ///
220 /// Note that a constant zero in `tileSizes` means no tiling at that implicit
221 /// loop. The number of non-zero values in `tileSizes` should be equal to the
222 /// number of values in `ivs`.
223 ///
224 /// Some of the `valuesToTile` won't be affected by tiling. For these values,
225 /// llvm::None will be returned.
227 computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
228  ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
229  ArrayRef<OpFoldResult> tileSizes,
230  ArrayRef<OpFoldResult> sizeBounds,
231  bool omitPartialTileCheck);
232 
233 /// Creates an extract_slice/subview op for a single `valueToTile` with
234 /// `builder`. This new operation extracts a tile of `valueToTile`, starting
235 /// at offsets `lbs` and with sizes `subShapeSizes`. `omitPartialTileCheck`
236 /// controls whether to omit the partial/boundary tile condition check in cases
237 /// where we statically know that it is unnecessary.
238 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
239  ArrayRef<OpFoldResult> tileSizes, AffineMap map,
241  ArrayRef<OpFoldResult> subShapeSizes,
242  bool omitPartialTileCheck);
243 
244 /// Creates extract_slice/subview ops for all `valuesToTile` of the given
245 /// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop
246 /// nest for tiling with the given induction variables `ivs` and tile sizes
247 /// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
248 /// implicit loops in `linalgOp`. `omitPartialTileCheck` controls whether to
249 /// omit the partial/boundary tile condition check in cases where we statically
250 /// know that it is unnecessary.
251 ///
252 /// Note that a constant zero in `tileSizes` means no tiling at that implicit
253 /// loop. The number of non-zero values in `tileSizes` should be equal to the
254 /// number of values in `ivs`.
256  LinalgOp linalgOp, ValueRange valuesToTile,
258  ArrayRef<OpFoldResult> tileSizes,
259  ArrayRef<OpFoldResult> sizeBounds,
260  bool omitPartialTileCheck);
261 
262 /// Add the specified offsets to any `linalg.index` ops contained in the given
263 /// `linalgOp`. The offsets are provided in the same order as iteration space
264 /// dimensions. Null offests are assumed to be zero.
265 void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
266  ArrayRef<OpFoldResult> offests);
267 void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
268  ArrayRef<OpFoldResult> offests);
269 
270 using FusableOpDependencesTy = llvm::MapVector<
271  Operation *,
275  const LinalgDependenceGraph &dependenceGraph);
276 
277 /// A struct containing the Linalg producer before and after fusion.
278 /// When operating on tensors, `fusedProducer` may feed into a `tensor.cast` op
279 /// before the consumer Linalg op, until enough canonicalizations have applied.
280 struct FusionInfo {
282  LinalgOp fusedProducer;
283 };
284 
285 /// Fuses producer into consumer if the producer is structurally feasible and
286 /// the fusion would not violate dependencies.
287 /// Implements the fusion part of the "tileAndFuse on buffers" transformation
288 /// and thus requires the `consumerOpOperand` to be a `subview` op (generally
289 /// obtained by applying the tiling transformation).
291  OpOperand &consumerOpOperand,
292  const LinalgDependenceGraph &graph);
293 /// Tensor counterpart of `fuseProducerOfBuffer`.
294 /// This implements the fusion part of the "tileAndFuse on tensors"
295 /// transformation and thus requires the `consumerOpOperand` to be a
296 /// `extract_slice` op (generally obtained by applying the tiling
297 /// transformation).
299  OpOperand &consumerOpOperand);
300 /// Tensor counterpart of `fuseProducerOfBuffer`.
301 /// This implements the fusion part of the "tileAndFuse on tensors"
302 /// transformation and thus requires the `consumerOpOperand` to be a
303 /// `extract_slice` op (generally obtained by applying the tiling
304 /// transformation). Assumes `producerOfTensor` is a Linalg op that produces
305 /// `consumerOpOperand`.
307  OpResult producerOpResult,
308  OpOperand &consumerOpOperand);
309 
310 //===----------------------------------------------------------------------===//
311 // Distribution utilities
312 //===----------------------------------------------------------------------===//
313 
314 /// Scheme used to distribute loops to processors.
315 enum class DistributionMethod {
316  /// Cyclic distribution where no assumption is made about the dynamic
317  /// relationship between number of processors and number of iterations of the
318  /// distributed loop. Distributes the following loop
319  ///
320  /// scf.parallel (%iv) = (%lb) to (%ub) step (%step)
321  ///
322  /// to
323  ///
324  /// scf.parallel(%iv)= (%lb + %procId * %step) to (%ub) step (%step * %nprocs)
325  Cyclic = 0,
326 
327  /// Cyclic distribution where the number of processors can be assumed to be
328  /// more than or equal to the number of iterations of the distributed loop. In
329  /// such cases, a simple in-bounds check is enough (instead of materializing a
330  /// loop). Distributes the following loop
331  ///
332  /// scf.parallel (%iv) = (%lb) to (%ub) step (%step)
333  ///
334  /// to
335  ///
336  /// %iv = %lb + %procId * %step
337  /// %cond = arith.cmpi "slt", %iv, %ub
338  /// scf.if %cond {
339  /// ...
340  /// }
342 
343  /// Cyclic distribution where the number of processors can be assumed to be
344  /// equal to the number of iterations of the distributed loop. In such cases,
345  /// no bounds check is needed. Distributes the following loop
346  ///
347  /// scf.parallel (%iv) = (%lb) to (%ub) step (%step)
348  ///
349  /// to
350  ///
351  /// %iv = %lb + %procId * %step
353 
354  /// No Distribution.
355  None = 3
356 };
357 
358 /// Callback function type used to get processor ID, and number of processors
359 /// used for distribution for all parallel loops generated.
360 struct ProcInfo {
364 };
365 using ProcInfoCallBackFn = std::function<SmallVector<ProcInfo>(
366  OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges)>;
367 
368 /// Options that allow distribution of loops generated in Linalg transforms to
369 /// processors while generating the loops.
371  /// Callback function that returns the Values for processor ID (`procId`), and
372  /// number of processors (`nprocs`) used to execute the parallel loops. The
373  /// number of `{procId, nprocs}` pairs returned must be equal to the number of
374  /// `parallelLoopRanges` passed into the callback. The `parallelLoopRanges`
375  /// are ranges of the outer parallel loops of the operation that
376  /// do have non-zero tile sizes specified.
378 };
379 
380 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
382  Value procId, Value nprocs, Value &lb,
383  Value &ub, Value &step);
384 
385 //===----------------------------------------------------------------------===//
386 // Fusion on tensor utilities
387 //===----------------------------------------------------------------------===//
388 
389 /// A struct to manage the tile loop nest specific information.
391 public:
392  TileLoopNest(LinalgOp rootOp) : rootOp(rootOp) {}
393 
394  /// Tile the root operation using the given `tileSizes` and `tileInterchange`,
395  /// and `tileDistribution`.
397  tileRootOp(OpBuilder &b, ArrayRef<int64_t> tileSizes,
398  ArrayRef<int64_t> tileInterchange,
399  Optional<LinalgLoopDistributionOptions> tileDistribution);
400 
401  /// Fuse the producer of `consumerOpOperand` into the tile loop nest. Returns
402  /// the fused producer or fails if fusion is not possible.
403  FailureOr<LinalgOp> fuseProducer(OpBuilder &b, OpOperand *consumerOpOperand);
404 
405  /// Returns the replacement results for the original untiled root operation.
406  ValueRange getRootOpReplacementResults();
407 
408  /// Returns the tiled root operation.
409  LinalgOp getRootOp() { return rootOp; }
410 
411  /// Returns the tiled root operation and the fused producers.
412  SmallVector<LinalgOp> getAllTiledAndFusedOps();
413 
414  /// Returns the loop ops generated from tiling.
415  ArrayRef<scf::ForOp> getLoopOps() { return tileLoopOps; }
416 
417  /// Returns true if the tile loop nest has no tile loops.
418  bool isEmpty();
419 
420 private:
421  /// Returns true if the tile loop nest invariants are satisfied:
422  /// - The `rootOp` has been tiled at least once.
423  /// - The number of tile loop operations and dimensions match.
424  /// - The innermost tile loop is the parent of `tiledOp`.
425  /// - The tile loops are directly nested.
426  // TODO: relax to support additional control flow, e.g., IfOp.
427  bool isValid();
428 
429  /// Searches the block arguments tied to a block argument `bbArg` of the
430  /// innermost tile loop. Returns the block argument from outermost to
431  /// innermost or an empty vector if none are found.
432  SmallVector<BlockArgument> getTiedBBArgs(BlockArgument bbArg);
433 
434  /// Returns the iteration argument of the outermost tile loop mapped to a
435  /// block argument `bbArg` of the innermost tile loop.
436  OpOperand *getTiedIterArg(BlockArgument bbArg);
437 
438  /// Returns true if `bbArg` has other used than `sliceOp` and its
439  /// dependencies. Only if there are no other uses, the producer output
440  /// iteration argument may reused to pass the producer result after fusion.
441  bool hasOtherUses(BlockArgument bbArg, tensor::ExtractSliceOp sliceOp);
442 
443  LinalgOp rootOp;
444  SmallVector<scf::ForOp> tileLoopOps;
445  DenseMap<Operation *, SmallVector<int64_t>> tiledRootAndFusedOpsLoops;
446 };
447 
448 /// Tiles `consumerOp` and fuses its dependencies if possible. Uses the
449 /// `tileSizes`, `tileInterchange`, and `tileDistribution` parameters to control
450 /// the tiling.
452  OpBuilder &b, LinalgOp consumerOp, ArrayRef<int64_t> tileSizes,
453  ArrayRef<int64_t> tileInterchange,
454  const Optional<LinalgLoopDistributionOptions> &tileDistribution);
455 
456 //===----------------------------------------------------------------------===//
457 // Generic op region utilities
458 //===----------------------------------------------------------------------===//
459 
460 /// A struct containing common matchers over linalg op's region.
462  enum class BinaryOpKind {
463  IAdd,
464  };
465 
466  /// Matches the given linalg op if its body is performing binary operation on
467  /// int or float scalar values and returns the binary op kind.
468  ///
469  /// The linalg op's region is expected to be
470  /// ```
471  /// {
472  /// ^bb(%a: <scalar-type>, %b: <scalar-type>):
473  /// %0 = <binary-op> %a, %b: <scalar-type>
474  /// linalg.yield %0: <scalar-type>
475  /// }
476  /// ```
477  static Optional<BinaryOpKind> matchAsScalarBinaryOp(GenericOp op);
478 };
479 
480 //===----------------------------------------------------------------------===//
481 // Loop nest utilities
482 //===----------------------------------------------------------------------===//
483 
484 /// Utility class used to generate nested loops with ranges described by
485 /// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
486 /// is used to generate the body of the innermost loop. It is passed a range
487 /// of loop induction variables and a range of operand values to use.
488 template <typename LoopTy>
490  static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
491  LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
494  bodyBuilderFn,
495  ArrayRef<linalg::ProcInfo> procInfo = {});
496 };
497 
498 /// Returns an attribute list that excludes pre-defined attributes.
499 template <typename OpTy>
501  llvm::StringSet<> elidedAttrs;
502  elidedAttrs.insert(op.getAttributeNames().begin(),
503  op.getAttributeNames().end());
504  if (isa<linalg::LinalgOp>(op.getOperation()))
505  elidedAttrs.insert(LinalgDialect::kMemoizedIndexingMapsAttrName);
507  for (auto attr : op->getAttrs()) {
508  if (elidedAttrs.count(attr.getName()))
509  continue;
510  attrs.push_back(attr);
511  }
512  return attrs;
513 }
514 
515 } // namespace linalg
516 } // namespace mlir
517 
518 #endif // MLIR_DIALECT_LINALG_UTILS_UTILS_H
Include the generated interface declarations.
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Definition: Utils.h:489
LinalgOp getRootOp()
Returns the tiled root operation.
Definition: Utils.h:409
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
Definition: Utils.cpp:460
SmallVector< NamedAttribute > getPrunedAttributeList(OpTy op)
Returns an attribute list that excludes pre-defined attributes.
Definition: Utils.h:500
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:28
Callback function type used to get processor ID, and number of processors used for distribution for a...
Definition: Utils.h:360
bool isProducerLastWriteOfView(const LinalgDependenceGraph &graph, LinalgOp consumer, Value consumedView, LinalgOp producer)
Checks whether the specific producer is the last write to exactly the whole consumedView.
Definition: Fusion.cpp:226
SmallVector< OpFoldResult > strides
Definition: Utils.h:200
This is a value defined by a result of an operation.
Definition: Value.h:446
llvm::MapVector< Operation *, SmallVector< LinalgDependenceGraph::LinalgDependenceGraphElem, 1 > > FusableOpDependencesTy
Definition: Utils.h:272
FailureOr< FusionInfo > fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult, OpOperand &consumerOpOperand)
Tensor counterpart of fuseProducerOfBuffer.
Definition: Fusion.cpp:413
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
Definition: Utils.cpp:160
std::vector< Value > ValueVector
An owning vector of values, handy to return from functions.
Definition: SCF.h:63
Optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
Definition: Utils.cpp:1049
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
Definition: Utils.cpp:154
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
Definition: Utils.cpp:558
DistributionMethod distributionMethod
Definition: Utils.h:363
FailureOr< FusionInfo > fuseProducerOfBuffer(OpBuilder &b, OpOperand &consumerOpOperand, const LinalgDependenceGraph &graph)
Fuses producer into consumer if the producer is structurally feasible and the fusion would not violat...
Definition: Fusion.cpp:331
static Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Helper function that creates a memref::DimOp or tensor::DimOp depending on the type of source...
std::function< SmallVector< ProcInfo >(OpBuilder &b, Location loc, ArrayRef< Range > parallelLoopRanges)> ProcInfoCallBackFn
Definition: Utils.h:366
static constexpr const bool value
FusableOpDependencesTy findAllFusableDependences(ArrayRef< LinalgOp > ops, const LinalgDependenceGraph &dependenceGraph)
void getUpperBoundForIndex(Value value, AffineMap &boundMap, SmallVectorImpl< Value > &boundOperands, bool constantRequired=false)
Computes an upper bound for the result value of an index computation.
Definition: Utils.cpp:242
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:48
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer, Value consumedView, LinalgOp producer)
Checks whether fusing the specific producer of the consumedView is feasible.
Definition: Fusion.cpp:250
This class provides support for representing a failure result, or a valid value of type T...
Definition: LogicalResult.h:78
bool isParallelIterator(Attribute attr)
Check if attr has "parallel" iterator type semantics.
Definition: Utils.cpp:202
bool isReductionIterator(Attribute attr)
Check if attr has "reduction" iterator type semantics.
Definition: Utils.cpp:207
static OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
SmallVector< OpFoldResult > sizes
Definition: Utils.h:199
SmallVector< OpFoldResult > offsets
Definition: Utils.h:198
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
Definition: Utils.cpp:918
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
Definition: Utils.cpp:893
Data structure for holding a dependence graph that operates on LinalgOp and views as SSA values...
DistributionMethod
Scheme used to distribute loops to processors.
Definition: Utils.h:315
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold)
Create a tensor::PadOp that pads source to the size of the statically sized type whose static sizes a...
Definition: Utils.cpp:361
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder...
Definition: Utils.cpp:992
A multi-dimensional affine map Affine map&#39;s are immutable like Type&#39;s, and they are uniqued...
Definition: AffineMap.h:42
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
ArrayRef< scf::ForOp > getLoopOps()
Returns the loop ops generated from tiling.
Definition: Utils.h:415
LinalgOp fusedProducer
Definition: Utils.h:282
This class represents an argument of a Block.
Definition: Value.h:300
SmallVector< Optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParamaters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Definition: Utils.cpp:945
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
A struct to manage the tile loop nest specific information.
Definition: Utils.h:390
bool isPermutation(ArrayRef< int64_t > permutation)
Check if permutation is a permutation of the range [0, permutation.size()).
Definition: Utils.cpp:189
LinalgOp originalProducer
Definition: Utils.h:281
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
Definition: Utils.cpp:879
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Definition: Utils.cpp:909
LinalgTilingLoopType
The type of loops to be generated during tiling.
Definition: Utils.h:149
ProcInfoCallBackFn procInfo
Callback function that returns the Values for processor ID (procId), and number of processors (nprocs...
Definition: Utils.h:377
SmallVector< Value, 4 > getDynOperands(Location loc, Value val, OpBuilder &b)
Given an operation, retrieves the value of each dynamic dimension through constructing the necessary ...
Definition: Utils.cpp:232
This class represents an operand of an operation.
Definition: Value.h:251
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
A struct containing the Linalg producer before and after fusion.
Definition: Utils.h:280
TileLoopNest(LinalgOp rootOp)
Definition: Utils.h:392
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
Definition: Utils.cpp:772
A struct containg offsets-sizes-strides arguments of the tiled shape.
Definition: Utils.h:197
FailureOr< int64_t > getConstantUpperBoundForIndex(Value value)
Returns a constant upper bound for the result value of an index computation.
Definition: Utils.cpp:342
A struct containing common matchers over linalg op&#39;s region.
Definition: Utils.h:461
FailureOr< TileLoopNest > tileConsumerAndFuseProducers(OpBuilder &b, LinalgOp consumerOp, ArrayRef< int64_t > tileSizes, ArrayRef< int64_t > tileInterchange, const Optional< LinalgLoopDistributionOptions > &tileDistribution)
Tiles consumerOp and fuses its dependencies if possible.
Options that allow distribution of loops generated in Linalg transforms to processors while generatin...
Definition: Utils.h:370
GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor, Value outputTensor, ArrayRef< int64_t > transposeVector)
Returns a GenericOp that tansposes inputTensor into outputTensor using transposeVector to permute the...
Definition: Utils.cpp:421
This class helps build Operations.
Definition: Builders.h:196
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:345
void offsetIndices(RewriterBase &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Definition: Utils.cpp:1019
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Definition: Utils.cpp:760
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:398
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
Definition: Utils.cpp:174