//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions concerned with hoisting padding operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dominance.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"

using llvm::dbgs;

#define DEBUG_TYPE "hoist-padding"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")

using namespace mlir;
using namespace mlir::linalg;

/// Analysis class to support PadTensorOp hoisting across multiple enclosing
/// loops. The failure conditions are:
///   1. Pad op has a use that is not an input of a LinalgOp.
///   2. Pad op does not have a constant padding value.
///   3. There is no immediately enclosing scf::ForOp.
///   4. The backward slice from the pad op to the scf::ForOp to hoist above
///      contains an unknown op with non-index type operands, a region, or a
///      memory effect.
///   5. The backward slice from the pad op to the scf::ForOp to hoist above is
///      empty.
///   6. The source tensor of the pad op is not defined by an extract slice op.
///   7. The source tensor of the extract slice op is not defined outside of
///      the outermost enclosing scf::ForOp.
///   8. There is no enclosing scf::ForOp that indexes the padded data.
/// Other cases succeed and will trigger hoisting of the pad op.
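///
/// As an illustration, a minimal sketch of a hoistable case (names and shapes
/// are hypothetical, not taken from an actual test):
/// ```
///   %source = linalg.fill(%cst, %init)       // defined outside the loops
///   scf.for %i = %c0 to %c64 step %c4 {
///     %slice = tensor.extract_slice %source[%i, 0] [4, 64] [1, 1]
///     %padded = linalg.pad_tensor %slice ... // constant padding value
///     linalg.matmul ins(%padded, ...) outs(...)
///   }
/// ```
/// None of the failure conditions apply: the pad result only feeds a LinalgOp
/// input, the padding value is a constant, and the enclosing loop %i indexes
/// the padded data, so the pad can be packed and hoisted above that loop.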
struct HoistingAnalysis {
  HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);

  bool isValid() { return valid; }

  /// Footprint of the packedTensor, computed from the packingLoops.
  SmallVector<Value> getPackedTensorSizes(ImplicitLocOpBuilder &b);

  /// The outermost loop, determined by `nLevels` above which `padTensorOp`
  /// will be hoisted.
  scf::ForOp outermostEnclosingForOp;

  /// Backward slice rooted at `padTensorOp` and nested under
  /// `outermostEnclosingForOp`.
  SetVector<Operation *> backwardSlice;

  /// The scf::ForOp loops that:
  ///   1. are nested under `outermostEnclosingForOp` (inclusive), and
  ///   2. have an induction variable that is used, directly or indirectly, in
  ///      the computation of `padTensorOp`.
  /// The span of these loops determines the footprint of the packed tensor.
  SmallVector<scf::ForOp> packingLoops;

private:
  /// Drop any non-index dependencies of `padTensorOp` and `sliceOp` from
  /// `backwardSlice`. The method follows the use-def chains of the index
  /// operands consumed by `padTensorOp` and `sliceOp` and drops the operations
  /// not part of this index computation. Afterwards, the filtered
  /// `backwardSlice` contains only the loops whose induction variable is used,
  /// directly or indirectly, to index the padded tensor. The method returns
  /// failure if the filtered backward slice contains an unexpected operation.
  ///
  /// Example:
  /// ```
  /// %source = linalg.fill(%cst, %arg0)
  /// scf.for %i
  ///   %unrelated = linalg.fill(%cst, %arg1)  // not used to index %source!
  ///   scf.for %j (%arg2 = %unrelated)
  ///     scf.for %k                           // not used to index %source!
  ///       %ubi = affine.min #map(%i)
  ///       %ubj = affine.min #map(%j)
  ///       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
  ///       %padded_slice = linalg.pad_tensor %slice
  /// ```
  /// dropNonIndexDependencies(%padded_slice, %slice)
  /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
  LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp,
                                         tensor::ExtractSliceOp sliceOp);

  /// Encodes whether the analysis is valid and hoisting can proceed.
  bool valid;
};

/// Return true if every use of `padTensorOp` is as an input tensor of some
/// LinalgOp.
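///
/// For example (a hypothetical sketch, types elided): hoisting proceeds when
/// the pad result only feeds `ins` operands, and is rejected otherwise:
/// ```
///   %p = linalg.pad_tensor %t ...
///   linalg.matmul ins(%p, %a : ...) outs(%b : ...)  // ok: input operand
///   linalg.matmul ins(%a, %b : ...) outs(%p : ...)  // fails the check
/// ```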
static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
  for (OpOperand &use : padTensorOp.result().getUses()) {
    auto linalgUser = dyn_cast<linalg::LinalgOp>(use.getOwner());
    if (!linalgUser || !linalgUser.isInputTensor(&use)) {
      LLVM_DEBUG(DBGS() << "Found a use of " << *(padTensorOp)
                        << "\nthat is not an input tensor of a LinalgOp, "
                        << "cannot hoist\n"
                        << *(use.getOwner()) << "\n");
      return false;
    }
  }
  return true;
}

/// Return at most nLevels of immediately enclosing scf::ForOp loops.
/// Stops at the first parent that is not an scf::ForOp.
/// Multi-loop constructs such as scf.parallel or linalg.tiled_loop are not
/// modeled at the moment.
/// Control flow and other containing ops with regions are not modeled at the
/// moment.
static void
getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
                         SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
  (void)state;
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padTensorOp->getParentOp();
  while (nLevels-- > 0 &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(
        DBGS() << "loops: ";
        outermostEnclosingForOp.getInductionVar().printAsOperand(dbgs(), state);
        dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}

HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
  valid = false;

  // Bail on any use that isn't an input of a Linalg op.
  // Hoisting of inplace updates happens after vectorization.
  if (!isOnlyUsedAsInputOfLinalgOp(padTensorOp))
    return;

  // Get at most `numLoops` of immediately enclosing loops.
  SmallVector<scf::ForOp> reverseEnclosingLoops;
  getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
    return;
  }

  outermostEnclosingForOp = reverseEnclosingLoops.back();

  // Get the `sliceOp` that defines the source tensor of `padTensorOp` and
  // check that its source is defined outside of the outermost loop. This check
  // ensures the padded data is available for packing before entering the
  // outermost enclosing loop.
  //
  // Example:
  // ```
  // %source = linalg.fill(%cst, %arg0)
  // // %source is available for packing here!
  // scf.for %i
  //   scf.for %j
  //     scf.for %k
  //       %slice = tensor.extract_slice %source [%i, %j]
  //       %padded_slice = linalg.pad_tensor %slice
  // ```
  auto sliceOp = padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "Cannot find the extract slice op -> skip\n");
    return;
  }
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.source())) {
    LLVM_DEBUG(DBGS() << "Source not defined outside of loops -> skip\n");
    return;
  }

  // Check that the region of `padTensorOp` depends on a constant only. Adding
  // hoisting support for arbitrary padding regions would require cloning all
  // dependencies captured by the padding region.
  Value paddingValue = padTensorOp.getConstantPaddingValue();
  if (!paddingValue ||
      !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
    LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> skip\n");
    return;
  }

  // Get all the ops in the backward slice starting from `padTensorOp` that
  // are dominated by the outermost enclosing loop.
  DominanceInfo domInfo(outermostEnclosingForOp);
  getBackwardSlice(padTensorOp.getOperation(), &backwardSlice,
                   [&](Operation *op) {
                     return domInfo.dominates(outermostEnclosingForOp, op);
                   });
  if (backwardSlice.empty())
    return;
  // Add `padTensorOp` itself to the backward slice.
  backwardSlice.insert(padTensorOp.getOperation());

  // Remove all ops in the backward slice that are not used to index the padded
  // tensor. In particular, keep `padTensorOp`, `sliceOp`, and the loop and
  // affine operations used for the index computation.
  if (failed(dropNonIndexDependencies(padTensorOp, sliceOp)))
    return;

  // Add only the loops that are part of the filtered `backwardSlice` to the
  // packing loops. All other loops are not used to index the padded data and
  // consequently access the same data in every loop iteration. Adding them to
  // the packing loops would increase the cache footprint of the packed data
  // by storing the same data multiple times.
  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
    if (backwardSlice.contains(forOp))
      packingLoops.push_back(forOp);
  if (packingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n");
    return;
  }

  // The analysis is valid and hoisting can occur.
  valid = true;
}

LogicalResult
HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp,
                                           tensor::ExtractSliceOp sliceOp) {
  // Set of all values used for index computation.
  SetVector<Value> indexEdges;

  // Add all index operands of `operation` to `indexEdges`. An index operand is
  // an operand of type index.
  auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
    for (Value operand : operation->getOperands())
      if (operand.getType().isIndex())
        indexEdges.insert(operand);
  };

  // Check if any operation result is contained in `indexEdges`.
  auto hasIndexResult = [&](Operation *operation) {
    return llvm::any_of(operation->getResults(), [&](Value result) {
      return indexEdges.contains(result);
    });
  };

  // Starting from `padTensorOp` and `sliceOp`, walk the use-def edges of index
  // type in `backwardSlice`. Add the index operands of an operation to
  // `indexEdges` and remove all operations from `backwardSlice` that are not
  // part of the index computation.
  //
  // Example:
  // ```
  // %source = linalg.fill(%cst, %arg0)
  // scf.for %i
  //   %unrelated = linalg.fill(%cst, %arg1)  // not used to index %source!
  //   scf.for %j (%arg2 = %unrelated)
  //     scf.for %k                           // not used to index %source!
  //       %ubi = affine.min #map(%i)
  //       %ubj = affine.min #map(%j)
  //       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
  //       %padded_slice = linalg.pad_tensor %slice
  // ```
  // After iterating `backwardSlice` we obtain:
  // indexEdges = [%i, %j, %ubi, %ubj]
  // backwardSlice = backwardSlice / [linalg.fill(%cst, %arg1), scf.for %k]
  SetVector<Operation *> operationsToRemove;
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Add the index operands of `padTensorOp` and `sliceOp` to start the
    // exploration of the index computation.
    if (op == padTensorOp || op == sliceOp) {
      addIndexOperandsToIndexEdges(op);
      continue;
    }
    // Add the index operands of the loop if its induction variable is
    // used for index computation.
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op);
        continue;
      }
    }
    // Add the index operands of all other operations if at least one result is
    // used for index computation.
    if (hasIndexResult(op)) {
      addIndexOperandsToIndexEdges(op);
      // Check that the operands of the remaining operations all have index
      // type.
      if (llvm::any_of(op->getOperandTypes(),
                       [](Type type) { return !type.isIndex(); })) {
        LLVM_DEBUG(DBGS() << "Unsupported op with non-index type operands: "
                          << op << " -> skip\n");
        return failure();
      }
      // Check that the remaining operations have no regions or memory effects.
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
      bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
      if (hasMemoryEffect || op->getNumRegions() != 0) {
        LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
                          << op << " -> skip\n");
        return failure();
      }
      continue;
    }
    // Remove all other operations not used by the index computation. An
    // exception are constant operations that may be used by `padTensorOp`.
    if (!isa<arith::ConstantOp>(op))
      operationsToRemove.insert(op);
  }
  backwardSlice.set_subtract(operationsToRemove);
  return success();
}

SmallVector<Value>
HoistingAnalysis::getPackedTensorSizes(ImplicitLocOpBuilder &b) {
  SmallVector<Value> dynamicTensorSizes;

  // Upper bound the packing loop lengths to size the packed tensor. Taking
  // upper bounds can make the sizes of the packed tensor independent of the
  // enclosing loops. This independence is a prerequisite for reusing the same
  // buffer for all enclosing loop iterations and hoisting its allocation out
  // of the enclosing loops.
  for (scf::ForOp forOp : packingLoops) {
    // Compute an upper bound `ubVal` for the upper bound of `forOp`.
    AffineMap boundMap;
    SmallVector<Value> boundOperands;
    getUpperBoundForIndex(forOp.getUpperBound(), boundMap, boundOperands);
    Value ubVal = b.createOrFold<AffineMinOp>(boundMap, boundOperands);
    // Compute the maximal packing loop length as (ub - lb).ceilDiv(step) and
    // store the result to `dynamicTensorSizes`.
    // TODO: instead of using the lower bound of `forOp` directly, implement a
    // lower bound computation similar to the upper bound computation.
    AffineExpr lb, ub, step;
    bindDims(b.getContext(), lb, ub);
    bindSymbols(b.getContext(), step);
    Value res = b.createOrFold<AffineApplyOp>(
        (ub - lb).ceilDiv(step),
        ValueRange{forOp.getLowerBound(), ubVal, forOp.getStep()});
    dynamicTensorSizes.push_back(res);
  }

  return dynamicTensorSizes;
}
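
// For intuition, a hypothetical instance of the size computation above: for a
// packing loop `scf.for %i = %c0 to %n step %c4` whose upper bound `%n` is
// bounded above by 64, the packed-dimension size is
//   (64 - 0) ceildiv 4 = 16,
// materialized as
//   affine.apply affine_map<(d0, d1)[s0] -> ((d1 - d0) ceildiv s0)>
//       (%c0, %ub)[%c4]
// where %ub is the affine.min upper bound computed by getUpperBoundForIndex.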

static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) {
  return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp<ConstantOp>();
}

/// Return the current iteration number in the loop (iv - lb).ceilDiv(step).
/// The returned Value is guaranteed not to depend on any loop contained in
/// [`outer`, `forOp`].
/// Return null if such a loop-independent quantity cannot be computed.
static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
                                     scf::ForOp forOp) {
  MLIRContext *ctx = forOp->getContext();
  AffineExpr iv, lb, step;
  bindDims(ctx, iv, lb);
  bindSymbols(ctx, step);
  if (!isDefinedOutsideOrConstant(outer, forOp.getLowerBound()) ||
      !isDefinedOutsideOrConstant(outer, forOp.getStep()))
    return Value();
  Value ivVal = forOp.getInductionVar(), lbVal = forOp.getLowerBound(),
        stepVal = forOp.getStep();
  auto loc = forOp->getLoc();
  return b.createOrFold<AffineApplyOp>(loc, (iv - lb).ceilDiv(step),
                                       ValueRange{ivVal, lbVal, stepVal});
}
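
// As a hypothetical example: for `scf.for %i = %c3 to %n step %c2`, the
// iteration number is (%i - 3) ceildiv 2, so iv = 3, 5, 7, ... maps to
// iteration 0, 1, 2, ... This value indexes the leading packed-tensor
// dimension associated with the loop, and is loop-independent provided %c3
// and %c2 are constants or defined outside `outer`.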

FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
                                                     int numLoops,
                                                     PadTensorOp &hoistedOp) {
  LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
                    << " loops\n");
  HoistingAnalysis analysis(opToHoist, numLoops);
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "Analysis failed -> Skip\n");
    return failure();
  }

  scf::ForOp outer = analysis.outermostEnclosingForOp;
  ImplicitLocOpBuilder b(outer->getLoc(), outer);

  SmallVector<Value> dynamicTensorSizes = analysis.getPackedTensorSizes(b);

  // Update the actual number of loops, which may be smaller.
  int nPackedLoops = analysis.packingLoops.size();

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  // Create the packed tensor<?x?x..?xpadded_shape> into which we amortize
  // padding.
  SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamicSize);
  // TODO: go grab dims when necessary, for now PadTensorOp returns a static
  // tensor.
  llvm::append_range(packedShape, paddedTensorType.getShape());
  auto packedTensorType =
      RankedTensorType::get(packedShape, paddedTensorType.getElementType());
  Value packedTensor = b.create<linalg::InitTensorOp>(
      loc, dynamicTensorSizes, packedTensorType.getShape(),
      packedTensorType.getElementType());
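
  // For instance (hypothetical shapes): hoisting a pad that produces a static
  // tensor<4x8xf32> across 2 packing loops allocates a packed tensor of type
  // tensor<?x?x4x8xf32>, whose two leading dynamic sizes come from
  // `dynamicTensorSizes`.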

  // Clone the operations involved in the backward slice, iteratively stepping
  // into the loops that we encounter.
  // The implementation proceeds in a stack-like fashion:
  //   1. Iteratively clone and step into the loops, pushing the `packedTensor`
  //      deeper in the stack.
  //   2. Create an InsertSliceOp at the top of the stack.
  //   3. Iteratively pop and yield the result of the InsertSliceOp across
  //      the cloned loops.
  SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
  clonedLoopIvs.reserve(nPackedLoops);
  leadingPackedTensorIndexings.reserve(nPackedLoops);
  BlockAndValueMapping bvm;
  // Stack step 1. iteratively clone loops and push `packedTensor`.
  for (Operation *op : analysis.backwardSlice) {
    // Specifically skip the extract_slice(packedTensor) case: this is the
    // piece we seek to replace.
    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op))
      if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor)
        continue;
    // Clone all operations except loops.
    auto forOp = dyn_cast<scf::ForOp>(op);
    if (!forOp) {
      b.clone(*op, bvm);
      continue;
    }
    // Create a packing loop that takes `packedTensor` as iteration argument.
    auto clonedForOp = b.create<scf::ForOp>(
        loc, bvm.lookupOrDefault(forOp.getLowerBound()),
        bvm.lookupOrDefault(forOp.getUpperBound()),
        bvm.lookupOrDefault(forOp.getStep()), packedTensor);
    // Map the induction var, region args and results to the `clonedForOp`.
    bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
    bvm.map(forOp.getResults(), clonedForOp.getResults());
    assert(clonedForOp->getNumRegions() == 1);
    clonedLoopIvs.push_back(clonedForOp.getInductionVar());

    b.setInsertionPointToStart(&clonedForOp->getRegion(0).front());
    Value loopIndependentIterationCount =
        buildLoopIterationCount(b, outer, clonedForOp);
    // Assert that the loop-independent iteration count can be computed.
    if (!loopIndependentIterationCount)
      llvm_unreachable("loop independence prerequisite not met");
    leadingPackedTensorIndexings.push_back(loopIndependentIterationCount);
    packedTensor = clonedForOp.getRegionIterArgs().front();
  }

  // Stack step 2. create InsertSliceOp at the top of the stack.
  // offsets = [clonedLoopIvs, 0 .. 0].
  SmallVector<OpFoldResult> offsets(leadingPackedTensorIndexings.begin(),
                                    leadingPackedTensorIndexings.end());
  offsets.append(paddedRank, b.getIndexAttr(0));
  // sizes = [1 .. 1, paddedShape].
  SmallVector<OpFoldResult> sizes(nPackedLoops, b.getIndexAttr(1));
  for (int64_t sz : paddedTensorType.getShape()) {
    // TODO: go grab dims when necessary, for now PadTensorOp returns a static
    // tensor.
    assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
    sizes.push_back(b.getIndexAttr(sz));
  }
  // strides = [1 .. 1].
  SmallVector<OpFoldResult> strides(nPackedLoops + paddedRank,
                                    b.getIndexAttr(1));

  Value inserted =
      b.create<tensor::InsertSliceOp>(loc, bvm.lookup(opToHoist.result()),
                                      packedTensor, offsets, sizes, strides);

  // Stack step 3. iteratively pop the stack and propagate the yield.
  Value valueToYield = inserted;
  for (Value iv : llvm::reverse(clonedLoopIvs)) {
    auto forOp = scf::getForInductionVarOwner(iv);
    b.setInsertionPointToEnd(&forOp.getRegion().front());
    b.create<scf::YieldOp>(loc, valueToYield);
    valueToYield = forOp.getResult(0);
  }

  // Now the packed tensor is ready, replace the original padding op by a
  // 1x..x1 slice [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
  b.setInsertionPoint(opToHoist);
  SmallVector<Value> loopIterationCounts = llvm::to_vector<4>(
      llvm::map_range(analysis.packingLoops, [&](scf::ForOp loop) {
        return buildLoopIterationCount(b, outer, loop);
      }));
  // Assert that all loop iteration counts can be computed.
  if (llvm::any_of(loopIterationCounts, [](Value v) { return !v; }))
    llvm_unreachable("loop independence prerequisite not met");
  // offsets = [originalLoopIvs, 0 .. 0].
  offsets.assign(loopIterationCounts.begin(), loopIterationCounts.end());
  offsets.append(paddedRank, b.getIndexAttr(0));
  // sizes = [1 .. 1, paddedShape] (defined above).
  // strides = [1 .. 1] (defined above).
  packedTensor =
      scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
  Value newResult = b.create<tensor::ExtractSliceOp>(
      loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);

  // Make the newly cloned `opToHoist` available to the caller.
  hoistedOp = cast<PadTensorOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
  return newResult;
}
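
// A minimal sketch of how a caller might drive this transformation (the
// surrounding pattern or pass is hypothetical, not part of this file):
//
//   PadTensorOp hoistedOp;
//   FailureOr<Value> newResult =
//       hoistPaddingOnTensors(padOp, /*numLoops=*/2, hoistedOp);
//   if (failed(newResult))
//     return failure(); // analysis rejected the pad op; leave the IR as is.
//   // Replace all uses of the original pad op with the 1x..x1 slice of the
//   // packed tensor and erase the now-dead op.
//   padOp.result().replaceAllUsesWith(*newResult);
//   padOp->erase();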