MLIR  18.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Misc utilities for loop transformation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous loop transformation routines.
10 //
11 //===----------------------------------------------------------------------===//
12 
18 #include "mlir/IR/BuiltinOps.h"
19 #include "mlir/IR/IRMapping.h"
20 #include "mlir/IR/PatternMatch.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 
29 using namespace mlir;
30 
31 namespace {
32 // This structure is to pass and return sets of loop parameters without
33 // confusing the order.
34 struct LoopParams {
35  Value lowerBound;
36  Value upperBound;
37  Value step;
38 };
39 } // namespace
40 
42  RewriterBase &rewriter, MutableArrayRef<scf::ForOp> loopNest,
43  ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn,
44  bool replaceIterOperandsUsesInLoop) {
45  if (loopNest.empty())
46  return {};
47  // This method is recursive (to make it more readable). Adding an
48  // assertion here to limit the recursion. (See
49  // https://discourse.llvm.org/t/rfc-update-to-mlir-developer-policy-on-recursion/62235)
50  assert(loopNest.size() <= 10 &&
51  "exceeded recursion limit when yielding value from loop nest");
52 
53  // To yield a value from a perfectly nested loop nest, the following
54  // pattern needs to be created, i.e. starting with
55  //
56  // ```mlir
57  // scf.for .. {
58  // scf.for .. {
59  // scf.for .. {
60  // %value = ...
61  // }
62  // }
63  // }
64  // ```
65  //
66  // needs to be modified to
67  //
68  // ```mlir
69  // %0 = scf.for .. iter_args(%arg0 = %init) {
70  // %1 = scf.for .. iter_args(%arg1 = %arg0) {
71  // %2 = scf.for .. iter_args(%arg2 = %arg1) {
72  // %value = ...
73  // scf.yield %value
74  // }
75  // scf.yield %2
76  // }
77  // scf.yield %1
78  // }
79  // ```
80  //
81  // The inner most loop is handled using the `replaceWithAdditionalYields`
82  // that works on a single loop.
83  if (loopNest.size() == 1) {
84  auto innerMostLoop =
85  cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
86  rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
87  newYieldValuesFn));
88  return {innerMostLoop};
89  }
90  // The outer loops are modified by calling this method recursively
91  // - The return value of the inner loop is the value yielded by this loop.
92  // - The region iter args of this loop are the init_args for the inner loop.
93  SmallVector<scf::ForOp> newLoopNest;
94  NewYieldValuesFn fn =
95  [&](OpBuilder &innerBuilder, Location loc,
96  ArrayRef<BlockArgument> innerNewBBArgs) -> SmallVector<Value> {
97  newLoopNest = replaceLoopNestWithNewYields(rewriter, loopNest.drop_front(),
98  innerNewBBArgs, newYieldValuesFn,
99  replaceIterOperandsUsesInLoop);
100  return llvm::to_vector(llvm::map_range(
101  newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
102  [](OpResult r) -> Value { return r; }));
103  };
104  scf::ForOp outerMostLoop =
105  cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
106  rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
107  newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
108  return newLoopNest;
109 }
110 
111 /// Outline a region with a single block into a new FuncOp.
112 /// Assumes the FuncOp result types is the type of the yielded operands of the
113 /// single block. This constraint makes it easy to determine the result.
114 /// This method also clones the `arith::ConstantIndexOp` at the start of
115 /// `outlinedFuncBody` to alloc simple canonicalizations. If `callOp` is
116 /// provided, it will be set to point to the operation that calls the outlined
117 /// function.
118 // TODO: support more than single-block regions.
119 // TODO: more flexible constant handling.
121  Location loc,
122  Region &region,
123  StringRef funcName,
124  func::CallOp *callOp) {
125  assert(!funcName.empty() && "funcName cannot be empty");
126  if (!region.hasOneBlock())
127  return failure();
128 
129  Block *originalBlock = &region.front();
130  Operation *originalTerminator = originalBlock->getTerminator();
131 
132  // Outline before current function.
133  OpBuilder::InsertionGuard g(rewriter);
134  rewriter.setInsertionPoint(region.getParentOfType<func::FuncOp>());
135 
136  SetVector<Value> captures;
137  getUsedValuesDefinedAbove(region, captures);
138 
139  ValueRange outlinedValues(captures.getArrayRef());
140  SmallVector<Type> outlinedFuncArgTypes;
141  SmallVector<Location> outlinedFuncArgLocs;
142  // Region's arguments are exactly the first block's arguments as per
143  // Region::getArguments().
144  // Func's arguments are cat(regions's arguments, captures arguments).
145  for (BlockArgument arg : region.getArguments()) {
146  outlinedFuncArgTypes.push_back(arg.getType());
147  outlinedFuncArgLocs.push_back(arg.getLoc());
148  }
149  for (Value value : outlinedValues) {
150  outlinedFuncArgTypes.push_back(value.getType());
151  outlinedFuncArgLocs.push_back(value.getLoc());
152  }
153  FunctionType outlinedFuncType =
154  FunctionType::get(rewriter.getContext(), outlinedFuncArgTypes,
155  originalTerminator->getOperandTypes());
156  auto outlinedFunc =
157  rewriter.create<func::FuncOp>(loc, funcName, outlinedFuncType);
158  Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
159 
160  // Merge blocks while replacing the original block operands.
161  // Warning: `mergeBlocks` erases the original block, reconstruct it later.
162  int64_t numOriginalBlockArguments = originalBlock->getNumArguments();
163  auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
164  {
165  OpBuilder::InsertionGuard g(rewriter);
166  rewriter.setInsertionPointToEnd(outlinedFuncBody);
167  rewriter.mergeBlocks(
168  originalBlock, outlinedFuncBody,
169  outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
170  // Explicitly set up a new ReturnOp terminator.
171  rewriter.setInsertionPointToEnd(outlinedFuncBody);
172  rewriter.create<func::ReturnOp>(loc, originalTerminator->getResultTypes(),
173  originalTerminator->getOperands());
174  }
175 
176  // Reconstruct the block that was deleted and add a
177  // terminator(call_results).
178  Block *newBlock = rewriter.createBlock(
179  &region, region.begin(),
180  TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
181  ArrayRef<Location>(outlinedFuncArgLocs)
182  .take_front(numOriginalBlockArguments));
183  {
184  OpBuilder::InsertionGuard g(rewriter);
185  rewriter.setInsertionPointToEnd(newBlock);
186  SmallVector<Value> callValues;
187  llvm::append_range(callValues, newBlock->getArguments());
188  llvm::append_range(callValues, outlinedValues);
189  auto call = rewriter.create<func::CallOp>(loc, outlinedFunc, callValues);
190  if (callOp)
191  *callOp = call;
192 
193  // `originalTerminator` was moved to `outlinedFuncBody` and is still valid.
194  // Clone `originalTerminator` to take the callOp results then erase it from
195  // `outlinedFuncBody`.
196  IRMapping bvm;
197  bvm.map(originalTerminator->getOperands(), call->getResults());
198  rewriter.clone(*originalTerminator, bvm);
199  rewriter.eraseOp(originalTerminator);
200  }
201 
202  // Lastly, explicit RAUW outlinedValues, only for uses within `outlinedFunc`.
203  // Clone the `arith::ConstantIndexOp` at the start of `outlinedFuncBody`.
204  for (auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
205  outlinedValues.size()))) {
206  Value orig = std::get<0>(it);
207  Value repl = std::get<1>(it);
208  {
209  OpBuilder::InsertionGuard g(rewriter);
210  rewriter.setInsertionPointToStart(outlinedFuncBody);
211  if (Operation *cst = orig.getDefiningOp<arith::ConstantIndexOp>()) {
212  IRMapping bvm;
213  repl = rewriter.clone(*cst, bvm)->getResult(0);
214  }
215  }
216  orig.replaceUsesWithIf(repl, [&](OpOperand &opOperand) {
217  return outlinedFunc->isProperAncestor(opOperand.getOwner());
218  });
219  }
220 
221  return outlinedFunc;
222 }
223 
225  func::FuncOp *thenFn, StringRef thenFnName,
226  func::FuncOp *elseFn, StringRef elseFnName) {
227  IRRewriter rewriter(b);
228  Location loc = ifOp.getLoc();
229  FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
230  if (thenFn && !ifOp.getThenRegion().empty()) {
231  outlinedFuncOpOrFailure = outlineSingleBlockRegion(
232  rewriter, loc, ifOp.getThenRegion(), thenFnName);
233  if (failed(outlinedFuncOpOrFailure))
234  return failure();
235  *thenFn = *outlinedFuncOpOrFailure;
236  }
237  if (elseFn && !ifOp.getElseRegion().empty()) {
238  outlinedFuncOpOrFailure = outlineSingleBlockRegion(
239  rewriter, loc, ifOp.getElseRegion(), elseFnName);
240  if (failed(outlinedFuncOpOrFailure))
241  return failure();
242  *elseFn = *outlinedFuncOpOrFailure;
243  }
244  return success();
245 }
246 
249  assert(rootOp != nullptr && "Root operation must not be a nullptr.");
250  bool rootEnclosesPloops = false;
251  for (Region &region : rootOp->getRegions()) {
252  for (Block &block : region.getBlocks()) {
253  for (Operation &op : block) {
254  bool enclosesPloops = getInnermostParallelLoops(&op, result);
255  rootEnclosesPloops |= enclosesPloops;
256  if (auto ploop = dyn_cast<scf::ParallelOp>(op)) {
257  rootEnclosesPloops = true;
258 
259  // Collect parallel loop if it is an innermost one.
260  if (!enclosesPloops)
261  result.push_back(ploop);
262  }
263  }
264  }
265  }
266  return rootEnclosesPloops;
267 }
268 
269 // Build the IR that performs ceil division of a positive value by a constant:
270 // ceildiv(a, B) = divis(a + (B-1), B)
271 // where divis is rounding-to-zero division.
272 static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
273  int64_t divisor) {
274  assert(divisor > 0 && "expected positive divisor");
275  assert(dividend.getType().isIndex() && "expected index-typed value");
276 
277  Value divisorMinusOneCst =
278  builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
279  Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
280  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
281  return builder.create<arith::DivUIOp>(loc, sum, divisorCst);
282 }
283 
284 // Build the IR that performs ceil division of a positive value by another
285 // positive value:
286 // ceildiv(a, b) = divis(a + (b - 1), b)
287 // where divis is rounding-to-zero division.
288 static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
289  Value divisor) {
290  assert(dividend.getType().isIndex() && "expected index-typed value");
291 
292  Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
293  Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
294  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
295  return builder.create<arith::DivUIOp>(loc, sum, divisor);
296 }
297 
298 /// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
299 /// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
300 /// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
301 /// unrolled iteration using annotateFn.
303  Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
304  function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
305  function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
306  ValueRange iterArgs, ValueRange yieldedValues) {
307  // Builder to insert unrolled bodies just before the terminator of the body of
308  // 'forOp'.
309  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
310 
311  if (!annotateFn)
312  annotateFn = [](unsigned, Operation *, OpBuilder) {};
313 
314  // Keep a pointer to the last non-terminator operation in the original block
315  // so that we know what to clone (since we are doing this in-place).
316  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
317 
318  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
319  SmallVector<Value, 4> lastYielded(yieldedValues);
320 
321  for (unsigned i = 1; i < unrollFactor; i++) {
322  IRMapping operandMap;
323 
324  // Prepare operand map.
325  operandMap.map(iterArgs, lastYielded);
326 
327  // If the induction variable is used, create a remapping to the value for
328  // this unrolled instance.
329  if (!forOpIV.use_empty()) {
330  Value ivUnroll = ivRemapFn(i, forOpIV, builder);
331  operandMap.map(forOpIV, ivUnroll);
332  }
333 
334  // Clone the original body of 'forOp'.
335  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
336  Operation *clonedOp = builder.clone(*it, operandMap);
337  annotateFn(i, clonedOp, builder);
338  }
339 
340  // Update yielded values.
341  for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
342  lastYielded[i] = operandMap.lookup(yieldedValues[i]);
343  }
344 
345  // Make sure we annotate the Ops in the original body. We do this last so that
346  // any annotations are not copied into the cloned Ops above.
347  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
348  annotateFn(0, &*it, builder);
349 
350  // Update operands of the yield statement.
351  loopBodyBlock->getTerminator()->setOperands(lastYielded);
352 }
353 
354 /// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
356  scf::ForOp forOp, uint64_t unrollFactor,
357  function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
358  assert(unrollFactor > 0 && "expected positive unroll factor");
359 
360  // Return if the loop body is empty.
361  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
362  return success();
363 
364  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
365  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
366  OpBuilder boundsBuilder(forOp);
367  IRRewriter rewriter(forOp.getContext());
368  auto loc = forOp.getLoc();
369  Value step = forOp.getStep();
370  Value upperBoundUnrolled;
371  Value stepUnrolled;
372  bool generateEpilogueLoop = true;
373 
374  std::optional<int64_t> lbCstOp = getConstantIntValue(forOp.getLowerBound());
375  std::optional<int64_t> ubCstOp = getConstantIntValue(forOp.getUpperBound());
376  std::optional<int64_t> stepCstOp = getConstantIntValue(forOp.getStep());
377  if (lbCstOp && ubCstOp && stepCstOp) {
378  // Constant loop bounds computation.
379  int64_t lbCst = lbCstOp.value();
380  int64_t ubCst = ubCstOp.value();
381  int64_t stepCst = stepCstOp.value();
382  assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
383  "expected positive loop bounds and step");
384  int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
385 
386  if (unrollFactor == 1) {
387  if (tripCount == 1 && failed(forOp.promoteIfSingleIteration(rewriter)))
388  return failure();
389  return success();
390  }
391 
392  int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
393  int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
394  int64_t stepUnrolledCst = stepCst * unrollFactor;
395 
396  // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
397  generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
398  if (generateEpilogueLoop)
399  upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
400  loc, upperBoundUnrolledCst);
401  else
402  upperBoundUnrolled = forOp.getUpperBound();
403 
404  // Create constant for 'stepUnrolled'.
405  stepUnrolled = stepCst == stepUnrolledCst
406  ? step
407  : boundsBuilder.create<arith::ConstantIndexOp>(
408  loc, stepUnrolledCst);
409  } else {
410  // Dynamic loop bounds computation.
411  // TODO: Add dynamic asserts for negative lb/ub/step, or
412  // consider using ceilDiv from AffineApplyExpander.
413  auto lowerBound = forOp.getLowerBound();
414  auto upperBound = forOp.getUpperBound();
415  Value diff =
416  boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
417  Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
418  Value unrollFactorCst =
419  boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
420  Value tripCountRem =
421  boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
422  // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
423  Value tripCountEvenMultiple =
424  boundsBuilder.create<arith::SubIOp>(loc, tripCount, tripCountRem);
425  // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
426  upperBoundUnrolled = boundsBuilder.create<arith::AddIOp>(
427  loc, lowerBound,
428  boundsBuilder.create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
429  // Scale 'step' by 'unrollFactor'.
430  stepUnrolled =
431  boundsBuilder.create<arith::MulIOp>(loc, step, unrollFactorCst);
432  }
433 
434  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
435  if (generateEpilogueLoop) {
436  OpBuilder epilogueBuilder(forOp->getContext());
437  epilogueBuilder.setInsertionPoint(forOp->getBlock(),
438  std::next(Block::iterator(forOp)));
439  auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
440  epilogueForOp.setLowerBound(upperBoundUnrolled);
441 
442  // Update uses of loop results.
443  auto results = forOp.getResults();
444  auto epilogueResults = epilogueForOp.getResults();
445 
446  for (auto e : llvm::zip(results, epilogueResults)) {
447  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
448  }
449  epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
450  epilogueForOp.getInitArgs().size(), results);
451  (void)epilogueForOp.promoteIfSingleIteration(rewriter);
452  }
453 
454  // Create unrolled loop.
455  forOp.setUpperBound(upperBoundUnrolled);
456  forOp.setStep(stepUnrolled);
457 
458  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
459  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
460 
462  forOp.getBody(), forOp.getInductionVar(), unrollFactor,
463  [&](unsigned i, Value iv, OpBuilder b) {
464  // iv' = iv + step * i;
465  auto stride = b.create<arith::MulIOp>(
466  loc, step, b.create<arith::ConstantIndexOp>(loc, i));
467  return b.create<arith::AddIOp>(loc, iv, stride);
468  },
469  annotateFn, iterArgs, yieldedValues);
470  // Promote the loop body up if this has turned into a single iteration loop.
471  (void)forOp.promoteIfSingleIteration(rewriter);
472  return success();
473 }
474 
475 /// Return the new lower bound, upper bound, and step in that order. Insert any
476 /// additional bounds calculations before the given builder and any additional
477 /// conversion back to the original loop induction value inside the given Block.
478 static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
479  OpBuilder &insideLoopBuilder, Location loc,
480  Value lowerBound, Value upperBound, Value step,
481  Value inductionVar) {
482  // Check if the loop is already known to have a constant zero lower bound or
483  // a constant one step.
484  bool isZeroBased = false;
485  if (auto ubCst = getConstantIntValue(lowerBound))
486  isZeroBased = ubCst.value() == 0;
487 
488  bool isStepOne = false;
489  if (auto stepCst = getConstantIntValue(step))
490  isStepOne = stepCst.value() == 1;
491 
492  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
493  // assuming the step is strictly positive. Update the bounds and the step
494  // of the loop to go from 0 to the number of iterations, if necessary.
495  if (isZeroBased && isStepOne)
496  return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
497  /*step=*/step};
498 
499  Value diff = boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
500  Value newUpperBound =
501  boundsBuilder.create<arith::CeilDivSIOp>(loc, diff, step);
502 
503  Value newLowerBound =
504  isZeroBased ? lowerBound
505  : boundsBuilder.create<arith::ConstantIndexOp>(loc, 0);
506  Value newStep =
507  isStepOne ? step : boundsBuilder.create<arith::ConstantIndexOp>(loc, 1);
508 
509  // Insert code computing the value of the original loop induction variable
510  // from the "normalized" one.
511  Value scaled =
512  isStepOne
513  ? inductionVar
514  : insideLoopBuilder.create<arith::MulIOp>(loc, inductionVar, step);
515  Value shifted =
516  isZeroBased
517  ? scaled
518  : insideLoopBuilder.create<arith::AddIOp>(loc, scaled, lowerBound);
519 
520  SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
521  shifted.getDefiningOp()};
522  inductionVar.replaceAllUsesExcept(shifted, preserve);
523  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
524  /*step=*/newStep};
525 }
526 
527 /// Transform a loop with a strictly positive step
528 /// for %i = %lb to %ub step %s
529 /// into a 0-based loop with step 1
530 /// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
531 /// %i = %ii * %s + %lb
532 /// Insert the induction variable remapping in the body of `inner`, which is
533 /// expected to be either `loop` or another loop perfectly nested under `loop`.
534 /// Insert the definition of new bounds immediate before `outer`, which is
535 /// expected to be either `loop` or its parent in the loop nest.
536 static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
537  OpBuilder builder(outer);
538  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
539  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
540  loop.getLowerBound(), loop.getUpperBound(),
541  loop.getStep(), loop.getInductionVar());
542 
543  loop.setLowerBound(loopPieces.lowerBound);
544  loop.setUpperBound(loopPieces.upperBound);
545  loop.setStep(loopPieces.step);
546 }
547 
549  if (loops.size() < 2)
550  return failure();
551 
552  scf::ForOp innermost = loops.back();
553  scf::ForOp outermost = loops.front();
554 
555  // 1. Make sure all loops iterate from 0 to upperBound with step 1. This
556  // allows the following code to assume upperBound is the number of iterations.
557  for (auto loop : loops)
558  normalizeLoop(loop, outermost, innermost);
559 
560  // 2. Emit code computing the upper bound of the coalesced loop as product
561  // of the number of iterations of all loops.
562  OpBuilder builder(outermost);
563  Location loc = outermost.getLoc();
564  Value upperBound = outermost.getUpperBound();
565  for (auto loop : loops.drop_front())
566  upperBound =
567  builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
568  outermost.setUpperBound(upperBound);
569 
570  builder.setInsertionPointToStart(outermost.getBody());
571 
572  // 3. Remap induction variables. For each original loop, the value of the
573  // induction variable can be obtained by dividing the induction variable of
574  // the linearized loop by the total number of iterations of the loops nested
575  // in it modulo the number of iterations in this loop (remove the values
576  // related to the outer loops):
577  // iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
578  // Compute these iteratively from the innermost loop by creating a "running
579  // quotient" of division by the range.
580  Value previous = outermost.getInductionVar();
581  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
582  unsigned idx = loops.size() - i - 1;
583  if (i != 0)
584  previous = builder.create<arith::DivSIOp>(loc, previous,
585  loops[idx + 1].getUpperBound());
586 
587  Value iv = (i == e - 1) ? previous
588  : builder.create<arith::RemSIOp>(
589  loc, previous, loops[idx].getUpperBound());
590  replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
591  loops.back().getRegion());
592  }
593 
594  // 4. Move the operations from the innermost just above the second-outermost
595  // loop, delete the extra terminator and the second-outermost loop.
596  scf::ForOp second = loops[1];
597  innermost.getBody()->back().erase();
598  outermost.getBody()->getOperations().splice(
599  Block::iterator(second.getOperation()),
600  innermost.getBody()->getOperations());
601  second.erase();
602  return success();
603 }
604 
606  scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions) {
607  OpBuilder outsideBuilder(loops);
608  Location loc = loops.getLoc();
609 
610  // Presort combined dimensions.
611  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
612  for (auto &dims : sortedDimensions)
613  llvm::sort(dims);
614 
615  // Normalize ParallelOp's iteration pattern.
616  SmallVector<Value, 3> normalizedLowerBounds, normalizedSteps,
617  normalizedUpperBounds;
618  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
619  OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
620  auto resultBounds =
621  normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
622  loops.getLowerBound()[i], loops.getUpperBound()[i],
623  loops.getStep()[i], loops.getBody()->getArgument(i));
624 
625  normalizedLowerBounds.push_back(resultBounds.lowerBound);
626  normalizedUpperBounds.push_back(resultBounds.upperBound);
627  normalizedSteps.push_back(resultBounds.step);
628  }
629 
630  // Combine iteration spaces.
631  SmallVector<Value, 3> lowerBounds, upperBounds, steps;
632  auto cst0 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 0);
633  auto cst1 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
634  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
635  Value newUpperBound = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
636  for (auto idx : sortedDimensions[i]) {
637  newUpperBound = outsideBuilder.create<arith::MulIOp>(
638  loc, newUpperBound, normalizedUpperBounds[idx]);
639  }
640  lowerBounds.push_back(cst0);
641  steps.push_back(cst1);
642  upperBounds.push_back(newUpperBound);
643  }
644 
645  // Create new ParallelLoop with conversions to the original induction values.
646  // The loop below uses divisions to get the relevant range of values in the
647  // new induction value that represent each range of the original induction
648  // value. The remainders then determine based on that range, which iteration
649  // of the original induction value this represents. This is a normalized value
650  // that is un-normalized already by the previous logic.
651  auto newPloop = outsideBuilder.create<scf::ParallelOp>(
652  loc, lowerBounds, upperBounds, steps,
653  [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
654  for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
655  Value previous = ploopIVs[i];
656  unsigned numberCombinedDimensions = combinedDimensions[i].size();
657  // Iterate over all except the last induction value.
658  for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
659  unsigned idx = combinedDimensions[i][j];
660 
661  // Determine the current induction value's current loop iteration
662  Value iv = insideBuilder.create<arith::RemSIOp>(
663  loc, previous, normalizedUpperBounds[idx]);
664  replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
665  loops.getRegion());
666 
667  // Remove the effect of the current induction value to prepare for
668  // the next value.
669  previous = insideBuilder.create<arith::DivSIOp>(
670  loc, previous, normalizedUpperBounds[idx]);
671  }
672 
673  // The final induction value is just the remaining value.
674  unsigned idx = combinedDimensions[i][0];
675  replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
676  previous, loops.getRegion());
677  }
678  });
679 
680  // Replace the old loop with the new loop.
681  loops.getBody()->back().erase();
682  newPloop.getBody()->getOperations().splice(
683  Block::iterator(newPloop.getBody()->back()),
684  loops.getBody()->getOperations());
685  loops.erase();
686 }
687 
688 // Hoist the ops within `outer` that appear before `inner`.
689 // Such ops include the ops that have been introduced by parametric tiling.
690 // Ops that come from triangular loops (i.e. that belong to the program slice
691 // rooted at `outer`) and ops that have side effects cannot be hoisted.
692 // Return failure when any op fails to hoist.
693 static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
694  SetVector<Operation *> forwardSlice;
696  options.filter = [&inner](Operation *op) {
697  return op != inner.getOperation();
698  };
699  getForwardSlice(outer.getInductionVar(), &forwardSlice, options);
700  LogicalResult status = success();
702  for (auto &op : outer.getBody()->without_terminator()) {
703  // Stop when encountering the inner loop.
704  if (&op == inner.getOperation())
705  break;
706  // Skip over non-hoistable ops.
707  if (forwardSlice.count(&op) > 0) {
708  status = failure();
709  continue;
710  }
711  // Skip intermediate scf::ForOp, these are not considered a failure.
712  if (isa<scf::ForOp>(op))
713  continue;
714  // Skip other ops with regions.
715  if (op.getNumRegions() > 0) {
716  status = failure();
717  continue;
718  }
719  // Skip if op has side effects.
720  // TODO: loads to immutable memory regions are ok.
721  if (!isMemoryEffectFree(&op)) {
722  status = failure();
723  continue;
724  }
725  toHoist.push_back(&op);
726  }
727  auto *outerForOp = outer.getOperation();
728  for (auto *op : toHoist)
729  op->moveBefore(outerForOp);
730  return status;
731 }
732 
733 // Traverse the interTile and intraTile loops and try to hoist ops such that
734 // bands of perfectly nested loops are isolated.
735 // Return failure if either perfect interTile or perfect intraTile bands cannot
736 // be formed.
737 static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
738  LogicalResult status = success();
739  const Loops &interTile = tileLoops.first;
740  const Loops &intraTile = tileLoops.second;
741  auto size = interTile.size();
742  assert(size == intraTile.size());
743  if (size <= 1)
744  return success();
745  for (unsigned s = 1; s < size; ++s)
746  status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
747  : failure();
748  for (unsigned s = 1; s < size; ++s)
749  status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
750  : failure();
751  return status;
752 }
753 
754 /// Collect perfectly nested loops starting from `rootForOps`. Loops are
755 /// perfectly nested if each loop is the first and only non-terminator operation
756 /// in the parent loop. Collect at most `maxLoops` loops and append them to
757 /// `forOps`.
758 template <typename T>
760  SmallVectorImpl<T> &forOps, T rootForOp,
761  unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
762  for (unsigned i = 0; i < maxLoops; ++i) {
763  forOps.push_back(rootForOp);
764  Block &body = rootForOp.getRegion().front();
765  if (body.begin() != std::prev(body.end(), 2))
766  return;
767 
768  rootForOp = dyn_cast<T>(&body.front());
769  if (!rootForOp)
770  return;
771  }
772 }
773 
774 static Loops stripmineSink(scf::ForOp forOp, Value factor,
775  ArrayRef<scf::ForOp> targets) {
776  auto originalStep = forOp.getStep();
777  auto iv = forOp.getInductionVar();
778 
779  OpBuilder b(forOp);
780  forOp.setStep(b.create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
781 
782  Loops innerLoops;
783  for (auto t : targets) {
784  // Save information for splicing ops out of t when done
785  auto begin = t.getBody()->begin();
786  auto nOps = t.getBody()->getOperations().size();
787 
788  // Insert newForOp before the terminator of `t`.
789  auto b = OpBuilder::atBlockTerminator((t.getBody()));
790  Value stepped = b.create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
791  Value less = b.create<arith::CmpIOp>(t.getLoc(), arith::CmpIPredicate::slt,
792  forOp.getUpperBound(), stepped);
793  Value ub = b.create<arith::SelectOp>(t.getLoc(), less,
794  forOp.getUpperBound(), stepped);
795 
796  // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses.
797  auto newForOp = b.create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
798  newForOp.getBody()->getOperations().splice(
799  newForOp.getBody()->getOperations().begin(),
800  t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
801  replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
802  newForOp.getRegion());
803 
804  innerLoops.push_back(newForOp);
805  }
806 
807  return innerLoops;
808 }
809 
810 // Stripmines a `forOp` by `factor` and sinks it under a single `target`.
811 // Returns the new for operation, nested immediately under `target`.
812 template <typename SizeType>
813 static scf::ForOp stripmineSink(scf::ForOp forOp, SizeType factor,
814  scf::ForOp target) {
815  // TODO: Use cheap structural assertions that targets are nested under
816  // forOp and that targets are not nested under each other when DominanceInfo
817  // exposes the capability. It seems overkill to construct a whole function
818  // dominance tree at this point.
819  auto res = stripmineSink(forOp, factor, ArrayRef<scf::ForOp>(target));
820  assert(res.size() == 1 && "Expected 1 inner forOp");
821  return res[0];
822 }
823 
825  ArrayRef<Value> sizes,
826  ArrayRef<scf::ForOp> targets) {
828  SmallVector<scf::ForOp, 8> currentTargets(targets.begin(), targets.end());
829  for (auto it : llvm::zip(forOps, sizes)) {
830  auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
831  res.push_back(step);
832  currentTargets = step;
833  }
834  return res;
835 }
836 
838  scf::ForOp target) {
840  for (auto loops : tile(forOps, sizes, ArrayRef<scf::ForOp>(target))) {
841  assert(loops.size() == 1);
842  res.push_back(loops[0]);
843  }
844  return res;
845 }
846 
847 Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes) {
848  // Collect perfectly nested loops. If more size values provided than nested
849  // loops available, truncate `sizes`.
851  forOps.reserve(sizes.size());
852  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
853  if (forOps.size() < sizes.size())
854  sizes = sizes.take_front(forOps.size());
855 
856  return ::tile(forOps, sizes, forOps.back());
857 }
858 
860  scf::ForOp root) {
861  getPerfectlyNestedLoopsImpl(nestedLoops, root);
862 }
863 
865  ArrayRef<int64_t> sizes) {
866  // Collect perfectly nested loops. If more size values provided than nested
867  // loops available, truncate `sizes`.
869  forOps.reserve(sizes.size());
870  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
871  if (forOps.size() < sizes.size())
872  sizes = sizes.take_front(forOps.size());
873 
874  // Compute the tile sizes such that i-th outer loop executes size[i]
875  // iterations. Given that the loop current executes
876  // numIterations = ceildiv((upperBound - lowerBound), step)
877  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
878  SmallVector<Value, 4> tileSizes;
879  tileSizes.reserve(sizes.size());
880  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
881  assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
882 
883  auto forOp = forOps[i];
884  OpBuilder builder(forOp);
885  auto loc = forOp.getLoc();
886  Value diff = builder.create<arith::SubIOp>(loc, forOp.getUpperBound(),
887  forOp.getLowerBound());
888  Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
889  Value iterationsPerBlock =
890  ceilDivPositive(builder, loc, numIterations, sizes[i]);
891  tileSizes.push_back(iterationsPerBlock);
892  }
893 
894  // Call parametric tiling with the given sizes.
895  auto intraTile = tile(forOps, tileSizes, forOps.back());
896  TileLoops tileLoops = std::make_pair(forOps, intraTile);
897 
898  // TODO: for now we just ignore the result of band isolation.
899  // In the future, mapping decisions may be impacted by the ability to
900  // isolate perfectly nested bands.
901  (void)tryIsolateBands(tileLoops);
902 
903  return tileLoops;
904 }
905 
906 scf::ForallOp mlir::fuseIndependentSiblingForallLoops(scf::ForallOp target,
907  scf::ForallOp source,
908  RewriterBase &rewriter) {
909  unsigned numTargetOuts = target.getNumResults();
910  unsigned numSourceOuts = source.getNumResults();
911 
912  OperandRange targetOuts = target.getOutputs();
913  OperandRange sourceOuts = source.getOutputs();
914 
915  // Create fused shared_outs.
916  SmallVector<Value> fusedOuts;
917  fusedOuts.reserve(numTargetOuts + numSourceOuts);
918  fusedOuts.append(targetOuts.begin(), targetOuts.end());
919  fusedOuts.append(sourceOuts.begin(), sourceOuts.end());
920 
921  // Create a new scf::forall op after the source loop.
922  rewriter.setInsertionPointAfter(source);
923  scf::ForallOp fusedLoop = rewriter.create<scf::ForallOp>(
924  source.getLoc(), source.getMixedLowerBound(), source.getMixedUpperBound(),
925  source.getMixedStep(), fusedOuts, source.getMapping());
926 
927  // Map control operands.
928  IRMapping fusedMapping;
929  fusedMapping.map(target.getInductionVars(), fusedLoop.getInductionVars());
930  fusedMapping.map(source.getInductionVars(), fusedLoop.getInductionVars());
931 
932  // Map shared outs.
933  fusedMapping.map(target.getOutputBlockArguments(),
934  fusedLoop.getOutputBlockArguments().slice(0, numTargetOuts));
935  fusedMapping.map(
936  source.getOutputBlockArguments(),
937  fusedLoop.getOutputBlockArguments().slice(numTargetOuts, numSourceOuts));
938 
939  // Append everything except the terminator into the fused operation.
940  rewriter.setInsertionPointToStart(fusedLoop.getBody());
941  for (Operation &op : target.getBody()->without_terminator())
942  rewriter.clone(op, fusedMapping);
943  for (Operation &op : source.getBody()->without_terminator())
944  rewriter.clone(op, fusedMapping);
945 
946  // Fuse the old terminator in_parallel ops into the new one.
947  scf::InParallelOp targetTerm = target.getTerminator();
948  scf::InParallelOp sourceTerm = source.getTerminator();
949  scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
950 
951  rewriter.setInsertionPointToStart(fusedTerm.getBody());
952  for (Operation &op : targetTerm.getYieldingOps())
953  rewriter.clone(op, fusedMapping);
954  for (Operation &op : sourceTerm.getYieldingOps())
955  rewriter.clone(op, fusedMapping);
956 
957  // Replace all uses of the old loops with the fused loop.
958  rewriter.replaceAllUsesWith(target.getResults(),
959  fusedLoop.getResults().slice(0, numTargetOuts));
960  rewriter.replaceAllUsesWith(
961  source.getResults(),
962  fusedLoop.getResults().slice(numTargetOuts, numSourceOuts));
963 
964  // Erase the old loops.
965  rewriter.eraseOp(target);
966  rewriter.eraseOp(source);
967 
968  return fusedLoop;
969 }
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
Definition: Utils.cpp:737
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOps.
Definition: Utils.cpp:759
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
Definition: Utils.cpp:693
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of scf::ForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor',...
Definition: Utils.cpp:302
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
Definition: Utils.cpp:774
static LoopParams normalizeLoop(OpBuilder &boundsBuilder, OpBuilder &insideLoopBuilder, Location loc, Value lowerBound, Value upperBound, Value step, Value inductionVar)
Return the new lower bound, upper bound, and step in that order.
Definition: Utils.cpp:478
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
Definition: Utils.cpp:272
static llvm::ManagedStatic< PassManagerOptions > options
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class represents an argument of a Block.
Definition: Value.h:315
Block represents an ordered list of Operations.
Definition: Block.h:30
OpListType::iterator iterator
Definition: Block.h:133
unsigned getNumArguments()
Definition: Block.h:121
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:238
BlockArgListType getArguments()
Definition: Block.h:80
Operation & front()
Definition: Block.h:146
iterator end()
Definition: Block.h:137
iterator begin()
Definition: Block.h:136
MLIRContext * getContext() const
Definition: Builders.h:55
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
auto lookup(T from) const
Lookup a mapped value within the map.
Definition: IRMapping.h:72
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition: IRMapping.h:30
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
Definition: PatternMatch.h:710
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:333
This class helps build Operations.
Definition: Builders.h:206
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition: Builders.h:238
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:528
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:416
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:383
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
Definition: Builders.h:250
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:421
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=std::nullopt, ArrayRef< Location > locs=std::nullopt)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition: Builders.cpp:419
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:446
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Definition: Builders.h:397
This class represents an operand of an operation.
Definition: Value.h:263
This is a value defined by a result of an operation.
Definition: Value.h:453
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Definition: Operation.cpp:686
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:652
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:655
operand_type_range getOperandTypes()
Definition: Operation.h:392
result_type_range getResultTypes()
Definition: Operation.h:423
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:554
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Definition: Operation.cpp:236
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:538
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
Definition: Region.h:205
bool hasOneBlock()
Return true if this region has exactly one block.
Definition: Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:399
void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
Definition: PatternMatch.h:615
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
bool isIndex() const
Definition: Types.cpp:56
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:378
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
bool use_empty() const
Returns true if this value has no uses.
Definition: Value.h:214
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Definition: Value.cpp:81
Type getType() const
Return the type of this value.
Definition: Value.h:125
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition: Value.cpp:61
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Specialization of arith.constant op that returns an integer of index type.
Definition: Arith.h:90
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:38
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: LoopUtils.cpp:1624
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
Definition: Utils.cpp:859
LogicalResult loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
Definition: Utils.cpp:355
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
Definition: Utils.cpp:224
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:28
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
Definition: Utils.cpp:41
void collapseParallelLoops(scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
Definition: Utils.cpp:605
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
std::pair< Loops, Loops > TileLoops
Definition: Utils.h:122
int64_t ceilDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's ceildiv operation on constants.
Definition: MathExtras.h:23
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
Definition: Utils.cpp:847
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
Definition: Utils.cpp:247
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Definition: RegionUtils.cpp:63
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: Utils.cpp:824
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
Definition: Utils.cpp:120
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
Definition: Utils.cpp:906
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition: Utils.cpp:548
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Definition: Utils.cpp:864
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.