MLIR  16.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Misc utilities for loop transformation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous loop transformation routines.
10 //
11 //===----------------------------------------------------------------------===//
12 
19 #include "mlir/IR/BuiltinOps.h"
20 #include "mlir/IR/PatternMatch.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 
28 using namespace mlir;
29 
30 namespace {
31 // This structure is to pass and return sets of loop parameters without
32 // confusing the order.
33 struct LoopParams {
34  Value lowerBound;
35  Value upperBound;
36  Value step;
37 };
38 } // namespace
39 
40 scf::ForOp
41 mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,
42  ValueRange newIterOperands,
43  const NewYieldValueFn &newYieldValuesFn) {
44  // Create a new loop before the existing one, with the extra operands.
45  OpBuilder::InsertionGuard g(builder);
46  builder.setInsertionPoint(loop);
47  auto operands = llvm::to_vector(loop.getIterOperands());
48  operands.append(newIterOperands.begin(), newIterOperands.end());
49  scf::ForOp newLoop = builder.create<scf::ForOp>(
50  loop.getLoc(), loop.getLowerBound(), loop.getUpperBound(), loop.getStep(),
51  operands, [](OpBuilder &, Location, Value, ValueRange) {});
52 
53  Block *loopBody = loop.getBody();
54  Block *newLoopBody = newLoop.getBody();
55 
56  // Move the body of the original loop to the new loop.
57  newLoopBody->getOperations().splice(newLoopBody->end(),
58  loopBody->getOperations());
59 
60  // Generate the new yield values to use by using the callback and append the
61  // yield values to the scf.yield operation.
62  auto yield = cast<scf::YieldOp>(newLoopBody->getTerminator());
63  ArrayRef<BlockArgument> newBBArgs =
64  newLoopBody->getArguments().take_back(newIterOperands.size());
65  {
66  OpBuilder::InsertionGuard g(builder);
67  builder.setInsertionPoint(yield);
68  SmallVector<Value> newYieldedValues =
69  newYieldValuesFn(builder, loop.getLoc(), newBBArgs);
70  assert(newIterOperands.size() == newYieldedValues.size() &&
71  "expected as many new yield values as new iter operands");
72  yield.getResultsMutable().append(newYieldedValues);
73  }
74 
75  // Remap the BlockArguments from the original loop to the new loop
76  // BlockArguments.
77  ArrayRef<BlockArgument> bbArgs = loopBody->getArguments();
78  for (auto it :
79  llvm::zip(bbArgs, newLoopBody->getArguments().take_front(bbArgs.size())))
80  std::get<0>(it).replaceAllUsesWith(std::get<1>(it));
81 
82  // Replace all uses of `newIterOperands` with the corresponding basic block
83  // arguments.
84  for (auto it : llvm::zip(newIterOperands, newBBArgs)) {
85  std::get<0>(it).replaceUsesWithIf(std::get<1>(it), [&](OpOperand &use) {
86  Operation *user = use.getOwner();
87  return newLoop->isProperAncestor(user);
88  });
89  }
90 
91  // Replace all uses of the original loop with corresponding values from the
92  // new loop.
93  loop.replaceAllUsesWith(
94  newLoop.getResults().take_front(loop.getNumResults()));
95 
96  // Add a fake yield to the original loop body that just returns the
97  // BlockArguments corresponding to the iter_args. This makes it a no-op loop.
98  // The loop is dead. The caller is expected to erase it.
99  builder.setInsertionPointToEnd(loopBody);
100  builder.create<scf::YieldOp>(loop->getLoc(), loop.getRegionIterArgs());
101 
102  return newLoop;
103 }
104 
106  OpBuilder &builder, ArrayRef<scf::ForOp> loopNest,
107  ValueRange newIterOperands, NewYieldValueFn newYieldValueFn) {
108  if (loopNest.empty())
109  return {};
110  SmallVector<scf::ForOp> newLoopNest(loopNest.size());
111 
112  newLoopNest.back() = replaceLoopWithNewYields(
113  builder, loopNest.back(), newIterOperands, newYieldValueFn);
114 
115  for (unsigned loopDepth :
116  llvm::reverse(llvm::seq<unsigned>(0, loopNest.size() - 1))) {
117  NewYieldValueFn fn = [&](OpBuilder &innerBuilder, Location loc,
118  ArrayRef<BlockArgument> innerNewBBArgs) {
119  SmallVector<Value> newYields(
120  newLoopNest[loopDepth + 1]->getResults().take_back(
121  newIterOperands.size()));
122  return newYields;
123  };
124  newLoopNest[loopDepth] = replaceLoopWithNewYields(
125  builder, loopNest[loopDepth], newIterOperands, fn);
126  }
127  return newLoopNest;
128 }
129 
130 /// Outline a region with a single block into a new FuncOp.
131 /// Assumes the FuncOp result types is the type of the yielded operands of the
132 /// single block. This constraint makes it easy to determine the result.
133 /// This method also clones the `arith::ConstantIndexOp` at the start of
134 /// `outlinedFuncBody` to alloc simple canonicalizations. If `callOp` is
135 /// provided, it will be set to point to the operation that calls the outlined
136 /// function.
137 // TODO: support more than single-block regions.
138 // TODO: more flexible constant handling.
140  Location loc,
141  Region &region,
142  StringRef funcName,
143  func::CallOp *callOp) {
144  assert(!funcName.empty() && "funcName cannot be empty");
145  if (!region.hasOneBlock())
146  return failure();
147 
148  Block *originalBlock = &region.front();
149  Operation *originalTerminator = originalBlock->getTerminator();
150 
151  // Outline before current function.
152  OpBuilder::InsertionGuard g(rewriter);
153  rewriter.setInsertionPoint(region.getParentOfType<func::FuncOp>());
154 
155  SetVector<Value> captures;
156  getUsedValuesDefinedAbove(region, captures);
157 
158  ValueRange outlinedValues(captures.getArrayRef());
159  SmallVector<Type> outlinedFuncArgTypes;
160  SmallVector<Location> outlinedFuncArgLocs;
161  // Region's arguments are exactly the first block's arguments as per
162  // Region::getArguments().
163  // Func's arguments are cat(regions's arguments, captures arguments).
164  for (BlockArgument arg : region.getArguments()) {
165  outlinedFuncArgTypes.push_back(arg.getType());
166  outlinedFuncArgLocs.push_back(arg.getLoc());
167  }
168  for (Value value : outlinedValues) {
169  outlinedFuncArgTypes.push_back(value.getType());
170  outlinedFuncArgLocs.push_back(value.getLoc());
171  }
172  FunctionType outlinedFuncType =
173  FunctionType::get(rewriter.getContext(), outlinedFuncArgTypes,
174  originalTerminator->getOperandTypes());
175  auto outlinedFunc =
176  rewriter.create<func::FuncOp>(loc, funcName, outlinedFuncType);
177  Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
178 
179  // Merge blocks while replacing the original block operands.
180  // Warning: `mergeBlocks` erases the original block, reconstruct it later.
181  int64_t numOriginalBlockArguments = originalBlock->getNumArguments();
182  auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
183  {
184  OpBuilder::InsertionGuard g(rewriter);
185  rewriter.setInsertionPointToEnd(outlinedFuncBody);
186  rewriter.mergeBlocks(
187  originalBlock, outlinedFuncBody,
188  outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
189  // Explicitly set up a new ReturnOp terminator.
190  rewriter.setInsertionPointToEnd(outlinedFuncBody);
191  rewriter.create<func::ReturnOp>(loc, originalTerminator->getResultTypes(),
192  originalTerminator->getOperands());
193  }
194 
195  // Reconstruct the block that was deleted and add a
196  // terminator(call_results).
197  Block *newBlock = rewriter.createBlock(
198  &region, region.begin(),
199  TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
200  ArrayRef<Location>(outlinedFuncArgLocs)
201  .take_front(numOriginalBlockArguments));
202  {
203  OpBuilder::InsertionGuard g(rewriter);
204  rewriter.setInsertionPointToEnd(newBlock);
205  SmallVector<Value> callValues;
206  llvm::append_range(callValues, newBlock->getArguments());
207  llvm::append_range(callValues, outlinedValues);
208  auto call = rewriter.create<func::CallOp>(loc, outlinedFunc, callValues);
209  if (callOp)
210  *callOp = call;
211 
212  // `originalTerminator` was moved to `outlinedFuncBody` and is still valid.
213  // Clone `originalTerminator` to take the callOp results then erase it from
214  // `outlinedFuncBody`.
216  bvm.map(originalTerminator->getOperands(), call->getResults());
217  rewriter.clone(*originalTerminator, bvm);
218  rewriter.eraseOp(originalTerminator);
219  }
220 
221  // Lastly, explicit RAUW outlinedValues, only for uses within `outlinedFunc`.
222  // Clone the `arith::ConstantIndexOp` at the start of `outlinedFuncBody`.
223  for (auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
224  outlinedValues.size()))) {
225  Value orig = std::get<0>(it);
226  Value repl = std::get<1>(it);
227  {
228  OpBuilder::InsertionGuard g(rewriter);
229  rewriter.setInsertionPointToStart(outlinedFuncBody);
230  if (Operation *cst = orig.getDefiningOp<arith::ConstantIndexOp>()) {
232  repl = rewriter.clone(*cst, bvm)->getResult(0);
233  }
234  }
235  orig.replaceUsesWithIf(repl, [&](OpOperand &opOperand) {
236  return outlinedFunc->isProperAncestor(opOperand.getOwner());
237  });
238  }
239 
240  return outlinedFunc;
241 }
242 
244  func::FuncOp *thenFn, StringRef thenFnName,
245  func::FuncOp *elseFn, StringRef elseFnName) {
246  IRRewriter rewriter(b);
247  Location loc = ifOp.getLoc();
248  FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
249  if (thenFn && !ifOp.getThenRegion().empty()) {
250  outlinedFuncOpOrFailure = outlineSingleBlockRegion(
251  rewriter, loc, ifOp.getThenRegion(), thenFnName);
252  if (failed(outlinedFuncOpOrFailure))
253  return failure();
254  *thenFn = *outlinedFuncOpOrFailure;
255  }
256  if (elseFn && !ifOp.getElseRegion().empty()) {
257  outlinedFuncOpOrFailure = outlineSingleBlockRegion(
258  rewriter, loc, ifOp.getElseRegion(), elseFnName);
259  if (failed(outlinedFuncOpOrFailure))
260  return failure();
261  *elseFn = *outlinedFuncOpOrFailure;
262  }
263  return success();
264 }
265 
268  assert(rootOp != nullptr && "Root operation must not be a nullptr.");
269  bool rootEnclosesPloops = false;
270  for (Region &region : rootOp->getRegions()) {
271  for (Block &block : region.getBlocks()) {
272  for (Operation &op : block) {
273  bool enclosesPloops = getInnermostParallelLoops(&op, result);
274  rootEnclosesPloops |= enclosesPloops;
275  if (auto ploop = dyn_cast<scf::ParallelOp>(op)) {
276  rootEnclosesPloops = true;
277 
278  // Collect parallel loop if it is an innermost one.
279  if (!enclosesPloops)
280  result.push_back(ploop);
281  }
282  }
283  }
284  }
285  return rootEnclosesPloops;
286 }
287 
288 // Build the IR that performs ceil division of a positive value by a constant:
289 // ceildiv(a, B) = divis(a + (B-1), B)
290 // where divis is rounding-to-zero division.
291 static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
292  int64_t divisor) {
293  assert(divisor > 0 && "expected positive divisor");
294  assert(dividend.getType().isIndex() && "expected index-typed value");
295 
296  Value divisorMinusOneCst =
297  builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
298  Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
299  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
300  return builder.create<arith::DivSIOp>(loc, sum, divisorCst);
301 }
302 
303 // Build the IR that performs ceil division of a positive value by another
304 // positive value:
305 // ceildiv(a, b) = divis(a + (b - 1), b)
306 // where divis is rounding-to-zero division.
307 static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
308  Value divisor) {
309  assert(dividend.getType().isIndex() && "expected index-typed value");
310 
311  Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
312  Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
313  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
314  return builder.create<arith::DivSIOp>(loc, sum, divisor);
315 }
316 
317 /// Helper to replace uses of loop carried values (iter_args) and loop
318 /// yield values while promoting single iteration scf.for ops.
319 static void replaceIterArgsAndYieldResults(scf::ForOp forOp) {
320  // Replace uses of iter arguments with iter operands (initial values).
321  auto iterOperands = forOp.getIterOperands();
322  auto iterArgs = forOp.getRegionIterArgs();
323  for (auto e : llvm::zip(iterOperands, iterArgs))
324  std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
325 
326  // Replace uses of loop results with the values yielded by the loop.
327  auto outerResults = forOp.getResults();
328  auto innerResults = forOp.getBody()->getTerminator()->getOperands();
329  for (auto e : llvm::zip(outerResults, innerResults))
330  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
331 }
332 
333 /// Promotes the loop body of a forOp to its containing block if the forOp
334 /// it can be determined that the loop has a single iteration.
336  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
337  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
338  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
339  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 ||
340  ubCstOp.value() < 0 || stepCstOp.value() < 0)
341  return failure();
342  int64_t tripCount =
343  mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value());
344  if (tripCount != 1)
345  return failure();
346  auto iv = forOp.getInductionVar();
347  iv.replaceAllUsesWith(lbCstOp);
348 
350 
351  // Move the loop body operations, except for its terminator, to the loop's
352  // containing block.
353  auto *parentBlock = forOp->getBlock();
354  forOp.getBody()->getTerminator()->erase();
355  parentBlock->getOperations().splice(Block::iterator(forOp),
356  forOp.getBody()->getOperations());
357  forOp.erase();
358  return success();
359 }
360 
361 /// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
362 /// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
363 /// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
364 /// unrolled iteration using annotateFn.
366  Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
367  function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
368  function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
369  ValueRange iterArgs, ValueRange yieldedValues) {
370  // Builder to insert unrolled bodies just before the terminator of the body of
371  // 'forOp'.
372  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
373 
374  if (!annotateFn)
375  annotateFn = [](unsigned, Operation *, OpBuilder) {};
376 
377  // Keep a pointer to the last non-terminator operation in the original block
378  // so that we know what to clone (since we are doing this in-place).
379  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
380 
381  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
382  SmallVector<Value, 4> lastYielded(yieldedValues);
383 
384  for (unsigned i = 1; i < unrollFactor; i++) {
385  BlockAndValueMapping operandMap;
386 
387  // Prepare operand map.
388  operandMap.map(iterArgs, lastYielded);
389 
390  // If the induction variable is used, create a remapping to the value for
391  // this unrolled instance.
392  if (!forOpIV.use_empty()) {
393  Value ivUnroll = ivRemapFn(i, forOpIV, builder);
394  operandMap.map(forOpIV, ivUnroll);
395  }
396 
397  // Clone the original body of 'forOp'.
398  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
399  Operation *clonedOp = builder.clone(*it, operandMap);
400  annotateFn(i, clonedOp, builder);
401  }
402 
403  // Update yielded values.
404  for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
405  lastYielded[i] = operandMap.lookup(yieldedValues[i]);
406  }
407 
408  // Make sure we annotate the Ops in the original body. We do this last so that
409  // any annotations are not copied into the cloned Ops above.
410  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
411  annotateFn(0, &*it, builder);
412 
413  // Update operands of the yield statement.
414  loopBodyBlock->getTerminator()->setOperands(lastYielded);
415 }
416 
417 /// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
419  scf::ForOp forOp, uint64_t unrollFactor,
420  function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
421  assert(unrollFactor > 0 && "expected positive unroll factor");
422 
423  // Return if the loop body is empty.
424  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
425  return success();
426 
427  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
428  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
429  OpBuilder boundsBuilder(forOp);
430  auto loc = forOp.getLoc();
431  auto step = forOp.getStep();
432  Value upperBoundUnrolled;
433  Value stepUnrolled;
434  bool generateEpilogueLoop = true;
435 
436  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
437  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
438  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
439  if (lbCstOp && ubCstOp && stepCstOp) {
440  // Constant loop bounds computation.
441  int64_t lbCst = lbCstOp.value();
442  int64_t ubCst = ubCstOp.value();
443  int64_t stepCst = stepCstOp.value();
444  assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
445  "expected positive loop bounds and step");
446  int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
447 
448  if (unrollFactor == 1) {
449  if (tripCount == 1 && failed(promoteIfSingleIteration(forOp)))
450  return failure();
451  return success();
452  }
453 
454  int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
455  int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
456  assert(upperBoundUnrolledCst <= ubCst);
457  int64_t stepUnrolledCst = stepCst * unrollFactor;
458 
459  // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
460  generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
461  if (generateEpilogueLoop)
462  upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
463  loc, upperBoundUnrolledCst);
464  else
465  upperBoundUnrolled = ubCstOp;
466 
467  // Create constant for 'stepUnrolled'.
468  stepUnrolled = stepCst == stepUnrolledCst
469  ? step
470  : boundsBuilder.create<arith::ConstantIndexOp>(
471  loc, stepUnrolledCst);
472  } else {
473  // Dynamic loop bounds computation.
474  // TODO: Add dynamic asserts for negative lb/ub/step, or
475  // consider using ceilDiv from AffineApplyExpander.
476  auto lowerBound = forOp.getLowerBound();
477  auto upperBound = forOp.getUpperBound();
478  Value diff =
479  boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
480  Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
481  Value unrollFactorCst =
482  boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
483  Value tripCountRem =
484  boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
485  // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
486  Value tripCountEvenMultiple =
487  boundsBuilder.create<arith::SubIOp>(loc, tripCount, tripCountRem);
488  // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
489  upperBoundUnrolled = boundsBuilder.create<arith::AddIOp>(
490  loc, lowerBound,
491  boundsBuilder.create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
492  // Scale 'step' by 'unrollFactor'.
493  stepUnrolled =
494  boundsBuilder.create<arith::MulIOp>(loc, step, unrollFactorCst);
495  }
496 
497  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
498  if (generateEpilogueLoop) {
499  OpBuilder epilogueBuilder(forOp->getContext());
500  epilogueBuilder.setInsertionPoint(forOp->getBlock(),
501  std::next(Block::iterator(forOp)));
502  auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
503  epilogueForOp.setLowerBound(upperBoundUnrolled);
504 
505  // Update uses of loop results.
506  auto results = forOp.getResults();
507  auto epilogueResults = epilogueForOp.getResults();
508 
509  for (auto e : llvm::zip(results, epilogueResults)) {
510  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
511  }
512  epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
513  epilogueForOp.getNumIterOperands(), results);
514  (void)promoteIfSingleIteration(epilogueForOp);
515  }
516 
517  // Create unrolled loop.
518  forOp.setUpperBound(upperBoundUnrolled);
519  forOp.setStep(stepUnrolled);
520 
521  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
522  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
523 
525  forOp.getBody(), forOp.getInductionVar(), unrollFactor,
526  [&](unsigned i, Value iv, OpBuilder b) {
527  // iv' = iv + step * i;
528  auto stride = b.create<arith::MulIOp>(
529  loc, step, b.create<arith::ConstantIndexOp>(loc, i));
530  return b.create<arith::AddIOp>(loc, iv, stride);
531  },
532  annotateFn, iterArgs, yieldedValues);
533  // Promote the loop body up if this has turned into a single iteration loop.
535  return success();
536 }
537 
538 /// Return the new lower bound, upper bound, and step in that order. Insert any
539 /// additional bounds calculations before the given builder and any additional
540 /// conversion back to the original loop induction value inside the given Block.
541 static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
542  OpBuilder &insideLoopBuilder, Location loc,
543  Value lowerBound, Value upperBound, Value step,
544  Value inductionVar) {
545  // Check if the loop is already known to have a constant zero lower bound or
546  // a constant one step.
547  bool isZeroBased = false;
548  if (auto ubCst = lowerBound.getDefiningOp<arith::ConstantIndexOp>())
549  isZeroBased = ubCst.value() == 0;
550 
551  bool isStepOne = false;
552  if (auto stepCst = step.getDefiningOp<arith::ConstantIndexOp>())
553  isStepOne = stepCst.value() == 1;
554 
555  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
556  // assuming the step is strictly positive. Update the bounds and the step
557  // of the loop to go from 0 to the number of iterations, if necessary.
558  // TODO: introduce support for negative steps or emit dynamic asserts
559  // on step positivity, whatever gets implemented first.
560  if (isZeroBased && isStepOne)
561  return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
562  /*step=*/step};
563 
564  Value diff = boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
565  Value newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
566 
567  Value newLowerBound =
568  isZeroBased ? lowerBound
569  : boundsBuilder.create<arith::ConstantIndexOp>(loc, 0);
570  Value newStep =
571  isStepOne ? step : boundsBuilder.create<arith::ConstantIndexOp>(loc, 1);
572 
573  // Insert code computing the value of the original loop induction variable
574  // from the "normalized" one.
575  Value scaled =
576  isStepOne
577  ? inductionVar
578  : insideLoopBuilder.create<arith::MulIOp>(loc, inductionVar, step);
579  Value shifted =
580  isZeroBased
581  ? scaled
582  : insideLoopBuilder.create<arith::AddIOp>(loc, scaled, lowerBound);
583 
584  SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
585  shifted.getDefiningOp()};
586  inductionVar.replaceAllUsesExcept(shifted, preserve);
587  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
588  /*step=*/newStep};
589 }
590 
591 /// Transform a loop with a strictly positive step
592 /// for %i = %lb to %ub step %s
593 /// into a 0-based loop with step 1
594 /// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
595 /// %i = %ii * %s + %lb
596 /// Insert the induction variable remapping in the body of `inner`, which is
597 /// expected to be either `loop` or another loop perfectly nested under `loop`.
598 /// Insert the definition of new bounds immediate before `outer`, which is
599 /// expected to be either `loop` or its parent in the loop nest.
600 static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
601  OpBuilder builder(outer);
602  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
603  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
604  loop.getLowerBound(), loop.getUpperBound(),
605  loop.getStep(), loop.getInductionVar());
606 
607  loop.setLowerBound(loopPieces.lowerBound);
608  loop.setUpperBound(loopPieces.upperBound);
609  loop.setStep(loopPieces.step);
610 }
611 
613  if (loops.size() < 2)
614  return;
615 
616  scf::ForOp innermost = loops.back();
617  scf::ForOp outermost = loops.front();
618 
619  // 1. Make sure all loops iterate from 0 to upperBound with step 1. This
620  // allows the following code to assume upperBound is the number of iterations.
621  for (auto loop : loops)
622  normalizeLoop(loop, outermost, innermost);
623 
624  // 2. Emit code computing the upper bound of the coalesced loop as product
625  // of the number of iterations of all loops.
626  OpBuilder builder(outermost);
627  Location loc = outermost.getLoc();
628  Value upperBound = outermost.getUpperBound();
629  for (auto loop : loops.drop_front())
630  upperBound =
631  builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
632  outermost.setUpperBound(upperBound);
633 
634  builder.setInsertionPointToStart(outermost.getBody());
635 
636  // 3. Remap induction variables. For each original loop, the value of the
637  // induction variable can be obtained by dividing the induction variable of
638  // the linearized loop by the total number of iterations of the loops nested
639  // in it modulo the number of iterations in this loop (remove the values
640  // related to the outer loops):
641  // iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
642  // Compute these iteratively from the innermost loop by creating a "running
643  // quotient" of division by the range.
644  Value previous = outermost.getInductionVar();
645  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
646  unsigned idx = loops.size() - i - 1;
647  if (i != 0)
648  previous = builder.create<arith::DivSIOp>(loc, previous,
649  loops[idx + 1].getUpperBound());
650 
651  Value iv = (i == e - 1) ? previous
652  : builder.create<arith::RemSIOp>(
653  loc, previous, loops[idx].getUpperBound());
654  replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
655  loops.back().getRegion());
656  }
657 
658  // 4. Move the operations from the innermost just above the second-outermost
659  // loop, delete the extra terminator and the second-outermost loop.
660  scf::ForOp second = loops[1];
661  innermost.getBody()->back().erase();
662  outermost.getBody()->getOperations().splice(
663  Block::iterator(second.getOperation()),
664  innermost.getBody()->getOperations());
665  second.erase();
666 }
667 
669  scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions) {
670  OpBuilder outsideBuilder(loops);
671  Location loc = loops.getLoc();
672 
673  // Presort combined dimensions.
674  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
675  for (auto &dims : sortedDimensions)
676  llvm::sort(dims);
677 
678  // Normalize ParallelOp's iteration pattern.
679  SmallVector<Value, 3> normalizedLowerBounds, normalizedSteps,
680  normalizedUpperBounds;
681  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
682  OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
683  auto resultBounds =
684  normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
685  loops.getLowerBound()[i], loops.getUpperBound()[i],
686  loops.getStep()[i], loops.getBody()->getArgument(i));
687 
688  normalizedLowerBounds.push_back(resultBounds.lowerBound);
689  normalizedUpperBounds.push_back(resultBounds.upperBound);
690  normalizedSteps.push_back(resultBounds.step);
691  }
692 
693  // Combine iteration spaces.
694  SmallVector<Value, 3> lowerBounds, upperBounds, steps;
695  auto cst0 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 0);
696  auto cst1 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
697  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
698  Value newUpperBound = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
699  for (auto idx : sortedDimensions[i]) {
700  newUpperBound = outsideBuilder.create<arith::MulIOp>(
701  loc, newUpperBound, normalizedUpperBounds[idx]);
702  }
703  lowerBounds.push_back(cst0);
704  steps.push_back(cst1);
705  upperBounds.push_back(newUpperBound);
706  }
707 
708  // Create new ParallelLoop with conversions to the original induction values.
709  // The loop below uses divisions to get the relevant range of values in the
710  // new induction value that represent each range of the original induction
711  // value. The remainders then determine based on that range, which iteration
712  // of the original induction value this represents. This is a normalized value
713  // that is un-normalized already by the previous logic.
714  auto newPloop = outsideBuilder.create<scf::ParallelOp>(
715  loc, lowerBounds, upperBounds, steps,
716  [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
717  for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
718  Value previous = ploopIVs[i];
719  unsigned numberCombinedDimensions = combinedDimensions[i].size();
720  // Iterate over all except the last induction value.
721  for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
722  unsigned idx = combinedDimensions[i][j];
723 
724  // Determine the current induction value's current loop iteration
725  Value iv = insideBuilder.create<arith::RemSIOp>(
726  loc, previous, normalizedUpperBounds[idx]);
727  replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
728  loops.getRegion());
729 
730  // Remove the effect of the current induction value to prepare for
731  // the next value.
732  previous = insideBuilder.create<arith::DivSIOp>(
733  loc, previous, normalizedUpperBounds[idx]);
734  }
735 
736  // The final induction value is just the remaining value.
737  unsigned idx = combinedDimensions[i][0];
738  replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
739  previous, loops.getRegion());
740  }
741  });
742 
743  // Replace the old loop with the new loop.
744  loops.getBody()->back().erase();
745  newPloop.getBody()->getOperations().splice(
746  Block::iterator(newPloop.getBody()->back()),
747  loops.getBody()->getOperations());
748  loops.erase();
749 }
750 
751 // Hoist the ops within `outer` that appear before `inner`.
752 // Such ops include the ops that have been introduced by parametric tiling.
753 // Ops that come from triangular loops (i.e. that belong to the program slice
754 // rooted at `outer`) and ops that have side effects cannot be hoisted.
755 // Return failure when any op fails to hoist.
756 static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
757  SetVector<Operation *> forwardSlice;
759  outer.getInductionVar(), &forwardSlice,
760  [&inner](Operation *op) { return op != inner.getOperation(); });
761  LogicalResult status = success();
763  for (auto &op : outer.getBody()->without_terminator()) {
764  // Stop when encountering the inner loop.
765  if (&op == inner.getOperation())
766  break;
767  // Skip over non-hoistable ops.
768  if (forwardSlice.count(&op) > 0) {
769  status = failure();
770  continue;
771  }
772  // Skip intermediate scf::ForOp, these are not considered a failure.
773  if (isa<scf::ForOp>(op))
774  continue;
775  // Skip other ops with regions.
776  if (op.getNumRegions() > 0) {
777  status = failure();
778  continue;
779  }
780  // Skip if op has side effects.
781  // TODO: loads to immutable memory regions are ok.
782  if (!MemoryEffectOpInterface::hasNoEffect(&op)) {
783  status = failure();
784  continue;
785  }
786  toHoist.push_back(&op);
787  }
788  auto *outerForOp = outer.getOperation();
789  for (auto *op : toHoist)
790  op->moveBefore(outerForOp);
791  return status;
792 }
793 
794 // Traverse the interTile and intraTile loops and try to hoist ops such that
795 // bands of perfectly nested loops are isolated.
796 // Return failure if either perfect interTile or perfect intraTile bands cannot
797 // be formed.
798 static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
799  LogicalResult status = success();
800  const Loops &interTile = tileLoops.first;
801  const Loops &intraTile = tileLoops.second;
802  auto size = interTile.size();
803  assert(size == intraTile.size());
804  if (size <= 1)
805  return success();
806  for (unsigned s = 1; s < size; ++s)
807  status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
808  : failure();
809  for (unsigned s = 1; s < size; ++s)
810  status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
811  : failure();
812  return status;
813 }
814 
815 /// Collect perfectly nested loops starting from `rootForOps`. Loops are
816 /// perfectly nested if each loop is the first and only non-terminator operation
817 /// in the parent loop. Collect at most `maxLoops` loops and append them to
818 /// `forOps`.
819 template <typename T>
821  SmallVectorImpl<T> &forOps, T rootForOp,
822  unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
823  for (unsigned i = 0; i < maxLoops; ++i) {
824  forOps.push_back(rootForOp);
825  Block &body = rootForOp.getRegion().front();
826  if (body.begin() != std::prev(body.end(), 2))
827  return;
828 
829  rootForOp = dyn_cast<T>(&body.front());
830  if (!rootForOp)
831  return;
832  }
833 }
834 
835 static Loops stripmineSink(scf::ForOp forOp, Value factor,
836  ArrayRef<scf::ForOp> targets) {
837  auto originalStep = forOp.getStep();
838  auto iv = forOp.getInductionVar();
839 
840  OpBuilder b(forOp);
841  forOp.setStep(b.create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
842 
843  Loops innerLoops;
844  for (auto t : targets) {
845  // Save information for splicing ops out of t when done
846  auto begin = t.getBody()->begin();
847  auto nOps = t.getBody()->getOperations().size();
848 
849  // Insert newForOp before the terminator of `t`.
850  auto b = OpBuilder::atBlockTerminator((t.getBody()));
851  Value stepped = b.create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
852  Value less = b.create<arith::CmpIOp>(t.getLoc(), arith::CmpIPredicate::slt,
853  forOp.getUpperBound(), stepped);
854  Value ub = b.create<arith::SelectOp>(t.getLoc(), less,
855  forOp.getUpperBound(), stepped);
856 
857  // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses.
858  auto newForOp = b.create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
859  newForOp.getBody()->getOperations().splice(
860  newForOp.getBody()->getOperations().begin(),
861  t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
862  replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
863  newForOp.getRegion());
864 
865  innerLoops.push_back(newForOp);
866  }
867 
868  return innerLoops;
869 }
870 
871 // Stripmines a `forOp` by `factor` and sinks it under a single `target`.
872 // Returns the new for operation, nested immediately under `target`.
873 template <typename SizeType>
874 static scf::ForOp stripmineSink(scf::ForOp forOp, SizeType factor,
875  scf::ForOp target) {
876  // TODO: Use cheap structural assertions that targets are nested under
877  // forOp and that targets are not nested under each other when DominanceInfo
878  // exposes the capability. It seems overkill to construct a whole function
879  // dominance tree at this point.
880  auto res = stripmineSink(forOp, factor, ArrayRef<scf::ForOp>(target));
881  assert(res.size() == 1 && "Expected 1 inner forOp");
882  return res[0];
883 }
884 
886  ArrayRef<Value> sizes,
887  ArrayRef<scf::ForOp> targets) {
889  SmallVector<scf::ForOp, 8> currentTargets(targets.begin(), targets.end());
890  for (auto it : llvm::zip(forOps, sizes)) {
891  auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
892  res.push_back(step);
893  currentTargets = step;
894  }
895  return res;
896 }
897 
899  scf::ForOp target) {
901  for (auto loops : tile(forOps, sizes, ArrayRef<scf::ForOp>(target))) {
902  assert(loops.size() == 1);
903  res.push_back(loops[0]);
904  }
905  return res;
906 }
907 
908 Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes) {
909  // Collect perfectly nested loops. If more size values provided than nested
910  // loops available, truncate `sizes`.
912  forOps.reserve(sizes.size());
913  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
914  if (forOps.size() < sizes.size())
915  sizes = sizes.take_front(forOps.size());
916 
917  return ::tile(forOps, sizes, forOps.back());
918 }
919 
921  scf::ForOp root) {
922  getPerfectlyNestedLoopsImpl(nestedLoops, root);
923 }
924 
926  ArrayRef<int64_t> sizes) {
927  // Collect perfectly nested loops. If more size values provided than nested
928  // loops available, truncate `sizes`.
930  forOps.reserve(sizes.size());
931  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
932  if (forOps.size() < sizes.size())
933  sizes = sizes.take_front(forOps.size());
934 
935  // Compute the tile sizes such that i-th outer loop executes size[i]
936  // iterations. Given that the loop current executes
937  // numIterations = ceildiv((upperBound - lowerBound), step)
938  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
939  SmallVector<Value, 4> tileSizes;
940  tileSizes.reserve(sizes.size());
941  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
942  assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
943 
944  auto forOp = forOps[i];
945  OpBuilder builder(forOp);
946  auto loc = forOp.getLoc();
947  Value diff = builder.create<arith::SubIOp>(loc, forOp.getUpperBound(),
948  forOp.getLowerBound());
949  Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
950  Value iterationsPerBlock =
951  ceilDivPositive(builder, loc, numIterations, sizes[i]);
952  tileSizes.push_back(iterationsPerBlock);
953  }
954 
955  // Call parametric tiling with the given sizes.
956  auto intraTile = tile(forOps, tileSizes, forOps.back());
957  TileLoops tileLoops = std::make_pair(forOps, intraTile);
958 
959  // TODO: for now we just ignore the result of band isolation.
960  // In the future, mapping decisions may be impacted by the ability to
961  // isolate perfectly nested bands.
962  (void)tryIsolateBands(tileLoops);
963 
964  return tileLoops;
965 }
Include the generated interface declarations.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(OpBuilder &builder, ArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, NewYieldValueFn newYieldValueFn)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
Definition: Utils.cpp:105
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
Definition: Utils.cpp:139
This class contains a list of basic blocks and a link to the parent operation it is attached to...
Definition: Region.h:26
iterator begin()
Definition: Block.h:134
MLIRContext * getContext() const
Definition: Builders.h:54
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
Definition: Utils.cpp:266
std::pair< Loops, Loops > TileLoops
Definition: Utils.h:143
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:28
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:480
operand_range getOperands()
Returns an iterator on the underlying Value&#39;s.
Definition: Operation.h:295
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in &#39;operands&#39;.
Definition: Operation.cpp:190
Block represents an ordered list of Operations.
Definition: Block.h:29
Block & front()
Definition: Region.h:65
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:345
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
Definition: Utils.cpp:798
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Operation * clone(Operation &op, BlockAndValueMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:496
OpListType & getOperations()
Definition: Block.h:128
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
Definition: LogicalResult.h:72
LogicalResult coalesceLoops(MutableArrayRef< AffineForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition: LoopUtils.cpp:1724
operand_type_range getOperandTypes()
Definition: Operation.h:314
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of &#39;this&#39; value with &#39;newValue&#39; if the given callback returns true.
Definition: Value.cpp:82
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
Definition: LoopUtils.cpp:131
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value...
Definition: LogicalResult.h:68
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, bool cleanUpUnroll=false)
Unrolls this for operation by the specified unroll factor.
Definition: LoopUtils.cpp:1091
Operation & front()
Definition: Block.h:144
T lookup(T from) const
Lookup a mapped value within the map.
BlockArgListType getArguments()
Definition: Region.h:81
bool hasOneBlock()
Return true if this region has exactly one block.
Definition: Region.h:68
static constexpr const bool value
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:414
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition: Builders.h:225
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:48
void map(Block *from, Block *to)
Inserts a new mapping for &#39;from&#39; to &#39;to&#39;.
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops...
Definition: LoopUtils.cpp:859
void collapseParallelLoops(scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension...
Definition: Utils.cpp:668
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
Definition: Utils.cpp:243
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:408
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
OpListType::iterator iterator
Definition: Block.h:131
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
Definition: Utils.cpp:835
iterator begin()
Definition: Region.h:55
This class provides support for representing a failure result, or a valid value of type T...
Definition: LogicalResult.h:78
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Definition: RegionUtils.cpp:59
static void replaceIterArgsAndYieldResults(scf::ForOp forOp)
Helper to replace uses of loop carried values (iter_args) and loop yield values while promoting singl...
Definition: Utils.cpp:319
iterator end()
Definition: Block.h:135
unsigned getNumArguments()
Definition: Block.h:119
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: LoopUtils.cpp:1701
int64_t ceilDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR&#39;s ceildiv operation on constants.
Definition: MathExtras.h:23
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of scf::ForOp &#39;loopBodyBlock&#39;, with associated &#39;forOpIV&#39; by &#39;unrollFactor&#39;...
Definition: Utils.cpp:365
void getForwardSlice(Operation *op, SetVector< Operation *> *forwardSlice, TransitiveFilter filter=nullptr)
Fills forwardSlice with the computed forward slice (i.e.
bool isIndex() const
Definition: Types.cpp:28
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:24
OpResult getResult(unsigned idx)
Get the &#39;idx&#39;th result of this operation.
Definition: Operation.h:324
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:32
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
Definition: LoopUtils.cpp:945
scf::ForOp replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop, ValueRange newIterOperands, const NewYieldValueFn &newYieldValuesFn)
Definition: Utils.cpp:41
BlockArgListType getArguments()
Definition: Block.h:76
This class represents an argument of a Block.
Definition: Value.h:300
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation...
Definition: Region.h:205
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.
static LoopParams normalizeLoop(OpBuilder &boundsBuilder, OpBuilder &insideLoopBuilder, Location loc, Value lowerBound, Value upperBound, Value step, Value inductionVar)
Return the new lower bound, upper bound, and step in that order.
Definition: Utils.cpp:541
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
Definition: PatternMatch.h:584
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Definition: Utils.cpp:925
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
bool use_empty() const
Returns true if this value has no uses.
Definition: Value.h:203
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:230
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:378
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:295
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
Definition: Utils.cpp:291
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
Definition: Builders.h:237
Type getType() const
Return the type of this value.
Definition: Value.h:118
Specialization of arith.constant op that returns an integer of index type.
Definition: Arithmetic.h:80
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:40
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
This class represents an operand of an operation.
Definition: Value.h:251
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:383
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=llvm::None, ArrayRef< Location > locs=llvm::None)
Add new block with &#39;argTypes&#39; arguments and set the insertion point to the end of it...
Definition: Builders.cpp:381
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
Definition: Utils.cpp:756
virtual void mergeBlocks(Block *source, Block *dest, ValueRange argValues=llvm::None)
Merge the operations of block &#39;source&#39; into the end of block &#39;dest&#39;.
This class helps build Operations.
Definition: Builders.h:193
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:345
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOps.
Definition: Utils.cpp:820
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBBArgs)> NewYieldValueFn
Replace the loop with newIterOperands added as new initialization values.
Definition: Utils.h:50
result_type_range getResultTypes()
Definition: Operation.h:345
bool isProperAncestor(Operation *other)
Return true if this operation is a proper ancestor of the other operation.
Definition: Operation.cpp:172
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:398
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)