MLIR  16.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Misc utilities for loop transformation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous loop transformation routines.
10 //
11 //===----------------------------------------------------------------------===//
12 
19 #include "mlir/IR/BuiltinOps.h"
20 #include "mlir/IR/PatternMatch.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 
29 using namespace mlir;
30 
31 namespace {
32 // This structure is to pass and return sets of loop parameters without
33 // confusing the order.
34 struct LoopParams {
35  Value lowerBound;
36  Value upperBound;
37  Value step;
38 };
39 } // namespace
40 
41 scf::ForOp
42 mlir::replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop,
43  ValueRange newIterOperands,
44  const NewYieldValueFn &newYieldValuesFn,
45  bool replaceIterOperandsUsesInLoop) {
46  // Create a new loop before the existing one, with the extra operands.
47  OpBuilder::InsertionGuard g(builder);
48  builder.setInsertionPoint(loop);
49  auto operands = llvm::to_vector(loop.getIterOperands());
50  operands.append(newIterOperands.begin(), newIterOperands.end());
51  scf::ForOp newLoop = builder.create<scf::ForOp>(
52  loop.getLoc(), loop.getLowerBound(), loop.getUpperBound(), loop.getStep(),
53  operands, [](OpBuilder &, Location, Value, ValueRange) {});
54 
55  Block *loopBody = loop.getBody();
56  Block *newLoopBody = newLoop.getBody();
57 
58  // Move the body of the original loop to the new loop.
59  newLoopBody->getOperations().splice(newLoopBody->end(),
60  loopBody->getOperations());
61 
62  // Generate the new yield values to use by using the callback and append the
63  // yield values to the scf.yield operation.
64  auto yield = cast<scf::YieldOp>(newLoopBody->getTerminator());
65  ArrayRef<BlockArgument> newBBArgs =
66  newLoopBody->getArguments().take_back(newIterOperands.size());
67  {
68  OpBuilder::InsertionGuard g(builder);
69  builder.setInsertionPoint(yield);
70  SmallVector<Value> newYieldedValues =
71  newYieldValuesFn(builder, loop.getLoc(), newBBArgs);
72  assert(newIterOperands.size() == newYieldedValues.size() &&
73  "expected as many new yield values as new iter operands");
74  yield.getResultsMutable().append(newYieldedValues);
75  }
76 
77  // Remap the BlockArguments from the original loop to the new loop
78  // BlockArguments.
79  ArrayRef<BlockArgument> bbArgs = loopBody->getArguments();
80  for (auto it :
81  llvm::zip(bbArgs, newLoopBody->getArguments().take_front(bbArgs.size())))
82  std::get<0>(it).replaceAllUsesWith(std::get<1>(it));
83 
84  if (replaceIterOperandsUsesInLoop) {
85  // Replace all uses of `newIterOperands` with the corresponding basic block
86  // arguments.
87  for (auto it : llvm::zip(newIterOperands, newBBArgs)) {
88  std::get<0>(it).replaceUsesWithIf(std::get<1>(it), [&](OpOperand &use) {
89  Operation *user = use.getOwner();
90  return newLoop->isProperAncestor(user);
91  });
92  }
93  }
94 
95  // Replace all uses of the original loop with corresponding values from the
96  // new loop.
97  loop.replaceAllUsesWith(
98  newLoop.getResults().take_front(loop.getNumResults()));
99 
100  // Add a fake yield to the original loop body that just returns the
101  // BlockArguments corresponding to the iter_args. This makes it a no-op loop.
102  // The loop is dead. The caller is expected to erase it.
103  builder.setInsertionPointToEnd(loopBody);
104  builder.create<scf::YieldOp>(loop->getLoc(), loop.getRegionIterArgs());
105 
106  return newLoop;
107 }
108 
110  OpBuilder &builder, ArrayRef<scf::ForOp> loopNest,
111  ValueRange newIterOperands, const NewYieldValueFn &newYieldValueFn,
112  bool replaceIterOperandsUsesInLoop) {
113  if (loopNest.empty())
114  return {};
115  // This method is recursive (to make it more readable). Adding an
116  // assertion here to limit the recursion. (See
117  // https://discourse.llvm.org/t/rfc-update-to-mlir-developer-policy-on-recursion/62235)
118  assert(loopNest.size() <= 10 &&
119  "exceeded recursion limit when yielding value from loop nest");
120 
121  // To yield a value from a perfectly nested loop nest, the following
122  // pattern needs to be created, i.e. starting with
123  //
124  // ```mlir
125  // scf.for .. {
126  // scf.for .. {
127  // scf.for .. {
128  // %value = ...
129  // }
130  // }
131  // }
132  // ```
133  //
134  // needs to be modified to
135  //
136  // ```mlir
137  // %0 = scf.for .. iter_args(%arg0 = %init) {
138  // %1 = scf.for .. iter_args(%arg1 = %arg0) {
139  // %2 = scf.for .. iter_args(%arg2 = %arg1) {
140  // %value = ...
141  // scf.yield %value
142  // }
143  // scf.yield %2
144  // }
145  // scf.yield %1
146  // }
147  // ```
148  //
149  // The inner most loop is handled using the `replaceLoopWithNewYields`
150  // that works on a single loop.
151  if (loopNest.size() == 1) {
152  auto innerMostLoop = replaceLoopWithNewYields(
153  builder, loopNest.back(), newIterOperands, newYieldValueFn,
154  replaceIterOperandsUsesInLoop);
155  return {innerMostLoop};
156  }
157  // The outer loops are modified by calling this method recursively
158  // - The return value of the inner loop is the value yielded by this loop.
159  // - The region iter args of this loop are the init_args for the inner loop.
160  SmallVector<scf::ForOp> newLoopNest;
161  NewYieldValueFn fn =
162  [&](OpBuilder &innerBuilder, Location loc,
163  ArrayRef<BlockArgument> innerNewBBArgs) -> SmallVector<Value> {
164  newLoopNest = replaceLoopNestWithNewYields(builder, loopNest.drop_front(),
165  innerNewBBArgs, newYieldValueFn,
166  replaceIterOperandsUsesInLoop);
167  return llvm::to_vector(llvm::map_range(
168  newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
169  [](OpResult r) -> Value { return r; }));
170  };
171  scf::ForOp outerMostLoop =
172  replaceLoopWithNewYields(builder, loopNest.front(), newIterOperands, fn,
173  replaceIterOperandsUsesInLoop);
174  newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
175  return newLoopNest;
176 }
177 
178 /// Outline a region with a single block into a new FuncOp.
179 /// Assumes the FuncOp result types is the type of the yielded operands of the
180 /// single block. This constraint makes it easy to determine the result.
181 /// This method also clones the `arith::ConstantIndexOp` at the start of
182 /// `outlinedFuncBody` to alloc simple canonicalizations. If `callOp` is
183 /// provided, it will be set to point to the operation that calls the outlined
184 /// function.
185 // TODO: support more than single-block regions.
186 // TODO: more flexible constant handling.
188  Location loc,
189  Region &region,
190  StringRef funcName,
191  func::CallOp *callOp) {
192  assert(!funcName.empty() && "funcName cannot be empty");
193  if (!region.hasOneBlock())
194  return failure();
195 
196  Block *originalBlock = &region.front();
197  Operation *originalTerminator = originalBlock->getTerminator();
198 
199  // Outline before current function.
200  OpBuilder::InsertionGuard g(rewriter);
201  rewriter.setInsertionPoint(region.getParentOfType<func::FuncOp>());
202 
203  SetVector<Value> captures;
204  getUsedValuesDefinedAbove(region, captures);
205 
206  ValueRange outlinedValues(captures.getArrayRef());
207  SmallVector<Type> outlinedFuncArgTypes;
208  SmallVector<Location> outlinedFuncArgLocs;
209  // Region's arguments are exactly the first block's arguments as per
210  // Region::getArguments().
211  // Func's arguments are cat(regions's arguments, captures arguments).
212  for (BlockArgument arg : region.getArguments()) {
213  outlinedFuncArgTypes.push_back(arg.getType());
214  outlinedFuncArgLocs.push_back(arg.getLoc());
215  }
216  for (Value value : outlinedValues) {
217  outlinedFuncArgTypes.push_back(value.getType());
218  outlinedFuncArgLocs.push_back(value.getLoc());
219  }
220  FunctionType outlinedFuncType =
221  FunctionType::get(rewriter.getContext(), outlinedFuncArgTypes,
222  originalTerminator->getOperandTypes());
223  auto outlinedFunc =
224  rewriter.create<func::FuncOp>(loc, funcName, outlinedFuncType);
225  Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
226 
227  // Merge blocks while replacing the original block operands.
228  // Warning: `mergeBlocks` erases the original block, reconstruct it later.
229  int64_t numOriginalBlockArguments = originalBlock->getNumArguments();
230  auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
231  {
232  OpBuilder::InsertionGuard g(rewriter);
233  rewriter.setInsertionPointToEnd(outlinedFuncBody);
234  rewriter.mergeBlocks(
235  originalBlock, outlinedFuncBody,
236  outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
237  // Explicitly set up a new ReturnOp terminator.
238  rewriter.setInsertionPointToEnd(outlinedFuncBody);
239  rewriter.create<func::ReturnOp>(loc, originalTerminator->getResultTypes(),
240  originalTerminator->getOperands());
241  }
242 
243  // Reconstruct the block that was deleted and add a
244  // terminator(call_results).
245  Block *newBlock = rewriter.createBlock(
246  &region, region.begin(),
247  TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
248  ArrayRef<Location>(outlinedFuncArgLocs)
249  .take_front(numOriginalBlockArguments));
250  {
251  OpBuilder::InsertionGuard g(rewriter);
252  rewriter.setInsertionPointToEnd(newBlock);
253  SmallVector<Value> callValues;
254  llvm::append_range(callValues, newBlock->getArguments());
255  llvm::append_range(callValues, outlinedValues);
256  auto call = rewriter.create<func::CallOp>(loc, outlinedFunc, callValues);
257  if (callOp)
258  *callOp = call;
259 
260  // `originalTerminator` was moved to `outlinedFuncBody` and is still valid.
261  // Clone `originalTerminator` to take the callOp results then erase it from
262  // `outlinedFuncBody`.
264  bvm.map(originalTerminator->getOperands(), call->getResults());
265  rewriter.clone(*originalTerminator, bvm);
266  rewriter.eraseOp(originalTerminator);
267  }
268 
269  // Lastly, explicit RAUW outlinedValues, only for uses within `outlinedFunc`.
270  // Clone the `arith::ConstantIndexOp` at the start of `outlinedFuncBody`.
271  for (auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
272  outlinedValues.size()))) {
273  Value orig = std::get<0>(it);
274  Value repl = std::get<1>(it);
275  {
276  OpBuilder::InsertionGuard g(rewriter);
277  rewriter.setInsertionPointToStart(outlinedFuncBody);
278  if (Operation *cst = orig.getDefiningOp<arith::ConstantIndexOp>()) {
280  repl = rewriter.clone(*cst, bvm)->getResult(0);
281  }
282  }
283  orig.replaceUsesWithIf(repl, [&](OpOperand &opOperand) {
284  return outlinedFunc->isProperAncestor(opOperand.getOwner());
285  });
286  }
287 
288  return outlinedFunc;
289 }
290 
292  func::FuncOp *thenFn, StringRef thenFnName,
293  func::FuncOp *elseFn, StringRef elseFnName) {
294  IRRewriter rewriter(b);
295  Location loc = ifOp.getLoc();
296  FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
297  if (thenFn && !ifOp.getThenRegion().empty()) {
298  outlinedFuncOpOrFailure = outlineSingleBlockRegion(
299  rewriter, loc, ifOp.getThenRegion(), thenFnName);
300  if (failed(outlinedFuncOpOrFailure))
301  return failure();
302  *thenFn = *outlinedFuncOpOrFailure;
303  }
304  if (elseFn && !ifOp.getElseRegion().empty()) {
305  outlinedFuncOpOrFailure = outlineSingleBlockRegion(
306  rewriter, loc, ifOp.getElseRegion(), elseFnName);
307  if (failed(outlinedFuncOpOrFailure))
308  return failure();
309  *elseFn = *outlinedFuncOpOrFailure;
310  }
311  return success();
312 }
313 
316  assert(rootOp != nullptr && "Root operation must not be a nullptr.");
317  bool rootEnclosesPloops = false;
318  for (Region &region : rootOp->getRegions()) {
319  for (Block &block : region.getBlocks()) {
320  for (Operation &op : block) {
321  bool enclosesPloops = getInnermostParallelLoops(&op, result);
322  rootEnclosesPloops |= enclosesPloops;
323  if (auto ploop = dyn_cast<scf::ParallelOp>(op)) {
324  rootEnclosesPloops = true;
325 
326  // Collect parallel loop if it is an innermost one.
327  if (!enclosesPloops)
328  result.push_back(ploop);
329  }
330  }
331  }
332  }
333  return rootEnclosesPloops;
334 }
335 
336 // Build the IR that performs ceil division of a positive value by a constant:
337 // ceildiv(a, B) = divis(a + (B-1), B)
338 // where divis is rounding-to-zero division.
339 static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
340  int64_t divisor) {
341  assert(divisor > 0 && "expected positive divisor");
342  assert(dividend.getType().isIndex() && "expected index-typed value");
343 
344  Value divisorMinusOneCst =
345  builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
346  Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
347  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
348  return builder.create<arith::DivUIOp>(loc, sum, divisorCst);
349 }
350 
351 // Build the IR that performs ceil division of a positive value by another
352 // positive value:
353 // ceildiv(a, b) = divis(a + (b - 1), b)
354 // where divis is rounding-to-zero division.
355 static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
356  Value divisor) {
357  assert(dividend.getType().isIndex() && "expected index-typed value");
358 
359  Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
360  Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
361  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
362  return builder.create<arith::DivUIOp>(loc, sum, divisor);
363 }
364 
365 /// Helper to replace uses of loop carried values (iter_args) and loop
366 /// yield values while promoting single iteration scf.for ops.
367 static void replaceIterArgsAndYieldResults(scf::ForOp forOp) {
368  // Replace uses of iter arguments with iter operands (initial values).
369  auto iterOperands = forOp.getIterOperands();
370  auto iterArgs = forOp.getRegionIterArgs();
371  for (auto e : llvm::zip(iterOperands, iterArgs))
372  std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
373 
374  // Replace uses of loop results with the values yielded by the loop.
375  auto outerResults = forOp.getResults();
376  auto innerResults = forOp.getBody()->getTerminator()->getOperands();
377  for (auto e : llvm::zip(outerResults, innerResults))
378  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
379 }
380 
381 /// Promotes the loop body of a forOp to its containing block if the forOp
382 /// it can be determined that the loop has a single iteration.
384  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
385  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
386  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
387  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 ||
388  ubCstOp.value() < 0 || stepCstOp.value() < 0)
389  return failure();
390  int64_t tripCount =
391  mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value());
392  if (tripCount != 1)
393  return failure();
394  auto iv = forOp.getInductionVar();
395  iv.replaceAllUsesWith(lbCstOp);
396 
398 
399  // Move the loop body operations, except for its terminator, to the loop's
400  // containing block.
401  auto *parentBlock = forOp->getBlock();
402  forOp.getBody()->getTerminator()->erase();
403  parentBlock->getOperations().splice(Block::iterator(forOp),
404  forOp.getBody()->getOperations());
405  forOp.erase();
406  return success();
407 }
408 
409 /// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
410 /// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
411 /// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
412 /// unrolled iteration using annotateFn.
414  Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
415  function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
416  function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
417  ValueRange iterArgs, ValueRange yieldedValues) {
418  // Builder to insert unrolled bodies just before the terminator of the body of
419  // 'forOp'.
420  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
421 
422  if (!annotateFn)
423  annotateFn = [](unsigned, Operation *, OpBuilder) {};
424 
425  // Keep a pointer to the last non-terminator operation in the original block
426  // so that we know what to clone (since we are doing this in-place).
427  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
428 
429  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
430  SmallVector<Value, 4> lastYielded(yieldedValues);
431 
432  for (unsigned i = 1; i < unrollFactor; i++) {
433  BlockAndValueMapping operandMap;
434 
435  // Prepare operand map.
436  operandMap.map(iterArgs, lastYielded);
437 
438  // If the induction variable is used, create a remapping to the value for
439  // this unrolled instance.
440  if (!forOpIV.use_empty()) {
441  Value ivUnroll = ivRemapFn(i, forOpIV, builder);
442  operandMap.map(forOpIV, ivUnroll);
443  }
444 
445  // Clone the original body of 'forOp'.
446  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
447  Operation *clonedOp = builder.clone(*it, operandMap);
448  annotateFn(i, clonedOp, builder);
449  }
450 
451  // Update yielded values.
452  for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
453  lastYielded[i] = operandMap.lookup(yieldedValues[i]);
454  }
455 
456  // Make sure we annotate the Ops in the original body. We do this last so that
457  // any annotations are not copied into the cloned Ops above.
458  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
459  annotateFn(0, &*it, builder);
460 
461  // Update operands of the yield statement.
462  loopBodyBlock->getTerminator()->setOperands(lastYielded);
463 }
464 
465 /// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
467  scf::ForOp forOp, uint64_t unrollFactor,
468  function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
469  assert(unrollFactor > 0 && "expected positive unroll factor");
470 
471  // Return if the loop body is empty.
472  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
473  return success();
474 
475  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
476  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
477  OpBuilder boundsBuilder(forOp);
478  auto loc = forOp.getLoc();
479  Value step = forOp.getStep();
480  Value upperBoundUnrolled;
481  Value stepUnrolled;
482  bool generateEpilogueLoop = true;
483 
484  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
485  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
486  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
487  if (lbCstOp && ubCstOp && stepCstOp) {
488  // Constant loop bounds computation.
489  int64_t lbCst = lbCstOp.value();
490  int64_t ubCst = ubCstOp.value();
491  int64_t stepCst = stepCstOp.value();
492  assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
493  "expected positive loop bounds and step");
494  int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
495 
496  if (unrollFactor == 1) {
497  if (tripCount == 1 && failed(promoteIfSingleIteration(forOp)))
498  return failure();
499  return success();
500  }
501 
502  int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
503  int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
504  assert(upperBoundUnrolledCst <= ubCst);
505  int64_t stepUnrolledCst = stepCst * unrollFactor;
506 
507  // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
508  generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
509  if (generateEpilogueLoop)
510  upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
511  loc, upperBoundUnrolledCst);
512  else
513  upperBoundUnrolled = ubCstOp;
514 
515  // Create constant for 'stepUnrolled'.
516  stepUnrolled = stepCst == stepUnrolledCst
517  ? step
518  : boundsBuilder.create<arith::ConstantIndexOp>(
519  loc, stepUnrolledCst);
520  } else {
521  // Dynamic loop bounds computation.
522  // TODO: Add dynamic asserts for negative lb/ub/step, or
523  // consider using ceilDiv from AffineApplyExpander.
524  auto lowerBound = forOp.getLowerBound();
525  auto upperBound = forOp.getUpperBound();
526  Value diff =
527  boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
528  Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
529  Value unrollFactorCst =
530  boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
531  Value tripCountRem =
532  boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
533  // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
534  Value tripCountEvenMultiple =
535  boundsBuilder.create<arith::SubIOp>(loc, tripCount, tripCountRem);
536  // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
537  upperBoundUnrolled = boundsBuilder.create<arith::AddIOp>(
538  loc, lowerBound,
539  boundsBuilder.create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
540  // Scale 'step' by 'unrollFactor'.
541  stepUnrolled =
542  boundsBuilder.create<arith::MulIOp>(loc, step, unrollFactorCst);
543  }
544 
545  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
546  if (generateEpilogueLoop) {
547  OpBuilder epilogueBuilder(forOp->getContext());
548  epilogueBuilder.setInsertionPoint(forOp->getBlock(),
549  std::next(Block::iterator(forOp)));
550  auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
551  epilogueForOp.setLowerBound(upperBoundUnrolled);
552 
553  // Update uses of loop results.
554  auto results = forOp.getResults();
555  auto epilogueResults = epilogueForOp.getResults();
556 
557  for (auto e : llvm::zip(results, epilogueResults)) {
558  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
559  }
560  epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
561  epilogueForOp.getNumIterOperands(), results);
562  (void)promoteIfSingleIteration(epilogueForOp);
563  }
564 
565  // Create unrolled loop.
566  forOp.setUpperBound(upperBoundUnrolled);
567  forOp.setStep(stepUnrolled);
568 
569  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
570  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
571 
573  forOp.getBody(), forOp.getInductionVar(), unrollFactor,
574  [&](unsigned i, Value iv, OpBuilder b) {
575  // iv' = iv + step * i;
576  auto stride = b.create<arith::MulIOp>(
577  loc, step, b.create<arith::ConstantIndexOp>(loc, i));
578  return b.create<arith::AddIOp>(loc, iv, stride);
579  },
580  annotateFn, iterArgs, yieldedValues);
581  // Promote the loop body up if this has turned into a single iteration loop.
582  (void)promoteIfSingleIteration(forOp);
583  return success();
584 }
585 
586 /// Return the new lower bound, upper bound, and step in that order. Insert any
587 /// additional bounds calculations before the given builder and any additional
588 /// conversion back to the original loop induction value inside the given Block.
589 static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
590  OpBuilder &insideLoopBuilder, Location loc,
591  Value lowerBound, Value upperBound, Value step,
592  Value inductionVar) {
593  // Check if the loop is already known to have a constant zero lower bound or
594  // a constant one step.
595  bool isZeroBased = false;
596  if (auto ubCst = lowerBound.getDefiningOp<arith::ConstantIndexOp>())
597  isZeroBased = ubCst.value() == 0;
598 
599  bool isStepOne = false;
600  if (auto stepCst = step.getDefiningOp<arith::ConstantIndexOp>())
601  isStepOne = stepCst.value() == 1;
602 
603  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
604  // assuming the step is strictly positive. Update the bounds and the step
605  // of the loop to go from 0 to the number of iterations, if necessary.
606  if (isZeroBased && isStepOne)
607  return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
608  /*step=*/step};
609 
610  Value diff = boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
611  Value newUpperBound =
612  boundsBuilder.create<arith::CeilDivSIOp>(loc, diff, step);
613 
614  Value newLowerBound =
615  isZeroBased ? lowerBound
616  : boundsBuilder.create<arith::ConstantIndexOp>(loc, 0);
617  Value newStep =
618  isStepOne ? step : boundsBuilder.create<arith::ConstantIndexOp>(loc, 1);
619 
620  // Insert code computing the value of the original loop induction variable
621  // from the "normalized" one.
622  Value scaled =
623  isStepOne
624  ? inductionVar
625  : insideLoopBuilder.create<arith::MulIOp>(loc, inductionVar, step);
626  Value shifted =
627  isZeroBased
628  ? scaled
629  : insideLoopBuilder.create<arith::AddIOp>(loc, scaled, lowerBound);
630 
631  SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
632  shifted.getDefiningOp()};
633  inductionVar.replaceAllUsesExcept(shifted, preserve);
634  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
635  /*step=*/newStep};
636 }
637 
638 /// Transform a loop with a strictly positive step
639 /// for %i = %lb to %ub step %s
640 /// into a 0-based loop with step 1
641 /// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
642 /// %i = %ii * %s + %lb
643 /// Insert the induction variable remapping in the body of `inner`, which is
644 /// expected to be either `loop` or another loop perfectly nested under `loop`.
645 /// Insert the definition of new bounds immediate before `outer`, which is
646 /// expected to be either `loop` or its parent in the loop nest.
647 static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
648  OpBuilder builder(outer);
649  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
650  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
651  loop.getLowerBound(), loop.getUpperBound(),
652  loop.getStep(), loop.getInductionVar());
653 
654  loop.setLowerBound(loopPieces.lowerBound);
655  loop.setUpperBound(loopPieces.upperBound);
656  loop.setStep(loopPieces.step);
657 }
658 
660  if (loops.size() < 2)
661  return;
662 
663  scf::ForOp innermost = loops.back();
664  scf::ForOp outermost = loops.front();
665 
666  // 1. Make sure all loops iterate from 0 to upperBound with step 1. This
667  // allows the following code to assume upperBound is the number of iterations.
668  for (auto loop : loops)
669  normalizeLoop(loop, outermost, innermost);
670 
671  // 2. Emit code computing the upper bound of the coalesced loop as product
672  // of the number of iterations of all loops.
673  OpBuilder builder(outermost);
674  Location loc = outermost.getLoc();
675  Value upperBound = outermost.getUpperBound();
676  for (auto loop : loops.drop_front())
677  upperBound =
678  builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
679  outermost.setUpperBound(upperBound);
680 
681  builder.setInsertionPointToStart(outermost.getBody());
682 
683  // 3. Remap induction variables. For each original loop, the value of the
684  // induction variable can be obtained by dividing the induction variable of
685  // the linearized loop by the total number of iterations of the loops nested
686  // in it modulo the number of iterations in this loop (remove the values
687  // related to the outer loops):
688  // iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
689  // Compute these iteratively from the innermost loop by creating a "running
690  // quotient" of division by the range.
691  Value previous = outermost.getInductionVar();
692  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
693  unsigned idx = loops.size() - i - 1;
694  if (i != 0)
695  previous = builder.create<arith::DivSIOp>(loc, previous,
696  loops[idx + 1].getUpperBound());
697 
698  Value iv = (i == e - 1) ? previous
699  : builder.create<arith::RemSIOp>(
700  loc, previous, loops[idx].getUpperBound());
701  replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
702  loops.back().getRegion());
703  }
704 
705  // 4. Move the operations from the innermost just above the second-outermost
706  // loop, delete the extra terminator and the second-outermost loop.
707  scf::ForOp second = loops[1];
708  innermost.getBody()->back().erase();
709  outermost.getBody()->getOperations().splice(
710  Block::iterator(second.getOperation()),
711  innermost.getBody()->getOperations());
712  second.erase();
713 }
714 
716  scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions) {
717  OpBuilder outsideBuilder(loops);
718  Location loc = loops.getLoc();
719 
720  // Presort combined dimensions.
721  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
722  for (auto &dims : sortedDimensions)
723  llvm::sort(dims);
724 
725  // Normalize ParallelOp's iteration pattern.
726  SmallVector<Value, 3> normalizedLowerBounds, normalizedSteps,
727  normalizedUpperBounds;
728  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
729  OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
730  auto resultBounds =
731  normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
732  loops.getLowerBound()[i], loops.getUpperBound()[i],
733  loops.getStep()[i], loops.getBody()->getArgument(i));
734 
735  normalizedLowerBounds.push_back(resultBounds.lowerBound);
736  normalizedUpperBounds.push_back(resultBounds.upperBound);
737  normalizedSteps.push_back(resultBounds.step);
738  }
739 
740  // Combine iteration spaces.
741  SmallVector<Value, 3> lowerBounds, upperBounds, steps;
742  auto cst0 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 0);
743  auto cst1 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
744  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
745  Value newUpperBound = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
746  for (auto idx : sortedDimensions[i]) {
747  newUpperBound = outsideBuilder.create<arith::MulIOp>(
748  loc, newUpperBound, normalizedUpperBounds[idx]);
749  }
750  lowerBounds.push_back(cst0);
751  steps.push_back(cst1);
752  upperBounds.push_back(newUpperBound);
753  }
754 
755  // Create new ParallelLoop with conversions to the original induction values.
756  // The loop below uses divisions to get the relevant range of values in the
757  // new induction value that represent each range of the original induction
758  // value. The remainders then determine based on that range, which iteration
759  // of the original induction value this represents. This is a normalized value
760  // that is un-normalized already by the previous logic.
761  auto newPloop = outsideBuilder.create<scf::ParallelOp>(
762  loc, lowerBounds, upperBounds, steps,
763  [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
764  for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
765  Value previous = ploopIVs[i];
766  unsigned numberCombinedDimensions = combinedDimensions[i].size();
767  // Iterate over all except the last induction value.
768  for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
769  unsigned idx = combinedDimensions[i][j];
770 
771  // Determine the current induction value's current loop iteration
772  Value iv = insideBuilder.create<arith::RemSIOp>(
773  loc, previous, normalizedUpperBounds[idx]);
774  replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
775  loops.getRegion());
776 
777  // Remove the effect of the current induction value to prepare for
778  // the next value.
779  previous = insideBuilder.create<arith::DivSIOp>(
780  loc, previous, normalizedUpperBounds[idx]);
781  }
782 
783  // The final induction value is just the remaining value.
784  unsigned idx = combinedDimensions[i][0];
785  replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
786  previous, loops.getRegion());
787  }
788  });
789 
790  // Replace the old loop with the new loop.
791  loops.getBody()->back().erase();
792  newPloop.getBody()->getOperations().splice(
793  Block::iterator(newPloop.getBody()->back()),
794  loops.getBody()->getOperations());
795  loops.erase();
796 }
797 
798 // Hoist the ops within `outer` that appear before `inner`.
799 // Such ops include the ops that have been introduced by parametric tiling.
800 // Ops that come from triangular loops (i.e. that belong to the program slice
801 // rooted at `outer`) and ops that have side effects cannot be hoisted.
802 // Return failure when any op fails to hoist.
803 static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
804  SetVector<Operation *> forwardSlice;
806  outer.getInductionVar(), &forwardSlice,
807  [&inner](Operation *op) { return op != inner.getOperation(); });
808  LogicalResult status = success();
810  for (auto &op : outer.getBody()->without_terminator()) {
811  // Stop when encountering the inner loop.
812  if (&op == inner.getOperation())
813  break;
814  // Skip over non-hoistable ops.
815  if (forwardSlice.count(&op) > 0) {
816  status = failure();
817  continue;
818  }
819  // Skip intermediate scf::ForOp, these are not considered a failure.
820  if (isa<scf::ForOp>(op))
821  continue;
822  // Skip other ops with regions.
823  if (op.getNumRegions() > 0) {
824  status = failure();
825  continue;
826  }
827  // Skip if op has side effects.
828  // TODO: loads to immutable memory regions are ok.
829  if (!isMemoryEffectFree(&op)) {
830  status = failure();
831  continue;
832  }
833  toHoist.push_back(&op);
834  }
835  auto *outerForOp = outer.getOperation();
836  for (auto *op : toHoist)
837  op->moveBefore(outerForOp);
838  return status;
839 }
840 
841 // Traverse the interTile and intraTile loops and try to hoist ops such that
842 // bands of perfectly nested loops are isolated.
843 // Return failure if either perfect interTile or perfect intraTile bands cannot
844 // be formed.
845 static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
846  LogicalResult status = success();
847  const Loops &interTile = tileLoops.first;
848  const Loops &intraTile = tileLoops.second;
849  auto size = interTile.size();
850  assert(size == intraTile.size());
851  if (size <= 1)
852  return success();
853  for (unsigned s = 1; s < size; ++s)
854  status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
855  : failure();
856  for (unsigned s = 1; s < size; ++s)
857  status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
858  : failure();
859  return status;
860 }
861 
862 /// Collect perfectly nested loops starting from `rootForOps`. Loops are
863 /// perfectly nested if each loop is the first and only non-terminator operation
864 /// in the parent loop. Collect at most `maxLoops` loops and append them to
865 /// `forOps`.
866 template <typename T>
868  SmallVectorImpl<T> &forOps, T rootForOp,
869  unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
870  for (unsigned i = 0; i < maxLoops; ++i) {
871  forOps.push_back(rootForOp);
872  Block &body = rootForOp.getRegion().front();
873  if (body.begin() != std::prev(body.end(), 2))
874  return;
875 
876  rootForOp = dyn_cast<T>(&body.front());
877  if (!rootForOp)
878  return;
879  }
880 }
881 
882 static Loops stripmineSink(scf::ForOp forOp, Value factor,
883  ArrayRef<scf::ForOp> targets) {
884  auto originalStep = forOp.getStep();
885  auto iv = forOp.getInductionVar();
886 
887  OpBuilder b(forOp);
888  forOp.setStep(b.create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
889 
890  Loops innerLoops;
891  for (auto t : targets) {
892  // Save information for splicing ops out of t when done
893  auto begin = t.getBody()->begin();
894  auto nOps = t.getBody()->getOperations().size();
895 
896  // Insert newForOp before the terminator of `t`.
897  auto b = OpBuilder::atBlockTerminator((t.getBody()));
898  Value stepped = b.create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
899  Value less = b.create<arith::CmpIOp>(t.getLoc(), arith::CmpIPredicate::slt,
900  forOp.getUpperBound(), stepped);
901  Value ub = b.create<arith::SelectOp>(t.getLoc(), less,
902  forOp.getUpperBound(), stepped);
903 
904  // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses.
905  auto newForOp = b.create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
906  newForOp.getBody()->getOperations().splice(
907  newForOp.getBody()->getOperations().begin(),
908  t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
909  replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
910  newForOp.getRegion());
911 
912  innerLoops.push_back(newForOp);
913  }
914 
915  return innerLoops;
916 }
917 
918 // Stripmines a `forOp` by `factor` and sinks it under a single `target`.
919 // Returns the new for operation, nested immediately under `target`.
920 template <typename SizeType>
921 static scf::ForOp stripmineSink(scf::ForOp forOp, SizeType factor,
922  scf::ForOp target) {
923  // TODO: Use cheap structural assertions that targets are nested under
924  // forOp and that targets are not nested under each other when DominanceInfo
925  // exposes the capability. It seems overkill to construct a whole function
926  // dominance tree at this point.
927  auto res = stripmineSink(forOp, factor, ArrayRef<scf::ForOp>(target));
928  assert(res.size() == 1 && "Expected 1 inner forOp");
929  return res[0];
930 }
931 
933  ArrayRef<Value> sizes,
934  ArrayRef<scf::ForOp> targets) {
936  SmallVector<scf::ForOp, 8> currentTargets(targets.begin(), targets.end());
937  for (auto it : llvm::zip(forOps, sizes)) {
938  auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
939  res.push_back(step);
940  currentTargets = step;
941  }
942  return res;
943 }
944 
946  scf::ForOp target) {
948  for (auto loops : tile(forOps, sizes, ArrayRef<scf::ForOp>(target))) {
949  assert(loops.size() == 1);
950  res.push_back(loops[0]);
951  }
952  return res;
953 }
954 
955 Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes) {
956  // Collect perfectly nested loops. If more size values provided than nested
957  // loops available, truncate `sizes`.
959  forOps.reserve(sizes.size());
960  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
961  if (forOps.size() < sizes.size())
962  sizes = sizes.take_front(forOps.size());
963 
964  return ::tile(forOps, sizes, forOps.back());
965 }
966 
968  scf::ForOp root) {
969  getPerfectlyNestedLoopsImpl(nestedLoops, root);
970 }
971 
973  ArrayRef<int64_t> sizes) {
974  // Collect perfectly nested loops. If more size values provided than nested
975  // loops available, truncate `sizes`.
977  forOps.reserve(sizes.size());
978  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
979  if (forOps.size() < sizes.size())
980  sizes = sizes.take_front(forOps.size());
981 
982  // Compute the tile sizes such that i-th outer loop executes size[i]
983  // iterations. Given that the loop current executes
984  // numIterations = ceildiv((upperBound - lowerBound), step)
985  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
986  SmallVector<Value, 4> tileSizes;
987  tileSizes.reserve(sizes.size());
988  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
989  assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
990 
991  auto forOp = forOps[i];
992  OpBuilder builder(forOp);
993  auto loc = forOp.getLoc();
994  Value diff = builder.create<arith::SubIOp>(loc, forOp.getUpperBound(),
995  forOp.getLowerBound());
996  Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
997  Value iterationsPerBlock =
998  ceilDivPositive(builder, loc, numIterations, sizes[i]);
999  tileSizes.push_back(iterationsPerBlock);
1000  }
1001 
1002  // Call parametric tiling with the given sizes.
1003  auto intraTile = tile(forOps, tileSizes, forOps.back());
1004  TileLoops tileLoops = std::make_pair(forOps, intraTile);
1005 
1006  // TODO: for now we just ignore the result of band isolation.
1007  // In the future, mapping decisions may be impacted by the ability to
1008  // isolate perfectly nested bands.
1009  (void)tryIsolateBands(tileLoops);
1010 
1011  return tileLoops;
1012 }
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
Definition: Utils.cpp:845
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOps.
Definition: Utils.cpp:867
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
Definition: Utils.cpp:803
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of scf::ForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor',...
Definition: Utils.cpp:413
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
Definition: Utils.cpp:882
static LoopParams normalizeLoop(OpBuilder &boundsBuilder, OpBuilder &insideLoopBuilder, Location loc, Value lowerBound, Value upperBound, Value step, Value inductionVar)
Return the new lower bound, upper bound, and step in that order.
Definition: Utils.cpp:589
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
Definition: Utils.cpp:339
static void replaceIterArgsAndYieldResults(scf::ForOp forOp)
Helper to replace uses of loop carried values (iter_args) and loop yield values while promoting singl...
Definition: Utils.cpp:367
static constexpr const bool value
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Block * lookup(Block *from) const
Lookup a mapped value within the map.
void map(Block *from, Block *to)
Inserts a new mapping for 'from' to 'to'.
This class represents an argument of a Block.
Definition: Value.h:296
Block represents an ordered list of Operations.
Definition: Block.h:30
OpListType::iterator iterator
Definition: Block.h:129
unsigned getNumArguments()
Definition: Block.h:117
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:232
OpListType & getOperations()
Definition: Block.h:126
BlockArgListType getArguments()
Definition: Block.h:76
Operation & front()
Definition: Block.h:142
iterator end()
Definition: Block.h:133
iterator begin()
Definition: Block.h:132
MLIRContext * getContext() const
Definition: Builders.h:54
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
Definition: PatternMatch.h:589
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:64
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:300
This class helps build Operations.
Definition: Builders.h:198
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition: Builders.h:230
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:383
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:350
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
Definition: Builders.h:242
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:388
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=llvm::None, ArrayRef< Location > locs=llvm::None)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition: Builders.cpp:395
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:422
Operation * clone(Operation &op, BlockAndValueMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:510
This class represents an operand of an operation.
Definition: Value.h:247
This is a value defined by a result of an operation.
Definition: Value.h:442
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:31
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:324
Operation * clone(BlockAndValueMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Definition: Operation.cpp:558
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:477
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:480
operand_type_range getOperandTypes()
Definition: Operation.h:314
result_type_range getResultTypes()
Definition: Operation.h:345
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:295
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:434
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Definition: Operation.cpp:194
bool isProperAncestor(Operation *other)
Return true if this operation is a proper ancestor of the other operation.
Definition: Operation.cpp:176
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:418
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
BlockArgListType getArguments()
Definition: Region.h:81
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
Definition: Region.h:205
bool hasOneBlock()
Return true if this region has exactly one block.
Definition: Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:398
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
virtual void mergeBlocks(Block *source, Block *dest, ValueRange argValues=llvm::None)
Merge the operations of block 'source' into the end of block 'dest'.
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
bool isIndex() const
Definition: Types.cpp:30
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:349
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
bool use_empty() const
Returns true if this value has no uses.
Definition: Value.h:199
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Definition: Value.cpp:82
Type getType() const
Return the type of this value.
Definition: Value.h:114
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions) const
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition: Value.cpp:61
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Specialization of arith.constant op that returns an integer of index type.
Definition: Arith.h:89
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:40
Include the generated interface declarations.
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, bool cleanUpUnroll=false)
Unrolls this for operation by the specified unroll factor.
Definition: LoopUtils.cpp:1091
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
Definition: Utils.cpp:291
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:24
void collapseParallelLoops(scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
Definition: Utils.cpp:715
std::pair< Loops, Loops > TileLoops
Definition: Utils.h:147
int64_t ceilDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's ceildiv operation on constants.
Definition: MathExtras.h:23
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: LoopUtils.cpp:1701
scf::ForOp replaceLoopWithNewYields(OpBuilder &builder, scf::ForOp loop, ValueRange newIterOperands, const NewYieldValueFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Definition: Utils.cpp:42
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
Definition: LoopUtils.cpp:945
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
Definition: Utils.cpp:314
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Definition: RegionUtils.cpp:59
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
Definition: LoopUtils.cpp:131
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
Definition: Utils.cpp:187
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, TransitiveFilter filter=nullptr)
Fills forwardSlice with the computed forward slice (i.e.
LogicalResult coalesceLoops(MutableArrayRef< AffineForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition: LoopUtils.cpp:1724
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(OpBuilder &builder, ArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValueFn &newYieldValueFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
Definition: Utils.cpp:109
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBBArgs)> NewYieldValueFn
Replace the loop with newIterOperands added as new initialization values.
Definition: Utils.h:51
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Definition: Utils.cpp:972
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
Definition: LoopUtils.cpp:859
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.