25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
45 bool replaceIterOperandsUsesInLoop) {
51 assert(loopNest.size() <= 10 &&
52 "exceeded recursion limit when yielding value from loop nest");
84 if (loopNest.size() == 1) {
86 cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
87 rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
89 return {innerMostLoop};
99 innerNewBBArgs, newYieldValuesFn,
100 replaceIterOperandsUsesInLoop);
101 return llvm::to_vector(llvm::map_range(
102 newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
105 scf::ForOp outerMostLoop =
106 cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
107 rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
108 newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
125 func::CallOp *callOp) {
126 assert(!funcName.empty() &&
"funcName cannot be empty");
140 ValueRange outlinedValues(captures.getArrayRef());
147 outlinedFuncArgTypes.push_back(arg.getType());
148 outlinedFuncArgLocs.push_back(arg.getLoc());
150 for (
Value value : outlinedValues) {
151 outlinedFuncArgTypes.push_back(value.getType());
152 outlinedFuncArgLocs.push_back(value.getLoc());
154 FunctionType outlinedFuncType =
158 rewriter.
create<func::FuncOp>(loc, funcName, outlinedFuncType);
159 Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
164 auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
169 originalBlock, outlinedFuncBody,
170 outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
180 ®ion, region.
begin(),
181 TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
183 .take_front(numOriginalBlockArguments));
188 llvm::append_range(callValues, newBlock->
getArguments());
189 llvm::append_range(callValues, outlinedValues);
190 auto call = rewriter.
create<func::CallOp>(loc, outlinedFunc, callValues);
199 rewriter.
clone(*originalTerminator, bvm);
200 rewriter.
eraseOp(originalTerminator);
205 for (
auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
206 outlinedValues.size()))) {
207 Value orig = std::get<0>(it);
208 Value repl = std::get<1>(it);
218 return outlinedFunc->isProperAncestor(opOperand.
getOwner());
226 func::FuncOp *thenFn, StringRef thenFnName,
227 func::FuncOp *elseFn, StringRef elseFnName) {
231 if (thenFn && !ifOp.getThenRegion().empty()) {
233 rewriter, loc, ifOp.getThenRegion(), thenFnName);
234 if (
failed(outlinedFuncOpOrFailure))
236 *thenFn = *outlinedFuncOpOrFailure;
238 if (elseFn && !ifOp.getElseRegion().empty()) {
240 rewriter, loc, ifOp.getElseRegion(), elseFnName);
241 if (
failed(outlinedFuncOpOrFailure))
243 *elseFn = *outlinedFuncOpOrFailure;
250 assert(rootOp !=
nullptr &&
"Root operation must not be a nullptr.");
251 bool rootEnclosesPloops =
false;
253 for (
Block &block : region.getBlocks()) {
256 rootEnclosesPloops |= enclosesPloops;
257 if (
auto ploop = dyn_cast<scf::ParallelOp>(op)) {
258 rootEnclosesPloops =
true;
262 result.push_back(ploop);
267 return rootEnclosesPloops;
275 assert(divisor > 0 &&
"expected positive divisor");
276 assert(dividend.
getType().
isIndex() &&
"expected index-typed value");
278 Value divisorMinusOneCst =
279 builder.
create<arith::ConstantIndexOp>(loc, divisor - 1);
280 Value divisorCst = builder.
create<arith::ConstantIndexOp>(loc, divisor);
281 Value sum = builder.
create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
282 return builder.
create<arith::DivUIOp>(loc, sum, divisorCst);
291 assert(dividend.
getType().
isIndex() &&
"expected index-typed value");
293 Value cstOne = builder.
create<arith::ConstantIndexOp>(loc, 1);
294 Value divisorMinusOne = builder.
create<arith::SubIOp>(loc, divisor, cstOne);
295 Value sum = builder.
create<arith::AddIOp>(loc, dividend, divisorMinusOne);
296 return builder.
create<arith::DivUIOp>(loc, sum, divisor);
304 Block *loopBodyBlock,
Value forOpIV, uint64_t unrollFactor,
322 for (
unsigned i = 1; i < unrollFactor; i++) {
326 operandMap.
map(iterArgs, lastYielded);
331 Value ivUnroll = ivRemapFn(i, forOpIV, builder);
332 operandMap.
map(forOpIV, ivUnroll);
336 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++) {
338 annotateFn(i, clonedOp, builder);
342 for (
unsigned i = 0, e = lastYielded.size(); i < e; i++)
343 lastYielded[i] = operandMap.
lookup(yieldedValues[i]);
348 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++)
349 annotateFn(0, &*it, builder);
357 scf::ForOp forOp, uint64_t unrollFactor,
359 assert(unrollFactor > 0 &&
"expected positive unroll factor");
362 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
369 auto loc = forOp.getLoc();
370 Value step = forOp.getStep();
371 Value upperBoundUnrolled;
373 bool generateEpilogueLoop =
true;
378 if (lbCstOp && ubCstOp && stepCstOp) {
380 int64_t lbCst = lbCstOp.value();
381 int64_t ubCst = ubCstOp.value();
382 int64_t stepCst = stepCstOp.value();
383 assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
384 "expected positive loop bounds and step");
387 if (unrollFactor == 1) {
388 if (tripCount == 1 &&
failed(forOp.promoteIfSingleIteration(rewriter)))
393 int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
394 int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
395 int64_t stepUnrolledCst = stepCst * unrollFactor;
398 generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
399 if (generateEpilogueLoop)
401 loc, upperBoundUnrolledCst);
403 upperBoundUnrolled = forOp.getUpperBound();
406 stepUnrolled = stepCst == stepUnrolledCst
409 loc, stepUnrolledCst);
414 auto lowerBound = forOp.getLowerBound();
415 auto upperBound = forOp.getUpperBound();
417 boundsBuilder.
create<arith::SubIOp>(loc, upperBound, lowerBound);
419 Value unrollFactorCst =
422 boundsBuilder.
create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
424 Value tripCountEvenMultiple =
425 boundsBuilder.
create<arith::SubIOp>(loc, tripCount, tripCountRem);
427 upperBoundUnrolled = boundsBuilder.
create<arith::AddIOp>(
429 boundsBuilder.
create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
432 boundsBuilder.
create<arith::MulIOp>(loc, step, unrollFactorCst);
436 if (generateEpilogueLoop) {
437 OpBuilder epilogueBuilder(forOp->getContext());
440 auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.
clone(*forOp));
441 epilogueForOp.setLowerBound(upperBoundUnrolled);
444 auto results = forOp.getResults();
445 auto epilogueResults = epilogueForOp.getResults();
447 for (
auto e : llvm::zip(results, epilogueResults)) {
448 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
450 epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
451 epilogueForOp.getInitArgs().size(), results);
452 (void)epilogueForOp.promoteIfSingleIteration(rewriter);
456 forOp.setUpperBound(upperBoundUnrolled);
457 forOp.setStep(stepUnrolled);
459 auto iterArgs =
ValueRange(forOp.getRegionIterArgs());
460 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
463 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
466 auto stride = b.create<arith::MulIOp>(
467 loc, step, b.create<arith::ConstantIndexOp>(loc, i));
468 return b.create<arith::AddIOp>(loc, iv, stride);
470 annotateFn, iterArgs, yieldedValues);
472 (void)forOp.promoteIfSingleIteration(rewriter);
490 bool isZeroBased =
false;
492 isZeroBased = lbCst.value() == 0;
494 bool isStepOne =
false;
496 isStepOne = stepCst.value() == 1;
501 if (isZeroBased && isStepOne)
502 return {lb, ub, step};
504 Value diff = isZeroBased ? ub : rewriter.
create<arith::SubIOp>(loc, ub, lb);
505 Value newUpperBound =
506 isStepOne ? diff : rewriter.
create<arith::CeilDivSIOp>(loc, diff, step);
508 Value newLowerBound = isZeroBased
510 : rewriter.
create<arith::ConstantOp>(
512 Value newStep = isStepOne
514 : rewriter.
create<arith::ConstantOp>(
517 return {newLowerBound, newUpperBound, newStep};
524 Value denormalizedIv;
529 Value scaled = normalizedIv;
531 scaled = rewriter.
create<arith::MulIOp>(loc, normalizedIv, origStep);
534 denormalizedIv = scaled;
536 denormalizedIv = rewriter.
create<arith::AddIOp>(loc, scaled, origLb);
546 assert(!values.empty() &&
"unexpected empty list");
547 Value productOf = values.front();
548 for (
auto v : values.drop_front()) {
549 productOf = rewriter.
create<arith::MulIOp>(loc, productOf, v);
565 Value previous = linearizedIv;
568 for (
unsigned i = 0, e = ubs.size(); i < e; ++i) {
569 unsigned idx = ubs.size() - i - 1;
571 previous = rewriter.
create<arith::DivSIOp>(loc, previous, ubs[idx + 1]);
572 preservedUsers.insert(previous.getDefiningOp());
576 iv = rewriter.
create<arith::RemSIOp>(loc, previous, ubs[idx]);
579 delinearizedIvs[idx] = iv;
581 return {delinearizedIvs, preservedUsers};
586 if (loops.size() < 2)
589 scf::ForOp innermost = loops.back();
590 scf::ForOp outermost = loops.front();
594 for (
auto loop : loops) {
597 Value lb = loop.getLowerBound();
598 Value ub = loop.getUpperBound();
599 Value step = loop.getStep();
604 loop.setLowerBound(newLoopParams.lowerBound);
605 loop.setUpperBound(newLoopParams.upperBound);
606 loop.setStep(newLoopParams.step);
611 loop.getInductionVar(), lb, step);
620 loops, [](
auto loop) {
return loop.getUpperBound(); });
622 outermost.setUpperBound(upperBound);
626 rewriter, loc, outermost.getInductionVar(), upperBounds);
630 for (
int i = loops.size() - 1; i > 0; --i) {
631 auto outerLoop = loops[i - 1];
632 auto innerLoop = loops[i];
634 Operation *innerTerminator = innerLoop.getBody()->getTerminator();
635 auto yieldedVals = llvm::to_vector(innerTerminator->
getOperands());
636 rewriter.
eraseOp(innerTerminator);
639 innerBlockArgs.push_back(delinearizeIvs[i]);
640 llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
643 rewriter.
replaceOp(innerLoop, yieldedVals);
652 IRRewriter rewriter(loops.front().getContext());
667 for (
unsigned i = 0, e = loops.size(); i < e; ++i) {
668 operandsDefinedAbove[i] = i;
669 for (
unsigned j = 0;
j < i; ++
j) {
671 loops[i].getUpperBound(),
674 operandsDefinedAbove[i] =
j;
685 iterArgChainStart[0] = 0;
686 for (
unsigned i = 1, e = loops.size(); i < e; ++i) {
688 iterArgChainStart[i] = i;
689 auto outerloop = loops[i - 1];
690 auto innerLoop = loops[i];
691 if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
694 if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) {
697 auto outerloopTerminator = outerloop.getBody()->getTerminator();
698 if (!llvm::equal(outerloopTerminator->getOperands(),
699 innerLoop.getResults())) {
702 iterArgChainStart[i] = iterArgChainStart[i - 1];
708 for (
unsigned end = loops.size(); end > 0; --end) {
710 for (; start < end - 1; ++start) {
712 *std::max_element(std::next(operandsDefinedAbove.begin(), start),
713 std::next(operandsDefinedAbove.begin(), end));
716 if (iterArgChainStart[end - 1] > start)
725 if (start != end - 1)
733 ArrayRef<std::vector<unsigned>> combinedDimensions) {
739 auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
740 for (
auto &dims : sortedDimensions)
745 normalizedUpperBounds;
746 for (
unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
749 Value lb = loops.getLowerBound()[i];
750 Value ub = loops.getUpperBound()[i];
751 Value step = loops.getStep()[i];
753 normalizedLowerBounds.push_back(newLoopParams.lowerBound);
754 normalizedUpperBounds.push_back(newLoopParams.upperBound);
755 normalizedSteps.push_back(newLoopParams.step);
766 for (
auto &sortedDimension : sortedDimensions) {
768 for (
auto idx : sortedDimension) {
769 newUpperBound = rewriter.
create<arith::MulIOp>(
770 loc, newUpperBound, normalizedUpperBounds[idx]);
772 lowerBounds.push_back(cst0);
773 steps.push_back(cst1);
774 upperBounds.push_back(newUpperBound);
783 auto newPloop = rewriter.
create<scf::ParallelOp>(
784 loc, lowerBounds, upperBounds, steps,
786 for (
unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
787 Value previous = ploopIVs[i];
788 unsigned numberCombinedDimensions = combinedDimensions[i].size();
790 for (
unsigned j = numberCombinedDimensions - 1;
j > 0; --
j) {
791 unsigned idx = combinedDimensions[i][
j];
794 Value iv = insideBuilder.create<arith::RemSIOp>(
795 loc, previous, normalizedUpperBounds[idx]);
801 previous = insideBuilder.create<arith::DivSIOp>(
802 loc, previous, normalizedUpperBounds[idx]);
806 unsigned idx = combinedDimensions[i][0];
808 previous, loops.getRegion());
813 loops.getBody()->back().
erase();
814 newPloop.getBody()->getOperations().splice(
816 loops.getBody()->getOperations());
829 return op != inner.getOperation();
834 for (
auto &op : outer.getBody()->without_terminator()) {
836 if (&op == inner.getOperation())
839 if (forwardSlice.count(&op) > 0) {
844 if (isa<scf::ForOp>(op))
857 toHoist.push_back(&op);
859 auto *outerForOp = outer.getOperation();
860 for (
auto *op : toHoist)
871 const Loops &interTile = tileLoops.first;
872 const Loops &intraTile = tileLoops.second;
873 auto size = interTile.size();
874 assert(size == intraTile.size());
877 for (
unsigned s = 1; s < size; ++s)
880 for (
unsigned s = 1; s < size; ++s)
890 template <
typename T>
894 for (
unsigned i = 0; i < maxLoops; ++i) {
895 forOps.push_back(rootForOp);
897 if (body.
begin() != std::prev(body.
end(), 2))
900 rootForOp = dyn_cast<T>(&body.
front());
908 auto originalStep = forOp.getStep();
909 auto iv = forOp.getInductionVar();
912 forOp.setStep(b.
create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
915 for (
auto t : targets) {
917 auto begin = t.getBody()->begin();
918 auto nOps = t.getBody()->getOperations().size();
922 Value stepped = b.
create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
924 b.
create<arith::MinSIOp>(t.getLoc(), forOp.getUpperBound(), stepped);
927 auto newForOp = b.
create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
928 newForOp.getBody()->getOperations().splice(
929 newForOp.getBody()->getOperations().begin(),
930 t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
932 newForOp.getRegion());
934 innerLoops.push_back(newForOp);
942 template <
typename SizeType>
950 assert(res.size() == 1 &&
"Expected 1 inner forOp");
959 for (
auto it : llvm::zip(forOps, sizes)) {
960 auto step =
stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
962 currentTargets = step;
971 assert(loops.size() == 1);
972 res.push_back(loops[0]);
981 forOps.reserve(sizes.size());
983 if (forOps.size() < sizes.size())
984 sizes = sizes.take_front(forOps.size());
999 forOps.reserve(sizes.size());
1001 if (forOps.size() < sizes.size())
1002 sizes = sizes.take_front(forOps.size());
1009 tileSizes.reserve(sizes.size());
1010 for (
unsigned i = 0, e = sizes.size(); i < e; ++i) {
1011 assert(sizes[i] > 0 &&
"expected strictly positive size for strip-mining");
1013 auto forOp = forOps[i];
1015 auto loc = forOp.getLoc();
1016 Value diff = builder.
create<arith::SubIOp>(loc, forOp.getUpperBound(),
1017 forOp.getLowerBound());
1019 Value iterationsPerBlock =
1021 tileSizes.push_back(iterationsPerBlock);
1025 auto intraTile =
tile(forOps, tileSizes, forOps.back());
1026 TileLoops tileLoops = std::make_pair(forOps, intraTile);
1037 scf::ForallOp source,
1039 unsigned numTargetOuts = target.getNumResults();
1040 unsigned numSourceOuts = source.getNumResults();
1044 llvm::append_range(fusedOuts, target.getOutputs());
1045 llvm::append_range(fusedOuts, source.getOutputs());
1049 scf::ForallOp fusedLoop = rewriter.
create<scf::ForallOp>(
1050 source.getLoc(), source.getMixedLowerBound(), source.getMixedUpperBound(),
1051 source.getMixedStep(), fusedOuts, source.getMapping());
1055 mapping.
map(target.getInductionVars(), fusedLoop.getInductionVars());
1056 mapping.map(source.getInductionVars(), fusedLoop.getInductionVars());
1059 mapping.map(target.getRegionIterArgs(),
1060 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1061 mapping.map(source.getRegionIterArgs(),
1062 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1066 for (
Operation &op : target.getBody()->without_terminator())
1067 rewriter.
clone(op, mapping);
1068 for (
Operation &op : source.getBody()->without_terminator())
1069 rewriter.
clone(op, mapping);
1072 scf::InParallelOp targetTerm = target.getTerminator();
1073 scf::InParallelOp sourceTerm = source.getTerminator();
1074 scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
1076 for (
Operation &op : targetTerm.getYieldingOps())
1077 rewriter.
clone(op, mapping);
1078 for (
Operation &op : sourceTerm.getYieldingOps())
1079 rewriter.
clone(op, mapping);
1082 rewriter.
replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
1083 rewriter.
replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1091 unsigned numTargetOuts = target.getNumResults();
1092 unsigned numSourceOuts = source.getNumResults();
1096 llvm::append_range(fusedInitArgs, target.getInitArgs());
1097 llvm::append_range(fusedInitArgs, source.getInitArgs());
1102 scf::ForOp fusedLoop = rewriter.
create<scf::ForOp>(
1103 source.getLoc(), source.getLowerBound(), source.getUpperBound(),
1104 source.getStep(), fusedInitArgs);
1108 mapping.
map(target.getInductionVar(), fusedLoop.getInductionVar());
1109 mapping.map(target.getRegionIterArgs(),
1110 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1111 mapping.map(source.getInductionVar(), fusedLoop.getInductionVar());
1112 mapping.map(source.getRegionIterArgs(),
1113 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1117 for (
Operation &op : target.getBody()->without_terminator())
1118 rewriter.
clone(op, mapping);
1119 for (
Operation &op : source.getBody()->without_terminator())
1120 rewriter.
clone(op, mapping);
1124 for (
Value operand : target.getBody()->getTerminator()->getOperands())
1125 yieldResults.push_back(mapping.lookupOrDefault(operand));
1126 for (
Value operand : source.getBody()->getTerminator()->getOperands())
1127 yieldResults.push_back(mapping.lookupOrDefault(operand));
1128 if (!yieldResults.empty())
1129 rewriter.
create<scf::YieldOp>(source.getLoc(), yieldResults);
1132 rewriter.
replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
1133 rewriter.
replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOp.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of scf::ForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor',...
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static std::pair< SmallVector< Value >, SmallPtrSet< Operation *, 2 > > delinearizeInductionVariable(RewriterBase &rewriter, Location loc, Value linearizedIv, ArrayRef< Value > ubs)
For each original loop, the value of the induction variable can be obtained by dividing the induction...
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, ArrayRef< Value > values)
Helper function to multiply a sequence of values.
static LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, Value lb, Value ub, Value step)
Transform a loop with a strictly positive step for i = lb to ub step s into a 0-based loop with step ...
static void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, Value origLb, Value origStep)
Get back the original induction variable values after loop normalization.
static llvm::ManagedStatic< PassManagerOptions > options
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIntegerAttr(Type type, int64_t value)
TypedAttr getZeroAttr(Type type)
MLIRContext * getContext() const
This class provides support for representing a failure result, or a valid value of type T.
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=std::nullopt, ArrayRef< Location > locs=std::nullopt)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceAllUsesExcept(Value from, Value to, Operation *exceptedUser)
Find uses of from and replace them with to except if the user is exceptedUser.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
virtual void inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into block 'dest' before the given position.
This class provides an abstraction over the various different ranges of value types.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Specialization of arith.constant op that returns an integer of index type.
Operation * getOwner() const
Return the owner of this operand.
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
LogicalResult loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
bool isConstantIntValue(OpFoldResult ofr, int64_t value)
Return true if ofr is constant integer equal to value.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an scf.for to find a band to coalesce.
std::pair< Loops, Loops > TileLoops
int64_t ceilDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's ceildiv operation on constants.
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
This class represents an efficient way to signal success or failure.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.