25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/Support/DebugLog.h"
33 #define DEBUG_TYPE "scf-utils"
38 bool replaceIterOperandsUsesInLoop) {
44 assert(loopNest.size() <= 10 &&
45 "exceeded recursion limit when yielding value from loop nest");
77 if (loopNest.size() == 1) {
79 cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
80 rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
82 return {innerMostLoop};
92 innerNewBBArgs, newYieldValuesFn,
93 replaceIterOperandsUsesInLoop);
94 return llvm::to_vector(llvm::map_range(
95 newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
98 scf::ForOp outerMostLoop =
99 cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
100 rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
101 newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
118 func::CallOp *callOp) {
119 assert(!funcName.empty() && "funcName cannot be empty");
133 ValueRange outlinedValues(captures.getArrayRef());
140 outlinedFuncArgTypes.push_back(arg.getType());
141 outlinedFuncArgLocs.push_back(arg.getLoc());
143 for (Value value : outlinedValues) {
144 outlinedFuncArgTypes.push_back(value.getType());
145 outlinedFuncArgLocs.push_back(value.getLoc());
147 FunctionType outlinedFuncType =
151 func::FuncOp::create(rewriter, loc, funcName, outlinedFuncType);
152 Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
157 auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
162 originalBlock, outlinedFuncBody,
163 outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
166 func::ReturnOp::create(rewriter, loc, originalTerminator->getResultTypes(),
173 ®ion, region.
begin(),
174 TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
176 .take_front(numOriginalBlockArguments));
181 llvm::append_range(callValues, newBlock->getArguments());
182 llvm::append_range(callValues, outlinedValues);
183 auto call = func::CallOp::create(rewriter, loc, outlinedFunc, callValues);
192 rewriter.clone(*originalTerminator, bvm);
193 rewriter.eraseOp(originalTerminator);
198 for (auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
199 outlinedValues.size()))) {
200 Value orig = std::get<0>(it);
201 Value repl = std::get<1>(it);
210 return outlinedFunc->isProperAncestor(opOperand.getOwner());
218 func::FuncOp *thenFn, StringRef thenFnName,
219 func::FuncOp *elseFn, StringRef elseFnName) {
222 FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
223 if (thenFn && !ifOp.getThenRegion().empty()) {
225 rewriter, loc, ifOp.getThenRegion(), thenFnName);
226 if (failed(outlinedFuncOpOrFailure))
228 *thenFn = *outlinedFuncOpOrFailure;
230 if (elseFn && !ifOp.getElseRegion().empty()) {
232 rewriter, loc, ifOp.getElseRegion(), elseFnName);
233 if (failed(outlinedFuncOpOrFailure))
235 *elseFn = *outlinedFuncOpOrFailure;
242 assert(rootOp != nullptr && "Root operation must not be a nullptr.");
243 bool rootEnclosesPloops = false;
245 for (Block &block : region.getBlocks()) {
248 rootEnclosesPloops |= enclosesPloops;
249 if (auto ploop = dyn_cast<scf::ParallelOp>(op)) {
250 rootEnclosesPloops = true;
254 result.push_back(ploop);
259 return rootEnclosesPloops;
267 assert(divisor > 0 && "expected positive divisor");
269 "expected integer or index-typed value");
271 Value divisorMinusOneCst = arith::ConstantOp::create(
273 Value divisorCst = arith::ConstantOp::create(
275 Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOneCst);
276 return arith::DivUIOp::create(builder, loc, sum, divisorCst);
286 "expected integer or index-typed value");
287 Value cstOne = arith::ConstantOp::create(
289 Value divisorMinusOne = arith::SubIOp::create(builder, loc, divisor, cstOne);
290 Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOne);
291 return arith::DivUIOp::create(builder, loc, sum, divisor);
295 Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
303 auto findOriginalSrcOp =
308 while (srcOp && clonedToSrcOpsMap.contains(srcOp))
309 srcOp = clonedToSrcOpsMap.lookup(srcOp);
319 annotateFn = noopAnnotateFn;
329 for (unsigned i = 1; i < unrollFactor; i++) {
332 operandMap.map(iterArgs, lastYielded);
337 Value ivUnroll = ivRemapFn(i, iv, builder);
338 operandMap.map(iv, ivUnroll);
342 for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
345 annotateFn(i, clonedOp, builder);
346 if (clonedToSrcOpsMap)
347 clonedToSrcOpsMap->map(clonedOp,
348 findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
352 for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
358 for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
359 annotateFn(0, &*it, builder);
368 scf::ForOp forOp, uint64_t unrollFactor,
370 assert(unrollFactor > 0 && "expected positive unroll factor");
373 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
380 auto loc = forOp.getLoc();
381 Value step = forOp.getStep();
382 Value upperBoundUnrolled;
384 bool generateEpilogueLoop = true;
386 std::optional<APInt> constTripCount = forOp.getStaticTripCount();
387 if (constTripCount) {
392 if (unrollFactor == 1) {
393 if (*constTripCount == 1 &&
394 failed(forOp.promoteIfSingleIteration(rewriter)))
399 int64_t tripCountEvenMultiple =
400 constTripCount->getSExtValue() -
401 (constTripCount->getSExtValue() % unrollFactor);
402 int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
403 int64_t stepUnrolledCst = stepCst * unrollFactor;
406 generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
407 if (generateEpilogueLoop)
408 upperBoundUnrolled = arith::ConstantOp::create(
411 upperBoundUnrolledCst));
413 upperBoundUnrolled = forOp.getUpperBound();
417 stepCst == stepUnrolledCst
419 : arith::ConstantOp::create(boundsBuilder, loc,
421 step.getType(), stepUnrolledCst));
426 auto lowerBound = forOp.getLowerBound();
427 auto upperBound = forOp.getUpperBound();
429 arith::SubIOp::create(boundsBuilder, loc, upperBound, lowerBound);
431 Value unrollFactorCst = arith::ConstantOp::create(
435 arith::RemSIOp::create(boundsBuilder, loc, tripCount, unrollFactorCst);
437 Value tripCountEvenMultiple =
438 arith::SubIOp::create(boundsBuilder, loc, tripCount, tripCountRem);
440 upperBoundUnrolled = arith::AddIOp::create(
441 boundsBuilder, loc, lowerBound,
442 arith::MulIOp::create(boundsBuilder, loc, tripCountEvenMultiple, step));
445 arith::MulIOp::create(boundsBuilder, loc, step, unrollFactorCst);
451 if (generateEpilogueLoop) {
452 OpBuilder epilogueBuilder(forOp->getContext());
454 auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
455 epilogueForOp.setLowerBound(upperBoundUnrolled);
458 auto results = forOp.getResults();
459 auto epilogueResults = epilogueForOp.getResults();
461 for (auto e : llvm::zip(results, epilogueResults)) {
462 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
464 epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
465 epilogueForOp.getInitArgs().size(), results);
466 if (epilogueForOp.promoteIfSingleIteration(rewriter).failed())
471 forOp.setUpperBound(upperBoundUnrolled);
472 forOp.setStep(stepUnrolled);
474 auto iterArgs = ValueRange(forOp.getRegionIterArgs());
475 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
478 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
481 auto stride = arith::MulIOp::create(
483 arith::ConstantOp::create(b, loc,
484 b.getIntegerAttr(iv.getType(), i)));
485 return arith::AddIOp::create(b, loc, iv, stride);
487 annotateFn, iterArgs, yieldedValues);
489 if (forOp.promoteIfSingleIteration(rewriter).failed())
497 std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
498 if (!mayBeConstantTripCount.has_value())
500 const APInt &tripCount = *mayBeConstantTripCount;
501 if (tripCount.isZero())
503 if (tripCount.getSExtValue() == 1)
504 return forOp.promoteIfSingleIteration(rewriter);
511 auto walkResult = forOp.walk([&](scf::ForOp innerForOp) {
512 if (!forOp.isDefinedOutsideOfLoop(innerForOp.getLowerBound()) ||
513 !forOp.isDefinedOutsideOfLoop(innerForOp.getUpperBound()) ||
514 !forOp.isDefinedOutsideOfLoop(innerForOp.getStep()))
519 return !walkResult.wasInterrupted();
524 uint64_t unrollJamFactor) {
525 assert(unrollJamFactor > 0 && "unroll jam factor should be positive");
527 if (unrollJamFactor == 1)
533 LDBG() << "failed to unroll and jam: inner bounds are not invariant";
538 if (forOp->getNumResults() > 0) {
539 LDBG() << "failed to unroll and jam: unsupported loop with results";
545 std::optional<APInt> tripCount = forOp.getStaticTripCount();
546 if (!tripCount.has_value()) {
548 LDBG() << "failed to unroll and jam: trip count could not be determined";
551 if (unrollJamFactor > tripCount->getZExtValue()) {
552 LDBG() << "unroll and jam factor is greater than trip count, set factor to "
555 unrollJamFactor = tripCount->getZExtValue();
556 } else if (tripCount->getSExtValue() % unrollJamFactor != 0) {
557 LDBG() << "failed to unroll and jam: unsupported trip count that is not a "
558 "multiple of unroll jam factor";
563 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
573 forOp.walk([&](scf::ForOp innerForOp) { innerLoops.push_back(innerForOp); });
584 for (scf::ForOp oldForOp : innerLoops) {
586 ValueRange oldIterOperands = oldForOp.getInits();
587 ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
589 cast<scf::YieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
592 for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
593 dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
594 dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
598 bool forOpReplaced = oldForOp == forOp;
599 scf::ForOp newForOp =
600 cast<scf::ForOp>(*oldForOp.replaceWithAdditionalYields(
601 rewriter, dupIterOperands, false,
603 return dupYieldOperands;
605 newInnerLoops.push_back(newForOp);
610 ValueRange newIterArgs = newForOp.getRegionIterArgs();
611 unsigned oldNumIterArgs = oldIterArgs.size();
612 ValueRange newResults = newForOp.getResults();
613 unsigned oldNumResults = newResults.size() / unrollJamFactor;
614 assert(oldNumIterArgs == oldNumResults &&
615 "oldNumIterArgs must be the same as oldNumResults");
616 for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
617 for (unsigned j = 0; j < oldNumIterArgs; ++j) {
621 operandMaps[i - 1].map(newIterArgs[j],
622 newIterArgs[i * oldNumIterArgs + j]);
623 operandMaps[i - 1].map(newResults[j],
624 newResults[i * oldNumResults + j]);
631 int64_t step = forOp.getConstantStep()->getSExtValue();
633 forOp.getLoc(), forOp.getStep(),
635 forOp.getLoc(), rewriter.getIndexAttr(unrollJamFactor)));
636 forOp.setStep(newStep);
637 auto forOpIV = forOp.getInductionVar();
640 for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
641 for (auto &subBlock : subBlocks) {
644 OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
648 if (!forOpIV.use_empty()) {
653 builder.createOrFold<arith::AddIOp>(forOp.getLoc(), forOpIV, ivTag);
654 operandMaps[i - 1].map(forOpIV, ivUnroll);
657 for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
658 builder.clone(*it, operandMaps[i - 1]);
661 for (auto newForOp : newInnerLoops) {
662 unsigned oldNumIterOperands =
663 newForOp.getNumRegionIterArgs() / unrollJamFactor;
664 unsigned numControlOperands = newForOp.getNumControlOperands();
665 auto yieldOp = cast<scf::YieldOp>(newForOp.getBody()->getTerminator());
666 unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
667 assert(oldNumIterOperands == oldNumYieldOperands &&
668 "oldNumIterOperands must be the same as oldNumYieldOperands");
669 for (unsigned j = 0; j < oldNumIterOperands; ++j) {
673 newForOp.setOperand(numControlOperands + i * oldNumIterOperands + j,
674 operandMaps[i - 1].lookupOrDefault(
675 newForOp.getOperand(numControlOperands + j)));
677 i * oldNumYieldOperands + j,
678 operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(j)));
684 (void)forOp.promoteIfSingleIteration(rewriter);
692 Range normalizedLoopBounds;
698 normalizedLoopBounds.size =
700 return normalizedLoopBounds;
712 bool isZeroBased = false;
714 isZeroBased = lbCst.value() == 0;
716 bool isStepOne = false;
718 isStepOne = stepCst.value() == 1;
722 "expected matching types");
727 if (isZeroBased && isStepOne)
728 return {lb, ub, step};
738 newUpperBound = rewriter.createOrFold<arith::CeilDivSIOp>(
746 return {newLowerBound, newUpperBound, newStep};
760 Value denormalizedIvVal =
767 if (Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
768 preservedUses.insert(preservedUse);
777 if (getType(origLb).isIndex()) {
781 Value denormalizedIv;
786 Value scaled = normalizedIv;
788 Value origStepValue =
790 scaled = arith::MulIOp::create(rewriter, loc, normalizedIv, origStepValue);
793 denormalizedIv = scaled;
796 denormalizedIv = arith::AddIOp::create(rewriter, loc, scaled, origLbValue);
805 assert(!values.empty() && "unexpected empty array");
810 for (auto v : values) {
820 assert(!values.empty() && "unexpected empty list");
826 std::optional<Value> productOf;
827 for (auto v : values) {
829 if (vOne && vOne.value() == 1)
832 productOf = arith::MulIOp::create(rewriter, loc, productOf.value(), v)
838 productOf = arith::ConstantOp::create(
842 return productOf.value();
858 Operation *delinearizedOp = affine::AffineDelinearizeIndexOp::create(
859 rewriter, loc, linearizedIv, ubs);
860 auto resultVals = llvm::map_to_vector(
868 llvm::BitVector isUbOne(ubs.size());
871 if (ubCst && ubCst.value() == 1)
876 unsigned numLeadingOneUbs = 0;
878 if (!isUbOne.test(index)) {
881 delinearizedIvs[index] = arith::ConstantOp::create(
882 rewriter, loc, rewriter.getZeroAttr(ub.getType()));
886 Value previous = linearizedIv;
887 for (unsigned i = numLeadingOneUbs, e = ubs.size(); i < e; ++i) {
888 unsigned idx = ubs.size() - (i - numLeadingOneUbs) - 1;
889 if (i != numLeadingOneUbs && !isUbOne.test(idx + 1)) {
890 previous = arith::DivSIOp::create(rewriter, loc, previous, ubs[idx + 1]);
895 if (!isUbOne.test(idx)) {
896 iv = arith::RemSIOp::create(rewriter, loc, previous, ubs[idx]);
899 iv = arith::ConstantOp::create(
900 rewriter, loc, rewriter.getZeroAttr(ubs[idx].getType()));
903 delinearizedIvs[idx] = iv;
905 return {delinearizedIvs, preservedUsers};
910 if (loops.size() < 2)
913 scf::ForOp innermost = loops.back();
914 scf::ForOp outermost = loops.front();
918 for (auto loop : loops) {
921 Value lb = loop.getLowerBound();
922 Value ub = loop.getUpperBound();
923 Value step = loop.getStep();
929 newLoopRange.offset));
933 newLoopRange.stride));
937 loop.getInductionVar(), lb, step);
946 loops, [](auto loop) { return loop.getUpperBound(); });
948 outermost.setUpperBound(upperBound);
952 rewriter, loc, outermost.getInductionVar(), upperBounds);
956 for (int i = loops.size() - 1; i > 0; --i) {
957 auto outerLoop = loops[i - 1];
958 auto innerLoop = loops[i];
960 Operation *innerTerminator = innerLoop.getBody()->getTerminator();
961 auto yieldedVals = llvm::to_vector(innerTerminator->getOperands());
962 assert(llvm::equal(outerLoop.getRegionIterArgs(), innerLoop.getInitArgs()));
963 for (Value &yieldedVal : yieldedVals) {
966 auto iter = llvm::find(innerLoop.getRegionIterArgs(), yieldedVal);
967 if (iter != innerLoop.getRegionIterArgs().end()) {
968 unsigned iterArgIndex = iter - innerLoop.getRegionIterArgs().begin();
970 assert(iterArgIndex < innerLoop.getInitArgs().size());
971 yieldedVal = innerLoop.getInitArgs()[iterArgIndex];
974 rewriter.eraseOp(innerTerminator);
977 innerBlockArgs.push_back(delinearizeIvs[i]);
978 llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
981 rewriter.replaceOp(innerLoop, yieldedVals);
990 IRRewriter rewriter(loops.front().getContext());
995 LogicalResult result(failure());
1005 for (unsigned i = 0, e = loops.size(); i < e; ++i) {
1006 operandsDefinedAbove[i] = i;
1007 for (unsigned j = 0; j < i; ++j) {
1009 loops[i].getUpperBound(),
1010 loops[i].getStep()};
1012 operandsDefinedAbove[i] = j;
1023 iterArgChainStart[0] = 0;
1024 for (unsigned i = 1, e = loops.size(); i < e; ++i) {
1026 iterArgChainStart[i] = i;
1027 auto outerloop = loops[i - 1];
1028 auto innerLoop = loops[i];
1029 if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
1032 if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) {
1035 auto outerloopTerminator = outerloop.getBody()->getTerminator();
1036 if (!llvm::equal(outerloopTerminator->getOperands(),
1037 innerLoop.getResults())) {
1040 iterArgChainStart[i] = iterArgChainStart[i - 1];
1046 for (unsigned end = loops.size(); end > 0; --end) {
1048 for (; start < end - 1; ++start) {
1050 *std::max_element(std::next(operandsDefinedAbove.begin(), start),
1051 std::next(operandsDefinedAbove.begin(), end));
1054 if (iterArgChainStart[end - 1] > start)
1063 if (start != end - 1)
1071 ArrayRef<std::vector<unsigned>> combinedDimensions) {
1077 auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
1078 for (auto &dims : sortedDimensions)
1083 for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
1086 Value lb = loops.getLowerBound()[i];
1087 Value ub = loops.getUpperBound()[i];
1088 Value step = loops.getStep()[i];
1091 rewriter, loops.getLoc(), newLoopRange.size));
1102 for (auto &sortedDimension : sortedDimensions) {
1104 for (auto idx : sortedDimension) {
1105 newUpperBound = arith::MulIOp::create(rewriter, loc, newUpperBound,
1106 normalizedUpperBounds[idx]);
1108 lowerBounds.push_back(cst0);
1109 steps.push_back(cst1);
1110 upperBounds.push_back(newUpperBound);
1119 auto newPloop = scf::ParallelOp::create(
1120 rewriter, loc, lowerBounds, upperBounds, steps,
1122 for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
1123 Value previous = ploopIVs[i];
1124 unsigned numberCombinedDimensions = combinedDimensions[i].size();
1126 for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
1127 unsigned idx = combinedDimensions[i][j];
1130 Value iv = arith::RemSIOp::create(insideBuilder, loc, previous,
1131 normalizedUpperBounds[idx]);
1132 replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
1137 previous = arith::DivSIOp::create(insideBuilder, loc, previous,
1138 normalizedUpperBounds[idx]);
1142 unsigned idx = combinedDimensions[i][0];
1144 previous, loops.getRegion());
1149 loops.getBody()->back().erase();
1150 newPloop.getBody()->getOperations().splice(
1152 loops.getBody()->getOperations());
1165 return op != inner.getOperation();
1168 LogicalResult status = success();
1170 for (auto &op : outer.getBody()->without_terminator()) {
1172 if (&op == inner.getOperation())
1175 if (forwardSlice.count(&op) > 0) {
1180 if (isa<scf::ForOp>(op))
1183 if (op.getNumRegions() > 0) {
1193 toHoist.push_back(&op);
1195 auto *outerForOp = outer.getOperation();
1196 for (auto *op : toHoist)
1197 op->moveBefore(outerForOp);
1206 LogicalResult status = success();
1207 const Loops &interTile = tileLoops.first;
1208 const Loops &intraTile = tileLoops.second;
1209 auto size = interTile.size();
1210 assert(size == intraTile.size());
1213 for (unsigned s = 1; s < size; ++s)
1214 status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
1216 for (unsigned s = 1; s < size; ++s)
1217 status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
1226 template <typename T>
1230 for (unsigned i = 0; i < maxLoops; ++i) {
1231 forOps.push_back(rootForOp);
1233 if (body.begin() != std::prev(body.end(), 2))
1236 rootForOp = dyn_cast<T>(&body.front());
1244 assert(!forOp.getUnsignedCmp() && "unsigned loops are not supported");
1245 auto originalStep = forOp.getStep();
1246 auto iv = forOp.getInductionVar();
1249 forOp.setStep(arith::MulIOp::create(b, forOp.getLoc(), originalStep, factor));
1252 for (auto t : targets) {
1253 assert(!t.getUnsignedCmp() && "unsigned loops are not supported");
1256 auto begin = t.getBody()->begin();
1257 auto nOps = t.getBody()->getOperations().size();
1260 auto b = OpBuilder::atBlockTerminator((t.getBody()));
1261 Value stepped = arith::AddIOp::create(b, t.getLoc(), iv, forOp.getStep());
1263 arith::MinSIOp::create(b, t.getLoc(), forOp.getUpperBound(), stepped);
1266 auto newForOp = scf::ForOp::create(b, t.getLoc(), iv, ub, originalStep);
1267 newForOp.getBody()->getOperations().splice(
1268 newForOp.getBody()->getOperations().begin(),
1269 t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
1271 newForOp.getRegion());
1273 innerLoops.push_back(newForOp);
1281 template <typename SizeType>
1283 scf::ForOp target) {
1289 assert(res.size() == 1 && "Expected 1 inner forOp");
1298 for (auto it : llvm::zip(forOps, sizes)) {
1299 auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
1300 res.push_back(step);
1301 currentTargets = step;
1307 scf::ForOp target) {
1310 res.push_back(llvm::getSingleElement(loops));
1318 forOps.reserve(sizes.size());
1320 if (forOps.size() < sizes.size())
1321 sizes = sizes.take_front(forOps.size());
1336 forOps.reserve(sizes.size());
1338 if (forOps.size() < sizes.size())
1339 sizes = sizes.take_front(forOps.size());
1346 tileSizes.reserve(sizes.size());
1347 for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
1348 assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
1350 auto forOp = forOps[i];
1352 auto loc = forOp.getLoc();
1353 Value diff = arith::SubIOp::create(builder, loc, forOp.getUpperBound(),
1354 forOp.getLowerBound());
1356 Value iterationsPerBlock =
1358 tileSizes.push_back(iterationsPerBlock);
1362 auto intraTile = tile(forOps, tileSizes, forOps.back());
1363 TileLoops tileLoops = std::make_pair(forOps, intraTile);
1374 scf::ForallOp source,
1376 unsigned numTargetOuts = target.getNumResults();
1377 unsigned numSourceOuts = source.getNumResults();
1381 llvm::append_range(fusedOuts, target.getOutputs());
1382 llvm::append_range(fusedOuts, source.getOutputs());
1386 scf::ForallOp fusedLoop = scf::ForallOp::create(
1387 rewriter, source.getLoc(), source.getMixedLowerBound(),
1388 source.getMixedUpperBound(), source.getMixedStep(), fusedOuts,
1389 source.getMapping());
1393 mapping.map(target.getInductionVars(), fusedLoop.getInductionVars());
1394 mapping.map(source.getInductionVars(), fusedLoop.getInductionVars());
1397 mapping.map(target.getRegionIterArgs(),
1398 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1399 mapping.map(source.getRegionIterArgs(),
1400 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1404 for (Operation &op : target.getBody()->without_terminator())
1405 rewriter.clone(op, mapping);
1406 for (Operation &op : source.getBody()->without_terminator())
1407 rewriter.clone(op, mapping);
1410 scf::InParallelOp targetTerm = target.getTerminator();
1411 scf::InParallelOp sourceTerm = source.getTerminator();
1412 scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
1414 for (Operation &op : targetTerm.getYieldingOps())
1415 rewriter.clone(op, mapping);
1416 for (Operation &op : sourceTerm.getYieldingOps())
1417 rewriter.clone(op, mapping);
1420 rewriter.replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
1421 rewriter.replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1429 assert(source.getUnsignedCmp() == target.getUnsignedCmp() &&
1430 "incompatible signedness");
1431 unsigned numTargetOuts = target.getNumResults();
1432 unsigned numSourceOuts = source.getNumResults();
1436 llvm::append_range(fusedInitArgs, target.getInitArgs());
1437 llvm::append_range(fusedInitArgs, source.getInitArgs());
1442 scf::ForOp fusedLoop = scf::ForOp::create(
1443 rewriter, source.getLoc(), source.getLowerBound(), source.getUpperBound(),
1444 source.getStep(), fusedInitArgs, nullptr,
1445 source.getUnsignedCmp());
1449 mapping.map(target.getInductionVar(), fusedLoop.getInductionVar());
1450 mapping.map(target.getRegionIterArgs(),
1451 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1452 mapping.map(source.getInductionVar(), fusedLoop.getInductionVar());
1453 mapping.map(source.getRegionIterArgs(),
1454 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1458 for (Operation &op : target.getBody()->without_terminator())
1459 rewriter.clone(op, mapping);
1460 for (Operation &op : source.getBody()->without_terminator())
1461 rewriter.clone(op, mapping);
1465 for (Value operand : target.getBody()->getTerminator()->getOperands())
1467 for (Value operand : source.getBody()->getTerminator()->getOperands())
1469 if (!yieldResults.empty())
1470 scf::YieldOp::create(rewriter, source.getLoc(), yieldResults);
1473 rewriter.replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
1474 rewriter.replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1480 scf::ForallOp forallOp) {
1485 if (forallOp.isNormalized())
1489 auto loc = forallOp.getLoc();
1492 for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
1493 Range normalizedLoopParams =
1495 newUbs.push_back(normalizedLoopParams.size);
1501 auto normalizedForallOp = scf::ForallOp::create(
1502 rewriter, loc, newUbs, forallOp.getOutputs(), forallOp.getMapping(),
1506 normalizedForallOp.getBodyRegion(),
1507 normalizedForallOp.getBodyRegion().begin());
1509 rewriter.eraseBlock(&normalizedForallOp.getBodyRegion().back());
1513 for (auto [idx, iv] :
1520 rewriter.replaceOp(forallOp, normalizedForallOp);
1521 return normalizedForallOp;
1526 assert(!loops.empty() && "unexpected empty loop nest");
1527 if (loops.size() == 1)
1528 return isa_and_nonnull<scf::ForOp>(loops.front().getOperation());
1529 for (auto [outerLoop, innerLoop] :
1530 llvm::zip_equal(loops.drop_back(), loops.drop_front())) {
1531 auto outerFor = dyn_cast_or_null<scf::ForOp>(outerLoop.getOperation());
1532 auto innerFor = dyn_cast_or_null<scf::ForOp>(innerLoop.getOperation());
1533 if (!outerFor || !innerFor)
1535 auto outerBBArgs = outerFor.getRegionIterArgs();
1536 auto innerIterArgs = innerFor.getInitArgs();
1537 if (outerBBArgs.size() != innerIterArgs.size())
1540 for (auto [outerBBArg, innerIterArg] :
1541 llvm::zip_equal(outerBBArgs, innerIterArgs)) {
1542 if (!llvm::hasSingleElement(outerBBArg.getUses()) ||
1543 innerIterArg != outerBBArg)
1548 cast<scf::YieldOp>(outerFor.getBody()->getTerminator())->getOperands();
1549 ValueRange innerResults = innerFor.getResults();
1550 if (outerYields.size() != innerResults.size())
1552 for (auto [outerYield, innerResult] :
1553 llvm::zip_equal(outerYields, innerResults)) {
1554 if (!llvm::hasSingleElement(innerResult.getUses()) ||
1555 outerYield != innerResult)
1564 std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
1565 std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
1566 std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
1567 if (!loBnds || !upBnds || !steps)
1570 for (auto [lb, ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
1575 tripCounts.push_back(numIter->getSExtValue());
1585 const unsigned numLoops = op.getNumLoops();
1586 assert(llvm::none_of(unrollFactors, [](uint64_t f) { return f == 0; }) &&
1587 "Expected positive unroll factors");
1588 assert((!unrollFactors.empty() && (unrollFactors.size() <= numLoops)) &&
1589 "Expected non-empty unroll factors of size <= to the number of loops");
1592 if (llvm::all_of(unrollFactors, [](uint64_t f) { return f == 1; }))
1594 op, "Unrolling not applied if all factors are 1");
1597 if (llvm::hasSingleElement(op.getBody()->getOperations()))
1602 const unsigned firstLoopDimIdx = numLoops - unrollFactors.size();
1607 if (tripCounts.empty())
1609 op, "Failed to compute constant trip counts for the loop. Note that "
1610 "dynamic loop sizes are not supported.");
1612 for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
1613 const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
1614 if (tripCounts[dimIdx] % unrollFactor)
1616 op, "Unroll factors don't divide the iteration space evenly");
1619 std::optional<SmallVector<OpFoldResult>> maybeFoldSteps = op.getLoopSteps();
1620 if (!maybeFoldSteps)
1623 for (auto step : *maybeFoldSteps)
1626 for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
1627 const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
1628 if (unrollFactor == 1)
1630 const size_t origStep = steps[dimIdx];
1631 const int64_t newStep = origStep * unrollFactor;
1635 auto yieldedValues = op.getBody()->getTerminator()->getOperands();
1638 op.getBody(), op.getInductionVars()[dimIdx], unrollFactor,
1641 const AffineExpr expr = b.getAffineDimExpr(0) + (origStep * i);
1643 b.getDimIdentityMap().dropResult(0).insertResult(expr, 0);
1644 return affine::AffineApplyOp::create(b, iv.getLoc(), map,
1647 annotateFn, iterArgs, yieldedValues, &clonedToSrcOpsMap);
1652 op.getStepMutable()[dimIdx].assign(
1653 arith::ConstantIndexOp::create(rewriter, op.getLoc(), newStep));
static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > values)
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOps.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static Range emitNormalizedLoopBoundsForIndexType(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static std::pair< SmallVector< Value >, SmallPtrSet< Operation *, 2 > > delinearizeInductionVariable(RewriterBase &rewriter, Location loc, Value linearizedIv, ArrayRef< Value > ubs)
For each original loop, the value of the induction variable can be obtained by dividing the induction...
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, ArrayRef< Value > values)
Helper function to multiply a sequence of values.
static void denormalizeInductionVariableForIndexType(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
static bool areInnerBoundsInvariant(scf::ForOp forOp)
Check if bounds of all inner loops are defined outside of forOp and return false if not.
static int64_t product(ArrayRef< int64_t > vals)
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of AffineForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor'...
static llvm::ManagedStatic< PassManagerOptions > options
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Base type for affine expression.
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIndexAttr(int64_t value)
IntegerAttr getIntegerAttr(Type type, int64_t value)
TypedAttr getZeroAttr(Type type)
MLIRContext * getContext() const
TypedAttr getOneAttr(Type type)
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
InsertPoint saveInsertionPoint() const
Return a saved insertion point.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void restoreInsertionPoint(InsertPoint ip)
Restore the insert point to a previously saved point.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
result_range getResults()
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceAllUsesExcept(Value from, Value to, Operation *exceptedUser)
Find uses of from and replace them with to except if the user is exceptedUser.
virtual void inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues={})
Inline the operations of block 'source' into block 'dest' before the given position.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
void inlineRegionBefore(Region ®ion, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndex() const
Return true if this is an integer (of any signedness) or an index type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
Specialization of arith.constant op that returns an integer of index type.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Operation * getOwner() const
Return the owner of this operand.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
std::optional< llvm::APSInt > computeUbMinusLb(Value lb, Value ub, bool isSigned)
Helper function to compute the difference between two values.
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
bool isPerfectlyNestedForLoops(MutableArrayRef< LoopLikeOpInterface > loops)
Check if the provided loops are perfectly nested for-loops.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
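A minimal usage sketch (not from this file; `rewriter` and `ifOp` are assumed handles and the outlined function names are made up for illustration): outline both branches of an scf.if into separate functions.
  func::FuncOp thenFn, elseFn;
  if (failed(outlineIfOp(rewriter, ifOp, &thenFn, "outlined_then",
                         &elseFn, "outlined_else")))
    return failure();
  // On success, thenFn/elseFn (for each region that exists) hold the newly
  // created functions, and the corresponding regions now call them.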
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region ®ion)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
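A minimal sketch of driving this API (assumed handles: `rewriter`, a nest ordered outermost-to-innermost in `nest`, and a value `init` defined above the nest): thread one pass-through iter_arg through the whole nest.
  SmallVector<Value> newInits = {init};
  NewYieldValuesFn yieldFn = [](OpBuilder &b, Location loc,
                                ArrayRef<BlockArgument> newBbArgs) {
    // Yield the freshly added block argument unchanged (pure pass-through).
    return SmallVector<Value>{newBbArgs.front()};
  };
  SmallVector<scf::ForOp> newNest = replaceLoopNestWithNewYields(
      rewriter, nest, newInits, yieldFn,
      /*replaceIterOperandsUsesInLoop=*/true);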
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an scf.for to find a band to coalesce.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
void generateUnrolledLoop(Block *loopBodyBlock, Value iv, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues, IRMapping *clonedToSrcOpsMap=nullptr)
Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs' and 'yieldedValues' as the...
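As in the unroller earlier in this file, the ivRemapFn callback offsets the induction variable for the i-th unrolled body copy. A standalone sketch (assuming `forOp` has an index-typed IV and a known constant step `stepCst`) could look like:
  auto ivRemapFn = [&](unsigned i, Value iv, OpBuilder b) -> Value {
    // The IV of the i-th copy is iv + i * step.
    Value offset = arith::ConstantIndexOp::create(b, iv.getLoc(), i * stepCst);
    return arith::AddIOp::create(b, iv.getLoc(), iv, offset);
  };
  ValueRange iterArgs(forOp.getRegionIterArgs());
  generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
                       ivRemapFn, /*annotateFn=*/nullptr, iterArgs,
                       forOp.getBody()->getTerminator()->getOperands());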
std::pair< Loops, Loops > TileLoops
Value getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
llvm::SmallVector< int64_t > getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp)
Get constant trip counts for each of the induction variables of the given loop operation.
LogicalResult loopUnrollFull(scf::ForOp forOp)
Unrolls this loop completely.
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
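A minimal sketch (assuming `ploop` is a 3-D scf.parallel and `rewriter` an existing RewriterBase): merge the first two dimensions and keep the last one separate.
  SmallVector<std::vector<unsigned>> combined = {{0, 1}, {2}};
  collapseParallelLoops(rewriter, ploop, combined);
  // The result is a 2-D scf.parallel; the original IVs of dims 0 and 1 are
  // recovered inside the body via div/rem on the collapsed IV.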
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
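A minimal sketch (assuming `rootForOp` is the outermost loop of a perfect two-deep nest): build index-constant tile sizes and tile the nest.
  OpBuilder b(rootForOp);
  Location loc = rootForOp.getLoc();
  Value ts0 = arith::ConstantIndexOp::create(b, loc, 32);
  Value ts1 = arith::ConstantIndexOp::create(b, loc, 8);
  Loops intraTileLoops = tilePerfectlyNested(rootForOp, {ts0, ts1});
  // intraTileLoops holds the newly created inner (intra-tile) loops.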
FailureOr< UnrolledLoopInfo > loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
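A minimal sketch (with `forOp` assumed to exist): unroll by 4 and inspect whether an epilogue loop was generated for the leftover iterations.
  FailureOr<UnrolledLoopInfo> info =
      loopUnrollByFactor(forOp, /*unrollFactor=*/4);
  if (failed(info))
    return failure();
  if (info->epilogueLoopOp) {
    // The trip count was not a multiple of 4: the remainder runs here.
    scf::ForOp epilogue = *info->epilogueLoopOp;
    (void)epilogue;
  }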
LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor)
Unrolls and jams this scf.for operation by the specified unroll factor.
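A minimal sketch (assuming `funcOp` is a func::FuncOp): try to unroll-and-jam every scf.for in the function by 2, ignoring loops the precondition checks above reject.
  funcOp.walk([](scf::ForOp forOp) {
    // Fails (and leaves the loop untouched) e.g. when the loop has results or
    // when the bounds of inner loops are not invariant.
    (void)loopUnrollJamByFactor(forOp, /*unrollFactor=*/2);
  });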
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
FailureOr< scf::ParallelOp > parallelLoopUnrollByFactors(scf::ParallelOp op, ArrayRef< uint64_t > unrollFactors, RewriterBase &rewriter, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, IRMapping *clonedToSrcOpsMap=nullptr)
Unroll this scf::Parallel loop by the specified unroll factors.
void getUsedValuesDefinedAbove(Region ®ion, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region ®ion, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Get back the original induction variable values after loop normalization.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
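A minimal sketch combining this with getPerfectlyNestedLoops above (assuming `outerForOp` is the root of a perfect nest):
  SmallVector<scf::ForOp> nest;
  getPerfectlyNestedLoops(nest, outerForOp);
  if (nest.size() > 1)
    (void)coalesceLoops(nest); // Flatten the nest into one linearized loop.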
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
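A minimal sketch (assuming `target` and `source` are siblings with identical lower bound, upper bound, and step, and no def-use dependence between them):
  scf::ForOp fused = fuseIndependentSiblingForLoops(target, source, rewriter);
  // The fused loop's results are ordered target-first: the leading results
  // replace target's results, the trailing ones replace source's.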
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
Materialize bounds and step of a zero-based and unit-step loop derived by normalizing the specified b...
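A minimal sketch of the normalize/denormalize pair (assuming `lb`, `ub`, `step` are the original OpFoldResult bounds and `newLoop` is a zero-based, unit-step scf.for built from the returned Range):
  Range r = emitNormalizedLoopBounds(rewriter, loc, lb, ub, step);
  // ... build `newLoop` over [r.offset, r.size) with stride r.stride,
  // i.e. [0, size) with step 1 ...
  rewriter.setInsertionPointToStart(newLoop.getBody());
  denormalizeInductionVariable(rewriter, loc, newLoop.getInductionVar(),
                               /*origLb=*/lb, /*origStep=*/step);
  // Uses of the normalized IV in the body now see lb + iv * step again.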
bool isOneInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 1.
std::optional< APInt > constantTripCount(OpFoldResult lb, OpFoldResult ub, OpFoldResult step, bool isSigned, llvm::function_ref< std::optional< llvm::APSInt >(Value, Value, bool)> computeUbMinusLb)
Return the number of iterations for a loop with a lower bound lb, upper bound ub and step step.
LogicalResult foldDynamicIndexList(SmallVectorImpl< OpFoldResult > &ofrs, bool onlyNonNegative=false, bool onlyNonZero=false)
Returns "success" when any of the elements in ofrs is a constant value.
FailureOr< scf::ForallOp > normalizeForallOp(RewriterBase &rewriter, scf::ForallOp forallOp)
Normalize an scf.forall operation.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
std::optional< scf::ForOp > epilogueLoopOp
std::optional< scf::ForOp > mainLoopOp