25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/SmallVectorExtras.h"
29#include "llvm/Support/DebugLog.h"
34#define DEBUG_TYPE "scf-utils"
39 bool replaceIterOperandsUsesInLoop) {
45 assert(loopNest.size() <= 10 &&
46 "exceeded recursion limit when yielding value from loop nest");
78 if (loopNest.size() == 1) {
80 cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
81 rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
83 return {innerMostLoop};
93 innerNewBBArgs, newYieldValuesFn,
94 replaceIterOperandsUsesInLoop);
95 return llvm::map_to_vector(
96 newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
99 scf::ForOp outerMostLoop =
100 cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
101 rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
102 newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
119 func::CallOp *callOp) {
120 assert(!funcName.empty() &&
"funcName cannot be empty");
134 ValueRange outlinedValues(captures.getArrayRef());
141 outlinedFuncArgTypes.push_back(arg.getType());
142 outlinedFuncArgLocs.push_back(arg.getLoc());
144 for (
Value value : outlinedValues) {
145 outlinedFuncArgTypes.push_back(value.getType());
146 outlinedFuncArgLocs.push_back(value.getLoc());
148 FunctionType outlinedFuncType =
149 FunctionType::get(rewriter.
getContext(), outlinedFuncArgTypes,
152 func::FuncOp::create(rewriter, loc, funcName, outlinedFuncType);
153 Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
158 auto outlinedFuncBlockArgs = outlinedFuncBody->
getArguments();
163 originalBlock, outlinedFuncBody,
164 outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
167 func::ReturnOp::create(rewriter, loc, originalTerminator->
getResultTypes(),
174 ®ion, region.
begin(),
175 TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
177 .take_front(numOriginalBlockArguments));
182 llvm::append_range(callValues, newBlock->
getArguments());
183 llvm::append_range(callValues, outlinedValues);
184 auto call = func::CallOp::create(rewriter, loc, outlinedFunc, callValues);
193 rewriter.
clone(*originalTerminator, bvm);
194 rewriter.
eraseOp(originalTerminator);
199 for (
auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
200 outlinedValues.size()))) {
201 Value orig = std::get<0>(it);
202 Value repl = std::get<1>(it);
211 return outlinedFunc->isProperAncestor(opOperand.
getOwner());
219 func::FuncOp *thenFn, StringRef thenFnName,
220 func::FuncOp *elseFn, StringRef elseFnName) {
223 FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
224 if (thenFn && !ifOp.getThenRegion().empty()) {
226 rewriter, loc, ifOp.getThenRegion(), thenFnName);
227 if (failed(outlinedFuncOpOrFailure))
229 *thenFn = *outlinedFuncOpOrFailure;
231 if (elseFn && !ifOp.getElseRegion().empty()) {
233 rewriter, loc, ifOp.getElseRegion(), elseFnName);
234 if (failed(outlinedFuncOpOrFailure))
236 *elseFn = *outlinedFuncOpOrFailure;
243 assert(rootOp !=
nullptr &&
"Root operation must not be a nullptr.");
244 bool rootEnclosesPloops =
false;
246 for (
Block &block : region.getBlocks()) {
249 rootEnclosesPloops |= enclosesPloops;
250 if (
auto ploop = dyn_cast<scf::ParallelOp>(op)) {
251 rootEnclosesPloops =
true;
260 return rootEnclosesPloops;
268 assert(divisor > 0 &&
"expected positive divisor");
270 "expected integer or index-typed value");
272 Value divisorMinusOneCst = arith::ConstantOp::create(
274 Value divisorCst = arith::ConstantOp::create(
276 Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOneCst);
277 return arith::DivUIOp::create(builder, loc, sum, divisorCst);
287 "expected integer or index-typed value");
288 Value cstOne = arith::ConstantOp::create(
290 Value divisorMinusOne = arith::SubIOp::create(builder, loc, divisor, cstOne);
291 Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOne);
292 return arith::DivUIOp::create(builder, loc, sum, divisor);
296 Block *loopBodyBlock,
Value iv, uint64_t unrollFactor,
304 auto findOriginalSrcOp =
309 while (srcOp && clonedToSrcOpsMap.
contains(srcOp))
310 srcOp = clonedToSrcOpsMap.
lookup(srcOp);
320 annotateFn = noopAnnotateFn;
330 for (
unsigned i = 1; i < unrollFactor; i++) {
333 operandMap.
map(iterArgs, lastYielded);
338 Value ivUnroll = ivRemapFn(i, iv, builder);
339 operandMap.
map(iv, ivUnroll);
343 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++) {
346 annotateFn(i, clonedOp, builder);
347 if (clonedToSrcOpsMap)
348 clonedToSrcOpsMap->
map(clonedOp,
349 findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
353 for (
unsigned i = 0, e = lastYielded.size(); i < e; i++)
359 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++)
360 annotateFn(0, &*it, builder);
369 scf::ForOp forOp, uint64_t unrollFactor,
371 assert(unrollFactor > 0 &&
"expected positive unroll factor");
374 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
381 auto loc = forOp.getLoc();
382 Value step = forOp.getStep();
383 Value upperBoundUnrolled;
385 bool generateEpilogueLoop =
true;
387 std::optional<APInt> constTripCount = forOp.getStaticTripCount();
388 if (constTripCount) {
393 if (unrollFactor == 1) {
394 if (constTripCount->isOne() &&
395 failed(forOp.promoteIfSingleIteration(rewriter)))
400 uint64_t tripCount = constTripCount->getZExtValue();
401 uint64_t tripCountEvenMultiple = tripCount - tripCount % unrollFactor;
402 int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
403 int64_t stepUnrolledCst = stepCst * unrollFactor;
406 generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
407 if (generateEpilogueLoop)
408 upperBoundUnrolled = arith::ConstantOp::create(
411 upperBoundUnrolledCst));
413 upperBoundUnrolled = forOp.getUpperBound();
417 stepCst == stepUnrolledCst
419 : arith::ConstantOp::create(boundsBuilder, loc,
421 step.
getType(), stepUnrolledCst));
426 auto lowerBound = forOp.getLowerBound();
427 auto upperBound = forOp.getUpperBound();
429 arith::SubIOp::create(boundsBuilder, loc, upperBound, lowerBound);
431 Value unrollFactorCst = arith::ConstantOp::create(
435 arith::RemSIOp::create(boundsBuilder, loc, tripCount, unrollFactorCst);
437 Value tripCountEvenMultiple =
438 arith::SubIOp::create(boundsBuilder, loc, tripCount, tripCountRem);
440 upperBoundUnrolled = arith::AddIOp::create(
441 boundsBuilder, loc, lowerBound,
442 arith::MulIOp::create(boundsBuilder, loc, tripCountEvenMultiple, step));
445 arith::MulIOp::create(boundsBuilder, loc, step, unrollFactorCst);
451 if (generateEpilogueLoop) {
452 OpBuilder epilogueBuilder(forOp->getContext());
454 auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.
clone(*forOp));
455 epilogueForOp.setLowerBound(upperBoundUnrolled);
458 auto results = forOp.getResults();
459 auto epilogueResults = epilogueForOp.getResults();
461 for (
auto e : llvm::zip(results, epilogueResults)) {
462 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
464 epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
465 epilogueForOp.getInitArgs().size(), results);
466 if (epilogueForOp.promoteIfSingleIteration(rewriter).failed())
471 forOp.setUpperBound(upperBoundUnrolled);
472 forOp.setStep(stepUnrolled);
474 auto iterArgs =
ValueRange(forOp.getRegionIterArgs());
475 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
478 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
481 auto stride = arith::MulIOp::create(
483 arith::ConstantOp::create(b, loc,
484 b.getIntegerAttr(iv.getType(), i)));
485 return arith::AddIOp::create(b, loc, iv, stride);
487 annotateFn, iterArgs, yieldedValues);
489 if (forOp.promoteIfSingleIteration(rewriter).failed())
497 std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
498 if (!mayBeConstantTripCount.has_value())
500 const APInt &tripCount = *mayBeConstantTripCount;
501 if (tripCount.isZero())
503 if (tripCount.isOne())
504 return forOp.promoteIfSingleIteration(rewriter);
511 auto walkResult = forOp.walk([&](scf::ForOp innerForOp) {
512 if (!forOp.isDefinedOutsideOfLoop(innerForOp.getLowerBound()) ||
513 !forOp.isDefinedOutsideOfLoop(innerForOp.getUpperBound()) ||
514 !forOp.isDefinedOutsideOfLoop(innerForOp.getStep()))
519 return !walkResult.wasInterrupted();
524 uint64_t unrollJamFactor) {
525 assert(unrollJamFactor > 0 &&
"unroll jam factor should be positive");
527 if (unrollJamFactor == 1)
533 LDBG() <<
"failed to unroll and jam: inner bounds are not invariant";
538 if (forOp->getNumResults() > 0) {
539 LDBG() <<
"failed to unroll and jam: unsupported loop with results";
545 std::optional<APInt> tripCount = forOp.getStaticTripCount();
546 if (!tripCount.has_value()) {
548 LDBG() <<
"failed to unroll and jam: trip count could not be determined";
551 uint64_t tripCountValue = tripCount->getZExtValue();
552 if (tripCountValue == 0)
554 if (unrollJamFactor > tripCountValue) {
555 LDBG() <<
"unroll and jam factor is greater than trip count, set factor to "
558 unrollJamFactor = tripCountValue;
559 }
else if (tripCountValue % unrollJamFactor != 0) {
560 LDBG() <<
"failed to unroll and jam: unsupported trip count that is not a "
561 "multiple of unroll jam factor";
566 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
576 forOp.walk([&](scf::ForOp innerForOp) { innerLoops.push_back(innerForOp); });
587 for (scf::ForOp oldForOp : innerLoops) {
589 ValueRange oldIterOperands = oldForOp.getInits();
590 ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
592 cast<scf::YieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
595 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
596 dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
597 dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
601 bool forOpReplaced = oldForOp == forOp;
602 scf::ForOp newForOp =
603 cast<scf::ForOp>(*oldForOp.replaceWithAdditionalYields(
604 rewriter, dupIterOperands,
false,
606 return dupYieldOperands;
608 newInnerLoops.push_back(newForOp);
613 ValueRange newIterArgs = newForOp.getRegionIterArgs();
614 unsigned oldNumIterArgs = oldIterArgs.size();
615 ValueRange newResults = newForOp.getResults();
616 unsigned oldNumResults = newResults.size() / unrollJamFactor;
617 assert(oldNumIterArgs == oldNumResults &&
618 "oldNumIterArgs must be the same as oldNumResults");
619 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
620 for (
unsigned j = 0;
j < oldNumIterArgs; ++
j) {
624 operandMaps[i - 1].map(newIterArgs[
j],
625 newIterArgs[i * oldNumIterArgs +
j]);
626 operandMaps[i - 1].map(newResults[
j],
627 newResults[i * oldNumResults +
j]);
634 int64_t step = forOp.getConstantStep()->getSExtValue();
636 forOp.getLoc(), forOp.getStep(),
638 forOp.getLoc(), rewriter.
getIndexAttr(unrollJamFactor)));
639 forOp.setStep(newStep);
640 auto forOpIV = forOp.getInductionVar();
643 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
644 for (
auto &subBlock : subBlocks) {
647 OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
651 if (!forOpIV.use_empty()) {
656 builder.
createOrFold<arith::AddIOp>(forOp.getLoc(), forOpIV, ivTag);
657 operandMaps[i - 1].map(forOpIV, ivUnroll);
660 for (
auto it = subBlock.first; it != std::next(subBlock.second); ++it)
661 builder.
clone(*it, operandMaps[i - 1]);
664 for (
auto newForOp : newInnerLoops) {
665 unsigned oldNumIterOperands =
666 newForOp.getNumRegionIterArgs() / unrollJamFactor;
667 unsigned numControlOperands = newForOp.getNumControlOperands();
668 auto yieldOp = cast<scf::YieldOp>(newForOp.getBody()->getTerminator());
669 unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
670 assert(oldNumIterOperands == oldNumYieldOperands &&
671 "oldNumIterOperands must be the same as oldNumYieldOperands");
672 for (
unsigned j = 0;
j < oldNumIterOperands; ++
j) {
676 newForOp.setOperand(numControlOperands + i * oldNumIterOperands +
j,
677 operandMaps[i - 1].lookupOrDefault(
678 newForOp.getOperand(numControlOperands +
j)));
680 i * oldNumYieldOperands +
j,
681 operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(
j)));
687 (
void)forOp.promoteIfSingleIteration(rewriter);
695 Range normalizedLoopBounds;
701 normalizedLoopBounds.
size =
703 return normalizedLoopBounds;
715 bool isZeroBased =
false;
717 isZeroBased = lbCst.value() == 0;
719 bool isStepOne =
false;
721 isStepOne = stepCst.value() == 1;
725 "expected matching types");
730 if (isZeroBased && isStepOne)
731 return {lb,
ub, step};
741 newUpperBound = rewriter.
createOrFold<arith::CeilDivSIOp>(
749 return {newLowerBound, newUpperBound, newStep};
763 Value denormalizedIvVal =
770 if (
Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
771 preservedUses.insert(preservedUse);
780 if (
getType(origLb).isIndex()) {
784 Value denormalizedIv;
789 Value scaled = normalizedIv;
791 Value origStepValue =
793 scaled = arith::MulIOp::create(rewriter, loc, normalizedIv, origStepValue);
796 denormalizedIv = scaled;
799 denormalizedIv = arith::AddIOp::create(rewriter, loc, scaled, origLbValue);
808 assert(!values.empty() &&
"unexecpted empty array");
813 for (
auto v : values) {
823 assert(!values.empty() &&
"unexpected empty list");
829 std::optional<Value> productOf;
830 for (
auto v : values) {
832 if (vOne && vOne.value() == 1)
835 productOf = arith::MulIOp::create(rewriter, loc, productOf.value(), v)
841 productOf = arith::ConstantOp::create(
845 return productOf.value();
861 Operation *delinearizedOp = affine::AffineDelinearizeIndexOp::create(
862 rewriter, loc, linearizedIv, ubs);
863 auto resultVals = llvm::map_to_vector(
871 llvm::BitVector isUbOne(ubs.size());
872 for (
auto [
index,
ub] : llvm::enumerate(ubs)) {
874 if (ubCst && ubCst.value() == 1)
879 unsigned numLeadingOneUbs = 0;
880 for (
auto [
index,
ub] : llvm::enumerate(ubs)) {
881 if (!isUbOne.test(
index)) {
884 delinearizedIvs[
index] = arith::ConstantOp::create(
889 Value previous = linearizedIv;
890 for (
unsigned i = numLeadingOneUbs, e = ubs.size(); i < e; ++i) {
891 unsigned idx = ubs.size() - (i - numLeadingOneUbs) - 1;
892 if (i != numLeadingOneUbs && !isUbOne.test(idx + 1)) {
893 previous = arith::DivSIOp::create(rewriter, loc, previous, ubs[idx + 1]);
898 if (!isUbOne.test(idx)) {
899 iv = arith::RemSIOp::create(rewriter, loc, previous, ubs[idx]);
902 iv = arith::ConstantOp::create(
903 rewriter, loc, rewriter.
getZeroAttr(ubs[idx].getType()));
906 delinearizedIvs[idx] = iv;
908 return {delinearizedIvs, preservedUsers};
913 if (loops.size() < 2)
916 scf::ForOp innermost = loops.back();
917 scf::ForOp outermost = loops.front();
921 for (
auto loop : loops) {
923 if (step.value() == 0) {
930 for (
auto loop : loops) {
933 Value lb = loop.getLowerBound();
934 Value ub = loop.getUpperBound();
935 Value step = loop.getStep();
941 newLoopRange.offset));
945 newLoopRange.stride));
949 loop.getInductionVar(), lb, step);
958 loops, [](
auto loop) {
return loop.getUpperBound(); });
960 outermost.setUpperBound(upperBound);
965 rewriter, loc, outermost.getInductionVar(), upperBounds);
969 for (
int i = loops.size() - 1; i > 0; --i) {
970 auto outerLoop = loops[i - 1];
971 auto innerLoop = loops[i];
973 Operation *innerTerminator = innerLoop.getBody()->getTerminator();
974 auto yieldedVals = llvm::to_vector(innerTerminator->
getOperands());
975 assert(llvm::equal(outerLoop.getRegionIterArgs(), innerLoop.getInitArgs()));
976 for (
Value &yieldedVal : yieldedVals) {
979 auto iter = llvm::find(innerLoop.getRegionIterArgs(), yieldedVal);
980 if (iter != innerLoop.getRegionIterArgs().end()) {
981 unsigned iterArgIndex = iter - innerLoop.getRegionIterArgs().begin();
983 assert(iterArgIndex < innerLoop.getInitArgs().size());
984 yieldedVal = innerLoop.getInitArgs()[iterArgIndex];
987 rewriter.
eraseOp(innerTerminator);
990 innerBlockArgs.push_back(delinearizeIvs[i]);
991 llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
994 rewriter.
replaceOp(innerLoop, yieldedVals);
1000 if (loops.empty()) {
1003 IRRewriter rewriter(loops.front().getContext());
1008 LogicalResult
result(failure());
1018 for (
unsigned i = 0, e = loops.size(); i < e; ++i) {
1019 operandsDefinedAbove[i] = i;
1020 for (
unsigned j = 0;
j < i; ++
j) {
1022 loops[i].getUpperBound(),
1023 loops[i].getStep()};
1025 operandsDefinedAbove[i] =
j;
1036 iterArgChainStart[0] = 0;
1037 for (
unsigned i = 1, e = loops.size(); i < e; ++i) {
1039 iterArgChainStart[i] = i;
1040 auto outerloop = loops[i - 1];
1041 auto innerLoop = loops[i];
1042 if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
1045 if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) {
1048 auto outerloopTerminator = outerloop.getBody()->getTerminator();
1049 if (!llvm::equal(outerloopTerminator->getOperands(),
1050 innerLoop.getResults())) {
1053 iterArgChainStart[i] = iterArgChainStart[i - 1];
1059 for (
unsigned end = loops.size(); end > 0; --end) {
1061 for (; start < end - 1; ++start) {
1063 *std::max_element(std::next(operandsDefinedAbove.begin(), start),
1064 std::next(operandsDefinedAbove.begin(), end));
1067 if (iterArgChainStart[end - 1] > start)
1076 if (start != end - 1)
1084 ArrayRef<std::vector<unsigned>> combinedDimensions) {
1090 auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
1091 for (
auto &dims : sortedDimensions)
1096 for (
unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
1099 Value lb = loops.getLowerBound()[i];
1100 Value ub = loops.getUpperBound()[i];
1101 Value step = loops.getStep()[i];
1104 rewriter, loops.getLoc(), newLoopRange.size));
1115 for (
auto &sortedDimension : sortedDimensions) {
1117 for (
auto idx : sortedDimension) {
1118 newUpperBound = arith::MulIOp::create(rewriter, loc, newUpperBound,
1119 normalizedUpperBounds[idx]);
1121 lowerBounds.push_back(cst0);
1122 steps.push_back(cst1);
1123 upperBounds.push_back(newUpperBound);
1132 auto newPloop = scf::ParallelOp::create(
1133 rewriter, loc, lowerBounds, upperBounds, steps,
1135 for (
unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
1136 Value previous = ploopIVs[i];
1137 unsigned numberCombinedDimensions = combinedDimensions[i].size();
1139 for (
unsigned j = numberCombinedDimensions - 1;
j > 0; --
j) {
1140 unsigned idx = combinedDimensions[i][
j];
1143 Value iv = arith::RemSIOp::create(insideBuilder, loc, previous,
1144 normalizedUpperBounds[idx]);
1150 previous = arith::DivSIOp::create(insideBuilder, loc, previous,
1151 normalizedUpperBounds[idx]);
1155 unsigned idx = combinedDimensions[i][0];
1157 previous, loops.getRegion());
1162 loops.getBody()->back().erase();
1163 newPloop.getBody()->getOperations().splice(
1165 loops.getBody()->getOperations());
1178 return op != inner.getOperation();
1181 LogicalResult status =
success();
1183 for (
auto &op : outer.getBody()->without_terminator()) {
1185 if (&op == inner.getOperation())
1188 if (forwardSlice.count(&op) > 0) {
1193 if (isa<scf::ForOp>(op))
1196 if (op.getNumRegions() > 0) {
1206 toHoist.push_back(&op);
1208 auto *outerForOp = outer.getOperation();
1209 for (
auto *op : toHoist)
1210 op->moveBefore(outerForOp);
1219 LogicalResult status =
success();
1220 const Loops &interTile = tileLoops.first;
1221 const Loops &intraTile = tileLoops.second;
1222 auto size = interTile.size();
1223 assert(size == intraTile.size());
1226 for (
unsigned s = 1; s < size; ++s)
1227 status = succeeded(status) ?
hoistOpsBetween(intraTile[0], intraTile[s])
1229 for (
unsigned s = 1; s < size; ++s)
1230 status = succeeded(status) ?
hoistOpsBetween(interTile[0], interTile[s])
1239template <
typename T>
1242 unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
1243 for (
unsigned i = 0; i < maxLoops; ++i) {
1244 forOps.push_back(rootForOp);
1245 Block &body = rootForOp.getRegion().front();
1246 if (body.
begin() != std::prev(body.
end(), 2))
1249 rootForOp = dyn_cast<T>(&body.
front());
1257 assert(!forOp.getUnsignedCmp() &&
"unsigned loops are not supported");
1258 auto originalStep = forOp.getStep();
1259 auto iv = forOp.getInductionVar();
1262 forOp.setStep(arith::MulIOp::create(
b, forOp.getLoc(), originalStep, factor));
1265 for (
auto t : targets) {
1266 assert(!t.getUnsignedCmp() &&
"unsigned loops are not supported");
1269 auto begin = t.getBody()->begin();
1270 auto nOps = t.getBody()->getOperations().size();
1274 Value stepped = arith::AddIOp::create(
b, t.getLoc(), iv, forOp.getStep());
1276 arith::MinSIOp::create(
b, t.getLoc(), forOp.getUpperBound(), stepped);
1279 auto newForOp = scf::ForOp::create(
b, t.getLoc(), iv,
ub, originalStep);
1280 newForOp.getBody()->getOperations().splice(
1281 newForOp.getBody()->getOperations().begin(),
1282 t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
1284 newForOp.getRegion());
1286 innerLoops.push_back(newForOp);
1294template <
typename SizeType>
1302 assert(res.size() == 1 &&
"Expected 1 inner forOp");
1311 for (
auto it : llvm::zip(forOps, sizes)) {
1312 auto step =
stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
1313 res.push_back(step);
1314 currentTargets = step;
1323 res.push_back(llvm::getSingleElement(loops));
1331 forOps.reserve(sizes.size());
1333 if (forOps.size() < sizes.size())
1334 sizes = sizes.take_front(forOps.size());
1336 return ::tile(forOps, sizes, forOps.back());
1349 forOps.reserve(sizes.size());
1351 if (forOps.size() < sizes.size())
1352 sizes = sizes.take_front(forOps.size());
1359 tileSizes.reserve(sizes.size());
1360 for (
unsigned i = 0, e = sizes.size(); i < e; ++i) {
1361 assert(sizes[i] > 0 &&
"expected strictly positive size for strip-mining");
1363 auto forOp = forOps[i];
1365 auto loc = forOp.getLoc();
1366 Value diff = arith::SubIOp::create(builder, loc, forOp.getUpperBound(),
1367 forOp.getLowerBound());
1369 Value iterationsPerBlock =
1371 tileSizes.push_back(iterationsPerBlock);
1375 auto intraTile =
tile(forOps, tileSizes, forOps.back());
1376 TileLoops tileLoops = std::make_pair(forOps, intraTile);
1387 scf::ForallOp source,
1389 unsigned numTargetOuts =
target.getNumResults();
1390 unsigned numSourceOuts = source.getNumResults();
1394 llvm::append_range(fusedOuts,
target.getOutputs());
1395 llvm::append_range(fusedOuts, source.getOutputs());
1399 scf::ForallOp fusedLoop = scf::ForallOp::create(
1400 rewriter, source.getLoc(), source.getMixedLowerBound(),
1401 source.getMixedUpperBound(), source.getMixedStep(), fusedOuts,
1402 source.getMapping());
1406 mapping.
map(
target.getInductionVars(), fusedLoop.getInductionVars());
1407 mapping.
map(source.getInductionVars(), fusedLoop.getInductionVars());
1411 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1412 mapping.
map(source.getRegionIterArgs(),
1413 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1418 rewriter.
clone(op, mapping);
1419 for (
Operation &op : source.getBody()->without_terminator())
1420 rewriter.
clone(op, mapping);
1423 scf::InParallelOp targetTerm =
target.getTerminator();
1424 scf::InParallelOp sourceTerm = source.getTerminator();
1425 scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
1427 for (
Operation &op : targetTerm.getYieldingOps())
1428 rewriter.
clone(op, mapping);
1429 for (
Operation &op : sourceTerm.getYieldingOps())
1430 rewriter.
clone(op, mapping);
1433 rewriter.
replaceOp(
target, fusedLoop.getResults().take_front(numTargetOuts));
1434 rewriter.
replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1442 assert(source.getUnsignedCmp() ==
target.getUnsignedCmp() &&
1443 "incompatible signedness");
1444 unsigned numTargetOuts =
target.getNumResults();
1445 unsigned numSourceOuts = source.getNumResults();
1449 llvm::append_range(fusedInitArgs,
target.getInitArgs());
1450 llvm::append_range(fusedInitArgs, source.getInitArgs());
1455 scf::ForOp fusedLoop = scf::ForOp::create(
1456 rewriter, source.getLoc(), source.getLowerBound(), source.getUpperBound(),
1457 source.getStep(), fusedInitArgs,
nullptr,
1458 source.getUnsignedCmp());
1462 mapping.
map(
target.getInductionVar(), fusedLoop.getInductionVar());
1464 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1465 mapping.
map(source.getInductionVar(), fusedLoop.getInductionVar());
1466 mapping.
map(source.getRegionIterArgs(),
1467 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1472 rewriter.
clone(op, mapping);
1473 for (
Operation &op : source.getBody()->without_terminator())
1474 rewriter.
clone(op, mapping);
1478 for (
Value operand :
target.getBody()->getTerminator()->getOperands())
1480 for (
Value operand : source.getBody()->getTerminator()->getOperands())
1482 if (!yieldResults.empty())
1483 scf::YieldOp::create(rewriter, source.getLoc(), yieldResults);
1486 rewriter.
replaceOp(
target, fusedLoop.getResults().take_front(numTargetOuts));
1487 rewriter.
replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1493 scf::ForallOp forallOp) {
1498 if (forallOp.isNormalized())
1502 auto loc = forallOp.getLoc();
1505 for (
auto [lb,
ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
1506 Range normalizedLoopParams =
1508 newUbs.push_back(normalizedLoopParams.
size);
1514 auto normalizedForallOp = scf::ForallOp::create(
1515 rewriter, loc, newUbs, forallOp.getOutputs(), forallOp.getMapping(),
1519 normalizedForallOp.getBodyRegion(),
1520 normalizedForallOp.getBodyRegion().begin());
1522 rewriter.
eraseBlock(&normalizedForallOp.getBodyRegion().back());
1526 for (
auto [idx, iv] :
1527 llvm::enumerate(normalizedForallOp.getInductionVars())) {
1533 rewriter.
replaceOp(forallOp, normalizedForallOp);
1534 return normalizedForallOp;
1539 assert(!loops.empty() &&
"unexpected empty loop nest");
1540 if (loops.size() == 1)
1541 return isa_and_nonnull<scf::ForOp>(loops.front().getOperation());
1542 for (
auto [outerLoop, innerLoop] :
1543 llvm::zip_equal(loops.drop_back(), loops.drop_front())) {
1544 auto outerFor = dyn_cast_or_null<scf::ForOp>(outerLoop.getOperation());
1545 auto innerFor = dyn_cast_or_null<scf::ForOp>(innerLoop.getOperation());
1546 if (!outerFor || !innerFor)
1548 auto outerBBArgs = outerFor.getRegionIterArgs();
1549 auto innerIterArgs = innerFor.getInitArgs();
1550 if (outerBBArgs.size() != innerIterArgs.size())
1553 for (
auto [outerBBArg, innerIterArg] :
1554 llvm::zip_equal(outerBBArgs, innerIterArgs)) {
1555 if (!llvm::hasSingleElement(outerBBArg.getUses()) ||
1556 innerIterArg != outerBBArg)
1561 cast<scf::YieldOp>(outerFor.getBody()->getTerminator())->getOperands();
1562 ValueRange innerResults = innerFor.getResults();
1563 if (outerYields.size() != innerResults.size())
1565 for (
auto [outerYield, innerResult] :
1566 llvm::zip_equal(outerYields, innerResults)) {
1567 if (!llvm::hasSingleElement(innerResult.getUses()) ||
1568 outerYield != innerResult)
1577 std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
1578 std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
1579 std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
1580 if (!loBnds || !upBnds || !steps)
1583 for (
auto [lb,
ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
1587 if (!lbCst || !ubCst || !stepCst)
1589 loopRanges.emplace_back(*lbCst, *ubCst, *stepCst);
1596 std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
1597 std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
1598 std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
1599 if (!loBnds || !upBnds || !steps)
1602 for (
auto [lb,
ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
1608 tripCounts.push_back(*numIter);
1618 const unsigned numLoops = op.getNumLoops();
1619 assert(llvm::none_of(unrollFactors, [](uint64_t f) {
return f == 0; }) &&
1620 "Expected positive unroll factors");
1621 assert((!unrollFactors.empty() && (unrollFactors.size() <= numLoops)) &&
1622 "Expected non-empty unroll factors of size <= to the number of loops");
1625 if (llvm::all_of(unrollFactors, [](uint64_t f) {
return f == 1; }))
1627 op,
"Unrolling not applied if all factors are 1");
1630 if (llvm::hasSingleElement(op.getBody()->getOperations()))
1635 const unsigned firstLoopDimIdx = numLoops - unrollFactors.size();
1640 if (tripCounts.empty())
1642 op,
"Failed to compute constant trip counts for the loop. Note that "
1643 "dynamic loop sizes are not supported.");
1645 for (
unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
1646 const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
1647 if (tripCounts[dimIdx].urem(unrollFactor) != 0)
1649 op,
"Unroll factors don't divide the iteration space evenly");
1652 std::optional<SmallVector<OpFoldResult>> maybeFoldSteps = op.getLoopSteps();
1653 if (!maybeFoldSteps)
1656 for (
auto step : *maybeFoldSteps)
1659 for (
unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
1660 const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
1661 if (unrollFactor == 1)
1663 const size_t origStep = steps[dimIdx];
1664 const int64_t newStep = origStep * unrollFactor;
1668 auto yieldedValues = op.getBody()->getTerminator()->getOperands();
1671 op.getBody(), op.getInductionVars()[dimIdx], unrollFactor,
1674 const AffineExpr expr = b.getAffineDimExpr(0) + (origStep * i);
1676 b.getDimIdentityMap().dropResult(0).insertResult(expr, 0);
1677 return affine::AffineApplyOp::create(b, iv.getLoc(), map,
1680 annotateFn, iterArgs, yieldedValues, &clonedToSrcOpsMap);
1685 op.getStepMutable()[dimIdx].assign(
static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > values)
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOp.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static Range emitNormalizedLoopBoundsForIndexType(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, ArrayRef< Value > values)
Helper function to multiply a sequence of values.
static std::pair< SmallVector< Value >, SmallPtrSet< Operation *, 2 > > delinearizeInductionVariable(RewriterBase &rewriter, Location loc, Value linearizedIv, ArrayRef< Value > ubs)
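For each original loop, the value of the induction variable can be obtained by dividing the induction variable of the linearized loop by the product of the upper bounds of the loops nested inside it, and then taking the remainder modulo the upper bound of this loop.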
static void denormalizeInductionVariableForIndexType(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
static bool areInnerBoundsInvariant(scf::ForOp forOp)
Check if bounds of all inner loops are defined outside of forOp and return false if not.
static int64_t product(ArrayRef< int64_t > vals)
static llvm::ManagedStatic< PassManagerOptions > options
Base type for affine expression.
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIndexAttr(int64_t value)
IntegerAttr getIntegerAttr(Type type, int64_t value)
TypedAttr getZeroAttr(Type type)
MLIRContext * getContext() const
TypedAttr getOneAttr(Type type)
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
InsertPoint saveInsertionPoint() const
Return a saved insertion point.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void restoreInsertionPoint(InsertPoint ip)
Restore the insert point to a previously saved point.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
operand_type_range getOperandTypes()
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
result_range getResults()
Operation * clone(IRMapping &mapper, const CloneOptions &options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceAllUsesExcept(Value from, Value to, Operation *exceptedUser)
Find uses of from and replace them with to except if the user is exceptedUser.
virtual void inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues={})
Inline the operations of block 'source' into block 'dest' before the given position.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndex() const
Return true if this is an integer (of any signedness) or an index type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
Specialization of arith.constant op that returns an integer of index type.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Operation * getOwner() const
Return the owner of this operand.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
std::optional< llvm::APSInt > computeUbMinusLb(Value lb, Value ub, bool isSigned)
Helper function to compute the difference between two values.
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
bool isPerfectlyNestedForLoops(MutableArrayRef< LoopLikeOpInterface > loops)
Check if the provided loops are perfectly nested for-loops.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
std::function< SmallVector< Value >( OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an affine.for to find a band to coalesce.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
void generateUnrolledLoop(Block *loopBodyBlock, Value iv, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues, IRMapping *clonedToSrcOpsMap=nullptr)
Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs' and 'yieldedValues' as the...
Value getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
LogicalResult loopUnrollFull(scf::ForOp forOp)
Unrolls this loop completely.
llvm::SmallVector< llvm::APInt > getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp)
Get constant trip counts for each of the induction variables of the given loop operation.
std::pair< Loops, Loops > TileLoops
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
llvm::SmallVector< std::tuple< int64_t, int64_t, int64_t > > getConstLoopBounds(mlir::LoopLikeOpInterface loopOp)
Get constant loop bounds and steps for each of the induction variables of the given loop operation,...
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned > > combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
llvm::SetVector< T, Vector, Set, N > SetVector
SliceOptions ForwardSliceOptions
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
FailureOr< UnrolledLoopInfo > loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor)
Unrolls and jams this scf.for operation by the specified unroll factor.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
bool isZeroInteger(OpFoldResult v)
Return "true" if v is an integer value/attribute with constant value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
FailureOr< scf::ParallelOp > parallelLoopUnrollByFactors(scf::ParallelOp op, ArrayRef< uint64_t > unrollFactors, RewriterBase &rewriter, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, IRMapping *clonedToSrcOpsMap=nullptr)
Unroll this scf::Parallel loop by the specified unroll factors.
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Get back the original induction variable values after loop normalization.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
llvm::function_ref< Fn > function_ref
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
Materialize bounds and step of a zero-based and unit-step loop derived by normalizing the specified b...
SmallVector< scf::ForOp, 8 > Loops
Tile a nest of standard for loops rooted at rootForOp by finding such parametric tile sizes that the ...
bool isOneInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 1.
std::optional< APInt > constantTripCount(OpFoldResult lb, OpFoldResult ub, OpFoldResult step, bool isSigned, llvm::function_ref< std::optional< llvm::APSInt >(Value, Value, bool)> computeUbMinusLb)
Return the number of iterations for a loop with a lower bound lb, upper bound ub and step step,...
LogicalResult foldDynamicIndexList(SmallVectorImpl< OpFoldResult > &ofrs, bool onlyNonNegative=false, bool onlyNonZero=false)
Returns "success" when any of the elements in ofrs is a constant value.
FailureOr< scf::ForallOp > normalizeForallOp(RewriterBase &rewriter, scf::ForallOp forallOp)
Normalize an scf.forall operation.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
std::optional< scf::ForOp > epilogueLoopOp
std::optional< scf::ForOp > mainLoopOp
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.