#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/Support/DebugLog.h"

#define DEBUG_TYPE "scf-utils"
                                     bool replaceIterOperandsUsesInLoop) {
  assert(loopNest.size() <= 10 &&
         "exceeded recursion limit when yielding value from loop nest");
  if (loopNest.size() == 1) {
        cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
            rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
    return {innerMostLoop};
        innerNewBBArgs, newYieldValuesFn,
        replaceIterOperandsUsesInLoop);
    return llvm::map_to_vector(
        newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
  scf::ForOp outerMostLoop =
      cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
          rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
  newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
                                            func::CallOp *callOp) {
  assert(!funcName.empty() && "funcName cannot be empty");
  ValueRange outlinedValues(captures.getArrayRef());
    outlinedFuncArgTypes.push_back(arg.getType());
    outlinedFuncArgLocs.push_back(arg.getLoc());
  for (Value value : outlinedValues) {
    outlinedFuncArgTypes.push_back(value.getType());
    outlinedFuncArgLocs.push_back(value.getLoc());
  FunctionType outlinedFuncType =
      FunctionType::get(rewriter.getContext(), outlinedFuncArgTypes,
      func::FuncOp::create(rewriter, loc, funcName, outlinedFuncType);
  Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
  auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
        originalBlock, outlinedFuncBody,
        outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
    func::ReturnOp::create(rewriter, loc, originalTerminator->getResultTypes(),
        &region, region.begin(),
        TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
            .take_front(numOriginalBlockArguments));
  llvm::append_range(callValues, newBlock->getArguments());
  llvm::append_range(callValues, outlinedValues);
  auto call = func::CallOp::create(rewriter, loc, outlinedFunc, callValues);
    rewriter.clone(*originalTerminator, bvm);
    rewriter.eraseOp(originalTerminator);
  for (auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
                     outlinedValues.size()))) {
    Value orig = std::get<0>(it);
    Value repl = std::get<1>(it);
      return outlinedFunc->isProperAncestor(opOperand.getOwner());
                           func::FuncOp *thenFn, StringRef thenFnName,
                           func::FuncOp *elseFn, StringRef elseFnName) {
  FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
  if (thenFn && !ifOp.getThenRegion().empty()) {
        rewriter, loc, ifOp.getThenRegion(), thenFnName);
    if (failed(outlinedFuncOpOrFailure))
    *thenFn = *outlinedFuncOpOrFailure;
  if (elseFn && !ifOp.getElseRegion().empty()) {
        rewriter, loc, ifOp.getElseRegion(), elseFnName);
    if (failed(outlinedFuncOpOrFailure))
    *elseFn = *outlinedFuncOpOrFailure;
  assert(rootOp != nullptr && "Root operation must not be a nullptr.");
  bool rootEnclosesPloops = false;
    for (Block &block : region.getBlocks()) {
        rootEnclosesPloops |= enclosesPloops;
        if (auto ploop = dyn_cast<scf::ParallelOp>(op)) {
          rootEnclosesPloops = true;
  return rootEnclosesPloops;
  assert(divisor > 0 && "expected positive divisor");
         "expected integer or index-typed value");
  Value divisorMinusOneCst = arith::ConstantOp::create(
  Value divisorCst = arith::ConstantOp::create(
  Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOneCst);
  return arith::DivUIOp::create(builder, loc, sum, divisorCst);
         "expected integer or index-typed value");
  Value cstOne = arith::ConstantOp::create(
  Value divisorMinusOne = arith::SubIOp::create(builder, loc, divisor, cstOne);
  Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOne);
  return arith::DivUIOp::create(builder, loc, sum, divisor);
    Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
  auto findOriginalSrcOp =
        while (srcOp && clonedToSrcOpsMap.contains(srcOp))
          srcOp = clonedToSrcOpsMap.lookup(srcOp);
    annotateFn = noopAnnotateFn;
  for (unsigned i = 1; i < unrollFactor; i++) {
    operandMap.map(iterArgs, lastYielded);
    Value ivUnroll = ivRemapFn(i, iv, builder);
    operandMap.map(iv, ivUnroll);
    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
      annotateFn(i, clonedOp, builder);
      if (clonedToSrcOpsMap)
        clonedToSrcOpsMap->map(clonedOp,
                               findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
  for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
    annotateFn(0, &*it, builder);
    scf::ForOp forOp, uint64_t unrollFactor,
  assert(unrollFactor > 0 && "expected positive unroll factor");
  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
  auto loc = forOp.getLoc();
  Value step = forOp.getStep();
  Value upperBoundUnrolled;
  bool generateEpilogueLoop = true;
  std::optional<APInt> constTripCount = forOp.getStaticTripCount();
  if (constTripCount) {
    if (unrollFactor == 1) {
      if (constTripCount->isOne() &&
          failed(forOp.promoteIfSingleIteration(rewriter)))
    uint64_t tripCount = constTripCount->getZExtValue();
    uint64_t tripCountEvenMultiple = tripCount - tripCount % unrollFactor;
    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
    int64_t stepUnrolledCst = stepCst * unrollFactor;
    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
    if (generateEpilogueLoop)
      upperBoundUnrolled = arith::ConstantOp::create(
          upperBoundUnrolledCst));
      upperBoundUnrolled = forOp.getUpperBound();
        stepCst == stepUnrolledCst
            : arith::ConstantOp::create(boundsBuilder, loc,
                                        step.getType(), stepUnrolledCst));
    auto lowerBound = forOp.getLowerBound();
    auto upperBound = forOp.getUpperBound();
        arith::SubIOp::create(boundsBuilder, loc, upperBound, lowerBound);
    Value unrollFactorCst = arith::ConstantOp::create(
        arith::RemSIOp::create(boundsBuilder, loc, tripCount, unrollFactorCst);
    Value tripCountEvenMultiple =
        arith::SubIOp::create(boundsBuilder, loc, tripCount, tripCountRem);
    upperBoundUnrolled = arith::AddIOp::create(
        boundsBuilder, loc, lowerBound,
        arith::MulIOp::create(boundsBuilder, loc, tripCountEvenMultiple, step));
        arith::MulIOp::create(boundsBuilder, loc, step, unrollFactorCst);
  if (generateEpilogueLoop) {
    OpBuilder epilogueBuilder(forOp->getContext());
    auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
    epilogueForOp.setLowerBound(upperBoundUnrolled);
    auto results = forOp.getResults();
    auto epilogueResults = epilogueForOp.getResults();
    for (auto e : llvm::zip(results, epilogueResults)) {
      std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
    epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
                               epilogueForOp.getInitArgs().size(), results);
    if (epilogueForOp.promoteIfSingleIteration(rewriter).failed())
  forOp.setUpperBound(upperBoundUnrolled);
  forOp.setStep(stepUnrolled);
  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
        auto stride = arith::MulIOp::create(
            arith::ConstantOp::create(b, loc,
                                      b.getIntegerAttr(iv.getType(), i)));
        return arith::AddIOp::create(b, loc, iv, stride);
      annotateFn, iterArgs, yieldedValues);
  if (forOp.promoteIfSingleIteration(rewriter).failed())
  std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
  if (!mayBeConstantTripCount.has_value())
  const APInt &tripCount = *mayBeConstantTripCount;
  if (tripCount.isZero())
  if (tripCount.isOne())
    return forOp.promoteIfSingleIteration(rewriter);
  auto walkResult = forOp.walk([&](scf::ForOp innerForOp) {
    if (!forOp.isDefinedOutsideOfLoop(innerForOp.getLowerBound()) ||
        !forOp.isDefinedOutsideOfLoop(innerForOp.getUpperBound()) ||
        !forOp.isDefinedOutsideOfLoop(innerForOp.getStep()))
  return !walkResult.wasInterrupted();
                                            uint64_t unrollJamFactor) {
  assert(unrollJamFactor > 0 && "unroll jam factor should be positive");
  if (unrollJamFactor == 1)
    LDBG() << "failed to unroll and jam: inner bounds are not invariant";
  if (forOp->getNumResults() > 0) {
    LDBG() << "failed to unroll and jam: unsupported loop with results";
  std::optional<APInt> tripCount = forOp.getStaticTripCount();
  if (!tripCount.has_value()) {
    LDBG() << "failed to unroll and jam: trip count could not be determined";
  uint64_t tripCountValue = tripCount->getZExtValue();
  if (unrollJamFactor > tripCountValue) {
    LDBG() << "unroll and jam factor is greater than trip count, set factor to "
    unrollJamFactor = tripCountValue;
  } else if (tripCountValue % unrollJamFactor != 0) {
    LDBG() << "failed to unroll and jam: unsupported trip count that is not a "
              "multiple of unroll jam factor";
  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
  forOp.walk([&](scf::ForOp innerForOp) { innerLoops.push_back(innerForOp); });
  for (scf::ForOp oldForOp : innerLoops) {
    ValueRange oldIterOperands = oldForOp.getInits();
    ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
        cast<scf::YieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
      dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
    bool forOpReplaced = oldForOp == forOp;
    scf::ForOp newForOp =
        cast<scf::ForOp>(*oldForOp.replaceWithAdditionalYields(
            rewriter, dupIterOperands, false,
              return dupYieldOperands;
    newInnerLoops.push_back(newForOp);
    ValueRange newIterArgs = newForOp.getRegionIterArgs();
    unsigned oldNumIterArgs = oldIterArgs.size();
    ValueRange newResults = newForOp.getResults();
    unsigned oldNumResults = newResults.size() / unrollJamFactor;
    assert(oldNumIterArgs == oldNumResults &&
           "oldNumIterArgs must be the same as oldNumResults");
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      for (unsigned j = 0; j < oldNumIterArgs; ++j) {
        operandMaps[i - 1].map(newIterArgs[j],
                               newIterArgs[i * oldNumIterArgs + j]);
        operandMaps[i - 1].map(newResults[j],
                               newResults[i * oldNumResults + j]);
  int64_t step = forOp.getConstantStep()->getSExtValue();
      forOp.getLoc(), forOp.getStep(),
          forOp.getLoc(), rewriter.getIndexAttr(unrollJamFactor)));
  forOp.setStep(newStep);
  auto forOpIV = forOp.getInductionVar();
  for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
    for (auto &subBlock : subBlocks) {
      OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
      if (!forOpIV.use_empty()) {
            builder.createOrFold<arith::AddIOp>(forOp.getLoc(), forOpIV, ivTag);
        operandMaps[i - 1].map(forOpIV, ivUnroll);
      for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
        builder.clone(*it, operandMaps[i - 1]);
  for (auto newForOp : newInnerLoops) {
    unsigned oldNumIterOperands =
        newForOp.getNumRegionIterArgs() / unrollJamFactor;
    unsigned numControlOperands = newForOp.getNumControlOperands();
    auto yieldOp = cast<scf::YieldOp>(newForOp.getBody()->getTerminator());
    unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
    assert(oldNumIterOperands == oldNumYieldOperands &&
           "oldNumIterOperands must be the same as oldNumYieldOperands");
    for (unsigned j = 0; j < oldNumIterOperands; ++j) {
      newForOp.setOperand(numControlOperands + i * oldNumIterOperands + j,
                          operandMaps[i - 1].lookupOrDefault(
                              newForOp.getOperand(numControlOperands + j)));
          i * oldNumYieldOperands + j,
          operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(j)));
  (void)forOp.promoteIfSingleIteration(rewriter);
  Range normalizedLoopBounds;
  normalizedLoopBounds.size =
  return normalizedLoopBounds;
  bool isZeroBased = false;
    isZeroBased = lbCst.value() == 0;
  bool isStepOne = false;
    isStepOne = stepCst.value() == 1;
         "expected matching types");
  if (isZeroBased && isStepOne)
    return {lb, ub, step};
  newUpperBound = rewriter.createOrFold<arith::CeilDivSIOp>(
  return {newLowerBound, newUpperBound, newStep};
  Value denormalizedIvVal =
  if (Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
    preservedUses.insert(preservedUse);
  if (getType(origLb).isIndex()) {
  Value denormalizedIv;
  Value scaled = normalizedIv;
  Value origStepValue =
    scaled = arith::MulIOp::create(rewriter, loc, normalizedIv, origStepValue);
  denormalizedIv = scaled;
    denormalizedIv = arith::AddIOp::create(rewriter, loc, scaled, origLbValue);
  assert(!values.empty() && "unexpected empty array");
  for (auto v : values) {
  assert(!values.empty() && "unexpected empty list");
  std::optional<Value> productOf;
  for (auto v : values) {
    if (vOne && vOne.value() == 1)
      productOf = arith::MulIOp::create(rewriter, loc, productOf.value(), v)
    productOf = arith::ConstantOp::create(
  return productOf.value();
    Operation *delinearizedOp = affine::AffineDelinearizeIndexOp::create(
        rewriter, loc, linearizedIv, ubs);
    auto resultVals = llvm::map_to_vector(
  llvm::BitVector isUbOne(ubs.size());
  for (auto [index, ub] : llvm::enumerate(ubs)) {
    if (ubCst && ubCst.value() == 1)
  unsigned numLeadingOneUbs = 0;
  for (auto [index, ub] : llvm::enumerate(ubs)) {
    if (!isUbOne.test(index)) {
    delinearizedIvs[index] = arith::ConstantOp::create(
  Value previous = linearizedIv;
  for (unsigned i = numLeadingOneUbs, e = ubs.size(); i < e; ++i) {
    unsigned idx = ubs.size() - (i - numLeadingOneUbs) - 1;
    if (i != numLeadingOneUbs && !isUbOne.test(idx + 1)) {
      previous = arith::DivSIOp::create(rewriter, loc, previous, ubs[idx + 1]);
    if (!isUbOne.test(idx)) {
      iv = arith::RemSIOp::create(rewriter, loc, previous, ubs[idx]);
      iv = arith::ConstantOp::create(
          rewriter, loc, rewriter.getZeroAttr(ubs[idx].getType()));
    delinearizedIvs[idx] = iv;
  return {delinearizedIvs, preservedUsers};
  if (loops.size() < 2)
  scf::ForOp innermost = loops.back();
  scf::ForOp outermost = loops.front();
  for (auto loop : loops) {
    Value lb = loop.getLowerBound();
    Value ub = loop.getUpperBound();
    Value step = loop.getStep();
                                             newLoopRange.offset));
                                             newLoopRange.stride));
                               loop.getInductionVar(), lb, step);
      loops, [](auto loop) { return loop.getUpperBound(); });
  outermost.setUpperBound(upperBound);
      rewriter, loc, outermost.getInductionVar(), upperBounds);
  for (int i = loops.size() - 1; i > 0; --i) {
    auto outerLoop = loops[i - 1];
    auto innerLoop = loops[i];
    Operation *innerTerminator = innerLoop.getBody()->getTerminator();
    auto yieldedVals = llvm::to_vector(innerTerminator->getOperands());
    assert(llvm::equal(outerLoop.getRegionIterArgs(), innerLoop.getInitArgs()));
    for (Value &yieldedVal : yieldedVals) {
      auto iter = llvm::find(innerLoop.getRegionIterArgs(), yieldedVal);
      if (iter != innerLoop.getRegionIterArgs().end()) {
        unsigned iterArgIndex = iter - innerLoop.getRegionIterArgs().begin();
        assert(iterArgIndex < innerLoop.getInitArgs().size());
        yieldedVal = innerLoop.getInitArgs()[iterArgIndex];
    rewriter.eraseOp(innerTerminator);
    innerBlockArgs.push_back(delinearizeIvs[i]);
    llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
    rewriter.replaceOp(innerLoop, yieldedVals);
  IRRewriter rewriter(loops.front().getContext());
  LogicalResult result(failure());
  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
    operandsDefinedAbove[i] = i;
    for (unsigned j = 0; j < i; ++j) {
                           loops[i].getUpperBound(),
                           loops[i].getStep()};
        operandsDefinedAbove[i] = j;
  iterArgChainStart[0] = 0;
  for (unsigned i = 1, e = loops.size(); i < e; ++i) {
    iterArgChainStart[i] = i;
    auto outerloop = loops[i - 1];
    auto innerLoop = loops[i];
    if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
    if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) {
    auto outerloopTerminator = outerloop.getBody()->getTerminator();
    if (!llvm::equal(outerloopTerminator->getOperands(),
                     innerLoop.getResults())) {
    iterArgChainStart[i] = iterArgChainStart[i - 1];
  for (unsigned end = loops.size(); end > 0; --end) {
    for (; start < end - 1; ++start) {
          *std::max_element(std::next(operandsDefinedAbove.begin(), start),
                            std::next(operandsDefinedAbove.begin(), end));
      if (iterArgChainStart[end - 1] > start)
    if (start != end - 1)
    ArrayRef<std::vector<unsigned>> combinedDimensions) {
  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
  for (auto &dims : sortedDimensions)
  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
    Value lb = loops.getLowerBound()[i];
    Value ub = loops.getUpperBound()[i];
    Value step = loops.getStep()[i];
        rewriter, loops.getLoc(), newLoopRange.size));
  for (auto &sortedDimension : sortedDimensions) {
    for (auto idx : sortedDimension) {
      newUpperBound = arith::MulIOp::create(rewriter, loc, newUpperBound,
                                            normalizedUpperBounds[idx]);
    lowerBounds.push_back(cst0);
    steps.push_back(cst1);
    upperBounds.push_back(newUpperBound);
  auto newPloop = scf::ParallelOp::create(
      rewriter, loc, lowerBounds, upperBounds, steps,
        for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
          Value previous = ploopIVs[i];
          unsigned numberCombinedDimensions = combinedDimensions[i].size();
          for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
            unsigned idx = combinedDimensions[i][j];
            Value iv = arith::RemSIOp::create(insideBuilder, loc, previous,
                                              normalizedUpperBounds[idx]);
            previous = arith::DivSIOp::create(insideBuilder, loc, previous,
                                              normalizedUpperBounds[idx]);
          unsigned idx = combinedDimensions[i][0];
                                       previous, loops.getRegion());
  loops.getBody()->back().erase();
  newPloop.getBody()->getOperations().splice(
      loops.getBody()->getOperations());
    return op != inner.getOperation();
  LogicalResult status = success();
  for (auto &op : outer.getBody()->without_terminator()) {
    if (&op == inner.getOperation())
    if (forwardSlice.count(&op) > 0) {
    if (isa<scf::ForOp>(op))
    if (op.getNumRegions() > 0) {
    toHoist.push_back(&op);
  auto *outerForOp = outer.getOperation();
  for (auto *op : toHoist)
    op->moveBefore(outerForOp);
  LogicalResult status = success();
  const Loops &interTile = tileLoops.first;
  const Loops &intraTile = tileLoops.second;
  auto size = interTile.size();
  assert(size == intraTile.size());
  for (unsigned s = 1; s < size; ++s)
    status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
  for (unsigned s = 1; s < size; ++s)
    status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
template <typename T>
    unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
  for (unsigned i = 0; i < maxLoops; ++i) {
    forOps.push_back(rootForOp);
    Block &body = rootForOp.getRegion().front();
    if (body.begin() != std::prev(body.end(), 2))
    rootForOp = dyn_cast<T>(&body.front());
  assert(!forOp.getUnsignedCmp() && "unsigned loops are not supported");
  auto originalStep = forOp.getStep();
  auto iv = forOp.getInductionVar();
  forOp.setStep(arith::MulIOp::create(b, forOp.getLoc(), originalStep, factor));
  for (auto t : targets) {
    assert(!t.getUnsignedCmp() && "unsigned loops are not supported");
    auto begin = t.getBody()->begin();
    auto nOps = t.getBody()->getOperations().size();
    Value stepped = arith::AddIOp::create(b, t.getLoc(), iv, forOp.getStep());
        arith::MinSIOp::create(b, t.getLoc(), forOp.getUpperBound(), stepped);
    auto newForOp = scf::ForOp::create(b, t.getLoc(), iv, ub, originalStep);
    newForOp.getBody()->getOperations().splice(
        newForOp.getBody()->getOperations().begin(),
        t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
                                 newForOp.getRegion());
    innerLoops.push_back(newForOp);
template <typename SizeType>
  assert(res.size() == 1 && "Expected 1 inner forOp");
  for (auto it : llvm::zip(forOps, sizes)) {
    auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
    res.push_back(step);
    currentTargets = step;
    res.push_back(llvm::getSingleElement(loops));
  forOps.reserve(sizes.size());
  if (forOps.size() < sizes.size())
    sizes = sizes.take_front(forOps.size());
  return ::tile(forOps, sizes, forOps.back());
  forOps.reserve(sizes.size());
  if (forOps.size() < sizes.size())
    sizes = sizes.take_front(forOps.size());
  tileSizes.reserve(sizes.size());
  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
    assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
    auto forOp = forOps[i];
    auto loc = forOp.getLoc();
    Value diff = arith::SubIOp::create(builder, loc, forOp.getUpperBound(),
                                       forOp.getLowerBound());
    Value iterationsPerBlock =
    tileSizes.push_back(iterationsPerBlock);
  auto intraTile = tile(forOps, tileSizes, forOps.back());
  TileLoops tileLoops = std::make_pair(forOps, intraTile);
                                                      scf::ForallOp source,
  unsigned numTargetOuts = target.getNumResults();
  unsigned numSourceOuts = source.getNumResults();
  llvm::append_range(fusedOuts, target.getOutputs());
  llvm::append_range(fusedOuts, source.getOutputs());
  scf::ForallOp fusedLoop = scf::ForallOp::create(
      rewriter, source.getLoc(), source.getMixedLowerBound(),
      source.getMixedUpperBound(), source.getMixedStep(), fusedOuts,
      source.getMapping());
  mapping.map(target.getInductionVars(), fusedLoop.getInductionVars());
  mapping.map(source.getInductionVars(), fusedLoop.getInductionVars());
              fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
  mapping.map(source.getRegionIterArgs(),
              fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
    rewriter.clone(op, mapping);
  for (Operation &op : source.getBody()->without_terminator())
    rewriter.clone(op, mapping);
  scf::InParallelOp targetTerm = target.getTerminator();
  scf::InParallelOp sourceTerm = source.getTerminator();
  scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
  for (Operation &op : targetTerm.getYieldingOps())
    rewriter.clone(op, mapping);
  for (Operation &op : sourceTerm.getYieldingOps())
    rewriter.clone(op, mapping);
  rewriter.replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
  rewriter.replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
  assert(source.getUnsignedCmp() == target.getUnsignedCmp() &&
         "incompatible signedness");
  unsigned numTargetOuts = target.getNumResults();
  unsigned numSourceOuts = source.getNumResults();
  llvm::append_range(fusedInitArgs, target.getInitArgs());
  llvm::append_range(fusedInitArgs, source.getInitArgs());
  scf::ForOp fusedLoop = scf::ForOp::create(
      rewriter, source.getLoc(), source.getLowerBound(), source.getUpperBound(),
      source.getStep(), fusedInitArgs, nullptr,
      source.getUnsignedCmp());
  mapping.map(target.getInductionVar(), fusedLoop.getInductionVar());
              fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
  mapping.map(source.getInductionVar(), fusedLoop.getInductionVar());
  mapping.map(source.getRegionIterArgs(),
              fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
    rewriter.clone(op, mapping);
  for (Operation &op : source.getBody()->without_terminator())
    rewriter.clone(op, mapping);
  for (Value operand : target.getBody()->getTerminator()->getOperands())
  for (Value operand : source.getBody()->getTerminator()->getOperands())
  if (!yieldResults.empty())
    scf::YieldOp::create(rewriter, source.getLoc(), yieldResults);
  rewriter.replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
  rewriter.replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
                                                 scf::ForallOp forallOp) {
  if (forallOp.isNormalized())
  auto loc = forallOp.getLoc();
  for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
    Range normalizedLoopParams =
    newUbs.push_back(normalizedLoopParams.size);
  auto normalizedForallOp = scf::ForallOp::create(
      rewriter, loc, newUbs, forallOp.getOutputs(), forallOp.getMapping(),
                              normalizedForallOp.getBodyRegion(),
                              normalizedForallOp.getBodyRegion().begin());
  rewriter.eraseBlock(&normalizedForallOp.getBodyRegion().back());
  for (auto [idx, iv] :
       llvm::enumerate(normalizedForallOp.getInductionVars())) {
  rewriter.replaceOp(forallOp, normalizedForallOp);
  return normalizedForallOp;
  assert(!loops.empty() && "unexpected empty loop nest");
  if (loops.size() == 1)
    return isa_and_nonnull<scf::ForOp>(loops.front().getOperation());
  for (auto [outerLoop, innerLoop] :
       llvm::zip_equal(loops.drop_back(), loops.drop_front())) {
    auto outerFor = dyn_cast_or_null<scf::ForOp>(outerLoop.getOperation());
    auto innerFor = dyn_cast_or_null<scf::ForOp>(innerLoop.getOperation());
    if (!outerFor || !innerFor)
    auto outerBBArgs = outerFor.getRegionIterArgs();
    auto innerIterArgs = innerFor.getInitArgs();
    if (outerBBArgs.size() != innerIterArgs.size())
    for (auto [outerBBArg, innerIterArg] :
         llvm::zip_equal(outerBBArgs, innerIterArgs)) {
      if (!llvm::hasSingleElement(outerBBArg.getUses()) ||
          innerIterArg != outerBBArg)
        cast<scf::YieldOp>(outerFor.getBody()->getTerminator())->getOperands();
    ValueRange innerResults = innerFor.getResults();
    if (outerYields.size() != innerResults.size())
    for (auto [outerYield, innerResult] :
         llvm::zip_equal(outerYields, innerResults)) {
      if (!llvm::hasSingleElement(innerResult.getUses()) ||
          outerYield != innerResult)
  std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
  std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
  std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
  if (!loBnds || !upBnds || !steps)
  for (auto [lb, ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
    tripCounts.push_back(*numIter);
  const unsigned numLoops = op.getNumLoops();
  assert(llvm::none_of(unrollFactors, [](uint64_t f) { return f == 0; }) &&
         "Expected positive unroll factors");
  assert((!unrollFactors.empty() && (unrollFactors.size() <= numLoops)) &&
         "Expected non-empty unroll factors of size <= to the number of loops");
  if (llvm::all_of(unrollFactors, [](uint64_t f) { return f == 1; }))
        op, "Unrolling not applied if all factors are 1");
  if (llvm::hasSingleElement(op.getBody()->getOperations()))
  const unsigned firstLoopDimIdx = numLoops - unrollFactors.size();
  if (tripCounts.empty())
        op, "Failed to compute constant trip counts for the loop. Note that "
            "dynamic loop sizes are not supported.");
  for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
    const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
    if (tripCounts[dimIdx].urem(unrollFactor) != 0)
          op, "Unroll factors don't divide the iteration space evenly");
  std::optional<SmallVector<OpFoldResult>> maybeFoldSteps = op.getLoopSteps();
  if (!maybeFoldSteps)
  for (auto step : *maybeFoldSteps)
  for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
    const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
    if (unrollFactor == 1)
    const size_t origStep = steps[dimIdx];
    const int64_t newStep = origStep * unrollFactor;
    auto yieldedValues = op.getBody()->getTerminator()->getOperands();
        op.getBody(), op.getInductionVars()[dimIdx], unrollFactor,
          const AffineExpr expr = b.getAffineDimExpr(0) + (origStep * i);
              b.getDimIdentityMap().dropResult(0).insertResult(expr, 0);
          return affine::AffineApplyOp::create(b, iv.getLoc(), map,
        annotateFn, iterArgs, yieldedValues, &clonedToSrcOpsMap);
    op.getStepMutable()[dimIdx].assign(
static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > values)
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOps.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static Range emitNormalizedLoopBoundsForIndexType(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, ArrayRef< Value > values)
Helper function to multiply a sequence of values.
static std::pair< SmallVector< Value >, SmallPtrSet< Operation *, 2 > > delinearizeInductionVariable(RewriterBase &rewriter, Location loc, Value linearizedIv, ArrayRef< Value > ubs)
For each original loop, the value of the induction variable can be obtained by dividing the induction...
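The same arithmetic can be illustrated on plain integers. The sketch below is a hypothetical host-side analogue (the function name and the bounds U1, U2 are made up for illustration) of the div/mod chain the helper materializes as arith ops:
#include <array>
#include <cstdint>
// Hypothetical illustration: recover the 3-d indices from a linearized index
// over a space with upper bounds {U0, U1, U2}, innermost dimension fastest.
static std::array<int64_t, 3> delinearize(int64_t linear, int64_t U1,
                                          int64_t U2) {
  int64_t iv2 = linear % U2;        // innermost index
  int64_t iv1 = (linear / U2) % U1; // middle index
  int64_t iv0 = (linear / U2) / U1; // outermost index
  return {iv0, iv1, iv2};
}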
static void denormalizeInductionVariableForIndexType(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
static bool areInnerBoundsInvariant(scf::ForOp forOp)
Check if bounds of all inner loops are defined outside of forOp and return false if not.
static int64_t product(ArrayRef< int64_t > vals)
static llvm::ManagedStatic< PassManagerOptions > options
Base type for affine expression.
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIndexAttr(int64_t value)
IntegerAttr getIntegerAttr(Type type, int64_t value)
TypedAttr getZeroAttr(Type type)
MLIRContext * getContext() const
TypedAttr getOneAttr(Type type)
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
InsertPoint saveInsertionPoint() const
Return a saved insertion point.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void restoreInsertionPoint(InsertPoint ip)
Restore the insert point to a previously saved point.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
operand_type_range getOperandTypes()
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
result_range getResults()
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceAllUsesExcept(Value from, Value to, Operation *exceptedUser)
Find uses of from and replace them with to except if the user is exceptedUser.
virtual void inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues={})
Inline the operations of block 'source' into block 'dest' before the given position.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
void inlineRegionBefore(Region ®ion, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndex() const
Return true if this is an integer (of any signedness) or an index type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
Specialization of arith.constant op that returns an integer of index type.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Operation * getOwner() const
Return the owner of this operand.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
std::optional< llvm::APSInt > computeUbMinusLb(Value lb, Value ub, bool isSigned)
Helper function to compute the difference between two values.
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
bool isPerfectlyNestedForLoops(MutableArrayRef< LoopLikeOpInterface > loops)
Check if the provided loops are perfectly nested for-loops.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
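A minimal usage sketch, assuming the includes and namespaces of this file; the helper name and the function names "then_fn"/"else_fn" are hypothetical, only the entry point comes from the declaration above:
// Hypothetical sketch: outline both branches of an scf.if and keep handles to
// the created functions.
static LogicalResult outlineBothBranches(RewriterBase &rewriter,
                                         scf::IfOp ifOp) {
  func::FuncOp thenFn, elseFn;
  return outlineIfOp(rewriter, ifOp, &thenFn, "then_fn", &elseFn, "else_fn");
}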
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region ®ion)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
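A minimal usage sketch, assuming the includes of this file; the helper name and the forwarded value are hypothetical, while the entry point and the NewYieldValuesFn shape come from the declarations above:
// Hypothetical sketch: thread one extra init value through a perfectly nested
// scf.for nest and yield it unchanged from the innermost loop.
static void yieldExtraValue(RewriterBase &rewriter,
                            MutableArrayRef<scf::ForOp> loopNest, Value init) {
  NewYieldValuesFn yieldFn = [](OpBuilder &b, Location loc,
                                ArrayRef<BlockArgument> newBbArgs) {
    // Forward the newly added iter_arg as the yielded value.
    return SmallVector<Value>{newBbArgs.front()};
  };
  SmallVector<Value> newInits = {init};
  SmallVector<scf::ForOp> newNest = replaceLoopNestWithNewYields(
      rewriter, loopNest, newInits, yieldFn,
      /*replaceIterOperandsUsesInLoop=*/true);
  (void)newNest;
}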
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
std::function< SmallVector< Value >( OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an affine.for to find a band to coalesce.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
void generateUnrolledLoop(Block *loopBodyBlock, Value iv, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues, IRMapping *clonedToSrcOpsMap=nullptr)
Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs' and 'yieldedValues' as the...
Value getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
LogicalResult loopUnrollFull(scf::ForOp forOp)
Unrolls this loop completely.
llvm::SmallVector< llvm::APInt > getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp)
Get constant trip counts for each of the induction variables of the given loop operation.
std::pair< Loops, Loops > TileLoops
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned > > combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
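A minimal sketch of how the dimension grouping is expressed (the helper name and the particular grouping are made up for illustration):
// Hypothetical sketch: collapse a 3-d scf.parallel into a 2-d one by fusing
// dimensions 0 and 1 and keeping dimension 2 on its own.
static void collapseTo2d(RewriterBase &rewriter, scf::ParallelOp ploop) {
  std::vector<unsigned> fused = {0, 1};
  std::vector<unsigned> kept = {2};
  std::vector<std::vector<unsigned>> combined = {fused, kept};
  collapseParallelLoops(rewriter, ploop, combined);
}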
llvm::SetVector< T, Vector, Set, N > SetVector
SliceOptions ForwardSliceOptions
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
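A minimal usage sketch; the helper name and the 32x32 tile sizes are arbitrary choices for illustration:
// Hypothetical sketch: tile a 2-d perfectly nested band rooted at `rootForOp`
// with constant tile sizes materialized right before the root loop.
static Loops tileBy32(scf::ForOp rootForOp) {
  OpBuilder b(rootForOp);
  Location loc = rootForOp.getLoc();
  Value ts0 = arith::ConstantIndexOp::create(b, loc, 32);
  Value ts1 = arith::ConstantIndexOp::create(b, loc, 32);
  return tilePerfectlyNested(rootForOp, {ts0, ts1});
}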
FailureOr< UnrolledLoopInfo > loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
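A minimal usage sketch; the helper name, the factor 4, and the "unroll_iteration" attribute are made up for illustration:
// Hypothetical sketch: unroll a loop by 4 and tag each unrolled clone with the
// unroll iteration it came from.
static LogicalResult unrollBy4(scf::ForOp forOp) {
  auto annotate = [](unsigned iteration, Operation *op, OpBuilder b) {
    op->setAttr("unroll_iteration", b.getI64IntegerAttr(iteration));
  };
  FailureOr<UnrolledLoopInfo> info =
      loopUnrollByFactor(forOp, /*unrollFactor=*/4, annotate);
  return failure(failed(info));
}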
LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor)
Unrolls and jams this scf.for operation by the specified unroll factor.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
bool isZeroInteger(OpFoldResult v)
Return "true" if v is an integer value/attribute with constant value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
FailureOr< scf::ParallelOp > parallelLoopUnrollByFactors(scf::ParallelOp op, ArrayRef< uint64_t > unrollFactors, RewriterBase &rewriter, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, IRMapping *clonedToSrcOpsMap=nullptr)
Unroll this scf::Parallel loop by the specified unroll factors.
void getUsedValuesDefinedAbove(Region ®ion, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region ®ion, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
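A minimal usage sketch; the helper name and the function name "outlined_then" are hypothetical:
// Hypothetical sketch: outline the "then" region of an scf.if into a function
// and keep a handle to the call that replaces the region's body.
static FailureOr<func::FuncOp> outlineThenRegion(RewriterBase &rewriter,
                                                 scf::IfOp ifOp) {
  func::CallOp call;
  return outlineSingleBlockRegion(rewriter, ifOp.getLoc(),
                                  ifOp.getThenRegion(), "outlined_then", &call);
}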
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Get back the original induction variable values after loop normalization.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
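A minimal usage sketch combining this with getPerfectlyNestedLoops; the helper name is hypothetical:
// Hypothetical sketch: collect the perfect scf.for nest rooted at `root` and
// collapse it into a single linearized loop.
static LogicalResult coalesceNest(scf::ForOp root) {
  SmallVector<scf::ForOp> nest;
  getPerfectlyNestedLoops(nest, root);
  if (nest.size() < 2)
    return failure();
  return coalesceLoops(nest);
}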
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
llvm::function_ref< Fn > function_ref
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
Materialize bounds and step of a zero-based and unit-step loop derived by normalizing the specified b...
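The normalized upper bound is the loop's trip count. A hypothetical integer-level illustration of the same arithmetic (names made up, positive step assumed), mirroring the ceildiv used above:
// A loop over (lb, ub, step) becomes a loop over (0, ceildiv(ub - lb, step), 1).
static int64_t normalizedUpperBound(int64_t lb, int64_t ub, int64_t step) {
  return (ub - lb + step - 1) / step; // ceildiv for a positive step
}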
SmallVector< scf::ForOp, 8 > Loops
Tile a nest of standard for loops rooted at rootForOp by finding such parametric tile sizes that the ...
bool isOneInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 1.
std::optional< APInt > constantTripCount(OpFoldResult lb, OpFoldResult ub, OpFoldResult step, bool isSigned, llvm::function_ref< std::optional< llvm::APSInt >(Value, Value, bool)> computeUbMinusLb)
Return the number of iterations for a loop with a lower bound lb, upper bound ub and step step,...
LogicalResult foldDynamicIndexList(SmallVectorImpl< OpFoldResult > &ofrs, bool onlyNonNegative=false, bool onlyNonZero=false)
Returns "success" when any of the elements in ofrs is a constant value.
FailureOr< scf::ForallOp > normalizeForallOp(RewriterBase &rewriter, scf::ForallOp forallOp)
Normalize an scf.forall operation.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
std::optional< scf::ForOp > epilogueLoopOp
std::optional< scf::ForOp > mainLoopOp