25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/SmallVectorExtras.h"
29#include "llvm/Support/DebugLog.h"
34#define DEBUG_TYPE "scf-utils"
39 bool replaceIterOperandsUsesInLoop) {
45 assert(loopNest.size() <= 10 &&
46 "exceeded recursion limit when yielding value from loop nest");
78 if (loopNest.size() == 1) {
80 cast<scf::ForOp>(*loopNest.back().replaceWithAdditionalYields(
81 rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
83 return {innerMostLoop};
93 innerNewBBArgs, newYieldValuesFn,
94 replaceIterOperandsUsesInLoop);
95 return llvm::map_to_vector(
96 newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
99 scf::ForOp outerMostLoop =
100 cast<scf::ForOp>(*loopNest.front().replaceWithAdditionalYields(
101 rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
102 newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
119 func::CallOp *callOp) {
120 assert(!funcName.empty() &&
"funcName cannot be empty");
134 ValueRange outlinedValues(captures.getArrayRef());
141 outlinedFuncArgTypes.push_back(arg.getType());
142 outlinedFuncArgLocs.push_back(arg.getLoc());
144 for (
Value value : outlinedValues) {
145 outlinedFuncArgTypes.push_back(value.getType());
146 outlinedFuncArgLocs.push_back(value.getLoc());
148 FunctionType outlinedFuncType =
149 FunctionType::get(rewriter.
getContext(), outlinedFuncArgTypes,
152 func::FuncOp::create(rewriter, loc, funcName, outlinedFuncType);
153 Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
158 auto outlinedFuncBlockArgs = outlinedFuncBody->
getArguments();
163 originalBlock, outlinedFuncBody,
164 outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
167 func::ReturnOp::create(rewriter, loc, originalTerminator->
getResultTypes(),
174 ®ion, region.
begin(),
175 TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
177 .take_front(numOriginalBlockArguments));
182 llvm::append_range(callValues, newBlock->
getArguments());
183 llvm::append_range(callValues, outlinedValues);
184 auto call = func::CallOp::create(rewriter, loc, outlinedFunc, callValues);
193 rewriter.
clone(*originalTerminator, bvm);
194 rewriter.
eraseOp(originalTerminator);
199 for (
auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
200 outlinedValues.size()))) {
201 Value orig = std::get<0>(it);
202 Value repl = std::get<1>(it);
211 return outlinedFunc->isProperAncestor(opOperand.
getOwner());
219 func::FuncOp *thenFn, StringRef thenFnName,
220 func::FuncOp *elseFn, StringRef elseFnName) {
223 FailureOr<func::FuncOp> outlinedFuncOpOrFailure;
224 if (thenFn && !ifOp.getThenRegion().empty()) {
226 rewriter, loc, ifOp.getThenRegion(), thenFnName);
227 if (failed(outlinedFuncOpOrFailure))
229 *thenFn = *outlinedFuncOpOrFailure;
231 if (elseFn && !ifOp.getElseRegion().empty()) {
233 rewriter, loc, ifOp.getElseRegion(), elseFnName);
234 if (failed(outlinedFuncOpOrFailure))
236 *elseFn = *outlinedFuncOpOrFailure;
243 assert(rootOp !=
nullptr &&
"Root operation must not be a nullptr.");
244 bool rootEnclosesPloops =
false;
246 for (
Block &block : region.getBlocks()) {
249 rootEnclosesPloops |= enclosesPloops;
250 if (
auto ploop = dyn_cast<scf::ParallelOp>(op)) {
251 rootEnclosesPloops =
true;
260 return rootEnclosesPloops;
268 assert(divisor > 0 &&
"expected positive divisor");
270 "expected integer or index-typed value");
272 Value divisorMinusOneCst = arith::ConstantOp::create(
274 Value divisorCst = arith::ConstantOp::create(
276 Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOneCst);
277 return arith::DivUIOp::create(builder, loc, sum, divisorCst);
287 "expected integer or index-typed value");
288 Value cstOne = arith::ConstantOp::create(
290 Value divisorMinusOne = arith::SubIOp::create(builder, loc, divisor, cstOne);
291 Value sum = arith::AddIOp::create(builder, loc, dividend, divisorMinusOne);
292 return arith::DivUIOp::create(builder, loc, sum, divisor);
296 Block *loopBodyBlock,
Value iv, uint64_t unrollFactor,
304 auto findOriginalSrcOp =
309 while (srcOp && clonedToSrcOpsMap.
contains(srcOp))
310 srcOp = clonedToSrcOpsMap.
lookup(srcOp);
320 annotateFn = noopAnnotateFn;
330 for (
unsigned i = 1; i < unrollFactor; i++) {
333 operandMap.
map(iterArgs, lastYielded);
338 Value ivUnroll = ivRemapFn(i, iv, builder);
339 operandMap.
map(iv, ivUnroll);
343 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++) {
346 annotateFn(i, clonedOp, builder);
347 if (clonedToSrcOpsMap)
348 clonedToSrcOpsMap->
map(clonedOp,
349 findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
353 for (
unsigned i = 0, e = lastYielded.size(); i < e; i++)
359 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++)
360 annotateFn(0, &*it, builder);
369 scf::ForOp forOp, uint64_t unrollFactor,
371 assert(unrollFactor > 0 &&
"expected positive unroll factor");
374 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
381 auto loc = forOp.getLoc();
382 Value step = forOp.getStep();
383 Value upperBoundUnrolled;
385 bool generateEpilogueLoop =
true;
387 std::optional<APInt> constTripCount = forOp.getStaticTripCount();
388 if (constTripCount) {
390 bool isUnsignedLoop = forOp.getUnsignedCmp();
396 if (isUnsignedLoop) {
397 if (
auto intTy = dyn_cast<IntegerType>(forOp.getUpperBound().getType()))
398 if (intTy.getWidth() >= 64)
403 assert(apInt &&
"expected constant loop bound");
404 return isUnsignedLoop ?
static_cast<int64_t>(apInt->first.getZExtValue())
405 : apInt->first.getSExtValue();
407 int64_t lbCst = getLoopBound(forOp.getLowerBound());
408 int64_t ubCst = getLoopBound(forOp.getUpperBound());
409 int64_t stepCst = getLoopBound(step);
410 if (unrollFactor == 1) {
411 if (constTripCount->isOne() &&
412 failed(forOp.promoteIfSingleIteration(rewriter)))
417 uint64_t tripCount = constTripCount->getZExtValue();
418 uint64_t tripCountEvenMultiple = tripCount - tripCount % unrollFactor;
419 int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
420 int64_t stepUnrolledCst = stepCst * unrollFactor;
423 generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
424 if (generateEpilogueLoop)
425 upperBoundUnrolled = arith::ConstantOp::create(
428 upperBoundUnrolledCst));
430 upperBoundUnrolled = forOp.getUpperBound();
438 bool mainLoopHasNoIter = (tripCountEvenMultiple == 0);
439 bool stepUnchanged = (stepCst == stepUnrolledCst);
441 (mainLoopHasNoIter || stepUnchanged)
443 : arith::ConstantOp::create(boundsBuilder, loc,
445 step.
getType(), stepUnrolledCst));
450 auto lowerBound = forOp.getLowerBound();
451 auto upperBound = forOp.getUpperBound();
453 arith::SubIOp::create(boundsBuilder, loc, upperBound, lowerBound);
455 Value unrollFactorCst = arith::ConstantOp::create(
459 arith::RemSIOp::create(boundsBuilder, loc, tripCount, unrollFactorCst);
461 Value tripCountEvenMultiple =
462 arith::SubIOp::create(boundsBuilder, loc, tripCount, tripCountRem);
464 upperBoundUnrolled = arith::AddIOp::create(
465 boundsBuilder, loc, lowerBound,
466 arith::MulIOp::create(boundsBuilder, loc, tripCountEvenMultiple, step));
469 arith::MulIOp::create(boundsBuilder, loc, step, unrollFactorCst);
475 if (generateEpilogueLoop) {
476 OpBuilder epilogueBuilder(forOp->getContext());
478 auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.
clone(*forOp));
479 epilogueForOp.setLowerBound(upperBoundUnrolled);
482 auto results = forOp.getResults();
483 auto epilogueResults = epilogueForOp.getResults();
485 for (
auto e : llvm::zip(results, epilogueResults)) {
486 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
488 epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
489 epilogueForOp.getInitArgs().size(), results);
490 if (epilogueForOp.promoteIfSingleIteration(rewriter).failed())
495 forOp.setUpperBound(upperBoundUnrolled);
496 forOp.setStep(stepUnrolled);
498 auto iterArgs =
ValueRange(forOp.getRegionIterArgs());
499 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
502 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
505 auto stride = arith::MulIOp::create(
507 arith::ConstantOp::create(b, loc,
508 b.getIntegerAttr(iv.getType(), i)));
509 return arith::AddIOp::create(b, loc, iv, stride);
511 annotateFn, iterArgs, yieldedValues);
513 if (forOp.promoteIfSingleIteration(rewriter).failed())
521 std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
522 if (!mayBeConstantTripCount.has_value())
524 const APInt &tripCount = *mayBeConstantTripCount;
525 if (tripCount.isZero())
527 if (tripCount.isOne())
528 return forOp.promoteIfSingleIteration(rewriter);
535 auto walkResult = forOp.walk([&](scf::ForOp innerForOp) {
536 if (!forOp.isDefinedOutsideOfLoop(innerForOp.getLowerBound()) ||
537 !forOp.isDefinedOutsideOfLoop(innerForOp.getUpperBound()) ||
538 !forOp.isDefinedOutsideOfLoop(innerForOp.getStep()))
543 return !walkResult.wasInterrupted();
548 uint64_t unrollJamFactor) {
549 assert(unrollJamFactor > 0 &&
"unroll jam factor should be positive");
551 if (unrollJamFactor == 1)
557 LDBG() <<
"failed to unroll and jam: inner bounds are not invariant";
562 if (forOp->getNumResults() > 0) {
563 LDBG() <<
"failed to unroll and jam: unsupported loop with results";
569 std::optional<APInt> tripCount = forOp.getStaticTripCount();
570 if (!tripCount.has_value()) {
572 LDBG() <<
"failed to unroll and jam: trip count could not be determined";
575 uint64_t tripCountValue = tripCount->getZExtValue();
576 if (tripCountValue == 0)
578 if (unrollJamFactor > tripCountValue) {
579 LDBG() <<
"unroll and jam factor is greater than trip count, set factor to "
582 unrollJamFactor = tripCountValue;
583 }
else if (tripCountValue % unrollJamFactor != 0) {
584 LDBG() <<
"failed to unroll and jam: unsupported trip count that is not a "
585 "multiple of unroll jam factor";
590 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
600 forOp.walk([&](scf::ForOp innerForOp) { innerLoops.push_back(innerForOp); });
611 for (scf::ForOp oldForOp : innerLoops) {
613 ValueRange oldIterOperands = oldForOp.getInits();
614 ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
616 cast<scf::YieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
619 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
620 dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
621 dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
625 bool forOpReplaced = oldForOp == forOp;
626 scf::ForOp newForOp =
627 cast<scf::ForOp>(*oldForOp.replaceWithAdditionalYields(
628 rewriter, dupIterOperands,
false,
630 return dupYieldOperands;
632 newInnerLoops.push_back(newForOp);
637 ValueRange newIterArgs = newForOp.getRegionIterArgs();
638 unsigned oldNumIterArgs = oldIterArgs.size();
639 ValueRange newResults = newForOp.getResults();
640 unsigned oldNumResults = newResults.size() / unrollJamFactor;
641 assert(oldNumIterArgs == oldNumResults &&
642 "oldNumIterArgs must be the same as oldNumResults");
643 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
644 for (
unsigned j = 0;
j < oldNumIterArgs; ++
j) {
648 operandMaps[i - 1].map(newIterArgs[
j],
649 newIterArgs[i * oldNumIterArgs +
j]);
650 operandMaps[i - 1].map(newResults[
j],
651 newResults[i * oldNumResults +
j]);
658 int64_t step = forOp.getConstantStep()->getSExtValue();
660 forOp.getLoc(), forOp.getStep(),
662 forOp.getLoc(), rewriter.
getIndexAttr(unrollJamFactor)));
663 forOp.setStep(newStep);
664 auto forOpIV = forOp.getInductionVar();
667 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
668 for (
auto &subBlock : subBlocks) {
671 OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
675 if (!forOpIV.use_empty()) {
680 builder.
createOrFold<arith::AddIOp>(forOp.getLoc(), forOpIV, ivTag);
681 operandMaps[i - 1].map(forOpIV, ivUnroll);
684 for (
auto it = subBlock.first; it != std::next(subBlock.second); ++it)
685 builder.
clone(*it, operandMaps[i - 1]);
688 for (
auto newForOp : newInnerLoops) {
689 unsigned oldNumIterOperands =
690 newForOp.getNumRegionIterArgs() / unrollJamFactor;
691 unsigned numControlOperands = newForOp.getNumControlOperands();
692 auto yieldOp = cast<scf::YieldOp>(newForOp.getBody()->getTerminator());
693 unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
694 assert(oldNumIterOperands == oldNumYieldOperands &&
695 "oldNumIterOperands must be the same as oldNumYieldOperands");
696 for (
unsigned j = 0;
j < oldNumIterOperands; ++
j) {
700 newForOp.setOperand(numControlOperands + i * oldNumIterOperands +
j,
701 operandMaps[i - 1].lookupOrDefault(
702 newForOp.getOperand(numControlOperands +
j)));
704 i * oldNumYieldOperands +
j,
705 operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(
j)));
711 (
void)forOp.promoteIfSingleIteration(rewriter);
719 Range normalizedLoopBounds;
725 normalizedLoopBounds.
size =
727 return normalizedLoopBounds;
739 bool isZeroBased =
false;
741 isZeroBased = lbCst.value() == 0;
743 bool isStepOne =
false;
745 isStepOne = stepCst.value() == 1;
749 "expected matching types");
754 if (isZeroBased && isStepOne)
755 return {lb,
ub, step};
765 newUpperBound = rewriter.
createOrFold<arith::CeilDivSIOp>(
773 return {newLowerBound, newUpperBound, newStep};
787 Value denormalizedIvVal =
794 if (
Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
795 preservedUses.insert(preservedUse);
804 if (
getType(origLb).isIndex()) {
808 Value denormalizedIv;
813 Value scaled = normalizedIv;
815 Value origStepValue =
817 scaled = arith::MulIOp::create(rewriter, loc, normalizedIv, origStepValue);
820 denormalizedIv = scaled;
823 denormalizedIv = arith::AddIOp::create(rewriter, loc, scaled, origLbValue);
832 assert(!values.empty() &&
"unexecpted empty array");
837 for (
auto v : values) {
847 assert(!values.empty() &&
"unexpected empty list");
853 std::optional<Value> productOf;
854 for (
auto v : values) {
856 if (vOne && vOne.value() == 1)
859 productOf = arith::MulIOp::create(rewriter, loc, productOf.value(), v)
865 productOf = arith::ConstantOp::create(
869 return productOf.value();
885 Operation *delinearizedOp = affine::AffineDelinearizeIndexOp::create(
886 rewriter, loc, linearizedIv, ubs);
887 auto resultVals = llvm::map_to_vector(
895 llvm::BitVector isUbOne(ubs.size());
896 for (
auto [
index,
ub] : llvm::enumerate(ubs)) {
898 if (ubCst && ubCst.value() == 1)
903 unsigned numLeadingOneUbs = 0;
904 for (
auto [
index,
ub] : llvm::enumerate(ubs)) {
905 if (!isUbOne.test(
index)) {
908 delinearizedIvs[
index] = arith::ConstantOp::create(
913 Value previous = linearizedIv;
914 for (
unsigned i = numLeadingOneUbs, e = ubs.size(); i < e; ++i) {
915 unsigned idx = ubs.size() - (i - numLeadingOneUbs) - 1;
916 if (i != numLeadingOneUbs && !isUbOne.test(idx + 1)) {
917 previous = arith::DivSIOp::create(rewriter, loc, previous, ubs[idx + 1]);
922 if (!isUbOne.test(idx)) {
923 iv = arith::RemSIOp::create(rewriter, loc, previous, ubs[idx]);
926 iv = arith::ConstantOp::create(
927 rewriter, loc, rewriter.
getZeroAttr(ubs[idx].getType()));
930 delinearizedIvs[idx] = iv;
932 return {delinearizedIvs, preservedUsers};
937 if (loops.size() < 2)
940 scf::ForOp innermost = loops.back();
941 scf::ForOp outermost = loops.front();
945 for (
auto loop : loops) {
947 if (step.value() == 0) {
954 for (
auto loop : loops) {
957 Value lb = loop.getLowerBound();
958 Value ub = loop.getUpperBound();
959 Value step = loop.getStep();
965 newLoopRange.offset));
969 newLoopRange.stride));
973 loop.getInductionVar(), lb, step);
982 loops, [](
auto loop) {
return loop.getUpperBound(); });
984 outermost.setUpperBound(upperBound);
989 rewriter, loc, outermost.getInductionVar(), upperBounds);
993 for (
int i = loops.size() - 1; i > 0; --i) {
994 auto outerLoop = loops[i - 1];
995 auto innerLoop = loops[i];
997 Operation *innerTerminator = innerLoop.getBody()->getTerminator();
998 auto yieldedVals = llvm::to_vector(innerTerminator->
getOperands());
999 assert(llvm::equal(outerLoop.getRegionIterArgs(), innerLoop.getInitArgs()));
1000 for (
Value &yieldedVal : yieldedVals) {
1003 auto iter = llvm::find(innerLoop.getRegionIterArgs(), yieldedVal);
1004 if (iter != innerLoop.getRegionIterArgs().end()) {
1005 unsigned iterArgIndex = iter - innerLoop.getRegionIterArgs().begin();
1007 assert(iterArgIndex < innerLoop.getInitArgs().size());
1008 yieldedVal = innerLoop.getInitArgs()[iterArgIndex];
1011 rewriter.
eraseOp(innerTerminator);
1014 innerBlockArgs.push_back(delinearizeIvs[i]);
1015 llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
1018 rewriter.
replaceOp(innerLoop, yieldedVals);
1024 if (loops.empty()) {
1027 IRRewriter rewriter(loops.front().getContext());
1032 LogicalResult
result(failure());
1042 for (
unsigned i = 0, e = loops.size(); i < e; ++i) {
1043 operandsDefinedAbove[i] = i;
1044 for (
unsigned j = 0;
j < i; ++
j) {
1046 loops[i].getUpperBound(),
1047 loops[i].getStep()};
1049 operandsDefinedAbove[i] =
j;
1060 iterArgChainStart[0] = 0;
1061 for (
unsigned i = 1, e = loops.size(); i < e; ++i) {
1063 iterArgChainStart[i] = i;
1064 auto outerloop = loops[i - 1];
1065 auto innerLoop = loops[i];
1066 if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
1069 if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) {
1072 auto outerloopTerminator = outerloop.getBody()->getTerminator();
1073 if (!llvm::equal(outerloopTerminator->getOperands(),
1074 innerLoop.getResults())) {
1077 iterArgChainStart[i] = iterArgChainStart[i - 1];
1083 for (
unsigned end = loops.size(); end > 0; --end) {
1085 for (; start < end - 1; ++start) {
1087 *std::max_element(std::next(operandsDefinedAbove.begin(), start),
1088 std::next(operandsDefinedAbove.begin(), end));
1091 if (iterArgChainStart[end - 1] > start)
1100 if (start != end - 1)
1108 ArrayRef<std::vector<unsigned>> combinedDimensions) {
1114 auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
1115 for (
auto &dims : sortedDimensions)
1120 for (
unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
1123 Value lb = loops.getLowerBound()[i];
1124 Value ub = loops.getUpperBound()[i];
1125 Value step = loops.getStep()[i];
1128 rewriter, loops.getLoc(), newLoopRange.size));
1139 for (
auto &sortedDimension : sortedDimensions) {
1141 for (
auto idx : sortedDimension) {
1142 newUpperBound = arith::MulIOp::create(rewriter, loc, newUpperBound,
1143 normalizedUpperBounds[idx]);
1145 lowerBounds.push_back(cst0);
1146 steps.push_back(cst1);
1147 upperBounds.push_back(newUpperBound);
1156 auto newPloop = scf::ParallelOp::create(
1157 rewriter, loc, lowerBounds, upperBounds, steps,
1159 for (
unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
1160 Value previous = ploopIVs[i];
1161 unsigned numberCombinedDimensions = combinedDimensions[i].size();
1163 for (
unsigned j = numberCombinedDimensions - 1;
j > 0; --
j) {
1164 unsigned idx = combinedDimensions[i][
j];
1167 Value iv = arith::RemSIOp::create(insideBuilder, loc, previous,
1168 normalizedUpperBounds[idx]);
1174 previous = arith::DivSIOp::create(insideBuilder, loc, previous,
1175 normalizedUpperBounds[idx]);
1179 unsigned idx = combinedDimensions[i][0];
1181 previous, loops.getRegion());
1186 loops.getBody()->back().erase();
1187 newPloop.getBody()->getOperations().splice(
1189 loops.getBody()->getOperations());
1202 return op != inner.getOperation();
1205 LogicalResult status =
success();
1207 for (
auto &op : outer.getBody()->without_terminator()) {
1209 if (&op == inner.getOperation())
1212 if (forwardSlice.count(&op) > 0) {
1217 if (isa<scf::ForOp>(op))
1220 if (op.getNumRegions() > 0) {
1230 toHoist.push_back(&op);
1232 auto *outerForOp = outer.getOperation();
1233 for (
auto *op : toHoist)
1234 op->moveBefore(outerForOp);
1243 LogicalResult status =
success();
1244 const Loops &interTile = tileLoops.first;
1245 const Loops &intraTile = tileLoops.second;
1246 auto size = interTile.size();
1247 assert(size == intraTile.size());
1250 for (
unsigned s = 1; s < size; ++s)
1251 status = succeeded(status) ?
hoistOpsBetween(intraTile[0], intraTile[s])
1253 for (
unsigned s = 1; s < size; ++s)
1254 status = succeeded(status) ?
hoistOpsBetween(interTile[0], interTile[s])
1263template <
typename T>
1266 unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
1267 for (
unsigned i = 0; i < maxLoops; ++i) {
1268 forOps.push_back(rootForOp);
1269 Block &body = rootForOp.getRegion().front();
1270 if (body.
begin() != std::prev(body.
end(), 2))
1273 rootForOp = dyn_cast<T>(&body.
front());
1281 assert(!forOp.getUnsignedCmp() &&
"unsigned loops are not supported");
1282 auto originalStep = forOp.getStep();
1283 auto iv = forOp.getInductionVar();
1286 forOp.setStep(arith::MulIOp::create(
b, forOp.getLoc(), originalStep, factor));
1289 for (
auto t : targets) {
1290 assert(!t.getUnsignedCmp() &&
"unsigned loops are not supported");
1293 auto begin = t.getBody()->begin();
1294 auto nOps = t.getBody()->getOperations().size();
1298 Value stepped = arith::AddIOp::create(
b, t.getLoc(), iv, forOp.getStep());
1300 arith::MinSIOp::create(
b, t.getLoc(), forOp.getUpperBound(), stepped);
1303 auto newForOp = scf::ForOp::create(
b, t.getLoc(), iv,
ub, originalStep);
1304 newForOp.getBody()->getOperations().splice(
1305 newForOp.getBody()->getOperations().begin(),
1306 t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
1308 newForOp.getRegion());
1310 innerLoops.push_back(newForOp);
1318template <
typename SizeType>
1326 assert(res.size() == 1 &&
"Expected 1 inner forOp");
1335 for (
auto it : llvm::zip(forOps, sizes)) {
1336 auto step =
stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
1337 res.push_back(step);
1338 currentTargets = step;
1347 res.push_back(llvm::getSingleElement(loops));
1355 forOps.reserve(sizes.size());
1357 if (forOps.size() < sizes.size())
1358 sizes = sizes.take_front(forOps.size());
1360 return ::tile(forOps, sizes, forOps.back());
1373 forOps.reserve(sizes.size());
1375 if (forOps.size() < sizes.size())
1376 sizes = sizes.take_front(forOps.size());
1383 if (llvm::any_of(forOps,
1384 [](scf::ForOp op) {
return !op.getInitArgs().empty(); }))
1392 tileSizes.reserve(sizes.size());
1393 for (
unsigned i = 0, e = sizes.size(); i < e; ++i) {
1394 assert(sizes[i] > 0 &&
"expected strictly positive size for strip-mining");
1396 auto forOp = forOps[i];
1398 auto loc = forOp.getLoc();
1399 Value diff = arith::SubIOp::create(builder, loc, forOp.getUpperBound(),
1400 forOp.getLowerBound());
1402 Value iterationsPerBlock =
1404 tileSizes.push_back(iterationsPerBlock);
1408 auto intraTile =
tile(forOps, tileSizes, forOps.back());
1409 TileLoops tileLoops = std::make_pair(forOps, intraTile);
1420 scf::ForallOp source,
1422 unsigned numTargetOuts =
target.getNumResults();
1423 unsigned numSourceOuts = source.getNumResults();
1427 llvm::append_range(fusedOuts,
target.getOutputs());
1428 llvm::append_range(fusedOuts, source.getOutputs());
1432 scf::ForallOp fusedLoop = scf::ForallOp::create(
1433 rewriter, source.getLoc(), source.getMixedLowerBound(),
1434 source.getMixedUpperBound(), source.getMixedStep(), fusedOuts,
1435 source.getMapping());
1439 mapping.
map(
target.getInductionVars(), fusedLoop.getInductionVars());
1440 mapping.
map(source.getInductionVars(), fusedLoop.getInductionVars());
1444 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1445 mapping.
map(source.getRegionIterArgs(),
1446 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1451 rewriter.
clone(op, mapping);
1452 for (
Operation &op : source.getBody()->without_terminator())
1453 rewriter.
clone(op, mapping);
1456 scf::InParallelOp targetTerm =
target.getTerminator();
1457 scf::InParallelOp sourceTerm = source.getTerminator();
1458 scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
1460 for (
Operation &op : targetTerm.getYieldingOps())
1461 rewriter.
clone(op, mapping);
1462 for (
Operation &op : sourceTerm.getYieldingOps())
1463 rewriter.
clone(op, mapping);
1466 rewriter.
replaceOp(
target, fusedLoop.getResults().take_front(numTargetOuts));
1467 rewriter.
replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1475 assert(source.getUnsignedCmp() ==
target.getUnsignedCmp() &&
1476 "incompatible signedness");
1477 unsigned numTargetOuts =
target.getNumResults();
1478 unsigned numSourceOuts = source.getNumResults();
1482 llvm::append_range(fusedInitArgs,
target.getInitArgs());
1483 llvm::append_range(fusedInitArgs, source.getInitArgs());
1488 scf::ForOp fusedLoop = scf::ForOp::create(
1489 rewriter, source.getLoc(), source.getLowerBound(), source.getUpperBound(),
1490 source.getStep(), fusedInitArgs,
nullptr,
1491 source.getUnsignedCmp());
1495 mapping.
map(
target.getInductionVar(), fusedLoop.getInductionVar());
1497 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1498 mapping.
map(source.getInductionVar(), fusedLoop.getInductionVar());
1499 mapping.
map(source.getRegionIterArgs(),
1500 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1505 rewriter.
clone(op, mapping);
1506 for (
Operation &op : source.getBody()->without_terminator())
1507 rewriter.
clone(op, mapping);
1511 for (
Value operand :
target.getBody()->getTerminator()->getOperands())
1513 for (
Value operand : source.getBody()->getTerminator()->getOperands())
1515 if (!yieldResults.empty())
1516 scf::YieldOp::create(rewriter, source.getLoc(), yieldResults);
1519 rewriter.
replaceOp(
target, fusedLoop.getResults().take_front(numTargetOuts));
1520 rewriter.
replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1526 scf::ForallOp forallOp) {
1531 if (forallOp.isNormalized())
1535 auto loc = forallOp.getLoc();
1538 for (
auto [lb,
ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
1539 Range normalizedLoopParams =
1541 newUbs.push_back(normalizedLoopParams.
size);
1547 auto normalizedForallOp = scf::ForallOp::create(
1548 rewriter, loc, newUbs, forallOp.getOutputs(), forallOp.getMapping(),
1552 normalizedForallOp.getBodyRegion(),
1553 normalizedForallOp.getBodyRegion().begin());
1555 rewriter.
eraseBlock(&normalizedForallOp.getBodyRegion().back());
1559 for (
auto [idx, iv] :
1560 llvm::enumerate(normalizedForallOp.getInductionVars())) {
1566 rewriter.
replaceOp(forallOp, normalizedForallOp);
1567 return normalizedForallOp;
1572 assert(!loops.empty() &&
"unexpected empty loop nest");
1573 if (loops.size() == 1)
1574 return isa_and_nonnull<scf::ForOp>(loops.front().getOperation());
1575 for (
auto [outerLoop, innerLoop] :
1576 llvm::zip_equal(loops.drop_back(), loops.drop_front())) {
1577 auto outerFor = dyn_cast_or_null<scf::ForOp>(outerLoop.getOperation());
1578 auto innerFor = dyn_cast_or_null<scf::ForOp>(innerLoop.getOperation());
1579 if (!outerFor || !innerFor)
1581 auto outerBBArgs = outerFor.getRegionIterArgs();
1582 auto innerIterArgs = innerFor.getInitArgs();
1583 if (outerBBArgs.size() != innerIterArgs.size())
1586 for (
auto [outerBBArg, innerIterArg] :
1587 llvm::zip_equal(outerBBArgs, innerIterArgs)) {
1588 if (!llvm::hasSingleElement(outerBBArg.getUses()) ||
1589 innerIterArg != outerBBArg)
1594 cast<scf::YieldOp>(outerFor.getBody()->getTerminator())->getOperands();
1595 ValueRange innerResults = innerFor.getResults();
1596 if (outerYields.size() != innerResults.size())
1598 for (
auto [outerYield, innerResult] :
1599 llvm::zip_equal(outerYields, innerResults)) {
1600 if (!llvm::hasSingleElement(innerResult.getUses()) ||
1601 outerYield != innerResult)
1610 std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
1611 std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
1612 std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
1613 if (!loBnds || !upBnds || !steps)
1616 for (
auto [lb,
ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
1620 if (!lbCst || !ubCst || !stepCst)
1622 loopRanges.emplace_back(*lbCst, *ubCst, *stepCst);
1629 std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
1630 std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
1631 std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
1632 if (!loBnds || !upBnds || !steps)
1635 for (
auto [lb,
ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
1641 tripCounts.push_back(*numIter);
1651 const unsigned numLoops = op.getNumLoops();
1652 assert(llvm::none_of(unrollFactors, [](uint64_t f) {
return f == 0; }) &&
1653 "Expected positive unroll factors");
1654 assert((!unrollFactors.empty() && (unrollFactors.size() <= numLoops)) &&
1655 "Expected non-empty unroll factors of size <= to the number of loops");
1658 if (llvm::all_of(unrollFactors, [](uint64_t f) {
return f == 1; }))
1660 op,
"Unrolling not applied if all factors are 1");
1663 if (llvm::hasSingleElement(op.getBody()->getOperations()))
1668 const unsigned firstLoopDimIdx = numLoops - unrollFactors.size();
1673 if (tripCounts.empty())
1675 op,
"Failed to compute constant trip counts for the loop. Note that "
1676 "dynamic loop sizes are not supported.");
1678 for (
unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
1679 const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
1680 if (tripCounts[dimIdx].urem(unrollFactor) != 0)
1682 op,
"Unroll factors don't divide the iteration space evenly");
1685 std::optional<SmallVector<OpFoldResult>> maybeFoldSteps = op.getLoopSteps();
1686 if (!maybeFoldSteps)
1689 for (
auto step : *maybeFoldSteps)
1692 for (
unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
1693 const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
1694 if (unrollFactor == 1)
1696 const size_t origStep = steps[dimIdx];
1697 const int64_t newStep = origStep * unrollFactor;
1701 auto yieldedValues = op.getBody()->getTerminator()->getOperands();
1704 op.getBody(), op.getInductionVars()[dimIdx], unrollFactor,
1707 const AffineExpr expr = b.getAffineDimExpr(0) + (origStep * i);
1709 b.getDimIdentityMap().dropResult(0).insertResult(expr, 0);
1710 return affine::AffineApplyOp::create(b, iv.getLoc(), map,
1713 annotateFn, iterArgs, yieldedValues, &clonedToSrcOpsMap);
1718 op.getStepMutable()[dimIdx].assign(
static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > values)
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOps.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static Range emitNormalizedLoopBoundsForIndexType(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, ArrayRef< Value > values)
Helper function to multiply a sequence of values.
static std::pair< SmallVector< Value >, SmallPtrSet< Operation *, 2 > > delinearizeInductionVariable(RewriterBase &rewriter, Location loc, Value linearizedIv, ArrayRef< Value > ubs)
For each original loop, the value of the induction variable can be obtained by dividing the induction...
static void denormalizeInductionVariableForIndexType(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
static bool areInnerBoundsInvariant(scf::ForOp forOp)
Check if bounds of all inner loops are defined outside of forOp and return false if not.
static int64_t product(ArrayRef< int64_t > vals)
static llvm::ManagedStatic< PassManagerOptions > options
Base type for affine expression.
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIndexAttr(int64_t value)
IntegerAttr getIntegerAttr(Type type, int64_t value)
TypedAttr getZeroAttr(Type type)
MLIRContext * getContext() const
TypedAttr getOneAttr(Type type)
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
InsertPoint saveInsertionPoint() const
Return a saved insertion point.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void restoreInsertionPoint(InsertPoint ip)
Restore the insert point to a previously saved point.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
operand_type_range getOperandTypes()
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
result_range getResults()
Operation * clone(IRMapping &mapper, const CloneOptions &options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceAllUsesExcept(Value from, Value to, Operation *exceptedUser)
Find uses of from and replace them with to except if the user is exceptedUser.
virtual void inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues={})
Inline the operations of block 'source' into block 'dest' before the given position.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
void inlineRegionBefore(Region ®ion, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndex() const
Return true if this is an integer (of any signedness) or an index type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
Specialization of arith.constant op that returns an integer of index type.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Operation * getOwner() const
Return the owner of this operand.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
std::optional< llvm::APSInt > computeUbMinusLb(Value lb, Value ub, bool isSigned)
Helper function to compute the difference between two values.
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
bool isPerfectlyNestedForLoops(MutableArrayRef< LoopLikeOpInterface > loops)
Check if the provided loops are perfectly nested for-loops.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region ®ion)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
std::function< SmallVector< Value >( OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an affine.for to find a band to coalesce.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. N-1], one position per expression.
void generateUnrolledLoop(Block *loopBodyBlock, Value iv, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues, IRMapping *clonedToSrcOpsMap=nullptr)
Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs' and 'yieldedValues' as the...
Value getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
LogicalResult loopUnrollFull(scf::ForOp forOp)
Unrolls this loop completely.
llvm::SmallVector< llvm::APInt > getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp)
Get constant trip counts for each of the induction variables of the given loop operation.
std::pair< Loops, Loops > TileLoops
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
llvm::SmallVector< std::tuple< int64_t, int64_t, int64_t > > getConstLoopBounds(mlir::LoopLikeOpInterface loopOp)
Get constant loop bounds and steps for each of the induction variables of the given loop operation,...
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned > > combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
llvm::SetVector< T, Vector, Set, N > SetVector
std::optional< std::pair< APInt, bool > > getConstantAPIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
SliceOptions ForwardSliceOptions
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
FailureOr< UnrolledLoopInfo > loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor)
Unrolls and jams this scf.for operation by the specified unroll factor.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
bool isZeroInteger(OpFoldResult v)
Return "true" if v is an integer value/attribute with constant value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. N-1], one position per expression.
FailureOr< scf::ParallelOp > parallelLoopUnrollByFactors(scf::ParallelOp op, ArrayRef< uint64_t > unrollFactors, RewriterBase &rewriter, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, IRMapping *clonedToSrcOpsMap=nullptr)
Unroll this scf::Parallel loop by the specified unroll factors.
void getUsedValuesDefinedAbove(Region ®ion, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region ®ion, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Get back the original induction variable values after loop normalization.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
llvm::function_ref< Fn > function_ref
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
Materialize bounds and step of a zero-based and unit-step loop derived by normalizing the specified b...
SmallVector< scf::ForOp, 8 > Loops
Tile a nest of standard for loops rooted at rootForOp by finding such parametric tile sizes that the ...
bool isOneInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 1.
std::optional< APInt > constantTripCount(OpFoldResult lb, OpFoldResult ub, OpFoldResult step, bool isSigned, llvm::function_ref< std::optional< llvm::APSInt >(Value, Value, bool)> computeUbMinusLb)
Return the number of iterations for a loop with a lower bound lb, upper bound ub and step step,...
LogicalResult foldDynamicIndexList(SmallVectorImpl< OpFoldResult > &ofrs, bool onlyNonNegative=false, bool onlyNonZero=false)
Returns "success" when any of the elements in ofrs is a constant value.
FailureOr< scf::ForallOp > normalizeForallOp(RewriterBase &rewriter, scf::ForallOp forallOp)
Normalize an scf.forall operation.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
std::optional< scf::ForOp > epilogueLoopOp
std::optional< scf::ForOp > mainLoopOp
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.