26#include "llvm/ADT/MapVector.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/DebugLog.h"
29#include "llvm/Support/raw_ostream.h"
32#define DEBUG_TYPE "loop-utils"
37using llvm::SmallMapVector;
58 auto lbMap = forOp.getLowerBoundMap();
59 auto lb = AffineApplyOp::create(
b, forOp.getLoc(), lbMap,
60 forOp.getLowerBoundOperands());
69 int64_t step = forOp.getStepAsInt();
70 for (
unsigned i = 0, e = tripCountMap.
getNumResults(); i < e; i++) {
71 auto tripCountExpr = tripCountMap.
getResult(i);
72 bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
76 AffineApplyOp::create(
b, forOp.getLoc(), bumpMap, tripCountOperands);
80 for (
unsigned i = 0, e = bumpExprs.size(); i < e; i++)
81 newUbExprs[i] =
b.getAffineDimExpr(0) +
b.getAffineDimExpr(i + 1);
83 cleanupLbOperands.clear();
84 cleanupLbOperands.push_back(lb);
85 cleanupLbOperands.append(bumpValues.begin(), bumpValues.end());
93 for (
auto v : bumpValues)
95 v.getDefiningOp()->erase();
105 auto iterOperands = forOp.getInits();
106 auto iterArgs = forOp.getRegionIterArgs();
107 for (
auto e : llvm::zip(iterOperands, iterArgs))
108 std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
111 auto outerResults = forOp.getResults();
112 auto innerResults = forOp.getBody()->getTerminator()->getOperands();
113 for (
auto e : llvm::zip(outerResults, innerResults))
114 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
120 std::optional<APInt> tripCount = forOp.getStaticTripCount();
121 if (!tripCount || *tripCount != 1)
125 if (forOp.getLowerBoundMap().getNumResults() != 1)
129 auto iv = forOp.getInductionVar();
130 auto *parentBlock = forOp->getBlock();
131 if (!iv.use_empty()) {
132 if (forOp.hasConstantLowerBound()) {
133 auto func = forOp->getParentOfType<FunctionOpInterface>();
136 builder.setInsertionPointToStart(&
func.getFunctionBody().front());
138 builder.setInsertionPoint(forOp);
140 builder, forOp.getLoc(), forOp.getConstantLowerBound());
141 iv.replaceAllUsesWith(constOp);
143 auto lbOperands = forOp.getLowerBoundOperands();
144 auto lbMap = forOp.getLowerBoundMap();
148 iv.replaceAllUsesWith(lbOperands[0]);
151 AffineApplyOp::create(builder, forOp.getLoc(), lbMap, lbOperands);
152 iv.replaceAllUsesWith(affineApplyOp);
161 forOp.getBody()->back().erase();
163 forOp.getBody()->getOperations());
178 unsigned offset, AffineForOp srcForOp,
OpBuilder b) {
179 auto lbOperands = srcForOp.getLowerBoundOperands();
180 auto ubOperands = srcForOp.getUpperBoundOperands();
186 AffineForOp::create(
b, srcForOp.getLoc(), lbOperands, lbMap, ubOperands,
187 ubMap, srcForOp.getStepAsInt());
188 auto loopChunkIV = loopChunk.getInductionVar();
189 auto srcIV = srcForOp.getInductionVar();
194 for (
const auto &it : llvm::drop_begin(opGroupQueue, offset)) {
195 uint64_t shift = it.first;
196 auto ops = it.second;
201 if (!srcIV.use_empty() && shift != 0) {
202 auto ivRemap = AffineApplyOp::create(
203 bodyBuilder, srcForOp.getLoc(),
204 bodyBuilder.getSingleDimShiftAffineMap(
205 -
static_cast<int64_t>(srcForOp.getStepAsInt() * shift)),
207 operandMap.
map(srcIV, ivRemap);
209 operandMap.
map(srcIV, loopChunkIV);
212 bodyBuilder.clone(*op, operandMap);
215 return AffineForOp();
232 bool unrollPrologueEpilogue) {
233 assert(forOp.getBody()->getOperations().size() == shifts.size() &&
234 "too few/many shifts");
235 if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
242 auto mayBeConstTripCount = forOp.getStaticTripCount();
243 if (!mayBeConstTripCount) {
244 LLVM_DEBUG(forOp.emitRemark(
"non-constant trip count loop not handled"));
247 uint64_t tripCount = mayBeConstTripCount->getZExtValue();
250 "shifts will lead to an invalid transformation\n");
252 int64_t step = forOp.getStepAsInt();
254 unsigned numChildOps = shifts.size();
257 uint64_t maxShift = *llvm::max_element(shifts);
258 if (maxShift >= numChildOps) {
260 forOp.emitWarning(
"not shifting because shifts are unrealistically large");
267 std::vector<std::vector<Operation *>> sortedOpGroups(maxShift + 1);
269 for (
auto &op : forOp.getBody()->without_terminator()) {
270 auto shift = shifts[pos++];
271 sortedOpGroups[shift].push_back(&op);
279 AffineForOp prologue, epilogue;
284 std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> opGroupQueue;
286 auto origLbMap = forOp.getLowerBoundMap();
287 uint64_t lbShift = 0;
289 for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
291 if (sortedOpGroups[d].empty())
293 if (!opGroupQueue.empty()) {
295 "Queue expected to be empty when the first block is found");
300 if (lbShift + tripCount * step < d * step) {
302 b.getShiftedAffineMap(origLbMap, lbShift),
303 b.getShiftedAffineMap(origLbMap, lbShift + tripCount * step),
304 opGroupQueue, 0, forOp,
b);
306 opGroupQueue.clear();
307 lbShift += tripCount * step;
310 b.getShiftedAffineMap(origLbMap, d),
311 opGroupQueue, 0, forOp,
b);
318 AffineForOp::getCanonicalizationPatterns(patterns, res.getContext());
321 res.getOperation(), std::move(patterns),
325 if (!erased && !prologue)
335 opGroupQueue.emplace_back(d, sortedOpGroups[d]);
340 for (
unsigned i = 0, e = opGroupQueue.size(); i < e; ++i) {
341 uint64_t ubShift = (opGroupQueue[i].first + tripCount) * step;
343 b.getShiftedAffineMap(origLbMap, ubShift),
344 opGroupQueue, i, forOp,
b);
353 if (unrollPrologueEpilogue && prologue)
355 if (unrollPrologueEpilogue && !epilogue && epilogue != prologue)
367 if (input.size() <= 1)
370 LDBG() <<
"Index set computation failed!";
374 LDBG() <<
"Non-hyperrectangular nests not supported for tiling!";
385 assert(input.size() == tileSizes.size() &&
"Too few/many tile sizes");
387 if (llvm::any_of(input,
388 [](AffineForOp op) {
return op.getNumResults() > 0; })) {
389 LDBG() <<
"Cannot tile nest where a loop has yield values";
395 LDBG() <<
"input loops not perfectly nested";
410 auto &ops = src.getBody()->getOperations();
411 dest.getBody()->getOperations().splice(loc, ops, ops.begin(),
412 std::prev(ops.end()));
424 AffineForOp rootAffineForOp,
unsigned width,
426 Location loc = rootAffineForOp.getLoc();
429 Operation *topLoop = rootAffineForOp.getOperation();
430 AffineForOp innermostPointLoop;
433 for (
unsigned i = 0; i < width; i++) {
436 AffineForOp pointLoop = AffineForOp::create(
b, loc, 0, 0);
437 pointLoop.getBody()->getOperations().splice(
440 tiledLoops[2 * width - 1 - i] = pointLoop;
441 topLoop = pointLoop.getOperation();
443 innermostPointLoop = pointLoop;
447 for (
unsigned i = width; i < 2 * width; i++) {
450 AffineForOp tileSpaceLoop = AffineForOp::create(
b, loc, 0, 0);
451 tileSpaceLoop.getBody()->getOperations().splice(
454 tiledLoops[2 * width - i - 1] = tileSpaceLoop;
455 topLoop = tileSpaceLoop.getOperation();
465 AffineForOp newInterTileLoop,
466 AffineForOp newIntraTileLoop,
476 assert(origLoop.hasConstantLowerBound() &&
477 "expected input loops to have constant lower bound.");
482 b.getAffineConstantExpr(origLoop.getConstantLowerBound());
489 ubOperands.reserve(
ub.getNumOperands() + 2);
495 ubOperands.push_back(
ub.getOperand(
j));
499 lbOperands.push_back(newInterTileLoop.getInductionVar());
500 ubOperands.push_back(newInterTileLoop.getInductionVar());
504 AffineExpr lbLoopIvExpr =
b.getAffineDimExpr(lbOperands.size() - 1);
505 AffineExpr ubLoopIvExpr =
b.getAffineDimExpr(ubOperands.size() - 1);
511 ubOperands.push_back(
ub.getOperand(origUbMap.
getNumDims() +
j));
514 lbOperands.push_back(tileSize);
515 ubOperands.push_back(tileSize);
527 lbBoundExprs.push_back(
528 ((lbLoopIvExpr - origLowerBoundExpr) * lbTileParameter) +
532 AffineExpr origLoopStep =
b.getAffineConstantExpr(origLoop.getStepAsInt());
536 ubBoundExprs.push_back(
537 ((ubLoopIvExpr - origLowerBoundExpr) * ubTileParameter) +
538 (ubTileParameter * origLoopStep) + origLowerBoundExpr);
540 ubBoundExprs.append(origUbMap.
getResults().begin(),
545 lbBoundExprs,
b.getContext());
546 newIntraTileLoop.setLowerBound(lbOperands, lbMap);
550 ubBoundExprs,
b.getContext());
551 newIntraTileLoop.setUpperBound(ubOperands, ubMap);
554 newIntraTileLoop.setStep(origLoop.getStepAsInt());
560 AffineForOp newLoop,
Value tileSize) {
561 OperandRange newLbOperands = origLoop.getLowerBoundOperands();
565 newLoop.setLowerBound(newLbOperands, origLoop.getLowerBoundMap());
576 assert(origLoop.hasConstantLowerBound() &&
577 "expected input loops to have constant lower bound.");
582 b.getAffineConstantExpr(origLoop.getConstantLowerBound());
587 ubOperands.reserve(
ub.getNumOperands() + 1);
590 ubOperands.push_back(
ub.getOperand(
j));
594 ubOperands.push_back(
ub.getOperand(origUbMap.
getNumDims() +
j));
597 ubOperands.push_back(tileSize);
609 if (origLoop.hasConstantUpperBound()) {
610 origUpperBound = origLoop.getConstantUpperBound();
613 origUpperBoundExpr =
b.getAffineConstantExpr(origUpperBound);
617 boundExprs.push_back(
619 (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
640 boundExprs.push_back(
642 (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
647 boundExprs,
b.getContext());
648 newLoop.setUpperBound(ubOperands, ubMap);
651 newLoop.setStep(origLoop.getStepAsInt());
663 assert(!origLoops.empty() &&
"expected atleast one loop in band");
664 assert(origLoops.size() == tileSizes.size() &&
665 "expected tiling parameter for each loop in band.");
668 unsigned width = origLoops.size();
671 for (
unsigned i = 0; i < width; ++i) {
676 for (
unsigned i = 0; i < width; ++i) {
678 newLoops[i + width], tileSizes[i]);
691 assert(!origLoops.empty());
692 assert(origLoops.size() == tileSizes.size());
695 unsigned width = origLoops.size();
698 for (
unsigned i = 0; i < width; i++) {
699 OperandRange newLbOperands = origLoops[i].getLowerBoundOperands();
700 OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
701 newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
702 newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
705 newLoops[i].setStep(tileSizes[i] * origLoops[i].getStepAsInt());
708 for (
unsigned i = 0; i < width; i++) {
710 AffineForOp forOp = origLoops[i];
711 std::optional<uint64_t> mayBeConstantCount = std::nullopt;
712 if (
auto staticTripCount = forOp.getStaticTripCount())
713 mayBeConstantCount = staticTripCount->getZExtValue();
716 newLoops[width + i].setLowerBound(
717 newLoops[i].getInductionVar(), lbMap);
719 newLoops[width + i].setStep(origLoops[i].getStepAsInt());
722 if (mayBeConstantCount && *mayBeConstantCount < tileSizes[i]) {
725 AffineMap ubMap =
b.getSingleDimShiftAffineMap(
726 *mayBeConstantCount * origLoops[i].getStepAsInt());
727 newLoops[width + i].setUpperBound(
728 newLoops[i].getInductionVar(), ubMap);
729 }
else if (largestDiv % tileSizes[i] != 0) {
739 ubOperands.reserve(
ub.getNumOperands() + 1);
742 ubOperands.push_back(
ub.getOperand(
j));
745 ubOperands.push_back(newLoops[i].getInductionVar());
749 ubOperands.push_back(
ub.getOperand(origUbMap.
getNumDims() +
j));
756 boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStepAsInt());
757 boundExprs.append(origUbMap.
getResults().begin(),
761 boundExprs,
b.getContext());
762 newLoops[width + i].setUpperBound(ubOperands, ubMap);
767 1, 0, dim + tileSizes[i] * origLoops[i].getStepAsInt());
768 newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
784 AffineForOp rootAffineForOp = origLoops[0];
787 unsigned width = input.size();
801 for (
unsigned i = 0; i < width; i++)
802 origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
805 rootAffineForOp.erase();
808 *tiledNest = std::move(tiledLoops);
826 AffineForOp rootAffineForOp = origLoops[0];
827 unsigned width = input.size();
842 for (
unsigned i = 0; i < width; i++)
843 origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
846 rootAffineForOp.erase();
849 *tiledNest = std::move(tiledLoops);
860 for (
unsigned i = 0; i < std::numeric_limits<unsigned>::max(); ++i) {
861 nestedLoops.push_back(root);
862 Block &body = root.getRegion().front();
863 if (body.
begin() != std::prev(body.
end(), 2))
866 root = dyn_cast<AffineForOp>(&body.
front());
874 std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
875 if (mayBeConstantTripCount.has_value()) {
876 uint64_t tripCount = mayBeConstantTripCount->getZExtValue();
889 uint64_t unrollFactor) {
890 std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
891 if (mayBeConstantTripCount.has_value() &&
892 mayBeConstantTripCount->ult(unrollFactor))
902 Block *loopBodyBlock,
Value forOpIV, uint64_t unrollFactor,
912 annotateFn = defaultAnnotateFn;
921 for (
unsigned i = 1; i < unrollFactor; i++) {
925 operandMap.
map(iterArgs, lastYielded);
930 Value ivUnroll = ivRemapFn(i, forOpIV, builder);
931 operandMap.
map(forOpIV, ivUnroll);
935 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++) {
937 annotateFn(i, clonedOp, builder);
944 for (
unsigned i = 0, e = lastYielded.size(); i < e; i++) {
945 Operation *defOp = yieldedValues[i].getDefiningOp();
946 if (defOp && defOp->
getBlock() == loopBodyBlock)
947 lastYielded[i] = operandMap.
lookup(yieldedValues[i]);
953 for (
auto it = loopBodyBlock->
begin(); it != std::next(srcBlockEnd); it++)
954 annotateFn(0, &*it, builder);
963 uint64_t unrollFactor) {
966 auto cleanupForOp = cast<AffineForOp>(builder.
clone(*forOp));
970 auto results = forOp.getResults();
971 auto cleanupResults = cleanupForOp.getResults();
972 auto cleanupIterOperands = cleanupForOp.getInits();
974 for (
auto e : llvm::zip(results, cleanupResults, cleanupIterOperands)) {
975 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
976 cleanupForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
985 cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
991 forOp.setUpperBound(cleanupOperands, cleanupMap);
998 AffineForOp forOp, uint64_t unrollFactor,
1000 bool cleanUpUnroll) {
1001 assert(unrollFactor > 0 &&
"unroll factor should be positive");
1003 std::optional<uint64_t> mayBeConstantTripCount = std::nullopt;
1004 if (
auto staticTripCount = forOp.getStaticTripCount())
1005 mayBeConstantTripCount = staticTripCount->getZExtValue();
1006 if (unrollFactor == 1) {
1013 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
1017 if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
1018 if (cleanUpUnroll) {
1032 if (forOp.getLowerBoundMap().getNumResults() != 1 ||
1033 forOp.getUpperBoundMap().getNumResults() != 1)
1039 assert(
false &&
"cleanup loop lower bound map for single result lower "
1040 "and upper bound maps can always be determined");
1043 ValueRange iterArgs(forOp.getRegionIterArgs());
1044 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
1047 int64_t step = forOp.getStepAsInt();
1048 forOp.setStep(step * unrollFactor);
1050 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
1053 auto d0 = b.getAffineDimExpr(0);
1054 auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
1055 return AffineApplyOp::create(b, forOp.getLoc(), bumpMap, iv);
1058 iterArgs, yieldedValues);
1066 uint64_t unrollJamFactor) {
1067 std::optional<APInt> mayBeConstantTripCount = forOp.getStaticTripCount();
1068 if (mayBeConstantTripCount.has_value() &&
1069 mayBeConstantTripCount->getZExtValue() < unrollJamFactor)
1077 auto walkResult = forOp.walk([&](AffineForOp aForOp) {
1078 for (
auto controlOperand : aForOp.getControlOperands()) {
1079 if (!forOp.isDefinedOutsideOfLoop(controlOperand))
1084 return !walkResult.wasInterrupted();
1089 uint64_t unrollJamFactor) {
1090 assert(unrollJamFactor > 0 &&
"unroll jam factor should be positive");
1092 std::optional<uint64_t> mayBeConstantTripCount = std::nullopt;
1093 if (
auto staticTripCount = forOp.getStaticTripCount())
1094 mayBeConstantTripCount = staticTripCount->getZExtValue();
1095 if (unrollJamFactor == 1) {
1102 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
1106 if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollJamFactor) {
1107 LDBG() <<
"[failed] trip count < unroll-jam factor";
1123 forOp.walk([&](AffineForOp aForOp) {
1124 if (aForOp.getNumIterOperands() > 0)
1125 loopsWithIterArgs.push_back(aForOp);
1130 if (forOp.getNumIterOperands() > 0)
1140 if (forOp.getLowerBoundMap().getNumResults() != 1 ||
1141 forOp.getUpperBoundMap().getNumResults() != 1)
1144 assert(
false &&
"cleanup loop lower bound map for single result lower "
1145 "and upper bound maps can always be determined");
1157 for (AffineForOp oldForOp : loopsWithIterArgs) {
1159 ValueRange oldIterOperands = oldForOp.getInits();
1160 ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
1162 cast<AffineYieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
1165 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
1166 dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
1167 dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
1171 bool forOpReplaced = oldForOp == forOp;
1172 AffineForOp newForOp =
1173 cast<AffineForOp>(*oldForOp.replaceWithAdditionalYields(
1174 rewriter, dupIterOperands,
false,
1176 return dupYieldOperands;
1178 newLoopsWithIterArgs.push_back(newForOp);
1183 ValueRange newIterArgs = newForOp.getRegionIterArgs();
1184 unsigned oldNumIterArgs = oldIterArgs.size();
1185 ValueRange newResults = newForOp.getResults();
1186 unsigned oldNumResults = newResults.size() / unrollJamFactor;
1187 assert(oldNumIterArgs == oldNumResults &&
1188 "oldNumIterArgs must be the same as oldNumResults");
1189 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
1190 for (
unsigned j = 0;
j < oldNumIterArgs; ++
j) {
1194 operandMaps[i - 1].map(newIterArgs[
j],
1195 newIterArgs[i * oldNumIterArgs +
j]);
1196 operandMaps[i - 1].map(newResults[
j],
1197 newResults[i * oldNumResults +
j]);
1203 int64_t step = forOp.getStepAsInt();
1204 forOp.setStep(step * unrollJamFactor);
1206 auto forOpIV = forOp.getInductionVar();
1208 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
1209 for (
auto &subBlock : subBlocks) {
1212 OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
1221 AffineApplyOp::create(builder, forOp.getLoc(), bumpMap, forOpIV);
1222 operandMaps[i - 1].map(forOpIV, ivUnroll);
1225 for (
auto it = subBlock.first; it != std::next(subBlock.second); ++it)
1226 builder.
clone(*it, operandMaps[i - 1]);
1229 for (
auto newForOp : newLoopsWithIterArgs) {
1230 unsigned oldNumIterOperands =
1231 newForOp.getNumIterOperands() / unrollJamFactor;
1232 unsigned numControlOperands = newForOp.getNumControlOperands();
1233 auto yieldOp = cast<AffineYieldOp>(newForOp.getBody()->getTerminator());
1234 unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
1235 assert(oldNumIterOperands == oldNumYieldOperands &&
1236 "oldNumIterOperands must be the same as oldNumYieldOperands");
1237 for (
unsigned j = 0;
j < oldNumIterOperands; ++
j) {
1241 newForOp.setOperand(numControlOperands + i * oldNumIterOperands +
j,
1242 operandMaps[i - 1].lookupOrDefault(
1243 newForOp.getOperand(numControlOperands +
j)));
1245 i * oldNumYieldOperands +
j,
1246 operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(
j)));
1250 if (forOp.getNumResults() > 0) {
1256 auto loc = forOp.getLoc();
1257 unsigned oldNumResults = forOp.getNumResults() / unrollJamFactor;
1259 unsigned pos = reduction.iterArgPosition;
1263 for (
unsigned i = unrollJamFactor - 1; i >= 1; --i) {
1264 rhs = forOp.getResult(i * oldNumResults + pos);
1270 assert(op &&
"Reduction op should have been created");
1274 forOp.getResult(pos).replaceAllUsesExcept(
lhs, newOps);
1286 assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
1287 auto &forOpABody = forOpA.getBody()->getOperations();
1288 auto &forOpBBody = forOpB.getBody()->getOperations();
1294 forOpABody, forOpABody.begin(),
1295 std::prev(forOpABody.end()));
1298 forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
1299 std::prev(forOpBBody.end()));
1301 forOpBBody.splice(forOpBBody.begin(), forOpA->getBlock()->getOperations(),
1312 unsigned maxLoopDepth = loops.size();
1314 loopPermMapInv.resize(maxLoopDepth);
1315 for (
unsigned i = 0; i < maxLoopDepth; ++i)
1316 loopPermMapInv[loopPermMap[i]] = i;
1323 for (
const auto &depComps : depCompsVec) {
1324 assert(depComps.size() >= maxLoopDepth);
1327 for (
unsigned j = 0;
j < maxLoopDepth; ++
j) {
1328 unsigned permIndex = loopPermMapInv[
j];
1329 assert(depComps[permIndex].lb);
1330 int64_t depCompLb = *depComps[permIndex].lb;
1344 assert(loopPermMap.size() == loops.size() &&
"invalid loop perm map");
1345 unsigned maxLoopDepth = loops.size();
1346 if (maxLoopDepth == 1)
1352 if (llvm::any_of(loops, [](AffineForOp loop) {
1353 return loop.getNumIterOperands() > 0;
1359 std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
1366[[maybe_unused]]
bool
1368 assert(!loops.empty() &&
"no loops provided");
1371 auto hasTwoElements = [](
Block *block) {
1372 auto secondOpIt = std::next(block->begin());
1373 return secondOpIt != block->end() && &*secondOpIt == &block->back();
1376 auto enclosingLoop = loops.front();
1377 for (
auto loop : loops.drop_front()) {
1378 auto parentForOp = dyn_cast<AffineForOp>(loop->getParentOp());
1380 if (parentForOp != enclosingLoop || !hasTwoElements(parentForOp.getBody()))
1382 enclosingLoop = loop;
1391 assert(input.size() == permMap.size() &&
"invalid permutation map size");
1395 llvm::sort(checkPermMap);
1396 if (llvm::any_of(llvm::enumerate(checkPermMap),
1397 [](
const auto &en) {
return en.value() != en.index(); }))
1398 assert(
false &&
"invalid permutation map");
1401 if (input.size() < 2)
1409 for (
unsigned i = 0, e = input.size(); i < e; ++i)
1410 invPermMap.push_back({permMap[i], i});
1411 llvm::sort(invPermMap);
1415 if (permMap.back() != input.size() - 1) {
1416 Block *destBody = ((AffineForOp)input[invPermMap.back().second]).getBody();
1417 Block *srcBody = ((AffineForOp)input.back()).getBody();
1420 std::prev(srcBody->
end()));
1425 for (
int i = input.size() - 1; i >= 0; --i) {
1428 if (permMap[i] == 0) {
1433 auto *parentBlock = input[0]->getBlock();
1435 input[i]->getBlock()->getOperations(),
1442 unsigned parentPosInInput = invPermMap[permMap[i] - 1].second;
1443 if (i > 0 &&
static_cast<unsigned>(i - 1) == parentPosInInput)
1447 auto *destBody = ((AffineForOp)input[parentPosInInput]).getBody();
1448 destBody->getOperations().splice(destBody->begin(),
1449 input[i]->getBlock()->getOperations(),
1453 return invPermMap[0].second;
1462 if (loops.size() < 2)
1467 unsigned maxLoopDepth = loops.size();
1468 std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
1473 for (
auto &depComps : depCompsVec) {
1474 assert(depComps.size() >= maxLoopDepth);
1475 for (
unsigned j = 0;
j < maxLoopDepth; ++
j) {
1477 assert(depComp.
lb.has_value() && depComp.
ub.has_value());
1478 if (*depComp.
lb != 0 || *depComp.
ub != 0)
1479 isParallelLoop[
j] =
false;
1483 unsigned numParallelLoops = llvm::count(isParallelLoop,
true);
1488 unsigned nextSequentialLoop = numParallelLoops;
1489 unsigned nextParallelLoop = 0;
1490 for (
unsigned i = 0; i < maxLoopDepth; ++i) {
1491 if (isParallelLoop[i]) {
1492 loopPermMap[i] = nextParallelLoop++;
1494 loopPermMap[i] = nextSequentialLoop++;
1502 unsigned loopNestRootIndex =
permuteLoops(loops, loopPermMap);
1503 return loops[loopNestRootIndex];
1518 auto bounds = llvm::to_vector<4>(map->
getResults());
1519 bounds.push_back(
b.getAffineDimExpr(map->
getNumDims()) + offset);
1520 operands->insert(operands->begin() + map->
getNumDims(), iv);
1537 auto originalStep = forOp.getStepAsInt();
1538 auto scaledStep = originalStep * factor;
1539 forOp.setStep(scaledStep);
1544 auto lbMap = forOp.getLowerBoundMap();
1549 auto ubMap = forOp.getUpperBoundMap();
1554 auto iv = forOp.getInductionVar();
1556 for (
auto t : targets) {
1559 auto newForOp = AffineForOp::create(
b, t.getLoc(), lbOperands, lbMap,
1560 ubOperands, ubMap, originalStep);
1561 auto begin = t.getBody()->begin();
1563 auto nOps = t.getBody()->getOperations().size() - 2;
1564 newForOp.getBody()->getOperations().splice(
1565 newForOp.getBody()->getOperations().begin(),
1566 t.getBody()->getOperations(), begin, std::next(begin, nOps));
1568 newForOp.getRegion());
1569 innerLoops.push_back(newForOp);
1577template <
typename SizeType>
1585 assert(res.size() == 1 &&
"Expected 1 inner forOp");
1594 for (
auto it : llvm::zip(forOps, sizes)) {
1595 auto step =
stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
1596 res.push_back(step);
1597 currentTargets = step;
1607 res.push_back(llvm::getSingleElement(loops));
1612 if (loops.size() < 2)
1615 AffineForOp innermost = loops.back();
1616 AffineForOp outermost = loops.front();
1621 for (AffineForOp loop : loops) {
1623 if (loop.getStepAsInt() != 1 || !loop.hasConstantLowerBound() ||
1624 loop.getConstantLowerBound() != 0)
1629 ub.getOperands().end());
1633 if (!llvm::hasSingleElement(origUbMap.
getResults()))
1634 prev = AffineMinOp::create(builder, loc, origUbMap, ubOperands);
1636 prev = AffineApplyOp::create(builder, loc, origUbMap, ubOperands);
1637 upperBoundSymbols.push_back(prev);
1641 for (AffineForOp loop : loops.drop_front()) {
1642 ub = loop.getUpperBound();
1643 origUbMap =
ub.getMap();
1644 ubOperands =
ub.getOperands();
1647 if (!llvm::hasSingleElement(origUbMap.
getResults()))
1648 upperBound = AffineMinOp::create(builder, loc, origUbMap, ubOperands);
1650 upperBound = AffineApplyOp::create(builder, loc, origUbMap, ubOperands);
1651 upperBoundSymbols.push_back(upperBound);
1653 operands.push_back(prev);
1654 operands.push_back(upperBound);
1656 prev = AffineApplyOp::create(
1660 builder.getAffineDimExpr(0) *
1661 builder.getAffineSymbolExpr(0)),
1667 1, builder.getAffineSymbolExpr(0), builder.getContext());
1668 outermost.setUpperBound(prev, newUbMap);
1670 builder.setInsertionPointToStart(outermost.getBody());
1680 Value previous = outermost.getInductionVar();
1681 for (
unsigned idx = loops.size(); idx > 0; --idx) {
1682 if (idx != loops.size()) {
1684 operands.push_back(previous);
1685 operands.push_back(upperBoundSymbols[idx]);
1686 previous = AffineApplyOp::create(builder, loc,
1689 builder.getAffineDimExpr(0).floorDiv(
1690 builder.getAffineSymbolExpr(0))),
1695 Value inductionVariable;
1697 inductionVariable = previous;
1700 applyOperands.push_back(previous);
1701 applyOperands.push_back(upperBoundSymbols[idx - 1]);
1702 inductionVariable = AffineApplyOp::create(
1706 builder.getAffineDimExpr(0) % builder.getAffineSymbolExpr(0)),
1710 inductionVariable, loops.back().getRegion());
1715 AffineForOp secondOutermostLoop = loops[1];
1716 innermost.getBody()->back().erase();
1717 outermost.getBody()->getOperations().splice(
1719 innermost.getBody()->getOperations());
1720 for (
auto [iter, init] :
1721 llvm::zip_equal(secondOutermostLoop.getRegionIterArgs(),
1722 secondOutermostLoop.getInits())) {
1723 iter.replaceAllUsesWith(init);
1726 secondOutermostLoop.erase();
1730void mlir::affine::mapLoopToProcessorIds(scf::ForOp forOp,
1733 assert(processorId.size() == numProcessors.size());
1734 if (processorId.empty())
1744 Value linearIndex = processorId.front();
1745 for (
unsigned i = 1, e = processorId.size(); i < e; ++i) {
1746 auto mulApplyOp = AffineApplyOp::create(
1747 b, loc, mulMap,
ValueRange{linearIndex, numProcessors[i]});
1748 linearIndex = AffineApplyOp::create(
b, loc, addMap,
1752 auto mulApplyOp = AffineApplyOp::create(
1753 b, loc, mulMap,
ValueRange{linearIndex, forOp.getStep()});
1754 Value lb = AffineApplyOp::create(
1755 b, loc, addMap,
ValueRange{mulApplyOp, forOp.getLowerBound()});
1756 forOp.setLowerBound(lb);
1758 Value step = forOp.getStep();
1759 for (
auto numProcs : numProcessors)
1760 step = AffineApplyOp::create(
b, loc, mulMap,
ValueRange{numProcs, step});
1761 forOp.setStep(step);
1772 Block **copyPlacementBlock,
1777 cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);
1783 auto it = enclosingAffineOps.rbegin();
1784 AffineForOp lastInvariantFor;
1785 for (
auto e = enclosingAffineOps.rend(); it != e; ++it) {
1790 LDBG() <<
"memref definition will end up not dominating hoist location";
1794 auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
1799 if (llvm::is_contained(symbols, affineFor.getInductionVar()))
1801 lastInvariantFor = affineFor;
1804 if (it != enclosingAffineOps.rbegin()) {
1806 *copyOutPlacementStart = std::next(*copyInPlacementStart);
1807 *copyPlacementBlock = lastInvariantFor->getBlock();
1809 *copyInPlacementStart = begin;
1810 *copyOutPlacementStart = end;
1811 *copyPlacementBlock = &block;
1829 if (bufferShape.size() <= 1)
1834 for (
int d = bufferShape.size() - 1; d >= 1; d--) {
1837 numEltPerStride *= bufferShape[d];
1841 if (bufferShape[d] < dimSize && bufferShape[d - 1] > 1) {
1842 strideInfos->push_back({stride, numEltPerStride});
1867 assert(llvm::all_of(lbMaps, [&](
AffineMap lbMap) {
1870 assert(llvm::all_of(ubMaps, [&](
AffineMap ubMap) {
1874 unsigned rank = cast<MemRefType>(
memref.getType()).getRank();
1876 assert(rank != 0 &&
"non-zero rank memref expected");
1877 assert(lbMaps.size() == rank &&
"wrong number of lb maps");
1878 assert(ubMaps.size() == rank &&
"wrong number of ub maps");
1885 for (
unsigned d = 0; d < rank; ++d) {
1886 auto forOp = createCanonicalizedAffineForOp(
b, loc, lbOperands, lbMaps[d],
1887 ubOperands, ubMaps[d]);
1893 auto fastBufOffsetMap =
1895 auto offset = AffineApplyOp::create(
b, loc, fastBufOffsetMap, lbOperands);
1900 b.getAffineDimExpr(2 * d));
1901 fastBufMapOperands.push_back(offset);
1902 fastBufMapOperands.push_back(forOp.getInductionVar());
1903 mayBeDeadApplys.push_back(offset);
1906 memIndices.push_back(forOp.getInductionVar());
1916 for (
auto applyOp : mayBeDeadApplys)
1917 if (applyOp.use_empty())
1922 auto load = AffineLoadOp::create(
b, loc,
memref, memIndices);
1924 fastBufMapOperands);
1930 AffineLoadOp::create(
b, loc, fastMemRef,
fastBufMap, fastBufMapOperands);
1931 AffineStoreOp::create(
b, loc,
load,
memref, memIndices);
1963 auto f = begin->getParentOfType<FunctionOpInterface>();
1964 OpBuilder topBuilder(f.getFunctionBody());
1976 Operation *lastCopyOp = end->getPrevNode();
1980 bool isCopyOutAtEndOfBlock = (end == copyOutPlacementStart);
1983 OpBuilder prologue(copyPlacementBlock, copyInPlacementStart);
1985 OpBuilder epilogue(copyPlacementBlock, copyOutPlacementStart);
1993 auto loc = region.
loc;
1995 auto memRefType = cast<MemRefType>(
memref.getType());
1997 if (!memRefType.getLayout().isIdentity()) {
1998 LDBG() <<
"Non-identity layout map not yet supported";
2008 unsigned rank = memRefType.getRank();
2010 LDBG() <<
"Non-zero ranked memrefs supported";
2019 std::optional<int64_t> numElements =
2022 LDBG() <<
"Non-constant region size not supported";
2028 LDBG() <<
"Max lower bound for memref region start not supported";
2032 if (*numElements == 0) {
2033 LDBG() <<
"Nothing to copy";
2038 for (
unsigned i = 0; i < rank; ++i) {
2040 if (lbMaps[i].getNumResults() == 0 || ubMaps[i].getNumResults() == 0) {
2041 LDBG() <<
"Missing lower or upper bound for region along dimension: "
2061 fastBufOffsets.reserve(rank);
2062 for (
unsigned d = 0; d < rank; d++) {
2063 assert(lbs[d].getNumSymbols() == cst->
getNumCols() - rank - 1 &&
2064 "incorrect bound size");
2068 if (lbs[d].isSingleConstant()) {
2069 auto indexVal = lbs[d].getSingleConstantResult();
2070 if (indexVal == 0) {
2071 memIndices.push_back(zeroIndex);
2073 memIndices.push_back(
2083 for (
unsigned i = 0, e = lbs[d].getNumSymbols(); i < e; ++i)
2084 symReplacements[i] = top.getAffineDimExpr(i);
2085 lbs[d] = lbs[d].replaceDimsAndSymbols(
2086 {}, symReplacements, lbs[d].getNumSymbols(),
2088 memIndices.push_back(
2089 AffineApplyOp::create(
b, loc, lbs[d], regionSymbols));
2092 bufIndices.push_back(zeroIndex);
2096 fastBufOffsets.push_back(lbs[d].getResult(0));
2103 bool existingBuf = fastBufferMap.count(
memref) > 0;
2105 AffineMap fastBufferLayout =
b.getMultiDimIdentityMap(rank);
2106 auto fastMemRefType =
2107 MemRefType::get(fastBufferShape, memRefType.getElementType(),
2113 memref::AllocOp::create(prologue, loc, fastMemRefType).getResult();
2115 fastBufferMap[
memref] = fastMemRef;
2119 *sizeInBytes = maySizeInBytes.value_or(0);
2122 <<
"Creating fast buffer of type " << fastMemRefType
2123 <<
" and size " << llvm::divideCeil(*sizeInBytes, 1024)
2127 fastMemRef = fastBufferMap[
memref];
2133 Value numEltPerDmaStride;
2140 if (dmaStrideInfos.size() > 1) {
2141 LDBG() <<
"Only up to one level of stride supported";
2145 if (!dmaStrideInfos.empty()) {
2149 top, loc, dmaStrideInfos[0].numEltPerStride);
2154 auto memAffineMap =
b.getMultiDimIdentityMap(memIndices.size());
2156 auto bufAffineMap =
b.getMultiDimIdentityMap(bufIndices.size());
2162 generatePointWiseCopy(loc,
memref, fastMemRef, lbMaps,
2163 regionSymbols, ubMaps,
2164 regionSymbols, fastBufOffsets,
2168 copyNests.insert(copyNest);
2172 if (region.
isWrite() && isCopyOutAtEndOfBlock)
2177 auto tagMemRefType = MemRefType::get({1}, top.getIntegerType(32), {},
2179 auto tagMemRef = memref::AllocOp::create(prologue, loc, tagMemRefType);
2182 auto tagAffineMap =
b.getMultiDimIdentityMap(tagIndices.size());
2186 AffineDmaStartOp::create(
b, loc,
memref, memAffineMap, memIndices,
2187 fastMemRef, bufAffineMap, bufIndices, tagMemRef,
2188 tagAffineMap, tagIndices, numElementsSSA,
2189 dmaStride, numEltPerDmaStride);
2192 auto op = AffineDmaStartOp::create(
2193 b, loc, fastMemRef, bufAffineMap, bufIndices,
memref, memAffineMap,
2194 memIndices, tagMemRef, tagAffineMap, tagIndices, numElementsSSA,
2195 dmaStride, numEltPerDmaStride);
2198 if (isCopyOutAtEndOfBlock)
2203 AffineDmaWaitOp::create(
b, loc, tagMemRef, tagAffineMap, zeroIndex,
2207 auto tagDeallocOp = memref::DeallocOp::create(epilogue, loc, tagMemRef);
2208 if (*nEnd == end && isCopyOutAtEndOfBlock)
2216 auto bufDeallocOp = memref::DeallocOp::create(epilogue, loc, fastMemRef);
2219 if (!copyOptions.
generateDma && *nEnd == end && isCopyOutAtEndOfBlock)
2231 remapExprs.reserve(rank);
2232 for (
unsigned i = 0; i < rank; i++) {
2236 auto dimExpr =
b.getAffineDimExpr(regionSymbols.size() + i);
2237 remapExprs.push_back(dimExpr - fastBufOffsets[i]);
2239 auto indexRemap =
AffineMap::get(regionSymbols.size() + rank, 0, remapExprs,
2244 bool isBeginAtStartOfBlock = (begin == block->
begin());
2245 if (!isBeginAtStartOfBlock)
2246 prevOfBegin = std::prev(begin);
2248 auto userFilterFn = [&](
Operation *user) {
2255 (
void)replaceAllMemRefUsesWith(
memref, fastMemRef,
2260 *nBegin = isBeginAtStartOfBlock ? block->
begin() : std::next(prevOfBegin);
2272 if (
auto loadOp = dyn_cast<AffineLoadOp>(op)) {
2273 rank = loadOp.getMemRefType().getRank();
2274 region->
memref = loadOp.getMemRef();
2276 }
else if (
auto storeOp = dyn_cast<AffineStoreOp>(op)) {
2277 rank = storeOp.getMemRefType().getRank();
2278 region->
memref = storeOp.getMemRef();
2281 assert(
false &&
"expected load or store op");
2284 auto memRefType = cast<MemRefType>(region->
memref.
getType());
2285 if (!memRefType.hasStaticShape())
2294 ivs.resize(numParamLoopIVs);
2298 regionCst->setValues(rank, rank + numParamLoopIVs, symbols);
2301 for (
unsigned d = 0; d < rank; d++) {
2302 auto dimSize = memRefType.getDimSize(d);
2303 assert(dimSize > 0 &&
"filtered dynamic shapes above");
2304 regionCst->addBound(BoundType::LB, d, 0);
2305 regionCst->addBound(BoundType::UB, d, dimSize - 1);
2313 std::optional<Value> filterMemRef,
2318 assert(begin->getBlock() == std::prev(end)->getBlock() &&
2319 "Inconsistent block begin/end args");
2320 assert(end != end->getBlock()->end() &&
"end can't be the block terminator");
2322 Block *block = begin->getBlock();
2328 LDBG() <<
"Generating copies at depth " << copyDepth;
2329 LDBG() <<
"from begin: "
2331 LDBG() <<
"to inclusive end: "
2337 SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> readRegions;
2338 SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> writeRegions;
2350 MemRefType memrefType;
2352 if (
auto loadOp = dyn_cast<AffineLoadOp>(opInst)) {
2353 memref = loadOp.getMemRef();
2354 memrefType = loadOp.getMemRefType();
2355 }
else if (
auto storeOp = dyn_cast<AffineStoreOp>(opInst)) {
2356 memref = storeOp.getMemRef();
2357 memrefType = storeOp.getMemRefType();
2363 if ((filterMemRef.has_value() && filterMemRef !=
memref) ||
2364 (isa_and_nonnull<IntegerAttr>(memrefType.getMemorySpace()) &&
2369 LDBG() <<
"memref definition is inside of the depth at "
2370 <<
"which copy-in/copy-out would happen";
2375 auto region = std::make_unique<MemRefRegion>(opInst->
getLoc());
2376 if (
failed(region->compute(opInst, copyDepth,
nullptr,
2378 LDBG() <<
"Error obtaining memory region: semi-affine maps?";
2379 LDBG() <<
"over-approximating to the entire memref";
2381 LDBG() <<
"non-constant memref sizes not yet supported";
2402 [&](
const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
2404 const auto *
const it = targetRegions.find(region->memref);
2405 if (it == targetRegions.end())
2409 if (
failed(it->second->unionBoundingBox(*region))) {
2410 LDBG() <<
"Memory region bounding box failed; "
2411 <<
"over-approximating to the entire memref";
2414 LDBG() <<
"non-constant memref sizes not yet supported";
2418 it->second->getConstraints()->clearAndCopyFrom(
2419 *region->getConstraints());
2422 region->getConstraints()->clearAndCopyFrom(
2423 *it->second->getConstraints());
2428 bool existsInRead = updateRegion(readRegions);
2431 bool existsInWrite = updateRegion(writeRegions);
2436 if (region->isWrite() && !existsInWrite) {
2437 writeRegions[region->memref] = std::move(region);
2438 }
else if (!region->isWrite() && !existsInRead) {
2439 readRegions[region->memref] = std::move(region);
2444 LDBG() <<
"copy generation failed for one or more memref's in this block";
2448 uint64_t totalCopyBuffersSizeInBytes = 0;
2450 auto processRegions =
2451 [&](
const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
2453 for (
const auto &regionEntry : regions) {
2457 Block *copyPlacementBlock;
2459 *regionEntry.second, *block, begin, end, &copyPlacementBlock,
2460 &copyInPlacementStart, &copyOutPlacementStart);
2462 uint64_t sizeInBytes;
2465 *regionEntry.second, block, begin, end, copyPlacementBlock,
2466 copyInPlacementStart, copyOutPlacementStart, copyOptions,
2467 fastBufferMap, copyNests, &sizeInBytes, &nBegin, &nEnd);
2468 if (succeeded(iRet)) {
2472 totalCopyBuffersSizeInBytes += sizeInBytes;
2474 ret = ret & succeeded(iRet);
2477 processRegions(readRegions);
2478 processRegions(writeRegions);
2481 LDBG() <<
"copy generation failed for one or more memref's in this block";
2487 if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) {
2488 LLVM_DEBUG(forOp.emitRemark()
2489 << llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024)
2490 <<
" KiB of copy buffers in fast memory space for this block");
2495 "total size of all copy buffers' for this block exceeds fast memory "
2504LogicalResult mlir::affine::affineDataCopyGenerate(
2507 return affineDataCopyGenerate(forOp.getBody()->begin(),
2508 std::prev(forOp.getBody()->end()), copyOptions,
2509 filterMemRef, copyNests);
2512LogicalResult mlir::affine::generateCopyForMemRegion(
2516 auto begin = analyzedOp->getIterator();
2517 auto end = std::next(begin);
2521 auto err =
generateCopy(memrefRegion, block, begin, end, block, begin, end,
2522 copyOptions, fastBufferMap, copyNests,
2523 &
result.sizeInBytes, &begin, &end);
2527 const auto &en = fastBufferMap.find(memrefRegion.
memref);
2529 if (en == fastBufferMap.end())
2531 result.alloc = en->second.getDefiningOp();
2532 assert(
result.alloc &&
"fast buffer expected to be locally allocated");
2533 assert(copyNests.size() <= 1 &&
"At most one copy nest is expected.");
2534 result.copyNest = copyNests.empty() ?
nullptr : *copyNests.begin();
2543 assert(currLoopDepth <= depthToLoops.size() &&
"Unexpected currLoopDepth");
2544 if (currLoopDepth == depthToLoops.size())
2545 depthToLoops.emplace_back();
2547 for (
auto &op : *block) {
2548 if (
auto forOp = dyn_cast<AffineForOp>(op)) {
2549 depthToLoops[currLoopDepth].push_back(forOp);
2556void mlir::affine::gatherLoops(
2558 for (
auto &block :
func)
2562 if (!depthToLoops.empty()) {
2563 assert(depthToLoops.back().empty() &&
"Last loop level is not empty?");
2564 depthToLoops.pop_back();
2568AffineForOp mlir::affine::createCanonicalizedAffineForOp(
2581 return AffineForOp::create(
b, loc, lowerOperands, lbMap, upperOperands, ubMap,
2595 auto *context = loops[0].getContext();
2599 llvm::append_range(ops, loops);
2609 for (
auto loop : loops) {
2612 assert(loop.getStepAsInt() == 1 &&
"point loop step expected to be one");
2616 unsigned fullTileLbPos, fullTileUbPos;
2618 .getConstantBoundOnDimSize(0,
nullptr,
2620 nullptr, &fullTileLbPos,
2622 LDBG() <<
"Can't get constant diff pair for a loop";
2631 fullTileLb.assign(fLb.begin(), fLb.end());
2632 fullTileUb.assign(fUb.begin(), fUb.end());
2635 for (
auto lbIndex : lbIndices)
2636 for (
unsigned i = 0, e = cst.
getNumCols(); i < e; ++i)
2637 cst.
atIneq(lbIndex, i) = fullTileLb[i] - cst.
atIneq(lbIndex, i);
2640 for (
auto ubIndex : ubIndices)
2641 for (
unsigned i = 0, e = cst.
getNumCols(); i < e; ++i)
2642 cst.
atIneq(ubIndex, i) -= fullTileUb[i];
2665 return AffineIfOp::create(
b, loops[0].getLoc(), ifCondSet, setOperands,
2673 fullTileLoops.reserve(inputNest.size());
2678 for (
auto loop : inputNest) {
2680 if (loop.getStepAsInt() != 1) {
2681 LDBG() <<
"[tile separation] non-unit stride not implemented";
2689 unsigned lbPos, ubPos;
2691 .getConstantBoundOnDimSize(0,
nullptr,
2693 nullptr, &lbPos, &ubPos) ||
2695 LDBG() <<
"[tile separation] Can't get constant diff / "
2696 <<
"equalities not yet handled";
2705 cst.getIneqAsAffineValueMap(0, lbPos, lbVmap,
b.getContext());
2706 cst.getIneqAsAffineValueMap(0, ubPos, ubVmap,
b.getContext());
2707 AffineForOp fullTileLoop = createCanonicalizedAffineForOp(
2711 fullTileLoops.push_back(fullTileLoop);
2716 for (
const auto &loopEn : llvm::enumerate(inputNest))
2717 operandMap.
map(loopEn.value().getInductionVar(),
2718 fullTileLoops[loopEn.index()].getInductionVar());
2720 for (
auto &op : inputNest.back().getBody()->without_terminator())
2721 b.clone(op, operandMap);
2728 if (inputNest.empty())
2731 auto firstLoop = inputNest[0];
2734 auto prevLoop = firstLoop;
2735 for (
auto loop : inputNest.drop_front(1)) {
2736 assert(loop->getParentOp() == prevLoop &&
"input not contiguously nested");
2744 if (!fullTileLoops.empty())
2745 fullTileLoops.front().erase();
2753 fullTileLoops.front().erase();
2754 LDBG() <<
"All tiles are full tiles, or failure creating "
2755 <<
"separation condition";
2760 Block *thenBlock = ifOp.getThenBlock();
2761 AffineForOp outermostFullTileLoop = fullTileLoops[0];
2763 std::prev(thenBlock->
end()),
2764 outermostFullTileLoop->getBlock()->getOperations(),
2769 Block *elseBlock = ifOp.getElseBlock();
2771 firstLoop->getBlock()->getOperations(),
2775 *fullTileNest = std::move(fullTileLoops);
2780LogicalResult affine::coalescePerfectlyNestedAffineLoops(AffineForOp op) {
2781 LogicalResult
result(failure());
2784 if (loops.size() <= 1)
2792 for (
unsigned i = 0, e = loops.size(); i < e; ++i) {
2793 operandsDefinedAbove[i] = i;
2794 for (
unsigned j = 0;
j < i; ++
j) {
2796 operandsDefinedAbove[i] =
j;
2805 for (
unsigned end = loops.size(); end > 0; --end) {
2807 for (; start < end - 1; ++start) {
2809 *std::max_element(std::next(operandsDefinedAbove.begin(), start),
2810 std::next(operandsDefinedAbove.begin(), end));
2813 assert(maxPos == start &&
2814 "expected loop bounds to be known at the start of the band");
2822 if (start != end - 1)
2831 while (
auto loopOp = currentOp->
getParentOfType<LoopLikeOpInterface>()) {
2832 if (!loopOp.isDefinedOutsideOfLoop(operand.
get()))
[...] if copies could not be generated due to yet unimplemented cases. copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock specify the insertion points where the incoming copies and outgoing [...] should be [...] the output argument nBegin is set to its [...] the output argument nEnd is set to the new end. sizeInBytes is set to the size of the fast buffer allocated. static LogicalResult generateCopy(const MemRefRegion &region, Block *block, Block::iterator begin, Block::iterator end, Block *copyPlacementBlock, Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart, const AffineCopyOptions &copyOptions, DenseMap< Value, Value > &fastBufferMap, DenseSet< Operation * > &copyNests, uint64_t *sizeInBytes, Block::iterator *nBegin, Block::iterator *nEnd)
static LogicalResult performPreTilingChecks(MutableArrayRef< AffineForOp > input, ArrayRef< t > tileSizes)
Check if the input nest is supported for tiling and whether tiling would be legal or not.
static void constructParametricallyTiledIndexSetHyperRect(MutableArrayRef< AffineForOp > origLoops, MutableArrayRef< AffineForOp > newLoops, ArrayRef< Value > tileSizes)
Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets,...
fullyComposeAffineMapAndOperands & fastBufMap
static InFlightDiagnostic emitRemarkForBlock(Block &block)
static void constructTiledLoopNest(MutableArrayRef< AffineForOp > origLoops, AffineForOp rootAffineForOp, unsigned width, MutableArrayRef< AffineForOp > tiledLoops)
Constructs tiled loop nest, without setting the loop bounds and move the body of the original loop ne...
static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs, MemRefRegion *region)
Construct the memref region to just include the entire memref.
static bool checkLoopInterchangeDependences(const std::vector< SmallVector< DependenceComponent, 2 > > &depCompsVec, ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
static LogicalResult checkIfHyperRectangular(MutableArrayRef< AffineForOp > input)
Checks whether a loop nest is hyper-rectangular or not.
static void findHighestBlockForPlacement(const MemRefRegion &region, Block &block, Block::iterator &begin, Block::iterator &end, Block **copyPlacementBlock, Block::iterator *copyInPlacementStart, Block::iterator *copyOutPlacementStart)
Given a memref region, determine the lowest depth at which transfers can be placed for it,...
static void moveLoopBodyImpl(AffineForOp src, AffineForOp dest, Block::iterator loc)
Move the loop body of AffineForOp 'src' from 'src' into the specified location in destination's body,...
static void setInterTileBoundsParametric(OpBuilder &b, AffineForOp origLoop, AffineForOp newLoop, Value tileSize)
Set lower and upper bounds of inter-tile loops for parametric tiling.
static void setIntraTileBoundsParametric(OpBuilder &b, AffineForOp origLoop, AffineForOp newInterTileLoop, AffineForOp newIntraTileLoop, Value tileSize)
Set lower and upper bounds of intra-tile loops for parametric tiling.
static LogicalResult createFullTiles(MutableArrayRef< AffineForOp > inputNest, SmallVectorImpl< AffineForOp > &fullTileLoops, OpBuilder b)
Create the full tile loop nest (along with its body).
static AffineForOp generateShiftedLoop(AffineMap lbMap, AffineMap ubMap, const std::vector< std::pair< uint64_t, ArrayRef< Operation * > > > &opGroupQueue, unsigned offset, AffineForOp srcForOp, OpBuilder b)
Generates an affine.for op with the specified lower and upper bounds while generating the right IV re...
static void getMultiLevelStrides(const MemRefRegion &region, ArrayRef< int64_t > bufferShape, SmallVectorImpl< StrideInfo > *strideInfos)
Returns striding information for a copy/transfer of this region with potentially multiple striding le...
static void constructTiledIndexSetHyperRect(MutableArrayRef< AffineForOp > origLoops, MutableArrayRef< AffineForOp > newLoops, ArrayRef< unsigned > tileSizes)
Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets,...
static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp, uint64_t unrollFactor)
Helper to generate cleanup loop for unroll or unroll-and-jam when the trip count is not a multiple of...
static bool areInnerBoundsInvariant(AffineForOp forOp)
Check if all control operands of all loops are defined outside of forOp and return false if not.
static void moveLoopBody(AffineForOp src, AffineForOp dest)
Move the loop body of AffineForOp 'src' from 'src' to the start of dest body.
static AffineIfOp createSeparationCondition(MutableArrayRef< AffineForOp > loops, OpBuilder b)
Creates an AffineIfOp that encodes the conditional to choose between the constant trip count version ...
static void gatherLoopsInBlock(Block *block, unsigned currLoopDepth, std::vector< SmallVector< AffineForOp, 2 > > &depthToLoops)
Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, AffineMap &cleanupLbMap, SmallVectorImpl< Value > &cleanupLbOperands)
Computes the cleanup loop lower bound of the loop being unrolled with the specified unroll factor; th...
static void augmentMapAndBounds(OpBuilder &b, Value iv, AffineMap *map, SmallVector< Value, 4 > *operands, int64_t offset=0)
static SmallVector< AffineForOp, 8 > stripmineSink(AffineForOp forOp, uint64_t factor, ArrayRef< AffineForOp > targets)
static void replaceIterArgsAndYieldResults(AffineForOp forOp)
Helper to replace uses of loop carried values (iter_args) and loop yield values while promoting singl...
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
unsigned getNumInputs() const
AffineExpr getResult(unsigned idx) const
Block represents an ordered list of Operations.
OpListType::iterator iterator
Operation * findAncestorOpInBlock(Operation &op)
Returns 'op' if 'op' lies in this block, or otherwise finds the ancestor operation of 'op' that lies ...
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
OpListType & getOperations()
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Operation * getTerminator()
Get the terminator operation of this block.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
AffineMap getDimIdentityMap()
AffineExpr getAffineDimExpr(unsigned position)
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
This class allows control over how the GreedyPatternRewriteDriver works.
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
IRValueT get() const
Return the current value being used by this operand.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class represents a diagnostic that is inflight and set to be reported.
An integer set representing a conjunction of one or more affine equalities and inequalities.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents an operand of an operation.
Set of flags used to control the behavior of the various IR print methods (e.g.
A wrapper class that allows for printing an operation with a set of flags, useful to act as a "stream...
This class implements the operand iterators for the Operation class.
Operation is the basic unit of execution within MLIR.
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Block * getBlock()
Returns the operation block that contains this operation.
Location getLoc()
The source location the operation was defined or derived from.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Operation * clone(IRMapping &mapper, const CloneOptions &options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Region * getParentRegion()
Returns the region to which the instruction belongs.
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
Type getType() const
Return the type of this value.
static WalkResult advance()
static WalkResult interrupt()
AffineBound represents a lower or upper bound in the for operation.
Value getOperand(unsigned idx)
unsigned getNumOperands()
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
ArrayRef< Value > getOperands() const
AffineMap getAffineMap() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Operation * getOwner() const
Return the owner of this operand.
An IntegerRelation represents the set of points from a PresburgerSpace that satisfy a list of affine ...
void removeIndependentConstraints(unsigned pos, unsigned num)
Removes constraints that are independent of (i.e., do not have a coefficient) variables in the range ...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
ArrayRef< DynamicAPInt > getInequality(unsigned idx) const
DynamicAPInt atIneq(unsigned i, unsigned j) const
Returns the value at the specified inequality row and column.
bool isHyperRectangular(unsigned pos, unsigned num) const
Returns true if the set can be trivially detected as being hyper-rectangular on the specified contigu...
unsigned getNumVars() const
void setDimSymbolSeparation(unsigned newSymbolCount)
Changes the partition between dimensions and symbols.
unsigned getNumDimAndSymbolVars() const
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void getLowerAndUpperBoundIndices(unsigned pos, SmallVectorImpl< unsigned > *lbIndices, SmallVectorImpl< unsigned > *ubIndices, SmallVectorImpl< unsigned > *eqIndices=nullptr, unsigned offset=0, unsigned num=0) const
Gather positions of all lower and upper bounds of the variable at pos, and optionally any equalities ...
void removeVar(VarKind kind, unsigned pos)
Removes variables of the specified kind with the specified pos (or within the specified range) from t...
LogicalResult loopUnrollFull(AffineForOp forOp)
Unrolls this for operation completely if the trip count is known to be constant.
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor or by the trip count (if constant),...
void extractForInductionVars(ArrayRef< AffineForOp > forInsts, SmallVectorImpl< Value > *ivs)
Extracts the induction variables from a list of AffineForOps and places them in the output argument i...
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, bool cleanUpUnroll=false)
Unrolls this for operation by the specified unroll factor.
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
LogicalResult getIndexSet(MutableArrayRef< Operation * > ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map, SmallVectorImpl< Value > *operands)
Returns the trip count of the loop as an affine map with its corresponding operands if the latter is ...
bool isValidLoopInterchangePermutation(ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
Checks if the loop interchange permutation 'loopPermMap', of the perfectly nested sequence of loops i...
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor)
Unrolls this loop by the specified unroll factor or its trip count, whichever is lower.
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector< SmallVector< DependenceComponent, 2 > > *depCompsVec)
Returns in 'depCompsVec', dependence components for dependences between all load and store ops in loo...
unsigned permuteLoops(ArrayRef< AffineForOp > inputNest, ArrayRef< unsigned > permMap)
Performs a loop permutation on a perfectly nested loop nest inputNest (where the contained loops appe...
LogicalResult loopUnrollJamByFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor.
LogicalResult tilePerfectlyNestedParametric(MutableArrayRef< AffineForOp > input, ArrayRef< Value > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops,...
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands, bool composeAffineMin=false)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
void canonicalizeSetAndOperands(IntegerSet *set, SmallVectorImpl< Value > *operands)
Canonicalizes an integer set the same way canonicalizeMapAndOperands does for affine maps.
bool isPerfectlyNested(ArrayRef< AffineForOp > loops)
Returns true if loops is a perfectly nested loop nest, where loops appear in it from outermost to inn...
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp)
Returns the greatest known integral divisor of the trip count.
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
int64_t numEnclosingInvariantLoops(OpOperand &operand)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [‘beg...
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks where SSA dominance would be violated if a for op's body operations are shifted by the specifi...
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
AffineForOp sinkSequentialLoops(AffineForOp forOp)
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB)
Performs loop interchange on 'forOpA' and 'forOpB'.
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRM...
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
AffineMap removeDuplicateExprs(AffineMap map)
Returns a map with the same dimension and symbol count as map, but whose results are the unique affin...
void generateUnrolledLoop(Block *loopBodyBlock, Value iv, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues, IRMapping *clonedToSrcOpsMap=nullptr)
Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs' and 'yieldedValues' as the...
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
@ ExistingAndNewOps
Only pre-existing and newly created ops are processed.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
llvm::function_ref< Fn > function_ref
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
uint64_t fastMemCapacityBytes
std::optional< int64_t > ub
std::optional< int64_t > lb
A description of a (parallelizable) reduction in an affine loop.
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
FlatAffineValueConstraints * getConstraints()
Value memref
Memref that this region corresponds to.
Location loc
If there is more than one load/store op associated with the region, the location information would co...
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.