// Excerpted listing from MLIR's affine loop utilities (LoopUtils.cpp).
// The listing is non-contiguous; elided source is marked with "// ...".

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "loop-utils"

using namespace affine;
using namespace presburger;
using llvm::SmallMapVector;
// From getCleanupLoopLowerBound():
auto lbMap = forOp.getLowerBoundMap();
auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
                                  forOp.getLowerBoundOperands());
// ...
int64_t step = forOp.getStepAsInt();
for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
  auto tripCountExpr = tripCountMap.getResult(i);
  bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
}
// ...
b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, tripCountOperands);
// ...
for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
  // ...
cleanupLbOperands.clear();
cleanupLbOperands.push_back(lb);
cleanupLbOperands.append(bumpValues.begin(), bumpValues.end());
// ...
// Remove any affine.apply's that became dead as a result of composition.
for (auto v : bumpValues)
  if (v.use_empty())
    v.getDefiningOp()->erase();
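// Illustration (added; not part of the source): with a constant trip count
// of 10, unrollFactor = 4, and step = 2, the bump expression evaluates to
// (10 - 10 % 4) * 2 = 16, so the cleanup loop's lower bound becomes lb + 16
// and it runs the 10 % 4 = 2 leftover iterations.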
// From replaceIterArgsAndYieldResults():
// Replace uses of iter arguments with iter operands (initial values).
auto iterOperands = forOp.getInits();
auto iterArgs = forOp.getRegionIterArgs();
for (auto e : llvm::zip(iterOperands, iterArgs))
  std::get<1>(e).replaceAllUsesWith(std::get<0>(e));

// Replace uses of loop results with the values yielded by the loop.
auto outerResults = forOp.getResults();
auto innerResults = forOp.getBody()->getTerminator()->getOperands();
for (auto e : llvm::zip(outerResults, innerResults))
  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
// From promoteIfSingleIteration():
if (!tripCount || *tripCount != 1)
  return failure();
// ...
if (forOp.getLowerBoundMap().getNumResults() != 1)
  return failure();

// Replaces all IV uses to its single iteration value.
auto iv = forOp.getInductionVar();
auto *parentBlock = forOp->getBlock();
if (!iv.use_empty()) {
  if (forOp.hasConstantLowerBound()) {
    auto func = forOp->getParentOfType<FunctionOpInterface>();
    OpBuilder builder(forOp->getContext());
    if (func)
      builder.setInsertionPointToStart(&func.getFunctionBody().front());
    else
      builder.setInsertionPoint(forOp);
    auto constOp = builder.create<arith::ConstantIndexOp>(
        forOp.getLoc(), forOp.getConstantLowerBound());
    iv.replaceAllUsesWith(constOp);
  } else {
    auto lbOperands = forOp.getLowerBoundOperands();
    auto lbMap = forOp.getLowerBoundMap();
    // If the lower bound map is the identity of a single operand, use that
    // operand directly:
    iv.replaceAllUsesWith(lbOperands[0]);
    // ... otherwise materialize the bound with an affine.apply:
    builder.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
  }
}
// ...
// Move the loop body operations, except for its terminator, to the loop's
// containing block.
forOp.getBody()->back().erase();
parentBlock->getOperations().splice(Block::iterator(forOp),
                                    forOp.getBody()->getOperations());
// From generateShiftedLoop():
static AffineForOp generateShiftedLoop(
    AffineMap lbMap, AffineMap ubMap,
    const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> &opGroupQueue,
    unsigned offset, AffineForOp srcForOp, OpBuilder b) {
  auto lbOperands = srcForOp.getLowerBoundOperands();
  auto ubOperands = srcForOp.getUpperBoundOperands();
  // ...
  auto loopChunk =
      b.create<AffineForOp>(srcForOp.getLoc(), lbOperands, lbMap, ubOperands,
                            ubMap, srcForOp.getStepAsInt());
  auto loopChunkIV = loopChunk.getInductionVar();
  auto srcIV = srcForOp.getInductionVar();
  // ...
  for (const auto &it : llvm::drop_begin(opGroupQueue, offset)) {
    uint64_t shift = it.first;
    auto ops = it.second;
    // ...
    if (!srcIV.use_empty() && shift != 0) {
      auto ivRemap = bodyBuilder.create<AffineApplyOp>(
          srcForOp.getLoc(),
          bodyBuilder.getSingleDimShiftAffineMap(
              -static_cast<int64_t>(srcForOp.getStepAsInt() * shift)),
          loopChunkIV);
      operandMap.map(srcIV, ivRemap);
    } else {
      operandMap.map(srcIV, loopChunkIV);
    }
    for (auto *op : ops)
      bodyBuilder.clone(*op, operandMap);
  }
  // ...
  return AffineForOp();
}
// From affineForOpBodySkew():
LogicalResult mlir::affine::affineForOpBodySkew(AffineForOp forOp,
                                                ArrayRef<uint64_t> shifts,
                                                bool unrollPrologueEpilogue) {
  assert(forOp.getBody()->getOperations().size() == shifts.size() &&
         "too few/many shifts");
  if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
    return success();
  // ...
  if (!mayBeConstTripCount) {
    LLVM_DEBUG(forOp.emitRemark("non-constant trip count loop not handled"));
    return success();
  }
  uint64_t tripCount = *mayBeConstTripCount;

  assert(isOpwiseShiftValid(forOp, shifts) &&
         "shifts will lead to an invalid transformation\n");

  int64_t step = forOp.getStepAsInt();
  unsigned numChildOps = shifts.size();

  // Do a linear-time (counting) sort for the shifts.
  uint64_t maxShift = *llvm::max_element(shifts);
  if (maxShift >= numChildOps) {
    // Large shifts are not the typical use case.
    forOp.emitWarning("not shifting because shifts are unrealistically large");
    return success();
  }

  // An array of operation groups sorted by shift amount; each group has all
  // operations with the same shift in the order in which they appear in the
  // body of the 'affine.for' op.
  std::vector<std::vector<Operation *>> sortedOpGroups(maxShift + 1);
  unsigned pos = 0;
  for (auto &op : forOp.getBody()->without_terminator()) {
    auto shift = shifts[pos++];
    sortedOpGroups[shift].push_back(&op);
  }
  // ...
  AffineForOp prologue, epilogue;
  // ...
  std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> opGroupQueue;

  auto origLbMap = forOp.getLowerBoundMap();
  uint64_t lbShift = 0;
  for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
    if (sortedOpGroups[d].empty())
      continue;
    if (!opGroupQueue.empty()) {
      assert(d >= 1 &&
             "Queue expected to be empty when the first block is found");
      // ...
      if (lbShift + tripCount * step < d * step) {
        // ... generateShiftedLoop(/*lbMap, ubMap elided*/,
        //                         opGroupQueue, /*offset=*/0, forOp, b);
        // The entire loop for the queued op groups is generated; empty it.
        opGroupQueue.clear();
        lbShift += tripCount * step;
      } else {
        // ... generateShiftedLoop(/*lbMap, ubMap elided*/,
        //                         opGroupQueue, /*offset=*/0, forOp, b);
        lbShift = d * step;
      }
      // ...
      AffineForOp::getCanonicalizationPatterns(patterns, res.getContext());
      // ... applied greedily with `config`, /*changed=*/nullptr, &erased ...
      if (!erased && !prologue)
        prologue = res;
      // ...
    }
    // Augment the list of operations that get into the current open interval.
    opGroupQueue.emplace_back(d, sortedOpGroups[d]);
  }

  // The op groups left in the queue now need to be processed (FIFO) and their
  // loops completed.
  for (unsigned i = 0, e = opGroupQueue.size(); i < e; ++i) {
    uint64_t ubShift = (opGroupQueue[i].first + tripCount) * step;
    // ... epilogue = generateShiftedLoop(/*lbMap, ubMap elided*/,
    //                                    opGroupQueue, /*offset=*/i, forOp, b);
  }
  // ...
  if (unrollPrologueEpilogue && prologue)
    (void)loopUnrollFull(prologue);
  if (unrollPrologueEpilogue && epilogue && epilogue != prologue)
    (void)loopUnrollFull(epilogue);
  return success();
}
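// Illustration (added; not part of the source): for a two-op body with
// shifts = {0, 1}, the second op's group starts one iteration (one step)
// later, so the skew produces a prologue running only the first op, a
// steady-state loop running both (the shifted op reads iteration i - 1 via
// the IV remap), and an epilogue running only the second op.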
// From checkIfHyperRectangular():
static LogicalResult checkIfHyperRectangular(MutableArrayRef<AffineForOp> input) {
  FlatAffineValueConstraints cst;
  SmallVector<Operation *, 8> ops(input.begin(), input.end());
  // 0-d or 1-d is trivially hyper-rectangular.
  if (input.size() <= 1)
    return success();
  if (failed(getIndexSet(ops, &cst))) {
    LLVM_DEBUG(llvm::dbgs() << "Index set computation failed!\n");
    return failure();
  }
  if (!cst.isHyperRectangular(0, input.size())) {
    LLVM_DEBUG(llvm::dbgs()
               << "Non-hyperrectangular nests not supported for tiling!\n");
    return failure();
  }
  return success();
}

// From performPreTilingChecks():
template <typename t>
static LogicalResult performPreTilingChecks(MutableArrayRef<AffineForOp> input,
                                            ArrayRef<t> tileSizes) {
  assert(input.size() == tileSizes.size() && "Too few/many tile sizes");

  if (llvm::any_of(input,
                   [](AffineForOp op) { return op.getNumResults() > 0; })) {
    LLVM_DEBUG(llvm::dbgs()
               << "Cannot tile nest where a loop has yield values\n");
    return failure();
  }

  // Check if the supplied `for` ops are all successively nested.
  if (!isPerfectlyNested(input)) {
    LLVM_DEBUG(llvm::dbgs() << "input loops not perfectly nested");
    return failure();
  }
  return success();
}

// From moveLoopBodyImpl(): move the body of `src` (minus its terminator) to
// the specified location in `dest`'s body.
static void moveLoopBodyImpl(AffineForOp src, AffineForOp dest,
                             Block::iterator loc) {
  auto &ops = src.getBody()->getOperations();
  dest.getBody()->getOperations().splice(loc, ops, ops.begin(),
                                         std::prev(ops.end()));
}
// From constructTiledLoopNest():
static void constructTiledLoopNest(MutableArrayRef<AffineForOp> origLoops,
                                   AffineForOp rootAffineForOp, unsigned width,
                                   MutableArrayRef<AffineForOp> tiledLoops) {
  Location loc = rootAffineForOp.getLoc();

  // The outermost among the loops as we add more.
  Operation *topLoop = rootAffineForOp.getOperation();
  AffineForOp innermostPointLoop;

  // Add intra-tile (or point) loops.
  for (unsigned i = 0; i < width; i++) {
    OpBuilder b(topLoop);
    // Loop bounds will be set later.
    AffineForOp pointLoop = b.create<AffineForOp>(loc, 0, 0);
    pointLoop.getBody()->getOperations().splice(
        pointLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
        topLoop);
    tiledLoops[2 * width - 1 - i] = pointLoop;
    topLoop = pointLoop.getOperation();
    if (i == 0)
      innermostPointLoop = pointLoop;
  }

  // Add tile-space loops.
  for (unsigned i = width; i < 2 * width; i++) {
    OpBuilder b(topLoop);
    // Loop bounds will be set later.
    AffineForOp tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
    tileSpaceLoop.getBody()->getOperations().splice(
        tileSpaceLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
        topLoop);
    tiledLoops[2 * width - i - 1] = tileSpaceLoop;
    topLoop = tileSpaceLoop.getOperation();
  }
  // ...
}
// From setIntraTileBoundsParametric():
static void setIntraTileBoundsParametric(OpBuilder &b, AffineForOp origLoop,
                                         AffineForOp newInterTileLoop,
                                         AffineForOp newIntraTileLoop,
                                         Value tileSize) {
  // ...
  assert(origLoop.hasConstantLowerBound() &&
         "expected input loops to have constant lower bound.");
  // ...
  lbOperands.push_back(newInterTileLoop.getInductionVar());
  ubOperands.push_back(newInterTileLoop.getInductionVar());
  // ...
  lbOperands.push_back(tileSize);
  ubOperands.push_back(tileSize);
  // ...
  lbBoundExprs.push_back(
      ((lbLoopIvExpr - origLowerBoundExpr) * lbTileParameter) +
      origLowerBoundExpr);
  // ...
  ubBoundExprs.push_back(
      ((ubLoopIvExpr - origLowerBoundExpr) * ubTileParameter) +
      (ubTileParameter * origLoopStep) + origLowerBoundExpr);
  // ...
  ubBoundExprs.append(origUbMap.getResults().begin(),
                      origUbMap.getResults().end());
  // ...
  newIntraTileLoop.setLowerBound(lbOperands, lbMap);
  // ...
  newIntraTileLoop.setUpperBound(ubOperands, ubMap);
  // Original loop step must be preserved.
  newIntraTileLoop.setStep(origLoop.getStepAsInt());
}
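// Illustration (added; not part of the source): the intra-tile (point) loop
// bounds built above are
//   lb = (iv_inter - origLb) * tileParam + origLb
//   ub = (iv_inter - origLb) * tileParam + tileParam * origStep + origLb
// i.e. each tile spans tileParam * origStep iterations of the original space,
// anchored at the inter-tile IV's position.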
// From setInterTileBoundsParametric():
static void setInterTileBoundsParametric(OpBuilder &b, AffineForOp origLoop,
                                         AffineForOp newLoop, Value tileSize) {
  OperandRange newLbOperands = origLoop.getLowerBoundOperands();
  // The lower bounds for inter-tile loops are the same as the corresponding
  // lower bounds of original loops.
  newLoop.setLowerBound(newLbOperands, origLoop.getLowerBoundMap());
  // ...
  assert(origLoop.hasConstantLowerBound() &&
         "expected input loops to have constant lower bound.");
  // ...
  ubOperands.push_back(tileSize);
  // ...
  int64_t origUpperBound;
  AffineExpr origUpperBoundExpr;
  if (origLoop.hasConstantUpperBound()) {
    origUpperBound = origLoop.getConstantUpperBound();
    // ...
    boundExprs.push_back(
        origLowerBoundExpr +
        (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
  } else {
    // ...
    boundExprs.push_back(
        origLowerBoundExpr +
        (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
  }
  // ...
  newLoop.setUpperBound(ubOperands, ubMap);
  // Original loop step must be preserved.
  newLoop.setStep(origLoop.getStepAsInt());
}
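// Illustration (added; not part of the source): the inter-tile (tile-space)
// loop runs from origLb to origLb + ceil((ub - lb) / tileParam) with the
// original step, i.e. for a unit step it executes roughly one iteration per
// tile; the intra-tile loop above then expands each such iteration back to a
// tileParam-sized chunk of the original space.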
// From constructParametricallyTiledIndexSetHyperRect():
assert(!origLoops.empty() && "expected at least one loop in band");
assert(origLoops.size() == tileSizes.size() &&
       "expected tiling parameter for each loop in band.");

OpBuilder b(origLoops[0].getOperation());
unsigned width = origLoops.size();

// Set bounds for tile-space loops.
for (unsigned i = 0; i < width; ++i) {
  setInterTileBoundsParametric(b, origLoops[i], newLoops[i], tileSizes[i]);
}
// Set bounds for intra-tile loops.
for (unsigned i = 0; i < width; ++i) {
  setIntraTileBoundsParametric(b, origLoops[i], newLoops[i],
                               newLoops[i + width], tileSizes[i]);
}

// From constructTiledIndexSetHyperRect():
assert(!origLoops.empty());
assert(origLoops.size() == tileSizes.size());

OpBuilder b(origLoops[0].getOperation());
unsigned width = origLoops.size();

// Bounds for tile-space loops.
for (unsigned i = 0; i < width; i++) {
  OperandRange newLbOperands = origLoops[i].getLowerBoundOperands();
  OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
  newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
  newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
  // If the step size of the original loop is x and the tile size is y, the
  // tile-space loop's step size becomes x * y.
  newLoops[i].setStep(tileSizes[i] * origLoops[i].getStepAsInt());
}
// Bounds for intra-tile loops.
for (unsigned i = 0; i < width; i++) {
  // ...
  std::optional<uint64_t> mayBeConstantCount =
      getConstantTripCount(origLoops[i]);
  // The lower bound of the intra-tile loop is the tile-space loop's IV.
  newLoops[width + i].setLowerBound(newLoops[i].getInductionVar(), lbMap);
  // The step size of the intra-tile loop is the original loop's step size.
  newLoops[width + i].setStep(origLoops[i].getStepAsInt());

  // Set the upper bound.
  if (mayBeConstantCount && *mayBeConstantCount < tileSizes[i]) {
    // Trip count is less than the tile size: upper bound is lower bound +
    // trip count * step.
    AffineMap ubMap = b.getSingleDimShiftAffineMap(
        *mayBeConstantCount * origLoops[i].getStepAsInt());
    newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
  } else if (largestDiv % tileSizes[i] != 0) {
    // The intra-tile loop goes from iv to min(iv + tileSize * step, origUb).
    // ...
    ubOperands.push_back(newLoops[i].getInductionVar());
    // ...
    boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStepAsInt());
    boundExprs.append(origUbMap.getResults().begin(),
                      origUbMap.getResults().end());
    // ...
    newLoops[width + i].setUpperBound(ubOperands, ubMap);
  } else {
    // No need for the min expression: the tile size divides the trip count.
    AffineMap ubMap = AffineMap::get(
        1, 0, dim + tileSizes[i] * origLoops[i].getStepAsInt());
    newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
  }
}
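// Illustration (added; not part of the source): with a constant tile size,
// the intra-tile upper bound is iv_tile + tileSize * step; when the tile
// size does not evenly divide the trip count, the multi-result map built
// above instead encodes min(iv_tile + tileSize * step, origUb), keeping
// partial tiles in range.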
// From tilePerfectlyNested():
AffineForOp rootAffineForOp = origLoops[0];
// ...
unsigned width = input.size();
// ...
// Replace original IVs with intra-tile loop IVs.
for (unsigned i = 0; i < width; i++)
  origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
// Erase the old loop nest.
rootAffineForOp.erase();
if (tiledNest)
  *tiledNest = std::move(tiledLoops);

// From tilePerfectlyNestedParametric():
AffineForOp rootAffineForOp = origLoops[0];
unsigned width = input.size();
// ...
// Replace original IVs with intra-tile loop IVs.
for (unsigned i = 0; i < width; i++)
  origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
// Erase the old loop nest.
rootAffineForOp.erase();
if (tiledNest)
  *tiledNest = std::move(tiledLoops);

// From getPerfectlyNestedLoops():
nestedLoops.push_back(root);
Block &body = root.getRegion().front();
if (body.begin() != std::prev(body.end(), 2))
  return;
root = dyn_cast<AffineForOp>(&body.front());
// ...
// From getTileableBands():
for (AffineForOp forOp : f.getOps<AffineForOp>()) {
  SmallVector<AffineForOp, 6> band;
  getPerfectlyNestedLoops(band, forOp);
  bands->push_back(band);
}

// From loopUnrollFull():
std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
if (mayBeConstantTripCount.has_value()) {
  uint64_t tripCount = *mayBeConstantTripCount;
  // ...
}

// From loopUnrollUpToFactor():
LogicalResult mlir::affine::loopUnrollUpToFactor(AffineForOp forOp,
                                                 uint64_t unrollFactor) {
  std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.has_value() &&
      *mayBeConstantTripCount < unrollFactor)
    return loopUnrollByFactor(forOp, *mayBeConstantTripCount);
  return loopUnrollByFactor(forOp, unrollFactor);
}

// From generateUnrolledLoop():
static void generateUnrolledLoop(
    Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
    function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
    ValueRange iterArgs, ValueRange yieldedValues) {
  // ...
  if (!annotateFn)
    annotateFn = defaultAnnotateFn;
  // ...
  for (unsigned i = 1; i < unrollFactor; i++) {
    IRMapping operandMap;
    // Prepare the operand map: iter args flow from the previous copy's yields.
    operandMap.map(iterArgs, lastYielded);
    // Remap the IV for this unrolled instance.
    Value ivUnroll = ivRemapFn(i, forOpIV, builder);
    operandMap.map(forOpIV, ivUnroll);
    // Clone the original body of the loop.
    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
      Operation *clonedOp = builder.clone(*it, operandMap);
      annotateFn(i, clonedOp, builder);
    }
    // Update the yielded values.
    for (unsigned i = 0, e = lastYielded.size(); i < e; i++) {
      Operation *defOp = yieldedValues[i].getDefiningOp();
      if (defOp && defOp->getBlock() == loopBodyBlock)
        lastYielded[i] = operandMap.lookup(yieldedValues[i]);
    }
  }
  // Annotate the ops of the original body last so annotations aren't cloned.
  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
    annotateFn(0, &*it, builder);
  // ...
}

// From generateCleanupLoopForUnroll():
static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
                                                  uint64_t unrollFactor) {
  // Insert the cleanup loop right after 'forOp'.
  auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
  // ...
  auto results = forOp.getResults();
  auto cleanupResults = cleanupForOp.getResults();
  auto cleanupIterOperands = cleanupForOp.getInits();
  for (auto e : llvm::zip(results, cleanupResults, cleanupIterOperands)) {
    std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
    cleanupForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
  }
  // ...
  cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
  // The original loop's upper bound becomes the cleanup loop's lower bound.
  forOp.setUpperBound(cleanupOperands, cleanupMap);
  // ...
}

// From loopUnrollByFactor():
LogicalResult mlir::affine::loopUnrollByFactor(
    AffineForOp forOp, uint64_t unrollFactor,
    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
    bool cleanUpUnroll) {
  assert(unrollFactor > 0 && "unroll factor should be positive");
  // ...
  if (unrollFactor == 1) {
    if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
        failed(promoteIfSingleIteration(forOp)))
      return failure();
    return success();
  }
  // Nothing in the loop body other than the terminator.
  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
    return success();
  // If the trip count is lower than the unroll factor, no unrolled body.
  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
    if (cleanUpUnroll) {
      // Unroll the cleanup loop if cleanUpUnroll is specified.
      return loopUnrollFull(forOp);
    }
    return failure();
  }
  // ...
  if (forOp.getLowerBoundMap().getNumResults() != 1 ||
      forOp.getUpperBoundMap().getNumResults() != 1)
    return failure();
  // ...
  if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
    assert(false && "cleanup loop lower bound map for single result lower "
                    "and upper bound maps can always be determined");

  ValueRange iterArgs(forOp.getRegionIterArgs());
  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();

  // Scale the step of the loop being unrolled by the unroll factor.
  int64_t step = forOp.getStepAsInt();
  forOp.setStep(step * unrollFactor);
  generateUnrolledLoop(
      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
      [&](unsigned i, Value iv, OpBuilder b) {
        // iv' = iv + i * step
        auto d0 = b.getAffineDimExpr(0);
        auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
        return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, iv);
      },
      /*annotateFn=*/annotateFn, iterArgs, yieldedValues);
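  // Illustration (added; not part of the source): after the step is scaled
  // by unrollFactor, copy i of the body accesses original iteration
  // iv + i * step; e.g. with step = 1 and unrollFactor = 4, the four copies
  // use iv, iv + 1, iv + 2, and iv + 3.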
// From loopUnrollJamUpToFactor():
LogicalResult mlir::affine::loopUnrollJamUpToFactor(AffineForOp forOp,
                                                    uint64_t unrollJamFactor) {
  std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.has_value() &&
      *mayBeConstantTripCount < unrollJamFactor)
    return loopUnrollJamByFactor(forOp, *mayBeConstantTripCount);
  return loopUnrollJamByFactor(forOp, unrollJamFactor);
}

// From areInnerBoundsInvariant():
static bool areInnerBoundsInvariant(AffineForOp forOp) {
  auto walkResult = forOp.walk([&](AffineForOp aForOp) {
    for (auto controlOperand : aForOp.getControlOperands()) {
      if (!forOp.isDefinedOutsideOfLoop(controlOperand))
        return WalkResult::interrupt();
    }
    return WalkResult::advance();
  });
  return !walkResult.wasInterrupted();
}

// From loopUnrollJamByFactor():
LogicalResult mlir::affine::loopUnrollJamByFactor(AffineForOp forOp,
                                                  uint64_t unrollJamFactor) {
  assert(unrollJamFactor > 0 && "unroll jam factor should be positive");
  // ...
  if (unrollJamFactor == 1) {
    if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
        failed(promoteIfSingleIteration(forOp)))
      return failure();
    return success();
  }
  // Nothing in the loop body other than the terminator.
  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
    return success();
  // If the trip count is lower than the unroll-jam factor, no unroll-jam.
  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollJamFactor) {
    LLVM_DEBUG(llvm::dbgs() << "[failed] trip count < unroll-jam factor\n");
    return failure();
  }
  // ...
  // Gather all loops with iter_args.
  forOp.walk([&](AffineForOp aForOp) {
    if (aForOp.getNumIterOperands() > 0)
      loopsWithIterArgs.push_back(aForOp);
  });
  // ...
  if (forOp.getNumIterOperands() > 0) {
    // ...
  }
  // Generate a cleanup loop if the trip count isn't a multiple of the factor.
  if (forOp.getLowerBoundMap().getNumResults() != 1 ||
      forOp.getUpperBoundMap().getNumResults() != 1)
    return failure();
  // ...
  if (failed(generateCleanupLoopForUnroll(forOp, unrollJamFactor)))
    assert(false && "cleanup loop lower bound map for single result lower "
                    "and upper bound maps can always be determined");
  // ...
  for (AffineForOp oldForOp : loopsWithIterArgs) {
    SmallVector<Value> dupIterOperands, dupYieldOperands;
    ValueRange oldIterOperands = oldForOp.getInits();
    ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
    ValueRange oldYieldOperands =
        cast<AffineYieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
    // Duplicate iter operands and yield operands for each extra copy.
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
      dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
    }
    // ...
    bool forOpReplaced = oldForOp == forOp;
    AffineForOp newForOp =
        cast<AffineForOp>(*oldForOp.replaceWithAdditionalYields(
            rewriter, dupIterOperands, /*replaceInitOperandUsesInLoop=*/false,
            [&](OpBuilder &b, Location loc, ArrayRef<BlockArgument> newBbArgs) {
              return dupYieldOperands;
            }));
    newLoopsWithIterArgs.push_back(newForOp);
    // ...
    ValueRange newIterArgs = newForOp.getRegionIterArgs();
    unsigned oldNumIterArgs = oldIterArgs.size();
    ValueRange newResults = newForOp.getResults();
    unsigned oldNumResults = newResults.size() / unrollJamFactor;
    assert(oldNumIterArgs == oldNumResults &&
           "oldNumIterArgs must be the same as oldNumResults");
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      for (unsigned j = 0; j < oldNumIterArgs; ++j) {
        // ...
        operandMaps[i - 1].map(newIterArgs[j],
                               newIterArgs[i * oldNumIterArgs + j]);
        operandMaps[i - 1].map(newResults[j],
                               newResults[i * oldNumResults + j]);
      }
    }
  }

  // Scale the step of the loop being unroll-jammed by the unroll-jam factor.
  int64_t step = forOp.getStepAsInt();
  forOp.setStep(step * unrollJamFactor);

  auto forOpIV = forOp.getInductionVar();
  for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
    for (auto &subBlock : subBlocks) {
      // Builder to insert unroll-jammed bodies, right at the end of the
      // sub-block.
      OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
      // ...
      auto ivUnroll =
          builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
      operandMaps[i - 1].map(forOpIV, ivUnroll);
      // Clone the sub-block being unroll-jammed.
      for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
        builder.clone(*it, operandMaps[i - 1]);
    }
    // Fix iter operands and yield operands of newly created loops.
    for (auto newForOp : newLoopsWithIterArgs) {
      unsigned oldNumIterOperands =
          newForOp.getNumIterOperands() / unrollJamFactor;
      unsigned numControlOperands = newForOp.getNumControlOperands();
      auto yieldOp = cast<AffineYieldOp>(newForOp.getBody()->getTerminator());
      unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
      assert(oldNumIterOperands == oldNumYieldOperands &&
             "oldNumIterOperands must be the same as oldNumYieldOperands");
      for (unsigned j = 0; j < oldNumIterOperands; ++j) {
        // ...
        newForOp.setOperand(numControlOperands + i * oldNumIterOperands + j,
                            operandMaps[i - 1].lookupOrDefault(
                                newForOp.getOperand(numControlOperands + j)));
        yieldOp.setOperand(
            i * oldNumYieldOperands + j,
            operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(j)));
      }
    }
  }

  if (forOp.getNumResults() > 0) {
    // Create reduction ops to combine every unroll-jammed iteration's yields.
    // ...
    auto loc = forOp.getLoc();
    unsigned oldNumResults = forOp.getNumResults() / unrollJamFactor;
    // ...
    unsigned pos = reduction.iterArgPosition;
    Value lhs = forOp.getResult(pos);
    Value rhs;
    SmallPtrSet<Operation *, 4> newOps;
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      rhs = forOp.getResult(i * oldNumResults + pos);
      // ...
      Operation *op = lhs.getDefiningOp();
      assert(op && "Reduction op should have been created");
      newOps.insert(op);
    }
    // Replace all external uses except the new combining ops themselves.
    forOp.getResult(pos).replaceAllUsesExcept(lhs, newOps);
  }
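  // Illustration (added; not part of the source): after unroll-jam by a
  // factor f, a loop that yielded one reduction value now yields f partial
  // values; the loop above folds result i * oldNumResults + pos into `lhs`
  // one step at a time, and the final combined value replaces all external
  // uses of result `pos`.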
// From interchangeLoops(forOpA, forOpB):
assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
auto &forOpABody = forOpA.getBody()->getOperations();
auto &forOpBBody = forOpB.getBody()->getOperations();
// ...
// 1) Splice forOpA's non-terminator operations (which is just forOpB) just
//    before forOpA (in forOpA's parent's block).
forOpA->getBlock()->getOperations().splice(Block::iterator(forOpA),
                                           forOpABody, forOpABody.begin(),
                                           std::prev(forOpABody.end()));
// 2) Splice forOpB's non-terminator operations into the beginning of
//    forOpA's body.
forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
                  std::prev(forOpBBody.end()));
// 3) Splice forOpA into the beginning of forOpB's body.
forOpBBody.splice(forOpBBody.begin(), forOpA->getBlock()->getOperations(),
                  Block::iterator(forOpA));

// From checkLoopInterchangeDependences():
unsigned maxLoopDepth = loops.size();
SmallVector<unsigned, 4> loopPermMapInv;
loopPermMapInv.resize(maxLoopDepth);
for (unsigned i = 0; i < maxLoopDepth; ++i)
  loopPermMapInv[loopPermMap[i]] = i;
// ...
for (const auto &depComps : depCompsVec) {
  assert(depComps.size() >= maxLoopDepth);
  // ...
  for (unsigned j = 0; j < maxLoopDepth; ++j) {
    unsigned permIndex = loopPermMapInv[j];
    assert(depComps[permIndex].lb);
    int64_t depCompLb = *depComps[permIndex].lb;
    // ...
  }
}
1340 int64_t depCompLb = *depComps[permIndex].lb;
1354 assert(loopPermMap.size() == loops.size() &&
"invalid loop perm map");
1355 unsigned maxLoopDepth = loops.size();
1356 if (maxLoopDepth == 1)
1360 std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
1367 bool LLVM_ATTRIBUTE_UNUSED
1369 assert(!loops.empty() &&
"no loops provided");
1372 auto hasTwoElements = [](
Block *block) {
1373 auto secondOpIt = std::next(block->begin());
1374 return secondOpIt != block->end() && &*secondOpIt == &block->back();
1377 auto enclosingLoop = loops.front();
1378 for (
auto loop : loops.drop_front()) {
1379 auto parentForOp = dyn_cast<AffineForOp>(loop->getParentOp());
1381 if (parentForOp != enclosingLoop || !hasTwoElements(parentForOp.getBody()))
1383 enclosingLoop = loop;
1392 assert(input.size() == permMap.size() &&
"invalid permutation map size");
1396 llvm::sort(checkPermMap);
1398 [](
const auto &en) {
return en.value() != en.index(); }))
1399 assert(
false &&
"invalid permutation map");
1402 if (input.size() < 2)
1410 for (
unsigned i = 0, e = input.size(); i < e; ++i)
1411 invPermMap.push_back({permMap[i], i});
1412 llvm::sort(invPermMap);
1416 if (permMap.back() != input.size() - 1) {
1417 Block *destBody = ((AffineForOp)input[invPermMap.back().second]).getBody();
1418 Block *srcBody = ((AffineForOp)input.back()).getBody();
1421 std::prev(srcBody->
end()));
1426 for (
int i = input.size() - 1; i >= 0; --i) {
1429 if (permMap[i] == 0) {
1434 auto *parentBlock = input[0]->getBlock();
1436 input[i]->getBlock()->getOperations(),
1443 unsigned parentPosInInput = invPermMap[permMap[i] - 1].second;
1444 if (i > 0 &&
static_cast<unsigned>(i - 1) == parentPosInInput)
1448 auto *destBody = ((AffineForOp)input[parentPosInInput]).getBody();
1449 destBody->getOperations().splice(destBody->begin(),
1450 input[i]->getBlock()->getOperations(),
1454 return invPermMap[0].second;
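// Illustration (added; not part of the source): for a 2-d nest (i, j) with
// permMap = {1, 0}, loop i moves to depth 1 and loop j to depth 0, i.e. the
// nest becomes (j, i); the return value is the position in `input` of the
// new outermost loop (here 1, the j loop).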
// From sinkSequentialLoops():
if (loops.size() < 2)
  return forOp;
// ...
unsigned maxLoopDepth = loops.size();
std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
getDependenceComponents(loops[0], maxLoopDepth, &depCompsVec);

// Mark loops as either parallel or sequential.
for (auto &depComps : depCompsVec) {
  assert(depComps.size() >= maxLoopDepth);
  for (unsigned j = 0; j < maxLoopDepth; ++j) {
    DependenceComponent &depComp = depComps[j];
    assert(depComp.lb.has_value() && depComp.ub.has_value());
    if (*depComp.lb != 0 || *depComp.ub != 0)
      isParallelLoop[j] = false;
  }
}
// ...
// Compute the permutation: parallel loops first, sequential loops after.
unsigned nextSequentialLoop = numParallelLoops;
unsigned nextParallelLoop = 0;
for (unsigned i = 0; i < maxLoopDepth; ++i) {
  if (isParallelLoop[i])
    loopPermMap[i] = nextParallelLoop++;
  else
    loopPermMap[i] = nextSequentialLoop++;
}
// ...
unsigned loopNestRootIndex = permuteLoops(loops, loopPermMap);
return loops[loopNestRootIndex];
// From augmentMapAndBounds():
static void augmentMapAndBounds(OpBuilder &b, Value iv, AffineMap *map,
                                SmallVector<Value, 4> *operands,
                                int64_t offset = 0) {
  auto bounds = llvm::to_vector<4>(map->getResults());
  bounds.push_back(b.getAffineDimExpr(map->getNumDims()) + offset);
  operands->insert(operands->begin() + map->getNumDims(), iv);
  *map = AffineMap::get(map->getNumDims() + 1, map->getNumSymbols(), bounds,
                        b.getContext());
  canonicalizeMapAndOperands(map, operands);
}

// From stripmineSink():
auto originalStep = forOp.getStepAsInt();
auto scaledStep = originalStep * factor;
forOp.setStep(scaledStep);
// ...
auto lbMap = forOp.getLowerBoundMap();
// ...
auto ubMap = forOp.getUpperBoundMap();
// ...
auto iv = forOp.getInductionVar();
SmallVector<AffineForOp, 8> innerLoops;
for (auto t : targets) {
  // Insert newForOp before the terminator of `t`.
  auto b = OpBuilder::atBlockTerminator(t.getBody());
  auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
                                        ubOperands, ubMap, originalStep);
  auto begin = t.getBody()->begin();
  // Skip terminator and `newForOp` which is just before the terminator.
  auto nOps = t.getBody()->getOperations().size() - 2;
  newForOp.getBody()->getOperations().splice(
      newForOp.getBody()->getOperations().begin(),
      t.getBody()->getOperations(), begin, std::next(begin, nOps));
  replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
                             newForOp.getRegion());
  innerLoops.push_back(newForOp);
}

// Single-target overload:
template <typename SizeType>
static AffineForOp stripmineSink(AffineForOp forOp, SizeType factor,
                                 AffineForOp target) {
  // ...
  assert(res.size() == 1 && "Expected 1 inner forOp");
  // ...
}

// From tile():
for (auto it : llvm::zip(forOps, sizes)) {
  auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
  res.push_back(step);
  currentTargets = step;
}
// ...
// tile() overload with a single target:
res.push_back(llvm::getSingleElement(loops));

// From coalesceLoops():
if (loops.size() < 2)
  return success();

AffineForOp innermost = loops.back();
AffineForOp outermost = loops.front();
// ...
for (AffineForOp loop : loops) {
  // We only work on normalized loops.
  if (loop.getStepAsInt() != 1 || !loop.hasConstantLowerBound() ||
      loop.getConstantLowerBound() != 0)
    return failure();
}
// ...
// 1. Store the upper bound of each loop (as a min or a plain apply).
if (!llvm::hasSingleElement(origUbMap.getResults()))
  prev = builder.create<AffineMinOp>(loc, origUbMap, ubOperands);
else
  prev = builder.create<AffineApplyOp>(loc, origUbMap, ubOperands);
upperBoundSymbols.push_back(prev);
// ...
for (AffineForOp loop : loops.drop_front()) {
  ub = loop.getUpperBound();
  // ...
  if (!llvm::hasSingleElement(origUbMap.getResults()))
    upperBound = builder.create<AffineMinOp>(loc, origUbMap, ubOperands);
  else
    upperBound = builder.create<AffineApplyOp>(loc, origUbMap, ubOperands);
  upperBoundSymbols.push_back(upperBound);
  // ...
  operands.push_back(prev);
  operands.push_back(upperBound);
  // Accumulate the product of the upper bounds.
  prev = builder.create<AffineApplyOp>(/*...*/);
}
// 2. The coalesced loop spans the product of the original trip counts.
outermost.setUpperBound(prev, newUbMap);
// ...
// 3. Remap the induction variables: each original IV is recovered from the
// coalesced IV with floordiv/mod affine.apply ops.
Value previous = outermost.getInductionVar();
for (unsigned idx = loops.size(); idx > 0; --idx) {
  if (idx != loops.size()) {
    // ...
    operands.push_back(previous);
    operands.push_back(upperBoundSymbols[idx]);
    previous = builder.create<AffineApplyOp>(/*... floordiv ...*/);
  }
  // Modified value of the induction variable of the nested loop after
  // coalescing.
  Value inductionVariable;
  if (idx == 1) {
    inductionVariable = previous;
  } else {
    // ...
    applyOperands.push_back(previous);
    applyOperands.push_back(upperBoundSymbols[idx - 1]);
    inductionVariable = builder.create<AffineApplyOp>(/*... mod ...*/);
  }
  replaceAllUsesInRegionWith(loops[idx - 1].getInductionVar(),
                             inductionVariable, loops.back().getRegion());
}
// 4. Move the innermost body up, erase the extra terminator and the
// second-outermost loop.
AffineForOp secondOutermostLoop = loops[1];
innermost.getBody()->back().erase();
outermost.getBody()->getOperations().splice(
    /*...*/,
    innermost.getBody()->getOperations());
secondOutermostLoop.erase();
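// Illustration (added; not part of the source): coalescing normalized loops
// of sizes N0, N1, N2 yields one loop of N0 * N1 * N2 iterations, and the
// original IVs are recovered from the coalesced IV as
//   iv2 = iv mod N2
//   iv1 = (iv floordiv N2) mod N1
//   iv0 = iv floordiv (N1 * N2)
// which is what the floordiv/mod affine.apply chain above materializes.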
// From mapLoopToProcessorIds():
assert(processorId.size() == numProcessors.size());
if (processorId.empty())
  return;
// ...
Value linearIndex = processorId.front();
for (unsigned i = 1, e = processorId.size(); i < e; ++i) {
  auto mulApplyOp = b.create<AffineApplyOp>(
      loc, mulMap, ValueRange{linearIndex, numProcessors[i]});
  linearIndex = b.create<AffineApplyOp>(
      loc, addMap, ValueRange{mulApplyOp, processorId[i]});
}

auto mulApplyOp = b.create<AffineApplyOp>(
    loc, mulMap, ValueRange{linearIndex, forOp.getStep()});
Value lb = b.create<AffineApplyOp>(
    loc, addMap, ValueRange{mulApplyOp, forOp.getLowerBound()});
forOp.setLowerBound(lb);

Value step = forOp.getStep();
for (auto numProcs : numProcessors)
  step = b.create<AffineApplyOp>(loc, mulMap, ValueRange{numProcs, step});
forOp.setStep(step);
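// Illustration (added; not part of the source): for processor ids (p0, p1)
// with grid sizes (n0, n1), the linear index computed above is p0 * n1 + p1;
// the lower bound is bumped by linearIndex * step and the step is scaled by
// n0 * n1, so each processor executes a disjoint cyclic slice of the
// iteration space.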
// From findHighestBlockForPlacement():
static void findHighestBlockForPlacement(
    const MemRefRegion &region, Block &block, Block::iterator &begin,
    Block::iterator &end, Block **copyPlacementBlock,
    Block::iterator *copyInPlacementStart,
    Block::iterator *copyOutPlacementStart) {
  // ...
  cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);
  // ...
  auto it = enclosingAffineOps.rbegin();
  AffineForOp lastInvariantFor;
  for (auto e = enclosingAffineOps.rend(); it != e; ++it) {
    // ...
    //   "memref definition will end up not dominating hoist location\n"
    // ...
    auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
    if (!affineFor)
      break;
    // ...
    if (llvm::is_contained(symbols, affineFor.getInductionVar()))
      break;
    lastInvariantFor = affineFor;
  }
  if (it != enclosingAffineOps.rbegin()) {
    // ...
    *copyOutPlacementStart = std::next(*copyInPlacementStart);
    *copyPlacementBlock = lastInvariantFor->getBlock();
  } else {
    *copyInPlacementStart = begin;
    *copyOutPlacementStart = end;
    *copyPlacementBlock = &block;
  }
}

// From getMultiLevelStrides():
if (bufferShape.size() <= 1)
  return;

int64_t numEltPerStride = 1;
// ...
for (int d = bufferShape.size() - 1; d >= 1; d--) {
  int64_t dimSize = cast<MemRefType>(region.memref.getType()).getDimSize(d);
  // ...
  numEltPerStride *= bufferShape[d];
  // A stride is needed only if the region has a shorter extent than the
  // memref along the dimension and a greater extent along the next major one.
  if (bufferShape[d] < dimSize && bufferShape[d - 1] > 1) {
    strideInfos->push_back({stride, numEltPerStride});
  }
}

// From generatePointWiseCopy():
assert(llvm::all_of(lbMaps, [&](AffineMap lbMap) {
  return lbMap.getNumInputs() == lbOperands.size();
}));
assert(llvm::all_of(ubMaps, [&](AffineMap ubMap) {
  return ubMap.getNumInputs() == ubOperands.size();
}));

unsigned rank = cast<MemRefType>(memref.getType()).getRank();
// ...
assert(rank != 0 && "non-zero rank memref expected");
assert(lbMaps.size() == rank && "wrong number of lb maps");
assert(ubMaps.size() == rank && "wrong number of ub maps");
// ...
AffineForOp copyNestRoot;
// ...
for (unsigned d = 0; d < rank; ++d) {
  auto forOp = createCanonicalizedAffineForOp(b, loc, lbOperands, lbMaps[d],
                                              ubOperands, ubMaps[d]);
  if (d == 0)
    copyNestRoot = forOp;
  // ...
  auto fastBufOffsetMap =
      AffineMap::get(lbOperands.size(), 0, fastBufOffsets[d]);
  auto offset = b.create<AffineApplyOp>(loc, fastBufOffsetMap, lbOperands);
  // ...
  fastBufMapOperands.push_back(offset);
  fastBufMapOperands.push_back(forOp.getInductionVar());
  mayBeDeadApplys.push_back(offset);
  // ...
  memIndices.push_back(forOp.getInductionVar());
}
// ...
// Drop the affine.apply's that became dead.
for (auto applyOp : mayBeDeadApplys)
  if (applyOp.use_empty())
    applyOp.erase();
// ...
if (!isCopyOut) {
  // Copy in.
  auto load = b.create<AffineLoadOp>(loc, memref, memIndices);
  b.create<AffineStoreOp>(loc, load, fastMemRef, fastBufMap,
                          fastBufMapOperands);
  return copyNestRoot;
}
// Copy out.
auto load =
    b.create<AffineLoadOp>(loc, fastMemRef, fastBufMap, fastBufMapOperands);
b.create<AffineStoreOp>(loc, load, memref, memIndices);
return copyNestRoot;
// From generateCopy():
auto f = begin->getParentOfType<FunctionOpInterface>();
OpBuilder topBuilder(f.getFunctionBody());
Value zeroIndex = topBuilder.create<arith::ConstantIndexOp>(f.getLoc(), 0);
// ...
bool isCopyOutAtEndOfBlock = (end == copyOutPlacementStart);

// Copies for read regions are inserted at the copy-in placement start;
// copies for write regions at the copy-out placement start.
OpBuilder prologue(copyPlacementBlock, copyInPlacementStart);
OpBuilder epilogue(copyPlacementBlock, copyOutPlacementStart);
// ...
auto loc = region.loc;
auto memref = region.memref;
auto memRefType = cast<MemRefType>(memref.getType());

if (!memRefType.getLayout().isIdentity()) {
  LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
  return failure();
}
// ...
unsigned rank = memRefType.getRank();
if (rank == 0) {
  LLVM_DEBUG(llvm::dbgs() << "Non-zero ranked memrefs supported\n");
  return failure();
}
// ...
std::optional<int64_t> numElements =
    // ... constant bounding size and shape of the region ...
if (!numElements) {
  LLVM_DEBUG(llvm::dbgs() << "Non-constant region size not supported\n");
  return failure();
}
// ...
LLVM_DEBUG(llvm::dbgs()
           << "Max lower bound for memref region start not supported\n");
// ...
if (*numElements == 0) {
  LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n");
  return success();
}
// ...
for (unsigned i = 0; i < rank; ++i) {
  // ...
  if (lbMaps[i].getNumResults() == 0 || ubMaps[i].getNumResults() == 0) {
    LLVM_DEBUG(llvm::dbgs()
               << "Missing lower or upper bound for region along dimension: "
               << i << "\n");
    return failure();
  }
}
// ...
fastBufOffsets.reserve(rank);
for (unsigned d = 0; d < rank; d++) {
  assert(lbs[d].getNumSymbols() == cst->getNumCols() - rank - 1 &&
         "incorrect bound size");
  // ...
  if (lbs[d].isSingleConstant()) {
    auto indexVal = lbs[d].getSingleConstantResult();
    if (indexVal == 0) {
      memIndices.push_back(zeroIndex);
    } else {
      memIndices.push_back(
          top.create<arith::ConstantIndexOp>(loc, indexVal).getResult());
    }
  } else {
    // Turn each symbol into a dim so the bound can be applied to the
    // surrounding IVs/symbols.
    for (unsigned i = 0, e = lbs[d].getNumSymbols(); i < e; ++i)
      symReplacements[i] = top.getAffineDimExpr(i);
    lbs[d] = lbs[d].replaceDimsAndSymbols(
        {}, symReplacements, lbs[d].getNumSymbols(), /*numResultSyms=*/0);
    memIndices.push_back(b.create<AffineApplyOp>(loc, lbs[d], regionSymbols));
  }
  // The fast buffer is copied into at location zero.
  bufIndices.push_back(zeroIndex);
  // Record the offsets since they are needed to remap the memory accesses of
  // the original memref further below.
  fastBufOffsets.push_back(lbs[d].getResult(0));
}
// ...
bool existingBuf = fastBufferMap.count(memref) > 0;
if (!existingBuf) {
  // ...
  fastMemRef =
      prologue.create<memref::AllocOp>(loc, fastMemRefType).getResult();
  // Record it.
  fastBufferMap[memref] = fastMemRef;
  // ...
  *sizeInBytes = maySizeInBytes.value_or(0);
  LLVM_DEBUG(emitRemarkForBlock(*block)
             << "Creating fast buffer of type " << fastMemRefType);
} else {
  // Reuse the one already created.
  fastMemRef = fastBufferMap[memref];
}
// ...
auto numElementsSSA = top.create<arith::ConstantIndexOp>(loc, *numElements);

Value dmaStride;
Value numEltPerDmaStride;
if (copyOptions.generateDma) {
  // ...
  if (dmaStrideInfos.size() > 1) {
    LLVM_DEBUG(llvm::dbgs() << "Only up to one level of stride supported\n");
    return failure();
  }
  if (!dmaStrideInfos.empty()) {
    dmaStride =
        top.create<arith::ConstantIndexOp>(loc, dmaStrideInfos[0].stride);
    numEltPerDmaStride = top.create<arith::ConstantIndexOp>(
        loc, dmaStrideInfos[0].numEltPerStride);
  }
}
// ...
auto postDomFilter = std::prev(end);
// ...
// Point-wise copy nest (non-DMA case).
auto copyNest =
    generatePointWiseCopy(loc, memref, fastMemRef, lbMaps,
                          /*lbOperands=*/regionSymbols, ubMaps,
                          /*ubOperands=*/regionSymbols, fastBufOffsets,
                          region.isWrite(), b);
copyNests.insert(copyNest);
// ...
if (region.isWrite() && isCopyOutAtEndOfBlock) {
  // ...
}
// DMA case:
auto tagMemRef = prologue.create<memref::AllocOp>(loc, tagMemRefType);
// ...
b.create<AffineDmaStartOp>(loc, memref, memAffineMap, memIndices,
                           fastMemRef, bufAffineMap, bufIndices,
                           tagMemRef, tagAffineMap, tagIndices,
                           numElementsSSA, dmaStride, numEltPerDmaStride);
// ... (copy out)
b.create<AffineDmaStartOp>(
    loc, fastMemRef, bufAffineMap, bufIndices, memref, memAffineMap,
    memIndices, tagMemRef, tagAffineMap, tagIndices, numElementsSSA,
    dmaStride, numEltPerDmaStride);
// ...
if (isCopyOutAtEndOfBlock) {
  // ...
}
// Generate dealloc for the tag.
auto tagDeallocOp = epilogue.create<memref::DeallocOp>(loc, tagMemRef);
if (*nEnd == end && isCopyOutAtEndOfBlock) {
  // ...
}
// Generate dealloc for the buffer.
auto bufDeallocOp = epilogue.create<memref::DeallocOp>(loc, fastMemRef);
// ...
if (!copyOptions.generateDma && *nEnd == end && isCopyOutAtEndOfBlock) {
  // ...
}
// Remap all accesses to the original memref so they target the fast buffer,
// offset by the region's lower bounds.
remapExprs.reserve(rank);
for (unsigned i = 0; i < rank; i++) {
  // ...
  remapExprs.push_back(dimExpr - fastBufOffsets[i]);
}
auto indexRemap = AffineMap::get(regionSymbols.size() + rank, 0, remapExprs,
                                 b.getContext());
// ...
bool isBeginAtStartOfBlock = (begin == block->begin());
if (!isBeginAtStartOfBlock)
  prevOfBegin = std::prev(begin);
// ...
*nBegin = isBeginAtStartOfBlock ? block->begin() : std::next(prevOfBegin);
// From getFullMemRefAsRegion():
unsigned rank;
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
  rank = loadOp.getMemRefType().getRank();
  region->memref = loadOp.getMemRef();
  // ...
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
  rank = storeOp.getMemRefType().getRank();
  region->memref = storeOp.getMemRef();
  // ...
} else {
  assert(false && "expected load or store op");
  return false;
}
auto memRefType = cast<MemRefType>(region->memref.getType());
if (!memRefType.hasStaticShape())
  return false;
// ...
ivs.resize(numParamLoopIVs);
// ...
regionCst->setValues(rank, rank + numParamLoopIVs, symbols);

// Memref dim sizes provide the bounds.
for (unsigned d = 0; d < rank; d++) {
  auto dimSize = memRefType.getDimSize(d);
  assert(dimSize > 0 && "filtered dynamic shapes above");
  regionCst->addBound(BoundType::LB, d, 0);
  regionCst->addBound(BoundType::UB, d, dimSize - 1);
}
return true;
// From affineDataCopyGenerate():
LogicalResult mlir::affine::affineDataCopyGenerate(
    Block::iterator begin, Block::iterator end,
    const AffineCopyOptions &copyOptions, std::optional<Value> filterMemRef,
    DenseSet<Operation *> &copyNests) {
  // ...
  assert(begin->getBlock() == std::prev(end)->getBlock() &&
         "Inconsistent block begin/end args");
  assert(end != end->getBlock()->end() && "end can't be the block terminator");

  Block *block = begin->getBlock();
  // ...
  LLVM_DEBUG(llvm::dbgs() << "Generating copies at depth " << copyDepth
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "from begin: " << *begin << "\n");
  LLVM_DEBUG(llvm::dbgs() << "to inclusive end: " << *std::prev(end) << "\n");

  // List of memory regions to copy for. A map vector is used to guarantee a
  // deterministic iteration order.
  SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> readRegions;
  SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> writeRegions;
  // ...
  Value memref;
  MemRefType memrefType;
  if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) {
    memref = loadOp.getMemRef();
    memrefType = loadOp.getMemRefType();
  } else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) {
    memref = storeOp.getMemRef();
    memrefType = storeOp.getMemRefType();
  }
  // ...
  // Skip memrefs filtered out or not in the slow memory space.
  if ((filterMemRef.has_value() && filterMemRef != memref) ||
      (isa_and_nonnull<IntegerAttr>(memrefType.getMemorySpace()) &&
       /* ... memory space != copyOptions.slowMemorySpace ... */))
    return;
  // ...
  LLVM_DEBUG(llvm::dbgs() << "memref definition is inside of the depth at "
                             "which copy-in/copy-out would happen\n");
  // ...
  // Compute the MemRefRegion accessed.
  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
  if (failed(region->compute(opInst, copyDepth, /*sliceState=*/nullptr,
                             /*addMemRefDimBounds=*/false))) {
    LLVM_DEBUG(llvm::dbgs()
               << "Error obtaining memory region: semi-affine maps?\n");
    LLVM_DEBUG(llvm::dbgs() << "over-approximating to the entire memref\n");
    if (!getFullMemRefAsRegion(opInst, copyDepth, region.get())) {
      LLVM_DEBUG(
          opInst->emitError("non-constant memref sizes not yet supported"));
      // ...
    }
  }
  // ...
  // Attempt to update regions[region->memref] by unioning in `region`.
  auto updateRegion =
      [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
              &targetRegions) {
        const auto *const it = targetRegions.find(region->memref);
        if (it == targetRegions.end())
          return false;
        // Perform a union with the existing region.
        if (failed(it->second->unionBoundingBox(*region))) {
          LLVM_DEBUG(llvm::dbgs()
                     << "Memory region bounding box failed; "
                        "over-approximating to the entire memref\n");
          // ...
          LLVM_DEBUG(
              opInst->emitError("non-constant memref sizes not yet supported"));
          // ...
          it->second->getConstraints()->clearAndCopyFrom(
              *region->getConstraints());
        } else {
          // ...
          region->getConstraints()->clearAndCopyFrom(
              *it->second->getConstraints());
        }
        return true;
      };

  bool existsInRead = updateRegion(readRegions);
  // ...
  bool existsInWrite = updateRegion(writeRegions);
  // ...
  if (region->isWrite() && !existsInWrite) {
    writeRegions[region->memref] = std::move(region);
  } else if (!region->isWrite() && !existsInRead) {
    readRegions[region->memref] = std::move(region);
  }
  // ...
  LLVM_DEBUG(begin->emitError(
      "copy generation failed for one or more memref's in this block\n"));
  // ...
  uint64_t totalCopyBuffersSizeInBytes = 0;
  // ...
  auto processRegions =
      [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
              &regions) {
        for (const auto &regionEntry : regions) {
          // ...
          Block *copyPlacementBlock;
          findHighestBlockForPlacement(
              *regionEntry.second, *block, begin, end, &copyPlacementBlock,
              &copyInPlacementStart, &copyOutPlacementStart);

          uint64_t sizeInBytes;
          // ...
          LogicalResult iRet = generateCopy(
              *regionEntry.second, block, begin, end, copyPlacementBlock,
              copyInPlacementStart, copyOutPlacementStart, copyOptions,
              fastBufferMap, copyNests, &sizeInBytes, &nBegin, &nEnd);
          if (succeeded(iRet)) {
            // ...
            totalCopyBuffersSizeInBytes += sizeInBytes;
          }
          ret = ret & succeeded(iRet);
        }
      };
  processRegions(readRegions);
  processRegions(writeRegions);

  if (!ret) {
    LLVM_DEBUG(begin->emitError(
        "copy generation failed for one or more memref's in this block\n"));
    return failure();
  }
  // ...
  if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) {
    LLVM_DEBUG(forOp.emitRemark()
               // ...
               << " KiB of copy buffers in fast memory space for this block");
  }

  if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) {
    block->getParentOp()->emitWarning(
        "total size of all copy buffers' for this block exceeds fast memory "
        "capacity");
  }
  return success();
}

// affineDataCopyGenerate(AffineForOp, ...) overload forwards to the range
// version over the loop body:
return affineDataCopyGenerate(forOp.getBody()->begin(),
                              std::prev(forOp.getBody()->end()), copyOptions,
                              filterMemRef, copyNests);
// From generateCopyForMemRegion():
auto begin = analyzedOp->getIterator();
auto end = std::next(begin);
// ...
auto err = generateCopy(memrefRegion, block, begin, end, block, begin, end,
                        copyOptions, fastBufferMap, copyNests,
                        /*sizeInBytes=*/&result.sizeInBytes, &begin, &end);
// ...
const auto &en = fastBufferMap.find(memrefRegion.memref);
// In some cases (empty loops), no copy generation would have happened.
if (en == fastBufferMap.end())
  return failure();
result.alloc = en->second.getDefiningOp();
assert(result.alloc && "fast buffer expected to be locally allocated");
assert(copyNests.size() <= 1 && "At most one copy nest is expected.");
result.copyNest = copyNests.empty() ? nullptr : *copyNests.begin();
// From gatherLoopsInBlock():
// Add a new empty level to the output if it doesn't exist already.
assert(currLoopDepth <= depthToLoops.size() && "Unexpected currLoopDepth");
if (currLoopDepth == depthToLoops.size())
  depthToLoops.emplace_back();

for (auto &op : *block) {
  if (auto forOp = dyn_cast<AffineForOp>(op)) {
    depthToLoops[currLoopDepth].push_back(forOp);
    gatherLoopsInBlock(forOp.getBody(), currLoopDepth + 1, depthToLoops);
  }
}

// From gatherLoops():
for (auto &block : func)
  gatherLoopsInBlock(&block, /*currLoopDepth=*/0, depthToLoops);

// Remove the last loop level from the output since it's empty.
if (!depthToLoops.empty()) {
  assert(depthToLoops.back().empty() && "Last loop level is not empty?");
  depthToLoops.pop_back();
}
// From createCanonicalizedAffineForOp():
return b.create<AffineForOp>(loc, lowerOperands, lbMap, upperOperands, ubMap,
                             step);

// From createSeparationCondition():
static AffineIfOp createSeparationCondition(MutableArrayRef<AffineForOp> loops,
                                            OpBuilder b) {
  auto *context = loops[0].getContext();
  // ...
  SmallVector<Operation *, 8> ops;
  llvm::append_range(ops, loops);
  // ...
  for (auto loop : loops) {
    // ...
    assert(loop.getStepAsInt() == 1 && "point loop step expected to be one");
    // ...
    unsigned fullTileLbPos, fullTileUbPos;
    if (!cst.getConstantBoundOnDimSize(0, /*lb=*/nullptr,
                                       /*boundFloorDivisor=*/nullptr,
                                       /*ub=*/nullptr, &fullTileLbPos,
                                       &fullTileUbPos)) {
      LLVM_DEBUG(llvm::dbgs() << "Can't get constant diff pair for a loop\n");
      return nullptr;
    }
    // ...
    fullTileLb.assign(fLb.begin(), fLb.end());
    fullTileUb.assign(fUb.begin(), fUb.end());
    // Rewrite the loop's lower bound inequalities relative to the full tile's
    // lower bound.
    for (auto lbIndex : lbIndices)
      for (unsigned i = 0, e = cst.getNumCols(); i < e; ++i)
        cst.atIneq(lbIndex, i) = fullTileLb[i] - cst.atIneq(lbIndex, i);
    // ... and the upper bound inequalities relative to the full tile's upper
    // bound.
    for (auto ubIndex : ubIndices)
      for (unsigned i = 0, e = cst.getNumCols(); i < e; ++i)
        cst.atIneq(ubIndex, i) -= fullTileUb[i];
    // ...
  }
  // ...
  return b.create<AffineIfOp>(loops[0].getLoc(), ifCondSet, setOperands,
                              /*withElseRegion=*/true);
}
// From createFullTiles():
static LogicalResult
createFullTiles(MutableArrayRef<AffineForOp> inputNest,
                SmallVectorImpl<AffineForOp> &fullTileLoops, OpBuilder b) {
  fullTileLoops.reserve(inputNest.size());
  // ...
  for (auto loop : inputNest) {
    // ...
    if (loop.getStepAsInt() != 1) {
      LLVM_DEBUG(llvm::dbgs()
                 << "[tile separation] non-unit stride not implemented\n");
      return failure();
    }
    // ...
    unsigned lbPos, ubPos;
    if (!cst.getConstantBoundOnDimSize(0, /*lb=*/nullptr,
                                       /*boundFloorDivisor=*/nullptr,
                                       /*ub=*/nullptr, &lbPos, &ubPos) ||
        lbPos == ubPos) {
      LLVM_DEBUG(llvm::dbgs() << "[tile separation] Can't get constant diff / "
                                 "equalities not yet handled\n");
      return failure();
    }
    // ...
    cst.getIneqAsAffineValueMap(0, lbPos, lbVmap, b.getContext());
    cst.getIneqAsAffineValueMap(0, ubPos, ubVmap, b.getContext());
    // ...
    fullTileLoops.push_back(fullTileLoop);
  }

  // Add the body of the full tile loop nest, remapping the original IVs.
  IRMapping operandMap;
  for (const auto &loopEn : llvm::enumerate(inputNest))
    operandMap.map(loopEn.value().getInductionVar(),
                   fullTileLoops[loopEn.index()].getInductionVar());
  // ...
  for (auto &op : inputNest.back().getBody()->without_terminator())
    b.clone(op, operandMap);
  return success();
}
// From separateFullTiles():
if (inputNest.empty())
  return success();
// Check whether the supplied `for` ops are all successively nested.
auto firstLoop = inputNest[0];
// ...
auto prevLoop = firstLoop;
for (auto loop : inputNest.drop_front(1)) {
  assert(loop->getParentOp() == prevLoop && "input not contiguously nested");
  prevLoop = loop;
}
// ...
if (!fullTileLoops.empty())
  fullTileLoops.front().erase();
// ... on failure to create the separation condition:
fullTileLoops.front().erase();
LLVM_DEBUG(llvm::dbgs() << "All tiles are full tiles, or failure creating "
                           "separation condition\n");
// ...
// Move the full tile nest into the 'then' block of the if op.
Block *thenBlock = ifOp.getThenBlock();
AffineForOp outermostFullTileLoop = fullTileLoops[0];
thenBlock->getOperations().splice(
    std::prev(thenBlock->end()),
    outermostFullTileLoop->getBlock()->getOperations(),
    Block::iterator(outermostFullTileLoop));

// Move the partial tile (original) nest into the 'else' block.
Block *elseBlock = ifOp.getElseBlock();
elseBlock->getOperations().splice(std::prev(elseBlock->end()),
                                  firstLoop->getBlock()->getOperations(),
                                  Block::iterator(firstLoop));
// ...
if (fullTileNest)
  *fullTileNest = std::move(fullTileLoops);
// From coalescePerfectlyNestedAffineLoops():
LogicalResult result(failure());
// ...
if (loops.size() <= 1)
  return success();

// Look for a band of loops that can be coalesced: perfectly nested loops
// whose bound operands are all defined above the band.
// 1. For each loop, find the outermost loop above which its operands are
//    defined.
for (unsigned i = 0, e = loops.size(); i < e; ++i) {
  operandsDefinedAbove[i] = i;
  for (unsigned j = 0; j < i; ++j) {
    // If the operands are defined above loops[j], record j and stop.
    operandsDefinedAbove[i] = j;
    // ...
  }
}

// 2. For each maximal band found, coalesce it.
for (unsigned end = loops.size(); end > 0; --end) {
  unsigned start = 0;
  for (; start < end - 1; ++start) {
    auto maxPos =
        *std::max_element(std::next(operandsDefinedAbove.begin(), start),
                          std::next(operandsDefinedAbove.begin(), end));
    // ...
    assert(maxPos == start &&
           "expected loop bounds to be known at the start of the band");
    // ...
  }
  // If a band was found and transformed, keep looking above it.
  if (start != end - 1) {
    // ...
  }
}
// From numEnclosingInvariantLoops():
while (auto loopOp = currentOp->getParentOfType<LoopLikeOpInterface>()) {
  if (!loopOp.isDefinedOutsideOfLoop(operand.get()))
    break;
  // ...
}
Functions defined in this file (from the page's cross-reference index):

static LogicalResult performPreTilingChecks(MutableArrayRef<AffineForOp> input, ArrayRef<t> tileSizes)
    Check if the input nest is supported for tiling and whether tiling would be legal or not.
static void constructParametricallyTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops, MutableArrayRef<AffineForOp> newLoops, ArrayRef<Value> tileSizes)
    Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets, ...
static void constructTiledLoopNest(MutableArrayRef<AffineForOp> origLoops, AffineForOp rootAffineForOp, unsigned width, MutableArrayRef<AffineForOp> tiledLoops)
    Constructs the tiled loop nest without setting the loop bounds, and moves the body of the original loop nest to the tiled nest.
static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs, MemRefRegion *region)
    Construct the memref region to just include the entire memref.
static SmallVector<AffineForOp, 8> stripmineSink(AffineForOp forOp, uint64_t factor, ArrayRef<AffineForOp> targets)
static InFlightDiagnostic LLVM_ATTRIBUTE_UNUSED emitRemarkForBlock(Block &block)
static LogicalResult checkIfHyperRectangular(MutableArrayRef<AffineForOp> input)
    Checks whether a loop nest is hyper-rectangular or not.
static void findHighestBlockForPlacement(const MemRefRegion &region, Block &block, Block::iterator &begin, Block::iterator &end, Block **copyPlacementBlock, Block::iterator *copyInPlacementStart, Block::iterator *copyOutPlacementStart)
    Given a memref region, determine the lowest depth at which transfers can be placed for it, and return the corresponding block, start and end positions in the block for placing incoming (read) and outgoing (write) copies respectively.
static AffineForOp generateShiftedLoop(AffineMap lbMap, AffineMap ubMap, const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> &opGroupQueue, unsigned offset, AffineForOp srcForOp, OpBuilder b)
    Generates an affine.for op with the specified lower and upper bounds while generating the right IV remappings to realize shifts for operations in its body.
static void moveLoopBodyImpl(AffineForOp src, AffineForOp dest, Block::iterator loc)
    Move the loop body of AffineForOp 'src' from 'src' into the specified location in destination's body, ...
static void setInterTileBoundsParametric(OpBuilder &b, AffineForOp origLoop, AffineForOp newLoop, Value tileSize)
    Set lower and upper bounds of inter-tile loops for parametric tiling.
static void setIntraTileBoundsParametric(OpBuilder &b, AffineForOp origLoop, AffineForOp newInterTileLoop, AffineForOp newIntraTileLoop, Value tileSize)
    Set lower and upper bounds of intra-tile loops for parametric tiling.
static void gatherLoopsInBlock(Block *block, unsigned currLoopDepth, std::vector<SmallVector<AffineForOp, 2>> &depthToLoops)
    Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
    Generates unrolled copies of the loop body, with the associated 'forOpIV' remapped by 'ivRemapFn' for each copy.
static LogicalResult generateCopy(const MemRefRegion &region, Block *block, Block::iterator begin, Block::iterator end, Block *copyPlacementBlock, Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart, const AffineCopyOptions &copyOptions, DenseMap<Value, Value> &fastBufferMap, DenseSet<Operation *> &copyNests, uint64_t *sizeInBytes, Block::iterator *nBegin, Block::iterator *nEnd)
    Creates a buffer in the faster memory space for the specified memref region (the memref has to be non-zero ranked), ...
static LogicalResult createFullTiles(MutableArrayRef<AffineForOp> inputNest, SmallVectorImpl<AffineForOp> &fullTileLoops, OpBuilder b)
    Create the full tile loop nest (along with its body).
static void getMultiLevelStrides(const MemRefRegion &region, ArrayRef<int64_t> bufferShape, SmallVectorImpl<StrideInfo> *strideInfos)
    Returns striding information for a copy/transfer of this region with potentially multiple striding levels.
static void constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops, MutableArrayRef<AffineForOp> newLoops, ArrayRef<unsigned> tileSizes)
    Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets, ...
static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp, uint64_t unrollFactor)
    Helper to generate a cleanup loop for unroll or unroll-and-jam when the trip count is not a multiple of the unroll factor.
static bool areInnerBoundsInvariant(AffineForOp forOp)
    Check if all control operands of all loops are defined outside of forOp and return false if not.
static bool checkLoopInterchangeDependences(const std::vector<SmallVector<DependenceComponent, 2>> &depCompsVec, ArrayRef<AffineForOp> loops, ArrayRef<unsigned> loopPermMap)
static void moveLoopBody(AffineForOp src, AffineForOp dest)
    Move the loop body of AffineForOp 'src' from 'src' to the start of dest body.
static AffineIfOp createSeparationCondition(MutableArrayRef<AffineForOp> loops, OpBuilder b)
    Creates an AffineIfOp that encodes the conditional to choose between the constant trip count version and an unknown trip count version of this nest of loops.
static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, AffineMap &cleanupLbMap, SmallVectorImpl<Value> &cleanupLbOperands)
    Computes the cleanup loop lower bound of the loop being unrolled with the specified unroll factor; this bound will also be the upper bound of the main part of the unrolled loop.
static void augmentMapAndBounds(OpBuilder &b, Value iv, AffineMap *map, SmallVector<Value, 4> *operands, int64_t offset = 0)
static AffineForOp generatePointWiseCopy(Location loc, Value memref, Value fastMemRef, ArrayRef<AffineMap> lbMaps, ArrayRef<Value> lbOperands, ArrayRef<AffineMap> ubMaps, ArrayRef<Value> ubOperands, ArrayRef<AffineExpr> fastBufOffsets, bool isCopyOut, OpBuilder b)
    Generates a point-wise copy from/to a non-zero ranked 'memref' to/from 'fastMemRef' and returns the outermost AffineForOp of the copy nest.
static void replaceIterArgsAndYieldResults(AffineForOp forOp)
    Helper to replace uses of loop carried values (iter_args) and loop yield values while promoting single iteration affine.for ops.
Referenced mlir::affine API (from the page's cross-reference index):

bool isParallelLoop(Operation &op)
std::optional<uint64_t> getConstantTripCount(AffineForOp forOp)
    Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec)
    Returns in 'depCompsVec', dependence components for dependences between all load and store ops in the loop nest rooted at 'forOp', at loop depths in range [1, maxLoopDepth].
LogicalResult coalesceLoops(MutableArrayRef<AffineForOp> loops)
    Replace a perfect nest of "for" loops with a single linearized loop.
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl<Value> *operands)
    Given an affine map and its input operands, composes into the map the maps of any AffineApplyOps whose results are among the operands, iteratively, until no operand is the result of an AffineApplyOp.
LogicalResult loopUnrollFull(AffineForOp forOp)
    Unrolls this for operation completely if the trip count is known to be constant.
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
    Promotes the loop body of an AffineForOp to its containing block if the loop is known to have a single iteration.
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions &copyOptions, std::optional<Value> filterMemRef, DenseSet<Operation *> &copyNests)
    Performs explicit copying for the contiguous sequence of operations in the block iterator range ['begin', 'end').
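A minimal usage sketch (added for illustration, not from the source); the
loop `forOp` and the memory-space/capacity values are assumptions, and the
AffineCopyOptions initializer lists its five fields in declaration order:

    AffineCopyOptions copyOptions = {/*generateDma=*/false,
                                     /*slowMemorySpace=*/0,
                                     /*fastMemorySpace=*/1,
                                     /*tagMemorySpace=*/0,
                                     /*fastMemCapacityBytes=*/32 * 1024};
    DenseSet<Operation *> copyNests;
    // Convenience overload that covers the whole body of `forOp`.
    if (failed(affineDataCopyGenerate(forOp, copyOptions,
                                      /*filterMemRef=*/std::nullopt,
                                      copyNests)))
      return failure();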
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp, uint64_t unrollJamFactor)
    Unrolls and jams this loop by the specified factor or by the trip count (if constant), whichever is lower.
void extractForInductionVars(ArrayRef<AffineForOp> forInsts, SmallVectorImpl<Value> *ivs)
    Extracts the induction variables from a list of AffineForOps and places them in the output argument ivs.
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor, function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr, bool cleanUpUnroll = false)
    Unrolls this for operation by the specified unroll factor.
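A minimal usage sketch (added for illustration, not from the source); `func`
is an assumed function-like op, and the factor 4 is arbitrary. The utility
generates the cleanup loop itself when the trip count is not a multiple of
the factor:

    SmallVector<AffineForOp> innermost;
    func.walk([&](AffineForOp forOp) {
      // Collect innermost loops: those whose body contains no affine.for.
      if (forOp.getBody()->getOps<AffineForOp>().empty())
        innermost.push_back(forOp);
    });
    for (AffineForOp forOp : innermost)
      (void)loopUnrollByFactor(forOp, /*unrollFactor=*/4);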
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
void gatherLoops(func::FuncOp func, std::vector< SmallVector< AffineForOp, 2 >> &depthToLoops)
Gathers all AffineForOps in 'func.func' grouped by loop depth.
bool LLVM_ATTRIBUTE_UNUSED isPerfectlyNested(ArrayRef< AffineForOp > loops)
Returns true if loops is a perfectly nested loop nest, where loops appear in it from outermost to inn...
LogicalResult getIndexSet(MutableArrayRef< Operation * > ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
AffineForOp createCanonicalizedAffineForOp(OpBuilder b, Location loc, ValueRange lbOperands, AffineMap lbMap, ValueRange ubOperands, AffineMap ubMap, int64_t step=1)
Creates an AffineForOp while ensuring that the lower and upper bounds are canonicalized, i.e., unused and duplicate operands are removed and any constant operands are propagated/folded in.
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another AffineFor, and the second op being a terminator).
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
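A hedged sketch of driving this transform, validating the shifts with isOpwiseShiftValid (listed below); shifting only the second body op is an arbitrary choice:
  // One shift entry per op in the loop body (terminator included).
  SmallVector<uint64_t, 8> shifts(forOp.getBody()->getOperations().size(), 0);
  if (shifts.size() > 2)
    shifts[1] = 1; // delay the second op by one iteration
  if (isOpwiseShiftValid(forOp, shifts))
    (void)affineForOpBodySkew(forOp, shifts);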
void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map, SmallVectorImpl< Value > *operands)
Returns the trip count of the loop as an affine map with its corresponding operands if the latter is expressible as an affine expression, and nullptr otherwise.
bool isValidLoopInterchangePermutation(ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
Checks if the loop interchange permutation 'loopPermMap', of the perfectly nested sequence of loops in 'loops', would violate dependences.
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion, Operation *analyzedOp, const AffineCopyOptions ©Options, CopyGenerateResult &result)
generateCopyForMemRegion is similar to affineDataCopyGenerate, but works with a single memref region.
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to: (1) drop duplicate operands, (2) drop unused dims and symbols from map, (3) promote valid symbols to symbolic operands where they appear as dimensional operands, and (4) propagate constant operands and drop them.
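A minimal sketch, assuming applyOp is an AffineApplyOp whose map and operands are being reused:
  // Canonicalize before building new ops from the map/operand pair.
  AffineMap map = applyOp.getAffineMap();
  SmallVector<Value, 4> operands(applyOp.getMapOperands().begin(),
                                 applyOp.getMapOperands().end());
  canonicalizeMapAndOperands(&map, &operands);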
LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor)
Unrolls this loop by the specified unroll factor or its trip count, whichever is lower.
unsigned permuteLoops(ArrayRef< AffineForOp > inputNest, ArrayRef< unsigned > permMap)
Performs a loop permutation on a perfectly nested loop nest inputNest (where the contained loops appear from outer to inner); loop 'i' in inputNest is moved to position 'permMap[i]'.
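A sketch pairing this with isValidLoopInterchangePermutation above, assuming nest holds a perfect two-loop nest from outer to inner:
  // Swap the two loops only if the interchange is dependence-safe.
  unsigned permMap[] = {1, 0};
  if (isValidLoopInterchangePermutation(nest, permMap))
    (void)permuteLoops(nest, permMap);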
LogicalResult loopUnrollJamByFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor.
LogicalResult tilePerfectlyNestedParametric(MutableArrayRef< AffineForOp > input, ArrayRef< Value > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops, using SSA values as tiling parameters.
void canonicalizeSetAndOperands(IntegerSet *set, SmallVectorImpl< Value > *operands)
Canonicalizes an integer set the same way canonicalizeMapAndOperands does for affine maps.
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op', ordered from the outermost 'affine.for' operation to the innermost one.
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp)
Returns the greatest known integral divisor of the trip count.
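A sketch, assuming forOp is in scope: picking an unroll factor that divides the trip count avoids generating a cleanup loop.
  uint64_t divisor = getLargestDivisorOfTripCount(forOp);
  if (divisor % 4 == 0)
    (void)loopUnrollByFactor(forOp, /*unrollFactor=*/4);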
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped, std::nullopt otherwise.
int64_t numEnclosingInvariantLoops(OpOperand &operand)
Count the number of loops surrounding operand such that operand could be hoisted above them.
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
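A hedged sketch, where blockId, threadId, gridDim, and blockDim are assumed Values (e.g., results of gpu dialect ops) and forOp is an scf::ForOp:
  // Distribute iterations across a two-level virtual processor grid.
  mapLoopToProcessorIds(forOp, {blockId, threadId}, {gridDim, blockDim});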
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks whether SSA dominance would be violated if a for op's body operations are shifted by the specified shifts.
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes and sinking them, in their order of occurrence in forOps, under each of the targets.
AffineForOp sinkSequentialLoops(AffineForOp forOp)
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
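A sketch combining this with getTileableBands (listed below), assuming f is a func::FuncOp; the tile size of 32 is arbitrary:
  std::vector<SmallVector<AffineForOp, 6>> bands;
  getTileableBands(f, &bands);
  for (auto &band : bands) {
    SmallVector<unsigned, 6> tileSizes(band.size(), 32);
    (void)tilePerfectlyNested(band, tileSizes);
  }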
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB)
Performs loop interchange on 'forOpA' and 'forOpB'.
LogicalResult coalescePerfectlyNestedAffineLoops(AffineForOp op)
Walk an affine.for to find a band to coalesce.
void getTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 >> *bands)
Identify valid and profitable bands of loops to tile.
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memref's indices using the supplied affine map, indexRemap.
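A hedged sketch, assuming oldMemRef and newMemRef are rank-2 memref Values: remapping (d0, d1) -> (d1, d0) transposes every access while switching memrefs.
  AffineMap remap = AffineMap::getPermutationMap(ArrayRef<unsigned>{1, 0},
                                                 oldMemRef.getContext());
  (void)replaceAllMemRefUsesWith(oldMemRef, newMemRef,
                                 /*extraIndices=*/{}, remap);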
LogicalResult separateFullTiles(MutableArrayRef< AffineForOp > nest, SmallVectorImpl< AffineForOp > *fullTileNest=nullptr)
Separates full tiles from partial tiles for a perfect nest nest by generating a conditional guard that selects between the full tile version and the partial tile version.
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRMWKind op to lhs and rhs.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next integer value.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region ®ion)
Replace all uses of orig within the given region with replacement.
AffineMap removeDuplicateExprs(AffineMap map)
Returns a map with the same dimension and symbol count as map, but whose results are the unique affine expressions of map.
GreedyRewriteConfig config
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist driven manner until a fixpoint is reached.
const FrozenRewritePatternSet & patterns
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. N-1].
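A minimal sketch, assuming ctx is an MLIRContext*:
  // Bind two symbols and build the single-result map (s0 + s1).
  AffineExpr s0, s1;
  bindSymbols(ctx, s0, s1);
  AffineMap sum = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/2, s0 + s1);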
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction methods.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
@ ExistingOps
Only pre-existing ops are processed.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
Result for calling generateCopyForMemRegion.
std::optional< int64_t > ub
std::optional< int64_t > lb
A description of a (parallelizable) reduction in an affine loop.
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this memref.
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constant, std::nullopt otherwise.
FlatAffineValueConstraints * getConstraints()
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Value memref
Memref that this region corresponds to.
Location loc
If there is more than one load/store op associated with the region, the location information would correspond to one of those op's.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.