#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "loop-utils"

using namespace affine;
using namespace presburger;
using llvm::SmallMapVector;
auto lbMap = forOp.getLowerBoundMap();
auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
                                  forOp.getLowerBoundOperands());
int64_t step = forOp.getStepAsInt();
for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
  auto tripCountExpr = tripCountMap.getResult(i);
  bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;

b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, tripCountOperands);

for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)

cleanupLbOperands.clear();
cleanupLbOperands.push_back(lb);
cleanupLbOperands.append(bumpValues.begin(), bumpValues.end());

for (auto v : bumpValues)
  v.getDefiningOp()->erase();
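// Illustration (not from the original source): with constant bounds, the
// cleanup lower bound assembled above reduces to scalar arithmetic. A
// hypothetical helper, assuming lb, tripCount, step, and unrollFactor are
// known constants:
static int64_t exampleCleanupLowerBound(int64_t lb, int64_t tripCount,
                                        int64_t step, int64_t unrollFactor) {
  // The unrolled loop executes the first tripCount - tripCount % unrollFactor
  // iterations; the cleanup loop begins right after them.
  return lb + (tripCount - tripCount % unrollFactor) * step;
}
// E.g. lb = 0, tripCount = 10, step = 2, unrollFactor = 4 yields 16.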
auto iterOperands = forOp.getInits();
auto iterArgs = forOp.getRegionIterArgs();
for (auto e : llvm::zip(iterOperands, iterArgs))
  std::get<1>(e).replaceAllUsesWith(std::get<0>(e));

auto outerResults = forOp.getResults();
auto innerResults = forOp.getBody()->getTerminator()->getOperands();
for (auto e : llvm::zip(outerResults, innerResults))
  std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
if (!tripCount || *tripCount != 1)

if (forOp.getLowerBoundMap().getNumResults() != 1)

auto iv = forOp.getInductionVar();
auto *parentBlock = forOp->getBlock();
if (!iv.use_empty()) {
  if (forOp.hasConstantLowerBound()) {
    auto func = forOp->getParentOfType<FunctionOpInterface>();

    builder.setInsertionPointToStart(&func.getFunctionBody().front());

    builder.setInsertionPoint(forOp);
        forOp.getLoc(), forOp.getConstantLowerBound());
    iv.replaceAllUsesWith(constOp);

    auto lbOperands = forOp.getLowerBoundOperands();
    auto lbMap = forOp.getLowerBoundMap();
      iv.replaceAllUsesWith(lbOperands[0]);
      builder.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);

forOp.getBody()->back().erase();
forOp.getBody()->getOperations());
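// Usage sketch (illustrative, not from the original source): promote every
// single-iteration affine.for in a function, assuming `func` is a valid
// func::FuncOp.
static void examplePromoteSingleIteration(func::FuncOp func) {
  func.walk([](AffineForOp forOp) {
    // Fails (and leaves the loop untouched) unless the trip count is 1.
    (void)promoteIfSingleIteration(forOp);
  });
}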
unsigned offset, AffineForOp srcForOp, OpBuilder b) {
  auto lbOperands = srcForOp.getLowerBoundOperands();
  auto ubOperands = srcForOp.getUpperBoundOperands();

  b.create<AffineForOp>(srcForOp.getLoc(), lbOperands, lbMap, ubOperands,
                        ubMap, srcForOp.getStepAsInt());
  auto loopChunkIV = loopChunk.getInductionVar();
  auto srcIV = srcForOp.getInductionVar();

  for (const auto &it : llvm::drop_begin(opGroupQueue, offset)) {
    uint64_t shift = it.first;
    auto ops = it.second;

    if (!srcIV.use_empty() && shift != 0) {
      auto ivRemap = bodyBuilder.create<AffineApplyOp>(
          bodyBuilder.getSingleDimShiftAffineMap(
              -static_cast<int64_t>(srcForOp.getStepAsInt() * shift)),
      operandMap.map(srcIV, ivRemap);
      operandMap.map(srcIV, loopChunkIV);
      bodyBuilder.clone(*op, operandMap);
  return AffineForOp();
bool unrollPrologueEpilogue) {
  assert(forOp.getBody()->getOperations().size() == shifts.size() &&
         "too few/many shifts");
  if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))

  if (!mayBeConstTripCount) {
    LLVM_DEBUG(forOp.emitRemark("non-constant trip count loop not handled"));
  uint64_t tripCount = *mayBeConstTripCount;
      "shifts will lead to an invalid transformation\n");

  int64_t step = forOp.getStepAsInt();
  unsigned numChildOps = shifts.size();

  uint64_t maxShift = *llvm::max_element(shifts);
  if (maxShift >= numChildOps) {
    forOp.emitWarning("not shifting because shifts are unrealistically large");

  std::vector<std::vector<Operation *>> sortedOpGroups(maxShift + 1);
  for (auto &op : forOp.getBody()->without_terminator()) {
    auto shift = shifts[pos++];
    sortedOpGroups[shift].push_back(&op);

  AffineForOp prologue, epilogue;
  std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> opGroupQueue;

  auto origLbMap = forOp.getLowerBoundMap();
  uint64_t lbShift = 0;
  for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
    if (sortedOpGroups[d].empty())
    if (!opGroupQueue.empty()) {
          "Queue expected to be empty when the first block is found");
      if (lbShift + tripCount * step < d * step) {
            opGroupQueue, 0, forOp, b);
        opGroupQueue.clear();
        lbShift += tripCount * step;
            opGroupQueue, 0, forOp, b);
        AffineForOp::getCanonicalizationPatterns(patterns, res.getContext());
            res.getOperation(), std::move(patterns),
        if (!erased && !prologue)
    opGroupQueue.emplace_back(d, sortedOpGroups[d]);

  for (unsigned i = 0, e = opGroupQueue.size(); i < e; ++i) {
    uint64_t ubShift = (opGroupQueue[i].first + tripCount) * step;
        opGroupQueue, i, forOp, b);

  if (unrollPrologueEpilogue && prologue)
  if (unrollPrologueEpilogue && !epilogue && epilogue != prologue)
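// Usage sketch (illustrative, not from the original source): shift every body
// op after the first by one iteration, after validating SSA dominance with
// isOpwiseShiftValid. One shift entry per body operation (including the
// terminator) is required by the assertion above; the terminator's entry is
// not consumed.
static void exampleBodySkew(AffineForOp forOp) {
  unsigned numOps = forOp.getBody()->getOperations().size();
  SmallVector<uint64_t, 4> shifts(numOps, 1);
  shifts.front() = 0;
  if (isOpwiseShiftValid(forOp, shifts))
    (void)affineForOpBodySkew(forOp, shifts, /*unrollPrologueEpilogue=*/false);
}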
if (input.size() <= 1)

LLVM_DEBUG(llvm::dbgs() << "Index set computation failed!\n");

LLVM_DEBUG(llvm::dbgs()
           << "Non-hyperrectangular nests not supported for tiling!\n");
template <typename t>
  assert(input.size() == tileSizes.size() && "Too few/many tile sizes");

  if (llvm::any_of(input,
                   [](AffineForOp op) { return op.getNumResults() > 0; })) {
    LLVM_DEBUG(llvm::dbgs()
               << "Cannot tile nest where a loop has yield values\n");

  LLVM_DEBUG(llvm::dbgs() << "input loops not perfectly nested");
auto &ops = src.getBody()->getOperations();
dest.getBody()->getOperations().splice(loc, ops, ops.begin(),
                                       std::prev(ops.end()));
AffineForOp rootAffineForOp, unsigned width,
  Location loc = rootAffineForOp.getLoc();

  Operation *topLoop = rootAffineForOp.getOperation();
  AffineForOp innermostPointLoop;

  for (unsigned i = 0; i < width; i++) {
    AffineForOp pointLoop = b.create<AffineForOp>(loc, 0, 0);
    pointLoop.getBody()->getOperations().splice(
    tiledLoops[2 * width - 1 - i] = pointLoop;
    topLoop = pointLoop.getOperation();
      innermostPointLoop = pointLoop;

  for (unsigned i = width; i < 2 * width; i++) {
    AffineForOp tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
    tileSpaceLoop.getBody()->getOperations().splice(
    tiledLoops[2 * width - i - 1] = tileSpaceLoop;
    topLoop = tileSpaceLoop.getOperation();
AffineForOp newInterTileLoop, AffineForOp newIntraTileLoop,

  assert(origLoop.hasConstantLowerBound() &&
         "expected input loops to have constant lower bound.");

  lbOperands.push_back(newInterTileLoop.getInductionVar());
  ubOperands.push_back(newInterTileLoop.getInductionVar());

  lbOperands.push_back(tileSize);
  ubOperands.push_back(tileSize);

  lbBoundExprs.push_back(
      ((lbLoopIvExpr - origLowerBoundExpr) * lbTileParameter) +

  ubBoundExprs.push_back(
      ((ubLoopIvExpr - origLowerBoundExpr) * ubTileParameter) +
      (ubTileParameter * origLoopStep) + origLowerBoundExpr);

  ubBoundExprs.append(origUbMap.getResults().begin(),

  newIntraTileLoop.setLowerBound(lbOperands, lbMap);

  newIntraTileLoop.setUpperBound(ubOperands, ubMap);

  newIntraTileLoop.setStep(origLoop.getStepAsInt());
AffineForOp newLoop, Value tileSize) {
  OperandRange newLbOperands = origLoop.getLowerBoundOperands();

  newLoop.setLowerBound(newLbOperands, origLoop.getLowerBoundMap());

  assert(origLoop.hasConstantLowerBound() &&
         "expected input loops to have constant lower bound.");

  ubOperands.push_back(tileSize);

  int64_t origUpperBound;

  if (origLoop.hasConstantUpperBound()) {
    origUpperBound = origLoop.getConstantUpperBound();

    boundExprs.push_back(
        (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));

    boundExprs.push_back(
        (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));

  newLoop.setUpperBound(ubOperands, ubMap);

  newLoop.setStep(origLoop.getStepAsInt());
assert(!origLoops.empty() && "expected at least one loop in band");
assert(origLoops.size() == tileSizes.size() &&
       "expected tiling parameter for each loop in band.");

OpBuilder b(origLoops[0].getOperation());
unsigned width = origLoops.size();

for (unsigned i = 0; i < width; ++i) {

for (unsigned i = 0; i < width; ++i) {
      newLoops[i + width], tileSizes[i]);
assert(!origLoops.empty());
assert(origLoops.size() == tileSizes.size());

OpBuilder b(origLoops[0].getOperation());
unsigned width = origLoops.size();

for (unsigned i = 0; i < width; i++) {
  OperandRange newLbOperands = origLoops[i].getLowerBoundOperands();
  OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
  newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
  newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());

  newLoops[i].setStep(tileSizes[i] * origLoops[i].getStepAsInt());

for (unsigned i = 0; i < width; i++) {
  std::optional<uint64_t> mayBeConstantCount =

  newLoops[width + i].setLowerBound(newLoops[i].getInductionVar(), lbMap);

  newLoops[width + i].setStep(origLoops[i].getStepAsInt());

  if (mayBeConstantCount && *mayBeConstantCount < tileSizes[i]) {
        *mayBeConstantCount * origLoops[i].getStepAsInt());
    newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
  } else if (largestDiv % tileSizes[i] != 0) {
    ubOperands.push_back(newLoops[i].getInductionVar());

    boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStepAsInt());
    boundExprs.append(origUbMap.getResults().begin(),
    newLoops[width + i].setUpperBound(ubOperands, ubMap);

        1, 0, dim + tileSizes[i] * origLoops[i].getStepAsInt());
    newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
AffineForOp rootAffineForOp = origLoops[0];
unsigned width = input.size();

for (unsigned i = 0; i < width; i++)
  origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());

rootAffineForOp.erase();

*tiledNest = std::move(tiledLoops);

AffineForOp rootAffineForOp = origLoops[0];
unsigned width = input.size();

for (unsigned i = 0; i < width; i++)
  origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());

rootAffineForOp.erase();

*tiledNest = std::move(tiledLoops);
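// Usage sketch (illustrative, not from the original source): tile a perfectly
// nested, hyper-rectangular band rooted at `root` with fixed size-32 tiles in
// every dimension.
static void exampleTileNest(AffineForOp root) {
  SmallVector<AffineForOp, 4> band;
  getPerfectlyNestedLoops(band, root);
  SmallVector<unsigned, 4> tileSizes(band.size(), 32);
  SmallVector<AffineForOp, 8> tiledNest;
  if (succeeded(tilePerfectlyNested(band, tileSizes, &tiledNest))) {
    // tiledNest now holds 2 * band.size() loops: the inter-tile loops
    // followed by the intra-tile (point) loops.
  }
}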
nestedLoops.push_back(root);
if (body.begin() != std::prev(body.end(), 2))
root = dyn_cast<AffineForOp>(&body.front());

for (AffineForOp forOp : f.getOps<AffineForOp>()) {
  bands->push_back(band);
if (mayBeConstantTripCount.has_value()) {
  uint64_t tripCount = *mayBeConstantTripCount;

uint64_t unrollFactor) {
  if (mayBeConstantTripCount.has_value() &&
      *mayBeConstantTripCount < unrollFactor)
Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,

  annotateFn = defaultAnnotateFn;

  for (unsigned i = 1; i < unrollFactor; i++) {
    operandMap.map(iterArgs, lastYielded);

    Value ivUnroll = ivRemapFn(i, forOpIV, builder);
    operandMap.map(forOpIV, ivUnroll);

    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd);
         it++) {
      annotateFn(i, clonedOp, builder);

    for (unsigned i = 0, e = lastYielded.size(); i < e; i++) {
      Operation *defOp = yieldedValues[i].getDefiningOp();
      if (defOp && defOp->getBlock() == loopBodyBlock)
        lastYielded[i] = operandMap.lookup(yieldedValues[i]);

  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
    annotateFn(0, &*it, builder);
uint64_t unrollFactor) {
  auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));

  auto results = forOp.getResults();
  auto cleanupResults = cleanupForOp.getResults();
  auto cleanupIterOperands = cleanupForOp.getInits();

  for (auto e : llvm::zip(results, cleanupResults, cleanupIterOperands)) {
    std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
    cleanupForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));

  cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);

  forOp.setUpperBound(cleanupOperands, cleanupMap);
AffineForOp forOp, uint64_t unrollFactor,
    bool cleanUpUnroll) {
  assert(unrollFactor > 0 && "unroll factor should be positive");

  if (unrollFactor == 1) {

  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))

  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
    if (cleanUpUnroll) {

  if (forOp.getLowerBoundMap().getNumResults() != 1 ||
      forOp.getUpperBoundMap().getNumResults() != 1)
    assert(false && "cleanup loop lower bound map for single result lower "
                    "and upper bound maps can always be determined");

  ValueRange iterArgs(forOp.getRegionIterArgs());
  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();

  int64_t step = forOp.getStepAsInt();
  forOp.setStep(step * unrollFactor);
      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
        auto d0 = b.getAffineDimExpr(0);
        auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
        return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, iv);
      iterArgs, yieldedValues);
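// Usage sketch (illustrative, not from the original source): unroll a loop by
// 4, letting the utility emit a cleanup loop when the trip count is not a
// multiple of 4.
static void exampleUnrollByFour(AffineForOp forOp) {
  if (failed(loopUnrollByFactor(forOp, /*unrollFactor=*/4,
                                /*annotateFn=*/nullptr,
                                /*cleanUpUnroll=*/false)))
    LLVM_DEBUG(llvm::dbgs() << "unroll by 4 was not performed\n");
}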
uint64_t unrollJamFactor) {
  if (mayBeConstantTripCount.has_value() &&
      *mayBeConstantTripCount < unrollJamFactor)

  auto walkResult = forOp.walk([&](AffineForOp aForOp) {
    for (auto controlOperand : aForOp.getControlOperands()) {
      if (!forOp.isDefinedOutsideOfLoop(controlOperand))
        return WalkResult::interrupt();

  return !walkResult.wasInterrupted();
uint64_t unrollJamFactor) {
  assert(unrollJamFactor > 0 && "unroll jam factor should be positive");

  if (unrollJamFactor == 1) {

  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))

  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollJamFactor) {
    LLVM_DEBUG(llvm::dbgs() << "[failed] trip count < unroll-jam factor\n");

  forOp.walk([&](AffineForOp aForOp) {
    if (aForOp.getNumIterOperands() > 0)
      loopsWithIterArgs.push_back(aForOp);

  if (forOp.getNumIterOperands() > 0)

  if (forOp.getLowerBoundMap().getNumResults() != 1 ||
      forOp.getUpperBoundMap().getNumResults() != 1)
    assert(false && "cleanup loop lower bound map for single result lower "
                    "and upper bound maps can always be determined");

  for (AffineForOp oldForOp : loopsWithIterArgs) {
    ValueRange oldIterOperands = oldForOp.getInits();
    ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
        cast<AffineYieldOp>(oldForOp.getBody()->getTerminator()).getOperands();

    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
      dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());

    bool forOpReplaced = oldForOp == forOp;
    AffineForOp newForOp =
        cast<AffineForOp>(*oldForOp.replaceWithAdditionalYields(
            rewriter, dupIterOperands, false,
          return dupYieldOperands;

    newLoopsWithIterArgs.push_back(newForOp);

    ValueRange newIterArgs = newForOp.getRegionIterArgs();
    unsigned oldNumIterArgs = oldIterArgs.size();
    ValueRange newResults = newForOp.getResults();
    unsigned oldNumResults = newResults.size() / unrollJamFactor;
    assert(oldNumIterArgs == oldNumResults &&
           "oldNumIterArgs must be the same as oldNumResults");
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      for (unsigned j = 0; j < oldNumIterArgs; ++j) {
        operandMaps[i - 1].map(newIterArgs[j],
                               newIterArgs[i * oldNumIterArgs + j]);
        operandMaps[i - 1].map(newResults[j],
                               newResults[i * oldNumResults + j]);

  int64_t step = forOp.getStepAsInt();
  forOp.setStep(step * unrollJamFactor);

  auto forOpIV = forOp.getInductionVar();

  for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
    for (auto &subBlock : subBlocks) {
      OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
          builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
      operandMaps[i - 1].map(forOpIV, ivUnroll);

      for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
        builder.clone(*it, operandMaps[i - 1]);

  for (auto newForOp : newLoopsWithIterArgs) {
    unsigned oldNumIterOperands =
        newForOp.getNumIterOperands() / unrollJamFactor;
    unsigned numControlOperands = newForOp.getNumControlOperands();
    auto yieldOp = cast<AffineYieldOp>(newForOp.getBody()->getTerminator());
    unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
    assert(oldNumIterOperands == oldNumYieldOperands &&
           "oldNumIterOperands must be the same as oldNumYieldOperands");
    for (unsigned j = 0; j < oldNumIterOperands; ++j) {
      newForOp.setOperand(numControlOperands + i * oldNumIterOperands + j,
                          operandMaps[i - 1].lookupOrDefault(
                              newForOp.getOperand(numControlOperands + j)));
          i * oldNumYieldOperands + j,
          operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(j)));

  if (forOp.getNumResults() > 0) {

    auto loc = forOp.getLoc();
    unsigned oldNumResults = forOp.getNumResults() / unrollJamFactor;
      unsigned pos = reduction.iterArgPosition;
      Value lhs = forOp.getResult(pos);

      for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
        rhs = forOp.getResult(i * oldNumResults + pos);

      assert(op && "Reduction op should have been created");

      forOp.getResult(pos).replaceAllUsesExcept(lhs, newOps);
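// Usage sketch (illustrative, not from the original source): unroll-and-jam
// an outer loop by at most 2, clamped to the constant trip count when one is
// known.
static void exampleUnrollJam(AffineForOp outer) {
  (void)loopUnrollJamUpToFactor(outer, /*unrollJamFactor=*/2);
}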
assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
auto &forOpABody = forOpA.getBody()->getOperations();
auto &forOpBBody = forOpB.getBody()->getOperations();

    forOpABody, forOpABody.begin(), std::prev(forOpABody.end()));

forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
                  std::prev(forOpBBody.end()));

forOpBBody.splice(forOpBBody.begin(), forOpA->getBlock()->getOperations(),
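// Usage sketch (illustrative, not from the original source): interchange a
// loop with its single nested child, assuming forOpB is the only op (besides
// the terminator) in forOpA's body.
static void exampleInterchange(AffineForOp forOpA) {
  auto forOpB = cast<AffineForOp>(&*forOpA.getBody()->begin());
  interchangeLoops(forOpA, forOpB);
}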
unsigned maxLoopDepth = loops.size();

loopPermMapInv.resize(maxLoopDepth);
for (unsigned i = 0; i < maxLoopDepth; ++i)
  loopPermMapInv[loopPermMap[i]] = i;

for (const auto &depComps : depCompsVec) {
  assert(depComps.size() >= maxLoopDepth);

  for (unsigned j = 0; j < maxLoopDepth; ++j) {
    unsigned permIndex = loopPermMapInv[j];
    assert(depComps[permIndex].lb);
    int64_t depCompLb = *depComps[permIndex].lb;

assert(loopPermMap.size() == loops.size() && "invalid loop perm map");
unsigned maxLoopDepth = loops.size();
if (maxLoopDepth == 1)

std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
bool LLVM_ATTRIBUTE_UNUSED
  assert(!loops.empty() && "no loops provided");

  auto hasTwoElements = [](Block *block) {
    auto secondOpIt = std::next(block->begin());
    return secondOpIt != block->end() && &*secondOpIt == &block->back();

  auto enclosingLoop = loops.front();
  for (auto loop : loops.drop_front()) {
    auto parentForOp = dyn_cast<AffineForOp>(loop->getParentOp());
    if (parentForOp != enclosingLoop || !hasTwoElements(parentForOp.getBody()))
    enclosingLoop = loop;
assert(input.size() == permMap.size() && "invalid permutation map size");

llvm::sort(checkPermMap);
    [](const auto &en) { return en.value() != en.index(); }))
  assert(false && "invalid permutation map");

if (input.size() < 2)

for (unsigned i = 0, e = input.size(); i < e; ++i)
  invPermMap.push_back({permMap[i], i});
llvm::sort(invPermMap);

if (permMap.back() != input.size() - 1) {
  Block *destBody = ((AffineForOp)input[invPermMap.back().second]).getBody();
  Block *srcBody = ((AffineForOp)input.back()).getBody();
      std::prev(srcBody->end()));

for (int i = input.size() - 1; i >= 0; --i) {
  if (permMap[i] == 0) {
    auto *parentBlock = input[0]->getBlock();
        input[i]->getBlock()->getOperations(),

  unsigned parentPosInInput = invPermMap[permMap[i] - 1].second;
  if (i > 0 && static_cast<unsigned>(i - 1) == parentPosInInput)

  auto *destBody = ((AffineForOp)input[parentPosInInput]).getBody();
  destBody->getOperations().splice(destBody->begin(),
                                   input[i]->getBlock()->getOperations(),

return invPermMap[0].second;
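// Usage sketch (illustrative, not from the original source): permute a 3-d
// perfect nest (i, j, k) into (j, k, i). permMap[i] is the new position of
// loop i, and the permutation is only applied when it is dependence-legal.
static void examplePermute(MutableArrayRef<AffineForOp> nest) {
  unsigned permMap[] = {2, 0, 1}; // i -> 2, j -> 0, k -> 1
  if (isValidLoopInterchangePermutation(nest, permMap))
    (void)permuteLoops(nest, permMap);
}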
if (loops.size() < 2)

unsigned maxLoopDepth = loops.size();
std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;

for (auto &depComps : depCompsVec) {
  assert(depComps.size() >= maxLoopDepth);
  for (unsigned j = 0; j < maxLoopDepth; ++j) {
    assert(depComp.lb.has_value() && depComp.ub.has_value());
    if (*depComp.lb != 0 || *depComp.ub != 0)

unsigned nextSequentialLoop = numParallelLoops;
unsigned nextParallelLoop = 0;
for (unsigned i = 0; i < maxLoopDepth; ++i) {
    loopPermMap[i] = nextParallelLoop++;
    loopPermMap[i] = nextSequentialLoop++;

unsigned loopNestRootIndex = permuteLoops(loops, loopPermMap);
return loops[loopNestRootIndex];
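// Usage sketch (illustrative, not from the original source): sink the
// dependence-carrying loops of a nest innermost so that the outer loops are
// parallel; the returned op is the new outermost loop of the nest.
static AffineForOp exampleSinkSequential(AffineForOp root) {
  return sinkSequentialLoops(root);
}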
int64_t offset = 0) {
  auto bounds = llvm::to_vector<4>(map->getResults());

  operands->insert(operands->begin() + map->getNumDims(), iv);
auto originalStep = forOp.getStepAsInt();
auto scaledStep = originalStep * factor;
forOp.setStep(scaledStep);

auto lbMap = forOp.getLowerBoundMap();

auto ubMap = forOp.getUpperBoundMap();

auto iv = forOp.getInductionVar();

for (auto t : targets) {
  auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
                                        ubOperands, ubMap, originalStep);
  auto begin = t.getBody()->begin();

  auto nOps = t.getBody()->getOperations().size() - 2;
  newForOp.getBody()->getOperations().splice(
      newForOp.getBody()->getOperations().begin(),
      t.getBody()->getOperations(), begin, std::next(begin, nOps));
      newForOp.getRegion());
  innerLoops.push_back(newForOp);
template <typename SizeType>
  AffineForOp target) {

  assert(res.size() == 1 && "Expected 1 inner forOp");

for (auto it : llvm::zip(forOps, sizes)) {
  auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
  res.push_back(step);
  currentTargets = step;

  AffineForOp target) {
  res.push_back(llvm::getSingleElement(loops));
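// Usage sketch (illustrative, not from the original source): stripmine two
// outer loops by 32 and 16, sinking the scaled-step copies around the given
// innermost target loop; the result holds the new intra-tile loops.
static void exampleTileAroundTarget(AffineForOp i, AffineForOp j,
                                    AffineForOp target) {
  SmallVector<AffineForOp, 8> intraTileLoops = tile({i, j}, {32, 16}, target);
  (void)intraTileLoops;
}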
if (loops.size() < 2)

AffineForOp innermost = loops.back();
AffineForOp outermost = loops.front();

for (AffineForOp loop : loops) {
  if (loop.getStepAsInt() != 1 || !loop.hasConstantLowerBound() ||
      loop.getConstantLowerBound() != 0)

if (!llvm::hasSingleElement(origUbMap.getResults()))
  prev = builder.create<AffineMinOp>(loc, origUbMap, ubOperands);
  prev = builder.create<AffineApplyOp>(loc, origUbMap, ubOperands);
upperBoundSymbols.push_back(prev);

for (AffineForOp loop : loops.drop_front()) {
  ub = loop.getUpperBound();
  if (!llvm::hasSingleElement(origUbMap.getResults()))
    upperBound = builder.create<AffineMinOp>(loc, origUbMap, ubOperands);
    upperBound = builder.create<AffineApplyOp>(loc, origUbMap, ubOperands);
  upperBoundSymbols.push_back(upperBound);

  operands.push_back(prev);
  operands.push_back(upperBound);
  prev = builder.create<AffineApplyOp>(

outermost.setUpperBound(prev, newUbMap);

Value previous = outermost.getInductionVar();
for (unsigned idx = loops.size(); idx > 0; --idx) {
  if (idx != loops.size()) {
    operands.push_back(previous);
    operands.push_back(upperBoundSymbols[idx]);
    previous = builder.create<AffineApplyOp>(

  Value inductionVariable;
    inductionVariable = previous;
    applyOperands.push_back(previous);
    applyOperands.push_back(upperBoundSymbols[idx - 1]);
    inductionVariable = builder.create<AffineApplyOp>(

      inductionVariable, loops.back().getRegion());

AffineForOp secondOutermostLoop = loops[1];
innermost.getBody()->back().erase();
outermost.getBody()->getOperations().splice(
    innermost.getBody()->getOperations());
secondOutermostLoop.erase();
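// Usage sketch (illustrative, not from the original source): coalesce a
// perfect nest rooted at `root` into a single loop. As checked above, every
// loop must have step one and a constant zero lower bound.
static void exampleCoalesce(AffineForOp root) {
  SmallVector<AffineForOp, 4> nest;
  getPerfectlyNestedLoops(nest, root);
  if (nest.size() >= 2)
    (void)coalesceLoops(nest);
}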
assert(processorId.size() == numProcessors.size());
if (processorId.empty())

Value linearIndex = processorId.front();
for (unsigned i = 1, e = processorId.size(); i < e; ++i) {
  auto mulApplyOp = b.create<AffineApplyOp>(
      loc, mulMap, ValueRange{linearIndex, numProcessors[i]});
  linearIndex = b.create<AffineApplyOp>(
      loc, addMap, ValueRange{mulApplyOp, processorId[i]});

auto mulApplyOp = b.create<AffineApplyOp>(
    loc, mulMap, ValueRange{linearIndex, forOp.getStep()});
    loc, addMap, ValueRange{mulApplyOp, forOp.getLowerBound()});
forOp.setLowerBound(lb);

Value step = forOp.getStep();
for (auto numProcs : numProcessors)
forOp.setStep(step);
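// Usage sketch (illustrative, not from the original source): distribute an
// scf.for loop cyclically over a 1-d processor grid. `blockId` and `gridDim`
// are assumed to be index-typed values (e.g. from gpu.block_id / gpu.grid_dim)
// defined above the loop.
static void exampleMapToProcessors(scf::ForOp forOp, Value blockId,
                                   Value gridDim) {
  mapLoopToProcessorIds(forOp, {blockId}, {gridDim});
}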
Block **copyPlacementBlock,

  cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(),
                 &symbols);

  auto it = enclosingAffineOps.rbegin();
  AffineForOp lastInvariantFor;
  for (auto e = enclosingAffineOps.rend(); it != e; ++it) {
        << "memref definition will end up not dominating hoist location\n");

    auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
    if (llvm::is_contained(symbols, affineFor.getInductionVar()))
    lastInvariantFor = affineFor;

  if (it != enclosingAffineOps.rbegin()) {
    *copyOutPlacementStart = std::next(*copyInPlacementStart);
    *copyPlacementBlock = lastInvariantFor->getBlock();
    *copyInPlacementStart = begin;
    *copyOutPlacementStart = end;
    *copyPlacementBlock = &block;
if (bufferShape.size() <= 1)

int64_t numEltPerStride = 1;

for (int d = bufferShape.size() - 1; d >= 1; d--) {
  int64_t dimSize = cast<MemRefType>(region.memref.getType()).getDimSize(d);
  numEltPerStride *= bufferShape[d];
  if (bufferShape[d] < dimSize && bufferShape[d - 1] > 1) {
    strideInfos->push_back({stride, numEltPerStride});
assert(llvm::all_of(lbMaps, [&](AffineMap lbMap) {
assert(llvm::all_of(ubMaps, [&](AffineMap ubMap) {

unsigned rank = cast<MemRefType>(memref.getType()).getRank();

assert(rank != 0 && "non-zero rank memref expected");
assert(lbMaps.size() == rank && "wrong number of lb maps");
assert(ubMaps.size() == rank && "wrong number of ub maps");

AffineForOp copyNestRoot;

for (unsigned d = 0; d < rank; ++d) {
      ubOperands, ubMaps[d]);
    copyNestRoot = forOp;

  auto fastBufOffsetMap =
  auto offset = b.create<AffineApplyOp>(loc, fastBufOffsetMap, lbOperands);

  fastBufMapOperands.push_back(offset);
  fastBufMapOperands.push_back(forOp.getInductionVar());
  mayBeDeadApplys.push_back(offset);

  memIndices.push_back(forOp.getInductionVar());

for (auto applyOp : mayBeDeadApplys)
  if (applyOp.use_empty())

  auto load = b.create<AffineLoadOp>(loc, memref, memIndices);
  b.create<AffineStoreOp>(loc, load, fastMemRef, fastBufMap,
                          fastBufMapOperands);
  return copyNestRoot;

b.create<AffineLoadOp>(loc, fastMemRef, fastBufMap, fastBufMapOperands);
b.create<AffineStoreOp>(loc, load, memref, memIndices);
return copyNestRoot;
auto f = begin->getParentOfType<FunctionOpInterface>();
OpBuilder topBuilder(f.getFunctionBody());
Value zeroIndex = topBuilder.create<arith::ConstantIndexOp>(f.getLoc(), 0);

bool isCopyOutAtEndOfBlock = (end == copyOutPlacementStart);

OpBuilder prologue(copyPlacementBlock, copyInPlacementStart);

OpBuilder epilogue(copyPlacementBlock, copyOutPlacementStart);

auto loc = region.loc;
auto memref = region.memref;
auto memRefType = cast<MemRefType>(memref.getType());

if (!memRefType.getLayout().isIdentity()) {
  LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");

unsigned rank = memRefType.getRank();
  LLVM_DEBUG(llvm::dbgs() << "Only non-zero ranked memrefs are supported\n");

std::optional<int64_t> numElements =
  LLVM_DEBUG(llvm::dbgs() << "Non-constant region size not supported\n");

  LLVM_DEBUG(llvm::dbgs()
             << "Max lower bound for memref region start not supported\n");

if (*numElements == 0) {
  LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n");

for (unsigned i = 0; i < rank; ++i) {
  if (lbMaps[i].getNumResults() == 0 || ubMaps[i].getNumResults() == 0) {
    LLVM_DEBUG(llvm::dbgs()
               << "Missing lower or upper bound for region along dimension: "

fastBufOffsets.reserve(rank);
for (unsigned d = 0; d < rank; d++) {
  assert(lbs[d].getNumSymbols() == cst->getNumCols() - rank - 1 &&
         "incorrect bound size");

  if (lbs[d].isSingleConstant()) {
    auto indexVal = lbs[d].getSingleConstantResult();
    if (indexVal == 0) {
      memIndices.push_back(zeroIndex);

      memIndices.push_back(
          top.create<arith::ConstantIndexOp>(loc, indexVal).getResult());

    for (unsigned i = 0, e = lbs[d].getNumSymbols(); i < e; ++i)
      symReplacements[i] = top.getAffineDimExpr(i);
    lbs[d] = lbs[d].replaceDimsAndSymbols(
        {}, symReplacements, lbs[d].getNumSymbols(),

    memIndices.push_back(b.create<AffineApplyOp>(loc, lbs[d], regionSymbols));

  bufIndices.push_back(zeroIndex);

  fastBufOffsets.push_back(lbs[d].getResult(0));

bool existingBuf = fastBufferMap.count(memref) > 0;

  auto fastMemRefType =

      prologue.create<memref::AllocOp>(loc, fastMemRefType).getResult();

  fastBufferMap[memref] = fastMemRef;

  *sizeInBytes = maySizeInBytes.value_or(0);

      << "Creating fast buffer of type " << fastMemRefType

  fastMemRef = fastBufferMap[memref];

auto numElementsSSA = top.create<arith::ConstantIndexOp>(loc, *numElements);

Value numEltPerDmaStride;

if (dmaStrideInfos.size() > 1) {
  LLVM_DEBUG(llvm::dbgs() << "Only up to one level of stride supported\n");

if (!dmaStrideInfos.empty()) {
      top.create<arith::ConstantIndexOp>(loc, dmaStrideInfos[0].stride);
  numEltPerDmaStride = top.create<arith::ConstantIndexOp>(
      loc, dmaStrideInfos[0].numEltPerStride);

auto postDomFilter = std::prev(end);

    regionSymbols, ubMaps,
    regionSymbols, fastBufOffsets,

copyNests.insert(copyNest);

if (region.isWrite() && isCopyOutAtEndOfBlock)

auto tagMemRef = prologue.create<memref::AllocOp>(loc, tagMemRefType);

    fastMemRef, bufAffineMap, bufIndices,
    tagMemRef, tagAffineMap, tagIndices,
    numElementsSSA, dmaStride, numEltPerDmaStride);

    loc, fastMemRef, bufAffineMap, bufIndices, memref, memAffineMap,
    memIndices, tagMemRef, tagAffineMap, tagIndices, numElementsSSA,
    dmaStride, numEltPerDmaStride);

if (isCopyOutAtEndOfBlock)

auto tagDeallocOp = epilogue.create<memref::DeallocOp>(loc, tagMemRef);
if (*nEnd == end && isCopyOutAtEndOfBlock)

auto bufDeallocOp = epilogue.create<memref::DeallocOp>(loc, fastMemRef);

if (!copyOptions.generateDma && *nEnd == end && isCopyOutAtEndOfBlock)

remapExprs.reserve(rank);
for (unsigned i = 0; i < rank; i++) {
  remapExprs.push_back(dimExpr - fastBufOffsets[i]);

auto indexRemap = AffineMap::get(regionSymbols.size() + rank, 0, remapExprs,

bool isBeginAtStartOfBlock = (begin == block->begin());
if (!isBeginAtStartOfBlock)
  prevOfBegin = std::prev(begin);

*nBegin = isBeginAtStartOfBlock ? block->begin() : std::next(prevOfBegin);
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
  rank = loadOp.getMemRefType().getRank();
  region->memref = loadOp.getMemRef();
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
  rank = storeOp.getMemRefType().getRank();
  region->memref = storeOp.getMemRef();
  assert(false && "expected load or store op");

auto memRefType = cast<MemRefType>(region->memref.getType());
if (!memRefType.hasStaticShape())

ivs.resize(numParamLoopIVs);

regionCst->setValues(rank, rank + numParamLoopIVs, symbols);

for (unsigned d = 0; d < rank; d++) {
  auto dimSize = memRefType.getDimSize(d);
  assert(dimSize > 0 && "filtered dynamic shapes above");
  regionCst->addBound(BoundType::LB, d, 0);
  regionCst->addBound(BoundType::UB, d, dimSize - 1);
std::optional<Value> filterMemRef,

  assert(begin->getBlock() == std::prev(end)->getBlock() &&
         "Inconsistent block begin/end args");
  assert(end != end->getBlock()->end() && "end can't be the block terminator");

  Block *block = begin->getBlock();

  LLVM_DEBUG(llvm::dbgs() << "Generating copies at depth " << copyDepth
  LLVM_DEBUG(llvm::dbgs() << "from begin: " << *begin << "\n");
  LLVM_DEBUG(llvm::dbgs() << "to inclusive end: " << *std::prev(end) << "\n");

  SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> readRegions;
  SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> writeRegions;

    MemRefType memrefType;
    if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) {
      memref = loadOp.getMemRef();
      memrefType = loadOp.getMemRefType();
    } else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) {
      memref = storeOp.getMemRef();
      memrefType = storeOp.getMemRefType();

    if ((filterMemRef.has_value() && filterMemRef != memref) ||
        (isa_and_nonnull<IntegerAttr>(memrefType.getMemorySpace()) &&

      LLVM_DEBUG(llvm::dbgs() << "memref definition is inside of the depth at "
                                 "which copy-in/copy-out would happen\n");

    auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
    if (failed(region->compute(opInst, copyDepth, nullptr,
      LLVM_DEBUG(llvm::dbgs()
                 << "Error obtaining memory region: semi-affine maps?\n");
      LLVM_DEBUG(llvm::dbgs() << "over-approximating to the entire memref\n");
      if (!getFullMemRefAsRegion(opInst, copyDepth, region.get())) {
          opInst->emitError("non-constant memref sizes not yet supported"));

        [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
          const auto *const it = targetRegions.find(region->memref);
          if (it == targetRegions.end())
          if (failed(it->second->unionBoundingBox(*region))) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Memory region bounding box failed; "
                          "over-approximating to the entire memref\n");
                "non-constant memref sizes not yet supported"));
            it->second->getConstraints()->clearAndCopyFrom(
                *region->getConstraints());
            region->getConstraints()->clearAndCopyFrom(
                *it->second->getConstraints());

    bool existsInRead = updateRegion(readRegions);

    bool existsInWrite = updateRegion(writeRegions);

    if (region->isWrite() && !existsInWrite) {
      writeRegions[region->memref] = std::move(region);
    } else if (!region->isWrite() && !existsInRead) {
      readRegions[region->memref] = std::move(region);

    LLVM_DEBUG(begin->emitError(
        "copy generation failed for one or more memrefs in this block\n"));

  uint64_t totalCopyBuffersSizeInBytes = 0;

  auto processRegions =
      [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
        for (const auto &regionEntry : regions) {
          Block *copyPlacementBlock;
              *regionEntry.second, *block, begin, end, &copyPlacementBlock,
              &copyInPlacementStart, &copyOutPlacementStart);

          uint64_t sizeInBytes;
              *regionEntry.second, block, begin, end, copyPlacementBlock,
              copyInPlacementStart, copyOutPlacementStart, copyOptions,
              fastBufferMap, copyNests, &sizeInBytes, &nBegin, &nEnd);
          if (succeeded(iRet)) {
            totalCopyBuffersSizeInBytes += sizeInBytes;
          ret = ret & succeeded(iRet);
  processRegions(readRegions);
  processRegions(writeRegions);

    LLVM_DEBUG(begin->emitError(
        "copy generation failed for one or more memrefs in this block\n"));

  if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) {
    LLVM_DEBUG(forOp.emitRemark()
               << " KiB of copy buffers in fast memory space for this block");

  if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) {
    block->getParentOp()->emitWarning(
        "total size of all copy buffers for this block exceeds fast memory "

      std::prev(forOp.getBody()->end()), copyOptions, filterMemRef, copyNests);
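// Usage sketch (illustrative, not from the original source): generate
// pointwise copies (no DMAs) into memory space 1 for every memref accessed in
// a loop's body, with a 32 KiB fast-buffer budget.
static void exampleDataCopy(AffineForOp forOp) {
  AffineCopyOptions options = {/*generateDma=*/false,
                               /*slowMemorySpace=*/0,
                               /*fastMemorySpace=*/1,
                               /*tagMemorySpace=*/0,
                               /*fastMemCapacityBytes=*/32 * 1024};
  DenseSet<Operation *> copyNests;
  (void)affineDataCopyGenerate(forOp, options, /*filterMemRef=*/std::nullopt,
                               copyNests);
}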
auto begin = analyzedOp->getIterator();
auto end = std::next(begin);

auto err = generateCopy(memrefRegion, block, begin, end, block, begin, end,
                        copyOptions, fastBufferMap, copyNests,

const auto &en = fastBufferMap.find(memrefRegion.memref);

if (en == fastBufferMap.end())
result.alloc = en->second.getDefiningOp();
assert(result.alloc && "fast buffer expected to be locally allocated");
assert(copyNests.size() <= 1 && "At most one copy nest is expected.");
result.copyNest = copyNests.empty() ? nullptr : *copyNests.begin();
assert(currLoopDepth <= depthToLoops.size() && "Unexpected currLoopDepth");
if (currLoopDepth == depthToLoops.size())
  depthToLoops.emplace_back();

for (auto &op : *block) {
  if (auto forOp = dyn_cast<AffineForOp>(op)) {
    depthToLoops[currLoopDepth].push_back(forOp);

for (auto &block : func)

if (!depthToLoops.empty()) {
  assert(depthToLoops.back().empty() && "Last loop level is not empty?");
  depthToLoops.pop_back();
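// Usage sketch (illustrative, not from the original source): count loops per
// depth in a function using the gathering utilities above.
static void exampleGatherLoops(func::FuncOp func) {
  std::vector<SmallVector<AffineForOp, 2>> depthToLoops;
  gatherLoops(func, depthToLoops);
  for (unsigned d = 0, e = depthToLoops.size(); d < e; ++d)
    llvm::dbgs() << "depth " << d << ": " << depthToLoops[d].size()
                 << " loops\n";
}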
return b.create<AffineForOp>(loc, lowerOperands, lbMap, upperOperands, ubMap,

auto *context = loops[0].getContext();

llvm::append_range(ops, loops);

for (auto loop : loops) {
  assert(loop.getStepAsInt() == 1 && "point loop step expected to be one");

unsigned fullTileLbPos, fullTileUbPos;
    .getConstantBoundOnDimSize(0, nullptr,
                               nullptr, &fullTileLbPos,
  LLVM_DEBUG(llvm::dbgs() << "Can't get constant diff pair for a loop\n");

fullTileLb.assign(fLb.begin(), fLb.end());
fullTileUb.assign(fUb.begin(), fUb.end());

for (auto lbIndex : lbIndices)
  for (unsigned i = 0, e = cst.getNumCols(); i < e; ++i)
    cst.atIneq(lbIndex, i) = fullTileLb[i] - cst.atIneq(lbIndex, i);

for (auto ubIndex : ubIndices)
  for (unsigned i = 0, e = cst.getNumCols(); i < e; ++i)
    cst.atIneq(ubIndex, i) -= fullTileUb[i];

return b.create<AffineIfOp>(loops[0].getLoc(), ifCondSet, setOperands,
static LogicalResult
  fullTileLoops.reserve(inputNest.size());

  for (auto loop : inputNest) {
    if (loop.getStepAsInt() != 1) {
      LLVM_DEBUG(llvm::dbgs()
                 << "[tile separation] non-unit stride not implemented\n");

    unsigned lbPos, ubPos;
        .getConstantBoundOnDimSize(0, nullptr,
                                   nullptr, &lbPos, &ubPos) ||
      LLVM_DEBUG(llvm::dbgs() << "[tile separation] Can't get constant diff / "
                                 "equalities not yet handled\n");

    cst.getIneqAsAffineValueMap(0, lbPos, lbVmap, b.getContext());
    cst.getIneqAsAffineValueMap(0, ubPos, ubVmap, b.getContext());

    fullTileLoops.push_back(fullTileLoop);

    operandMap.map(loopEn.value().getInductionVar(),
                   fullTileLoops[loopEn.index()].getInductionVar());

  for (auto &op : inputNest.back().getBody()->without_terminator())
    b.clone(op, operandMap);
if (inputNest.empty())

auto firstLoop = inputNest[0];

auto prevLoop = firstLoop;
for (auto loop : inputNest.drop_front(1)) {
  assert(loop->getParentOp() == prevLoop && "input not contiguously nested");

  if (!fullTileLoops.empty())
    fullTileLoops.front().erase();

  fullTileLoops.front().erase();
  LLVM_DEBUG(llvm::dbgs() << "All tiles are full tiles, or failure creating "
                             "separation condition\n");

Block *thenBlock = ifOp.getThenBlock();
AffineForOp outermostFullTileLoop = fullTileLoops[0];
    std::prev(thenBlock->end()),
    outermostFullTileLoop->getBlock()->getOperations(),

Block *elseBlock = ifOp.getElseBlock();
    firstLoop->getBlock()->getOperations(),
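// Usage sketch (illustrative, not from the original source): split a tiled
// band into a guarded full-tile version and the general version, assuming
// `band` holds a contiguously nested, unit-step loop nest.
static void exampleSeparateFullTiles(MutableArrayRef<AffineForOp> band) {
  SmallVector<AffineForOp, 4> fullTileNest;
  if (succeeded(separateFullTiles(band, &fullTileNest))) {
    // fullTileNest is the constant trip count ("full tile") copy placed in
    // the then-block of the generated affine.if.
  }
}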
LogicalResult result(failure());

if (loops.size() <= 1)

for (unsigned i = 0, e = loops.size(); i < e; ++i) {
  operandsDefinedAbove[i] = i;
  for (unsigned j = 0; j < i; ++j) {
      operandsDefinedAbove[i] = j;

for (unsigned end = loops.size(); end > 0; --end) {
  for (; start < end - 1; ++start) {
        *std::max_element(std::next(operandsDefinedAbove.begin(), start),
                          std::next(operandsDefinedAbove.begin(), end));
    assert(maxPos == start &&
           "expected loop bounds to be known at the start of the band");

  if (start != end - 1)

while (auto loopOp = currentOp->getParentOfType<LoopLikeOpInterface>()) {
  if (!loopOp.isDefinedOutsideOfLoop(operand.get()))
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for affine expression.
AffineExpr floorDiv(uint64_t v) const
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
unsigned getNumInputs() const
AffineExpr getResult(unsigned idx) const
Block represents an ordered list of Operations.
OpListType::iterator iterator
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Operation * getTerminator()
Get the terminator operation of this block.
OpListType & getOperations()
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
AffineMap getSingleDimShiftAffineMap(int64_t shift)
Returns a map that shifts its (single) input dimension by 'shift'.
AffineMap getShiftedAffineMap(AffineMap map, int64_t shift)
Returns an affine map that is a translation (shift) of all result expressions in 'map' by 'shift'.
AffineMap getDimIdentityMap()
AffineMap getMultiDimIdentityMap(unsigned rank)
AffineExpr getAffineSymbolExpr(unsigned position)
AffineExpr getAffineConstantExpr(int64_t constant)
AffineExpr getAffineDimExpr(unsigned position)
MLIRContext * getContext() const
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
This class allows control over how the GreedyPatternRewriteDriver works.
GreedyRewriteConfig & setStrictness(GreedyRewriteStrictness mode)
This is a utility class for mapping one set of IR entities to another.
auto lookup(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
IRValueT get() const
Return the current value being used by this operand.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class represents a diagnostic that is inflight and set to be reported.
An integer set representing a conjunction of one or more affine equalities and inequalities.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents an operand of an operation.
Operation * getOperation()
Inherit getOperation from OpState.
This class implements the operand iterators for the Operation class.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Location getLoc()
The source location the operation was defined or derived from.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Block * getBlock()
Returns the operation block that contains this operation.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Region * getParentRegion()
Returns the region to which the instruction belongs.
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
bool isAncestor(Region *other)
Return true if this region is ancestor of the other region.
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Region * getParentRegion()
Return the Region in which this Value is defined.
static WalkResult advance()
AffineBound represents a lower or upper bound in the for operation.
Value getOperand(unsigned idx)
operand_range getOperands()
unsigned getNumOperands()
AffineDmaStartOp starts a non-blocking DMA operation that transfers data from a source memref to a de...
AffineDmaWaitOp blocks until the completion of a DMA operation associated with the tag element 'tag[i...
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
ArrayRef< Value > getOperands() const
AffineMap getAffineMap() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
Specialization of arith.constant op that returns an integer of index type.
Operation * getOwner() const
Return the owner of this operand.
An IntegerRelation represents the set of points from a PresburgerSpace that satisfy a list of affine ...
void removeIndependentConstraints(unsigned pos, unsigned num)
Removes constraints that are independent of (i.e., do not have a coefficient) variables in the range ...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
ArrayRef< DynamicAPInt > getInequality(unsigned idx) const
DynamicAPInt atIneq(unsigned i, unsigned j) const
Returns the value at the specified inequality row and column.
bool isHyperRectangular(unsigned pos, unsigned num) const
Returns true if the set can be trivially detected as being hyper-rectangular on the specified contigu...
unsigned getNumVars() const
void setDimSymbolSeparation(unsigned newSymbolCount)
Changes the partition between dimensions and symbols.
unsigned getNumDimAndSymbolVars() const
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void getLowerAndUpperBoundIndices(unsigned pos, SmallVectorImpl< unsigned > *lbIndices, SmallVectorImpl< unsigned > *ubIndices, SmallVectorImpl< unsigned > *eqIndices=nullptr, unsigned offset=0, unsigned num=0) const
Gather positions of all lower and upper bounds of the variable at pos, and optionally any equalities ...
void removeVar(VarKind kind, unsigned pos)
Removes variables of the specified kind with the specified pos (or within the specified range) from t...
bool isParallelLoop(Operation &op)
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector< SmallVector< DependenceComponent, 2 >> *depCompsVec)
Returns in 'depCompsVec', dependence components for dependences between all load and store ops in loo...
LogicalResult coalesceLoops(MutableArrayRef< AffineForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
LogicalResult loopUnrollFull(AffineForOp forOp)
Unrolls this for operation completely if the trip count is known to be constant.
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions ©Options, std::optional< Value > filterMemRef, DenseSet< Operation * > ©Nests)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [‘beg...
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor or by the trip count (if constant),...
void extractForInductionVars(ArrayRef< AffineForOp > forInsts, SmallVectorImpl< Value > *ivs)
Extracts the induction variables from a list of AffineForOps and places them in the output argument i...
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, bool cleanUpUnroll=false)
Unrolls this for operation by the specified unroll factor.
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
void gatherLoops(func::FuncOp func, std::vector< SmallVector< AffineForOp, 2 >> &depthToLoops)
Gathers all AffineForOps in 'func.func' grouped by loop depth.
bool LLVM_ATTRIBUTE_UNUSED isPerfectlyNested(ArrayRef< AffineForOp > loops)
Returns true if loops is a perfectly nested loop nest, where loops appear in it from outermost to inn...
LogicalResult getIndexSet(MutableArrayRef< Operation * > ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
AffineForOp createCanonicalizedAffineForOp(OpBuilder b, Location loc, ValueRange lbOperands, AffineMap lbMap, ValueRange ubOperands, AffineMap ubMap, int64_t step=1)
Creates an AffineForOp while ensuring that the lower and upper bounds are canonicalized,...
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another AffineFor and the second a terminator).
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
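One shift value is supplied per body operation (terminator included). A sketch that delays the second body op by one iteration, validated first with isOpwiseShiftValid:

  SmallVector<uint64_t> shifts(forOp.getBody()->getOperations().size(), 0);
  if (shifts.size() > 2)
    shifts[1] = 1; // run the second op one iteration later
  if (isOpwiseShiftValid(forOp, shifts))
    (void)affineForOpBodySkew(forOp, shifts);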
void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map, SmallVectorImpl< Value > *operands)
Returns the trip count of the loop as an affine map with its corresponding operands if the latter is expressible as an affine expression, and nullptr otherwise.
bool isValidLoopInterchangePermutation(ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
Checks if the loop interchange permutation 'loopPermMap', of the perfectly nested sequence of loops in 'loops', would violate dependences.
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion, Operation *analyzedOp, const AffineCopyOptions ©Options, CopyGenerateResult &result)
generateCopyForMemRegion is similar to affineDataCopyGenerate, but works with a single memref region.
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to: 1. drop duplicate operands; 2. drop unused dims and symbols from map; 3. promote valid symbols to symbolic operands in case they appeared as dimensional operands; 4. propagate constant operands and drop them.
LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor)
Unrolls this loop by the specified unroll factor or its trip count, whichever is lower.
unsigned permuteLoops(ArrayRef< AffineForOp > inputNest, ArrayRef< unsigned > permMap)
Performs a loop permutation on a perfectly nested loop nest inputNest (where the contained loops appear from outer to inner) as specified by the permutation permMap.
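Here permMap[i] gives the new position of loop i (0 is outermost). A sketch interchanging the outer two loops of a depth-3 nest rooted at a hypothetical root, after checking legality:

  SmallVector<AffineForOp, 4> nest;
  getPerfectlyNestedLoops(nest, root);
  unsigned perm[] = {1, 0, 2};
  if (nest.size() == 3 && isValidLoopInterchangePermutation(nest, perm))
    permuteLoops(nest, perm);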
LogicalResult loopUnrollJamByFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor.
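A sketch pairing this with the trip-count query above so the jam factor evenly divides the iteration space (the utility also handles the general case; the check merely keeps the example simple):

  std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
  if (tripCount && *tripCount % 2 == 0)
    (void)loopUnrollJamByFactor(forOp, /*unrollJamFactor=*/2);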
LogicalResult tilePerfectlyNestedParametric(MutableArrayRef< AffineForOp > input, ArrayRef< Value > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops, using SSA values as tiling parameters.
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands, bool composeAffineMin=false)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps whose results are the values in operands, iteratively until no more of operands are the result of an AffineApplyOp.
void canonicalizeSetAndOperands(IntegerSet *set, SmallVectorImpl< Value > *operands)
Canonicalizes an integer set the same way canonicalizeMapAndOperands does for affine maps.
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op', ordered from the outermost 'affine.for' to the innermost one.
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp)
Returns the greatest known integral divisor of the trip count.
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped, std::nullopt otherwise.
int64_t numEnclosingInvariantLoops(OpOperand &operand)
Count the number of loops surrounding operand such that operand could be hoisted above.
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
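A sketch, assuming index-typed Values blockId and gridSize produced by the caller (e.g. from gpu.block_id / gpu.grid_dim) for a 1-D distribution:

  // Rewrites scfForOp's bounds and step so each processor executes its slice.
  mapLoopToProcessorIds(scfForOp, /*processorId=*/{blockId},
                        /*numProcessors=*/{gridSize});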
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks whether SSA dominance would be violated if a for op's body operations are shifted by the specified shifts.
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes and sinking them, in their order of occurrence in forOps, under each of the targets.
AffineForOp sinkSequentialLoops(AffineForOp forOp)
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops.
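A sketch tiling a 2-D perfect nest rooted at a hypothetical root with 32x32 tiles:

  SmallVector<AffineForOp, 4> band, tiledNest;
  getPerfectlyNestedLoops(band, root);
  if (band.size() == 2 &&
      succeeded(tilePerfectlyNested(band, /*tileSizes=*/{32, 32}, &tiledNest))) {
    // tiledNest now holds four loops: two tile-space, then two intra-tile.
  }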
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB)
Performs loop interchange on 'forOpA' and 'forOpB'.
LogicalResult coalescePerfectlyNestedAffineLoops(AffineForOp op)
Walk an affine.for to find a band to coalesce.
void getTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 >> *bands)
Identify valid and profitable bands of loops to tile.
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memref's indices using the supplied affine map, indexRemap.
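With all optional arguments defaulted, the simplest use swaps one memref for another of the same shape; oldMemRef and newMemRef are hypothetical Values:

  // Returns failure if some use cannot be rewritten
  // (e.g. a non-dereferencing use when those are disallowed).
  if (failed(replaceAllMemRefUsesWith(oldMemRef, newMemRef)))
    LLVM_DEBUG(llvm::dbgs() << "could not replace all memref uses\n");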
LogicalResult separateFullTiles(MutableArrayRef< AffineForOp > nest, SmallVectorImpl< AffineForOp > *fullTileNest=nullptr)
Separates full tiles from partial tiles for a perfect nest nest by generating a conditional guard that selects between the full tile version and the partial tile version.
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRMWKind op to lhs and rhs.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next integer.
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region ®ion)
Replace all uses of orig within the given region with replacement.
AffineMap removeDuplicateExprs(AffineMap map)
Returns a map with the same dimension and symbol count as map, but whose results are the unique affin...
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist driven manner until a fixpoint is reached.
const FrozenRewritePatternSet & patterns
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction methods.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
@ ExistingAndNewOps
Only pre-existing and newly created ops are processed.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
Result for calling generateCopyForMemRegion.
std::optional< int64_t > ub
std::optional< int64_t > lb
A description of a (parallelizable) reduction in an affine loop.
A region of a memref's data space; this is typically constructed by analyzing load/store ops on this memref.
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constant (always possible for static shapes), std::nullopt otherwise.
FlatAffineValueConstraints * getConstraints()
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Value memref
Memref that this region corresponds to.
Location loc
If there is more than one load/store op associated with the region, the location information would correspond to one of those ops.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.