#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "LoopUtils"

using namespace presburger;
using llvm::SmallMapVector;
// In getCleanupLoopLowerBound (computes the cleanup loop's lower bound for an
// unroll by `unrollFactor`): `tripCountMap`/`tripCountOperands` come from
// getTripCountMapAndOperands, and `bumpExprs`/`bumpValues`/`newUbExprs` are
// scratch vectors sized to the number of trip count map results.
// ...
OpBuilder b(forOp);
auto lbMap = forOp.getLowerBoundMap();
auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
                                  forOp.getLowerBoundOperands());

// For each trip count expression tr, create an affine.apply computing
// lb + (tr - tr % unrollFactor) * step; these results make up the cleanup
// loop lower bound.
int64_t step = forOp.getStep();
for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
  auto tripCountExpr = tripCountMap.getResult(i);
  bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
  // ...
  bumpValues[i] =
      b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, tripCountOperands);
}

for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
  newUbExprs[i] = b.getAffineDimExpr(0) + b.getAffineDimExpr(i + 1);

cleanupLbOperands.clear();
cleanupLbOperands.push_back(lb);
cleanupLbOperands.append(bumpValues.begin(), bumpValues.end());
// ... (compose, simplify, and canonicalize cleanupLbMap + cleanupLbOperands)

// Remove any affine.apply's that became dead from the simplification above.
for (auto v : bumpValues)
  if (v.use_empty())
    v.getDefiningOp()->erase();
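// Worked example (illustrative, not part of the original source): with a
// constant trip count the bound above reduces to plain integer arithmetic.
// For lb = 0, step = 1, trip count tc = 10, and unrollFactor = 4:
//   cleanupLb = lb + (tc - tc % 4) * step = 0 + (10 - 2) * 1 = 8
// so iterations [0, 8) run in the unrolled loop and [8, 10) in the cleanup.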
/// Helper to replace uses of loop carried values (iter_args) and loop yield
/// values while promoting single iteration affine.for ops.
static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
  // Replace uses of iter arguments with iter operands (initial values).
  auto iterOperands = forOp.getIterOperands();
  auto iterArgs = forOp.getRegionIterArgs();
  for (auto e : llvm::zip(iterOperands, iterArgs))
    std::get<1>(e).replaceAllUsesWith(std::get<0>(e));

  // Replace uses of loop results with the values yielded by the loop.
  auto outerResults = forOp.getResults();
  auto innerResults = forOp.getBody()->getTerminator()->getOperands();
  for (auto e : llvm::zip(outerResults, innerResults))
    std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
}
/// Promotes the loop body of an AffineForOp to its containing block if the
/// loop was known to have a single iteration.
LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
  Optional<uint64_t> tripCount = getConstantTripCount(forOp);
  if (!tripCount || *tripCount != 1)
    return failure();

  if (forOp.getLowerBoundMap().getNumResults() != 1)
    return failure();

  // Replace all uses of the IV with its single iteration value.
  auto iv = forOp.getInductionVar();
  auto *parentBlock = forOp->getBlock();
  if (!iv.use_empty()) {
    if (forOp.hasConstantLowerBound()) {
      OpBuilder topBuilder(forOp->getParentOfType<func::FuncOp>().getBody());
      auto constOp = topBuilder.create<arith::ConstantIndexOp>(
          forOp.getLoc(), forOp.getConstantLowerBound());
      iv.replaceAllUsesWith(constOp);
    } else {
      auto lbOperands = forOp.getLowerBoundOperands();
      auto lbMap = forOp.getLowerBoundMap();
      OpBuilder builder(forOp);
      if (lbMap == builder.getDimIdentityMap()) {
        // No affine.apply needed: the IV equals the single lb operand.
        iv.replaceAllUsesWith(lbOperands[0]);
      } else {
        auto affineApplyOp =
            builder.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
        iv.replaceAllUsesWith(affineApplyOp);
      }
    }
  }

  replaceIterArgsAndYieldResults(forOp);

  // Move the loop body operations, except the terminator, to the loop's
  // containing block.
  forOp.getBody()->back().erase();
  parentBlock->getOperations().splice(Block::iterator(forOp),
                                      forOp.getBody()->getOperations());
  forOp.erase();
  return success();
}
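// Illustration (assumed input/output, not from the original file): promoting
//   affine.for %i = 0 to 1 { "test.use"(%i) : (index) -> () }
// replaces %i with the constant lower bound and splices the body into the
// enclosing block:
//   %c0 = arith.constant 0 : index
//   "test.use"(%c0) : (index) -> ()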
/// Generates an affine.for op with the specified lower and upper bounds while
/// generating the right IV remappings to realize shifts for the operations in
/// its body. Returns a null AffineForOp if the generated loop simplified away
/// to a single iteration one.
static AffineForOp generateShiftedLoop(
    AffineMap lbMap, AffineMap ubMap,
    const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> &opGroupQueue,
    unsigned offset, AffineForOp srcForOp, OpBuilder b) {
  auto lbOperands = srcForOp.getLowerBoundOperands();
  auto ubOperands = srcForOp.getUpperBoundOperands();
  // ...
  auto loopChunk = b.create<AffineForOp>(srcForOp.getLoc(), lbOperands, lbMap,
                                         ubOperands, ubMap, srcForOp.getStep());
  auto loopChunkIV = loopChunk.getInductionVar();
  auto srcIV = srcForOp.getInductionVar();

  BlockAndValueMapping operandMap;
  auto bodyBuilder = OpBuilder::atBlockTerminator(loopChunk.getBody());
  for (auto it = opGroupQueue.begin() + offset, e = opGroupQueue.end(); it != e;
       ++it) {
    uint64_t shift = it->first;
    auto ops = it->second;
    // All 'same shift' operations get added with their operands being
    // remapped to results of cloned operations, and their IV remapped as
    // remappedIV = newIV - shift * step (only if the shift is non-zero).
    if (!srcIV.use_empty() && shift != 0) {
      auto ivRemap = bodyBuilder.create<AffineApplyOp>(
          srcForOp.getLoc(),
          bodyBuilder.getSingleDimShiftAffineMap(
              -static_cast<int64_t>(srcForOp.getStep() * shift)),
          loopChunkIV);
      operandMap.map(srcIV, ivRemap);
    } else {
      operandMap.map(srcIV, loopChunkIV);
    }
    for (auto *op : ops)
      bodyBuilder.clone(*op, operandMap);
  }
  if (succeeded(promoteIfSingleIteration(loopChunk)))
    return AffineForOp();
  return loopChunk;
}
/// Skews the operations in an affine.for's body with the specified
/// operation-wise shifts (specified in units of the loop step).
LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
                                        ArrayRef<uint64_t> shifts,
                                        bool unrollPrologueEpilogue) {
  assert(forOp.getBody()->getOperations().size() == shifts.size() &&
         "too few/many shifts");
  if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
    return success();

  // If the trip count isn't constant, we would need versioning and conditional
  // guards; such loops are better tiled first.
  Optional<uint64_t> mayBeConstTripCount = getConstantTripCount(forOp);
  if (!mayBeConstTripCount) {
    LLVM_DEBUG(forOp.emitRemark("non-constant trip count loop not handled"));
    return success();
  }
  uint64_t tripCount = *mayBeConstTripCount;

  assert(isOpwiseShiftValid(forOp, shifts) &&
         "shifts will lead to an invalid transformation\n");

  int64_t step = forOp.getStep();
  unsigned numChildOps = shifts.size();

  // Do a linear time (counting) sort for the shifts.
  uint64_t maxShift = *std::max_element(shifts.begin(), shifts.end());
  if (maxShift >= numChildOps) {
    // Large shifts are not the typical use case.
    forOp.emitWarning("not shifting because shifts are unrealistically large");
    return success();
  }

  // Group the operations by shift amount, preserving their original order.
  std::vector<std::vector<Operation *>> sortedOpGroups(maxShift + 1);
  unsigned pos = 0;
  for (auto &op : forOp.getBody()->without_terminator()) {
    auto shift = shifts[pos++];
    sortedOpGroups[shift].push_back(&op);
  }

  AffineForOp prologue, epilogue;

  // Do a sweep over the sorted shifts while storing open groups in a queue,
  // generating one loop portion per sub-sequence of equal shifts.
  std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> opGroupQueue;

  auto origLbMap = forOp.getLowerBoundMap();
  uint64_t lbShift = 0;
  OpBuilder b(forOp);
  for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
    // If nothing is shifted by d, continue.
    if (sortedOpGroups[d].empty())
      continue;
    if (!opGroupQueue.empty()) {
      assert(d > 0 &&
             "Queue expected to be empty when the first block is found");
      // Generate the loop for the interval
      // [lbShift, min(lbShift + tripCount * step, d * step)).
      AffineForOp res;
      if (lbShift + tripCount * step < d * step) {
        res = generateShiftedLoop(
            b.getShiftedAffineMap(origLbMap, lbShift),
            b.getShiftedAffineMap(origLbMap, lbShift + tripCount * step),
            opGroupQueue, /*offset=*/0, forOp, b);
        // Entire loop for the queued op groups generated; empty the queue.
        opGroupQueue.clear();
        lbShift += tripCount * step;
      } else {
        res = generateShiftedLoop(b.getShiftedAffineMap(origLbMap, lbShift),
                                  b.getShiftedAffineMap(origLbMap, d * step),
                                  opGroupQueue, /*offset=*/0, forOp, b);
        lbShift = d * step;
      }
      if (res) {
        // Simplify/canonicalize the generated affine.for.
        RewritePatternSet patterns(res.getContext());
        AffineForOp::getCanonicalizationPatterns(patterns, res.getContext());
        bool erased;
        (void)applyOpPatternsAndFold(res, std::move(patterns), &erased);
        if (!erased && !prologue)
          prologue = res;
        if (!erased)
          epilogue = res;
      }
    } else {
      // Start of the first interval.
      lbShift = d * step;
    }
    // Augment the list of operations for the current open interval.
    opGroupQueue.emplace_back(d, sortedOpGroups[d]);
  }

  // Process the remaining op groups in FIFO order and complete their loops.
  for (unsigned i = 0, e = opGroupQueue.size(); i < e; ++i) {
    uint64_t ubShift = (opGroupQueue[i].first + tripCount) * step;
    epilogue = generateShiftedLoop(b.getShiftedAffineMap(origLbMap, lbShift),
                                   b.getShiftedAffineMap(origLbMap, ubShift),
                                   opGroupQueue, /*offset=*/i, forOp, b);
    lbShift = ubShift;
    if (!prologue)
      prologue = epilogue;
  }

  // Erase the original for op.
  forOp.erase();

  if (unrollPrologueEpilogue && prologue)
    (void)loopUnrollFull(prologue);
  if (unrollPrologueEpilogue && epilogue && epilogue != prologue)
    (void)loopUnrollFull(epilogue);

  return success();
}
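// Usage sketch (an assumption, not code from this file): shift the second of
// two body ops by one iteration so it overlaps with the next iteration's
// first op. One shift is supplied per body operation, including the
// terminator.
//
//   SmallVector<uint64_t, 4> shifts = {0, 1, 0};
//   if (isOpwiseShiftValid(forOp, shifts))
//     (void)affineForOpBodySkew(forOp, shifts,
//                               /*unrollPrologueEpilogue=*/false);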
/// Checks the legality of tiling of a hyper-rectangular loop nest by simply
/// checking if there is a 'negative' dependence in the memrefs present in the
/// loop nest. If yes, then tiling is invalid.
static bool
checkTilingLegalityImpl(MutableArrayRef<mlir::AffineForOp> origLoops) {
  assert(!origLoops.empty() && "no original loops provided");

  // First gather all the load and store ops whose dependences we will check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  origLoops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = origLoops.size();
  FlatAffineValueConstraints dependenceConstraints;
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        dependenceConstraints.reset();
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, &dependenceConstraints, &depComps);

        // Skip if there is no dependence in this case.
        if (!hasDependence(result))
          continue;

        // Check for a negative direction vector in the dependence components
        // found above: that dependence would be violated by the default
        // hyper-rect tiling method.
        LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
                                   "for dependence at depth: "
                                << Twine(d) << " between:\n";);
        for (unsigned k = 0, e = depComps.size(); k < e; k++) {
          DependenceComponent depComp = depComps[k];
          if (depComp.lb.hasValue() && depComp.ub.hasValue() &&
              depComp.lb.getValue() < depComp.ub.getValue() &&
              depComp.ub.getValue() < 0) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Dependence component lb = "
                       << Twine(depComp.lb.getValue())
                       << " ub = " << Twine(depComp.ub.getValue())
                       << " is negative at depth: " << Twine(d)
                       << " and thus violates the legality rule.\n");
            return false;
          }
        }
      }
    }
  }
  return true;
}
/// Checks whether a loop nest is hyper-rectangular or not.
static LogicalResult
checkIfHyperRectangular(MutableArrayRef<AffineForOp> input) {
  // 0-d or 1-d is trivially hyper-rectangular.
  if (input.size() <= 1)
    return success();
  FlatAffineValueConstraints cst;
  SmallVector<Operation *, 8> ops(input.begin(), input.end());
  if (failed(getIndexSet(ops, &cst))) {
    LLVM_DEBUG(llvm::dbgs() << "Index set computation failed!\n");
    return failure();
  }
  if (!cst.isHyperRectangular(0, input.size())) {
    LLVM_DEBUG(llvm::dbgs()
               << "Non-hyperrectangular nests not supported for tiling!\n");
    return failure();
  }
  return success();
}
/// Checks whether the input loop nest is supported for tiling and whether
/// tiling would be legal.
template <typename t>
static LogicalResult performPreTilingChecks(MutableArrayRef<AffineForOp> input,
                                            ArrayRef<t> tileSizes) {
  assert(input.size() == tileSizes.size() && "Too few/many tile sizes");

  if (llvm::any_of(input,
                   [](AffineForOp op) { return op.getNumResults() > 0; })) {
    LLVM_DEBUG(llvm::dbgs()
               << "Cannot tile nest where a loop has yield values\n");
    return failure();
  }

  // Check if the supplied `for` ops are all successively nested.
  if (!isPerfectlyNested(input)) {
    LLVM_DEBUG(llvm::dbgs() << "input loops not perfectly nested");
    return failure();
  }

  if (failed(checkIfHyperRectangular(input)))
    return failure();

  // Check if tiling is legal.
  if (failed(checkTilingLegality(input))) {
    input[0].emitRemark("tiling code is illegal due to dependences");
    return failure();
  }
  return success();
}
/// Move the loop body of AffineForOp 'src' into the specified location in
/// destination's body, ignoring the terminator.
static void moveLoopBodyImpl(AffineForOp src, AffineForOp dest,
                             Block::iterator loc) {
  auto &ops = src.getBody()->getOperations();
  dest.getBody()->getOperations().splice(loc, ops, ops.begin(),
                                         std::prev(ops.end()));
}
/// Constructs the tiled loop nest, without setting the loop bounds, and moves
/// the body of the original loop nest into the tiled loop nest.
void constructTiledLoopNest(MutableArrayRef<AffineForOp> origLoops,
                            AffineForOp rootAffineForOp, unsigned width,
                            MutableArrayRef<AffineForOp> tiledLoops) {
  Location loc = rootAffineForOp.getLoc();

  // The outermost among the loops as we add more.
  Operation *topLoop = rootAffineForOp.getOperation();
  AffineForOp innermostPointLoop;

  // Add intra-tile (or point) loops.
  for (unsigned i = 0; i < width; i++) {
    OpBuilder b(topLoop);
    // Loop bounds will be set later.
    AffineForOp pointLoop = b.create<AffineForOp>(loc, 0, 0);
    pointLoop.getBody()->getOperations().splice(
        pointLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
        topLoop);
    tiledLoops[2 * width - 1 - i] = pointLoop;
    topLoop = pointLoop.getOperation();
    if (i == 0)
      innermostPointLoop = pointLoop;
  }

  // Add inter-tile (or tile-space) loops.
  for (unsigned i = width; i < 2 * width; i++) {
    OpBuilder b(topLoop);
    // Loop bounds will be set later.
    AffineForOp tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
    tileSpaceLoop.getBody()->getOperations().splice(
        tileSpaceLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
        topLoop);
    tiledLoops[2 * width - i - 1] = tileSpaceLoop;
    topLoop = tileSpaceLoop.getOperation();
  }

  // Move the loop body of the original nest to the innermost new loop.
  moveLoopBody(origLoops.back(), innermostPointLoop);
}
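// Shape of the result (illustrative): for width = 2 and an original nest
// (%i, %j), the constructed nest is
//   tiledLoops[0] (%i tile-space) {
//     tiledLoops[1] (%j tile-space) {
//       tiledLoops[2] (%i intra-tile) {
//         tiledLoops[3] (%j intra-tile) { <original body> } } } }
// with all bounds still unset at this point.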
/// Set lower and upper bounds of intra-tile loops for parametric tiling.
static void setIntraTileBoundsParametric(OpBuilder &b, AffineForOp origLoop,
                                         AffineForOp newInterTileLoop,
                                         AffineForOp newIntraTileLoop,
                                         Value tileSize) {
  // ...
  assert(origLoop.hasConstantLowerBound() &&
         "expected input loops to have constant lower bound.");
  // ...
  // The intra-tile loop's bounds are functions of the inter-tile loop's IV
  // and the SSA tile size parameter, both appended as operands.
  lbOperands.push_back(newInterTileLoop.getInductionVar());
  ubOperands.push_back(newInterTileLoop.getInductionVar());
  // ...
  lbOperands.push_back(tileSize);
  ubOperands.push_back(tileSize);
  // ...
  // Lower bound: (lbLoopIv - origLb) * tileParameter + origLb.
  lbBoundExprs.push_back(
      ((lbLoopIvExpr - origLowerBoundExpr) * lbTileParameter) +
      origLowerBoundExpr);
  // ...
  // Upper bound: one full tile past the lower bound.
  ubBoundExprs.push_back(
      ((ubLoopIvExpr - origLowerBoundExpr) * ubTileParameter) +
      (ubTileParameter * origLoopStep) + origLowerBoundExpr);

  ubBoundExprs.append(origUbMap.getResults().begin(),
                      origUbMap.getResults().end());
  // ...
  newIntraTileLoop.setLowerBound(lbOperands, lbMap);
  // ...
  newIntraTileLoop.setUpperBound(ubOperands, ubMap);

  // Original loop step must be preserved.
  newIntraTileLoop.setStep(origLoop.getStep());
}
/// Set lower and upper bounds of inter-tile loops for parametric tiling.
static void setInterTileBoundsParametric(OpBuilder &b, AffineForOp origLoop,
                                         AffineForOp newLoop, Value tileSize) {
  OperandRange newLbOperands = origLoop.getLowerBoundOperands();

  // The lower bounds of the inter-tile loops are the same as the
  // corresponding lower bounds of the original loops.
  newLoop.setLowerBound(newLbOperands, origLoop.getLowerBoundMap());
  // ...
  assert(origLoop.hasConstantLowerBound() &&
         "expected input loops to have constant lower bound.");
  // ...
  // The tile size parameter is appended as an operand of the new upper bound
  // map.
  ubOperands.push_back(tileSize);
  // ...
  int64_t origUpperBound;
  AffineExpr origUpperBoundExpr;
  if (origLoop.hasConstantUpperBound()) {
    origUpperBound = origLoop.getConstantUpperBound();
    origUpperBoundExpr = b.getAffineConstantExpr(origUpperBound);
    // New upper bound:
    //   origLowerBound + ceildiv(origUpperBound - origLowerBound, tileSize).
    boundExprs.push_back(
        origLowerBoundExpr +
        (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
  } else {
    // ... (non-constant upper bound: same expression per ub map result)
    boundExprs.push_back(
        origLowerBoundExpr +
        (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
  }
  // ...
  newLoop.setUpperBound(ubOperands, ubMap);

  // Original loop step must be preserved.
  newLoop.setStep(origLoop.getStep());
}
/// Constructs and sets new loop bounds after tiling for the case of
/// hyper-rectangular index sets, where tiling parameters are captured from
/// SSA values. Bounds of each dimension can be treated independently.
static void constructParametricallyTiledIndexSetHyperRect(
    MutableArrayRef<AffineForOp> origLoops,
    MutableArrayRef<AffineForOp> newLoops, ArrayRef<Value> tileSizes) {
  assert(!origLoops.empty() && "expected at least one loop in band");
  assert(origLoops.size() == tileSizes.size() &&
         "expected tiling parameter for each loop in band.");

  OpBuilder b(origLoops[0].getOperation());
  unsigned width = origLoops.size();

  // Set bounds for tile-space loops.
  for (unsigned i = 0; i < width; ++i)
    setInterTileBoundsParametric(b, origLoops[i], newLoops[i], tileSizes[i]);

  // Set bounds for intra-tile loops.
  for (unsigned i = 0; i < width; ++i)
    setIntraTileBoundsParametric(b, origLoops[i], newLoops[i],
                                 newLoops[i + width], tileSizes[i]);
}
/// Constructs and sets new loop bounds after tiling for the case of
/// hyper-rectangular index sets, where the bounds of one dimension do not
/// depend on other dimensions.
static void
constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
                                MutableArrayRef<AffineForOp> newLoops,
                                ArrayRef<unsigned> tileSizes) {
  assert(!origLoops.empty());
  assert(origLoops.size() == tileSizes.size());

  OpBuilder b(origLoops[0].getOperation());
  unsigned width = origLoops.size();

  // Bounds for tile-space loops.
  for (unsigned i = 0; i < width; i++) {
    OperandRange newLbOperands = origLoops[i].getLowerBoundOperands();
    OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
    newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
    newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
    // If the step size of the original loop is x and the tile size is y, the
    // tile-space loop's step size becomes x * y.
    newLoops[i].setStep(tileSizes[i] * origLoops[i].getStep());
  }
  // Bounds for intra-tile loops.
  for (unsigned i = 0; i < width; i++) {
    int64_t largestDiv = getLargestDivisorOfTripCount(origLoops[i]);
    Optional<uint64_t> mayBeConstantCount = getConstantTripCount(origLoops[i]);
    // The lower bound is just the tile-space loop's IV.
    AffineMap lbMap = b.getDimIdentityMap();
    newLoops[width + i].setLowerBound(
        /*operands=*/newLoops[i].getInductionVar(), lbMap);
    // The intra-tile loop keeps the original loop's step size.
    newLoops[width + i].setStep(origLoops[i].getStep());

    // Set the upper bound.
    if (mayBeConstantCount && mayBeConstantCount.getValue() < tileSizes[i]) {
      // Trip count is less than the tile size: upper bound is lower bound +
      // trip count * step.
      AffineMap ubMap = b.getSingleDimShiftAffineMap(
          mayBeConstantCount.getValue() * origLoops[i].getStep());
      newLoops[width + i].setUpperBound(
          /*operands=*/newLoops[i].getInductionVar(), ubMap);
    } else if (largestDiv % tileSizes[i] != 0) {
      // The intra-tile loop ii goes from i to min(i + tileSize * step, ub_i):
      // the original upper bound map gets the extra expression
      // i + tileSize * step, with the tile-space IV appended as an operand.
      AffineMap origUbMap = origLoops[i].getUpperBoundMap();
      SmallVector<Value, 4> ubOperands;
      // ... (original dim operands)
      ubOperands.push_back(newLoops[i].getInductionVar());
      // ... (original symbol operands)
      SmallVector<AffineExpr, 4> boundExprs;
      boundExprs.reserve(1 + origUbMap.getNumResults());
      AffineExpr dim = b.getAffineDimExpr(origUbMap.getNumDims());
      boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStep());
      boundExprs.append(origUbMap.getResults().begin(),
                        origUbMap.getResults().end());
      AffineMap ubMap =
          AffineMap::get(origUbMap.getNumDims() + 1, origUbMap.getNumSymbols(),
                         boundExprs, b.getContext());
      newLoops[width + i].setUpperBound(/*operands=*/ubOperands, ubMap);
    } else {
      // No need for the min expression.
      AffineExpr dim = b.getAffineDimExpr(0);
      AffineMap ubMap =
          AffineMap::get(1, 0, dim + tileSizes[i] * origLoops[i].getStep());
      newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
    }
  }
}
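// Worked example (illustrative): for `affine.for %i = 0 to 100` and a tile
// size of 32, the tile-space loop becomes `%ii = 0 to 100 step 32`. Since the
// largest trip-count divisor (100) is not a multiple of 32, the intra-tile
// loop gets a min upper bound: `%i = %ii to min(%ii + 32, 100)`. With a tile
// size of 25, which divides 100, the bound is simply `%ii + 25`.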
/// Tiles the specified band of perfectly nested loops, creating tile-space
/// loops and intra-tile loops.
LogicalResult
mlir::tilePerfectlyNested(MutableArrayRef<AffineForOp> input,
                          ArrayRef<unsigned> tileSizes,
                          SmallVectorImpl<AffineForOp> *tiledNest) {
  // ... (pre-tiling checks)
  MutableArrayRef<AffineForOp> origLoops = input;
  AffineForOp rootAffineForOp = origLoops[0];
  // Note that width is at least one since the band isn't empty.
  unsigned width = input.size();
  SmallVector<AffineForOp, 6> tiledLoops(2 * width);
  // ... (construct the tiled nest, extract the original IVs into
  // `origLoopIVs`, then set bounds via constructTiledIndexSetHyperRect)

  // Replace original IVs with intra-tile loop IVs.
  for (unsigned i = 0; i < width; i++)
    origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());

  // Erase the old loop nest.
  rootAffineForOp.erase();

  if (tiledNest)
    *tiledNest = std::move(tiledLoops);
  return success();
}

/// Parametric variant of the above: tile sizes are SSA values.
LogicalResult mlir::tilePerfectlyNestedParametric(
    MutableArrayRef<AffineForOp> input, ArrayRef<Value> tileSizes,
    SmallVectorImpl<AffineForOp> *tiledNest) {
  // ... (pre-tiling checks)
  AffineForOp rootAffineForOp = origLoops[0];
  unsigned width = input.size();
  // ... (same structure, using constructParametricallyTiledIndexSetHyperRect)
  for (unsigned i = 0; i < width; i++)
    origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
  rootAffineForOp.erase();
  if (tiledNest)
    *tiledNest = std::move(tiledLoops);
  return success();
}
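// Usage sketch (an assumption, not code from this file): tile a 2-d perfectly
// nested band with 32x32 tiles.
//
//   SmallVector<AffineForOp, 6> band;
//   getPerfectlyNestedLoops(band, rootForOp);
//   SmallVector<AffineForOp, 6> tiledNest;
//   if (failed(tilePerfectlyNested(band, /*tileSizes=*/{32, 32}, &tiledNest)))
//     rootForOp.emitRemark("tiling was not applied");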
/// Get the perfectly nested sequence of loops starting at the root of the
/// loop nest. A loop is perfectly nested iff the first op in its body is
/// another AffineForOp and the second op is the terminator.
void mlir::getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
                                   AffineForOp root) {
  for (unsigned i = 0; i < std::numeric_limits<unsigned>::max(); ++i) {
    nestedLoops.push_back(root);
    Block &body = root.getRegion().front();
    if (body.begin() != std::prev(body.end(), 2))
      return;
    root = dyn_cast<AffineForOp>(&body.front());
    if (!root)
      return;
  }
}
/// Identify valid and profitable bands of loops to tile: returns all maximal
/// outermost perfect loop nests.
void mlir::getTileableBands(func::FuncOp f,
                            std::vector<SmallVector<AffineForOp, 6>> *bands) {
  // Get maximal perfect nests of 'affine.for' ops starting from each root
  // (inclusive).
  for (AffineForOp forOp : f.getOps<AffineForOp>()) {
    SmallVector<AffineForOp, 6> band;
    getPerfectlyNestedLoops(band, forOp);
    bands->push_back(band);
  }
}
/// Unrolls this for operation completely if the trip count is known to be
/// constant.
LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.hasValue()) {
    uint64_t tripCount = mayBeConstantTripCount.getValue();
    if (tripCount == 0)
      return success();
    if (tripCount == 1)
      return promoteIfSingleIteration(forOp);
    return loopUnrollByFactor(forOp, tripCount);
  }
  return failure();
}

/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.
LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
                                         uint64_t unrollFactor) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.hasValue() &&
      mayBeConstantTripCount.getValue() < unrollFactor)
    return loopUnrollByFactor(forOp, mayBeConstantTripCount.getValue());
  return loopUnrollByFactor(forOp, unrollFactor);
}
/// Generates unrolled copies of the loop body in 'loopBodyBlock', with
/// associated induction variable 'forOpIV', by 'unrollFactor', calling
/// 'ivRemapFn' to remap 'forOpIV' for each unrolled body. If specified,
/// annotates the ops in each unrolled iteration using annotateFn.
static void generateUnrolledLoop(
    Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
    function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
    ValueRange iterArgs, ValueRange yieldedValues) {
  // Builder to insert unrolled bodies just before the terminator.
  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);

  if (!annotateFn)
    annotateFn = [](unsigned, Operation *, OpBuilder) {};

  // Keep a pointer to the last non-terminator operation in the original block
  // so that we know what to clone (since we are doing this in-place).
  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);

  // Unroll the contents of the loop body (appends unrollFactor - 1 additional
  // copies).
  SmallVector<Value, 4> lastYielded(yieldedValues);

  for (unsigned i = 1; i < unrollFactor; i++) {
    BlockAndValueMapping operandMap;

    // Prepare the operand map: iter args flow from the previous copy's yields.
    operandMap.map(iterArgs, lastYielded);

    // If the induction variable is used, create a remapping to the value for
    // this unrolled instance.
    if (!forOpIV.use_empty()) {
      Value ivUnroll = ivRemapFn(i, forOpIV, builder);
      operandMap.map(forOpIV, ivUnroll);
    }

    // Clone the original body of the loop.
    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
      Operation *clonedOp = builder.clone(*it, operandMap);
      annotateFn(i, clonedOp, builder);
    }

    // Update yielded values.
    for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
      lastYielded[i] = operandMap.lookup(yieldedValues[i]);
  }

  // Annotate the ops in the original body last, so the annotations are not
  // copied into the cloned ops above.
  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
    annotateFn(0, &*it, builder);

  // Update the operands of the yield.
  loopBodyBlock->getTerminator()->setOperands(lastYielded);
}
/// Helper to generate a cleanup loop for unroll or unroll-and-jam when the
/// trip count is not a multiple of `unrollFactor`.
static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
                                                  uint64_t unrollFactor) {
  // Insert the cleanup loop right after 'forOp'.
  OpBuilder builder(forOp->getBlock(), std::next(Block::iterator(forOp)));
  auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));

  // Update uses of `forOp` results: `cleanupForOp` should use `forOp`'s
  // results and produce the results for the original users.
  auto results = forOp.getResults();
  auto cleanupResults = cleanupForOp.getResults();
  auto cleanupIterOperands = cleanupForOp.getIterOperands();

  for (auto e : llvm::zip(results, cleanupResults, cleanupIterOperands)) {
    std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
    cleanupForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
  }

  AffineMap cleanupMap;
  SmallVector<Value, 4> cleanupOperands;
  getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
  if (!cleanupMap)
    return failure();

  cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
  // Promote the loop body up if this has turned into a single iteration loop.
  (void)promoteIfSingleIteration(cleanupForOp);

  // Adjust the upper bound of the original loop; this is the same as the
  // lower bound of the cleanup loop.
  forOp.setUpperBound(cleanupOperands, cleanupMap);
  return success();
}
/// Unrolls this for operation by the specified unroll factor.
LogicalResult mlir::loopUnrollByFactor(
    AffineForOp forOp, uint64_t unrollFactor,
    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
  assert(unrollFactor > 0 && "unroll factor should be positive");

  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (unrollFactor == 1) {
    if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
        failed(promoteIfSingleIteration(forOp)))
      return failure();
    return success();
  }

  // Nothing in the loop body other than the terminator.
  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
    return success();

  // If the trip count is lower than the unroll factor, no unrolled body.
  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor)
    return failure();

  // Generate the cleanup loop if the trip count isn't a multiple of
  // unrollFactor.
  if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
    // A multi-result lower or upper bound map can't be handled, since the
    // cleanup loop's lower bound could then not be expressed as an affine
    // function or a max over affine functions.
    if (forOp.getLowerBoundMap().getNumResults() != 1 ||
        forOp.getUpperBoundMap().getNumResults() != 1)
      return failure();
    if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
      assert(false && "cleanup loop lower bound map for single result lower "
                      "and upper bound maps can always be determined");
  }

  ValueRange iterArgs(forOp.getRegionIterArgs());
  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();

  // Scale the step of the loop being unrolled by the unroll factor.
  int64_t step = forOp.getStep();
  forOp.setStep(step * unrollFactor);
  generateUnrolledLoop(
      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
      [&](unsigned i, Value iv, OpBuilder b) {
        // iv' = iv + i * step
        auto d0 = b.getAffineDimExpr(0);
        auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
        return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, iv);
      },
      /*annotateFn=*/annotateFn,
      /*iterArgs=*/iterArgs, /*yieldedValues=*/yieldedValues);

  // Promote the loop body up if this has turned into a single iteration loop.
  (void)promoteIfSingleIteration(forOp);
  return success();
}
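// Usage sketch (an assumption, not code from this file): unroll by 4 and tag
// every unrolled copy with a hypothetical attribute recording its unroll
// index.
//
//   (void)loopUnrollByFactor(
//       forOp, /*unrollFactor=*/4,
//       [&](unsigned i, Operation *op, OpBuilder b) {
//         op->setAttr("my.unroll_iteration", b.getI32IntegerAttr(i));
//       });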
/// Unrolls and jams this loop by the specified factor or by the trip count
/// (if constant), whichever is lower.
LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
                                            uint64_t unrollJamFactor) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.hasValue() &&
      mayBeConstantTripCount.getValue() < unrollJamFactor)
    return loopUnrollJamByFactor(forOp, mayBeConstantTripCount.getValue());
  return loopUnrollJamByFactor(forOp, unrollJamFactor);
}

/// Check if all control operands of all loops are defined outside of `forOp`
/// and return false if not.
static bool areInnerBoundsInvariant(AffineForOp forOp) {
  auto walkResult = forOp.walk([&](AffineForOp aForOp) {
    for (auto controlOperand : aForOp.getControlOperands()) {
      if (!forOp.isDefinedOutsideOfLoop(controlOperand))
        return WalkResult::interrupt();
    }
    return WalkResult::advance();
  });
  return !walkResult.wasInterrupted();
}
/// Gathers all maximal sub-blocks of operations that do not themselves
/// include a for op (an operation could have a descendant for op in its
/// regions, though). Ignores the block terminators.
struct JamBlockGatherer {
  // Store iterators to the first and last op of each sub-block found.
  std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;

  // This is a linear time walk.
  void walk(Operation *op) {
    for (auto &region : op->getRegions())
      for (auto &block : region)
        walk(block);
  }

  void walk(Block &block) {
    for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
      auto subBlockStart = it;
      while (it != e && !isa<AffineForOp>(&*it))
        ++it;
      if (it != subBlockStart)
        subBlocks.emplace_back(subBlockStart, std::prev(it));
      // Process all for ops that appear next.
      while (it != e && isa<AffineForOp>(&*it))
        walk(&*it);
    }
  }
};
/// Unrolls and jams this loop by the specified factor.
LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
                                          uint64_t unrollJamFactor) {
  assert(unrollJamFactor > 0 && "unroll jam factor should be positive");

  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (unrollJamFactor == 1) {
    if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
        failed(promoteIfSingleIteration(forOp)))
      return failure();
    return success();
  }

  // Nothing in the loop body other than the terminator.
  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
    return success();

  // If the trip count is lower than the unroll jam factor, no unroll jam.
  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollJamFactor) {
    LLVM_DEBUG(llvm::dbgs() << "[failed] trip count < unroll-jam factor\n");
    return failure();
  }

  // If any control operand of any inner loop of `forOp` is defined within
  // `forOp`, no unroll jam.
  if (!areInnerBoundsInvariant(forOp))
    return failure();

  // Gather all sub-blocks to jam upon the loop being unrolled.
  JamBlockGatherer jbg;
  jbg.walk(forOp);
  auto &subBlocks = jbg.subBlocks;

  // Collect inner loops with iter_args.
  SmallVector<AffineForOp, 4> loopsWithIterArgs;
  forOp.walk([&](AffineForOp aForOp) {
    if (aForOp.getNumIterOperands() > 0)
      loopsWithIterArgs.push_back(aForOp);
  });

  // Get supported reductions for creating reduction ops at the end.
  SmallVector<LoopReduction> reductions;
  if (forOp.getNumIterOperands() > 0)
    getSupportedReductions(forOp, reductions);

  // Generate the cleanup loop if the trip count isn't a multiple of
  // unrollJamFactor.
  if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
    // Multi-result lower or upper bound maps can't be handled.
    if (forOp.getLowerBoundMap().getNumResults() != 1 ||
        forOp.getUpperBoundMap().getNumResults() != 1)
      return failure();
    if (failed(generateCleanupLoopForUnroll(forOp, unrollJamFactor)))
      assert(false && "cleanup loop lower bound map for single result lower "
                      "and upper bound maps can always be determined");
  }

  // `operandMaps[i - 1]` carries the old-to-new operand mapping for the i-th
  // unrolled iteration.
  SmallVector<BlockAndValueMapping, 4> operandMaps(unrollJamFactor - 1);

  // For any loop with iter_args, replace it with a new loop that has
  // `unrollJamFactor` copies of its iterOperands, iter_args and yield
  // operands.
  SmallVector<AffineForOp, 4> newLoopsWithIterArgs;
  OpBuilder builder(forOp.getContext());
  for (AffineForOp oldForOp : loopsWithIterArgs) {
    SmallVector<Value, 4> dupIterOperands, dupIterArgs, dupYieldOperands;
    ValueRange oldIterOperands = oldForOp.getIterOperands();
    ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
    ValueRange oldYieldOperands =
        cast<AffineYieldOp>(oldForOp.getBody()->getTerminator()).getOperands();
    // Collect the duplicated values; iterOperands and yield operands are
    // fixed up after cloning the sub-blocks.
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
      dupIterArgs.append(oldIterArgs.begin(), oldIterArgs.end());
      dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
    }
    // Create a new loop with the additional values; it takes over the body of
    // the original loop.
    AffineForOp newForOp = replaceForOpWithNewYields(
        builder, oldForOp, dupIterOperands, dupYieldOperands, dupIterArgs);
    newLoopsWithIterArgs.push_back(newForOp);
    // `forOp` may itself have been replaced with a new loop.
    if (oldForOp == forOp)
      forOp = newForOp;
    assert(oldForOp.use_empty() && "old for op should not have any user");
    oldForOp.erase();
    // Update `operandMaps` for the `newForOp` iterArgs and results.
    ValueRange newIterArgs = newForOp.getRegionIterArgs();
    unsigned oldNumIterArgs = oldIterArgs.size();
    ValueRange newResults = newForOp.getResults();
    unsigned oldNumResults = newResults.size() / unrollJamFactor;
    assert(oldNumIterArgs == oldNumResults &&
           "oldNumIterArgs must be the same as oldNumResults");
    for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
      for (unsigned j = 0; j < oldNumIterArgs; ++j) {
        // Map the old iterArgs and results to those in the i-th new set.
        operandMaps[i - 1].map(newIterArgs[j],
                               newIterArgs[i * oldNumIterArgs + j]);
        operandMaps[i - 1].map(newResults[j],
                               newResults[i * oldNumResults + j]);
      }
    }
  }

  // Scale the step of the loop being unroll-jammed by the unroll-jam factor.
  int64_t step = forOp.getStep();
  forOp.setStep(step * unrollJamFactor);

  auto forOpIV = forOp.getInductionVar();
  // Unroll and jam (appends unrollJamFactor - 1 additional copies).
  for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
    for (auto &subBlock : subBlocks) {
      // Builder to insert unroll-jammed bodies, right at the end of the
      // sub-block.
      OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));

      // If the induction variable is used, create a remapping to the value
      // for this unrolled instance: iv' = iv + i * step.
      if (!forOpIV.use_empty()) {
        auto d0 = builder.getAffineDimExpr(0);
        auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
        auto ivUnroll =
            builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
        operandMaps[i - 1].map(forOpIV, ivUnroll);
      }
      // Clone the sub-block being unroll-jammed.
      for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
        builder.clone(*it, operandMaps[i - 1]);
    }
    // Fix iterOperands and yield op operands of newly created loops.
    for (auto newForOp : newLoopsWithIterArgs) {
      unsigned oldNumIterOperands =
          newForOp.getNumIterOperands() / unrollJamFactor;
      unsigned numControlOperands = newForOp.getNumControlOperands();
      auto yieldOp = cast<AffineYieldOp>(newForOp.getBody()->getTerminator());
      unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
      assert(oldNumIterOperands == oldNumYieldOperands &&
             "oldNumIterOperands must be the same as oldNumYieldOperands");
      for (unsigned j = 0; j < oldNumIterOperands; ++j) {
        // The i-th duplication of an old iterOperand or yield op operand is
        // replaced with the value mapped in `operandMaps[i - 1]`, if any.
        newForOp.setOperand(numControlOperands + i * oldNumIterOperands + j,
                            operandMaps[i - 1].lookupOrDefault(
                                newForOp.getOperand(numControlOperands + j)));
        yieldOp.setOperand(
            i * oldNumYieldOperands + j,
            operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(j)));
      }
    }
  }

  if (forOp.getNumResults() > 0) {
    // Create reduction ops to combine every `unrollJamFactor` related results
    // into one value.
    builder.setInsertionPointAfter(forOp);
    auto loc = forOp.getLoc();
    unsigned oldNumResults = forOp.getNumResults() / unrollJamFactor;
    for (LoopReduction &reduction : reductions) {
      unsigned pos = reduction.iterArgPosition;
      Value lhs = forOp.getResult(pos);
      Value rhs;
      SmallPtrSet<Operation *, 4> newOps;
      for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
        rhs = forOp.getResult(i * oldNumResults + pos);
        // Create ops based on the reduction type.
        lhs = arith::getReductionOp(reduction.kind, builder, loc, lhs, rhs);
        if (!lhs)
          return failure();
        Operation *op = lhs.getDefiningOp();
        assert(op && "Reduction op should have been created");
        newOps.insert(op);
      }
      // Replace all uses except those in the newly created reduction ops.
      forOp.getResult(pos).replaceAllUsesExcept(lhs, newOps);
    }
  }

  // Promote the loop body up if this has turned into a single iteration loop.
  (void)promoteIfSingleIteration(forOp);
  return success();
}
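// Illustration of the reduction combining above (not from the original file):
// for `%r:2 = affine.for ... iter_args(...)` unroll-jammed by 2 with an add
// reduction, the loop now yields four values and the code above emits
//   %sum = arith.addf %r#0, %r#2   // combine both copies of the first arg
// and replaces the remaining uses of %r#0 with %sum (likewise %r#1 / %r#3).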
/// Performs loop interchange on 'forOpA' and 'forOpB'. Requires that they are
/// part of a perfectly nested sequence of loops.
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
  assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
  auto &forOpABody = forOpA.getBody()->getOperations();
  auto &forOpBBody = forOpB.getBody()->getOperations();

  // 1) Splice forOpA's non-terminator operations (which is just forOpB) just
  // before forOpA (in forOpA's parent's block); this leaves forOpA's body
  // containing only the terminator.
  forOpA->getBlock()->getOperations().splice(Block::iterator(forOpA),
                                             forOpABody, forOpABody.begin(),
                                             std::prev(forOpABody.end()));
  // 2) Splice forOpB's non-terminator operations into the beginning of
  // forOpA's body.
  forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
                    std::prev(forOpBBody.end()));
  // 3) Splice forOpA into the beginning of forOpB's body.
  forOpBBody.splice(forOpBBody.begin(), forOpA->getBlock()->getOperations(),
                    Block::iterator(forOpA));
}
/// Checks each dependence component against the permutation to see if the
/// desired loop interchange would violate dependences by making a dependence
/// component lexicographically negative.
static bool checkLoopInterchangeDependences(
    const std::vector<SmallVector<DependenceComponent, 2>> &depCompsVec,
    ArrayRef<AffineForOp> loops, ArrayRef<unsigned> loopPermMap) {
  // Invert the permutation map.
  unsigned maxLoopDepth = loops.size();
  SmallVector<unsigned, 4> loopPermMapInv;
  loopPermMapInv.resize(maxLoopDepth);
  for (unsigned i = 0; i < maxLoopDepth; ++i)
    loopPermMapInv[loopPermMap[i]] = i;

  for (const auto &depComps : depCompsVec) {
    assert(depComps.size() >= maxLoopDepth);
    // Check if the first non-zero dependence component is positive. This
    // iterates through the loops in the desired (permuted) order.
    for (unsigned j = 0; j < maxLoopDepth; ++j) {
      unsigned permIndex = loopPermMapInv[j];
      assert(depComps[permIndex].lb);
      int64_t depCompLb = *depComps[permIndex].lb;
      if (depCompLb > 0)
        break;
      if (depCompLb < 0)
        return false;
    }
  }
  return true;
}
/// Checks if the loop interchange permutation 'loopPermMap' of the perfectly
/// nested sequence of loops in 'loops' would violate dependences.
bool mlir::isValidLoopInterchangePermutation(ArrayRef<AffineForOp> loops,
                                             ArrayRef<unsigned> loopPermMap) {
  // Gather dependence components for dependences between all ops in the loop
  // nest rooted at 'loops[0]', at loop depths in range [1, maxLoopDepth].
  assert(loopPermMap.size() == loops.size());
  unsigned maxLoopDepth = loops.size();
  std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
  getDependenceComponents(loops[0], maxLoopDepth, &depCompsVec);
  return checkLoopInterchangeDependences(depCompsVec, loops, loopPermMap);
}
/// Returns true if `loops` is a perfectly nested loop nest, where the loops
/// appear in it from outermost to innermost.
bool LLVM_ATTRIBUTE_UNUSED
mlir::isPerfectlyNested(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no loops provided");

  // We already know that the block can't be empty.
  auto hasTwoElements = [](Block *block) {
    auto secondOpIt = std::next(block->begin());
    return secondOpIt != block->end() && &*secondOpIt == &block->back();
  };

  auto enclosingLoop = loops.front();
  for (auto loop : loops.drop_front()) {
    auto parentForOp = dyn_cast<AffineForOp>(loop->getParentOp());
    // parentForOp's body should be just this loop and the terminator.
    if (parentForOp != enclosingLoop || !hasTwoElements(parentForOp.getBody()))
      return false;
    enclosingLoop = loop;
  }
  return true;
}
/// Performs a loop permutation on a perfectly nested loop nest `input`:
/// input[i] moves from position i to position permMap[i]. Returns the
/// position in `input` of the loop that becomes the new outermost loop.
unsigned mlir::permuteLoops(MutableArrayRef<AffineForOp> input,
                            ArrayRef<unsigned> permMap) {
  assert(input.size() == permMap.size() && "invalid permutation map size");
  // Check whether the permutation spec is valid: sort and check it's iota.
  SmallVector<unsigned, 4> checkPermMap(permMap.begin(), permMap.end());
  llvm::sort(checkPermMap);
  if (llvm::any_of(llvm::enumerate(checkPermMap),
                   [](const auto &en) { return en.value() != en.index(); }))
    assert(false && "invalid permutation map");

  // Nothing to do.
  if (input.size() < 2)
    return 0;

  assert(isPerfectlyNested(input) && "input not perfectly nested");

  // Compute the inverse mapping: position i of the permuted nest is occupied
  // by input[invPermMap[i].second].
  SmallVector<std::pair<unsigned, unsigned>, 4> invPermMap;
  for (unsigned i = 0, e = input.size(); i < e; ++i)
    invPermMap.push_back({permMap[i], i});
  llvm::sort(invPermMap);

  // Move the innermost loop body to the loop that would be the innermost in
  // the permuted nest (only if the innermost loop is going to change).
  if (permMap.back() != input.size() - 1) {
    auto *destBody = input[invPermMap.back().second].getBody();
    auto *srcBody = input.back().getBody();
    destBody->getOperations().splice(destBody->begin(),
                                     srcBody->getOperations(), srcBody->begin(),
                                     std::prev(srcBody->end()));
  }

  // Move each loop in `input` in reverse order so that its body is empty when
  // it's being moved.
  for (int i = input.size() - 1; i >= 0; --i) {
    // If this has to become the outermost loop after permutation, add it to
    // the parent block of the original root.
    if (permMap[i] == 0) {
      // If the root remains the same, nothing to do.
      if (i == 0)
        continue;
      // Make input[i] the new outermost loop, moving it into parentBlock.
      auto *parentBlock = input[0]->getBlock();
      parentBlock->getOperations().splice(Block::iterator(input[0]),
                                          input[i]->getBlock()->getOperations(),
                                          Block::iterator(input[i]));
      continue;
    }

    // If the parent in the permuted order is the same as in the original,
    // nothing to do.
    unsigned parentPosInInput = invPermMap[permMap[i] - 1].second;
    if (i > 0 && static_cast<unsigned>(i - 1) == parentPosInInput)
      continue;

    // Move input[i] to its surrounding loop in the permuted ordering.
    auto *destBody = input[parentPosInInput].getBody();
    destBody->getOperations().splice(destBody->begin(),
                                     input[i]->getBlock()->getOperations(),
                                     Block::iterator(input[i]));
  }

  return invPermMap[0].second;
}
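// Worked example (illustrative): for a nest (%i, %j, %k) and
// permMap = {2, 0, 1}, loop %i moves to position 2 (innermost), %j to
// position 0 (outermost), and %k to position 1, yielding (%j, %k, %i); the
// returned value is 1, the index of %j in the original `input`.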
/// Sinks all sequential loops to the innermost levels (while preserving their
/// relative order) and moves all parallel loops to the outermost (again
/// preserving relative order).
AffineForOp mlir::sinkSequentialLoops(AffineForOp forOp) {
  SmallVector<AffineForOp, 4> loops;
  getPerfectlyNestedLoops(loops, forOp);
  if (loops.size() < 2)
    return forOp;

  // Gather dependence components for dependences between all ops in the loop
  // nest rooted at 'loops[0]', at loop depths in range [1, maxLoopDepth].
  unsigned maxLoopDepth = loops.size();
  std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
  getDependenceComponents(loops[0], maxLoopDepth, &depCompsVec);

  // Mark loops as either parallel or sequential.
  SmallVector<bool, 8> isParallelLoop(maxLoopDepth, true);
  for (auto &depComps : depCompsVec) {
    assert(depComps.size() >= maxLoopDepth);
    for (unsigned j = 0; j < maxLoopDepth; ++j) {
      DependenceComponent &depComp = depComps[j];
      assert(depComp.lb.hasValue() && depComp.ub.hasValue());
      if (depComp.lb.getValue() != 0 || depComp.ub.getValue() != 0)
        isParallelLoop[j] = false;
    }
  }

  // Count the parallel loops.
  unsigned numParallelLoops = 0;
  for (unsigned i = 0, e = isParallelLoop.size(); i < e; ++i)
    if (isParallelLoop[i])
      ++numParallelLoops;

  // Compute the permutation that moves parallel loops outermost and
  // sequential loops innermost, preserving relative order within each group.
  SmallVector<unsigned, 4> loopPermMap(maxLoopDepth);
  unsigned nextSequentialLoop = numParallelLoops;
  unsigned nextParallelLoop = 0;
  for (unsigned i = 0; i < maxLoopDepth; ++i) {
    if (isParallelLoop[i])
      loopPermMap[i] = nextParallelLoop++;
    else
      loopPermMap[i] = nextSequentialLoop++;
  }

  // Check if the permutation would violate dependences.
  if (!checkLoopInterchangeDependences(depCompsVec, loops, loopPermMap))
    return forOp;
  // Perform the interchange according to 'loopPermMap'.
  unsigned loopNestRootIndex = permuteLoops(loops, loopPermMap);
  return loops[loopNestRootIndex];
}
/// Factors out common behavior to add a new `iv` (resp. `iv` + `offset`) to
/// the lower (resp. upper) loop bound: the IV is inserted as a new dim
/// operand and the corresponding bound expression is appended.
static void augmentMapAndBounds(OpBuilder &b, Value iv, AffineMap *map,
                                SmallVector<Value, 4> *operands,
                                int64_t offset = 0) {
  auto bounds = llvm::to_vector<4>(map->getResults());
  bounds.push_back(b.getAffineDimExpr(map->getNumDims()) + offset);
  operands->insert(operands->begin() + map->getNumDims(), iv);
  *map = AffineMap::get(map->getNumDims() + 1, map->getNumSymbols(), bounds,
                        b.getContext());
  canonicalizeMapAndOperands(map, operands);
}
/// Strip-mines `forOp` by `factor` and sinks it under each of the `targets`.
/// Returns the new AffineForOps, one per target, nested immediately under
/// each target.
static SmallVector<AffineForOp, 8>
stripmineSink(AffineForOp forOp, uint64_t factor,
              ArrayRef<AffineForOp> targets) {
  auto originalStep = forOp.getStep();
  auto scaledStep = originalStep * factor;
  forOp.setStep(scaledStep);

  OpBuilder b(forOp->getBlock(), std::next(Block::iterator(forOp)));

  // Lower-bound map creation.
  auto lbMap = forOp.getLowerBoundMap();
  SmallVector<Value, 4> lbOperands(forOp.getLowerBoundOperands());
  augmentMapAndBounds(b, forOp.getInductionVar(), &lbMap, &lbOperands);

  // Upper-bound map creation.
  auto ubMap = forOp.getUpperBoundMap();
  SmallVector<Value, 4> ubOperands(forOp.getUpperBoundOperands());
  augmentMapAndBounds(b, forOp.getInductionVar(), &ubMap, &ubOperands,
                      /*offset=*/scaledStep);

  auto iv = forOp.getInductionVar();
  SmallVector<AffineForOp, 8> innerLoops;
  for (auto t : targets) {
    // Insert the new forOp before the terminator of `t`.
    auto b = OpBuilder::atBlockTerminator(t.getBody());
    auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
                                          ubOperands, ubMap, originalStep);
    auto begin = t.getBody()->begin();
    // Skip the terminator and `newForOp`, which is just before it.
    auto nOps = t.getBody()->getOperations().size() - 2;
    newForOp.getBody()->getOperations().splice(
        newForOp.getBody()->getOperations().begin(),
        t.getBody()->getOperations(), begin, std::next(begin, nOps));
    replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
                               newForOp.getRegion());
    innerLoops.push_back(newForOp);
  }

  return innerLoops;
}
/// Strip-mines `forOp` by `factor` and sinks it under a single `target`.
template <typename SizeType>
static AffineForOp stripmineSink(AffineForOp forOp, SizeType factor,
                                 AffineForOp target) {
  auto res = stripmineSink(forOp, factor, ArrayRef<AffineForOp>(target));
  assert(res.size() == 1 && "Expected 1 inner forOp");
  return res[0];
}

/// Performs tiling of imperfectly nested loops (with interchange) by
/// strip-mining the `forOps` by `sizes` and sinking them, in their order of
/// occurrence in `forOps`, under each of the `targets`.
SmallVector<SmallVector<AffineForOp, 8>, 8>
mlir::tile(ArrayRef<AffineForOp> forOps, ArrayRef<uint64_t> sizes,
           ArrayRef<AffineForOp> targets) {
  SmallVector<SmallVector<AffineForOp, 8>, 8> res;
  SmallVector<AffineForOp, 8> currentTargets(targets.begin(), targets.end());
  for (auto it : llvm::zip(forOps, sizes)) {
    auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
    res.push_back(step);
    currentTargets = step;
  }
  return res;
}

/// Single-target variant of the above.
SmallVector<AffineForOp, 8> mlir::tile(ArrayRef<AffineForOp> forOps,
                                       ArrayRef<uint64_t> sizes,
                                       AffineForOp target) {
  SmallVector<AffineForOp, 8> res;
  for (auto loops : tile(forOps, sizes, ArrayRef<AffineForOp>(target))) {
    assert(loops.size() == 1);
    res.push_back(loops[0]);
  }
  return res;
}
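// Usage sketch (an assumption, not code from this file): strip-mine an
// imperfect 2-d nest by 32x16, sinking the strip-mined loops under the
// innermost target.
//
//   SmallVector<AffineForOp, 8> newLoops =
//       tile({outerLoop, innerLoop}, {32, 16}, /*target=*/innerLoop);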
/// Replaces a perfect nest of "for" loops with a single linearized loop.
LogicalResult mlir::coalesceLoops(MutableArrayRef<AffineForOp> loops) {
  if (loops.size() < 2)
    return success();

  AffineForOp innermost = loops.back();
  AffineForOp outermost = loops.front();
  AffineBound ub = outermost.getUpperBound();
  AffineMap origUbMap = ub.getMap();
  Location loc = outermost.getLoc();
  OpBuilder builder(outermost);
  for (AffineForOp loop : loops) {
    // We only work on normalized loops.
    if (loop.getStep() != 1 || !loop.hasConstantLowerBound() ||
        loop.getConstantLowerBound() != 0)
      return failure();
  }
  SmallVector<Value, 4> upperBoundSymbols;
  SmallVector<Value, 4> ubOperands(ub.getOperands().begin(),
                                   ub.getOperands().end());

  // 1. Store the upper bound of the outermost loop in a variable. If the
  // upper bound map has more than one result, take their minimum.
  Value prev;
  if (!llvm::hasSingleElement(origUbMap.getResults()))
    prev = builder.create<AffineMinOp>(loc, origUbMap, ubOperands);
  else
    prev = builder.create<AffineApplyOp>(loc, origUbMap, ubOperands);
  upperBoundSymbols.push_back(prev);

  // 2. Emit code computing the upper bound of the coalesced loop as the
  // product of the iteration counts of all loops.
  for (AffineForOp loop : loops.drop_front()) {
    ub = loop.getUpperBound();
    origUbMap = ub.getMap();
    ubOperands = ub.getOperands();
    Value upperBound;
    if (!llvm::hasSingleElement(origUbMap.getResults()))
      upperBound = builder.create<AffineMinOp>(loc, origUbMap, ubOperands);
    else
      upperBound = builder.create<AffineApplyOp>(loc, origUbMap, ubOperands);
    upperBoundSymbols.push_back(upperBound);
    SmallVector<Value, 4> operands;
    operands.push_back(prev);
    operands.push_back(upperBound);
    // Maintain the running product of the loop upper bounds.
    prev = builder.create<AffineApplyOp>(
        loc,
        AffineMap::get(/*dimCount=*/1, /*symbolCount=*/1,
                       builder.getAffineDimExpr(0) *
                           builder.getAffineSymbolExpr(0)),
        operands);
  }
  // Set the upper bound of the coalesced loop.
  AffineMap newUbMap = AffineMap::get(
      /*dimCount=*/0, /*symbolCount=*/1, builder.getAffineSymbolExpr(0),
      builder.getContext());
  outermost.setUpperBound(prev, newUbMap);

  builder.setInsertionPointToStart(outermost.getBody());

  // 3. Remap the induction variables:
  //   iv_i = floordiv(iv_linear, product-of-inner-loop-ranges) mod range_i,
  // computed iteratively from the innermost loop outwards using a running
  // quotient.
  Value previous = outermost.getInductionVar();
  for (unsigned idx = loops.size(); idx > 0; --idx) {
    if (idx != loops.size()) {
      SmallVector<Value, 4> operands;
      operands.push_back(previous);
      operands.push_back(upperBoundSymbols[idx]);
      previous = builder.create<AffineApplyOp>(
          loc,
          AffineMap::get(/*dimCount=*/1, /*symbolCount=*/1,
                         builder.getAffineDimExpr(0).floorDiv(
                             builder.getAffineSymbolExpr(0))),
          operands);
    }
    // The IV of this nested loop after coalescing.
    Value inductionVariable;
    if (idx == 1) {
      inductionVariable = previous;
    } else {
      SmallVector<Value, 4> applyOperands;
      applyOperands.push_back(previous);
      applyOperands.push_back(upperBoundSymbols[idx - 1]);
      inductionVariable = builder.create<AffineApplyOp>(
          loc,
          AffineMap::get(/*dimCount=*/1, /*symbolCount=*/1,
                         builder.getAffineDimExpr(0) %
                             builder.getAffineSymbolExpr(0)),
          applyOperands);
    }
    replaceAllUsesInRegionWith(loops[idx - 1].getInductionVar(),
                               inductionVariable, loops.back().getRegion());
  }

  // 4. Move the operations from the innermost loop into the outermost one,
  // delete the extra terminator and the now-empty intermediate loops.
  AffineForOp secondOutermostLoop = loops[1];
  innermost.getBody()->back().erase();
  outermost.getBody()->getOperations().splice(
      Block::iterator(secondOutermostLoop.getOperation()),
      innermost.getBody()->getOperations());
  secondOutermostLoop.erase();
  return success();
}
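// Recovery of the original IVs after coalescing (illustrative): for two
// normalized loops with ranges N and M coalesced into one loop of N * M
// iterations with IV %iv, the remapping above materializes
//   %i = floordiv(%iv, M)  and  %j = %iv mod M
// via affine.apply ops, matching
//   iv_i = floordiv(iv_linear, product-of-inner-loop-ranges) mod range_i.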
/// Maps `forOp` for execution on a parallel grid of processors `processorId`
/// with `numProcessors` along each grid dimension (cyclic distribution).
void mlir::mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef<Value> processorId,
                                 ArrayRef<Value> numProcessors) {
  assert(processorId.size() == numProcessors.size());
  if (processorId.empty())
    return;

  OpBuilder b(forOp);
  Location loc(forOp.getLoc());
  AffineExpr lhs, rhs;
  bindSymbols(forOp.getContext(), lhs, rhs);
  auto mulMap = AffineMap::get(0, 2, lhs * rhs);
  auto addMap = AffineMap::get(0, 2, lhs + rhs);

  // Linearize the (multi-dimensional) processor id.
  Value linearIndex = processorId.front();
  for (unsigned i = 1, e = processorId.size(); i < e; ++i) {
    auto mulApplyOp = b.create<AffineApplyOp>(
        loc, mulMap, ValueRange{linearIndex, numProcessors[i]});
    linearIndex = b.create<AffineApplyOp>(
        loc, addMap, ValueRange{mulApplyOp, processorId[i]});
  }

  // lb' = lb + linearIndex * step.
  auto mulApplyOp = b.create<AffineApplyOp>(
      loc, mulMap, ValueRange{linearIndex, forOp.getStep()});
  Value lb = b.create<AffineApplyOp>(
      loc, addMap, ValueRange{mulApplyOp, forOp.getLowerBound()});
  forOp.setLowerBound(lb);

  // step' = step * (product of all processor counts).
  Value step = forOp.getStep();
  for (auto numProcs : numProcessors)
    step = b.create<AffineApplyOp>(loc, mulMap, ValueRange{numProcs, step});
  forOp.setStep(step);
}
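// Arithmetic performed above (illustrative): with a linearized processor id
// `p` over P total processors, the loop becomes
//   lb' = lb + p * step,  step' = step * P
// so each processor executes the iterations congruent to its id, a standard
// cyclic distribution.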
/// Given a memref region, determine the lowest depth at which transfers can
/// be placed for it, and return the corresponding block and start/end
/// positions in the block for placing incoming (read) and outgoing (write)
/// copies respectively.
static void
findHighestBlockForPlacement(const MemRefRegion &region, Block &block,
                             Block::iterator &begin, Block::iterator &end,
                             Block **copyPlacementBlock,
                             Block::iterator *copyInPlacementStart,
                             Block::iterator *copyOutPlacementStart) {
  const auto *cst = region.getConstraints();
  SmallVector<Value, 4> symbols;
  cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);

  SmallVector<AffineForOp, 4> enclosingFors;
  getLoopIVs(*block.begin(), &enclosingFors);
  // Walk up the loop parents till we find an IV on which this region is
  // symbolic/variant.
  auto it = enclosingFors.rbegin();
  for (auto e = enclosingFors.rend(); it != e; ++it) {
    if (llvm::is_contained(symbols, it->getInductionVar()))
      break;
  }

  if (it != enclosingFors.rbegin()) {
    auto lastInvariantIV = *std::prev(it);
    *copyInPlacementStart = Block::iterator(lastInvariantIV.getOperation());
    *copyOutPlacementStart = std::next(*copyInPlacementStart);
    *copyPlacementBlock = lastInvariantIV->getBlock();
  } else {
    *copyInPlacementStart = begin;
    *copyOutPlacementStart = end;
    *copyPlacementBlock = &block;
  }
}
/// Returns striding information for a copy/transfer of this region with
/// potentially multiple striding levels from outermost to innermost. For an
/// n-dimensional region there can be at most n-1 levels of striding.
static void getMultiLevelStrides(const MemRefRegion &region,
                                 ArrayRef<int64_t> bufferShape,
                                 SmallVectorImpl<StrideInfo> *strideInfos) {
  if (bufferShape.size() <= 1)
    return;

  int64_t numEltPerStride = 1;
  int64_t stride = 1;
  for (int d = bufferShape.size() - 1; d >= 1; d--) {
    int64_t dimSize = region.memref.getType().cast<MemRefType>().getDimSize(d);
    stride *= dimSize;
    numEltPerStride *= bufferShape[d];
    // A stride is needed only if the region has a shorter extent than the
    // memref along this dimension *and* has an extent greater than one along
    // the next major dimension.
    if (bufferShape[d] < dimSize && bufferShape[d - 1] > 1)
      strideInfos->push_back({stride, numEltPerStride});
  }
}
/// Generates a point-wise copy from/to `memref` to/from `fastMemRef` and
/// returns the outermost AffineForOp of the copy loop nest. `fastBufOffsets`
/// contain the expressions to be subtracted from the copy loop iterators to
/// index the fast buffer. For a 2-d region, a copy-in looks like:
///   for x = ...
///     for y = ...
///       fast_buf[x - offset_x][y - offset_y] = memref[x][y]
static AffineForOp
generatePointWiseCopy(Location loc, Value memref, Value fastMemRef,
                      ArrayRef<AffineMap> lbMaps, ArrayRef<Value> lbOperands,
                      ArrayRef<AffineMap> ubMaps, ArrayRef<Value> ubOperands,
                      ArrayRef<AffineExpr> fastBufOffsets, bool isCopyOut,
                      OpBuilder b) {
  assert(llvm::all_of(lbMaps, [&](AffineMap lbMap) {
    return lbMap.getNumInputs() == lbOperands.size();
  }));
  assert(llvm::all_of(ubMaps, [&](AffineMap ubMap) {
    return ubMap.getNumInputs() == ubOperands.size();
  }));

  unsigned rank = memref.getType().cast<MemRefType>().getRank();
  assert(lbMaps.size() == rank && "wrong number of lb maps");
  assert(ubMaps.size() == rank && "wrong number of ub maps");

  SmallVector<Value, 4> memIndices;
  SmallVector<AffineExpr, 4> fastBufExprs;
  SmallVector<Value, 4> fastBufMapOperands;
  AffineForOp copyNestRoot;
  SmallVector<AffineApplyOp, 4> mayBeDeadApplys;
  for (unsigned d = 0; d < rank; ++d) {
    auto forOp = createCanonicalizedAffineForOp(b, loc, lbOperands, lbMaps[d],
                                                ubOperands, ubMaps[d]);
    if (d == 0)
      copyNestRoot = forOp;

    b = OpBuilder::atBlockTerminator(forOp.getBody());

    auto fastBufOffsetMap =
        AffineMap::get(lbOperands.size(), 0, fastBufOffsets[d]);
    auto offset = b.create<AffineApplyOp>(loc, fastBufOffsetMap, lbOperands);

    // Construct the subscript for the fast memref: x - offset_x.
    fastBufExprs.push_back(b.getAffineDimExpr(2 * d + 1) -
                           b.getAffineDimExpr(2 * d));
    fastBufMapOperands.push_back(offset);
    fastBufMapOperands.push_back(forOp.getInductionVar());
    mayBeDeadApplys.push_back(offset);

    // Subscript for the slow memref being copied.
    memIndices.push_back(forOp.getInductionVar());
  }

  auto fastBufMap =
      AffineMap::get(2 * rank, /*symbolCount=*/0, fastBufExprs, b.getContext());
  fullyComposeAffineMapAndOperands(&fastBufMap, &fastBufMapOperands);
  fastBufMap = simplifyAffineMap(fastBufMap);
  canonicalizeMapAndOperands(&fastBufMap, &fastBufMapOperands);

  // Drop any dead affine.applys.
  for (auto applyOp : mayBeDeadApplys)
    if (applyOp.use_empty())
      applyOp.erase();

  if (!isCopyOut) {
    // Copy in.
    auto load = b.create<AffineLoadOp>(loc, memref, memIndices);
    b.create<AffineStoreOp>(loc, load, fastMemRef, fastBufMap,
                            fastBufMapOperands);
    return copyNestRoot;
  }

  // Copy out.
  auto load =
      b.create<AffineLoadOp>(loc, fastMemRef, fastBufMap, fastBufMapOperands);
  b.create<AffineStoreOp>(loc, load, memref, memIndices);
  return copyNestRoot;
}
/// Creates a buffer in the faster memory space for the specified memref
/// region; generates a copy from the lower memory space to this one, and
/// replaces all loads/stores in the block range [`begin', `end') of `block'
/// to load/store from that buffer.
static LogicalResult generateCopy(
    const MemRefRegion &region, Block *block, Block::iterator begin,
    Block::iterator end, Block *copyPlacementBlock,
    Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart,
    AffineCopyOptions copyOptions, DenseMap<Value, Value> &fastBufferMap,
    DenseSet<Operation *> &copyNests, uint64_t *sizeInBytes,
    Block::iterator *nBegin, Block::iterator *nEnd) {
  *nBegin = begin;
  *nEnd = end;

  func::FuncOp f = begin->getParentOfType<func::FuncOp>();
  OpBuilder top(f.getBody());
  Value zeroIndex = top.create<arith::ConstantIndexOp>(f.getLoc(), 0);

  // Is the copy-out point at the end of the block where we are doing explicit
  // copying?
  bool isCopyOutAtEndOfBlock = (end == copyOutPlacementStart);

  // Copies for read regions are inserted at `copyInPlacementStart`; those for
  // write regions at `copyOutPlacementStart`.
  OpBuilder prologue(copyPlacementBlock, copyInPlacementStart);
  OpBuilder epilogue(copyPlacementBlock, copyOutPlacementStart);
  OpBuilder &b = region.isWrite() ? epilogue : prologue;

  auto loc = region.loc;
  auto memref = region.memref;
  auto memRefType = memref.getType().cast<MemRefType>();

  if (!memRefType.getLayout().isIdentity()) {
    LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
    return failure();
  }

  SmallVector<Value, 4> memIndices; // indices for the original memref
  SmallVector<Value, 4> bufIndices; // indices for the faster buffer
  unsigned rank = memRefType.getRank();
  SmallVector<int64_t, 4> fastBufferShape;

  // Compute the extents of the buffer.
  std::vector<SmallVector<int64_t, 4>> lbs;
  SmallVector<int64_t, 8> lbDivisors;
  Optional<int64_t> numElements = region.getConstantBoundingSizeAndShape(
      &fastBufferShape, &lbs, &lbDivisors);
  if (!numElements) {
    LLVM_DEBUG(llvm::dbgs() << "Non-constant region size not supported\n");
    return failure();
  }
  if (*numElements == 0) {
    LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n");
    *sizeInBytes = 0;
    return success();
  }

  SmallVector<AffineMap, 4> lbMaps(rank), ubMaps(rank);
  for (unsigned i = 0; i < rank; ++i)
    region.getLowerAndUpperBound(i, lbMaps[i], ubMaps[i]);

  const FlatAffineValueConstraints *cst = region.getConstraints();
  // 'regionSymbols' hold values on which this memory region is parametric:
  // typically the loop IVs surrounding the copy-generation level.
  SmallVector<Value, 8> regionSymbols;
  cst->getValues(rank, cst->getNumVars(), &regionSymbols);

  // Compute the offsets of the region with respect to the original memref.
  SmallVector<AffineExpr, 4> fastBufOffsets;
  fastBufOffsets.reserve(rank);
  for (unsigned d = 0; d < rank; d++) {
    assert(lbs[d].size() == cst->getNumCols() - rank && "incorrect bound size");

    AffineExpr offset = top.getAffineConstantExpr(0);
    for (unsigned j = 0, e = cst->getNumCols() - rank - 1; j < e; j++)
      offset = offset + lbs[d][j] * top.getAffineDimExpr(j);
    assert(lbDivisors[d] > 0);
    offset =
        (offset + lbs[d][cst->getNumCols() - 1 - rank]).floorDiv(lbDivisors[d]);

    // Set the copy start location for this dimension in the original memref.
    if (auto caf = offset.dyn_cast<AffineConstantExpr>()) {
      auto indexVal = caf.getValue();
      if (indexVal == 0) {
        memIndices.push_back(zeroIndex);
      } else {
        memIndices.push_back(
            top.create<arith::ConstantIndexOp>(loc, indexVal).getResult());
      }
    } else {
      // The start location is the lower bound along the corresponding
      // dimension of the memory region.
      auto map = AffineMap::get(
          cst->getNumDimVars() + cst->getNumSymbolVars() - rank, 0, offset);
      memIndices.push_back(b.create<AffineApplyOp>(loc, map, regionSymbols));
    }
    // The fast buffer is copied into at location zero.
    bufIndices.push_back(zeroIndex);

    // Record the offsets since they are needed to remap the memory accesses
    // of the original memref further below.
    fastBufOffsets.push_back(offset);
  }

  // The faster memory space buffer.
  Value fastMemRef;

  // Check if a buffer was already created.
  bool existingBuf = fastBufferMap.count(memref) > 0;
  if (!existingBuf) {
    AffineMap fastBufferLayout = b.getMultiDimIdentityMap(rank);
    auto fastMemRefType =
        MemRefType::get(fastBufferShape, memRefType.getElementType(),
                        fastBufferLayout, copyOptions.fastMemorySpace);

    // Create the fast memory space buffer just before the 'affine.for' op.
    fastMemRef =
        prologue.create<memref::AllocOp>(loc, fastMemRefType).getResult();
    // Record it.
    fastBufferMap[memref] = fastMemRef;
    *sizeInBytes = getMemRefSizeInBytes(fastMemRefType).getValue();
    LLVM_DEBUG(emitRemarkForBlock(*block)
               << "Creating fast buffer of type " << fastMemRefType
               << " and size " << llvm::divideCeil(*sizeInBytes, 1024)
               << " KiB\n");
  } else {
    // Reuse the one already created.
    fastMemRef = fastBufferMap[memref];
    *sizeInBytes = 0;
  }

  auto numElementsSSA = top.create<arith::ConstantIndexOp>(loc, *numElements);

  Value dmaStride = nullptr;
  Value numEltPerDmaStride = nullptr;
  if (copyOptions.generateDma) {
    SmallVector<StrideInfo, 4> dmaStrideInfos;
    getMultiLevelStrides(region, fastBufferShape, &dmaStrideInfos);

    // TODO: use all stride levels once DmaStartOp is extended for multi-level
    // strides.
    if (dmaStrideInfos.size() > 1) {
      LLVM_DEBUG(llvm::dbgs() << "Only up to one level of stride supported\n");
      return failure();
    }
    if (!dmaStrideInfos.empty()) {
      dmaStride =
          top.create<arith::ConstantIndexOp>(loc, dmaStrideInfos[0].stride);
      numEltPerDmaStride = top.create<arith::ConstantIndexOp>(
          loc, dmaStrideInfos[0].numEltPerStride);
    }
  }

  // Record the last operation where we want the memref replacement to end.
  auto postDomFilter = std::prev(end);

  if (!copyOptions.generateDma) {
    // A point-wise copy nest is generated (with load/store pairs).
    auto copyNest =
        generatePointWiseCopy(loc, memref, fastMemRef, lbMaps,
                              /*lbOperands=*/regionSymbols, ubMaps,
                              /*ubOperands=*/regionSymbols, fastBufOffsets,
                              region.isWrite(), b);
    copyNests.insert(copyNest);

    // Since new ops are being appended (for copy-outs), adjust the end of the
    // block range being processed if necessary.
    if (region.isWrite() && isCopyOutAtEndOfBlock)
      *nEnd = Block::iterator(copyNest.getOperation());
  } else {
    // DMA generation: create a tag (single-element 1-d memref) for the DMA.
    auto tagMemRefType = MemRefType::get({1}, top.getIntegerType(32), {},
                                         copyOptions.tagMemorySpace);
    auto tagMemRef = prologue.create<memref::AllocOp>(loc, tagMemRefType);
    // ... (memAffineMap/bufAffineMap/tagAffineMap and tagIndices elided)
    if (!region.isWrite()) {
      // DMA non-blocking read from the original buffer to the fast buffer.
      b.create<AffineDmaStartOp>(loc, memref, memAffineMap, memIndices,
                                 fastMemRef, bufAffineMap, bufIndices,
                                 tagMemRef, tagAffineMap, tagIndices,
                                 numElementsSSA, dmaStride, numEltPerDmaStride);
    } else {
      // DMA non-blocking write from the fast buffer to the original memref.
      auto op = b.create<AffineDmaStartOp>(
          loc, fastMemRef, bufAffineMap, bufIndices, memref, memAffineMap,
          memIndices, tagMemRef, tagAffineMap, tagIndices, numElementsSSA,
          dmaStride, numEltPerDmaStride);
      // New ops may be appended at 'end': adjust the range end.
      if (isCopyOutAtEndOfBlock)
        *nEnd = Block::iterator(op.getOperation());
    }
    // ... (matching AffineDmaWaitOp to block on completion elided)

    // Generate a dealloc for the tag.
    auto tagDeallocOp = epilogue.create<memref::DeallocOp>(loc, tagMemRef);
    if (*nEnd == end && isCopyOutAtEndOfBlock)
      *nEnd = Block::iterator(tagDeallocOp.getOperation());
  }

  // Generate a dealloc for the buffer.
  if (!existingBuf) {
    auto bufDeallocOp = epilogue.create<memref::DeallocOp>(loc, fastMemRef);
    if (!copyOptions.generateDma && *nEnd == end && isCopyOutAtEndOfBlock)
      *nEnd = Block::iterator(bufDeallocOp.getOperation());
  }

  // Replace all uses of the old memref with the faster one while remapping
  // access indices (subtracting out the lower bound offsets per dimension).
  SmallVector<AffineExpr, 4> remapExprs;
  remapExprs.reserve(rank);
  for (unsigned i = 0; i < rank; i++) {
    // The starting operands of indexRemap are the region symbols, followed by
    // the memref's original indices.
    auto dimExpr = b.getAffineDimExpr(regionSymbols.size() + i);
    remapExprs.push_back(dimExpr - fastBufOffsets[i]);
  }
  auto indexRemap = AffineMap::get(regionSymbols.size() + rank, 0, remapExprs,
                                   b.getContext());

  // Record the begin since it may be invalidated by the memref replacement.
  Block::iterator prevOfBegin;
  bool isBeginAtStartOfBlock = (begin == block->begin());
  if (!isBeginAtStartOfBlock)
    prevOfBegin = std::prev(begin);

  // ... (replaceAllMemRefUsesWith over [begin, postDomFilter] elided)

  *nBegin = isBeginAtStartOfBlock ? block->begin() : std::next(prevOfBegin);
  return success();
}
/// Construct the memref region to just include the entire memref; returns
/// false for dynamically shaped memrefs. `numParamLoopIVs` is the number of
/// enclosing loop IVs of `op` (starting from the outermost) that the region
/// is parametric on.
static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs,
                                  MemRefRegion *region) {
  unsigned rank;
  if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
    rank = loadOp.getMemRefType().getRank();
    region->memref = loadOp.getMemRef();
    region->setWrite(false);
  } else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
    rank = storeOp.getMemRefType().getRank();
    region->memref = storeOp.getMemRef();
    region->setWrite(true);
  } else {
    assert(false && "expected load or store op");
    return false;
  }
  auto memRefType = region->memref.getType().cast<MemRefType>();
  if (!memRefType.hasStaticShape())
    return false;

  auto *regionCst = region->getConstraints();

  // Just get the first `numParamLoopIVs` IVs, which the memref region is
  // parametric on.
  SmallVector<AffineForOp, 4> ivs;
  getLoopIVs(*op, &ivs);
  ivs.resize(numParamLoopIVs);
  SmallVector<Value, 4> symbols;
  extractForInductionVars(ivs, &symbols);
  regionCst->reset(rank, numParamLoopIVs, 0);
  regionCst->setValues(rank, rank + numParamLoopIVs, symbols);

  // Memref dim sizes provide the bounds.
  for (unsigned d = 0; d < rank; d++) {
    auto dimSize = memRefType.getDimSize(d);
    assert(dimSize > 0 && "filtered dynamic shapes above");
    // ... (add 0 <= d < dimSize bounds on the region's d-th variable)
  }
  return true;
}
/// Performs explicit copying for the contiguous sequence of operations in the
/// block iterator range [`begin', `end'), where `end' can't be past the
/// terminator of the block (since additional operations are potentially
/// inserted right before `end').
LogicalResult mlir::affineDataCopyGenerate(Block::iterator begin,
                                           Block::iterator end,
                                           const AffineCopyOptions &copyOptions,
                                           Optional<Value> filterMemRef,
                                           DenseSet<Operation *> &copyNests) {
  if (begin == end)
    return success();

  assert(begin->getBlock() == std::prev(end)->getBlock() &&
         "Inconsistent block begin/end args");
  assert(end != end->getBlock()->end() && "end can't be the block terminator");

  Block *block = begin->getBlock();

  // Copies will be generated for this depth, i.e., symbolic in all loops
  // surrounding this block range.
  unsigned copyDepth = getNestingDepth(&*begin);

  LLVM_DEBUG(llvm::dbgs() << "Generating copies at depth " << copyDepth
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "from begin: " << *begin << "\n");
  LLVM_DEBUG(llvm::dbgs() << "to inclusive end: " << *std::prev(end) << "\n");

  // List of memory regions to copy for. A map vector is used for a guaranteed
  // iteration order.
  SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> readRegions;
  SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> writeRegions;

  // Map from original memrefs to the fast buffers replacing their accesses.
  DenseMap<Value, Value> fastBufferMap;

  // To check for errors when walking the block.
  bool error = false;

  // Walk this range of operations to gather all memory regions.
  block->walk(begin, end, [&](Operation *opInst) {
    // Gather regions to allocate to buffers in faster memory space.
    if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) {
      if ((filterMemRef.hasValue() && filterMemRef != loadOp.getMemRef()) ||
          (loadOp.getMemRefType().getMemorySpaceAsInt() !=
           copyOptions.slowMemorySpace))
        return;
    } else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) {
      if ((filterMemRef.hasValue() && filterMemRef != storeOp.getMemRef()) ||
          storeOp.getMemRefType().getMemorySpaceAsInt() !=
              copyOptions.slowMemorySpace)
        return;
    } else {
      // Neither a load nor a store op.
      return;
    }

    // Compute the MemRefRegion accessed.
    auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
    if (failed(region->compute(opInst, copyDepth, /*sliceState=*/nullptr,
                               /*addMemRefDimBounds=*/false))) {
      LLVM_DEBUG(llvm::dbgs()
                 << "Error obtaining memory region: semi-affine maps?\n");
      LLVM_DEBUG(llvm::dbgs() << "over-approximating to the entire memref\n");
      if (!getFullMemRefAsRegion(opInst, copyDepth, region.get())) {
        LLVM_DEBUG(opInst->emitError(
            "non-constant memref sizes not yet supported"));
        error = true;
        return;
      }
    }

    // Each memref has a single buffer associated with it irrespective of how
    // many loads and stores happen on it. Add to the appropriate region if
    // it's not already there, or take a bounding box union with the existing
    // one.
    auto updateRegion =
        [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
                &targetRegions) {
          const auto *const it = targetRegions.find(region->memref);
          if (it == targetRegions.end())
            return false;

          // Perform a union with the existing region.
          if (failed(it->second->unionBoundingBox(*region))) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Memory region bounding box failed; "
                          "over-approximating to the entire memref\n");
            if (!getFullMemRefAsRegion(opInst, copyDepth, region.get())) {
              LLVM_DEBUG(opInst->emitError(
                  "non-constant memref sizes not yet supported"));
              error = true;
              return true;
            }
            it->second->getConstraints()->clearAndCopyFrom(
                *region->getConstraints());
          } else {
            // Union was computed and stored in 'it->second': copy it back.
            region->getConstraints()->clearAndCopyFrom(
                *it->second->getConstraints());
          }
          return true;
        };

    bool existsInRead = updateRegion(readRegions);
    if (error)
      return;
    bool existsInWrite = updateRegion(writeRegions);
    if (error)
      return;

    // Finally add it to the region list.
    if (region->isWrite() && !existsInWrite) {
      writeRegions[region->memref] = std::move(region);
    } else if (!region->isWrite() && !existsInRead) {
      readRegions[region->memref] = std::move(region);
    }
  });

  if (error) {
    LLVM_DEBUG(begin->emitError(
        "copy generation failed for one or more memref's in this block\n"));
    return failure();
  }

  uint64_t totalCopyBuffersSizeInBytes = 0;
  bool ret = true;
  auto processRegions =
      [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4>
              &regions) {
        for (const auto &regionEntry : regions) {
          // For each region, hoist copy in/out past all hoistable
          // 'affine.for's.
          Block::iterator copyInPlacementStart, copyOutPlacementStart;
          Block *copyPlacementBlock;
          findHighestBlockForPlacement(
              *regionEntry.second, *block, begin, end, &copyPlacementBlock,
              &copyInPlacementStart, &copyOutPlacementStart);

          uint64_t sizeInBytes;
          Block::iterator nBegin, nEnd;
          LogicalResult iRet = generateCopy(
              *regionEntry.second, block, begin, end, copyPlacementBlock,
              copyInPlacementStart, copyOutPlacementStart, copyOptions,
              fastBufferMap, copyNests, &sizeInBytes, &nBegin, &nEnd);
          if (succeeded(iRet)) {
            // begin/end could have been invalidated and need an update.
            begin = nBegin;
            end = nEnd;
            totalCopyBuffersSizeInBytes += sizeInBytes;
          }
          ret = ret & succeeded(iRet);
        }
      };
  processRegions(readRegions);
  processRegions(writeRegions);

  if (!ret) {
    LLVM_DEBUG(begin->emitError(
        "copy generation failed for one or more memref's in this block\n"));
    return failure();
  }

  // For a debug build, print the total size of the fast buffers generated.
  AffineForOp forOp;
  if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) {
    LLVM_DEBUG(forOp.emitRemark()
               << llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024)
               << " KiB of copy buffers in fast memory space for this block\n");
  }

  if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) {
    StringRef str = "Total size of all copy buffers' for this block "
                    "exceeds fast memory capacity\n";
    block->getParentOp()->emitWarning(str);
  }

  return success();
}

// A convenience wrapper of the above for all ops in the body of an
// AffineForOp.
LogicalResult mlir::affineDataCopyGenerate(AffineForOp forOp,
                                           const AffineCopyOptions &copyOptions,
                                           Optional<Value> filterMemRef,
                                           DenseSet<Operation *> &copyNests) {
  return affineDataCopyGenerate(forOp.getBody()->begin(),
                                std::prev(forOp.getBody()->end()), copyOptions,
                                filterMemRef, copyNests);
}
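// Usage sketch (an assumption, not code from this file; field order of
// AffineCopyOptions assumed): generate point-wise copies into memory space 1
// for everything accessed in a loop nest, with a 32 KiB fast-buffer budget.
//
//   AffineCopyOptions options = {/*generateDma=*/false, /*slowMemorySpace=*/0,
//                                /*fastMemorySpace=*/1, /*tagMemorySpace=*/0,
//                                /*fastMemCapacityBytes=*/32 * 1024};
//   DenseSet<Operation *> copyNests;
//   (void)affineDataCopyGenerate(forOp, options, /*filterMemRef=*/llvm::None,
//                                copyNests);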
/// Similar to affineDataCopyGenerate, but works with a single memref region
/// for a single analyzed operation.
LogicalResult mlir::generateCopyForMemRegion(
    const MemRefRegion &memrefRegion, Operation *analyzedOp,
    const AffineCopyOptions &copyOptions, CopyGenerateResult &result) {
  Block *block = analyzedOp->getBlock();
  auto begin = analyzedOp->getIterator();
  auto end = std::next(begin);
  DenseMap<Value, Value> fastBufferMap;
  DenseSet<Operation *> copyNests;

  auto err = generateCopy(memrefRegion, block, begin, end, block, begin, end,
                          copyOptions, fastBufferMap, copyNests,
                          &result.sizeInBytes, &begin, &end);
  if (failed(err))
    return err;

  const auto &en = fastBufferMap.find(memrefRegion.memref);
  // In some cases (empty loops), no copy generation would have happened.
  if (en == fastBufferMap.end())
    return failure();
  result.alloc = en->second.getDefiningOp();
  assert(result.alloc && "fast buffer expected to be locally allocated");
  assert(copyNests.size() <= 1 && "At most one copy nest is expected.");
  result.copyNest = copyNests.empty() ? nullptr : *copyNests.begin();
  return success();
}
/// Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
static void
gatherLoopsInBlock(Block *block, unsigned currLoopDepth,
                   std::vector<SmallVector<AffineForOp, 2>> &depthToLoops) {
  // Add a new empty level to the output if it doesn't exist already.
  assert(currLoopDepth <= depthToLoops.size() && "Unexpected currLoopDepth");
  if (currLoopDepth == depthToLoops.size())
    depthToLoops.emplace_back();

  for (auto &op : *block) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      depthToLoops[currLoopDepth].push_back(forOp);
      gatherLoopsInBlock(forOp.getBody(), currLoopDepth + 1, depthToLoops);
    }
  }
}

/// Gathers all AffineForOps in 'func.func' grouped by loop depth.
void mlir::gatherLoops(func::FuncOp func,
                       std::vector<SmallVector<AffineForOp, 2>> &depthToLoops) {
  for (auto &block : func)
    gatherLoopsInBlock(&block, /*currLoopDepth=*/0, depthToLoops);

  // Remove the last loop level from the output since it's empty.
  if (!depthToLoops.empty()) {
    assert(depthToLoops.back().empty() && "Last loop level is not empty?");
    depthToLoops.pop_back();
  }
}
// In createCanonicalizedAffineForOp: after fully composing and canonicalizing
// the bound maps and operands, create the loop.
return b.create<AffineForOp>(loc, lowerOperands, lbMap, upperOperands, ubMap,
                             step);
/// Creates an AffineIfOp that encodes the conditional to choose between the
/// constant trip count (full tile) version and the unknown trip count
/// (partial tile) version of this nest of loops.
static AffineIfOp createSeparationCondition(MutableArrayRef<AffineForOp> loops,
                                            OpBuilder b) {
  if (loops.empty())
    return nullptr;

  auto *context = loops[0].getContext();

  FlatAffineValueConstraints cst;
  SmallVector<Operation *, 8> ops;
  llvm::append_range(ops, loops);
  (void)getIndexSet(ops, &cst);

  // Remove constraints that are independent of these loop IVs.
  cst.removeIndependentConstraints(/*pos=*/0, /*num=*/loops.size());

  // Construct the constraint set representing the guard for full tiles: the
  // lower bound (resp. upper bound) corresponding to the full tile should be
  // larger (resp. smaller) than any other lower (resp. upper) bound.
  SmallVector<int64_t, 8> fullTileLb, fullTileUb;
  for (auto loop : loops) {
    (void)loop;
    // TODO: non-unit stride is not an issue to generalize to.
    assert(loop.getStep() == 1 && "point loop step expected to be one");
    // Mark everything as symbols for the purpose of finding a constant diff
    // pair.
    cst.setDimSymbolSeparation(/*newSymbolCount=*/cst.getNumDimAndSymbolVars() -
                               1);
    unsigned fullTileLbPos, fullTileUbPos;
    if (!cst.getConstantBoundOnDimSize(0, /*lb=*/nullptr,
                                       /*boundFloorDivisor=*/nullptr,
                                       /*ub=*/nullptr, &fullTileLbPos,
                                       &fullTileUbPos)) {
      LLVM_DEBUG(llvm::dbgs() << "Can't get constant diff pair for a loop\n");
      return nullptr;
    }

    SmallVector<unsigned, 4> lbIndices, ubIndices;
    cst.getLowerAndUpperBoundIndices(/*pos=*/0, &lbIndices, &ubIndices);

    auto fLb = cst.getInequality(fullTileLbPos);
    auto fUb = cst.getInequality(fullTileUbPos);
    fullTileLb.assign(fLb.begin(), fLb.end());
    fullTileUb.assign(fUb.begin(), fUb.end());

    // The full tile lower bound should be >= than any other lower bound.
    for (auto lbIndex : lbIndices)
      for (unsigned i = 0, e = cst.getNumCols(); i < e; ++i)
        cst.atIneq(lbIndex, i) = fullTileLb[i] - cst.atIneq(lbIndex, i);

    // The full tile upper bound should be <= any other upper bound.
    for (auto ubIndex : ubIndices)
      for (unsigned i = 0, e = cst.getNumCols(); i < e; ++i)
        cst.atIneq(ubIndex, i) -= fullTileUb[i];

    cst.removeVar(0);
  }

  cst.removeTrivialRedundancy();
  auto ifCondSet = cst.getAsIntegerSet(context);
  // ... (empty set handling and operand gathering elided)
  canonicalizeSetAndOperands(&ifCondSet, &setOperands);
  return b.create<AffineIfOp>(loops[0].getLoc(), ifCondSet, setOperands,
                              /*withElseRegion=*/true);
}
/// Create the full tile loop nest (along with its body).
static LogicalResult
createFullTiles(MutableArrayRef<AffineForOp> inputNest,
                SmallVectorImpl<AffineForOp> &fullTileLoops, OpBuilder b) {
  fullTileLoops.reserve(inputNest.size());

  // For each loop in the original nest, identify a lower/upper bound pair
  // such that their difference is a constant.
  FlatAffineValueConstraints cst;
  for (auto loop : inputNest) {
    // TODO: straightforward to generalize to a non-unit stride.
    if (loop.getStep() != 1) {
      LLVM_DEBUG(llvm::dbgs()
                 << "[tile separation] non-unit stride not implemented\n");
      return failure();
    }
    SmallVector<Operation *, 1> loopOp{loop.getOperation()};
    (void)getIndexSet(loopOp, &cst);
    // Mark everything other than this loop IV as a symbol to get an <lb, ub>
    // pair with a constant difference.
    cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - 1);
    unsigned lbPos, ubPos;
    if (!cst.getConstantBoundOnDimSize(/*pos=*/0, /*lb=*/nullptr,
                                       /*boundFloorDivisor=*/nullptr,
                                       /*ub=*/nullptr, &lbPos, &ubPos) ||
        lbPos == ubPos) {
      LLVM_DEBUG(llvm::dbgs() << "[tile separation] Can't get constant diff / "
                                 "equalities not yet handled\n");
      return failure();
    }

    // Set all variables as dimensions uniformly, since some of those marked
    // as symbols above could be outer loop IVs.
    cst.setDimSymbolSeparation(/*newSymbolCount=*/0);

    AffineValueMap lbVmap, ubVmap;
    cst.getIneqAsAffineValueMap(/*pos=*/0, lbPos, lbVmap, b.getContext());
    cst.getIneqAsAffineValueMap(/*pos=*/0, ubPos, ubVmap, b.getContext());
    AffineForOp fullTileLoop = createCanonicalizedAffineForOp(
        b, loop.getLoc(), lbVmap.getOperands(), lbVmap.getAffineMap(),
        ubVmap.getOperands(), ubVmap.getAffineMap());
    b = OpBuilder::atBlockTerminator(fullTileLoop.getBody());
    fullTileLoops.push_back(fullTileLoop);
  }

  // Add the body for the full tile loop nest.
  BlockAndValueMapping operandMap;
  for (const auto &loopEn : llvm::enumerate(inputNest))
    operandMap.map(loopEn.value().getInductionVar(),
                   fullTileLoops[loopEn.index()].getInductionVar());
  b = OpBuilder::atBlockTerminator(fullTileLoops.back().getBody());
  for (auto &op : inputNest.back().getBody()->without_terminator())
    b.clone(op, operandMap);
  return success();
}
/// Separates full tiles from partial tiles for a perfect nest by generating a
/// conditional guard that selects between the full tile version and the
/// partial tile version.
LogicalResult
mlir::separateFullTiles(MutableArrayRef<AffineForOp> inputNest,
                        SmallVectorImpl<AffineForOp> *fullTileNest) {
  if (inputNest.empty())
    return success();

  // Check that the supplied for ops are contiguously nested.
  auto firstLoop = inputNest[0];
  auto prevLoop = firstLoop;
  for (auto loop : inputNest.drop_front(1)) {
    assert(loop->getParentOp() == prevLoop && "input not contiguously nested");
    prevLoop = loop;
  }

  // Create the full tile loop nest.
  SmallVector<AffineForOp, 4> fullTileLoops;
  OpBuilder b(firstLoop);
  if (failed(createFullTiles(inputNest, fullTileLoops, b))) {
    if (!fullTileLoops.empty())
      fullTileLoops.front().erase();
    return failure();
  }

  // Create and insert the version select right before the root of the nest.
  b = OpBuilder(firstLoop);
  AffineIfOp ifOp = createSeparationCondition(inputNest, b);
  if (!ifOp) {
    fullTileLoops.front().erase();
    LLVM_DEBUG(llvm::dbgs() << "All tiles are full tiles, or failure creating "
                               "separation condition\n");
    return failure();
  }

  // Move the full tile into the then block.
  Block *thenBlock = ifOp.getThenBlock();
  AffineForOp outermostFullTileLoop = fullTileLoops[0];
  thenBlock->getOperations().splice(
      std::prev(thenBlock->end()),
      outermostFullTileLoop->getBlock()->getOperations(),
      Block::iterator(outermostFullTileLoop));

  // Move the partial tile (the original loop nest) into the else block.
  Block *elseBlock = ifOp.getElseBlock();
  elseBlock->getOperations().splice(std::prev(elseBlock->end()),
                                    firstLoop->getBlock()->getOperations(),
                                    Block::iterator(firstLoop));

  if (fullTileNest)
    *fullTileNest = std::move(fullTileLoops);

  return success();
}
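// Usage sketch (an assumption, not code from this file): separate full tiles
// from partial tiles for the intra-tile half of a tiled nest.
//
//   MutableArrayRef<AffineForOp> intraTileLoops =
//       MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
//   if (failed(separateFullTiles(intraTileLoops)))
//     LLVM_DEBUG(llvm::dbgs() << "tile separation not performed\n");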
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints, SmallVector< DependenceComponent, 2 > *dependenceComponents, bool allowRAR=false)
TODO: Remove this file when SCCP and integer range analysis have been ported to the new framework...
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef< uint64_t > shifts, bool unrollPrologueEpilogue=false)
Skew the operations in an affine.for's body with the specified operation-wise shifts.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector< SmallVector< DependenceComponent, 2 >> *depCompsVec)
Returns in 'depCompsVec', dependence components for dependences between all load and store ops in loo...
MLIRContext * getContext() const
void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map, SmallVectorImpl< Value > *operands)
Returns the trip count of the loop as an affine map with its corresponding operands if the latter is ...
AffineMap getMultiDimIdentityMap(unsigned rank)
void reset(unsigned numReservedInequalities, unsigned numReservedEqualities, unsigned numReservedCols, unsigned numDims, unsigned numSymbols, unsigned numLocals=0)
Clears any existing data and reserves memory for the specified constraints.
static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp, uint64_t unrollFactor)
Helper to generate cleanup loop for unroll or unroll-and-jam when the trip count is not a multiple of...
Operation is a basic unit of execution within MLIR.
std::vector< std::pair< Block::iterator, Block::iterator > > subBlocks
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
unsigned getNumSymbols() const
unsigned getNumDims() const
bool isHyperRectangular(unsigned pos, unsigned num) const
Returns true if the set can be trivially detected as being hyper-rectangular on the specified contigu...
static bool areInnerBoundsInvariant(AffineForOp forOp)
Check if all control operands of all loops are defined outside of forOp and return false if not...
This class represents a diagnostic that is inflight and set to be reported.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Block represents an ordered list of Operations.
Value getOperand(unsigned idx)
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of AffineForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor'...
LogicalResult applyOpPatternsAndFold(Operation *op, const FrozenRewritePatternSet &patterns, bool *erased=nullptr)
Applies the specified patterns on op alone while also trying to fold it, by selecting the highest ben...
LogicalResult checkTilingLegality(MutableArrayRef< mlir::AffineForOp > origLoops)
Checks whether hyper-rectangular loop tiling of the nest represented by origLoops is valid...
Operation * clone(Operation &op, BlockAndValueMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
OpListType & getOperations()
bool LLVM_ATTRIBUTE_UNUSED isPerfectlyNested(ArrayRef< AffineForOp > loops)
Returns true if loops is a perfectly nested loop nest, where loops appear in it from outermost to inn...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
unsigned permuteLoops(MutableArrayRef< AffineForOp > inputNest, ArrayRef< unsigned > permMap)
Performs a loop permutation on a perfectly nested loop nest inputNest (where the contained loops appe...
AffineExpr getAffineSymbolExpr(unsigned position)
static AffineIfOp createSeparationCondition(MutableArrayRef< AffineForOp > loops, OpBuilder b)
Creates an AffineIfOp that encodes the conditional to choose between the constant trip count version ...
LogicalResult coalesceLoops(MutableArrayRef< AffineForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
ArrayRef< int64_t > getInequality(unsigned idx) const
static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, AffineMap &cleanupLbMap, SmallVectorImpl< Value > &cleanupLbOperands)
Computes the cleanup loop lower bound of the loop being unrolled with the specified unroll factor; th...
static bool checkLoopInterchangeDependences(const std::vector< SmallVector< DependenceComponent, 2 >> &depCompsVec, ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
static void constructParametricallyTiledIndexSetHyperRect(MutableArrayRef< AffineForOp > origLoops, MutableArrayRef< AffineForOp > newLoops, ArrayRef< Value > tileSizes)
Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets...
void getIneqAsAffineValueMap(unsigned pos, unsigned ineqPos, AffineValueMap &vmap, MLIRContext *context) const
Returns the bound for the variable at pos from the inequality at ineqPos as a 1-d affine value map (a...
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value...
static void replaceIterArgsAndYieldResults(AffineForOp forOp)
Helper to replace uses of loop carried values (iter_args) and loop yield values while promoting singl...
Operation * getOperation()
Return the operation that this refers to.
static void setInterTileBoundsParametric(OpBuilder &b, AffineForOp origLoop, AffineForOp newLoop, Value tileSize)
Set lower and upper bounds of inter-tile loops for parametric tiling.
T lookup(T from) const
Lookup a mapped value within the map.
void getLoopIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the loops surrounding 'op' ordered from the outermost 'affine...
Block * getBlock()
Returns the operation block that contains this operation.
int64_t atIneq(unsigned i, unsigned j) const
Returns the value at the specified inequality row and column.
Checks whether two accesses to the same memref access the same element.
Optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, std::vector< SmallVector< int64_t, 4 >> *lbs=nullptr, SmallVectorImpl< int64_t > *lbDivisors=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
An integer constant appearing in affine expression.
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
void extractForInductionVars(ArrayRef< AffineForOp > forInsts, SmallVectorImpl< Value > *ivs)
Extracts the induction variables from a list of AffineForOps and places them in the output argument i...
void gatherLoops(func::FuncOp func, std::vector< SmallVector< AffineForOp, 2 >> &depthToLoops)
Gathers all AffineForOps in 'func.func' grouped by loop depth.
static bool checkTilingLegalityImpl(MutableArrayRef< mlir::AffineForOp > origLoops)
Checks the legality of tiling of a hyper-rectangular loop nest by simply checking if there is a 'nega...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
static void moveLoopBodyImpl(AffineForOp src, AffineForOp dest, Block::iterator loc)
Move the loop body of AffineForOp 'src' from 'src' into the specified location in destination's body...
Optional< uint64_t > getMemRefSizeInBytes(MemRefType memRefType)
Returns the size of memref data in bytes if it's statically shaped, None otherwise.
AffineExpr getResult(unsigned idx) const
void map(Block *from, Block *to)
Inserts a new mapping for 'from' to 'to'.
LogicalResult tilePerfectlyNested(MutableArrayRef< AffineForOp > input, ArrayRef< unsigned > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops...
static AffineForOp generatePointWiseCopy(Location loc, Value memref, Value fastMemRef, ArrayRef< AffineMap > lbMaps, ArrayRef< Value > lbOperands, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > ubOperands, ArrayRef< AffineExpr > fastBufOffsets, bool isCopyOut, OpBuilder b)
Generates a point-wise copy from/to `memref' to/from `fastMemRef' and returns the outermost AffineFor...
Result for calling generateCopyForMemRegion.
unsigned getNumInputs() const
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor or by the trip count (if constant), whichever is lower.
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef< uint64_t > shifts)
Checks where SSA dominance would be violated if a for op's body operations are shifted by the specifi...
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
This class represents an efficient way to signal success or failure.
AffineMap removeDuplicateExprs(AffineMap map)
Returns a map with the same dimension and symbol count as map, but whose results are the unique affin...
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
LogicalResult loopUnrollFull(AffineForOp forOp)
Unrolls this for operation completely if the trip count is known to be constant.
int64_t floorDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's floordiv operation on constants.
unsigned getNumOperands()
OpListType::iterator iterator
void constructTiledLoopNest(MutableArrayRef< AffineForOp > origLoops, AffineForOp rootAffineForOp, unsigned width, MutableArrayRef< AffineForOp > tiledLoops)
Constructs tiled loop nest, without setting the loop bounds and move the body of the original loop ne...
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Location loc
If there is more than one load/store op associated with the region, the location information would co...
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
LogicalResult tilePerfectlyNestedParametric(MutableArrayRef< AffineForOp > input, ArrayRef< Value > tileSizes, SmallVectorImpl< AffineForOp > *tiledNest=nullptr)
Tiles the specified band of perfectly nested loops creating tile-space loops and intra-tile loops...
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
unsigned getNumVars() const
int64_t ceilDiv(int64_t lhs, int64_t rhs)
Returns the result of MLIR's ceildiv operation on constants.
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
unsigned getNumSymbolVars() const
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
LogicalResult checkIfHyperRectangular(MutableArrayRef< AffineForOp > input)
Checks whether a loop nest is hyper-rectangular or not.
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRM...
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region ®ion)
Replace all uses of orig within the given region with replacement.
Base type for affine expression.
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
static SmallVector< AffineForOp, 8 > stripmineSink(AffineForOp forOp, uint64_t factor, ArrayRef< AffineForOp > targets)
void canonicalizeSetAndOperands(IntegerSet *set, SmallVectorImpl< Value > *operands)
Canonicalizes an integer set the same way canonicalizeMapAndOperands does for affine maps...
void getPerfectlyNestedLoops(SmallVectorImpl< AffineForOp > &nestedLoops, AffineForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
static WalkResult advance()
AffineForOp sinkSequentialLoops(AffineForOp forOp)
unsigned getNumResults() const
static void getMultiLevelStrides(const MemRefRegion ®ion, ArrayRef< int64_t > bufferShape, SmallVectorImpl< StrideInfo > *strideInfos)
Returns striding information for a copy/transfer of this region with potentially multiple striding le...
LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor)
Unrolls this loop by the specified unroll factor or its trip count, whichever is lower.
Location getLoc()
The source location the operation was defined or derived from.
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
static LogicalResult generateCopy(const MemRefRegion ®ion, Block *block, Block::iterator begin, Block::iterator end, Block *copyPlacementBlock, Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart, AffineCopyOptions copyOptions, DenseMap< Value, Value > &fastBufferMap, DenseSet< Operation *> ©Nests, uint64_t *sizeInBytes, Block::iterator *nBegin, Block::iterator *nEnd)
Creates a buffer in the faster memory space for the specified memref region; generates a copy from th...
uint64_t fastMemCapacityBytes
static AffineForOp generateShiftedLoop(AffineMap lbMap, AffineMap ubMap, const std::vector< std::pair< uint64_t, ArrayRef< Operation *>>> &opGroupQueue, unsigned offset, AffineForOp srcForOp, OpBuilder b)
Generates an affine.for op with the specified lower and upper bounds while generating the right IV re...
static void augmentMapAndBounds(OpBuilder &b, Value iv, AffineMap *map, SmallVector< Value, 4 > *operands, int64_t offset=0)
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued...
Optional< int64_t > getConstantBoundOnDimSize(unsigned pos, SmallVectorImpl< int64_t > *lb=nullptr, int64_t *boundFloorDivisor=nullptr, SmallVectorImpl< int64_t > *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
Returns the smallest known constant bound for the extent of the specified variable (pos^th)...
static WalkResult interrupt()
AffineDmaWaitOp blocks until the completion of a DMA operation associated with the tag element 'tag[i...
bool isParallelLoop(Operation &op)
AffineMap getAffineMap() const
void removeVar(VarKind kind, unsigned pos)
Removes variables of the specified kind with the specified pos (or within the specified range) from t...
static void gatherLoopsInBlock(Block *block, unsigned currLoopDepth, std::vector< SmallVector< AffineForOp, 2 >> &depthToLoops)
Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
AffineBound represents a lower or upper bound in the for operation.
unsigned getNumDimAndSymbolVars() const
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation...
ArrayRef< AffineExpr > getResults() const
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening...
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
unsigned getNumDimVars() const
LogicalResult separateFullTiles(MutableArrayRef< AffineForOp > nest, SmallVectorImpl< AffineForOp > *fullTileNest=nullptr)
Separates full tiles from partial tiles for a perfect nest nest by generating a conditional guard tha...
AffineExpr floorDiv(uint64_t v) const
void moveLoopBody(AffineForOp src, AffineForOp dest)
Move the loop body of AffineForOp 'src' from 'src' to the start of dest body.
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp)
Returns the greatest known integral divisor of the trip count.
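Since any factor of this divisor also divides the trip count, a transformation can use it to pick an unroll factor that needs no cleanup loop; a sketch (hypothetical helper):

// Largest factor <= maxFactor that evenly divides the trip count.
static uint64_t pickCleanUnrollFactor(AffineForOp forOp, uint64_t maxFactor) {
  uint64_t divisor = getLargestDivisorOfTripCount(forOp);
  for (uint64_t f = maxFactor; f > 1; --f)
    if (divisor % f == 0)
      return f;
  return 1;
}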
static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs, MemRefRegion *region)
Construct the memref region to just include the entire memref.
FlatAffineValueConstraints * getConstraints()
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
Operation * getTerminator()
Get the terminator operation of this block.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
static void setIntraTileBoundsParametric(OpBuilder &b, AffineForOp origLoop, AffineForOp newInterTileLoop, AffineForOp newIntraTileLoop, Value tileSize)
Set lower and upper bounds of intra-tile loops for parametric tiling.
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end, const AffineCopyOptions &copyOptions, Optional< Value > filterMemRef, DenseSet< Operation *> &copyNests)
Performs explicit copying for the contiguous sequence of operations in the block iterator range [`beg...
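A sketch of driving copy generation over a loop body (the memory spaces and capacity are illustrative; `std::prev` excludes the terminator from the range):

static void generateCopiesForLoop(AffineForOp forOp) {
  AffineCopyOptions options{/*generateDma=*/false, /*slowMemorySpace=*/0,
                            /*fastMemorySpace=*/1, /*tagMemorySpace=*/0,
                            /*fastMemCapacityBytes=*/32 * 1024};
  DenseSet<Operation *> copyNests;
  (void)affineDataCopyGenerate(forOp.getBody()->begin(),
                               std::prev(forOp.getBody()->end()), options,
                               /*filterMemRef=*/llvm::None, copyNests);
}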
AffineExpr getAffineConstantExpr(int64_t constant)
AffineMap getShiftedAffineMap(AffineMap map, int64_t shift)
Returns an affine map that is a translation (shift) of all result expressions in 'map' by 'shift'...
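For instance, shifting the identity map by 4 turns (d0) -> (d0) into (d0) -> (d0 + 4); a sketch with an assumed Builder `b`:

AffineMap identity = b.getDimIdentityMap();                       // (d0) -> (d0)
AffineMap shifted = b.getShiftedAffineMap(identity, /*shift=*/4); // (d0) -> (d0 + 4)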
LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion, Operation *analyzedOp, const AffineCopyOptions &copyOptions, CopyGenerateResult &result)
generateCopyForMemRegion is similar to affineDataCopyGenerate, but works with a single memref region...
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void getTileableBands(func::FuncOp f, std::vector< SmallVector< AffineForOp, 6 >> *bands)
Identify valid and profitable bands of loops to tile.
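A sketch of collecting the bands (hypothetical helper):

static void forEachTileableBand(func::FuncOp f) {
  std::vector<SmallVector<AffineForOp, 6>> bands;
  getTileableBands(f, &bands);
  for (SmallVector<AffineForOp, 6> &band : bands) {
    // Each band is a perfectly nested sequence of loops, outermost first.
    (void)band;
  }
}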
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Type getType() const
Return the type of this value.
LogicalResult getIndexSet(MutableArrayRef< Operation *> ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
FlatAffineValueConstraints represents an extension of IntegerPolyhedron where each non-local variable...
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB)
Performs loop interchange on 'forOpA' and 'forOpB'.
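A sketch (hypothetical helper; assumes `forOpB` is perfectly nested immediately inside `forOpA`):

static void swapNest(AffineForOp forOpA, AffineForOp forOpB) {
  // After the call the two loops have traded nesting positions.
  interchangeLoops(forOpA, forOpB);
}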
void setDimSymbolSeparation(unsigned newSymbolCount)
Changes the partition between dimensions and symbols.
LogicalResult loopUnrollJamByFactor(AffineForOp forOp, uint64_t unrollJamFactor)
Unrolls and jams this loop by the specified factor.
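A sketch (hypothetical helper; the remark text is illustrative):

static void tryUnrollJam(AffineForOp forOp) {
  // Unroll-and-jam by 2: the loop is unrolled and the resulting copies
  // of its body are fused ("jammed") together.
  if (failed(loopUnrollJamByFactor(forOp, /*unrollJamFactor=*/2)))
    forOp.emitRemark("unroll-and-jam did not apply");
}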
void getLowerAndUpperBoundIndices(unsigned pos, SmallVectorImpl< unsigned > *lbIndices, SmallVectorImpl< unsigned > *ubIndices, SmallVectorImpl< unsigned > *eqIndices=nullptr, unsigned offset=0, unsigned num=0) const
Gather positions of all lower and upper bounds of the variable at pos, and optionally any equalities ...
AffineMap getDimIdentityMap()
void removeIndependentConstraints(unsigned pos, unsigned num)
Removes constraints that are independent of (i.e., do not have a coefficient) variables in the range ...
void walk(Operation *op, function_ref< void(Region *)> callback, WalkOrder order)
Walk all of the regions, blocks, or operations nested under (and including) the given operation...
static void constructTiledIndexSetHyperRect(MutableArrayRef< AffineForOp > origLoops, MutableArrayRef< AffineForOp > newLoops, ArrayRef< unsigned > tileSizes)
Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets...
static void findHighestBlockForPlacement(const MemRefRegion &region, Block &block, Block::iterator &begin, Block::iterator &end, Block **copyPlacementBlock, Block::iterator *copyInPlacementStart, Block::iterator *copyOutPlacementStart)
Given a memref region, determine the lowest depth at which transfers can be placed for it...
Specialization of arith.constant op that returns an integer of index type.
static LogicalResult createFullTiles(MutableArrayRef< AffineForOp > inputNest, SmallVectorImpl< AffineForOp > &fullTileLoops, OpBuilder b)
Create the full tile loop nest (along with its body).
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
unsigned getNumCols() const
Returns the number of columns in the constraint system.
bool isValidLoopInterchangePermutation(ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
Checks if the loop interchange permutation 'loopPermMap', of the perfectly nested sequence of loops i...
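A sketch for a depth-3 nest, where loopPermMap[i] gives the new position of loop i (hypothetical helper):

static bool canRotateInnermostToOutermost(ArrayRef<AffineForOp> loops) {
  // Loop 2 (innermost) moves to position 0; loops 0 and 1 shift inward.
  unsigned perm[] = {1, 2, 0};
  return isValidLoopInterchangePermutation(loops, perm);
}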
LogicalResult performPreTilingChecks(MutableArrayRef< AffineForOp > input, ArrayRef< t > tileSizes)
Check if the input nest is supported for tiling and whether tiling would be legal or not...
AffineForOp replaceForOpWithNewYields(OpBuilder &b, AffineForOp loop, ValueRange newIterOperands, ValueRange newYieldedValues, ValueRange newIterArgs, bool replaceLoopResults=true)
Replace loop with a new loop where newIterOperands are appended with new initialization values and ne...
AffineDmaStartOp starts a non-blocking DMA operation that transfers data from a source memref to a de...
AffineExpr getAffineDimExpr(unsigned position)
This class implements the operand iterators for the Operation class.
Encapsulates a memref load or store access information.
Optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, None otherwise.
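A typical query, mirroring the single-iteration check used when promoting loops (hypothetical helper):

static bool isSingleIteration(AffineForOp forOp) {
  Optional<uint64_t> tripCount = getConstantTripCount(forOp);
  return tripCount && *tripCount == 1;
}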
Value memref
Memref that this region corresponds to.
static InFlightDiagnostic LLVM_ATTRIBUTE_UNUSED emitRemarkForBlock(Block &block)
A description of a (parallelizable) reduction in an affine loop.
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes...
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation...
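A sketch (hypothetical helper):

// True iff no loop encloses `op` (i.e., its nesting depth is zero).
static bool isNotEnclosedInLoop(Operation *op) {
  return getNestingDepth(op) == 0;
}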
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
ArrayRef< Value > getOperands() const
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors...
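A sketch with hypothetical processor-id values, e.g. produced by gpu.block_id and gpu.grid_dim for a 1-d grid:

static void distributeAcrossProcessors(scf::ForOp forOp, Value procId,
                                       Value numProcs) {
  // Distributes the iterations cyclically: each processor starts at its
  // own id and strides by the total number of processors.
  mapLoopToProcessorIds(forOp, procId, numProcs);
}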
This class helps build Operations.
This class provides an abstraction over the different types of ranges over Values.
operand_range getOperands()
AffineForOp createCanonicalizedAffineForOp(OpBuilder b, Location loc, ValueRange lbOperands, AffineMap lbMap, ValueRange ubOperands, AffineMap ubMap, int64_t step=1)
Creates an AffineForOp while ensuring that the lower and upper bounds are canonicalized, i.e., unused and duplicate operands are removed, any constant operands propagated/folded in, and duplicate bound maps dropped.
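A sketch building a loop from single lower/upper bound values (hypothetical helper):

static AffineForOp buildCanonicalLoop(OpBuilder &b, Location loc, Value lb,
                                      Value ub) {
  // Identity bound maps (d0) -> (d0); canonicalization folds any constant
  // operands and drops unused or duplicate ones.
  AffineMap map = b.getDimIdentityMap();
  return createCanonicalizedAffineForOp(b, loc, lb, map, ub, map);
}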
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...
An integer set representing a conjunction of one or more affine equalities and inequalities.