24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
30 #define DEBUG_TYPE "analysis-utils"
33 using namespace affine;
34 using namespace presburger;
36 using llvm::SmallDenseMap;
45 if (isa<AffineForOp>(op))
46 forOps.push_back(cast<AffineForOp>(op));
48 hasNonAffineRegionOp =
true;
49 else if (isa<AffineReadOpInterface>(op))
50 loadOpInsts.push_back(op);
51 else if (isa<AffineWriteOpInterface>(op))
52 storeOpInsts.push_back(op);
57 unsigned Node::getLoadOpCount(
Value memref)
const {
58 unsigned loadOpCount = 0;
60 if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
67 unsigned Node::getStoreOpCount(
Value memref)
const {
68 unsigned storeOpCount = 0;
70 if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
77 void Node::getStoreOpsForMemref(
Value memref,
80 if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
81 storeOps->push_back(storeOp);
86 void Node::getLoadOpsForMemref(
Value memref,
89 if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
90 loadOps->push_back(loadOp);
96 void Node::getLoadAndStoreMemrefSet(
98 llvm::SmallDenseSet<Value, 2> loadMemrefs;
100 loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
103 auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
104 if (loadMemrefs.count(memref) > 0)
105 loadAndStoreMemrefSet->insert(memref);
112 LLVM_DEBUG(llvm::dbgs() <<
"--- Initializing MDG ---\n");
119 if (dyn_cast<AffineForOp>(op)) {
128 Node node(nextNodeId++, &op);
130 node.
loads.push_back(opInst);
131 auto memref = cast<AffineReadOpInterface>(opInst).getMemRef();
132 memrefAccesses[memref].insert(node.
id);
135 node.
stores.push_back(opInst);
136 auto memref = cast<AffineWriteOpInterface>(opInst).getMemRef();
137 memrefAccesses[memref].insert(node.
id);
139 forToNodeMap[&op] = node.
id;
140 nodes.insert({node.
id, node});
141 }
else if (dyn_cast<AffineReadOpInterface>(op)) {
143 Node node(nextNodeId++, &op);
144 node.
loads.push_back(&op);
145 auto memref = cast<AffineReadOpInterface>(op).getMemRef();
146 memrefAccesses[memref].insert(node.
id);
147 nodes.insert({node.
id, node});
148 }
else if (dyn_cast<AffineWriteOpInterface>(op)) {
150 Node node(nextNodeId++, &op);
151 node.
stores.push_back(&op);
152 auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
153 memrefAccesses[memref].insert(node.
id);
154 nodes.insert({node.
id, node});
155 }
else if (op.getNumResults() > 0 && !op.use_empty()) {
158 Node node(nextNodeId++, &op);
159 nodes.insert({node.
id, node});
161 (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
168 Node node(nextNodeId++, &op);
169 nodes.insert({node.
id, node});
170 }
else if (op.getNumRegions() != 0) {
174 LLVM_DEBUG(llvm::dbgs()
175 <<
"MDG init failed; unknown region-holding op found!\n");
180 for (
auto &idAndNode : nodes) {
181 LLVM_DEBUG(llvm::dbgs() <<
"Create node " << idAndNode.first <<
" for:\n"
182 << *(idAndNode.second.op) <<
"\n");
188 for (
auto &idAndNode : nodes) {
189 const Node &node = idAndNode.second;
197 if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
204 auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
205 return loop->getBlock() == █
207 if (it == loops.end())
209 assert(forToNodeMap.count(*it) > 0 &&
"missing mapping");
210 unsigned userLoopNestId = forToNodeMap[*it];
211 addEdge(node.
id, userLoopNestId, value);
217 for (
auto &memrefAndList : memrefAccesses) {
218 unsigned n = memrefAndList.second.size();
219 for (
unsigned i = 0; i < n; ++i) {
220 unsigned srcId = memrefAndList.second[i];
222 getNode(srcId)->getStoreOpCount(memrefAndList.first) > 0;
223 for (
unsigned j = i + 1;
j < n; ++
j) {
224 unsigned dstId = memrefAndList.second[
j];
226 getNode(dstId)->getStoreOpCount(memrefAndList.first) > 0;
227 if (srcHasStore || dstHasStore)
228 addEdge(srcId, dstId, memrefAndList.first);
237 auto it = nodes.find(
id);
238 assert(it != nodes.end());
244 for (
auto &idAndNode : nodes)
245 if (idAndNode.second.op == forOp)
246 return &idAndNode.second;
252 Node node(nextNodeId++, op);
253 nodes.insert({node.
id, node});
260 if (inEdges.count(
id) > 0) {
262 for (
auto &inEdge : oldInEdges) {
263 removeEdge(inEdge.id,
id, inEdge.value);
267 if (outEdges.count(
id) > 0) {
269 for (
auto &outEdge : oldOutEdges) {
270 removeEdge(
id, outEdge.id, outEdge.value);
282 Node *node = getNode(
id);
283 for (
auto *storeOpInst : node->
stores) {
284 auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
285 auto *op = memref.getDefiningOp();
291 for (
auto *user : memref.getUsers())
292 if (!isa<AffineMapAccessInterface>(*user))
303 if (outEdges.count(srcId) == 0 || inEdges.count(dstId) == 0) {
306 bool hasOutEdge = llvm::any_of(outEdges[srcId], [=](
Edge &edge) {
307 return edge.
id == dstId && (!value || edge.
value == value);
309 bool hasInEdge = llvm::any_of(inEdges[dstId], [=](
Edge &edge) {
310 return edge.
id == srcId && (!value || edge.
value == value);
312 return hasOutEdge && hasInEdge;
318 if (!hasEdge(srcId, dstId, value)) {
319 outEdges[srcId].push_back({dstId, value});
320 inEdges[dstId].push_back({srcId, value});
321 if (isa<MemRefType>(value.
getType()))
322 memrefEdgeCount[value]++;
329 assert(inEdges.count(dstId) > 0);
330 assert(outEdges.count(srcId) > 0);
331 if (isa<MemRefType>(value.
getType())) {
332 assert(memrefEdgeCount.count(value) > 0);
333 memrefEdgeCount[value]--;
336 for (
auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
337 if ((*it).id == srcId && (*it).value == value) {
338 inEdges[dstId].erase(it);
343 for (
auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
344 if ((*it).id == dstId && (*it).value == value) {
345 outEdges[srcId].erase(it);
357 worklist.push_back({srcId, 0});
360 while (!worklist.empty()) {
361 auto &idAndIndex = worklist.back();
363 if (idAndIndex.first == dstId)
367 if (outEdges.count(idAndIndex.first) == 0 ||
368 idAndIndex.second == outEdges[idAndIndex.first].size()) {
373 Edge edge = outEdges[idAndIndex.first][idAndIndex.second];
380 if (!afterDst && edge.
id != idAndIndex.first)
381 worklist.push_back({edge.
id, 0});
390 unsigned inEdgeCount = 0;
391 if (inEdges.count(
id) > 0)
392 for (
auto &inEdge : inEdges[
id])
393 if (inEdge.value == memref) {
394 Node *srcNode = getNode(inEdge.id);
405 unsigned outEdgeCount = 0;
406 if (outEdges.count(
id) > 0)
407 for (
auto &outEdge : outEdges[
id])
408 if (!memref || outEdge.value == memref)
420 if (!isa<MemRefType>(edge.value.getType()))
421 definingNodes.insert(edge.id);
430 if (outEdges.count(srcId) == 0)
431 return getNode(dstId)->op;
435 gatherDefiningNodes(dstId, definingNodes);
436 if (llvm::any_of(definingNodes,
437 [&](
unsigned id) {
return hasDependencePath(srcId,
id); })) {
438 LLVM_DEBUG(llvm::dbgs()
439 <<
"Can't fuse: a defining op with a user in the dst "
440 "loop has dependence from the src loop\n");
446 for (
auto &outEdge : outEdges[srcId])
447 if (outEdge.id != dstId)
448 srcDepInsts.insert(getNode(outEdge.id)->op);
452 for (
auto &inEdge : inEdges[dstId])
453 if (inEdge.id != srcId)
454 dstDepInsts.insert(getNode(inEdge.id)->op);
456 Operation *srcNodeInst = getNode(srcId)->op;
457 Operation *dstNodeInst = getNode(dstId)->op;
470 std::optional<unsigned> firstSrcDepPos;
471 std::optional<unsigned> lastDstDepPos;
476 if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
477 firstSrcDepPos = pos;
478 if (dstDepInsts.count(op) > 0)
480 depInsts.push_back(op);
484 if (firstSrcDepPos.has_value()) {
485 if (lastDstDepPos.has_value()) {
486 if (*firstSrcDepPos <= *lastDstDepPos) {
492 return depInsts[*firstSrcDepPos];
508 if (inEdges.count(srcId) > 0) {
510 for (
auto &inEdge : oldInEdges) {
512 if (privateMemRefs.count(inEdge.value) == 0)
513 addEdge(inEdge.id, dstId, inEdge.value);
518 if (outEdges.count(srcId) > 0) {
520 for (
auto &outEdge : oldOutEdges) {
522 if (outEdge.id == dstId)
523 removeEdge(srcId, outEdge.id, outEdge.value);
524 else if (removeSrcId) {
525 addEdge(dstId, outEdge.id, outEdge.value);
526 removeEdge(srcId, outEdge.id, outEdge.value);
533 if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
535 for (
auto &inEdge : oldInEdges)
536 if (privateMemRefs.count(inEdge.value) > 0)
537 removeEdge(inEdge.id, dstId, inEdge.value);
547 if (inEdges.count(sibId) > 0) {
549 for (
auto &inEdge : oldInEdges) {
550 addEdge(inEdge.id, dstId, inEdge.value);
551 removeEdge(inEdge.id, sibId, inEdge.value);
558 if (outEdges.count(sibId) > 0) {
560 for (
auto &outEdge : oldOutEdges) {
561 addEdge(dstId, outEdge.id, outEdge.value);
562 removeEdge(sibId, outEdge.id, outEdge.value);
571 Node *node = getNode(
id);
572 llvm::append_range(node->
loads, loads);
573 llvm::append_range(node->
stores, stores);
577 Node *node = getNode(
id);
585 unsigned id,
const std::function<
void(
Edge)> &callback) {
586 if (inEdges.count(
id) > 0)
587 forEachMemRefEdge(inEdges[
id], callback);
593 unsigned id,
const std::function<
void(
Edge)> &callback) {
594 if (outEdges.count(
id) > 0)
595 forEachMemRefEdge(outEdges[
id], callback);
602 for (
const auto &edge : edges) {
604 if (!isa<MemRefType>(edge.value.getType()))
606 assert(nodes.count(edge.id) > 0);
608 if (!isa<AffineForOp>(getNode(edge.id)->op))
616 os <<
"\nMemRefDependenceGraph\n";
618 for (
const auto &idAndNode : nodes) {
619 os <<
"Node: " << idAndNode.first <<
"\n";
620 auto it = inEdges.find(idAndNode.first);
621 if (it != inEdges.end()) {
622 for (
const auto &e : it->second)
623 os <<
" InEdge: " << e.id <<
" " << e.value <<
"\n";
625 it = outEdges.find(idAndNode.first);
626 if (it != outEdges.end()) {
627 for (
const auto &e : it->second)
628 os <<
" OutEdge: " << e.id <<
" " << e.value <<
"\n";
636 AffineForOp currAffineForOp;
640 if (
auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
641 loops->push_back(currAffineForOp);
642 currOp = currOp->getParentOp();
644 std::reverse(loops->begin(), loops->end());
655 if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
656 ops->push_back(currOp);
659 std::reverse(ops->begin(), ops->end());
666 assert(!ivs.empty() &&
"Cannot have a slice without its IVs");
669 for (
Value iv : ivs) {
671 assert(loop &&
"Expected affine for");
681 assert(!lbOperands.empty());
683 unsigned numDims = ivs.size();
685 unsigned numSymbols = lbOperands[0].size();
689 values.append(lbOperands[0].begin(), lbOperands[0].end());
694 for (
unsigned i = numDims, end = values.size(); i < end; ++i) {
695 Value value = values[i];
696 assert(cst->
containsVar(value) &&
"value expected to be present");
700 cst->
addBound(BoundType::EQ, value, cOp.value());
708 LogicalResult ret = cst->
addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
709 assert(succeeded(ret) &&
710 "should not fail as we never have semi-affine slice maps");
724 llvm::errs() <<
"\tIVs:\n";
726 llvm::errs() <<
"\t\t" << iv <<
"\n";
728 llvm::errs() <<
"\tLBs:\n";
730 llvm::errs() <<
"\t\t" << en.value() <<
"\n";
731 llvm::errs() <<
"\t\tOperands:\n";
732 for (
Value lbOp : lbOperands[en.index()])
733 llvm::errs() <<
"\t\t\t" << lbOp <<
"\n";
736 llvm::errs() <<
"\tUBs:\n";
738 llvm::errs() <<
"\t\t" << en.value() <<
"\n";
739 llvm::errs() <<
"\t\tOperands:\n";
740 for (
Value ubOp : ubOperands[en.index()])
741 llvm::errs() <<
"\t\t\t" << ubOp <<
"\n";
750 std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck()
const {
751 assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
752 "Unexpected number of lbs, ubs and ivs in slice");
754 for (
unsigned i = 0, end = lbs.size(); i < end; ++i) {
766 isa<AffineConstantExpr>(lbMap.
getResult(0)))
776 AffineForOp dstLoop =
780 AffineMap dstLbMap = dstLoop.getLowerBoundMap();
781 AffineMap dstUbMap = dstLoop.getUpperBoundMap();
785 assert(srcLoop &&
"Expected affine for");
786 AffineMap srcLbMap = srcLoop.getLowerBoundMap();
787 AffineMap srcUbMap = srcLoop.getUpperBoundMap();
799 if (!isa<AffineConstantExpr>(srcLbResult) ||
800 !isa<AffineConstantExpr>(srcUbResult) ||
801 !isa<AffineConstantExpr>(dstLbResult) ||
802 !isa<AffineConstantExpr>(dstUbResult))
807 if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
808 srcLoop.getStep() != dstLoop.getStep())
829 std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
830 if (isValidFastCheck && *isValidFastCheck)
836 if (failed(getSourceAsConstraints(srcConstraints))) {
837 LLVM_DEBUG(llvm::dbgs() <<
"Unable to compute source's domain\n");
843 LLVM_DEBUG(llvm::dbgs() <<
"Cannot handle symbols in source domain\n");
850 LLVM_DEBUG(llvm::dbgs() <<
"Cannot handle locals in source domain\n");
857 if (failed(getAsConstraints(&sliceConstraints))) {
858 LLVM_DEBUG(llvm::dbgs() <<
"Unable to compute slice's domain\n");
867 LLVM_DEBUG(llvm::dbgs() <<
"Domain of the source of the slice:\n");
868 LLVM_DEBUG(srcConstraints.
dump());
869 LLVM_DEBUG(llvm::dbgs() <<
"Domain of the slice if this fusion succeeds "
870 "(expressed in terms of its source's IVs):\n");
871 LLVM_DEBUG(sliceConstraints.
dump());
879 LLVM_DEBUG(llvm::dbgs() <<
"Incorrect slice\n");
891 std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
892 if (isMaximalFastCheck)
893 return isMaximalFastCheck;
899 for (
Value iv : ivs) {
901 assert(loop &&
"Expected affine for");
909 for (
Value lbOp : lbOperands[0])
911 consumerIVs.push_back(lbOp);
915 for (
int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
916 consumerIVs.push_back(
Value());
939 return cast<MemRefType>(memref.getType()).getRank();
945 auto memRefType = cast<MemRefType>(memref.getType());
946 unsigned rank = memRefType.getRank();
948 shape->reserve(rank);
950 assert(rank == cst.getNumDimVars() &&
"inconsistent memref region");
958 for (
unsigned r = 0; r < rank; r++) {
959 cstWithShapeBounds.
addBound(BoundType::LB, r, 0);
960 int64_t dimSize = memRefType.getDimSize(r);
961 if (ShapedType::isDynamic(dimSize))
963 cstWithShapeBounds.
addBound(BoundType::UB, r, dimSize - 1);
968 int64_t numElements = 1;
969 int64_t diffConstant;
971 for (
unsigned d = 0; d < rank; d++) {
973 std::optional<int64_t> diff =
975 if (diff.has_value()) {
976 diffConstant = *diff;
977 assert(diffConstant >= 0 &&
"Dim size bound can't be negative");
978 assert(lbDivisor > 0);
982 auto dimSize = memRefType.getDimSize(d);
983 if (dimSize == ShapedType::kDynamic)
985 diffConstant = dimSize;
990 numElements *= diffConstant;
993 assert(lbDivisors &&
"both lbs and lbDivisor or none");
994 lbDivisors->push_back(lbDivisor);
997 shape->push_back(diffConstant);
1005 assert(pos < cst.getNumDimVars() &&
"invalid position");
1006 auto memRefType = cast<MemRefType>(memref.getType());
1007 unsigned rank = memRefType.getRank();
1009 assert(rank == cst.getNumDimVars() &&
"inconsistent memref region");
1011 auto boundPairs = cst.getLowerAndUpperBound(
1012 pos, 0, rank, cst.getNumDimAndSymbolVars(),
1013 {}, memRefType.getContext());
1014 lbMap = boundPairs.first;
1015 ubMap = boundPairs.second;
1016 assert(lbMap &&
"lower bound for a region must exist");
1017 assert(ubMap &&
"upper bound for a region must exist");
1018 assert(lbMap.
getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
1019 assert(ubMap.
getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
1023 assert(memref == other.
memref);
1046 bool addMemRefDimBounds) {
1047 assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
1048 "affine read/write op expected");
1054 unsigned rank = access.
getRank();
1056 LLVM_DEBUG(llvm::dbgs() <<
"MemRefRegion::compute: " << *op
1057 <<
"\ndepth: " << loopDepth <<
"\n";);
1063 assert(loopDepth <= ivs.size() &&
"invalid 'loopDepth'");
1065 ivs.resize(loopDepth);
1081 operands.resize(numOperands);
1082 for (
unsigned i = 0; i < numOperands; ++i)
1085 if (sliceState !=
nullptr) {
1086 operands.reserve(operands.size() + sliceState->
lbOperands[0].size());
1088 for (
auto extraOperand : sliceState->
lbOperands[0]) {
1089 if (!llvm::is_contained(operands, extraOperand)) {
1090 operands.push_back(extraOperand);
1102 for (
unsigned i = 0; i < numDims + numSymbols; ++i) {
1103 auto operand = operands[i];
1109 if (failed(cst.addAffineForOpDomain(affineFor)))
1112 if (failed(cst.addAffineParallelOpDomain(parallelOp)))
1116 Value symbol = operand;
1118 cst.addBound(BoundType::EQ, symbol, constVal.value());
1120 LLVM_DEBUG(llvm::dbgs() <<
"unknown affine dimensional value");
1126 if (sliceState !=
nullptr) {
1128 for (
auto operand : sliceState->
lbOperands[0]) {
1129 cst.addInductionVarOrTerminalSymbol(operand);
1133 cst.addSliceBounds(sliceState->
ivs, sliceState->
lbs, sliceState->
ubs,
1135 assert(succeeded(ret) &&
1136 "should not fail as we never have semi-affine slice maps");
1141 if (failed(cst.composeMap(&accessValueMap))) {
1142 op->
emitError(
"getMemRefRegion: compose affine map failed");
1150 cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);
1156 assert(loopDepth <= enclosingIVs.size() &&
"invalid loop depth");
1157 enclosingIVs.resize(loopDepth);
1159 cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
1160 for (
Value var : vars) {
1162 cst.projectOut(var);
1168 cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());
1171 cst.constantFoldVarRange(cst.getNumDimVars(),
1172 cst.getNumSymbolVars());
1174 assert(cst.getNumDimVars() == rank &&
"unexpected MemRefRegion format");
1179 if (addMemRefDimBounds) {
1180 auto memRefType = cast<MemRefType>(memref.getType());
1181 for (
unsigned r = 0; r < rank; r++) {
1182 cst.addBound(BoundType::LB, r, 0);
1183 if (memRefType.isDynamicDim(r))
1185 cst.addBound(BoundType::UB, r, memRefType.getDimSize(r) - 1);
1188 cst.removeTrivialRedundancy();
1190 LLVM_DEBUG(llvm::dbgs() <<
"Memory region:\n");
1191 LLVM_DEBUG(cst.dump());
1195 std::optional<int64_t>
1197 auto elementType = memRefType.getElementType();
1199 unsigned sizeInBits;
1200 if (elementType.isIntOrFloat()) {
1201 sizeInBits = elementType.getIntOrFloatBitWidth();
1202 }
else if (
auto vectorType = dyn_cast<VectorType>(elementType)) {
1203 if (vectorType.getElementType().isIntOrFloat())
1205 vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
1207 return std::nullopt;
1209 return std::nullopt;
1216 auto memRefType = cast<MemRefType>(memref.getType());
1218 if (!memRefType.getLayout().isIdentity()) {
1219 LLVM_DEBUG(llvm::dbgs() <<
"Non-identity layout map not yet supported\n");
1230 std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
1232 LLVM_DEBUG(llvm::dbgs() <<
"Dynamic shapes not yet supported\n");
1233 return std::nullopt;
1237 return std::nullopt;
1238 return *eltSize * *numElements;
1245 std::optional<uint64_t>
1247 if (!memRefType.hasStaticShape())
1248 return std::nullopt;
1249 auto elementType = memRefType.getElementType();
1250 if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
1251 return std::nullopt;
1255 return std::nullopt;
1256 for (
unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
1257 sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
1262 template <
typename LoadOrStoreOp>
1265 static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
1266 AffineWriteOpInterface>::value,
1267 "argument should be either a AffineReadOpInterface or a "
1268 "AffineWriteOpInterface");
1270 Operation *op = loadOrStoreOp.getOperation();
1272 if (failed(region.compute(op, 0,
nullptr,
1276 LLVM_DEBUG(llvm::dbgs() <<
"Memory region");
1277 LLVM_DEBUG(region.getConstraints()->dump());
1279 bool outOfBounds =
false;
1280 unsigned rank = loadOrStoreOp.getMemRefType().getRank();
1283 for (
unsigned r = 0; r < rank; r++) {
1290 int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
1296 ucst.addBound(BoundType::LB, r, dimSize);
1297 outOfBounds = !ucst.isEmpty();
1299 loadOrStoreOp.emitOpError()
1300 <<
"memref out of upper bound access along dimension #" << (r + 1);
1305 std::fill(ineq.begin(), ineq.end(), 0);
1307 lcst.addBound(BoundType::UB, r, -1);
1308 outOfBounds = !lcst.isEmpty();
1310 loadOrStoreOp.emitOpError()
1311 <<
"memref out of lower bound access along dimension #" << (r + 1);
1314 return failure(outOfBounds);
1318 template LogicalResult
1321 template LogicalResult
1330 while (block != limitBlock) {
1333 int instPosInBlock = std::distance(block->
begin(), op->getIterator());
1334 positions->push_back(instPosInBlock);
1338 std::reverse(positions->begin(), positions->end());
1345 unsigned level,
Block *block) {
1347 for (
auto &op : *block) {
1348 if (i != positions[level]) {
1352 if (level == positions.size() - 1)
1354 if (
auto childAffineForOp = dyn_cast<AffineForOp>(op))
1356 childAffineForOp.getBody());
1359 for (
auto &b : region)
1371 for (
unsigned i = 0, e = cst->
getNumDimVars(); i < e; ++i) {
1373 if (ivs.count(value) == 0) {
1387 unsigned numOps = ops.size();
1388 assert(numOps > 0 &&
"Expected at least one operation");
1390 std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
1392 for (
unsigned i = 0; i < numOps; ++i) {
1395 std::min(loopDepthLimit,
static_cast<unsigned>(loops[i].size()));
1398 unsigned loopDepth = 0;
1399 for (
unsigned d = 0; d < loopDepthLimit; ++d) {
1401 for (i = 1; i < numOps; ++i) {
1402 if (loops[i - 1][d] != loops[i][d])
1405 if (surroundingLoops)
1406 surroundingLoops->push_back(loops[i - 1][d]);
1419 unsigned numCommonLoops,
bool isBackwardSlice,
1425 std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
1426 for (
auto *i : opsA) {
1428 for (
auto *
j : opsB) {
1435 LLVM_DEBUG(llvm::dbgs() <<
"Invalid loop depth\n");
1439 bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.
opInst) &&
1440 isa<AffineReadOpInterface>(dstAccess.
opInst);
1444 srcAccess, dstAccess, numCommonLoops + 1,
1445 &dependenceConstraints,
nullptr,
1448 LLVM_DEBUG(llvm::dbgs() <<
"Dependence check failed\n");
1453 dependentOpPairs.emplace_back(i,
j);
1458 loopDepth, isBackwardSlice,
1464 LLVM_DEBUG(llvm::dbgs()
1465 <<
"Unable to compute slice bound constraints\n");
1475 LLVM_DEBUG(llvm::dbgs()
1476 <<
"Unable to compute slice bound constraints\n");
1486 for (
unsigned k = 0, l = sliceUnionCst.
getNumDimVars(); k < l; ++k)
1487 sliceUnionIVs.insert(sliceUnionCst.
getValue(k));
1489 for (
unsigned k = 0, l = tmpSliceCst.
getNumDimVars(); k < l; ++k)
1490 tmpSliceIVs.insert(tmpSliceCst.
getValue(k));
1508 LLVM_DEBUG(llvm::dbgs()
1509 <<
"Unable to compute union bounding box of slice bounds\n");
1521 for (
auto &dep : dependentOpPairs) {
1522 ops.push_back(isBackwardSlice ? dep.second : dep.first);
1525 unsigned innermostCommonLoopDepth =
1527 if (loopDepth > innermostCommonLoopDepth) {
1528 LLVM_DEBUG(llvm::dbgs() <<
"Exceeds max loop depth\n");
1548 sliceUnionCst.
getValues(numSliceLoopIVs,
1550 &sliceBoundOperands);
1553 sliceUnion->
ivs.clear();
1554 sliceUnionCst.
getValues(0, numSliceLoopIVs, &sliceUnion->
ivs);
1559 ? surroundingLoops[loopDepth - 1].getBody()->begin()
1560 : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
1564 sliceUnion->
lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1565 sliceUnion->
ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1569 std::optional<bool> isSliceValid = sliceUnion->
isSliceValid();
1570 if (!isSliceValid) {
1571 LLVM_DEBUG(llvm::dbgs() <<
"Cannot determine if the slice is valid\n");
1583 assert(lbMap.
getNumResults() == 1 &&
"expected single result bound map");
1584 assert(ubMap.
getNumResults() == 1 &&
"expected single result bound map");
1591 auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
1593 return std::nullopt;
1594 return cExpr.getValue();
1603 llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1604 unsigned numSrcLoopIVs = slice.
ivs.size();
1606 for (
unsigned i = 0; i < numSrcLoopIVs; ++i) {
1608 auto *op = forOp.getOperation();
1617 if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1618 (*tripCountMap)[op] =
1619 forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1623 if (maybeConstTripCount.has_value()) {
1624 (*tripCountMap)[op] = *maybeConstTripCount;
1631 if (!tripCount.has_value())
1633 (*tripCountMap)[op] = *tripCount;
1640 const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1641 uint64_t iterCount = 1;
1642 for (
const auto &count : sliceTripCountMap) {
1643 iterCount *= count.second;
1660 unsigned numSrcLoopIVs = srcLoopIVs.size();
1665 unsigned numDstLoopIVs = dstLoopIVs.size();
1667 assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1668 (isBackwardSlice && loopDepth <= numDstLoopIVs));
1671 unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1673 isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1677 unsigned offset = isBackwardSlice ? 0 : loopDepth;
1678 unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1679 dependenceConstraints->
getValues(offset, offset + numSliceLoopIVs,
1689 &sliceState->
lbs, &sliceState->
ubs);
1694 for (
unsigned i = 0; i < numDimsAndSymbols; ++i) {
1695 if (i < offset || i >= offset + numSliceLoopIVs) {
1696 sliceBoundOperands.push_back(dependenceConstraints->
getValue(i));
1702 sliceState->
lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1703 sliceState->
ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1707 isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1708 : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1710 llvm::SmallDenseSet<Value, 8> sequentialLoops;
1711 if (isa<AffineReadOpInterface>(depSourceOp) &&
1712 isa<AffineReadOpInterface>(depSinkOp)) {
1718 auto getSliceLoop = [&](
unsigned i) {
1719 return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1721 auto isInnermostInsertion = [&]() {
1722 return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1723 : loopDepth >= dstLoopIVs.size());
1725 llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
1726 auto srcIsUnitSlice = [&]() {
1733 for (
unsigned i = 0; i < numSliceLoopIVs; ++i) {
1734 Value iv = getSliceLoop(i).getInductionVar();
1735 if (sequentialLoops.count(iv) == 0 &&
1743 std::optional<bool> isMaximal = sliceState->
isMaximal();
1745 isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1747 for (
unsigned j = i;
j < numSliceLoopIVs; ++
j) {
1773 unsigned numSrcLoopIVs = srcLoopIVs.size();
1778 unsigned dstLoopIVsSize = dstLoopIVs.size();
1779 if (dstLoopDepth > dstLoopIVsSize) {
1780 dstOpInst->
emitError(
"invalid destination loop depth");
1781 return AffineForOp();
1791 auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1792 OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1793 auto sliceLoopNest =
1794 cast<AffineForOp>(b.
clone(*srcLoopIVs[0].getOperation()));
1803 unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1804 (void)sliceSurroundingLoopsSize;
1805 assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1806 unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1807 (void)sliceLoopLimit;
1808 assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1811 for (
unsigned i = 0; i < numSrcLoopIVs; ++i) {
1812 auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1814 forOp.setLowerBound(sliceState->
lbOperands[i], lbMap);
1816 forOp.setUpperBound(sliceState->
ubOperands[i], ubMap);
1818 return sliceLoopNest;
1824 if (
auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
1825 memref = loadOp.getMemRef();
1826 opInst = loadOrStoreOpInst;
1827 llvm::append_range(indices, loadOp.getMapOperands());
1829 assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
1830 "Affine read/write op expected");
1831 auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
1832 opInst = loadOrStoreOpInst;
1833 memref = storeOp.getMemRef();
1834 llvm::append_range(indices, storeOp.getMapOperands());
1839 return cast<MemRefType>(memref.getType()).getRank();
1843 return isa<AffineWriteOpInterface>(opInst);
1852 if (isa<AffineForOp>(currOp))
1865 if (memref != rhs.
memref)
1869 getAccessMap(&thisMap);
1878 AffineForOp currAffineForOp;
1882 if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
1883 ivs.push_back(currAffineForOp.getInductionVar());
1884 else if (
auto parOp = dyn_cast<AffineParallelOp>(currOp))
1885 llvm::append_range(ivs, parOp.getIVs());
1886 currOp = currOp->getParentOp();
1888 std::reverse(ivs.begin(), ivs.end());
1899 unsigned minNumLoops =
std::min(loopsA.size(), loopsB.size());
1900 unsigned numCommonLoops = 0;
1901 for (
unsigned i = 0; i < minNumLoops; ++i) {
1902 if (loopsA[i] != loopsB[i])
1906 return numCommonLoops;
1913 SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
1917 if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
1923 auto region = std::make_unique<MemRefRegion>(opInst->
getLoc());
1925 region->compute(opInst,
1927 return opInst->
emitError(
"error obtaining memory region\n");
1930 auto [it, inserted] = regions.try_emplace(region->memref);
1932 it->second = std::move(region);
1933 }
else if (failed(it->second->unionBoundingBox(*region))) {
1935 "getMemoryFootprintBytes: unable to perform a union on a memory "
1940 if (result.wasInterrupted())
1941 return std::nullopt;
1943 int64_t totalSizeInBytes = 0;
1944 for (
const auto ®ion : regions) {
1945 std::optional<int64_t> size = region.second->getRegionSize();
1946 if (!size.has_value())
1947 return std::nullopt;
1948 totalSizeInBytes += *size;
1950 return totalSizeInBytes;
1955 auto *forInst = forOp.getOperation();
1966 return !reductions.empty();
1972 AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
1974 if (
auto innerFor = dyn_cast<AffineForOp>(op))
1976 sequentialLoops->insert(innerFor.getInductionVar());
1988 assert(simplifiedSet &&
"guaranteed to succeed while roundtripping");
1989 return simplifiedSet;
1995 llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
1996 return val.has_value() ? *val :
Value();
2015 for (
unsigned i = syms.size(); i < newSyms.size(); ++i)
2017 return constraints.
addBound(type, pos, alignedMap);
2024 newResults.push_back(r + val);
2068 bool isMin = isa<AffineMinOp>(op);
2069 assert((isMin || isa<AffineMaxOp>(op)) &&
"expect AffineMin/MaxOp");
2073 isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2080 unsigned resultDimStart = constraints.
appendDimVar(numResults);
2084 auto boundType = isMin ? BoundType::UB : BoundType::LB;
2095 AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
2106 if (failed(constraints.
addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
2124 for (
unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
2132 map.
getSubMap({i - resultDimStart}), operands)))
2140 ineq[dimOpBound] = isMin ? 1 : -1;
2141 ineq[i] = isMin ? -1 : 1;
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
const char *const kSliceFusionBarrierAttrName
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
MemRefDependenceGraph::Node Node
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
Base type for affine expression.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
MLIRContext * getContext() const
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
unsigned getNumInputs() const
AffineExpr getResult(unsigned idx) const
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Block represents an ordered list of Operations.
OpListType::iterator iterator
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
This class is a general helper class for creating context-global objects like types,...
AffineExpr getAffineSymbolExpr(unsigned position)
AffineExpr getAffineConstantExpr(int64_t constant)
AffineExpr getAffineDimExpr(unsigned position)
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
SmallVector< std::optional< Value > > getMaybeValues() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
unsigned appendDimVar(ValueRange vals)
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
unsigned appendSymbolVar(ValueRange vals)
An integer set representing a conjunction of one or more affine equalities and inequalities.
unsigned getNumDims() const
MLIRContext * getContext() const
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
unsigned getNumSymbols() const
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
Operation is the basic unit of execution within MLIR.
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
MLIRContext * getContext()
Return the context this operation is associated with.
unsigned getNumRegions()
Returns the number of regions held by this operation.
Location getLoc()
The source location the operation was defined or derived from.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Block * getBlock()
Returns the operation block that contains this operation.
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
operand_range getOperands()
Returns an iterator on the underlying Value's.
user_range getUsers()
Returns a range of all users.
result_range getResults()
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
A utility result that is used to signal how to proceed with an ongoing walk:
static WalkResult advance()
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
AffineMap getAffineMap() const
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBoundOnDimSize64(unsigned pos, SmallVectorImpl< int64_t > *lb=nullptr, int64_t *boundFloorDivisor=nullptr, SmallVectorImpl< int64_t > *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
The same, but casts to int64_t.
unsigned getNumSymbolVars() const
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
unsigned getNumVars() const
unsigned getNumLocalVars() const
unsigned getNumDimAndSymbolVars() const
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
unsigned getNumDimVars() const
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, FlatAffineValueConstraints *dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns true if the provided value is among the induction variables of an AffineParallelOp.
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the number of iterations for the slicetripCountMap provided.
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
BoundType
The type of bound: equal, lower bound or upper bound.
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
SmallVector< Value, 4 > ivs
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
std::vector< SmallVector< Value, 4 > > ubOperands
SmallVector< AffineMap, 4 > ubs
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
SmallVector< AffineMap, 4 > lbs
Block::iterator insertPoint
std::vector< SmallVector< Value, 4 > > lbOperands
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
SmallVector< Operation *, 4 > loadOpInsts
bool hasNonAffineRegionOp
void collect(Operation *opToWalk)
SmallVector< Operation *, 4 > storeOpInsts
Encapsulates a memref load or store access information.
MemRefAccess(Operation *opInst)
Constructs a MemRefAccess from a load or store operation.
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
SmallVector< Operation *, 4 > loads
SmallVector< Operation *, 4 > stores
unsigned getStoreOpCount(Value memref) const
unsigned addNode(Operation *op)
unsigned getIncomingMemRefAccesses(unsigned id, Value memref)
void removeEdge(unsigned srcId, unsigned dstId, Value value)
void addEdge(unsigned srcId, unsigned dstId, Value value)
Node * getForOpNode(AffineForOp forOp)
bool hasDependencePath(unsigned srcId, unsigned dstId)
void clearNodeLoadAndStores(unsigned id)
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
bool writesToLiveInOrEscapingMemrefs(unsigned id)
void addToNode(unsigned id, const SmallVectorImpl< Operation * > &loads, const SmallVectorImpl< Operation * > &stores)
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr)
void removeNode(unsigned id)
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId)
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr)
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes)
Return all nodes which define SSA values used in node 'id'.
Node * getNode(unsigned id)
void print(raw_ostream &os) const
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...
FlatAffineValueConstraints * getConstraints()
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, std::vector< SmallVector< int64_t, 4 >> *lbs=nullptr, SmallVectorImpl< int64_t > *lbDivisors=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
LogicalResult unionBoundingBox(const MemRefRegion &other)
Value memref
Memref that this region corresponds to.
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true)
Computes the memory region accessed by this memref with the region represented as constraints symboli...
Enumerates different result statuses of slice computation by computeSliceUnion
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.