#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "analysis-utils"

using namespace mlir;
using namespace affine;
using namespace presburger;

using llvm::SmallDenseMap;
// Collects the affine loops, affine load/store ops, and any other
// memory-effecting ops in the nest rooted at `opToWalk`.
void LoopNestStateCollector::collect(Operation *opToWalk) {
  opToWalk->walk([&](Operation *op) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      forOps.push_back(forOp);
    } else if (isa<AffineReadOpInterface>(op)) {
      loadOpInsts.push_back(op);
    } else if (isa<AffineWriteOpInterface>(op)) {
      storeOpInsts.push_back(op);
    } else {
      auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
      if (!memInterface) {
        // Ops that don't declare their effects: conservatively treat every
        // memref operand as both read and written.
        for (Value v : op->getOperands()) {
          if (!isa<MemRefType>(v.getType()))
            continue;
          memrefLoads.push_back(op);
          memrefStores.push_back(op);
        }
      } else {
        // Non-affine ops with declared memory effects.
        if (hasEffect<MemoryEffects::Read>(op))
          memrefLoads.push_back(op);
        if (hasEffect<MemoryEffects::Write>(op))
          memrefStores.push_back(op);
        if (hasEffect<MemoryEffects::Free>(op))
          memrefFrees.push_back(op);
      }
    }
  });
}
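// For example, walking a nest such as
//
//   affine.for %i = 0 to 128 {
//     %v = affine.load %A[%i] : memref<128xf32>
//     memref.store %v, %B[%c0] : memref<1xf32>
//   }
//
// records the affine.for in `forOps`, the affine.load in `loadOpInsts`, and
// the non-affine memref.store (a declared write effect) in `memrefStores`.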
// Returns the load op count for `memref`, considering both affine loads and
// non-affine ops with a read effect.
unsigned Node::getLoadOpCount(Value memref) const {
  unsigned loadOpCount = 0;
  for (Operation *loadOp : llvm::concat<Operation *const>(loads, memrefLoads)) {
    if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
      if (memref == affineLoad.getMemRef())
        ++loadOpCount;
    } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
      ++loadOpCount;
    }
  }
  return loadOpCount;
}
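// For example, a node whose nest holds two affine.load ops and one
// memref.load op on the same memref %A yields getLoadOpCount(%A) == 3, since
// both the affine `loads` list and the non-affine `memrefLoads` list are
// scanned above.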
// Returns the store op count for `memref`.
unsigned Node::getStoreOpCount(Value memref) const {
  unsigned storeOpCount = 0;
  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
    if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
      if (memref == affineStore.getMemRef())
        ++storeOpCount;
    } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
                                               memref)) {
      ++storeOpCount;
    }
  }
  return storeOpCount;
}
// Returns true if there exists an operation with a write memory effect to
// `memref` in this node.
unsigned Node::hasStore(Value memref) const {
  return llvm::any_of(
      llvm::concat<Operation *const>(stores, memrefStores),
      [&](Operation *storeOp) {
        if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
          if (memref == affineStore.getMemRef())
            return true;
        } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
          return true;
        }
        return false;
      });
}
unsigned Node::hasFree(Value memref) const {
  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
    return hasEffect<MemoryEffects::Free>(freeOp, memref);
  });
}
// Returns all store ops in `storeOps` which access `memref`.
void Node::getStoreOpsForMemref(Value memref,
                                SmallVectorImpl<Operation *> *storeOps) const {
  for (Operation *storeOp : stores) {
    if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
      storeOps->push_back(storeOp);
  }
}
// Returns all load ops in `loadOps` which access `memref`.
void Node::getLoadOpsForMemref(Value memref,
                               SmallVectorImpl<Operation *> *loadOps) const {
  for (Operation *loadOp : loads) {
    if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
      loadOps->push_back(loadOp);
  }
}
// Returns all memrefs in `loadAndStoreMemrefSet` for which this node has at
// least one load and store operation.
void Node::getLoadAndStoreMemrefSet(
    llvm::SmallDenseSet<Value, 2> *loadAndStoreMemrefSet) const {
  llvm::SmallDenseSet<Value, 2> loadMemrefs;
  for (Operation *loadOp : loads)
    loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
  for (Operation *storeOp : stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
    if (loadMemrefs.count(memref) > 0)
      loadAndStoreMemrefSet->insert(memref);
  }
}
// Returns the values that this op has a memref effect of type `EffectTys` on,
// not considering recursive effects.
template <typename... EffectTys>
static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) {
  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
  if (!memOp) {
    // Conservatively assume all memref operands are affected.
    for (Value operand : op->getOperands()) {
      if (isa<MemRefType>(operand.getType()))
        values.push_back(operand);
    }
    return;
  }
  SmallVector<MemoryEffects::EffectInstance> effects;
  memOp.getEffects(effects);
  for (auto &effect : effects) {
    Value effectVal = effect.getValue();
    if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
        isa<MemRefType>(effectVal.getType()))
      values.push_back(effectVal);
  }
}
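// For example, instantiated with EffectTys = MemoryEffects::Write, an op that
// declares a read of one memref and a write of another contributes only the
// written memref to `values`; an op without the effect interface contributes
// all of its memref operands conservatively.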
// Add `nodeOp` to the MDG, creating a new node and adding its memory accesses
// (affine or non-affine) to `memrefAccesses`. Returns the new node on
// success, nullptr otherwise.
static Node *
addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
             DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
  auto &nodes = mdg.nodes;
  // Create a graph node to represent `nodeOp` and record all loads and store
  // accesses it contains.
  LoopNestStateCollector collector;
  collector.collect(nodeOp);
  unsigned newNodeId = mdg.nextNodeId++;
  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
  for (Operation *op : collector.loadOpInsts) {
    node.loads.push_back(op);
    auto memref = cast<AffineReadOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.storeOpInsts) {
    node.stores.push_back(op);
    auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.memrefLoads) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Read>(op, effectedValues);
    // Bail out if any non-memref value is read: unhandled.
    if (llvm::any_of(((ValueRange)effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefLoads.push_back(op);
  }
  for (Operation *op : collector.memrefStores) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Write>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefStores.push_back(op);
  }
  for (Operation *op : collector.memrefFrees) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Free>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefFrees.push_back(op);
  }
  return &node;
}
// Returns the memref being read/written by a memref/affine load/store op.
static Value getMemRef(Operation *memOp) {
  if (auto memrefLoad = dyn_cast<memref::LoadOp>(memOp))
    return memrefLoad.getMemRef();
  if (auto affineLoad = dyn_cast<AffineReadOpInterface>(memOp))
    return affineLoad.getMemRef();
  if (auto memrefStore = dyn_cast<memref::StoreOp>(memOp))
    return memrefStore.getMemRef();
  if (auto affineStore = dyn_cast<AffineWriteOpInterface>(memOp))
    return affineStore.getMemRef();
  llvm_unreachable("unexpected op");
}
// Returns true if there may be a dependence on `memref` from srcNode's
// memory ops to dstNode's memory ops.
static bool mayDependence(const Node &srcNode, const Node &dstNode,
                          Value memref) {
  assert(srcNode.op->getBlock() == dstNode.op->getBlock());
  if (!isa<AffineForOp>(srcNode.op) || !isa<AffineForOp>(dstNode.op))
    return true;

  // Conservative check when a non-affine op is involved: any pair of ops
  // accessing `memref` is treated as a dependence.
  auto hasNonAffineDep = [&](ArrayRef<Operation *> srcMemOps,
                             ArrayRef<Operation *> dstMemOps) {
    return llvm::any_of(srcMemOps, [&](Operation *srcOp) {
      Value srcMemref = getMemRef(srcOp);
      if (srcMemref != memref)
        return false;
      return llvm::find_if(dstMemOps, [&](Operation *dstOp) {
               return getMemRef(dstOp) == memref;
             }) != dstMemOps.end();
    });
  };

  // Non-affine src stores vs all dst accesses.
  SmallVector<Operation *> dstOps;
  llvm::append_range(dstOps, llvm::concat<Operation *const>(
                                 dstNode.loads, dstNode.stores,
                                 dstNode.memrefLoads, dstNode.memrefStores));
  if (hasNonAffineDep(srcNode.memrefStores, dstOps))
    return true;
  // Non-affine src loads vs dst stores.
  dstOps.clear();
  llvm::append_range(dstOps, llvm::concat<Operation *const>(
                                 dstNode.stores, dstNode.memrefStores));
  if (hasNonAffineDep(srcNode.memrefLoads, dstOps))
    return true;
  // Affine src stores vs non-affine dst accesses.
  dstOps.clear();
  llvm::append_range(dstOps, llvm::concat<Operation *const>(
                                 dstNode.memrefLoads, dstNode.memrefStores));
  if (hasNonAffineDep(srcNode.stores, dstOps))
    return true;
  // Affine src loads vs non-affine dst stores.
  dstOps.clear();
  llvm::append_range(dstOps, dstNode.memrefStores);
  if (hasNonAffineDep(srcNode.loads, dstOps))
    return true;

  // Affine-affine access pairs: use the affine dependence analysis.
  for (auto *srcMemOp :
       llvm::concat<Operation *const>(srcNode.stores, srcNode.loads)) {
    MemRefAccess srcAcc(srcMemOp);
    if (srcAcc.memref != memref)
      continue;
    for (auto *destMemOp :
         llvm::concat<Operation *const>(dstNode.stores, dstNode.loads)) {
      MemRefAccess destAcc(destMemOp);
      if (destAcc.memref != memref)
        continue;
      // ... (affine dependence check between srcAcc and destAcc)
    }
  }
  return false;
}
bool MemRefDependenceGraph::init(bool fullAffineDependences) {
  LDBG() << "--- Initializing MDG ---";
  // Map from a memref to the set of ids of the nodes that have ops accessing
  // the memref.
  DenseMap<Value, SetVector<unsigned>> memrefAccesses;

  DenseMap<Operation *, unsigned> forToNodeMap;
  for (Operation &op : block) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
      forToNodeMap[&op] = node->id;
    } else if (isa<AffineReadOpInterface>(op)) {
      // Create graph node for top-level load op.
      Node node(nextNodeId++, &op);
      node.loads.push_back(&op);
      auto memref = cast<AffineReadOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (isa<AffineWriteOpInterface>(op)) {
      // Create graph node for top-level store op.
      Node node(nextNodeId++, &op);
      node.stores.push_back(&op);
      auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (op.getNumResults() > 0 && !op.use_empty()) {
      // Create graph node for a top-level producer of SSA values, which
      // could be used by loop nest nodes.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (!isMemoryEffectFree(&op) &&
               (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
      // Create graph node for a top-level op unless it is known to be
      // memory-effect free.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
      // Fail when a non-handled region-holding op is found: we won't know
      // what such an op or its regions mean.
      LDBG() << "MDG init failed; unknown region-holding op found!";
      return false;
    }
    // Any other op is ignored.
  }

  LDBG() << "Created " << nodes.size() << " nodes";

  // Add dependence edges between nodes which produce SSA values and their
  // users.
  for (auto &idAndNode : nodes) {
    const Node &node = idAndNode.second;
    // Stores don't define SSA values, skip them.
    if (!node.stores.empty())
      continue;
    Operation *opInst = node.op;
    for (Value value : opInst->getResults()) {
      for (Operation *user : value.getUsers()) {
        // Ignore users outside of the block.
        if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
            &block)
          continue;
        SmallVector<AffineForOp, 4> loops;
        getAffineForIVs(*user, &loops);
        // Find the surrounding affine.for nested immediately within the
        // block.
        auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
          return loop->getBlock() == &block;
        });
        if (it == loops.end())
          continue;
        assert(forToNodeMap.count(*it) > 0 && "missing mapping");
        unsigned userLoopNestId = forToNodeMap[*it];
        addEdge(node.id, userLoopNestId, value);
      }
    }
  }

  // Walk memref access lists and add graph edges between dependent nodes.
  for (auto &memrefAndList : memrefAccesses) {
    unsigned n = memrefAndList.second.size();
    Value srcMemRef = memrefAndList.first;
    // Add edges between all dependent pairs among the node list.
    for (unsigned i = 0; i < n; ++i) {
      unsigned srcId = memrefAndList.second[i];
      Node *srcNode = getNode(srcId);
      bool srcHasStoreOrFree =
          srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
      for (unsigned j = i + 1; j < n; ++j) {
        unsigned dstId = memrefAndList.second[j];
        Node *dstNode = getNode(dstId);
        bool dstHasStoreOrFree =
            dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
        if (srcHasStoreOrFree || dstHasStoreOrFree) {
          if (!fullAffineDependences ||
              mayDependence(*srcNode, *dstNode, srcMemRef))
            addEdge(srcId, dstId, srcMemRef);
        }
      }
    }
  }
  return true;
}
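// For example, given the block
//
//   affine.for %i = 0 to 10 {
//     affine.store %cst, %A[%i] : memref<10xf32>
//   }
//   affine.for %j = 0 to 10 {
//     %v = affine.load %A[%j] : memref<10xf32>
//   }
//
// init() creates one node per loop nest and, since the first nest stores to
// %A while the second loads from it, a single memref dependence edge on %A
// from the store nest's node to the load nest's node.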
// Returns the graph node for `id`.
const Node *MemRefDependenceGraph::getNode(unsigned id) const {
  auto it = nodes.find(id);
  assert(it != nodes.end());
  return &it->second;
}

// Returns the graph node for `forOp`.
const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const {
  for (auto &idAndNode : nodes)
    if (idAndNode.second.op == forOp)
      return &idAndNode.second;
  return nullptr;
}
// Adds a node to the graph for `op` and returns its unique identifier.
unsigned MemRefDependenceGraph::addNode(Operation *op) {
  Node node(nextNodeId++, op);
  nodes.insert({node.id, node});
  return node.id;
}
// Removes node `id` (and its associated edges) from the graph.
void MemRefDependenceGraph::removeNode(unsigned id) {
  // Remove each edge in 'inEdges[id]'.
  if (inEdges.count(id) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[id];
    for (auto &inEdge : oldInEdges) {
      removeEdge(inEdge.id, id, inEdge.value);
    }
  }
  // Remove each edge in 'outEdges[id]'.
  if (outEdges.contains(id)) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[id];
    for (auto &outEdge : oldOutEdges) {
      removeEdge(id, outEdge.id, outEdge.value);
    }
  }
  // Erase remaining node state.
  inEdges.erase(id);
  outEdges.erase(id);
  nodes.erase(id);
}
// Returns true if node `id` writes to any memref which escapes or is a
// block argument, i.e., whose contents can be observed outside the block.
bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) const {
  const Node *node = getNode(id);
  for (auto *storeOpInst : node->stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
    auto *op = memref.getDefiningOp();
    // Return true if 'memref' is a block argument.
    if (!op)
      return true;
    // Return true if any use of 'memref' does not dereference it in an
    // affine way.
    for (auto *user : memref.getUsers())
      if (!isa<AffineMapAccessInterface>(*user))
        return true;
  }
  return false;
}
// Returns true iff there is an edge from node `srcId` to node `dstId` for
// `value` if non-null, or for any value otherwise.
bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
                                    Value value) const {
  if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) {
    return false;
  }
  bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) {
    return edge.id == dstId && (!value || edge.value == value);
  });
  bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) {
    return edge.id == srcId && (!value || edge.value == value);
  });
  return hasOutEdge && hasInEdge;
}
// Adds an edge from node `srcId` to node `dstId` for `value`.
void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
                                    Value value) {
  if (!hasEdge(srcId, dstId, value)) {
    outEdges[srcId].push_back({dstId, value});
    inEdges[dstId].push_back({srcId, value});
    if (isa<MemRefType>(value.getType()))
      memrefEdgeCount[value]++;
  }
}
// Removes the edge from node `srcId` to node `dstId` for `value`.
void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
                                       Value value) {
  assert(inEdges.count(dstId) > 0);
  assert(outEdges.count(srcId) > 0);
  if (isa<MemRefType>(value.getType())) {
    assert(memrefEdgeCount.count(value) > 0);
    memrefEdgeCount[value]--;
  }
  // Remove 'srcId' from 'inEdges[dstId]'.
  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
    if ((*it).id == srcId && (*it).value == value) {
      inEdges[dstId].erase(it);
      break;
    }
  }
  // Remove 'dstId' from 'outEdges[srcId]'.
  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
    if ((*it).id == dstId && (*it).value == value) {
      outEdges[srcId].erase(it);
      break;
    }
  }
}
// Returns true if there is a path in the dependence graph from node `srcId`
// to node `dstId`.
bool MemRefDependenceGraph::hasDependencePath(unsigned srcId,
                                              unsigned dstId) const {
  // Worklist state is: <node-id, next-output-edge-index-to-visit>.
  SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
  worklist.push_back({srcId, 0});
  Operation *dstOp = getNode(dstId)->op;
  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
  while (!worklist.empty()) {
    auto &idAndIndex = worklist.back();
    // Return true if we have reached 'dstId'.
    if (idAndIndex.first == dstId)
      return true;
    // Pop and continue if the node has no out edges, or if all out edges
    // have already been visited.
    if (!outEdges.contains(idAndIndex.first) ||
        idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) {
      worklist.pop_back();
      continue;
    }
    // Get graph edge to traverse and advance the edge index.
    const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second];
    ++idAndIndex.second;
    // Nodes "after" dstId in the containing block can't lead back to dstId.
    bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
    if (!afterDst && edge.id != idAndIndex.first)
      worklist.push_back({edge.id, 0});
  }
  return false;
}
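// For example, with edges 0 -> 1 and 1 -> 2 in the graph, the DFS above makes
// hasDependencePath(0, 2) return true, while hasDependencePath(2, 0) returns
// false.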
// Returns the number of in-edges of node `id` on `memref` whose source node
// stores to `memref`.
unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
                                                          Value memref) const {
  unsigned inEdgeCount = 0;
  for (const Edge &inEdge : inEdges.lookup(id)) {
    if (inEdge.value == memref) {
      const Node *srcNode = getNode(inEdge.id);
      // Only count in-edges from nodes that access 'memref'.
      if (srcNode->getStoreOpCount(memref) > 0)
        ++inEdgeCount;
    }
  }
  return inEdgeCount;
}
// Returns the number of outgoing edges from node `id` on `memref` if
// non-null, or across all memrefs otherwise.
unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id,
                                                Value memref) const {
  unsigned outEdgeCount = 0;
  for (const auto &outEdge : outEdges.lookup(id))
    if (!memref || outEdge.value == memref)
      ++outEdgeCount;
  return outEdgeCount;
}
/// Return all nodes which define SSA values used in node 'id'.
void MemRefDependenceGraph::gatherDefiningNodes(
    unsigned id, DenseSet<unsigned> &definingNodes) const {
  for (const Edge &edge : inEdges.lookup(id))
    // By definition of an edge, if the edge value is a non-memref value,
    // then the dependence is between a graph node which defines an SSA value
    // and another graph node which uses the SSA value.
    if (!isa<MemRefType>(edge.value.getType()))
      definingNodes.insert(edge.id);
}
// Computes and returns an insertion point operation, before which the fused
// <srcId, dstId> loop nest can be inserted while preserving dependences.
// Returns nullptr if no such insertion point is found.
Operation *
MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
                                                      unsigned dstId) const {
  if (!outEdges.contains(srcId))
    return getNode(dstId)->op;

  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
  DenseSet<unsigned> definingNodes;
  gatherDefiningNodes(dstId, definingNodes);
  if (llvm::any_of(definingNodes,
                   [&](unsigned id) { return hasDependencePath(srcId, id); })) {
    LDBG() << "Can't fuse: a defining op with a user in the dst "
           << "loop has dependence from the src loop";
    return nullptr;
  }

  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
  SmallPtrSet<Operation *, 2> srcDepInsts;
  for (auto &outEdge : outEdges.lookup(srcId))
    if (outEdge.id != dstId)
      srcDepInsts.insert(getNode(outEdge.id)->op);
  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
  SmallPtrSet<Operation *, 2> dstDepInsts;
  for (auto &inEdge : inEdges.lookup(dstId))
    if (inEdge.id != srcId)
      dstDepInsts.insert(getNode(inEdge.id)->op);

  Operation *srcNodeInst = getNode(srcId)->op;
  Operation *dstNodeInst = getNode(dstId)->op;

  // Record the first position of a dependence target of 'srcId' and the last
  // position of a dependence source of 'dstId' between the two nodes.
  std::optional<unsigned> firstSrcDepPos;
  std::optional<unsigned> lastDstDepPos;
  SmallVector<Operation *, 2> depInsts;
  unsigned pos = 0;
  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
       it != Block::iterator(dstNodeInst); ++it, ++pos) {
    Operation *op = &(*it);
    if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
      firstSrcDepPos = pos;
    if (dstDepInsts.count(op) > 0)
      lastDstDepPos = pos;
    depInsts.push_back(op);
  }

  if (firstSrcDepPos.has_value()) {
    if (lastDstDepPos.has_value()) {
      if (*firstSrcDepPos <= *lastDstDepPos) {
        // No valid insertion point exists which preserves dependences.
        return nullptr;
      }
    }
    // Return the insertion point at 'firstSrcDepPos'.
    return depInsts[*firstSrcDepPos];
  }
  // No dependence targets in range; return 'dstNodeInst' insertion point.
  return dstNodeInst;
}
// Updates edge mappings from node `srcId` to node `dstId` after fusing them,
// taking into account any new memrefs in `privateMemRefs`.
void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
                                        const DenseSet<Value> &privateMemRefs,
                                        bool removeSrcId) {
  // For each edge in 'inEdges[srcId]': add a new edge remapped to 'dstId'.
  if (inEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
    for (auto &inEdge : oldInEdges) {
      // Skip edges on memrefs which are private to the fused nest.
      if (!privateMemRefs.contains(inEdge.value))
        addEdge(inEdge.id, dstId, inEdge.value);
    }
  }
  // For each edge in 'outEdges[srcId]': remove the edge to 'dstId'; remap
  // all remaining edges to 'dstId' if 'removeSrcId' is true.
  if (outEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
    for (auto &outEdge : oldOutEdges) {
      if (outEdge.id == dstId)
        removeEdge(srcId, outEdge.id, outEdge.value);
      else if (removeSrcId) {
        addEdge(dstId, outEdge.id, outEdge.value);
        removeEdge(srcId, outEdge.id, outEdge.value);
      }
    }
  }
  // Remove any in-edges of 'dstId' on a private memref.
  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
    SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
    for (auto &inEdge : oldInEdges)
      if (privateMemRefs.count(inEdge.value) > 0)
        removeEdge(inEdge.id, dstId, inEdge.value);
  }
}
// Updates edge mappings for the sibling node `sibId` fused into `dstId`.
void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
  // For each edge in 'inEdges[sibId]', remap the edge to 'dstId'.
  if (inEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
    for (auto &inEdge : oldInEdges) {
      addEdge(inEdge.id, dstId, inEdge.value);
      removeEdge(inEdge.id, sibId, inEdge.value);
    }
  }
  // For each edge in 'outEdges[sibId]', remap the edge to start at 'dstId'.
  if (outEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
    for (auto &outEdge : oldOutEdges) {
      addEdge(dstId, outEdge.id, outEdge.value);
      removeEdge(sibId, outEdge.id, outEdge.value);
    }
  }
}
// Adds the specified ops to the lists of node `id`.
void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads,
                                      ArrayRef<Operation *> stores,
                                      ArrayRef<Operation *> memrefLoads,
                                      ArrayRef<Operation *> memrefStores,
                                      ArrayRef<Operation *> memrefFrees) {
  Node *node = getNode(id);
  llvm::append_range(node->loads, loads);
  llvm::append_range(node->stores, stores);
  llvm::append_range(node->memrefLoads, memrefLoads);
  llvm::append_range(node->memrefStores, memrefStores);
  llvm::append_range(node->memrefFrees, memrefFrees);
}

void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
  Node *node = getNode(id);
  node->loads.clear();
  node->stores.clear();
}
void MemRefDependenceGraph::forEachMemRefInputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (inEdges.count(id) > 0)
    forEachMemRefEdge(inEdges.at(id), callback);
}

void MemRefDependenceGraph::forEachMemRefOutputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (outEdges.count(id) > 0)
    forEachMemRefEdge(outEdges.at(id), callback);
}

void MemRefDependenceGraph::forEachMemRefEdge(
    ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
  for (const auto &edge : edges) {
    // Skip if 'edge' is not a memref dependence edge.
    if (!isa<MemRefType>(edge.value.getType()))
      continue;
    assert(nodes.count(edge.id) > 0);
    // Skip if 'edge.id' is not a loop nest.
    if (!isa<AffineForOp>(getNode(edge.id)->op))
      continue;
    // Visit current edge.
    callback(edge);
  }
}
void MemRefDependenceGraph::print(raw_ostream &os) const {
  os << "\nMemRefDependenceGraph\n";
  for (const auto &idAndNode : nodes) {
    os << "Node: " << idAndNode.first << "\n";
    auto it = inEdges.find(idAndNode.first);
    if (it != inEdges.end()) {
      for (const auto &e : it->second)
        os << "  InEdge: " << e.id << " " << e.value << "\n";
    }
    it = outEdges.find(idAndNode.first);
    if (it != outEdges.end()) {
      for (const auto &e : it->second)
        os << "  OutEdge: " << e.id << " " << e.value << "\n";
    }
  }
}
// Populates 'loops' with the affine.for ops surrounding 'op', ordered from
// the outermost one to the innermost.
void mlir::affine::getAffineForIVs(Operation &op,
                                   SmallVectorImpl<AffineForOp> *loops) {
  auto *currOp = op.getParentOp();
  // Traverse up the hierarchy collecting all 'affine.for' operations while
  // skipping over others such as 'affine.if'.
  while (currOp) {
    if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
      loops->push_back(currAffineForOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(loops->begin(), loops->end());
}
// Populates 'ops' with the affine operations enclosing 'op', ordered from
// outermost to innermost.
void mlir::affine::getEnclosingAffineOps(Operation &op,
                                         SmallVectorImpl<Operation *> *ops) {
  ops->clear();
  Operation *currOp = op.getParentOp();
  // Traverse up the hierarchy collecting all 'affine.for', 'affine.if', and
  // 'affine.parallel' operations.
  while (currOp) {
    if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
      ops->push_back(currOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(ops->begin(), ops->end());
}
// Adds to 'cst' constraints which represent the original loop bounds on
// 'ivs' in 'this'.
LogicalResult ComputationSliceState::getSourceAsConstraints(
    FlatAffineValueConstraints &cst) const {
  assert(!ivs.empty() && "Cannot have a slice without its IVs");
  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
                                   /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(cst.addAffineForOpDomain(loop)))
      return failure();
  }
  return success();
}

// Adds the slice lower/upper bounds in 'this' to 'cst'.
LogicalResult
ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const {
  assert(!lbOperands.empty());
  // Adds src 'ivs' as dimension variables in 'cst'.
  unsigned numDims = ivs.size();
  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
  unsigned numSymbols = lbOperands[0].size();

  SmallVector<Value, 4> values(ivs);
  // Append 'ivs' then 'operands' to 'values'.
  values.append(lbOperands[0].begin(), lbOperands[0].end());

  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);

  // Add loop bound constraints for values which are loop IVs of the
  // destination of fusion and equality constraints for symbols which are
  // constants.
  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
    Value value = values[i];
    assert(cst->containsVar(value) && "value expected to be present");
    if (isValidSymbol(value)) {
      // Check if the symbol is a constant.
      if (std::optional<int64_t> cOp = getConstantIntValue(value))
        cst->addBound(BoundType::EQ, value, cOp.value());
    } else if (auto loop = getForInductionVarOwner(value)) {
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }

  // Add slice bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'.
  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
  assert(succeeded(ret) &&
         "should not fail as we never have semi-affine slice maps");
  (void)ret;
  return success();
}
void ComputationSliceState::dump() const {
  llvm::errs() << "\tIVs:\n";
  for (Value iv : ivs)
    llvm::errs() << "\t\t" << iv << "\n";

  llvm::errs() << "\tLBs:\n";
  for (auto en : llvm::enumerate(lbs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value lbOp : lbOperands[en.index()])
      llvm::errs() << "\t\t\t" << lbOp << "\n";
  }

  llvm::errs() << "\tUBs:\n";
  for (auto en : llvm::enumerate(ubs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value ubOp : ubOperands[en.index()])
      llvm::errs() << "\t\t\t" << ubOp << "\n";
  }
}
/// Fast check to determine if the computation slice is maximal: each slice
/// dimension must map one-to-one onto an existing dst dimension and the src
/// and dst loops for those dimensions must have the same bounds. Returns
/// false if the bounds provably differ, std::nullopt if inconclusive.
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
         "Unexpected number of lbs, ubs and ivs in slice");

  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
    AffineMap lbMap = lbs[i];
    AffineMap ubMap = ubs[i];

    // Check if this slice is just an equality along this dimension. The lb
    // result must not be a bare constant, which would describe a single
    // iteration rather than a full loop.
    if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
        ubMap.getNumResults() != 1 ||
        lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
        isa<AffineConstantExpr>(lbMap.getResult(0)))
      return std::nullopt;

    // Limited support: expect the lb result to be just a loop dimension.
    auto result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
    if (!result)
      return std::nullopt;

    // Retrieve dst loop bounds.
    AffineForOp dstLoop =
        getForInductionVarOwner(lbOperands[i][result.getPosition()]);
    if (!dstLoop)
      return std::nullopt;
    AffineMap dstLbMap = dstLoop.getLowerBoundMap();
    AffineMap dstUbMap = dstLoop.getUpperBoundMap();

    // Retrieve src loop bounds.
    AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
    assert(srcLoop && "Expected affine for");
    AffineMap srcLbMap = srcLoop.getLowerBoundMap();
    AffineMap srcUbMap = srcLoop.getUpperBoundMap();

    // Limited support: expect simple single-result constant bound maps.
    if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
        dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
      return std::nullopt;
    AffineExpr srcLbResult = srcLbMap.getResult(0);
    AffineExpr dstLbResult = dstLbMap.getResult(0);
    AffineExpr srcUbResult = srcUbMap.getResult(0);
    AffineExpr dstUbResult = dstUbMap.getResult(0);
    if (!isa<AffineConstantExpr>(srcLbResult) ||
        !isa<AffineConstantExpr>(srcUbResult) ||
        !isa<AffineConstantExpr>(dstLbResult) ||
        !isa<AffineConstantExpr>(dstUbResult))
      return std::nullopt;

    // If src and dst loop bounds or steps differ, maximality can't be proven.
    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
        srcLoop.getStep() != dstLoop.getStep())
      return false;
  }
  return true;
}
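// For example, a slice dimension with lb = (d0) -> (d0) and
// ub = (d0) -> (d0 + 1) maps exactly one src iteration to each dst iteration;
// if the corresponding src and dst loops also have identical constant bounds
// and steps, the fast check above proves the slice maximal without building
// any constraint system.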
/// Returns true if it is deterministically verified that the original
/// iteration space of the slice is contained within the new iteration space
/// created after fusion.
std::optional<bool> ComputationSliceState::isSliceValid() const {
  // Fast check: the slice is valid if the iteration space of the source loop
  // equals the iteration space of the slice.
  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
  if (isValidFastCheck && *isValidFastCheck)
    return true;

  // Create constraints for the source loop nest from which the slice is
  // computed.
  FlatAffineValueConstraints srcConstraints;
  if (failed(getSourceAsConstraints(srcConstraints))) {
    LDBG() << "Unable to compute source's domain";
    return std::nullopt;
  }
  // The set difference utility below cannot handle symbols in its operands.
  if (srcConstraints.getNumSymbolVars() > 0) {
    LDBG() << "Cannot handle symbols in source domain";
    return std::nullopt;
  }
  // TODO: Handle local vars in the source domain while using the 'projectOut'
  // utility below.
  if (srcConstraints.getNumLocalVars() != 0) {
    LDBG() << "Cannot handle locals in source domain";
    return std::nullopt;
  }

  // Create constraints for the slice loop nest that would be created if
  // fusion succeeds.
  FlatAffineValueConstraints sliceConstraints;
  if (failed(getAsConstraints(&sliceConstraints))) {
    LDBG() << "Unable to compute slice's domain";
    return std::nullopt;
  }

  // Project out every dimension other than the 'ivs' so that the slice's
  // domain is expressed purely in terms of the source's IVs.
  sliceConstraints.projectOut(ivs.size(),
                              sliceConstraints.getNumVars() - ivs.size());

  LDBG() << "Domain of the source of the slice:\n"
         << "Source constraints:" << srcConstraints
         << "\nDomain of the slice if this fusion succeeds "
         << "(expressed in terms of its source's IVs):\n"
         << "Slice constraints:" << sliceConstraints;

  // The slice is valid iff its domain is a subset of the source's domain.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = sliceSet.subtract(srcSet);
  if (!diffSet.isIntegerEmpty()) {
    LDBG() << "Incorrect slice";
    return false;
  }
  return true;
}
/// Returns true if the computation slice encloses all the iterations of the
/// sliced loop nest, false if it does not, and std::nullopt if this cannot
/// be determined.
std::optional<bool> ComputationSliceState::isMaximal() const {
  // Fast check; if inconclusive, proceed with the more expensive analysis.
  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
  if (isMaximalFastCheck)
    return isMaximalFastCheck;

  // Create constraints for the src loop nest being sliced.
  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
                                            /*numSymbols=*/0,
                                            /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(srcConstraints.addAffineForOpDomain(loop)))
      return std::nullopt;
  }

  // Create constraints for the slice using the dst loop nest information;
  // retrieve existing dst loops from the lbOperands.
  SmallVector<Value> consumerIVs;
  for (Value lbOp : lbOperands[0])
    if (getForInductionVarOwner(lbOp))
      consumerIVs.push_back(lbOp);

  // Add empty IV Values for those new loops that are not equalities and,
  // therefore, are not yet materialized in the IR.
  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
    consumerIVs.push_back(Value());

  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
                                              /*numSymbols=*/0,
                                              /*numLocals=*/0, consumerIVs);
  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
    return std::nullopt;

  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
    // Constraint dims are different; the integer set difference can't be
    // computed, so we don't know if the slice is maximal.
    return std::nullopt;

  // The slice is maximal iff the difference of the two sets is empty.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = srcSet.subtract(sliceSet);
  return diffSet.isIntegerEmpty();
}
unsigned MemRefRegion::getRank() const {
  return cast<MemRefType>(memref.getType()).getRank();
}

// Returns a constant upper bound on the number of elements in this region if
// bounded by a known constant, std::nullopt otherwise.
std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
    SmallVectorImpl<int64_t> *shape, SmallVectorImpl<AffineMap> *lbs) const {
  auto memRefType = cast<MemRefType>(memref.getType());
  MLIRContext *context = memref.getContext();
  unsigned rank = memRefType.getRank();
  if (shape)
    shape->reserve(rank);

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  // Use a copy of the region constraints that has upper/lower bounds added
  // for each memref dimension with static size, to guard against potential
  // over-approximation from projection or union bounding box.
  FlatLinearValueConstraints cstWithShapeBounds(cst);
  for (unsigned r = 0; r < rank; r++) {
    cstWithShapeBounds.addBound(BoundType::LB, r, 0);
    int64_t dimSize = memRefType.getDimSize(r);
    if (ShapedType::isDynamic(dimSize))
      continue;
    cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
  }

  // Find a constant upper bound on the extent of this memref region along
  // each dimension.
  int64_t numElements = 1;
  int64_t diffConstant;
  for (unsigned d = 0; d < rank; d++) {
    AffineMap lb;
    std::optional<int64_t> diff =
        cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb);
    if (diff.has_value()) {
      diffConstant = *diff;
      assert(diffConstant >= 0 && "dim size bound cannot be negative");
    } else {
      // If no constant bound is found, the extent can still be bound by the
      // memref's dim size if the latter is statically known.
      auto dimSize = memRefType.getDimSize(d);
      if (ShapedType::isDynamic(dimSize))
        return std::nullopt;
      diffConstant = dimSize;
      // The lower bound becomes 0.
      lb = AffineMap::get(/*dimCount=*/0, cst.getNumSymbolVars(),
                          getAffineConstantExpr(0, context));
    }
    numElements *= diffConstant;
    if (lbs)
      lbs->push_back(lb);
    if (shape)
      shape->push_back(diffConstant);
  }
  return numElements;
}
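// For example, for a region over memref<64xf32> whose constraints bound the
// single data dimension as %i <= d0 <= %i + 31, the extent along d0 has the
// constant bound 32, so the bounding shape is [32] and 32 is returned as the
// element count.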
// Gets the lower and upper bound maps for the dimensional variable at `pos`.
void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                                         AffineMap &ubMap) const {
  assert(pos < cst.getNumDimVars() && "invalid position");
  auto memRefType = cast<MemRefType>(memref.getType());
  unsigned rank = memRefType.getRank();

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  auto boundPairs = cst.getLowerAndUpperBound(
      pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
      /*localExprs=*/{}, memRefType.getContext());
  lbMap = boundPairs.first;
  ubMap = boundPairs.second;
  assert(lbMap && "lower bound for a region must exist");
  assert(ubMap && "upper bound for a region must exist");
  assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
  assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}

LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
  assert(memref == other.memref);
  return cst.unionBoundingBox(*other.getConstraints());
}
// Computes the memory region accessed by this memref with the region
// represented as constraints symbolic/parametric in 'loopDepth' loops
// surrounding `op` and any additional function symbols.
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                    const ComputationSliceState *sliceState,
                                    bool addMemRefDimBounds, bool dropLocalVars,
                                    bool dropOuterIvs) {
  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
         "affine read/write op expected");

  MemRefAccess access(op);
  memref = access.memref;
  write = access.isStore();

  unsigned rank = access.getRank();

  LDBG() << "MemRefRegion::compute: " << *op << " depth: " << loopDepth;

  // 0-d memrefs.
  if (rank == 0) {
    SmallVector<Value, 4> ivs;
    getAffineIVs(*op, ivs);
    assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
    // The first 'loopDepth' IVs are symbols for this region.
    ivs.resize(loopDepth);
    // A 0-d memref has a 0-d region.
    cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
    return success();
  }

  // Build the constraints for this region.
  AffineValueMap accessValueMap;
  access.getAccessMap(&accessValueMap);
  AffineMap accessMap = accessValueMap.getAffineMap();

  unsigned numDims = accessMap.getNumDims();
  unsigned numSymbols = accessMap.getNumSymbols();
  unsigned numOperands = accessValueMap.getNumOperands();
  // Merge operands with slice operands.
  SmallVector<Value, 4> operands;
  operands.resize(numOperands);
  for (unsigned i = 0; i < numOperands; ++i)
    operands[i] = accessValueMap.getOperand(i);

  if (sliceState != nullptr) {
    operands.reserve(operands.size() + sliceState->lbOperands[0].size());
    // Append slice operands to 'operands' as symbols.
    for (auto extraOperand : sliceState->lbOperands[0]) {
      if (!llvm::is_contained(operands, extraOperand)) {
        operands.push_back(extraOperand);
        numSymbols++;
      }
    }
  }
  // First associate the dims and symbols of the access map with the dims and
  // symbols of cst; this changes below once cst is fully constructed.
  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);

  // Add inequalities for loop lower/upper bounds, and equalities for
  // constant-valued symbols.
  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
    auto operand = operands[i];
    if (auto affineFor = getForInductionVarOwner(operand)) {
      // Note that cst can now have more dimensions than accessMap if the
      // bounds expressions involve outer loops or other symbols.
      if (failed(cst.addAffineForOpDomain(affineFor)))
        return failure();
    } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
      if (failed(cst.addAffineParallelOpDomain(parallelOp)))
        return failure();
    } else if (isValidSymbol(operand)) {
      // Check if the symbol is a constant.
      Value symbol = operand;
      if (auto constVal = getConstantIntValue(symbol))
        cst.addBound(BoundType::EQ, symbol, constVal.value());
    } else {
      LDBG() << "unknown affine dimensional value";
      return failure();
    }
  }

  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
  if (sliceState != nullptr) {
    // Add dim and symbol slice operands.
    for (auto operand : sliceState->lbOperands[0]) {
      if (failed(cst.addInductionVarOrTerminalSymbol(operand)))
        return failure();
    }
    // Add upper/lower bounds from 'sliceState' to 'cst'.
    LogicalResult ret =
        cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
                           sliceState->lbOperands[0]);
    assert(succeeded(ret) &&
           "should not fail as we never have semi-affine slice maps");
    (void)ret;
  }

  // Add access function equalities to connect loop IVs to data dimensions.
  if (failed(cst.composeMap(&accessValueMap))) {
    op->emitError("getMemRefRegion: compose affine map failed");
    LDBG() << "Access map: " << accessValueMap.getAffineMap();
    return failure();
  }

  // Set all variables appearing after the first 'rank' variables as symbolic
  // variables, so that the memref dimensions are the region's dim variables.
  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);

  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on
  // which this memref region is symbolic.
  SmallVector<Value, 4> enclosingIVs;
  getAffineIVs(*op, enclosingIVs);
  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
  enclosingIVs.resize(loopDepth);
  SmallVector<Value, 4> vars;
  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
  for (auto en : llvm::enumerate(vars)) {
    if (isAffineInductionVar(en.value()) &&
        !llvm::is_contained(enclosingIVs, en.value())) {
      if (dropOuterIvs) {
        cst.projectOut(en.value());
      } else {
        unsigned varPosition;
        cst.findVar(en.value(), &varPosition);
        auto varKind = cst.getVarKindAt(varPosition);
        varPosition -= cst.getNumDimVars();
        cst.convertToLocal(varKind, varPosition, varPosition + 1);
      }
    }
  }

  // Project out any local variables (added for mod/div exprs) if specified.
  if (dropLocalVars)
    cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());

  // Constant fold any symbolic variables.
  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
                           /*num=*/cst.getNumSymbolVars());

  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");

  // Add upper/lower bounds for each memref dimension with static size to
  // guard against potential over-approximation from projection.
  if (addMemRefDimBounds) {
    auto memRefType = cast<MemRefType>(memref.getType());
    for (unsigned r = 0; r < rank; r++) {
      cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
      if (memRefType.isDynamicDim(r))
        continue;
      cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
    }
  }
  cst.removeTrivialRedundancy();

  LDBG() << "Memory region: " << cst;
  return success();
}
std::optional<int64_t>
mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
  auto elementType = memRefType.getElementType();

  unsigned sizeInBits;
  if (elementType.isIntOrFloat()) {
    sizeInBits = elementType.getIntOrFloatBitWidth();
  } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
    if (vectorType.getElementType().isIntOrFloat())
      sizeInBits =
          vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
    else
      return std::nullopt;
  } else {
    return std::nullopt;
  }
  return llvm::divideCeil(sizeInBits, 8);
}
// Returns the size of this MemRefRegion in bytes.
std::optional<int64_t> MemRefRegion::getRegionSize() {
  auto memRefType = cast<MemRefType>(memref.getType());

  if (!memRefType.getLayout().isIdentity()) {
    LDBG() << "Non-identity layout map not yet supported";
    return std::nullopt;
  }

  // Compute the extents of the buffer.
  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
  if (!numElements) {
    LDBG() << "Dynamic shapes not yet supported";
    return std::nullopt;
  }
  std::optional<int64_t> eltSize =
      getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!eltSize)
    return std::nullopt;
  return *eltSize * *numElements;
}
/// Returns the size of a statically shaped memref with an int, float, or
/// vector-of-int/float element type, in bytes; std::nullopt otherwise.
std::optional<uint64_t>
mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
  if (!memRefType.hasStaticShape())
    return std::nullopt;
  auto elementType = memRefType.getElementType();
  if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
    return std::nullopt;

  std::optional<int64_t> sizeInBytes =
      getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!sizeInBytes)
    return std::nullopt;
  for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
    sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
  }
  return *sizeInBytes;
}
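// For example, memref<4x8xf32> has a 4-byte element type, so the computed
// size is 4 * 8 * 4 = 128 bytes.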
template <typename LoadOrStoreOp>
LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
                                                    bool emitError) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "argument should be either a AffineReadOpInterface or a "
                "AffineWriteOpInterface");

  Operation *op = loadOrStoreOp.getOperation();
  MemRefRegion region(op->getLoc());
  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
                            /*addMemRefDimBounds=*/false)))
    return success();

  LDBG() << "Memory region: " << region.getConstraints();

  bool outOfBounds = false;
  unsigned rank = loadOrStoreOp.getMemRefType().getRank();

  // For each dimension, check for out of bounds.
  SmallVector<int64_t, 4> ineq(rank + 1, 0);
  for (unsigned r = 0; r < rank; r++) {
    // Intersect the memory region with a constraint capturing an out of
    // bounds access and check if the resulting system is feasible; if it is,
    // at least one point is out of bounds.
    FlatAffineValueConstraints ucst(*region.getConstraints());
    int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
    // TODO: handle dynamic dim sizes.
    if (ShapedType::isDynamic(dimSize))
      continue;

    // Check for overflow: d_i >= memref dim size.
    ucst.addBound(BoundType::LB, r, dimSize);
    outOfBounds = !ucst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of upper bound access along dimension #" << (r + 1);
    }

    // Check for a negative index: d_i <= -1.
    FlatAffineValueConstraints lcst(*region.getConstraints());
    llvm::fill(ineq, 0);
    lcst.addBound(BoundType::UB, r, -1);
    outOfBounds = !lcst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of lower bound access along dimension #" << (r + 1);
    }
  }
  return failure(outOfBounds);
}

// Explicit instantiations.
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
                                      bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
                                      bool emitError);
// Returns in 'positions' the Block positions of 'op' in each ancestor Block
// from the Block containing 'op', stopping at 'limitBlock'.
static void findInstPosition(Operation *op, Block *limitBlock,
                             SmallVectorImpl<unsigned> *positions) {
  Block *block = op->getBlock();
  while (block != limitBlock) {
    // FIXME: This algorithm is unnecessarily O(n) and should be improved to
    // not depend on linear scans.
    int instPosInBlock = std::distance(block->begin(), op->getIterator());
    positions->push_back(instPosInBlock);
    op = block->getParentOp();
    block = op->getBlock();
  }
  std::reverse(positions->begin(), positions->end());
}
// Returns the Operation in a possibly nested set of Blocks, where the
// position of the operation is represented by 'positions', which has a Block
// position for each level of nesting.
static Operation *getInstAtPosition(ArrayRef<unsigned> positions,
                                    unsigned level, Block *block) {
  unsigned i = 0;
  for (auto &op : *block) {
    if (i != positions[level]) {
      ++i;
      continue;
    }
    if (level == positions.size() - 1)
      return &op;
    if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
      return getInstAtPosition(positions, level + 1,
                               childAffineForOp.getBody());

    for (auto &region : op.getRegions()) {
      for (auto &b : region)
        if (auto *ret = getInstAtPosition(positions, level + 1, &b))
          return ret;
    }
    return nullptr;
  }
  return nullptr;
}
// Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
                                            FlatAffineValueConstraints *cst) {
  for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
    auto value = cst->getValue(i);
    if (ivs.count(value) == 0) {
      assert(isAffineForInductionVar(value));
      auto loop = getForInductionVarOwner(value);
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }
  return success();
}
/// Returns the innermost common loop depth for the set of operations in
/// 'ops', optionally returning the surrounding loops in 'surroundingLoops'.
unsigned mlir::affine::getInnermostCommonLoopDepth(
    ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
  unsigned numOps = ops.size();
  assert(numOps > 0 && "Expected at least one operation");

  std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
  unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
  for (unsigned i = 0; i < numOps; ++i) {
    getAffineForIVs(*ops[i], &loops[i]);
    loopDepthLimit =
        std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
  }

  unsigned loopDepth = 0;
  for (unsigned d = 0; d < loopDepthLimit; ++d) {
    unsigned i;
    for (i = 1; i < numOps; ++i) {
      if (loops[i - 1][d] != loops[i][d])
        return loopDepth;
    }
    if (surroundingLoops)
      surroundingLoops->push_back(loops[i - 1][d]);
    ++loopDepth;
  }
  return loopDepth;
}
/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
/// then verifies if the union is valid.
SliceComputationResult mlir::affine::computeSliceUnion(
    ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB, unsigned loopDepth,
    unsigned numCommonLoops, bool isBackwardSlice,
    ComputationSliceState *sliceUnion) {
  // Compute the union of slice bounds between all pairs in 'opsA' and 'opsB'
  // in 'sliceUnionCst'.
  FlatAffineValueConstraints sliceUnionCst;
  assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
  std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
  for (Operation *a : opsA) {
    MemRefAccess srcAccess(a);
    for (Operation *b : opsB) {
      MemRefAccess dstAccess(b);
      if (srcAccess.memref != dstAccess.memref)
        continue;
      // Check if 'loopDepth' exceeds the nesting depth of src/dst ops.
      if ((!isBackwardSlice && loopDepth > getNestingDepth(a)) ||
          (isBackwardSlice && loopDepth > getNestingDepth(b))) {
        LDBG() << "Invalid loop depth";
        return SliceComputationResult::GenericFailure;
      }

      bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
                              isa<AffineReadOpInterface>(dstAccess.opInst);
      FlatAffineValueConstraints dependenceConstraints;
      // Check dependence between 'srcAccess' and 'dstAccess'.
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
          &dependenceConstraints, /*dependenceComponents=*/nullptr,
          /*allowRAR=*/readReadAccesses);
      if (result.value == DependenceResult::Failure) {
        LDBG() << "Dependence check failed";
        return SliceComputationResult::GenericFailure;
      }
      if (result.value == DependenceResult::NoDependence)
        continue;
      dependentOpPairs.emplace_back(a, b);

      // Compute slice bounds for 'srcAccess' and 'dstAccess'.
      ComputationSliceState tmpSliceState;
      getComputationSliceState(a, b, dependenceConstraints, loopDepth,
                               isBackwardSlice, &tmpSliceState);

      if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
        // Initialize 'sliceUnionCst' with the bounds computed above.
        if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
          LDBG() << "Unable to compute slice bound constraints";
          return SliceComputationResult::GenericFailure;
        }
        assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
        continue;
      }

      // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
      FlatAffineValueConstraints tmpSliceCst;
      if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
        LDBG() << "Unable to compute slice bound constraints";
        return SliceComputationResult::GenericFailure;
      }

      // Align coordinate spaces of the two systems if needed.
      if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
        // Pre-alignment: record the loop IVs used in each system.
        SmallPtrSet<Value, 8> sliceUnionIVs;
        for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
          sliceUnionIVs.insert(sliceUnionCst.getValue(k));
        SmallPtrSet<Value, 8> tmpSliceIVs;
        for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
          tmpSliceIVs.insert(tmpSliceCst.getValue(k));

        sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);

        // Post-alignment: add loop IV bounds missing after alignment.
        if (failed(addMissingLoopIVBounds(sliceUnionIVs, &tmpSliceCst)) ||
            failed(addMissingLoopIVBounds(tmpSliceIVs, &sliceUnionCst)))
          return SliceComputationResult::GenericFailure;
      }
      // Compute the union bounding box of the two systems.
      if (sliceUnionCst.getNumLocalVars() > 0 ||
          tmpSliceCst.getNumLocalVars() > 0 ||
          failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
        LDBG() << "Unable to compute union bounding box of slice bounds";
        return SliceComputationResult::GenericFailure;
      }
    }
  }

  // Empty union.
  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
    LDBG() << "empty slice union - unexpected";
    return SliceComputationResult::GenericFailure;
  }

  // Gather loops surrounding ops from the loop nest where the slice will be
  // inserted.
  SmallVector<Operation *, 4> ops;
  for (auto &dep : dependentOpPairs) {
    ops.push_back(isBackwardSlice ? dep.second : dep.first);
  }
  SmallVector<AffineForOp, 4> surroundingLoops;
  unsigned innermostCommonLoopDepth =
      getInnermostCommonLoopDepth(ops, &surroundingLoops);
  if (loopDepth > innermostCommonLoopDepth) {
    LDBG() << "Exceeds max loop depth";
    return SliceComputationResult::GenericFailure;
  }

  // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
  unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();

  // Convert any dst loop IVs which are symbol variables to dim variables.
  sliceUnionCst.convertLoopIVSymbolsToDims();
  sliceUnion->clearBounds();
  sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
  sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());

  // Get slice bounds from the union constraints 'sliceUnionCst'.
  sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
                               opsA[0]->getContext(), &sliceUnion->lbs,
                               &sliceUnion->ubs);

  // Add slice bound operands of the union.
  SmallVector<Value, 4> sliceBoundOperands;
  sliceUnionCst.getValues(numSliceLoopIVs,
                          sliceUnionCst.getNumDimAndSymbolVars(),
                          &sliceBoundOperands);

  // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
  sliceUnion->ivs.clear();
  sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);

  // Set the loop nest insertion point to block start at 'loopDepth' for
  // backward slices, and to the end for forward slices.
  sliceUnion->insertPoint =
      isBackwardSlice
          ? surroundingLoops[loopDepth - 1].getBody()->begin()
          : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());

  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
  // canonicalization.
  sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
  sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);

  // Check if the computed slice is valid.
  std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
  if (!isSliceValid) {
    LDBG() << "Cannot determine if the slice is valid";
    return SliceComputationResult::GenericFailure;
  }
  if (!*isSliceValid)
    return SliceComputationResult::IncorrectSliceFailure;

  return SliceComputationResult::Success;
}
// Returns the constant difference between the single results of `ubMap` and
// `lbMap`, if one exists.
static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
                                                  AffineMap ubMap) {
  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
  assert(lbMap.getNumDims() == ubMap.getNumDims());
  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
  AffineExpr lbExpr(lbMap.getResult(0));
  AffineExpr ubExpr(ubMap.getResult(0));
  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
                                         lbMap.getNumSymbols());
  auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
  if (!cExpr)
    return std::nullopt;
  return cExpr.getValue();
}
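// For example, with lbMap = (d0) -> (d0) and ubMap = (d0) -> (d0 + 8), the
// simplified difference is the constant 8. With symbolic bounds such as
// lbMap = (d0)[s0] -> (d0) and ubMap = (d0)[s0] -> (d0 + s0), no constant
// difference exists and std::nullopt is returned.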
// Builds a map 'tripCountMap' from AffineForOp to constant trip count for
// the loop nest surrounding represented by the slice loop bounds in 'slice'.
// Returns true on success, false if a non-constant trip count was
// encountered.
bool mlir::affine::buildSliceTripCountMap(
    const ComputationSliceState &slice,
    llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
  unsigned numSrcLoopIVs = slice.ivs.size();
  // Populate map from AffineForOp -> trip count.
  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
    AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
    auto *op = forOp.getOperation();
    AffineMap lbMap = slice.lbs[i];
    AffineMap ubMap = slice.ubs[i];
    // Null or empty bound maps mean this loop was not sliced at all and the
    // entire loop runs.
    if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
        ubMap.getNumResults() == 0) {
      if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
        (*tripCountMap)[op] =
            forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
        continue;
      }
      std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
      if (maybeConstTripCount.has_value()) {
        (*tripCountMap)[op] = *maybeConstTripCount;
        continue;
      }
      return false;
    }
    std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
    // Slice bounds are created with a constant ub - lb difference.
    if (!tripCount.has_value())
      return false;
    (*tripCountMap)[op] = *tripCount;
  }
  return true;
}

// Returns the number of iterations for the given slice trip count map.
uint64_t mlir::affine::getSliceIterationCount(
    const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
  uint64_t iterCount = 1;
  for (const auto &count : sliceTripCountMap) {
    iterCount *= count.second;
  }
  return iterCount;
}
// Computes the computation slice loop bounds for one loop nest as affine
// maps of the other loop nest's IVs and symbols, using
// 'dependenceConstraints' computed between 'depSourceOp' and 'depSinkOp'.
void mlir::affine::getComputationSliceState(
    Operation *depSourceOp, Operation *depSinkOp,
    const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth,
    bool isBackwardSlice, ComputationSliceState *sliceState) {
  // Get loop nest surrounding src operation.
  SmallVector<AffineForOp, 4> srcLoopIVs;
  getAffineForIVs(*depSourceOp, &srcLoopIVs);
  unsigned numSrcLoopIVs = srcLoopIVs.size();

  // Get loop nest surrounding dst operation.
  SmallVector<AffineForOp, 4> dstLoopIVs;
  getAffineForIVs(*depSinkOp, &dstLoopIVs);
  unsigned numDstLoopIVs = dstLoopIVs.size();

  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
         (isBackwardSlice && loopDepth <= numDstLoopIVs));

  // Project out dimensions other than those up to 'loopDepth'.
  FlatAffineValueConstraints sliceCst(dependenceConstraints);
  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
  unsigned num =
      isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
  sliceCst.projectOut(pos, num);

  // Add slice loop IV values to 'sliceState'.
  unsigned offset = isBackwardSlice ? 0 : loopDepth;
  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
  sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs);

  // Set up lower/upper bound affine maps for the slice.
  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());

  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
  sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(),
                          &sliceState->lbs, &sliceState->ubs);

  // Set up bound operands for the slice's lower and upper bounds.
  SmallVector<Value, 4> sliceBoundOperands;
  unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars();
  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
    if (i < offset || i >= offset + numSliceLoopIVs)
      sliceBoundOperands.push_back(sliceCst.getValue(i));
  }

  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
  // canonicalization.
  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);

  // Set destination loop nest insertion point to block start at 'loopDepth'.
  sliceState->insertPoint =
      isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
                      : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());

  llvm::SmallDenseSet<Value, 8> sequentialLoops;
  if (isa<AffineReadOpInterface>(depSourceOp) &&
      isa<AffineReadOpInterface>(depSinkOp)) {
    // For read-read access pairs, gather sequential loops so their slice
    // bounds can be reset below.
    getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
                       &sequentialLoops);
  }
  auto getSliceLoop = [&](unsigned i) {
    return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
  };
  auto isInnermostInsertion = [&]() {
    return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
                            : loopDepth >= dstLoopIVs.size());
  };
  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
  auto srcIsUnitSlice = [&]() {
    return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
            (getSliceIterationCount(sliceTripCountMap) == 1));
  };
  // Clear all sliced loop bounds beginning at the first sequential loop or
  // the first loop with a slice fusion barrier attribute.
  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
    Value iv = getSliceLoop(i).getInductionVar();
    if (sequentialLoops.count(iv) == 0 &&
        getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
      continue;
    // Skip resetting the bounds of a parallel reduction loop if the slice is
    // a single trip count, maximal, and inserted at the innermost insertion
    // point, in which case the loop bounds of source and destination match.
    std::optional<bool> isMaximal = sliceState->isMaximal();
    if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
        isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
      continue;
    for (unsigned j = i; j < numSliceLoopIVs; ++j) {
      sliceState->lbs[j] = AffineMap();
      sliceState->ubs[j] = AffineMap();
    }
    break;
  }
}
/// Creates a clone of the computation contained in the loop nest surrounding
/// 'srcOpInst', updates the slice loop bounds with any non-null bound maps
/// in 'sliceState', and inserts the slice into the nest surrounding
/// 'dstOpInst' at loop depth 'dstLoopDepth'.
AffineForOp mlir::affine::insertBackwardComputationSlice(
    Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
    ComputationSliceState *sliceState) {
  // Get loop nest surrounding src operation.
  SmallVector<AffineForOp, 4> srcLoopIVs;
  getAffineForIVs(*srcOpInst, &srcLoopIVs);
  unsigned numSrcLoopIVs = srcLoopIVs.size();

  // Get loop nest surrounding dst operation.
  SmallVector<AffineForOp, 4> dstLoopIVs;
  getAffineForIVs(*dstOpInst, &dstLoopIVs);
  unsigned dstLoopIVsSize = dstLoopIVs.size();
  if (dstLoopDepth > dstLoopIVsSize) {
    dstOpInst->emitError("invalid destination loop depth");
    return AffineForOp();
  }

  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
  SmallVector<unsigned, 4> positions;
  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);

  // Clone the src loop nest and insert it at the beginning of the operation
  // block of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
  auto sliceLoopNest =
      cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));

  Operation *sliceInst =
      getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
  // Get loop nest surrounding 'sliceInst'.
  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);

  // Sanity checks.
  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
  (void)sliceSurroundingLoopsSize;
  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
  (void)sliceLoopLimit;
  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);

  // Update loop bounds for loops in 'sliceLoopNest'.
  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
    auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
    if (AffineMap lbMap = sliceState->lbs[i])
      forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
    if (AffineMap ubMap = sliceState->ubs[i])
      forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
  }
  return sliceLoopNest;
}
// Constructs a MemRefAccess, populating it with the memref, indices, and op
// from 'memOp'.
MemRefAccess::MemRefAccess(Operation *memOp) {
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(memOp)) {
    memref = loadOp.getMemRef();
    opInst = memOp;
    llvm::append_range(indices, loadOp.getMapOperands());
  } else {
    assert(isa<AffineWriteOpInterface>(memOp) &&
           "Affine read/write op expected");
    auto storeOp = cast<AffineWriteOpInterface>(memOp);
    opInst = memOp;
    memref = storeOp.getMemRef();
    llvm::append_range(indices, storeOp.getMapOperands());
  }
}

unsigned MemRefAccess::getRank() const {
  return cast<MemRefType>(memref.getType()).getRank();
}

bool MemRefAccess::isStore() const {
  return isa<AffineWriteOpInterface>(opInst);
}
/// Returns the nesting depth of this operation, i.e., the number of loops
/// surrounding it.
unsigned mlir::affine::getNestingDepth(Operation *op) {
  Operation *currOp = op;
  unsigned depth = 0;
  while ((currOp = currOp->getParentOp())) {
    if (isa<AffineForOp>(currOp))
      depth++;
    if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
      depth += parOp.getNumDims();
  }
  return depth;
}
/// Equal if both accesses are to the same memref and have equal composed
/// access functions.
bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
  if (memref != rhs.memref)
    return false;

  AffineValueMap thisMap, rhsMap;
  getAccessMap(&thisMap);
  rhs.getAccessMap(&rhsMap);
  return thisMap == rhsMap;
}
// Populates 'ivs' with the IVs of the surrounding affine.for and
// affine.parallel ops, ordered from the outermost one to the innermost.
void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
  auto *currOp = op.getParentOp();
  // Traverse up the hierarchy collecting all 'affine.for' and
  // 'affine.parallel' operations while skipping over 'affine.if' operations.
  while (currOp) {
    if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
      ivs.push_back(currAffineForOp.getInductionVar());
    else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
      llvm::append_range(ivs, parOp.getIVs());
    currOp = currOp->getParentOp();
  }
  std::reverse(ivs.begin(), ivs.end());
}
/// Returns the number of surrounding loops common to both `a` and `b`.
unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
                                                    Operation &b) {
  SmallVector<Value, 4> loopsA, loopsB;
  getAffineIVs(a, loopsA);
  getAffineIVs(b, loopsB);

  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if (loopsA[i] != loopsB[i])
      break;
    ++numCommonLoops;
  }
  return numCommonLoops;
}
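// For example, for
//
//   affine.for %i ... {
//     affine.for %j ... { <a> }
//     affine.for %k ... { <b> }
//   }
//
// only the %i loop surrounds both ops, so the function returns 1.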
static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
                                                      Block::iterator start,
                                                      Block::iterator end,
                                                      int memorySpace) {
  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;

  // Walk this range of operations to gather all memory regions.
  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
    if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
      // Neither load nor a store op.
      return WalkResult::advance();
    }

    // Compute the memref region symbolic in any IVs enclosing this block.
    auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
    if (failed(
            region->compute(opInst,
                            /*loopDepth=*/getNestingDepth(&*block.begin())))) {
      LDBG() << "Error obtaining memory region";
      opInst->emitError("error obtaining memory region");
      return WalkResult::interrupt();
    }

    auto [it, inserted] = regions.try_emplace(region->memref);
    if (inserted) {
      it->second = std::move(region);
    } else if (failed(it->second->unionBoundingBox(*region))) {
      LDBG() << "getMemoryFootprintBytes: unable to perform a union on a "
                "memory region";
      opInst->emitError(
          "getMemoryFootprintBytes: unable to perform a union on a memory "
          "region");
      return WalkResult::interrupt();
    }
    return WalkResult::advance();
  });
  if (result.wasInterrupted())
    return std::nullopt;

  int64_t totalSizeInBytes = 0;
  for (const auto &region : regions) {
    std::optional<int64_t> size = region.second->getRegionSize();
    if (!size.has_value())
      return std::nullopt;
    totalSizeInBytes += *size;
  }
  return totalSizeInBytes;
}
std::optional<int64_t>
mlir::affine::getMemoryFootprintBytes(AffineForOp forOp, int memorySpace) {
  auto *forInst = forOp.getOperation();
  return ::getMemoryFootprintBytes(
      *forInst->getBlock(), Block::iterator(forInst),
      std::next(Block::iterator(forInst)), memorySpace);
}

/// Returns whether the loop is a parallel loop that also contains a
/// reduction.
bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
  SmallVector<LoopReduction> reductions;
  if (!isLoopParallel(forOp, &reductions))
    return false;
  return !reductions.empty();
}
/// Returns in 'sequentialLoops' all sequential loops in the loop nest rooted
/// at 'forOp'.
void mlir::affine::getSequentialLoops(
    AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
  forOp->walk([&](Operation *op) {
    if (auto innerFor = dyn_cast<AffineForOp>(op))
      if (!isLoopParallel(innerFor))
        sequentialLoops->insert(innerFor.getInductionVar());
  });
}
IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
  FlatAffineValueConstraints fac(set);
  if (fac.isEmpty())
    return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
                                   set.getContext());
  fac.removeTrivialRedundancy();

  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
  return simplifiedSet;
}

static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
                                 SmallVector<Value> &target) {
  target =
      llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
        return val.has_value() ? *val : Value();
      }));
}
/// Bound the variable at `pos` in `constraints` with constraints drawn from
/// the affine map `map` applied to `operands`. The map's dims/symbols are
/// aligned with `constraints` first; a new symbol column is appended for any
/// dim/symbol of the map not present in `constraints`.
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
                                      BoundType type, unsigned pos,
                                      AffineMap map, ValueRange operands) {
  SmallVector<Value> dims, syms, newSyms;
  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);

  AffineMap alignedMap =
      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
    constraints.appendSymbolVar(newSyms[i]);
  return constraints.addBound(type, pos, alignedMap);
}

/// Add `val` to each result of `map`.
static AffineMap addConstToResults(AffineMap map, int64_t val) {
  SmallVector<AffineExpr> newResults;
  for (AffineExpr r : map.getResults())
    newResults.push_back(r + val);
  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
                        map.getContext());
}
FailureOr<AffineValueMap>
mlir::affine::simplifyConstrainedMinMaxOp(Operation *op,
                                          FlatAffineValueConstraints constraints) {
  bool isMin = isa<AffineMinOp>(op);
  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
  ValueRange operands = op->getOperands();
  AffineMap map =
      isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
  unsigned numResults = map.getNumResults();

  // Add extra dimensions: one for the op itself ('dimOp'), one for its
  // computed bound ('dimOpBound'), and one per map result.
  unsigned dimOp = constraints.appendDimVar();
  unsigned dimOpBound = constraints.appendDimVar();
  unsigned resultDimStart = constraints.appendDimVar(numResults);

  // Bound 'dimOp' by each map result: isMin ? op <= expr_i : op >= expr_i.
  auto boundType = isMin ? BoundType::UB : BoundType::LB;
  // ...
  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
  // ...
  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
    return failure();
  // ...
  // Prove that each result is bounded by the op's bound: add the negated
  // inequality and check the resulting system for emptiness.
  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
    FlatAffineValueConstraints newConstr(constraints);
    if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
                                map.getSubMap({i - resultDimStart}), operands)))
      return failure();
    SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
    ineq[dimOpBound] = isMin ? 1 : -1;
    ineq[i] = isMin ? -1 : 1;
    ineq[newConstr.getNumCols() - 1] = -1;
    newConstr.addInequality(ineq);
    if (!newConstr.isEmpty())
      return failure();
  }
  // ...
}
/// Find the innermost common Block of `a` and `b` within the affine scope
/// that both belong to; returns nullptr if they are in different scopes.
mlir::Block *mlir::affine::findInnermostCommonBlockInScope(Operation *a,
                                                           Operation *b) {
  Region *aScope = getAffineAnalysisScope(a);
  Region *bScope = getAffineAnalysisScope(b);
  if (aScope != bScope)
    return nullptr;

  // Get the block ancestry of `op` while stopping at the affine scope.
  auto getBlockAncestry = [&](Operation *op,
                              SmallVectorImpl<Block *> &ancestry) {
    Operation *curOp = op;
    do {
      ancestry.push_back(curOp->getBlock());
      if (curOp->getParentRegion() == aScope)
        break;
      curOp = curOp->getParentOp();
    } while (curOp);
    assert(curOp && "can't reach root op without passing through affine scope");
    std::reverse(ancestry.begin(), ancestry.end());
  };

  SmallVector<Block *, 4> aAncestors, bAncestors;
  getBlockAncestry(a, aAncestors);
  getBlockAncestry(b, bAncestors);
  assert(!aAncestors.empty() && !bAncestors.empty() &&
         "at least one Block ancestor expected");

  Block *innermostCommonBlock = nullptr;
  for (unsigned a = 0, b = 0, e = aAncestors.size(), f = bAncestors.size();
       a < e && b < f; ++a, ++b) {
    if (aAncestors[a] != bAncestors[b])
      break;
    innermostCommonBlock = aAncestors[a];
  }
  return innermostCommonBlock;
}
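// For example, if `a` is nested inside an affine.for held by block B while
// `b` sits directly in B, the two block ancestries first diverge right below
// B, so B is returned as the innermost common block.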