//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for non-loop IR
// structures.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/Utils.h"

#include "mlir/Analysis/Presburger/PresburgerRelation.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "analysis-utils"

using namespace mlir;
using namespace affine;
using namespace presburger;

using llvm::SmallDenseMap;

using Node = MemRefDependenceGraph::Node;

// LoopNestStateCollector walks loop nests and collects load and store
// operations, and records whether a region-holding op other than ForOp and
// IfOp was encountered in the loop nest.
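// For example (an illustrative sketch), walking the following nest collects
// the two affine.for ops into 'forOps', the affine.load into 'loadOpInsts',
// and the affine.store into 'storeOpInsts':
//
//   affine.for %i = 0 to 16 {
//     affine.for %j = 0 to 16 {
//       %v = affine.load %A[%i, %j] : memref<16x16xf32>
//       affine.store %v, %B[%i, %j] : memref<16x16xf32>
//     }
//   }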
void LoopNestStateCollector::collect(Operation *opToWalk) {
  opToWalk->walk([&](Operation *op) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      forOps.push_back(forOp);
    } else if (isa<AffineReadOpInterface>(op)) {
      loadOpInsts.push_back(op);
    } else if (isa<AffineWriteOpInterface>(op)) {
      storeOpInsts.push_back(op);
    } else {
      auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
      if (!memInterface) {
        if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
          // This op itself is memory-effect free.
          return;
        // Check operands. E.g., ops like the `call` op are handled here.
        for (Value v : op->getOperands()) {
          if (!isa<MemRefType>(v.getType()))
            continue;
          // Conservatively, we assume the memref is read and written to.
          memrefLoads.push_back(op);
          memrefStores.push_back(op);
        }
      } else {
        // Non-affine loads and stores.
        if (hasEffect<MemoryEffects::Read>(op))
          memrefLoads.push_back(op);
        if (hasEffect<MemoryEffects::Write>(op))
          memrefStores.push_back(op);
        if (hasEffect<MemoryEffects::Free>(op))
          memrefFrees.push_back(op);
      }
    }
  });
}

unsigned Node::getLoadOpCount(Value memref) const {
  unsigned loadOpCount = 0;
  for (Operation *loadOp : loads) {
    // Common case: affine reads.
    if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
      if (memref == affineLoad.getMemRef())
        ++loadOpCount;
    } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
      ++loadOpCount;
    }
  }
  return loadOpCount;
}

// Returns the store op count for 'memref'.
unsigned Node::getStoreOpCount(Value memref) const {
  unsigned storeOpCount = 0;
  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
    // Common case: affine writes.
    if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
      if (memref == affineStore.getMemRef())
        ++storeOpCount;
    } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
                                               memref)) {
      ++storeOpCount;
    }
  }
  return storeOpCount;
}

// Returns true if this node has a store op (affine or non-affine) on 'memref'.
unsigned Node::hasStore(Value memref) const {
  return llvm::any_of(
      llvm::concat<Operation *const>(stores, memrefStores),
      [&](Operation *storeOp) {
        if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
          if (memref == affineStore.getMemRef())
            return true;
        } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
          return true;
        }
        return false;
      });
}

unsigned Node::hasFree(Value memref) const {
  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
    return hasEffect<MemoryEffects::Free>(freeOp, memref);
  });
}

// Returns in 'storeOps' all store ops in this node which access 'memref'.
void Node::getStoreOpsForMemref(Value memref,
                                SmallVectorImpl<Operation *> *storeOps) const {
  for (Operation *storeOp : stores) {
    if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
      storeOps->push_back(storeOp);
  }
}

// Returns in 'loadOps' all load ops in this node which access 'memref'.
void Node::getLoadOpsForMemref(Value memref,
                               SmallVectorImpl<Operation *> *loadOps) const {
  for (Operation *loadOp : loads) {
    if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
      loadOps->push_back(loadOp);
  }
}

// Returns in 'loadAndStoreMemrefSet' all memrefs for which this node has at
// least one load and at least one store operation.
void Node::getLoadAndStoreMemrefSet(
    DenseSet<Value> *loadAndStoreMemrefSet) const {
  llvm::SmallDenseSet<Value, 2> loadMemrefs;
  for (Operation *loadOp : loads) {
    loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
  }
  for (Operation *storeOp : stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
    if (loadMemrefs.count(memref) > 0)
      loadAndStoreMemrefSet->insert(memref);
  }
}

/// Returns the values that this op has a memref effect of type `EffectTys` on,
/// not considering recursive effects.
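/// For example (illustrative, not exhaustive): for a memref.load, which has a
/// `MemoryEffects::Read` on its memref operand, this returns the loaded
/// memref; for an op without the memory effect interface and without the
/// recursive-effects trait, all memref operands are returned conservatively.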
template <typename... EffectTys>
static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) {
  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
  if (!memOp) {
    if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
      // No effects.
      return;
    // Memref operands have to be considered as being affected.
    for (Value operand : op->getOperands()) {
      if (isa<MemRefType>(operand.getType()))
        values.push_back(operand);
    }
    return;
  }
  SmallVector<SideEffects::EffectInstance<MemoryEffects::Effect>> effects;
  memOp.getEffects(effects);
  for (auto &effect : effects) {
    Value effectVal = effect.getValue();
    if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
        isa<MemRefType>(effectVal.getType()))
      values.push_back(effectVal);
  }
}

/// Adds `op` to the MDG, creating a new node and adding its memory accesses
/// (affine or non-affine) to the `memrefAccesses` (memref -> list of nodes
/// with accesses) map.
static Node *
addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
             DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
  auto &nodes = mdg.nodes;
  // Create a graph node for 'nodeOp' and record all the load and store
  // accesses it contains.
  LoopNestStateCollector collector;
  collector.collect(nodeOp);
  unsigned newNodeId = mdg.nextNodeId++;
  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
  for (Operation *op : collector.loadOpInsts) {
    node.loads.push_back(op);
    auto memref = cast<AffineReadOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.storeOpInsts) {
    node.stores.push_back(op);
    auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.memrefLoads) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Read>(op, effectedValues);
    if (llvm::any_of(((ValueRange)effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      // We do not know the interaction here.
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefLoads.push_back(op);
  }
  for (Operation *op : collector.memrefStores) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Write>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefStores.push_back(op);
  }
  for (Operation *op : collector.memrefFrees) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Free>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefFrees.push_back(op);
  }

  return &node;
}

bool MemRefDependenceGraph::init() {
  LDBG() << "--- Initializing MDG ---";
  // Map from a memref to the set of ids of the nodes that have ops accessing
  // the memref.
  DenseMap<Value, SetVector<unsigned>> memrefAccesses;

  // Create graph nodes.
  DenseMap<Operation *, unsigned> forToNodeMap;
  for (Operation &op : block) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
      forToNodeMap[&op] = node->id;
    } else if (isa<AffineReadOpInterface>(op)) {
      // Create graph node for top-level load op.
      Node node(nextNodeId++, &op);
      node.loads.push_back(&op);
      auto memref = cast<AffineReadOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (isa<AffineWriteOpInterface>(op)) {
      // Create graph node for top-level store op.
      Node node(nextNodeId++, &op);
      node.stores.push_back(&op);
      auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (op.getNumResults() > 0 && !op.use_empty()) {
      // Create graph node for top-level producer of SSA values, which
      // could be used by loop nest nodes.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (!isMemoryEffectFree(&op) &&
               (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
      // Create graph node for top-level op unless it is known to be
      // memory-effect free. This covers all unknown/unregistered ops,
      // non-affine ops with memory effects, and region-holding ops with a
      // well-defined control flow. During the fusion validity checks, edges
      // to/from these ops get looked at.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
      // Return false if unhandled/unknown region-holding ops are found. We
      // won't know what such ops do or what their regions mean; e.g., they may
      // not be imperative ops.
      LDBG() << "MDG init failed; unknown region-holding op found!";
      return false;
    }
    // Note: we don't create nodes for memory-effect-free ops that either have
    // no regions (unless their results are in use) or that implement the
    // region branch op interface.
  }

  LDBG() << "Created " << nodes.size() << " nodes";

  // Add dependence edges between nodes which produce SSA values and their
  // users. Load ops can be considered as the ones producing SSA values.
  for (auto &idAndNode : nodes) {
    const Node &node = idAndNode.second;
    // Stores don't define SSA values, skip them.
    if (!node.stores.empty())
      continue;
    Operation *opInst = node.op;
    for (Value value : opInst->getResults()) {
      for (Operation *user : value.getUsers()) {
        // Ignore users outside of the block.
        if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
            &block)
          continue;
        SmallVector<AffineForOp, 4> loops;
        getAffineForIVs(*user, &loops);
        // Find the surrounding affine.for nested immediately within the
        // block.
        auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
          return loop->getBlock() == &block;
        });
        if (it == loops.end())
          continue;
        assert(forToNodeMap.count(*it) > 0 && "missing mapping");
        unsigned userLoopNestId = forToNodeMap[*it];
        addEdge(node.id, userLoopNestId, value);
      }
    }
  }

  // Walk memref access lists and add graph edges between dependent nodes.
  for (auto &memrefAndList : memrefAccesses) {
    unsigned n = memrefAndList.second.size();
    Value srcMemRef = memrefAndList.first;
    // Add edges between all dependent pairs among the node IDs on this memref.
    for (unsigned i = 0; i < n; ++i) {
      unsigned srcId = memrefAndList.second[i];
      Node *srcNode = getNode(srcId);
      bool srcHasStoreOrFree =
          srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
      for (unsigned j = i + 1; j < n; ++j) {
        unsigned dstId = memrefAndList.second[j];
        Node *dstNode = getNode(dstId);
        bool dstHasStoreOrFree =
            dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
        if (srcHasStoreOrFree || dstHasStoreOrFree)
          addEdge(srcId, dstId, srcMemRef);
      }
    }
  }
  return true;
}

// Returns the graph node for 'id'.
const Node *MemRefDependenceGraph::getNode(unsigned id) const {
  auto it = nodes.find(id);
  assert(it != nodes.end());
  return &it->second;
}

// Returns the graph node for 'forOp'.
const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const {
  for (auto &idAndNode : nodes)
    if (idAndNode.second.op == forOp)
      return &idAndNode.second;
  return nullptr;
}

// Adds a node with 'op' to the graph and returns its unique identifier.
unsigned MemRefDependenceGraph::addNode(Operation *op) {
  Node node(nextNodeId++, op);
  nodes.insert({node.id, node});
  return node.id;
}

// Remove node 'id' (and its associated edges) from the graph.
void MemRefDependenceGraph::removeNode(unsigned id) {
  // Remove each edge in 'inEdges[id]'.
  if (inEdges.count(id) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[id];
    for (auto &inEdge : oldInEdges) {
      removeEdge(inEdge.id, id, inEdge.value);
    }
  }
  // Remove each edge in 'outEdges[id]'.
  if (outEdges.contains(id)) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[id];
    for (auto &outEdge : oldOutEdges) {
      removeEdge(id, outEdge.id, outEdge.value);
    }
  }
  // Erase remaining node state.
  inEdges.erase(id);
  outEdges.erase(id);
  nodes.erase(id);
}

// Returns true if node 'id' writes to any memref which escapes (or is an
// argument to) the block. Returns false otherwise.
bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) const {
  const Node *node = getNode(id);
  for (auto *storeOpInst : node->stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
    auto *op = memref.getDefiningOp();
    // Return true if 'memref' is a block argument.
    if (!op)
      return true;
    // Return true if any use of 'memref' does not dereference it in an affine
    // way.
    for (auto *user : memref.getUsers())
      if (!isa<AffineMapAccessInterface>(*user))
        return true;
  }
  return false;
}

// Returns true iff there is an edge from node 'srcId' to node 'dstId' which
// is for 'value' if non-null, or for any value otherwise.
bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
                                    Value value) const {
  if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) {
    return false;
  }
  bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) {
    return edge.id == dstId && (!value || edge.value == value);
  });
  bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) {
    return edge.id == srcId && (!value || edge.value == value);
  });
  return hasOutEdge && hasInEdge;
}

// Adds an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
                                    Value value) {
  if (!hasEdge(srcId, dstId, value)) {
    outEdges[srcId].push_back({dstId, value});
    inEdges[dstId].push_back({srcId, value});
    if (isa<MemRefType>(value.getType()))
      memrefEdgeCount[value]++;
  }
}

// Removes an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
                                       Value value) {
  assert(inEdges.count(dstId) > 0);
  assert(outEdges.count(srcId) > 0);
  if (isa<MemRefType>(value.getType())) {
    assert(memrefEdgeCount.count(value) > 0);
    memrefEdgeCount[value]--;
  }
  // Remove 'srcId' from 'inEdges[dstId]'.
  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
    if ((*it).id == srcId && (*it).value == value) {
      inEdges[dstId].erase(it);
      break;
    }
  }
  // Remove 'dstId' from 'outEdges[srcId]'.
  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
    if ((*it).id == dstId && (*it).value == value) {
      outEdges[srcId].erase(it);
      break;
    }
  }
}

// Returns true if there is a path in the dependence graph from node 'srcId'
// to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
// operations that the edges connect are expected to be from the same block.
bool MemRefDependenceGraph::hasDependencePath(unsigned srcId,
                                              unsigned dstId) const {
  // Worklist state is: <node-id, next-output-edge-index-to-visit>
  SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
  worklist.push_back({srcId, 0});
  Operation *dstOp = getNode(dstId)->op;
  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
  while (!worklist.empty()) {
    auto &idAndIndex = worklist.back();
    // Return true if we have reached 'dstId'.
    if (idAndIndex.first == dstId)
      return true;
    // Pop and continue if node has no out edges, or if all out edges have
    // already been visited.
    if (!outEdges.contains(idAndIndex.first) ||
        idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) {
      worklist.pop_back();
      continue;
    }
    // Get graph edge to traverse.
    const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second];
    // Increment next output edge index for 'idAndIndex'.
    ++idAndIndex.second;
    // Add node at 'edge.id' to the worklist. We don't need to consider
    // nodes that are "after" dstId in the containing block; one can't have a
    // path to `dstId` from any of those nodes.
    bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
    if (!afterDst && edge.id != idAndIndex.first)
      worklist.push_back({edge.id, 0});
  }
  return false;
}

// Returns the input edge count for node 'id' and 'memref' from src nodes
// which access 'memref' with a store operation.
unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
                                                          Value memref) const {
  unsigned inEdgeCount = 0;
  for (const Edge &inEdge : inEdges.lookup(id)) {
    if (inEdge.value == memref) {
      const Node *srcNode = getNode(inEdge.id);
      // Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
      // with a store op.
      if (srcNode->getStoreOpCount(memref) > 0)
        ++inEdgeCount;
    }
  }
  return inEdgeCount;
}

// Returns the output edge count for node 'id' and 'memref' (if non-null),
// otherwise returns the total output edge count from node 'id'.
unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id,
                                                Value memref) const {
  unsigned outEdgeCount = 0;
  for (const auto &outEdge : outEdges.lookup(id))
    if (!memref || outEdge.value == memref)
      ++outEdgeCount;
  return outEdgeCount;
}

/// Returns all nodes which define SSA values used in node 'id'.
void MemRefDependenceGraph::gatherDefiningNodes(
    unsigned id, DenseSet<unsigned> &definingNodes) const {
  for (const Edge &edge : inEdges.lookup(id))
    // By definition of edge, if the edge value is a non-memref value,
    // then the dependence is between a graph node which defines an SSA value
    // and another graph node which uses the SSA value.
    if (!isa<MemRefType>(edge.value.getType()))
      definingNodes.insert(edge.id);
}

// Computes and returns an insertion point operation, before which the
// fused <srcId, dstId> loop nest can be inserted while preserving
// dependences. Returns nullptr if no such insertion point is found.
Operation *
MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
                                                      unsigned dstId) const {
  if (!outEdges.contains(srcId))
    return getNode(dstId)->op;

  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
  DenseSet<unsigned> definingNodes;
  gatherDefiningNodes(dstId, definingNodes);
  if (llvm::any_of(definingNodes,
                   [&](unsigned id) { return hasDependencePath(srcId, id); })) {
    LDBG() << "Can't fuse: a defining op with a user in the dst "
           << "loop has dependence from the src loop";
    return nullptr;
  }

  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
  SmallPtrSet<Operation *, 2> srcDepInsts;
  for (auto &outEdge : outEdges.lookup(srcId))
    if (outEdge.id != dstId)
      srcDepInsts.insert(getNode(outEdge.id)->op);

  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
  SmallPtrSet<Operation *, 2> dstDepInsts;
  for (auto &inEdge : inEdges.lookup(dstId))
    if (inEdge.id != srcId)
      dstDepInsts.insert(getNode(inEdge.id)->op);

  Operation *srcNodeInst = getNode(srcId)->op;
  Operation *dstNodeInst = getNode(dstId)->op;

  // Computing insertion point:
  // *) Walk all operation positions in Block operation list in the
  //    range (src, dst). For each operation 'op' visited in this search:
  //   *) Store in 'firstSrcDepPos' the first position where 'op' has a
  //      dependence edge from 'srcNode'.
  //   *) Store in 'lastDstDepPos' the last position where 'op' has a
  //      dependence edge to 'dstNode'.
  // *) Compare 'firstSrcDepPos' and 'lastDstDepPos' to determine the
  //    operation insertion point (or return nullptr if no such
  //    insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
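  //
  // Worked example (an illustrative sketch): with block order src, op1, op2,
  // dst, where op1 depends on src (firstSrcDepPos = 0) and dst depends on op2
  // (lastDstDepPos = 1), we have firstSrcDepPos <= lastDstDepPos, so no
  // insertion point preserves both dependences and nullptr is returned. If
  // instead only op2 depended on src (firstSrcDepPos = 1) and dst depended
  // only on op1 (lastDstDepPos = 0), the fused nest could be inserted right
  // before op2.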
  SmallVector<Operation *, 2> depInsts;
  std::optional<unsigned> firstSrcDepPos;
  std::optional<unsigned> lastDstDepPos;
  unsigned pos = 0;
  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
       it != Block::iterator(dstNodeInst); ++it) {
    Operation *op = &(*it);
    if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
      firstSrcDepPos = pos;
    if (dstDepInsts.count(op) > 0)
      lastDstDepPos = pos;
    depInsts.push_back(op);
    ++pos;
  }

  if (firstSrcDepPos.has_value()) {
    if (lastDstDepPos.has_value()) {
      if (*firstSrcDepPos <= *lastDstDepPos) {
        // No valid insertion point exists which preserves dependences.
        return nullptr;
      }
    }
    // Return the insertion point at 'firstSrcDepPos'.
    return depInsts[*firstSrcDepPos];
  }
  // No dependence targets in range (or only dst deps in range), return
  // 'dstNodeInst' insertion point.
  return dstNodeInst;
}

// Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
// taking into account that:
// *) if 'removeSrcId' is true, 'srcId' will be removed after fusion,
// *) memrefs in 'privateMemRefs' have been replaced in node at 'dstId' by a
//    private memref.
void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
                                        const DenseSet<Value> &privateMemRefs,
                                        bool removeSrcId) {
  // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
  if (inEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
    for (auto &inEdge : oldInEdges) {
      // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
      if (!privateMemRefs.contains(inEdge.value))
        addEdge(inEdge.id, dstId, inEdge.value);
    }
  }
  // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
  // If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
  if (outEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
    for (auto &outEdge : oldOutEdges) {
      // Remove any out edges from 'srcId' to 'dstId' across memrefs.
      if (outEdge.id == dstId)
        removeEdge(srcId, outEdge.id, outEdge.value);
      else if (removeSrcId) {
        addEdge(dstId, outEdge.id, outEdge.value);
        removeEdge(srcId, outEdge.id, outEdge.value);
      }
    }
  }
  // Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
  // replaced by a private memref). These edges could come from nodes
  // other than 'srcId' which were removed in the previous step.
  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
    SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
    for (auto &inEdge : oldInEdges)
      if (privateMemRefs.count(inEdge.value) > 0)
        removeEdge(inEdge.id, dstId, inEdge.value);
  }
}

// Updates edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
// of sibling node 'sibId' into node 'dstId'.
void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
  // For each edge in 'inEdges[sibId]':
  // *) Add new edge from source node 'inEdge.id' to 'dstNode'.
  // *) Remove edge from source node 'inEdge.id' to 'sibNode'.
  if (inEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
    for (auto &inEdge : oldInEdges) {
      addEdge(inEdge.id, dstId, inEdge.value);
      removeEdge(inEdge.id, sibId, inEdge.value);
    }
  }

  // For each edge in 'outEdges[sibId]':
  // *) Add new edge from 'dstId' to 'outEdge.id'.
  // *) Remove edge from 'sibId' to 'outEdge.id'.
  if (outEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
    for (auto &outEdge : oldOutEdges) {
      addEdge(dstId, outEdge.id, outEdge.value);
      removeEdge(sibId, outEdge.id, outEdge.value);
    }
  }
}

// Adds the ops in 'loads', 'stores', 'memrefLoads', 'memrefStores', and
// 'memrefFrees' to the node at 'id'.
void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads,
                                      ArrayRef<Operation *> stores,
                                      ArrayRef<Operation *> memrefLoads,
                                      ArrayRef<Operation *> memrefStores,
                                      ArrayRef<Operation *> memrefFrees) {
  Node *node = getNode(id);
  llvm::append_range(node->loads, loads);
  llvm::append_range(node->stores, stores);
  llvm::append_range(node->memrefLoads, memrefLoads);
  llvm::append_range(node->memrefStores, memrefStores);
  llvm::append_range(node->memrefFrees, memrefFrees);
}

void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
  Node *node = getNode(id);
  node->loads.clear();
  node->stores.clear();
}

// Calls 'callback' for each input edge incident to node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefInputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (inEdges.count(id) > 0)
    forEachMemRefEdge(inEdges.at(id), callback);
}

// Calls 'callback' for each output edge from node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefOutputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (outEdges.count(id) > 0)
    forEachMemRefEdge(outEdges.at(id), callback);
}

// Calls 'callback' for each edge in 'edges' which carries a memref
// dependence.
void MemRefDependenceGraph::forEachMemRefEdge(
    ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
  for (const auto &edge : edges) {
    // Skip if 'edge' is not a memref dependence edge.
    if (!isa<MemRefType>(edge.value.getType()))
      continue;
    assert(nodes.count(edge.id) > 0);
    // Visit current input edge 'edge'.
    callback(edge);
  }
}

void MemRefDependenceGraph::print(raw_ostream &os) const {
  os << "\nMemRefDependenceGraph\n";
  os << "\nNodes:\n";
  for (const auto &idAndNode : nodes) {
    os << "Node: " << idAndNode.first << "\n";
    auto it = inEdges.find(idAndNode.first);
    if (it != inEdges.end()) {
      for (const auto &e : it->second)
        os << "  InEdge: " << e.id << " " << e.value << "\n";
    }
    it = outEdges.find(idAndNode.first);
    if (it != outEdges.end()) {
      for (const auto &e : it->second)
        os << "  OutEdge: " << e.id << " " << e.value << "\n";
    }
  }
}

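// getAffineForIVs below returns in 'loops' the affine.for ops surrounding
// 'op', outermost first. For example (an illustrative sketch), for an op
// nested as follows, it returns the %i and %j loops and skips over the
// enclosing affine.if:
//
//   affine.for %i = 0 to 10 {
//     affine.if #set(%i) {
//       affine.for %j = 0 to 10 {
//         "test.op"() : () -> ()   // <-- 'op'
//       }
//     }
//   }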
void mlir::affine::getAffineForIVs(Operation &op,
                                   SmallVectorImpl<AffineForOp> *loops) {
  auto *currOp = op.getParentOp();
  // Traverse up the hierarchy collecting all 'affine.for' operations while
  // skipping over 'affine.if' operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
      loops->push_back(currAffineForOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(loops->begin(), loops->end());
}

void mlir::affine::getEnclosingAffineOps(Operation &op,
                                         SmallVectorImpl<Operation *> *ops) {
  ops->clear();
  Operation *currOp = op.getParentOp();

  // Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and
  // `affine.parallel` operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
      ops->push_back(currOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(ops->begin(), ops->end());
}

// Populates 'cst' with FlatAffineValueConstraints which represent the original
// domain of the loop bounds that define 'ivs'.
LogicalResult ComputationSliceState::getSourceAsConstraints(
    FlatAffineValueConstraints &cst) const {
  assert(!ivs.empty() && "Cannot have a slice without its IVs");
  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
                                   /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(cst.addAffineForOpDomain(loop)))
      return failure();
  }
  return success();
}

// Populates 'cst' with FlatAffineValueConstraints which represent slice bounds.
LogicalResult
ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const {
  assert(!lbOperands.empty());
  // Adds src 'ivs' as dimension variables in 'cst'.
  unsigned numDims = ivs.size();
  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
  unsigned numSymbols = lbOperands[0].size();

  SmallVector<Value, 4> values(ivs);
  // Append 'ivs' then 'operands' to 'values'.
  values.append(lbOperands[0].begin(), lbOperands[0].end());
  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);

  // Add loop bound constraints for values which are loop IVs of the destination
  // of fusion and equality constraints for symbols which are constants.
  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
    Value value = values[i];
    assert(cst->containsVar(value) && "value expected to be present");
    if (isValidSymbol(value)) {
      // Check if the symbol is a constant.
      if (std::optional<int64_t> cOp = getConstantIntValue(value))
        cst->addBound(BoundType::EQ, value, cOp.value());
    } else if (auto loop = getForInductionVarOwner(value)) {
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }

  // Add slice bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'.
  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
  assert(succeeded(ret) &&
         "should not fail as we never have semi-affine slice maps");
  (void)ret;
  return success();
}

// Clears the bounds and operand state.
void ComputationSliceState::clearBounds() {
  lbs.clear();
  ubs.clear();
  lbOperands.clear();
  ubOperands.clear();
}

void ComputationSliceState::dump() const {
  llvm::errs() << "\tIVs:\n";
  for (Value iv : ivs)
    llvm::errs() << "\t\t" << iv << "\n";

  llvm::errs() << "\tLBs:\n";
  for (auto en : llvm::enumerate(lbs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value lbOp : lbOperands[en.index()])
      llvm::errs() << "\t\t\t" << lbOp << "\n";
  }

  llvm::errs() << "\tUBs:\n";
  for (auto en : llvm::enumerate(ubs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value ubOp : ubOperands[en.index()])
      llvm::errs() << "\t\t\t" << ubOp << "\n";
  }
}

/// Fast check to determine if the computation slice is maximal. Returns true
/// if each slice dimension maps to an existing dst dimension and both the src
/// and the dst loops for those dimensions have the same bounds. Returns false
/// if the src and the dst loop bounds can be proven to differ. Returns
/// std::nullopt if neither of the above can be proven.
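/// For example (an illustrative sketch): with lbMap = (d0) -> (d0) and
/// ubMap = (d0) -> (d0 + 1) operating on a dst loop IV, the slice covers
/// exactly one src iteration per dst iteration; if the src and dst loops both
/// span 0..N with step 1, the slice is maximal.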
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
         "Unexpected number of lbs, ubs and ivs in slice");

  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
    AffineMap lbMap = lbs[i];
    AffineMap ubMap = ubs[i];

    // Check if this slice is just an equality along this dimension.
    if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
        ubMap.getNumResults() != 1 ||
        lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
        // The condition above will be true for maps describing a single
        // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
        // Make sure we skip those cases by checking that the lb result is not
        // just a constant.
        isa<AffineConstantExpr>(lbMap.getResult(0)))
      return std::nullopt;

    // Limited support: we expect the lb result to be just a loop dimension for
    // now.
    AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
    if (!result)
      return std::nullopt;

    // Retrieve dst loop bounds.
    AffineForOp dstLoop =
        getForInductionVarOwner(lbOperands[i][result.getPosition()]);
    if (!dstLoop)
      return std::nullopt;
    AffineMap dstLbMap = dstLoop.getLowerBoundMap();
    AffineMap dstUbMap = dstLoop.getUpperBoundMap();

    // Retrieve src loop bounds.
    AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
    assert(srcLoop && "Expected affine for");
    AffineMap srcLbMap = srcLoop.getLowerBoundMap();
    AffineMap srcUbMap = srcLoop.getUpperBoundMap();

    // Limited support: we expect simple src and dst loops with a single
    // constant component per bound for now.
    if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
        dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
      return std::nullopt;

    AffineExpr srcLbResult = srcLbMap.getResult(0);
    AffineExpr dstLbResult = dstLbMap.getResult(0);
    AffineExpr srcUbResult = srcUbMap.getResult(0);
    AffineExpr dstUbResult = dstUbMap.getResult(0);
    if (!isa<AffineConstantExpr>(srcLbResult) ||
        !isa<AffineConstantExpr>(srcUbResult) ||
        !isa<AffineConstantExpr>(dstLbResult) ||
        !isa<AffineConstantExpr>(dstUbResult))
      return std::nullopt;

    // Check if src and dst loop bounds are the same. If not, we can guarantee
    // that the slice is not maximal.
    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
        srcLoop.getStep() != dstLoop.getStep())
      return false;
  }

  return true;
}

/// Returns true if it is deterministically verified that the original
/// iteration space of the slice is contained within the new iteration space
/// that is created after fusing 'this' slice into its destination.
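/// For example (an illustrative sketch): if the source loop spans 0..127 and
/// the computed slice bounds, rewritten in terms of the source IVs, span
/// 0..130, the set difference slice - src (128..130) is non-empty, so the
/// slice would step outside the original iteration space and is invalid.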
std::optional<bool> ComputationSliceState::isSliceValid() const {
  // Fast check to determine if the slice is valid. If the following conditions
  // are verified to be true, slice is declared valid by the fast check:
  // 1. Each slice loop is a single iteration loop bound in terms of a single
  //    destination loop IV.
  // 2. Loop bounds of the destination loop IV (from above) and those of the
  //    source loop IV are exactly the same.
  // If the fast check is inconclusive or false, we proceed with a more
  // expensive analysis.
  // TODO: Store the result of the fast check, as it might be used again in
  // `canRemoveSrcNodeAfterFusion`.
  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
  if (isValidFastCheck && *isValidFastCheck)
    return true;

  // Create constraints for the source loop nest from which the slice is
  // computed.
  FlatAffineValueConstraints srcConstraints;
  // TODO: Store the source's domain to avoid computation at each depth.
  if (failed(getSourceAsConstraints(srcConstraints))) {
    LDBG() << "Unable to compute source's domain";
    return std::nullopt;
  }
  // As the set difference utility currently cannot handle symbols in its
  // operands, validity of the slice cannot be determined.
  if (srcConstraints.getNumSymbolVars() > 0) {
    LDBG() << "Cannot handle symbols in source domain";
    return std::nullopt;
  }
  // TODO: Handle local vars in the source domains while using the 'projectOut'
  // utility below. Currently, aligning is not done assuming that there will be
  // no local vars in the source domain.
  if (srcConstraints.getNumLocalVars() != 0) {
    LDBG() << "Cannot handle locals in source domain";
    return std::nullopt;
  }

  // Create constraints for the slice loop nest that would be created if the
  // fusion succeeds.
  FlatAffineValueConstraints sliceConstraints;
  if (failed(getAsConstraints(&sliceConstraints))) {
    LDBG() << "Unable to compute slice's domain";
    return std::nullopt;
  }

  // Project out every dimension other than the 'ivs' to express the slice's
  // domain completely in terms of the source's IVs.
  sliceConstraints.projectOut(ivs.size(),
                              sliceConstraints.getNumVars() - ivs.size());

  LDBG() << "Domain of the source of the slice:\n"
         << "Source constraints:" << srcConstraints
         << "\nDomain of the slice if this fusion succeeds "
         << "(expressed in terms of its source's IVs):\n"
         << "Slice constraints:" << sliceConstraints;

  // TODO: Store 'srcSet' to avoid recalculating for each depth.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = sliceSet.subtract(srcSet);

  if (!diffSet.isIntegerEmpty()) {
    LDBG() << "Incorrect slice";
    return false;
  }
  return true;
}

/// Returns true if the computation slice encloses all the iterations of the
/// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
/// cannot determine if the slice is maximal or not.
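/// For example (an illustrative sketch): slicing a source loop over 0..127 at
/// a depth where the slice spans only 0..63 yields a non-maximal slice (the
/// set difference src - slice contains 64..127), whereas a slice spanning all
/// of 0..127 is maximal.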
std::optional<bool> ComputationSliceState::isMaximal() const {
  // Fast check to determine if the computation slice is maximal. If the result
  // is inconclusive, we proceed with a more expensive analysis.
  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
  if (isMaximalFastCheck)
    return isMaximalFastCheck;

  // Create constraints for the src loop nest being sliced.
  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
                                            /*numSymbols=*/0,
                                            /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(srcConstraints.addAffineForOpDomain(loop)))
      return std::nullopt;
  }

  // Create constraints for the slice using the dst loop nest information. We
  // retrieve existing dst loops from the lbOperands.
  SmallVector<Value> consumerIVs;
  for (Value lbOp : lbOperands[0])
    if (getForInductionVarOwner(lbOp))
      consumerIVs.push_back(lbOp);

  // Add empty IV Values for those new loops that are not equalities and,
  // therefore, are not yet materialized in the IR.
  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
    consumerIVs.push_back(Value());

  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
                                              /*numSymbols=*/0,
                                              /*numLocals=*/0, consumerIVs);

  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
    return std::nullopt;

  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
    // Constraint dims are different. The integer set difference can't be
    // computed so we don't know if the slice is maximal.
    return std::nullopt;

  // Compute the difference between the src loop nest and the slice integer
  // sets.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = srcSet.subtract(sliceSet);
  return diffSet.isIntegerEmpty();
}

unsigned MemRefRegion::getRank() const {
  return cast<MemRefType>(memref.getType()).getRank();
}

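// Illustrative example for the bounding computation below: for a region
// {%i <= d0 <= %i + 7, 0 <= d1 <= 31} on a memref<64x32xf32>, the constant
// bounding shape is [8, 32] and the returned number of elements is
// 8 * 32 = 256 (a sketch of typical behavior).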
std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
    SmallVectorImpl<int64_t> *shape, SmallVectorImpl<AffineMap> *lbs) const {
  auto memRefType = cast<MemRefType>(memref.getType());
  MLIRContext *context = memref.getContext();
  unsigned rank = memRefType.getRank();
  if (shape)
    shape->reserve(rank);

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  // Use a copy of the region constraints that has upper/lower bounds for each
  // memref dimension with static size added to guard against potential
  // over-approximation from projection or union bounding box. We may not add
  // this on the region itself since they might just be redundant constraints
  // that will need non-trivial means to eliminate.
  FlatLinearValueConstraints cstWithShapeBounds(cst);
  for (unsigned r = 0; r < rank; r++) {
    cstWithShapeBounds.addBound(BoundType::LB, r, 0);
    int64_t dimSize = memRefType.getDimSize(r);
    if (ShapedType::isDynamic(dimSize))
      continue;
    cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
  }

  // Find a constant upper bound on the extent of this memref region along
  // each dimension.
  int64_t numElements = 1;
  int64_t diffConstant;
  for (unsigned d = 0; d < rank; d++) {
    AffineMap lb;
    std::optional<int64_t> diff =
        cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb);
    if (diff.has_value()) {
      diffConstant = *diff;
      assert(diffConstant >= 0 && "dim size bound cannot be negative");
    } else {
      // If no constant bound is found, then it can always be bounded by the
      // memref's dim size if the latter has a constant size along this dim.
      auto dimSize = memRefType.getDimSize(d);
      if (ShapedType::isDynamic(dimSize))
        return std::nullopt;
      diffConstant = dimSize;
      // Lower bound becomes 0.
      lb = AffineMap::get(/*dimCount=*/0, cstWithShapeBounds.getNumSymbolVars(),
                          /*result=*/getAffineConstantExpr(0, context));
    }
    numElements *= diffConstant;
    // Populate outputs if available.
    if (lbs)
      lbs->push_back(lb);
    if (shape)
      shape->push_back(diffConstant);
  }
  return numElements;
}

void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                                         AffineMap &ubMap) const {
  assert(pos < cst.getNumDimVars() && "invalid position");
  auto memRefType = cast<MemRefType>(memref.getType());
  unsigned rank = memRefType.getRank();

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  auto boundPairs = cst.getLowerAndUpperBound(
      pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
      /*localExprs=*/{}, memRefType.getContext());
  lbMap = boundPairs.first;
  ubMap = boundPairs.second;
  assert(lbMap && "lower bound for a region must exist");
  assert(ubMap && "upper bound for a region must exist");
  assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
  assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}

LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
  assert(memref == other.memref);
  return cst.unionBoundingBox(*other.getConstraints());
}

/// Computes the memory region accessed by this memref with the region
/// represented as constraints symbolic/parametric in 'loopDepth' loops
/// surrounding 'op' and any additional Function symbols.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
//   affine.for %i = 0 to 32 {
//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//       load %A[%ii]
//     }
//   }
//
// region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
//
// TODO: extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                    const ComputationSliceState *sliceState,
                                    bool addMemRefDimBounds, bool dropLocalVars,
                                    bool dropOuterIvs) {
  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
         "affine read/write op expected");

  MemRefAccess access(op);
  memref = access.memref;
  write = access.isStore();

  unsigned rank = access.getRank();

  LDBG() << "MemRefRegion::compute: " << *op << " depth: " << loopDepth;

  // 0-d memrefs.
  if (rank == 0) {
    SmallVector<Value, 4> ivs;
    getAffineIVs(*op, ivs);
    assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
    // The first 'loopDepth' IVs are symbols for this region.
    ivs.resize(loopDepth);
    // A 0-d memref has a 0-d region.
    cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
    return success();
  }

  // Build the constraints for this region.
  AffineValueMap accessValueMap;
  access.getAccessMap(&accessValueMap);
  AffineMap accessMap = accessValueMap.getAffineMap();

  unsigned numDims = accessMap.getNumDims();
  unsigned numSymbols = accessMap.getNumSymbols();
  unsigned numOperands = accessValueMap.getNumOperands();
  // Merge operands with slice operands.
  SmallVector<Value, 4> operands;
  operands.resize(numOperands);
  for (unsigned i = 0; i < numOperands; ++i)
    operands[i] = accessValueMap.getOperand(i);

  if (sliceState != nullptr) {
    operands.reserve(operands.size() + sliceState->lbOperands[0].size());
    // Append slice operands to 'operands' as symbols.
    for (auto extraOperand : sliceState->lbOperands[0]) {
      if (!llvm::is_contained(operands, extraOperand)) {
        operands.push_back(extraOperand);
        numSymbols++;
      }
    }
  }
  // We'll first associate the dims and symbols of the access map to the dims
  // and symbols resp. of cst. This will change below once cst is
  // fully constructed.
  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);

  // Add inequalities for loop lower/upper bounds.
  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
    auto operand = operands[i];
    if (auto affineFor = getForInductionVarOwner(operand)) {
      // Note that cst can now have more dimensions than accessMap if the
      // bounds expressions involve outer loops or other symbols.
      // TODO: rewrite this to use getInstIndexSet; this way
      // conditionals will be handled when the latter supports it.
      if (failed(cst.addAffineForOpDomain(affineFor)))
        return failure();
    } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
      if (failed(cst.addAffineParallelOpDomain(parallelOp)))
        return failure();
    } else if (isValidSymbol(operand)) {
      // Check if the symbol is a constant.
      Value symbol = operand;
      if (auto constVal = getConstantIntValue(symbol))
        cst.addBound(BoundType::EQ, symbol, constVal.value());
    } else {
      LDBG() << "unknown affine dimensional value";
      return failure();
    }
  }

  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
  if (sliceState != nullptr) {
    // Add dim and symbol slice operands.
    for (auto operand : sliceState->lbOperands[0]) {
      if (failed(cst.addInductionVarOrTerminalSymbol(operand)))
        return failure();
    }
    // Add upper/lower bounds from 'sliceState' to 'cst'.
    LogicalResult ret =
        cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
                           sliceState->lbOperands[0]);
    assert(succeeded(ret) &&
           "should not fail as we never have semi-affine slice maps");
    (void)ret;
  }

  // Add access function equalities to connect loop IVs to data dimensions.
  if (failed(cst.composeMap(&accessValueMap))) {
    op->emitError("getMemRefRegion: compose affine map failed");
    LDBG() << "Access map: " << accessValueMap.getAffineMap();
    return failure();
  }

  // Set all variables appearing after the first 'rank' variables as
  // symbolic variables - so that the ones corresponding to the memref
  // dimensions are the dimensional variables for the memref region.
  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);

  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
  // this memref region is symbolic.
  SmallVector<Value, 4> enclosingIVs;
  getAffineIVs(*op, enclosingIVs);
  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
  enclosingIVs.resize(loopDepth);
  SmallVector<Value, 4> vars;
  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
  for (auto en : llvm::enumerate(vars)) {
    if ((isAffineInductionVar(en.value())) &&
        !llvm::is_contained(enclosingIVs, en.value())) {
      if (dropOuterIvs) {
        cst.projectOut(en.value());
      } else {
        unsigned varPosition;
        cst.findVar(en.value(), &varPosition);
        auto varKind = cst.getVarKindAt(varPosition);
        varPosition -= cst.getNumDimVars();
        cst.convertToLocal(varKind, varPosition, varPosition + 1);
      }
    }
  }

  // Project out any local variables (these would have been added for any
  // mod/divs) if specified.
  if (dropLocalVars)
    cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());

  // Constant fold any symbolic variables.
  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
                           /*num=*/cst.getNumSymbolVars());

  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");

  // Add upper/lower bounds for each memref dimension with static size
  // to guard against potential over-approximation from projection.
  // TODO: Support dynamic memref dimensions.
  if (addMemRefDimBounds) {
    auto memRefType = cast<MemRefType>(memref.getType());
    for (unsigned r = 0; r < rank; r++) {
      cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
      if (memRefType.isDynamicDim(r))
        continue;
      cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
    }
  }
  cst.removeTrivialRedundancy();

  LDBG() << "Memory region: " << cst;
  return success();
}

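// Illustrative examples for the element size computed below: for
// memref<8x8xvector<4xf32>>, the element size is 4 * 32 bits / 8 = 16 bytes;
// for memref<?x4xi8>, it is 1 byte (a sketch of typical results; the memref
// shape itself does not matter here).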
std::optional<int64_t>
mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
  auto elementType = memRefType.getElementType();

  unsigned sizeInBits;
  if (elementType.isIntOrFloat()) {
    sizeInBits = elementType.getIntOrFloatBitWidth();
  } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
    if (vectorType.getElementType().isIntOrFloat())
      sizeInBits =
          vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
    else
      return std::nullopt;
  } else {
    return std::nullopt;
  }
  return llvm::divideCeil(sizeInBits, 8);
}

// Returns the size of the region.
std::optional<int64_t> MemRefRegion::getRegionSize() {
  auto memRefType = cast<MemRefType>(memref.getType());

  if (!memRefType.getLayout().isIdentity()) {
    LDBG() << "Non-identity layout map not yet supported";
    return std::nullopt;
  }

  // Compute the extents of the buffer.
  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
  if (!numElements) {
    LDBG() << "Dynamic shapes not yet supported";
    return std::nullopt;
  }
  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!eltSize)
    return std::nullopt;
  return *eltSize * *numElements;
}

/// Returns the size of memref data in bytes if it's statically shaped,
/// std::nullopt otherwise. If the element of the memref has vector type, takes
/// into account the size of the vector as well.
// TODO: improve/complete this when we have target data.
std::optional<uint64_t>
mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
  if (!memRefType.hasStaticShape())
    return std::nullopt;
  auto elementType = memRefType.getElementType();
  if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
    return std::nullopt;

  auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!sizeInBytes)
    return std::nullopt;
  for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
    sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
  }
  return sizeInBytes;
}

template <typename LoadOrStoreOp>
LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
                                                    bool emitError) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "argument should be either a AffineReadOpInterface or a "
                "AffineWriteOpInterface");

  Operation *op = loadOrStoreOp.getOperation();
  MemRefRegion region(op->getLoc());
  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
                            /*addMemRefDimBounds=*/false)))
    return success();

  LDBG() << "Memory region: " << region.getConstraints();

  bool outOfBounds = false;
  unsigned rank = loadOrStoreOp.getMemRefType().getRank();

  // For each dimension, check for out of bounds.
  for (unsigned r = 0; r < rank; r++) {
    FlatAffineValueConstraints ucst(*region.getConstraints());

    // Intersect memory region with constraint capturing out of bounds (both
    // out of upper and out of lower), and check if the constraint system is
    // feasible. If it is, there is at least one point out of bounds.
    SmallVector<int64_t, 4> ineq(rank + 1, 0);
    int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
    // TODO: handle dynamic dim sizes.
    if (dimSize == -1)
      continue;

    // Check for overflow: d_i >= memref dim size.
    ucst.addBound(BoundType::LB, r, dimSize);
    outOfBounds = !ucst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of upper bound access along dimension #" << (r + 1);
    }

    // Check for a negative index.
    FlatAffineValueConstraints lcst(*region.getConstraints());
    llvm::fill(ineq, 0);
    // d_i <= -1;
    lcst.addBound(BoundType::UB, r, -1);
    outOfBounds = !lcst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of lower bound access along dimension #" << (r + 1);
    }
  }
  return failure(outOfBounds);
}

// Explicitly instantiate the template so that the compiler knows we need them!
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
                                      bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
                                      bool emitError);

// Returns in 'positions' the Block positions of 'op' in each ancestor
// Block from the Block containing 'op', stopping at 'limitBlock'.
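// For example (an illustrative sketch): if 'op' is the operation at index 1
// inside an affine.for body, and that affine.for is the operation at index 0
// of 'limitBlock', the computed 'positions' is [0, 1], outermost position
// first.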
static void findInstPosition(Operation *op, Block *limitBlock,
                             SmallVectorImpl<unsigned> *positions) {
  Block *block = op->getBlock();
  while (block != limitBlock) {
    // FIXME: This algorithm is unnecessarily O(n) and should be improved to
    // not rely on linear scans.
    int instPosInBlock = std::distance(block->begin(), op->getIterator());
    positions->push_back(instPosInBlock);
    op = block->getParentOp();
    block = op->getBlock();
  }
  std::reverse(positions->begin(), positions->end());
}

// Returns the Operation in a possibly nested set of Blocks, where the
// position of the operation is represented by 'positions', which has a
// Block position for each level of nesting.
static Operation *getInstAtPosition(ArrayRef<unsigned> positions,
                                    unsigned level, Block *block) {
  unsigned i = 0;
  for (auto &op : *block) {
    if (i != positions[level]) {
      ++i;
      continue;
    }
    if (level == positions.size() - 1)
      return &op;
    if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
      return getInstAtPosition(positions, level + 1,
                               childAffineForOp.getBody());

    for (auto &region : op.getRegions()) {
      for (auto &b : region)
        if (auto *ret = getInstAtPosition(positions, level + 1, &b))
          return ret;
    }
    return nullptr;
  }
  return nullptr;
}

// Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
                                            FlatAffineValueConstraints *cst) {
  for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
    auto value = cst->getValue(i);
    if (ivs.count(value) == 0) {
      assert(isAffineForInductionVar(value));
      auto loop = getForInductionVarOwner(value);
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }
  return success();
}
1502 
1503 /// Returns the innermost common loop depth for the set of operations in 'ops'.
1504 // TODO: Move this to LoopUtils.
1505 unsigned mlir::affine::getInnermostCommonLoopDepth(
1506  ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
1507  unsigned numOps = ops.size();
1508  assert(numOps > 0 && "Expected at least one operation");
1509 
1510  std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
1511  unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
1512  for (unsigned i = 0; i < numOps; ++i) {
1513  getAffineForIVs(*ops[i], &loops[i]);
1514  loopDepthLimit =
1515  std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
1516  }
1517 
1518  unsigned loopDepth = 0;
1519  for (unsigned d = 0; d < loopDepthLimit; ++d) {
1520  unsigned i;
1521  for (i = 1; i < numOps; ++i) {
1522  if (loops[i - 1][d] != loops[i][d])
1523  return loopDepth;
1524  }
1525  if (surroundingLoops)
1526  surroundingLoops->push_back(loops[i - 1][d]);
1527  ++loopDepth;
1528  }
1529  return loopDepth;
1530 }
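// Worked example (illustrative; `opA` and `opB` are hypothetical ops):
//
//   affine.for %i ... {
//     affine.for %j ... { opA }
//     affine.for %k ... { opB }
//   }
//
//   SmallVector<AffineForOp, 4> commonLoops;
//   unsigned depth = getInnermostCommonLoopDepth({opA, opB}, &commonLoops);
//
// Only the %i loop surrounds both ops, so depth == 1 and commonLoops holds
// just the %i loop.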
1531 
1532 /// Computes in 'sliceUnion' the union of all slice bounds computed at
1533 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
1534 /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
1535 /// union was computed correctly, an appropriate failure otherwise.
1536 SliceComputationResult
1537 mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA,
1538  ArrayRef<Operation *> opsB, unsigned loopDepth,
1539  unsigned numCommonLoops, bool isBackwardSlice,
1540  ComputationSliceState *sliceUnion) {
1541  // Compute the union of slice bounds between all pairs in 'opsA' and
1542  // 'opsB' in 'sliceUnionCst'.
1543  FlatAffineValueConstraints sliceUnionCst;
1544  assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
1545  std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
1546  MemRefAccess srcAccess;
1547  MemRefAccess dstAccess;
1548  for (Operation *a : opsA) {
1549  srcAccess = MemRefAccess(a);
1550  for (Operation *b : opsB) {
1551  dstAccess = MemRefAccess(b);
1552  if (srcAccess.memref != dstAccess.memref)
1553  continue;
1554  // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
1555  if ((!isBackwardSlice && loopDepth > getNestingDepth(a)) ||
1556  (isBackwardSlice && loopDepth > getNestingDepth(b))) {
1557  LDBG() << "Invalid loop depth";
1558  return SliceComputationResult::GenericFailure;
1559  }
1560 
1561  bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
1562  isa<AffineReadOpInterface>(dstAccess.opInst);
1563  FlatAffineValueConstraints dependenceConstraints;
1564  // Check dependence between 'srcAccess' and 'dstAccess'.
1565  DependenceResult result = checkMemrefAccessDependence(
1566  srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
1567  &dependenceConstraints, /*dependenceComponents=*/nullptr,
1568  /*allowRAR=*/readReadAccesses);
1569  if (result.value == DependenceResult::Failure) {
1570  LDBG() << "Dependence check failed";
1571  return SliceComputationResult::GenericFailure;
1572  }
1573  if (result.value == DependenceResult::NoDependence)
1574  continue;
1575  dependentOpPairs.emplace_back(a, b);
1576 
1577  // Compute slice bounds for 'srcAccess' and 'dstAccess'.
1578  ComputationSliceState tmpSliceState;
1579  getComputationSliceState(a, b, dependenceConstraints, loopDepth,
1580  isBackwardSlice, &tmpSliceState);
1581 
1582  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1583  // Initialize 'sliceUnionCst' with the bounds computed in previous step.
1584  if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
1585  LDBG() << "Unable to compute slice bound constraints";
1586  return SliceComputationResult::GenericFailure;
1587  }
1588  assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
1589  continue;
1590  }
1591 
1592  // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
1593  FlatAffineValueConstraints tmpSliceCst;
1594  if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
1595  LDBG() << "Unable to compute slice bound constraints";
1596  return SliceComputationResult::GenericFailure;
1597  }
1598 
1599  // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
1600  if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
1601 
1602  // Pre-constraint var alignment: record loop IVs used in each constraint
1603  // system.
1604  SmallPtrSet<Value, 8> sliceUnionIVs;
1605  for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
1606  sliceUnionIVs.insert(sliceUnionCst.getValue(k));
1607  SmallPtrSet<Value, 8> tmpSliceIVs;
1608  for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
1609  tmpSliceIVs.insert(tmpSliceCst.getValue(k));
1610 
1611  sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);
1612 
1613  // Post-constraint var alignment: add loop IV bounds missing after
1614  // var alignment to constraint systems. This can occur if one constraint
1615  // system uses a loop IV that is not used by the other. The call
1616  // to unionBoundingBox below expects constraints for each loop IV, even
1617  // if they are the unsliced full loop bounds added here.
1618  if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
1619  return SliceComputationResult::GenericFailure;
1620  if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
1621  return SliceComputationResult::GenericFailure;
1622  }
1623  // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
1624  if (sliceUnionCst.getNumLocalVars() > 0 ||
1625  tmpSliceCst.getNumLocalVars() > 0 ||
1626  failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
1627  LDBG() << "Unable to compute union bounding box of slice bounds";
1628  return SliceComputationResult::GenericFailure;
1629  }
1630  }
1631  }
1632 
1633  // Empty union.
1634  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1635  LDBG() << "empty slice union - unexpected";
1636  return SliceComputationResult::GenericFailure;
1637  }
1638 
1639  // Gather loops surrounding ops from loop nest where slice will be inserted.
1640  SmallVector<Operation *, 4> ops;
1641  for (auto &dep : dependentOpPairs) {
1642  ops.push_back(isBackwardSlice ? dep.second : dep.first);
1643  }
1644  SmallVector<AffineForOp, 4> surroundingLoops;
1645  unsigned innermostCommonLoopDepth =
1646  getInnermostCommonLoopDepth(ops, &surroundingLoops);
1647  if (loopDepth > innermostCommonLoopDepth) {
1648  LDBG() << "Exceeds max loop depth";
1649  return SliceComputationResult::GenericFailure;
1650  }
1651 
1652  // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
1653  unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
1654 
1655  // Convert any dst loop IVs which are symbol variables to dim variables.
1656  sliceUnionCst.convertLoopIVSymbolsToDims();
1657  sliceUnion->clearBounds();
1658  sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
1659  sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
1660 
1661  // Get slice bounds from slice union constraints 'sliceUnionCst'.
1662  sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
1663  opsA[0]->getContext(), &sliceUnion->lbs,
1664  &sliceUnion->ubs);
1665 
1666  // Add slice bound operands of union.
1667  SmallVector<Value, 4> sliceBoundOperands;
1668  sliceUnionCst.getValues(numSliceLoopIVs,
1669  sliceUnionCst.getNumDimAndSymbolVars(),
1670  &sliceBoundOperands);
1671 
1672  // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
1673  sliceUnion->ivs.clear();
1674  sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);
1675 
1676  // Set the loop nest insertion point to the start of the block at 'loopDepth'
1677  // for backward slices, and to the end of that block for forward slices.
1678  sliceUnion->insertPoint =
1679  isBackwardSlice
1680  ? surroundingLoops[loopDepth - 1].getBody()->begin()
1681  : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
1682 
1683  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1684  // canonicalization.
1685  sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1686  sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1687 
1688  // Check if the slice computed is valid. Return success only if it is verified
1689  // that the slice is valid, otherwise return appropriate failure status.
1690  std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
1691  if (!isSliceValid) {
1692  LDBG() << "Cannot determine if the slice is valid";
1693  return SliceComputationResult::GenericFailure;
1694  }
1695  if (!*isSliceValid)
1696  return SliceComputationResult::IncorrectSliceFailure;
1697 
1698  return SliceComputationResult::Success;
1699 }
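// Example usage (illustrative sketch; `storeOp` and `loadOp` are hypothetical
// dependent accesses to the same memref in two separate loop nests):
//
//   ComputationSliceState sliceUnion;
//   SliceComputationResult res = computeSliceUnion(
//       /*opsA=*/{storeOp}, /*opsB=*/{loadOp}, /*loopDepth=*/1,
//       /*numCommonLoops=*/0, /*isBackwardSlice=*/true, &sliceUnion);
//   if (res.value == SliceComputationResult::Success) {
//     // sliceUnion now holds the source IV bounds for insertion at depth 1.
//   }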
1700 
1701 // TODO: extend this to handle multiple result maps.
1702 static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1703  AffineMap ubMap) {
1704  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
1705  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
1706  assert(lbMap.getNumDims() == ubMap.getNumDims());
1707  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1708  AffineExpr lbExpr(lbMap.getResult(0));
1709  AffineExpr ubExpr(ubMap.getResult(0));
1710  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
1711  lbMap.getNumSymbols());
1712  auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
1713  if (!cExpr)
1714  return std::nullopt;
1715  return cExpr.getValue();
1716 }
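// Worked example (illustrative): with lbMap = (d0) -> (d0) and
// ubMap = (d0) -> (d0 + 4), ubExpr - lbExpr simplifies to the constant 4,
// which is returned. With ubMap = (d0) -> (d0 * 2), the difference
// d0 * 2 - d0 is not constant and std::nullopt is returned.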
1717 
1718 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for the
1719 // loop nest represented by the slice loop bounds in 'slice'. Returns true
1720 // on success, false otherwise (if a non-constant trip count was encountered).
1721 // TODO: Make this work with non-unit step loops.
1722 bool mlir::affine::buildSliceTripCountMap(
1723  const ComputationSliceState &slice,
1724  llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1725  unsigned numSrcLoopIVs = slice.ivs.size();
1726  // Populate map from AffineForOp -> trip count
1727  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1728  AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1729  auto *op = forOp.getOperation();
1730  AffineMap lbMap = slice.lbs[i];
1731  AffineMap ubMap = slice.ubs[i];
1732  // If lower or upper bound maps are null or provide no results, it implies
1733  // that source loop was not at all sliced, and the entire loop will be a
1734  // part of the slice.
1735  if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
1736  ubMap.getNumResults() == 0) {
1737  // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1738  if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1739  (*tripCountMap)[op] =
1740  forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1741  continue;
1742  }
1743  std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1744  if (maybeConstTripCount.has_value()) {
1745  (*tripCountMap)[op] = *maybeConstTripCount;
1746  continue;
1747  }
1748  return false;
1749  }
1750  std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1751  // Slice bounds are created with a constant ub - lb difference.
1752  if (!tripCount.has_value())
1753  return false;
1754  (*tripCountMap)[op] = *tripCount;
1755  }
1756  return true;
1757 }
1758 
1759 // Return the number of iterations in the given slice.
1760 uint64_t mlir::affine::getSliceIterationCount(
1761  const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1762  uint64_t iterCount = 1;
1763  for (const auto &count : sliceTripCountMap) {
1764  iterCount *= count.second;
1765  }
1766  return iterCount;
1767 }
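// Worked example (illustrative): if buildSliceTripCountMap computed the trip
// counts {outer loop: 4, inner loop: 8} for a two-loop slice, then
// getSliceIterationCount returns 4 * 8 = 32 iterations.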
1768 
1769 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1770 // Computes slice bounds by projecting out any loop IVs from
1771 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1772 // bounds in 'sliceState' which represent the one loop nest's IVs in terms of
1773 // the other loop nest's IVs, symbols and constants (using 'isBackwardSlice').
1774 void mlir::affine::getComputationSliceState(
1775  Operation *depSourceOp, Operation *depSinkOp,
1776  const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth,
1777  bool isBackwardSlice, ComputationSliceState *sliceState) {
1778  // Get loop nest surrounding src operation.
1779  SmallVector<AffineForOp, 4> srcLoopIVs;
1780  getAffineForIVs(*depSourceOp, &srcLoopIVs);
1781  unsigned numSrcLoopIVs = srcLoopIVs.size();
1782 
1783  // Get loop nest surrounding dst operation.
1784  SmallVector<AffineForOp, 4> dstLoopIVs;
1785  getAffineForIVs(*depSinkOp, &dstLoopIVs);
1786  unsigned numDstLoopIVs = dstLoopIVs.size();
1787 
1788  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1789  (isBackwardSlice && loopDepth <= numDstLoopIVs));
1790 
1791  // Project out dimensions other than those up to 'loopDepth'.
1792  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1793  unsigned num =
1794  isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1795  FlatAffineValueConstraints sliceCst(dependenceConstraints);
1796  sliceCst.projectOut(pos, num);
1797 
1798  // Add slice loop IV values to 'sliceState'.
1799  unsigned offset = isBackwardSlice ? 0 : loopDepth;
1800  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1801  sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs);
1802 
1803  // Set up lower/upper bound affine maps for the slice.
1804  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
1805  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
1806 
1807  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1808  sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(),
1809  &sliceState->lbs, &sliceState->ubs);
1810 
1811  // Set up bound operands for the slice's lower and upper bounds.
1812  SmallVector<Value, 4> sliceBoundOperands;
1813  unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars();
1814  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
1815  if (i < offset || i >= offset + numSliceLoopIVs)
1816  sliceBoundOperands.push_back(sliceCst.getValue(i));
1817  }
1818 
1819  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1820  // canonicalization.
1821  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1822  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1823 
1824  // Set destination loop nest insertion point to block start at 'dstLoopDepth'.
1825  sliceState->insertPoint =
1826  isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1827  : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1828 
1829  llvm::SmallDenseSet<Value, 8> sequentialLoops;
1830  if (isa<AffineReadOpInterface>(depSourceOp) &&
1831  isa<AffineReadOpInterface>(depSinkOp)) {
1832  // For read-read access pairs, clear any slice bounds on sequential loops.
1833  // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1834  getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
1835  &sequentialLoops);
1836  }
1837  auto getSliceLoop = [&](unsigned i) {
1838  return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1839  };
1840  auto isInnermostInsertion = [&]() {
1841  return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1842  : loopDepth >= dstLoopIVs.size());
1843  };
1844  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
1845  auto srcIsUnitSlice = [&]() {
1846  return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
1847  (getSliceIterationCount(sliceTripCountMap) == 1));
1848  };
1849  // Clear all sliced loop bounds beginning at the first sequential loop or
1850  // the first loop with a slice fusion barrier attribute.
1851 
1852  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
1853  Value iv = getSliceLoop(i).getInductionVar();
1854  if (sequentialLoops.count(iv) == 0 &&
1855  getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1856  continue;
1857  // Skip resetting the bounds of a reduction loop inserted in the destination
1858  // loop nest when all of the following conditions hold:
1859  // 1. The slice has a single trip count.
1860  // 2. The loop bounds of the source and destination match.
1861  // 3. The slice is being inserted at the innermost insertion point.
1862  std::optional<bool> isMaximal = sliceState->isMaximal();
1863  if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
1864  isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1865  continue;
1866  for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1867  sliceState->lbs[j] = AffineMap();
1868  sliceState->ubs[j] = AffineMap();
1869  }
1870  break;
1871  }
1872 }
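// Example usage (illustrative sketch; `storeOp`, `loadOp`, and
// `dependenceConstraints` are hypothetical, with the constraints coming from
// checkMemrefAccessDependence):
//
//   ComputationSliceState sliceState;
//   getComputationSliceState(storeOp, loadOp, dependenceConstraints,
//                            /*loopDepth=*/2, /*isBackwardSlice=*/true,
//                            &sliceState);
//   // sliceState.lbs/ubs now express the source IVs in terms of the two
//   // outermost destination IVs, symbols, and constants.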
1873 
1874 /// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1875 /// updates the slice loop bounds with any non-null bound maps specified in
1876 /// 'sliceState', and inserts this slice into the loop nest surrounding
1877 /// 'dstOpInst' at loop depth 'dstLoopDepth'.
1878 // TODO: extend the slicing utility to compute slices that
1879 // aren't necessarily a one-to-one relation between the source and destination. The
1880 // relation between the source and destination could be many-to-many in general.
1881 // TODO: the slice computation is incorrect in the cases
1882 // where the dependence from the source to the destination does not cover the
1883 // entire destination index set. Subtract out the dependent destination
1884 // iterations from destination index set and check for emptiness --- this is one
1885 // solution.
1886 AffineForOp mlir::affine::insertBackwardComputationSlice(
1887  Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
1888  ComputationSliceState *sliceState) {
1889  // Get loop nest surrounding src operation.
1890  SmallVector<AffineForOp, 4> srcLoopIVs;
1891  getAffineForIVs(*srcOpInst, &srcLoopIVs);
1892  unsigned numSrcLoopIVs = srcLoopIVs.size();
1893 
1894  // Get loop nest surrounding dst operation.
1895  SmallVector<AffineForOp, 4> dstLoopIVs;
1896  getAffineForIVs(*dstOpInst, &dstLoopIVs);
1897  unsigned dstLoopIVsSize = dstLoopIVs.size();
1898  if (dstLoopDepth > dstLoopIVsSize) {
1899  dstOpInst->emitError("invalid destination loop depth");
1900  return AffineForOp();
1901  }
1902 
1903  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1904  SmallVector<unsigned, 4> positions;
1905  // TODO: This code is incorrect since srcLoopIVs can be 0-d.
1906  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
1907 
1908  // Clone src loop nest and insert it at the beginning of the operation block
1909  // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1910  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1911  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1912  auto sliceLoopNest =
1913  cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
1914 
1915  Operation *sliceInst =
1916  getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
1917  // Get loop nest surrounding 'sliceInst'.
1918  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
1919  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1920 
1921  // Sanity check.
1922  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1923  (void)sliceSurroundingLoopsSize;
1924  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1925  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1926  (void)sliceLoopLimit;
1927  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1928 
1929  // Update loop bounds for loops in 'sliceLoopNest'.
1930  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1931  auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1932  if (AffineMap lbMap = sliceState->lbs[i])
1933  forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
1934  if (AffineMap ubMap = sliceState->ubs[i])
1935  forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
1936  }
1937  return sliceLoopNest;
1938 }
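// Example usage (illustrative sketch, continuing the hypothetical
// `sliceState` computed above):
//
//   AffineForOp sliceNest = insertBackwardComputationSlice(
//       storeOp, loadOp, /*dstLoopDepth=*/2, &sliceState);
//   if (sliceNest) {
//     // The cloned source nest now sits at the top of the body of the
//     // destination loop at depth 2, with the sliced bounds applied.
//   }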
1939 
1940 // Constructs a MemRefAccess, populating it with the memref, its indices, and
1941 // the op from 'memOp'.
1942 MemRefAccess::MemRefAccess(Operation *memOp) {
1943  if (auto loadOp = dyn_cast<AffineReadOpInterface>(memOp)) {
1944  memref = loadOp.getMemRef();
1945  opInst = memOp;
1946  llvm::append_range(indices, loadOp.getMapOperands());
1947  } else {
1948  assert(isa<AffineWriteOpInterface>(memOp) &&
1949  "Affine read/write op expected");
1950  auto storeOp = cast<AffineWriteOpInterface>(memOp);
1951  opInst = memOp;
1952  memref = storeOp.getMemRef();
1953  llvm::append_range(indices, storeOp.getMapOperands());
1954  }
1955 }
1956 
1957 unsigned MemRefAccess::getRank() const {
1958  return cast<MemRefType>(memref.getType()).getRank();
1959 }
1960 
1961 bool MemRefAccess::isStore() const {
1962  return isa<AffineWriteOpInterface>(opInst);
1963 }
1964 
1965 /// Returns the nesting depth of this operation, i.e., the number of loops
1966 /// surrounding this operation.
1967 unsigned mlir::affine::getNestingDepth(Operation *op) {
1968  Operation *currOp = op;
1969  unsigned depth = 0;
1970  while ((currOp = currOp->getParentOp())) {
1971  if (isa<AffineForOp>(currOp))
1972  depth++;
1973  if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
1974  depth += parOp.getNumDims();
1975  }
1976  return depth;
1977 }
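// Worked example (illustrative):
//
//   affine.for %i ... {
//     affine.parallel (%j, %k) ... {
//       %v = affine.load ...   // getNestingDepth(loadOp) == 3
//     }
//   }
//
// The affine.for contributes 1 and the two-dimensional affine.parallel
// contributes 2.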
1978 
1979 /// Equal if both affine accesses are provably equivalent (at compile
1980 /// time) when considering the memref, the affine maps and their respective
1981 /// operands. The equality of access functions + operands is checked by
1982 /// subtracting fully composed value maps, and then simplifying the difference
1983 /// using the expression flattener.
1984 /// TODO: this does not account for aliasing of memrefs.
1985 bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
1986  if (memref != rhs.memref)
1987  return false;
1988 
1989  AffineValueMap diff, thisMap, rhsMap;
1990  getAccessMap(&thisMap);
1991  rhs.getAccessMap(&rhsMap);
1992  return thisMap == rhsMap;
1993 }
1994 
1995 void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
1996  auto *currOp = op.getParentOp();
1997  AffineForOp currAffineForOp;
1998  // Traverse up the hierarchy collecting all 'affine.for' and 'affine.parallel'
1999  // operations while skipping over 'affine.if' operations.
2000  while (currOp) {
2001  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
2002  ivs.push_back(currAffineForOp.getInductionVar());
2003  else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
2004  llvm::append_range(ivs, parOp.getIVs());
2005  currOp = currOp->getParentOp();
2006  }
2007  std::reverse(ivs.begin(), ivs.end());
2008 }
2009 
2010 /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
2011 /// where each lists loops from outer-most to inner-most in loop nest.
2012 unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
2013  Operation &b) {
2014  SmallVector<Value, 4> loopsA, loopsB;
2015  getAffineIVs(a, loopsA);
2016  getAffineIVs(b, loopsB);
2017 
2018  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
2019  unsigned numCommonLoops = 0;
2020  for (unsigned i = 0; i < minNumLoops; ++i) {
2021  if (loopsA[i] != loopsB[i])
2022  break;
2023  ++numCommonLoops;
2024  }
2025  return numCommonLoops;
2026 }
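// Worked example (illustrative; opA and opB are hypothetical ops):
//
//   affine.for %i ... {
//     opA
//     affine.for %j ... { opB }
//   }
//
// getAffineIVs yields [%i] for opA and [%i, %j] for opB, so
// getNumCommonSurroundingLoops(*opA, *opB) == 1.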
2027 
2028 static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
2029  Block::iterator start,
2030  Block::iterator end,
2031  int memorySpace) {
2032  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
2033 
2034  // Walk this 'affine.for' operation to gather all memory regions.
2035  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
2036  if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
2037  // Neither a load nor a store op.
2038  return WalkResult::advance();
2039  }
2040 
2041  // Compute the memref region symbolic in any IVs enclosing this block.
2042  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
2043  if (failed(
2044  region->compute(opInst,
2045  /*loopDepth=*/getNestingDepth(&*block.begin())))) {
2046  LDBG() << "Error obtaining memory region";
2047  opInst->emitError("error obtaining memory region");
2048  return failure();
2049  }
2050 
2051  auto [it, inserted] = regions.try_emplace(region->memref);
2052  if (inserted) {
2053  it->second = std::move(region);
2054  } else if (failed(it->second->unionBoundingBox(*region))) {
2055  LDBG() << "getMemoryFootprintBytes: unable to perform a union on a "
2056  "memory region";
2057  opInst->emitWarning(
2058  "getMemoryFootprintBytes: unable to perform a union on a memory "
2059  "region");
2060  return failure();
2061  }
2062  return WalkResult::advance();
2063  });
2064  if (result.wasInterrupted())
2065  return std::nullopt;
2066 
2067  int64_t totalSizeInBytes = 0;
2068  for (const auto &region : regions) {
2069  std::optional<int64_t> size = region.second->getRegionSize();
2070  if (!size.has_value())
2071  return std::nullopt;
2072  totalSizeInBytes += *size;
2073  }
2074  return totalSizeInBytes;
2075 }
2076 
2077 std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
2078  int memorySpace) {
2079  auto *forInst = forOp.getOperation();
2080  return ::getMemoryFootprintBytes(
2081  *forInst->getBlock(), Block::iterator(forInst),
2082  std::next(Block::iterator(forInst)), memorySpace);
2083 }
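// Worked example (illustrative):
//
//   affine.for %i = 0 to 1024 {
//     %v = affine.load %A[%i] : memref<1024xf32>
//     affine.store %v, %B[%i] : memref<1024xf32>
//   }
//
// The regions of %A and %B each cover 1024 f32 elements, so the footprint is
// 2 * 1024 * 4 = 8192 bytes.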
2084 
2085 /// Returns whether a loop is parallel and contains a reduction loop.
2086 bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
2087  SmallVector<LoopReduction> reductions;
2088  if (!isLoopParallel(forOp, &reductions))
2089  return false;
2090  return !reductions.empty();
2091 }
2092 
2093 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
2094 /// at 'forOp'.
2095 void mlir::affine::getSequentialLoops(
2096  AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
2097  forOp->walk([&](Operation *op) {
2098  if (auto innerFor = dyn_cast<AffineForOp>(op))
2099  if (!isLoopParallel(innerFor))
2100  sequentialLoops->insert(innerFor.getInductionVar());
2101  });
2102 }
2103 
2104 IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
2105  FlatAffineValueConstraints fac(set);
2106  if (fac.isEmpty())
2107  return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
2108  set.getContext());
2109  fac.removeTrivialRedundancy();
2110 
2111  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
2112  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
2113  return simplifiedSet;
2114 }
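// Worked example (illustrative): the set (d0) : (d0 >= 0, -d0 - 1 >= 0)
// requires d0 >= 0 and d0 <= -1 simultaneously, so it is infeasible and the
// canonical empty set (d0) : (1 == 0) is returned. A feasible set is
// round-tripped through FlatAffineValueConstraints with trivially redundant
// constraints removed.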
2115 
2116 static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
2117  SmallVector<Value> &target) {
2118  target =
2119  llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
2120  return val.has_value() ? *val : Value();
2121  }));
2122 }
2123 
2124 /// Bound an identifier `pos` in a given FlatAffineValueConstraints with
2125 /// constraints drawn from an affine map. Before adding the constraint, the
2126 /// dimensions/symbols of the affine map are aligned with `constraints`.
2127 /// `operands` are the SSA Value operands used with the affine map.
2128 /// Note: This function adds a new symbol column to the `constraints` for each
2129 /// dimension/symbol that exists in the affine map but not in `constraints`.
2130 static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
2131  BoundType type, unsigned pos,
2132  AffineMap map, ValueRange operands) {
2133  SmallVector<Value> dims, syms, newSyms;
2134  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
2135  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
2136 
2137  AffineMap alignedMap =
2138  alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
2139  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
2140  constraints.appendSymbolVar(newSyms[i]);
2141  return constraints.addBound(type, pos, alignedMap);
2142 }
2143 
2144 /// Add `val` to each result of `map`.
2145 static AffineMap addConstToResults(AffineMap map, int64_t val) {
2146  SmallVector<AffineExpr> newResults;
2147  for (AffineExpr r : map.getResults())
2148  newResults.push_back(r + val);
2149  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
2150  map.getContext());
2151 }
2152 
2153 // Attempt to simplify the given min/max operation by proving that its value is
2154 // bounded by the same lower and upper bound.
2155 //
2156 // Bounds are computed by FlatAffineValueConstraints. Invariants required for
2157 // finding/proving bounds should be supplied via `constraints`.
2158 //
2159 // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
2160 // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
2161 // case of `!isMin`) and bind it to `opBound`. SSA values that are used in
2162 // `op` but are not part of `constraints`, are added as extra symbols.
2163 // 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
2164 // * If `isMin`: r_i >= opBound
2165 // * If `!isMin`: r_i <= opBound
2166 // If this is the case, ub(op) == lb(op).
2167 // 4. Replace `op` with `opBound`.
2168 //
2169 // In summary, the following constraints are added throughout this function.
2170 // Note: `invar` are dimensions added by the caller to express the invariants.
2171 // (Showing only the case where `isMin`.)
2172 //
2173 // invar | op | opBound | r_i | extra syms... | const | eq/ineq
2174 // ------+-------+---------+-----+---------------+-------+-------------------
2175 // (various eq./ineq. constraining `invar`, added by the caller)
2176 // ... | 0 | 0 | 0 | 0 | ... | ...
2177 // ------+-------+---------+-----+---------------+-------+-------------------
2178 // (various ineq. constraining `op` in terms of `op` operands (`invar` and
2179 // extra `op` operands "extra syms" that are not in `invar`)).
2180 // ... | -1 | 0 | 0 | ... | ... | >= 0
2181 // ------+-------+---------+-----+---------------+-------+-------------------
2182 // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
2183 // ... | 0 | -1 | 0 | ... | ... | = 0
2184 // ------+-------+---------+-----+---------------+-------+-------------------
2185 // (for each `op` map result r_i: set r_i to corresponding map result,
2186 // prove that r_i >= minOpUb via contradiction)
2187 // ... | 0 | 0 | -1 | ... | ... | = 0
2188 // 0 | 0 | 1 | -1 | 0 | -1 | >= 0
2189 //
2190 FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
2191  Operation *op, FlatAffineValueConstraints constraints) {
2192  bool isMin = isa<AffineMinOp>(op);
2193  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
2194  MLIRContext *ctx = op->getContext();
2195  Builder builder(ctx);
2196  AffineMap map =
2197  isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2198  ValueRange operands = op->getOperands();
2199  unsigned numResults = map.getNumResults();
2200 
2201  // Add a few extra dimensions.
2202  unsigned dimOp = constraints.appendDimVar(); // `op`
2203  unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
2204  unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults);
2205 
2206  // Add an inequality for each result expr_i of map:
2207  // isMin: op <= expr_i, !isMin: op >= expr_i
2208  auto boundType = isMin ? BoundType::UB : BoundType::LB;
2209  // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
2210  AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
2211  if (failed(
2212  alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
2213  return failure();
2214 
2215  // Try to compute a lower/upper bound for op, expressed in terms of the other
2216  // `dims` and extra symbols.
2217  SmallVector<AffineMap> opLb(1), opUb(1);
2218  constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
2219  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
2220  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
2221  // a TODO of `getSliceBounds` and not handled here.
2222  if (!sliceBound || sliceBound.getNumResults() != 1)
2223  return failure(); // No or multiple bounds found.
2224  // Recover the inclusive UB in the case of an `affine.min`.
2225  AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;
2226 
2227  // Add an equality: Set dimOpBound to computed bound.
2228  // Add back dimension for op. (Was removed by `getSliceBounds`.)
2229  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
2230  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
2231  return failure();
2232 
2233  // If the constraint system is empty, there is an inconsistency. (E.g., this
2234  // can happen if loop lb > ub.)
2235  if (constraints.isEmpty())
2236  return failure();
2237 
2238  // In the case of `isMin` (`!isMin` is inversed):
2239  // Prove that each result of `map` has a lower bound that is equal to (or
2240  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
2241  // can be replaced with the bound. I.e., prove that for each result
2242  // expr_i (represented by dimension r_i):
2243  //
2244  // r_i >= opBound
2245  //
2246  // To prove this inequality, add its negation to the constraint set and prove
2247  // that the constraint set is empty.
2248  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
2249  FlatAffineValueConstraints newConstr(constraints);
2250 
2251  // Add an equality: r_i = expr_i
2252  // Note: These equalities could have been added earlier and used to express
2253  // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
2254  // computes minOpUb in terms of r_i dims, which is not desired.
2255  if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
2256  map.getSubMap({i - resultDimStart}), operands)))
2257  return failure();
2258 
2259  // If `isMin`: Add inequality: r_i < opBound
2260  // equiv.: opBound - r_i - 1 >= 0
2261  // If `!isMin`: Add inequality: r_i > opBound
2262  // equiv.: -opBound + r_i - 1 >= 0
2263  SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
2264  ineq[dimOpBound] = isMin ? 1 : -1;
2265  ineq[i] = isMin ? -1 : 1;
2266  ineq[newConstr.getNumCols() - 1] = -1;
2267  newConstr.addInequality(ineq);
2268  if (!newConstr.isEmpty())
2269  return failure();
2270  }
2271 
2272  // Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
2273  AffineMap newMap = alignedBoundMap;
2274  SmallVector<Value> newOperands;
2275  unpackOptionalValues(constraints.getMaybeValues(), newOperands);
2276  // If dims/symbols have known constant values, use those in order to simplify
2277  // the affine map further.
2278  for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
2279  // Skip unused operands and operands that are already constants.
2280  if (!newOperands[i] || getConstantIntValue(newOperands[i]))
2281  continue;
2282  if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
2283  AffineExpr expr =
2284  i < newMap.getNumDims()
2285  ? builder.getAffineDimExpr(i)
2286  : builder.getAffineSymbolExpr(i - newMap.getNumDims());
2287  newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
2288  newMap.getNumDims(), newMap.getNumSymbols());
2289  }
2290  }
2291  affine::canonicalizeMapAndOperands(&newMap, &newOperands);
2292  return AffineValueMap(newMap, newOperands);
2293 }
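// Worked example (illustrative): for
//   %m = affine.min affine_map<(d0) -> (d0, 16)>(%ub)
// with caller-provided `constraints` implying %ub <= 16, each map result is
// provably no less than the computed upper bound of %m, so the op can be
// simplified to the single-result AffineValueMap (d0) -> (d0) with operand
// %ub. Without such invariants in `constraints`, failure() is returned.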
2294 
2295 mlir::Block *mlir::affine::findInnermostCommonBlockInScope(mlir::Operation *a,
2296  Operation *b) {
2297  Region *aScope = getAffineAnalysisScope(a);
2298  Region *bScope = getAffineAnalysisScope(b);
2299  if (aScope != bScope)
2300  return nullptr;
2301 
2302  // Get the block ancestry of `op` while stopping at the affine scope `aScope`
2303  // and store them in `ancestry`.
2304  auto getBlockAncestry = [&](Operation *op,
2305  SmallVectorImpl<Block *> &ancestry) {
2306  Operation *curOp = op;
2307  do {
2308  ancestry.push_back(curOp->getBlock());
2309  if (curOp->getParentRegion() == aScope)
2310  break;
2311  curOp = curOp->getParentOp();
2312  } while (curOp);
2313  assert(curOp && "can't reach root op without passing through affine scope");
2314  std::reverse(ancestry.begin(), ancestry.end());
2315  };
2316 
2317  SmallVector<Block *, 4> aAncestors, bAncestors;
2318  getBlockAncestry(a, aAncestors);
2319  getBlockAncestry(b, bAncestors);
2320  assert(!aAncestors.empty() && !bAncestors.empty() &&
2321  "at least one Block ancestor expected");
2322 
2323  Block *innermostCommonBlock = nullptr;
2324  for (unsigned a = 0, b = 0, e = aAncestors.size(), f = bAncestors.size();
2325  a < e && b < f; ++a, ++b) {
2326  if (aAncestors[a] != bAncestors[b])
2327  break;
2328  innermostCommonBlock = aAncestors[a];
2329  }
2330  return innermostCommonBlock;
2331 }
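// Worked example (illustrative; opA and opB are hypothetical ops in the same
// affine scope):
//
//   affine.for %i ... {
//     affine.for %j ... { opA }
//     opB
//   }
//
// The innermost common Block of opA and opB is the body block of the %i
// loop. For ops in different affine scopes, nullptr is returned.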
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
Definition: Utils.cpp:1702
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
Definition: Utils.cpp:1447
static Node * addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg, DenseMap< Value, SetVector< unsigned >> &memrefAccesses)
Add op to MDG creating a new node and adding its memory accesses (affine or non-affine to memrefAcces...
Definition: Utils.cpp:190
const char *const kSliceFusionBarrierAttrName
Definition: Utils.cpp:1769
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
Definition: Utils.cpp:1489
static void getEffectedValues(Operation *op, SmallVectorImpl< Value > &values)
Returns the values that this op has a memref effect of type EffectTys on, not considering recursive e...
Definition: Utils.cpp:164
MemRefDependenceGraph::Node Node
Definition: Utils.cpp:37
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
Definition: Utils.cpp:1464
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
Definition: Utils.cpp:2028
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
Definition: Utils.cpp:2145
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
Definition: Utils.cpp:2130
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
Definition: Utils.cpp:2116
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:223
unsigned getPosition() const
Definition: AffineExpr.cpp:346
Base type for affine expression.
Definition: AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:339
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
Definition: AffineMap.h:267
unsigned getNumSymbols() const
Definition: AffineMap.cpp:394
unsigned getNumDims() const
Definition: AffineMap.cpp:390
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:403
unsigned getNumResults() const
Definition: AffineMap.cpp:398
unsigned getNumInputs() const
Definition: AffineMap.cpp:399
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:407
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
Definition: AffineMap.cpp:511
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Definition: AffineMap.cpp:647
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Definition: Block.h:308
iterator begin()
Definition: Block.h:143
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:31
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:363
AffineExpr getAffineConstantExpr(int64_t constant)
Definition: Builders.cpp:367
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:359
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
std::optional< int64_t > getConstantBoundOnDimSize(MLIRContext *context, unsigned pos, AffineMap *lb=nullptr, AffineMap *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
Returns a non-negative constant bound on the extent (upper bound - lower bound) of the specified vari...
FlatLinearValueConstraints represents an extension of FlatLinearConstraints where each non-local vari...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
SmallVector< std::optional< Value > > getMaybeValues() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
void addBound(presburger::BoundType type, Value val, int64_t value)
Adds a constant bound for the variable associated with the given Value.
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
An integer set representing a conjunction of one or more affine equalities and inequalities.
Definition: IntegerSet.h:44
unsigned getNumDims() const
Definition: IntegerSet.cpp:15
MLIRContext * getContext() const
Definition: IntegerSet.cpp:57
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
Definition: IntegerSet.h:56
unsigned getNumSymbols() const
Definition: IntegerSet.cpp:16
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:548
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:749
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:385
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:279
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: WalkResult.h:29
static WalkResult advance()
Definition: WalkResult.h:47
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
Definition: Utils.cpp:2104
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:769
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
Definition: Utils.cpp:1537
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
Definition: AffineOps.cpp:2797
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
Definition: Utils.cpp:2086
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:2012
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2801
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
Definition: Utils.cpp:1995
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
Definition: Utils.cpp:2095
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1619
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2789
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
Definition: AffineOps.cpp:275
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
Definition: Utils.cpp:755
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:2077
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
Definition: Utils.cpp:1505
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:412
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
Definition: Utils.cpp:1774
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns true if the provided value is among the induction variables of an AffineParallelOp.
Definition: AffineOps.cpp:2812
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
Definition: Utils.cpp:1367
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1967
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the number of iterations for the slicetripCountMap provided.
Definition: Utils.cpp:1760
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
mlir::Block * findInnermostCommonBlockInScope(mlir::Operation *a, mlir::Operation *b)
Find the innermost common Block of a and b in the affine scope that a and b are part of.
Definition: Utils.cpp:2295
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
Definition: Utils.cpp:1722
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
Definition: Utils.cpp:1886
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
Definition: Utils.cpp:2190
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
Definition: Utils.cpp:1323
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
BoundType
The type of bound: equal, lower bound or upper bound.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:643
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
Definition: Utils.h:310
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
Definition: Utils.cpp:940
SmallVector< Value, 4 > ivs
Definition: Utils.h:313
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
Definition: Utils.cpp:802
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
Definition: Utils.cpp:786
std::vector< SmallVector< Value, 4 > > ubOperands
Definition: Utils.h:321
SmallVector< AffineMap, 4 > ubs
Definition: Utils.h:317
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
Definition: Utils.cpp:1010
SmallVector< AffineMap, 4 > lbs
Definition: Utils.h:315
std::vector< SmallVector< Value, 4 > > lbOperands
Definition: Utils.h:319
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:49
SmallVector< Operation *, 4 > loadOpInsts
Definition: Utils.h:41
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:47
void collect(Operation *opToWalk)
Definition: Utils.cpp:42
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:45
SmallVector< Operation *, 4 > storeOpInsts
Definition: Utils.h:43
Encapsulates a memref load or store access information.
unsigned getRank() const
Definition: Utils.cpp:1957
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
Definition: Utils.cpp:1985
SmallVector< Operation *, 4 > loads
Definition: Utils.h:73
SmallVector< Operation *, 4 > stores
Definition: Utils.h:77
unsigned hasFree(Value memref) const
Definition: Utils.cpp:122
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:75
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:79
unsigned getStoreOpCount(Value memref) const
Definition: Utils.cpp:92
unsigned hasStore(Value memref) const
Returns true if there exists an operation with a write memory effect on memref in this node.
Definition: Utils.cpp:108
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:81
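A short sketch over the node queries above (the describing helper is ours, purely illustrative):

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: summarize one dependence-graph node's traffic on `memref`.
static void describeNode(const mlir::affine::Node &node, mlir::Value memref) {
  llvm::errs() << "loads: " << node.getLoadOpCount(memref)
               << ", stores: " << node.getStoreOpCount(memref)
               << (node.hasStore(memref) ? " (writes memref)" : "") << "\n";
}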
unsigned addNode(Operation *op)
Definition: Utils.cpp:371
bool writesToLiveInOrEscapingMemrefs(unsigned id) const
Definition: Utils.cpp:401
void removeEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:447
void addEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:436
DenseMap< unsigned, Node > nodes
Definition: Utils.h:141
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes) const
Return all nodes which define SSA values used in node 'id'.
Definition: Utils.cpp:535
bool hasDependencePath(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:474
void clearNodeLoadAndStores(unsigned id)
Definition: Utils.cpp:701
const Node * getForOpNode(AffineForOp forOp) const
Definition: Utils.cpp:363
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:549
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
Definition: Utils.cpp:624
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:709
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr) const
Definition: Utils.cpp:525
const Node * getNode(unsigned id) const
Definition: Utils.cpp:356
void removeNode(unsigned id)
Definition: Utils.cpp:378
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:717
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:725
void addToNode(unsigned id, ArrayRef< Operation * > loads, ArrayRef< Operation * > stores, ArrayRef< Operation * > memrefLoads, ArrayRef< Operation * > memrefStores, ArrayRef< Operation * > memrefFrees)
Definition: Utils.cpp:688
unsigned getIncomingMemRefAccesses(unsigned id, Value memref) const
Definition: Utils.cpp:509
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr) const
Definition: Utils.cpp:421
void print(raw_ostream &os) const
Definition: Utils.cpp:737
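A hedged sketch of querying the graph (the helper is ours; it assumes the graph was already populated elsewhere, since graph construction is not listed in this index):

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: check for a dependence path between two nodes, then dump
// the whole graph for inspection.
static void inspectPair(const mlir::affine::MemRefDependenceGraph &graph,
                        unsigned srcId, unsigned dstId) {
  if (graph.hasDependencePath(srcId, dstId))
    llvm::errs() << srcId << " -> " << dstId << " has a dependence path\n";
  graph.print(llvm::errs());
}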
A region of a memref's data space; this is typically constructed by analyzing load/store ops on this memref.
Definition: Utils.h:481
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constant (always possible for static shapes), std::nullopt otherwise.
Definition: Utils.cpp:1064
FlatAffineValueConstraints * getConstraints()
Definition: Utils.h:527
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
Definition: Utils.cpp:1060
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true, bool dropLocalVars=true, bool dropOuterIVs=true)
Computes the memory region accessed by this memref with the region represented as constraints symbolic/parametric in 'loopDepth' loops surrounding the operation.
Definition: Utils.cpp:1161
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Definition: Utils.cpp:1120
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
Definition: Utils.cpp:1342
LogicalResult unionBoundingBox(const MemRefRegion &other)
Definition: Utils.cpp:1139
Value memref
Memref that this region corresponds to.
Definition: Utils.h:562
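A hedged end-to-end sketch tying the region APIs together (the helper name is ours; the op is assumed to be an affine load or store):

#include <optional>
#include "mlir/Dialect/Affine/Analysis/Utils.h"

// Sketch: compute the region accessed by an affine load/store,
// symbolic in `depth` surrounding loops, and return its byte size.
static std::optional<int64_t> regionSizeInBytes(mlir::Operation *loadOrStore,
                                                unsigned depth) {
  mlir::affine::MemRefRegion region(loadOrStore->getLoc());
  if (mlir::failed(region.compute(loadOrStore, depth)))
    return std::nullopt;
  return region.getRegionSize();
}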
Enumerates the different result statuses of slice computation by computeSliceUnion.
Definition: Utils.h:297
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.