MLIR  22.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous analysis routines for non-loop IR
10 // structures.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
#include "mlir/Dialect/Affine/Analysis/Utils.h"

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
27 
28 #define DEBUG_TYPE "analysis-utils"
29 
30 using namespace mlir;
31 using namespace affine;
32 using namespace presburger;
33 
34 using llvm::SmallDenseMap;
35 
37 
38 // LoopNestStateCollector walks loop nests and collects load and store
39 // operations, and whether or not a region holding op other than ForOp and IfOp
40 // was encountered in the loop nest.
42  opToWalk->walk([&](Operation *op) {
43  if (auto forOp = dyn_cast<AffineForOp>(op)) {
44  forOps.push_back(forOp);
45  } else if (isa<AffineReadOpInterface>(op)) {
46  loadOpInsts.push_back(op);
47  } else if (isa<AffineWriteOpInterface>(op)) {
48  storeOpInsts.push_back(op);
49  } else {
50  auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
51  if (!memInterface) {
53  // This op itself is memory-effect free.
54  return;
55  // Check operands. Eg. ops like the `call` op are handled here.
56  for (Value v : op->getOperands()) {
57  if (!isa<MemRefType>(v.getType()))
58  continue;
59  // Conservatively, we assume the memref is read and written to.
60  memrefLoads.push_back(op);
61  memrefStores.push_back(op);
62  }
63  } else {
64  // Non-affine loads and stores.
65  if (hasEffect<MemoryEffects::Read>(op))
66  memrefLoads.push_back(op);
67  if (hasEffect<MemoryEffects::Write>(op))
68  memrefStores.push_back(op);
69  if (hasEffect<MemoryEffects::Free>(op))
70  memrefFrees.push_back(op);
71  }
72  }
73  });
74 }
75 
76 unsigned Node::getLoadOpCount(Value memref) const {
77  unsigned loadOpCount = 0;
78  for (Operation *loadOp : loads) {
79  // Common case: affine reads.
80  if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
81  if (memref == affineLoad.getMemRef())
82  ++loadOpCount;
83  } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
84  ++loadOpCount;
85  }
86  }
87  return loadOpCount;
88 }
89 
90 // Returns the store op count for 'memref'.
91 unsigned Node::getStoreOpCount(Value memref) const {
92  unsigned storeOpCount = 0;
93  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
94  // Common case: affine writes.
95  if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
96  if (memref == affineStore.getMemRef())
97  ++storeOpCount;
98  } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
99  memref)) {
100  ++storeOpCount;
101  }
102  }
103  return storeOpCount;
104 }
105 
106 // Returns the store op count for 'memref'.
107 unsigned Node::hasStore(Value memref) const {
108  return llvm::any_of(
109  llvm::concat<Operation *const>(stores, memrefStores),
110  [&](Operation *storeOp) {
111  if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
112  if (memref == affineStore.getMemRef())
113  return true;
114  } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
115  return true;
116  }
117  return false;
118  });
119 }
120 
121 unsigned Node::hasFree(Value memref) const {
122  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
123  return hasEffect<MemoryEffects::Free>(freeOp, memref);
124  });
125 }
126 
127 // Returns all store ops in 'storeOps' which access 'memref'.
128 void Node::getStoreOpsForMemref(Value memref,
129  SmallVectorImpl<Operation *> *storeOps) const {
130  for (Operation *storeOp : stores) {
131  if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
132  storeOps->push_back(storeOp);
133  }
134 }
135 
136 // Returns all load ops in 'loadOps' which access 'memref'.
137 void Node::getLoadOpsForMemref(Value memref,
138  SmallVectorImpl<Operation *> *loadOps) const {
139  for (Operation *loadOp : loads) {
140  if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
141  loadOps->push_back(loadOp);
142  }
143 }
144 
145 // Returns all memrefs in 'loadAndStoreMemrefSet' for which this node
146 // has at least one load and store operation.
147 void Node::getLoadAndStoreMemrefSet(
148  DenseSet<Value> *loadAndStoreMemrefSet) const {
149  llvm::SmallDenseSet<Value, 2> loadMemrefs;
150  for (Operation *loadOp : loads) {
151  loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
152  }
153  for (Operation *storeOp : stores) {
154  auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
155  if (loadMemrefs.count(memref) > 0)
156  loadAndStoreMemrefSet->insert(memref);
157  }
158 }
159 
160 /// Returns the values that this op has a memref effect of type `EffectTys` on,
161 /// not considering recursive effects.
162 template <typename... EffectTys>
164  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
165  if (!memOp) {
167  // No effects.
168  return;
169  // Memref operands have to be considered as being affected.
170  for (Value operand : op->getOperands()) {
171  if (isa<MemRefType>(operand.getType()))
172  values.push_back(operand);
173  }
174  return;
175  }
177  memOp.getEffects(effects);
178  for (auto &effect : effects) {
179  Value effectVal = effect.getValue();
180  if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
181  isa<MemRefType>(effectVal.getType()))
182  values.push_back(effectVal);
183  };
184 }
185 
186 /// Add `op` to MDG creating a new node and adding its memory accesses (affine
187 /// or non-affine to memrefAccesses (memref -> list of nodes with accesses) map.
188 static Node *
190  DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
191  auto &nodes = mdg.nodes;
192  // Create graph node 'id' to represent top-level 'forOp' and record
193  // all loads and store accesses it contains.
194  LoopNestStateCollector collector;
195  collector.collect(nodeOp);
196  unsigned newNodeId = mdg.nextNodeId++;
197  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
198  for (Operation *op : collector.loadOpInsts) {
199  node.loads.push_back(op);
200  auto memref = cast<AffineReadOpInterface>(op).getMemRef();
201  memrefAccesses[memref].insert(node.id);
202  }
203  for (Operation *op : collector.storeOpInsts) {
204  node.stores.push_back(op);
205  auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
206  memrefAccesses[memref].insert(node.id);
207  }
208  for (Operation *op : collector.memrefLoads) {
209  SmallVector<Value> effectedValues;
210  getEffectedValues<MemoryEffects::Read>(op, effectedValues);
211  if (llvm::any_of(((ValueRange)effectedValues).getTypes(),
212  [](Type type) { return !isa<MemRefType>(type); }))
213  // We do not know the interaction here.
214  return nullptr;
215  for (Value memref : effectedValues)
216  memrefAccesses[memref].insert(node.id);
217  node.memrefLoads.push_back(op);
218  }
219  for (Operation *op : collector.memrefStores) {
220  SmallVector<Value> effectedValues;
221  getEffectedValues<MemoryEffects::Write>(op, effectedValues);
222  if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
223  [](Type type) { return !isa<MemRefType>(type); }))
224  return nullptr;
225  for (Value memref : effectedValues)
226  memrefAccesses[memref].insert(node.id);
227  node.memrefStores.push_back(op);
228  }
229  for (Operation *op : collector.memrefFrees) {
230  SmallVector<Value> effectedValues;
231  getEffectedValues<MemoryEffects::Free>(op, effectedValues);
232  if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
233  [](Type type) { return !isa<MemRefType>(type); }))
234  return nullptr;
235  for (Value memref : effectedValues)
236  memrefAccesses[memref].insert(node.id);
237  node.memrefFrees.push_back(op);
238  }
239 
240  return &node;
241 }
242 
244  LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
245  // Map from a memref to the set of ids of the nodes that have ops accessing
246  // the memref.
247  DenseMap<Value, SetVector<unsigned>> memrefAccesses;
248 
249  // Create graph nodes.
250  DenseMap<Operation *, unsigned> forToNodeMap;
251  for (Operation &op : block) {
252  if (auto forOp = dyn_cast<AffineForOp>(op)) {
253  Node *node = addNodeToMDG(&op, *this, memrefAccesses);
254  if (!node)
255  return false;
256  forToNodeMap[&op] = node->id;
257  } else if (isa<AffineReadOpInterface>(op)) {
258  // Create graph node for top-level load op.
259  Node node(nextNodeId++, &op);
260  node.loads.push_back(&op);
261  auto memref = cast<AffineReadOpInterface>(op).getMemRef();
262  memrefAccesses[memref].insert(node.id);
263  nodes.insert({node.id, node});
264  } else if (isa<AffineWriteOpInterface>(op)) {
265  // Create graph node for top-level store op.
266  Node node(nextNodeId++, &op);
267  node.stores.push_back(&op);
268  auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
269  memrefAccesses[memref].insert(node.id);
270  nodes.insert({node.id, node});
271  } else if (op.getNumResults() > 0 && !op.use_empty()) {
272  // Create graph node for top-level producer of SSA values, which
273  // could be used by loop nest nodes.
274  Node *node = addNodeToMDG(&op, *this, memrefAccesses);
275  if (!node)
276  return false;
277  } else if (!isMemoryEffectFree(&op) &&
278  (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
279  // Create graph node for top-level op unless it is known to be
280  // memory-effect free. This covers all unknown/unregistered ops,
281  // non-affine ops with memory effects, and region-holding ops with a
282  // well-defined control flow. During the fusion validity checks, edges
283  // to/from these ops get looked at.
284  Node *node = addNodeToMDG(&op, *this, memrefAccesses);
285  if (!node)
286  return false;
287  } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
288  // Return false if non-handled/unknown region-holding ops are found. We
289  // won't know what such ops do or what its regions mean; for e.g., it may
290  // not be an imperative op.
291  LLVM_DEBUG(llvm::dbgs()
292  << "MDG init failed; unknown region-holding op found!\n");
293  return false;
294  }
295  // We aren't creating nodes for memory-effect free ops either with no
296  // regions (unless it has results being used) or those with branch op
297  // interface.
298  }
299 
300  LLVM_DEBUG(llvm::dbgs() << "Created " << nodes.size() << " nodes\n");
301 
302  // Add dependence edges between nodes which produce SSA values and their
303  // users. Load ops can be considered as the ones producing SSA values.
304  for (auto &idAndNode : nodes) {
305  const Node &node = idAndNode.second;
306  // Stores don't define SSA values, skip them.
307  if (!node.stores.empty())
308  continue;
309  Operation *opInst = node.op;
310  for (Value value : opInst->getResults()) {
311  for (Operation *user : value.getUsers()) {
312  // Ignore users outside of the block.
313  if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
314  &block)
315  continue;
317  getAffineForIVs(*user, &loops);
318  // Find the surrounding affine.for nested immediately within the
319  // block.
320  auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
321  return loop->getBlock() == &block;
322  });
323  if (it == loops.end())
324  continue;
325  assert(forToNodeMap.count(*it) > 0 && "missing mapping");
326  unsigned userLoopNestId = forToNodeMap[*it];
327  addEdge(node.id, userLoopNestId, value);
328  }
329  }
330  }
331 
332  // Walk memref access lists and add graph edges between dependent nodes.
333  for (auto &memrefAndList : memrefAccesses) {
334  unsigned n = memrefAndList.second.size();
335  Value srcMemRef = memrefAndList.first;
336  // Add edges between all dependent pairs among the node IDs on this memref.
337  for (unsigned i = 0; i < n; ++i) {
338  unsigned srcId = memrefAndList.second[i];
339  Node *srcNode = getNode(srcId);
340  bool srcHasStoreOrFree =
341  srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
342  for (unsigned j = i + 1; j < n; ++j) {
343  unsigned dstId = memrefAndList.second[j];
344  Node *dstNode = getNode(dstId);
345  bool dstHasStoreOrFree =
346  dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
347  if (srcHasStoreOrFree || dstHasStoreOrFree)
348  addEdge(srcId, dstId, srcMemRef);
349  }
350  }
351  }
352  return true;
353 }
354 
355 // Returns the graph node for 'id'.
356 const Node *MemRefDependenceGraph::getNode(unsigned id) const {
357  auto it = nodes.find(id);
358  assert(it != nodes.end());
359  return &it->second;
360 }
361 
362 // Returns the graph node for 'forOp'.
363 const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const {
364  for (auto &idAndNode : nodes)
365  if (idAndNode.second.op == forOp)
366  return &idAndNode.second;
367  return nullptr;
368 }
369 
370 // Adds a node with 'op' to the graph and returns its unique identifier.
372  Node node(nextNodeId++, op);
373  nodes.insert({node.id, node});
374  return node.id;
375 }
376 
377 // Remove node 'id' (and its associated edges) from graph.
379  // Remove each edge in 'inEdges[id]'.
380  if (inEdges.count(id) > 0) {
381  SmallVector<Edge, 2> oldInEdges = inEdges[id];
382  for (auto &inEdge : oldInEdges) {
383  removeEdge(inEdge.id, id, inEdge.value);
384  }
385  }
386  // Remove each edge in 'outEdges[id]'.
387  if (outEdges.contains(id)) {
388  SmallVector<Edge, 2> oldOutEdges = outEdges[id];
389  for (auto &outEdge : oldOutEdges) {
390  removeEdge(id, outEdge.id, outEdge.value);
391  }
392  }
393  // Erase remaining node state.
394  inEdges.erase(id);
395  outEdges.erase(id);
396  nodes.erase(id);
397 }
398 
399 // Returns true if node 'id' writes to any memref which escapes (or is an
400 // argument to) the block. Returns false otherwise.
402  const Node *node = getNode(id);
403  for (auto *storeOpInst : node->stores) {
404  auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
405  auto *op = memref.getDefiningOp();
406  // Return true if 'memref' is a block argument.
407  if (!op)
408  return true;
409  // Return true if any use of 'memref' does not deference it in an affine
410  // way.
411  for (auto *user : memref.getUsers())
412  if (!isa<AffineMapAccessInterface>(*user))
413  return true;
414  }
415  return false;
416 }
417 
418 // Returns true iff there is an edge from node 'srcId' to node 'dstId' which
419 // is for 'value' if non-null, or for any value otherwise. Returns false
420 // otherwise.
421 bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
422  Value value) const {
423  if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) {
424  return false;
425  }
426  bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) {
427  return edge.id == dstId && (!value || edge.value == value);
428  });
429  bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) {
430  return edge.id == srcId && (!value || edge.value == value);
431  });
432  return hasOutEdge && hasInEdge;
433 }
434 
435 // Adds an edge from node 'srcId' to node 'dstId' for 'value'.
436 void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
437  Value value) {
438  if (!hasEdge(srcId, dstId, value)) {
439  outEdges[srcId].push_back({dstId, value});
440  inEdges[dstId].push_back({srcId, value});
441  if (isa<MemRefType>(value.getType()))
442  memrefEdgeCount[value]++;
443  }
444 }
445 
446 // Removes an edge from node 'srcId' to node 'dstId' for 'value'.
447 void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
448  Value value) {
449  assert(inEdges.count(dstId) > 0);
450  assert(outEdges.count(srcId) > 0);
451  if (isa<MemRefType>(value.getType())) {
452  assert(memrefEdgeCount.count(value) > 0);
453  memrefEdgeCount[value]--;
454  }
455  // Remove 'srcId' from 'inEdges[dstId]'.
456  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
457  if ((*it).id == srcId && (*it).value == value) {
458  inEdges[dstId].erase(it);
459  break;
460  }
461  }
462  // Remove 'dstId' from 'outEdges[srcId]'.
463  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
464  if ((*it).id == dstId && (*it).value == value) {
465  outEdges[srcId].erase(it);
466  break;
467  }
468  }
469 }
470 
471 // Returns true if there is a path in the dependence graph from node 'srcId'
472 // to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
473 // operations that the edges connected are expected to be from the same block.
475  unsigned dstId) const {
476  // Worklist state is: <node-id, next-output-edge-index-to-visit>
478  worklist.push_back({srcId, 0});
479  Operation *dstOp = getNode(dstId)->op;
480  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
481  while (!worklist.empty()) {
482  auto &idAndIndex = worklist.back();
483  // Return true if we have reached 'dstId'.
484  if (idAndIndex.first == dstId)
485  return true;
486  // Pop and continue if node has no out edges, or if all out edges have
487  // already been visited.
488  if (!outEdges.contains(idAndIndex.first) ||
489  idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) {
490  worklist.pop_back();
491  continue;
492  }
493  // Get graph edge to traverse.
494  const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second];
495  // Increment next output edge index for 'idAndIndex'.
496  ++idAndIndex.second;
497  // Add node at 'edge.id' to the worklist. We don't need to consider
498  // nodes that are "after" dstId in the containing block; one can't have a
499  // path to `dstId` from any of those nodes.
500  bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
501  if (!afterDst && edge.id != idAndIndex.first)
502  worklist.push_back({edge.id, 0});
503  }
504  return false;
505 }
506 
507 // Returns the input edge count for node 'id' and 'memref' from src nodes
508 // which access 'memref' with a store operation.
510  Value memref) const {
511  unsigned inEdgeCount = 0;
512  for (const Edge &inEdge : inEdges.lookup(id)) {
513  if (inEdge.value == memref) {
514  const Node *srcNode = getNode(inEdge.id);
515  // Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
516  if (srcNode->getStoreOpCount(memref) > 0)
517  ++inEdgeCount;
518  }
519  }
520  return inEdgeCount;
521 }
522 
523 // Returns the output edge count for node 'id' and 'memref' (if non-null),
524 // otherwise returns the total output edge count from node 'id'.
526  Value memref) const {
527  unsigned outEdgeCount = 0;
528  for (const auto &outEdge : outEdges.lookup(id))
529  if (!memref || outEdge.value == memref)
530  ++outEdgeCount;
531  return outEdgeCount;
532 }
533 
534 /// Return all nodes which define SSA values used in node 'id'.
536  unsigned id, DenseSet<unsigned> &definingNodes) const {
537  for (const Edge &edge : inEdges.lookup(id))
538  // By definition of edge, if the edge value is a non-memref value,
539  // then the dependence is between a graph node which defines an SSA value
540  // and another graph node which uses the SSA value.
541  if (!isa<MemRefType>(edge.value.getType()))
542  definingNodes.insert(edge.id);
543 }
544 
545 // Computes and returns an insertion point operation, before which the
546 // the fused <srcId, dstId> loop nest can be inserted while preserving
547 // dependences. Returns nullptr if no such insertion point is found.
548 Operation *
550  unsigned dstId) const {
551  if (!outEdges.contains(srcId))
552  return getNode(dstId)->op;
553 
554  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
555  DenseSet<unsigned> definingNodes;
556  gatherDefiningNodes(dstId, definingNodes);
557  if (llvm::any_of(definingNodes,
558  [&](unsigned id) { return hasDependencePath(srcId, id); })) {
559  LLVM_DEBUG(llvm::dbgs()
560  << "Can't fuse: a defining op with a user in the dst "
561  "loop has dependence from the src loop\n");
562  return nullptr;
563  }
564 
565  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
566  SmallPtrSet<Operation *, 2> srcDepInsts;
567  for (auto &outEdge : outEdges.lookup(srcId))
568  if (outEdge.id != dstId)
569  srcDepInsts.insert(getNode(outEdge.id)->op);
570 
571  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
572  SmallPtrSet<Operation *, 2> dstDepInsts;
573  for (auto &inEdge : inEdges.lookup(dstId))
574  if (inEdge.id != srcId)
575  dstDepInsts.insert(getNode(inEdge.id)->op);
576 
577  Operation *srcNodeInst = getNode(srcId)->op;
578  Operation *dstNodeInst = getNode(dstId)->op;
579 
580  // Computing insertion point:
581  // *) Walk all operation positions in Block operation list in the
582  // range (src, dst). For each operation 'op' visited in this search:
583  // *) Store in 'firstSrcDepPos' the first position where 'op' has a
584  // dependence edge from 'srcNode'.
585  // *) Store in 'lastDstDepPost' the last position where 'op' has a
586  // dependence edge to 'dstNode'.
587  // *) Compare 'firstSrcDepPos' and 'lastDstDepPost' to determine the
588  // operation insertion point (or return null pointer if no such
589  // insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
591  std::optional<unsigned> firstSrcDepPos;
592  std::optional<unsigned> lastDstDepPos;
593  unsigned pos = 0;
594  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
595  it != Block::iterator(dstNodeInst); ++it) {
596  Operation *op = &(*it);
597  if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
598  firstSrcDepPos = pos;
599  if (dstDepInsts.count(op) > 0)
600  lastDstDepPos = pos;
601  depInsts.push_back(op);
602  ++pos;
603  }
604 
605  if (firstSrcDepPos.has_value()) {
606  if (lastDstDepPos.has_value()) {
607  if (*firstSrcDepPos <= *lastDstDepPos) {
608  // No valid insertion point exists which preserves dependences.
609  return nullptr;
610  }
611  }
612  // Return the insertion point at 'firstSrcDepPos'.
613  return depInsts[*firstSrcDepPos];
614  }
615  // No dependence targets in range (or only dst deps in range), return
616  // 'dstNodInst' insertion point.
617  return dstNodeInst;
618 }
619 
620 // Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
621 // taking into account that:
622 // *) if 'removeSrcId' is true, 'srcId' will be removed after fusion,
623 // *) memrefs in 'privateMemRefs' has been replaced in node at 'dstId' by a
624 // private memref.
625 void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
626  const DenseSet<Value> &privateMemRefs,
627  bool removeSrcId) {
628  // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
629  if (inEdges.count(srcId) > 0) {
630  SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
631  for (auto &inEdge : oldInEdges) {
632  // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
633  if (!privateMemRefs.contains(inEdge.value))
634  addEdge(inEdge.id, dstId, inEdge.value);
635  }
636  }
637  // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
638  // If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
639  if (outEdges.count(srcId) > 0) {
640  SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
641  for (auto &outEdge : oldOutEdges) {
642  // Remove any out edges from 'srcId' to 'dstId' across memrefs.
643  if (outEdge.id == dstId)
644  removeEdge(srcId, outEdge.id, outEdge.value);
645  else if (removeSrcId) {
646  addEdge(dstId, outEdge.id, outEdge.value);
647  removeEdge(srcId, outEdge.id, outEdge.value);
648  }
649  }
650  }
651  // Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
652  // replaced by a private memref). These edges could come from nodes
653  // other than 'srcId' which were removed in the previous step.
654  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
655  SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
656  for (auto &inEdge : oldInEdges)
657  if (privateMemRefs.count(inEdge.value) > 0)
658  removeEdge(inEdge.id, dstId, inEdge.value);
659  }
660 }
661 
662 // Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
663 // of sibling node 'sibId' into node 'dstId'.
664 void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
665  // For each edge in 'inEdges[sibId]':
666  // *) Add new edge from source node 'inEdge.id' to 'dstNode'.
667  // *) Remove edge from source node 'inEdge.id' to 'sibNode'.
668  if (inEdges.count(sibId) > 0) {
669  SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
670  for (auto &inEdge : oldInEdges) {
671  addEdge(inEdge.id, dstId, inEdge.value);
672  removeEdge(inEdge.id, sibId, inEdge.value);
673  }
674  }
675 
676  // For each edge in 'outEdges[sibId]' to node 'id'
677  // *) Add new edge from 'dstId' to 'outEdge.id'.
678  // *) Remove edge from 'sibId' to 'outEdge.id'.
679  if (outEdges.count(sibId) > 0) {
680  SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
681  for (auto &outEdge : oldOutEdges) {
682  addEdge(dstId, outEdge.id, outEdge.value);
683  removeEdge(sibId, outEdge.id, outEdge.value);
684  }
685  }
686 }
687 
688 // Adds ops in 'loads' and 'stores' to node at 'id'.
690  ArrayRef<Operation *> stores,
691  ArrayRef<Operation *> memrefLoads,
692  ArrayRef<Operation *> memrefStores,
693  ArrayRef<Operation *> memrefFrees) {
694  Node *node = getNode(id);
695  llvm::append_range(node->loads, loads);
696  llvm::append_range(node->stores, stores);
697  llvm::append_range(node->memrefLoads, memrefLoads);
698  llvm::append_range(node->memrefStores, memrefStores);
699  llvm::append_range(node->memrefFrees, memrefFrees);
700 }
701 
703  Node *node = getNode(id);
704  node->loads.clear();
705  node->stores.clear();
706 }
707 
708 // Calls 'callback' for each input edge incident to node 'id' which carries a
709 // memref dependence.
711  unsigned id, const std::function<void(Edge)> &callback) {
712  if (inEdges.count(id) > 0)
713  forEachMemRefEdge(inEdges[id], callback);
714 }
715 
716 // Calls 'callback' for each output edge from node 'id' which carries a
717 // memref dependence.
719  unsigned id, const std::function<void(Edge)> &callback) {
720  if (outEdges.count(id) > 0)
721  forEachMemRefEdge(outEdges[id], callback);
722 }
723 
724 // Calls 'callback' for each edge in 'edges' which carries a memref
725 // dependence.
727  ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
728  for (const auto &edge : edges) {
729  // Skip if 'edge' is not a memref dependence edge.
730  if (!isa<MemRefType>(edge.value.getType()))
731  continue;
732  assert(nodes.count(edge.id) > 0);
733  // Skip if 'edge.id' is not a loop nest.
734  if (!isa<AffineForOp>(getNode(edge.id)->op))
735  continue;
736  // Visit current input edge 'edge'.
737  callback(edge);
738  }
739 }
740 
741 void MemRefDependenceGraph::print(raw_ostream &os) const {
742  os << "\nMemRefDependenceGraph\n";
743  os << "\nNodes:\n";
744  for (const auto &idAndNode : nodes) {
745  os << "Node: " << idAndNode.first << "\n";
746  auto it = inEdges.find(idAndNode.first);
747  if (it != inEdges.end()) {
748  for (const auto &e : it->second)
749  os << " InEdge: " << e.id << " " << e.value << "\n";
750  }
751  it = outEdges.find(idAndNode.first);
752  if (it != outEdges.end()) {
753  for (const auto &e : it->second)
754  os << " OutEdge: " << e.id << " " << e.value << "\n";
755  }
756  }
757 }
758 
761  auto *currOp = op.getParentOp();
762  AffineForOp currAffineForOp;
763  // Traverse up the hierarchy collecting all 'affine.for' operation while
764  // skipping over 'affine.if' operations.
765  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
766  if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
767  loops->push_back(currAffineForOp);
768  currOp = currOp->getParentOp();
769  }
770  std::reverse(loops->begin(), loops->end());
771 }
772 
775  ops->clear();
776  Operation *currOp = op.getParentOp();
777 
778  // Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and
779  // affine.parallel operations.
780  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
781  if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
782  ops->push_back(currOp);
783  currOp = currOp->getParentOp();
784  }
785  std::reverse(ops->begin(), ops->end());
786 }
787 
788 // Populates 'cst' with FlatAffineValueConstraints which represent original
789 // domain of the loop bounds that define 'ivs'.
791  FlatAffineValueConstraints &cst) const {
792  assert(!ivs.empty() && "Cannot have a slice without its IVs");
793  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
794  /*numLocals=*/0, ivs);
795  for (Value iv : ivs) {
796  AffineForOp loop = getForInductionVarOwner(iv);
797  assert(loop && "Expected affine for");
798  if (failed(cst.addAffineForOpDomain(loop)))
799  return failure();
800  }
801  return success();
802 }
803 
804 // Populates 'cst' with FlatAffineValueConstraints which represent slice bounds.
805 LogicalResult
807  assert(!lbOperands.empty());
808  // Adds src 'ivs' as dimension variables in 'cst'.
809  unsigned numDims = ivs.size();
810  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
811  unsigned numSymbols = lbOperands[0].size();
812 
813  SmallVector<Value, 4> values(ivs);
814  // Append 'ivs' then 'operands' to 'values'.
815  values.append(lbOperands[0].begin(), lbOperands[0].end());
816  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);
817 
818  // Add loop bound constraints for values which are loop IVs of the destination
819  // of fusion and equality constraints for symbols which are constants.
820  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
821  Value value = values[i];
822  assert(cst->containsVar(value) && "value expected to be present");
823  if (isValidSymbol(value)) {
824  // Check if the symbol is a constant.
825  if (std::optional<int64_t> cOp = getConstantIntValue(value))
826  cst->addBound(BoundType::EQ, value, cOp.value());
827  } else if (auto loop = getForInductionVarOwner(value)) {
828  if (failed(cst->addAffineForOpDomain(loop)))
829  return failure();
830  }
831  }
832 
833  // Add slices bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'
834  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
835  assert(succeeded(ret) &&
836  "should not fail as we never have semi-affine slice maps");
837  (void)ret;
838  return success();
839 }
840 
841 // Clears state bounds and operand state.
843  lbs.clear();
844  ubs.clear();
845  lbOperands.clear();
846  ubOperands.clear();
847 }
848 
850  llvm::errs() << "\tIVs:\n";
851  for (Value iv : ivs)
852  llvm::errs() << "\t\t" << iv << "\n";
853 
854  llvm::errs() << "\tLBs:\n";
855  for (auto en : llvm::enumerate(lbs)) {
856  llvm::errs() << "\t\t" << en.value() << "\n";
857  llvm::errs() << "\t\tOperands:\n";
858  for (Value lbOp : lbOperands[en.index()])
859  llvm::errs() << "\t\t\t" << lbOp << "\n";
860  }
861 
862  llvm::errs() << "\tUBs:\n";
863  for (auto en : llvm::enumerate(ubs)) {
864  llvm::errs() << "\t\t" << en.value() << "\n";
865  llvm::errs() << "\t\tOperands:\n";
866  for (Value ubOp : ubOperands[en.index()])
867  llvm::errs() << "\t\t\t" << ubOp << "\n";
868  }
869 }
870 
/// Fast check to determine if the computation slice is maximal. Returns true if
/// each slice dimension maps to an existing dst dimension and both the src
/// and the dst loops for those dimensions have the same bounds. Returns false
/// if both the src and the dst loops don't have the same bounds. Returns
/// std::nullopt if none of the above can be proven.
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
         "Unexpected number of lbs, ubs and ivs in slice");

  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
    AffineMap lbMap = lbs[i];
    AffineMap ubMap = ubs[i];

    // Check if this slice is just an equality along this dimension: a
    // single-result pair of maps with ub == lb + 1, i.e. one iteration.
    if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
        ubMap.getNumResults() != 1 ||
        lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
        // The condition above will be true for maps describing a single
        // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
        // Make sure we skip those cases by checking that the lb result is not
        // just a constant.
        isa<AffineConstantExpr>(lbMap.getResult(0)))
      return std::nullopt;

    // Limited support: we expect the lb result to be just a loop dimension for
    // now.
    AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
    if (!result)
      return std::nullopt;

    // Retrieve dst loop bounds. The lb result's dim position indexes into
    // this slice dimension's bound operands.
    AffineForOp dstLoop =
        getForInductionVarOwner(lbOperands[i][result.getPosition()]);
    if (!dstLoop)
      return std::nullopt;
    AffineMap dstLbMap = dstLoop.getLowerBoundMap();
    AffineMap dstUbMap = dstLoop.getUpperBoundMap();

    // Retrieve src loop bounds.
    AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
    assert(srcLoop && "Expected affine for");
    AffineMap srcLbMap = srcLoop.getLowerBoundMap();
    AffineMap srcUbMap = srcLoop.getUpperBoundMap();

    // Limited support: we expect simple src and dst loops with a single
    // constant component per bound for now.
    if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
        dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
      return std::nullopt;

    AffineExpr srcLbResult = srcLbMap.getResult(0);
    AffineExpr dstLbResult = dstLbMap.getResult(0);
    AffineExpr srcUbResult = srcUbMap.getResult(0);
    AffineExpr dstUbResult = dstUbMap.getResult(0);
    // Only fully-constant bounds can be compared structurally below.
    if (!isa<AffineConstantExpr>(srcLbResult) ||
        !isa<AffineConstantExpr>(srcUbResult) ||
        !isa<AffineConstantExpr>(dstLbResult) ||
        !isa<AffineConstantExpr>(dstUbResult))
      return std::nullopt;

    // Check if src and dst loop bounds are the same. If not, we can guarantee
    // that the slice is not maximal.
    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
        srcLoop.getStep() != dstLoop.getStep())
      return false;
  }

  return true;
}
940 
941 /// Returns true if it is deterministically verified that the original iteration
942 /// space of the slice is contained within the new iteration space that is
943 /// created after fusing 'this' slice into its destination.
944 std::optional<bool> ComputationSliceState::isSliceValid() const {
945  // Fast check to determine if the slice is valid. If the following conditions
946  // are verified to be true, slice is declared valid by the fast check:
947  // 1. Each slice loop is a single iteration loop bound in terms of a single
948  // destination loop IV.
949  // 2. Loop bounds of the destination loop IV (from above) and those of the
950  // source loop IV are exactly the same.
951  // If the fast check is inconclusive or false, we proceed with a more
952  // expensive analysis.
953  // TODO: Store the result of the fast check, as it might be used again in
954  // `canRemoveSrcNodeAfterFusion`.
955  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
956  if (isValidFastCheck && *isValidFastCheck)
957  return true;
958 
959  // Create constraints for the source loop nest using which slice is computed.
960  FlatAffineValueConstraints srcConstraints;
961  // TODO: Store the source's domain to avoid computation at each depth.
962  if (failed(getSourceAsConstraints(srcConstraints))) {
963  LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
964  return std::nullopt;
965  }
966  // As the set difference utility currently cannot handle symbols in its
967  // operands, validity of the slice cannot be determined.
968  if (srcConstraints.getNumSymbolVars() > 0) {
969  LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
970  return std::nullopt;
971  }
972  // TODO: Handle local vars in the source domains while using the 'projectOut'
973  // utility below. Currently, aligning is not done assuming that there will be
974  // no local vars in the source domain.
975  if (srcConstraints.getNumLocalVars() != 0) {
976  LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
977  return std::nullopt;
978  }
979 
980  // Create constraints for the slice loop nest that would be created if the
981  // fusion succeeds.
982  FlatAffineValueConstraints sliceConstraints;
983  if (failed(getAsConstraints(&sliceConstraints))) {
984  LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
985  return std::nullopt;
986  }
987 
988  // Projecting out every dimension other than the 'ivs' to express slice's
989  // domain completely in terms of source's IVs.
990  sliceConstraints.projectOut(ivs.size(),
991  sliceConstraints.getNumVars() - ivs.size());
992 
993  LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
994  LLVM_DEBUG(srcConstraints.dump());
995  LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
996  "(expressed in terms of its source's IVs):\n");
997  LLVM_DEBUG(sliceConstraints.dump());
998 
999  // TODO: Store 'srcSet' to avoid recalculating for each depth.
1000  PresburgerSet srcSet(srcConstraints);
1001  PresburgerSet sliceSet(sliceConstraints);
1002  PresburgerSet diffSet = sliceSet.subtract(srcSet);
1003 
1004  if (!diffSet.isIntegerEmpty()) {
1005  LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
1006  return false;
1007  }
1008  return true;
1009 }
1010 
1011 /// Returns true if the computation slice encloses all the iterations of the
1012 /// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
1013 /// cannot determine if the slice is maximal or not.
1014 std::optional<bool> ComputationSliceState::isMaximal() const {
1015  // Fast check to determine if the computation slice is maximal. If the result
1016  // is inconclusive, we proceed with a more expensive analysis.
1017  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
1018  if (isMaximalFastCheck)
1019  return isMaximalFastCheck;
1020 
1021  // Create constraints for the src loop nest being sliced.
1022  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
1023  /*numSymbols=*/0,
1024  /*numLocals=*/0, ivs);
1025  for (Value iv : ivs) {
1026  AffineForOp loop = getForInductionVarOwner(iv);
1027  assert(loop && "Expected affine for");
1028  if (failed(srcConstraints.addAffineForOpDomain(loop)))
1029  return std::nullopt;
1030  }
1031 
1032  // Create constraints for the slice using the dst loop nest information. We
1033  // retrieve existing dst loops from the lbOperands.
1034  SmallVector<Value> consumerIVs;
1035  for (Value lbOp : lbOperands[0])
1036  if (getForInductionVarOwner(lbOp))
1037  consumerIVs.push_back(lbOp);
1038 
1039  // Add empty IV Values for those new loops that are not equalities and,
1040  // therefore, are not yet materialized in the IR.
1041  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
1042  consumerIVs.push_back(Value());
1043 
1044  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
1045  /*numSymbols=*/0,
1046  /*numLocals=*/0, consumerIVs);
1047 
1048  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
1049  return std::nullopt;
1050 
1051  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
1052  // Constraint dims are different. The integer set difference can't be
1053  // computed so we don't know if the slice is maximal.
1054  return std::nullopt;
1055 
1056  // Compute the difference between the src loop nest and the slice integer
1057  // sets.
1058  PresburgerSet srcSet(srcConstraints);
1059  PresburgerSet sliceSet(sliceConstraints);
1060  PresburgerSet diffSet = srcSet.subtract(sliceSet);
1061  return diffSet.isIntegerEmpty();
1062 }
1063 
1064 unsigned MemRefRegion::getRank() const {
1065  return cast<MemRefType>(memref.getType()).getRank();
1066 }
1067 
 // Body of MemRefRegion::getConstantBoundingSizeAndShape; the signature lines
 // (taking optional out-params `shape` and `lbs`) are elided from this
 // excerpt. Computes a constant upper bound on the number of elements in the
 // region, optionally returning per-dimension extents in `shape` and lower
 // bound maps in `lbs`; returns std::nullopt when any extent cannot be
 // bounded by a constant.
 auto memRefType = cast<MemRefType>(memref.getType());
 MLIRContext *context = memref.getContext();
 unsigned rank = memRefType.getRank();
 if (shape)
   shape->reserve(rank);

 assert(rank == cst.getNumDimVars() && "inconsistent memref region");

 // Use a copy of the region constraints that has upper/lower bounds for each
 // memref dimension with static size added to guard against potential
 // over-approximation from projection or union bounding box. We may not add
 // this on the region itself since they might just be redundant constraints
 // that will need non-trivials means to eliminate.
 FlatLinearValueConstraints cstWithShapeBounds(cst);
 for (unsigned r = 0; r < rank; r++) {
   cstWithShapeBounds.addBound(BoundType::LB, r, 0);
   int64_t dimSize = memRefType.getDimSize(r);
   if (ShapedType::isDynamic(dimSize))
     continue;
   cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
 }

 // Find a constant upper bound on the extent of this memref region along
 // each dimension.
 int64_t numElements = 1;
 int64_t diffConstant;
 for (unsigned d = 0; d < rank; d++) {
   AffineMap lb;
   std::optional<int64_t> diff =
       cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb);
   if (diff.has_value()) {
     diffConstant = *diff;
     assert(diffConstant >= 0 && "dim size bound cannot be negative");
   } else {
     // If no constant bound is found, then it can always be bound by the
     // memref's dim size if the latter has a constant size along this dim.
     auto dimSize = memRefType.getDimSize(d);
     if (ShapedType::isDynamic(dimSize))
       return std::nullopt;
     diffConstant = dimSize;
     // Lower bound becomes 0.
     lb = AffineMap::get(/*dimCount=*/0, cstWithShapeBounds.getNumSymbolVars(),
                         /*result=*/getAffineConstantExpr(0, context));
   }
   numElements *= diffConstant;
   // Populate outputs if available.
   if (lbs)
     lbs->push_back(lb);
   if (shape)
     shape->push_back(diffConstant);
 }
 return numElements;
}
1123 
// Tail of the signature of MemRefRegion::getLowerAndUpperBound; the first
// signature line (taking `unsigned pos` and `AffineMap &lbMap`) is elided
// from this excerpt. Computes the lower and upper bound maps for region
// dimension `pos`, expressed in terms of the region's non-dimensional
// (symbolic) variables.
                                         AffineMap &ubMap) const {
 assert(pos < cst.getNumDimVars() && "invalid position");
 auto memRefType = cast<MemRefType>(memref.getType());
 unsigned rank = memRefType.getRank();

 assert(rank == cst.getNumDimVars() && "inconsistent memref region");

 auto boundPairs = cst.getLowerAndUpperBound(
     pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
     /*localExprs=*/{}, memRefType.getContext());
 lbMap = boundPairs.first;
 ubMap = boundPairs.second;
 assert(lbMap && "lower bound for a region must exist");
 assert(ubMap && "upper bound for a region must exist");
 // The bound maps' inputs are exactly the region's symbolic variables.
 assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
 assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}
1142 
1143 LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
1144  assert(memref == other.memref);
1145  return cst.unionBoundingBox(*other.getConstraints());
1146 }
1147 
/// Computes the memory region accessed by this memref with the region
/// represented as constraints symbolic/parametric in 'loopDepth' loops
/// surrounding opInst and any additional Function symbols.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
//   affine.for %i = 0 to 32 {
//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//       load %A[%ii]
//     }
//   }
//
// region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
//
// TODO: extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                    const ComputationSliceState *sliceState,
                                    bool addMemRefDimBounds, bool dropLocalVars,
                                    bool dropOuterIvs) {
  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
         "affine read/write op expected");

  MemRefAccess access(op);
  memref = access.memref;
  write = access.isStore();

  unsigned rank = access.getRank();

  LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
                          << "\ndepth: " << loopDepth << "\n";);

  // 0-d memrefs.
  if (rank == 0) {
    // NOTE(review): the declaration of `ivs` (presumably a
    // SmallVector<Value, 4>) is elided from this excerpt.
    getAffineIVs(*op, ivs);
    assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
    // The first 'loopDepth' IVs are symbols for this region.
    ivs.resize(loopDepth);
    // A 0-d memref has a 0-d region.
    cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
    return success();
  }

  // Build the constraints for this region.
  AffineValueMap accessValueMap;
  access.getAccessMap(&accessValueMap);
  AffineMap accessMap = accessValueMap.getAffineMap();

  unsigned numDims = accessMap.getNumDims();
  unsigned numSymbols = accessMap.getNumSymbols();
  unsigned numOperands = accessValueMap.getNumOperands();
  // Merge operands with slice operands.
  SmallVector<Value, 4> operands;
  operands.resize(numOperands);
  for (unsigned i = 0; i < numOperands; ++i)
    operands[i] = accessValueMap.getOperand(i);

  if (sliceState != nullptr) {
    operands.reserve(operands.size() + sliceState->lbOperands[0].size());
    // Append slice operands to 'operands' as symbols.
    for (auto extraOperand : sliceState->lbOperands[0]) {
      if (!llvm::is_contained(operands, extraOperand)) {
        operands.push_back(extraOperand);
        numSymbols++;
      }
    }
  }
  // We'll first associate the dims and symbols of the access map to the dims
  // and symbols resp. of cst. This will change below once cst is
  // fully constructed out.
  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);

  // Add equality constraints.
  // Add inequalities for loop lower/upper bounds.
  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
    auto operand = operands[i];
    if (auto affineFor = getForInductionVarOwner(operand)) {
      // Note that cst can now have more dimensions than accessMap if the
      // bounds expressions involve outer loops or other symbols.
      // TODO: rewrite this to use getInstIndexSet; this way
      // conditionals will be handled when the latter supports it.
      if (failed(cst.addAffineForOpDomain(affineFor)))
        return failure();
    } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
      if (failed(cst.addAffineParallelOpDomain(parallelOp)))
        return failure();
    } else if (isValidSymbol(operand)) {
      // Check if the symbol is a constant.
      Value symbol = operand;
      if (auto constVal = getConstantIntValue(symbol))
        cst.addBound(BoundType::EQ, symbol, constVal.value());
    } else {
      LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
      return failure();
    }
  }

  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
  if (sliceState != nullptr) {
    // Add dim and symbol slice operands.
    for (auto operand : sliceState->lbOperands[0]) {
      if (failed(cst.addInductionVarOrTerminalSymbol(operand)))
        return failure();
    }
    // Add upper/lower bounds from 'sliceState' to 'cst'.
    LogicalResult ret =
        cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
                           sliceState->lbOperands[0]);
    assert(succeeded(ret) &&
           "should not fail as we never have semi-affine slice maps");
    (void)ret;
  }

  // Add access function equalities to connect loop IVs to data dimensions.
  if (failed(cst.composeMap(&accessValueMap))) {
    op->emitError("getMemRefRegion: compose affine map failed");
    LLVM_DEBUG(accessValueMap.getAffineMap().dump());
    return failure();
  }

  // Set all variables appearing after the first 'rank' variables as
  // symbolic variables - so that the ones corresponding to the memref
  // dimensions are the dimensional variables for the memref region.
  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);

  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
  // this memref region is symbolic.
  SmallVector<Value, 4> enclosingIVs;
  getAffineIVs(*op, enclosingIVs);
  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
  enclosingIVs.resize(loopDepth);
  SmallVector<Value, 4> vars;
  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
  for (auto en : llvm::enumerate(vars)) {
    if ((isAffineInductionVar(en.value())) &&
        !llvm::is_contained(enclosingIVs, en.value())) {
      if (dropOuterIvs) {
        // Fully eliminate this IV from the system.
        cst.projectOut(en.value());
      } else {
        // Keep the IV's effect but demote it to an (unnamed) local variable.
        unsigned varPosition;
        cst.findVar(en.value(), &varPosition);
        auto varKind = cst.getVarKindAt(varPosition);
        varPosition -= cst.getNumDimVars();
        cst.convertToLocal(varKind, varPosition, varPosition + 1);
      }
    }
  }

  // Project out any local variables (these would have been added for any
  // mod/divs) if specified.
  if (dropLocalVars)
    cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());

  // Constant fold any symbolic variables.
  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
                           /*num=*/cst.getNumSymbolVars());

  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");

  // Add upper/lower bounds for each memref dimension with static size
  // to guard against potential over-approximation from projection.
  // TODO: Support dynamic memref dimensions.
  if (addMemRefDimBounds) {
    auto memRefType = cast<MemRefType>(memref.getType());
    for (unsigned r = 0; r < rank; r++) {
      cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
      if (memRefType.isDynamicDim(r))
        continue;
      cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
    }
  }
  cst.removeTrivialRedundancy();

  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
  LLVM_DEBUG(cst.dump());
  return success();
}
1327 
// Returns the per-element size in bytes for a memref whose element type is an
// int/float scalar or a vector of int/float; std::nullopt otherwise. The
// function-name line of the signature is elided from this excerpt.
std::optional<int64_t>
 auto elementType = memRefType.getElementType();

 unsigned sizeInBits;
 if (elementType.isIntOrFloat()) {
   sizeInBits = elementType.getIntOrFloatBitWidth();
 } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
   // A vector element contributes bitwidth * number-of-lanes bits.
   if (vectorType.getElementType().isIntOrFloat())
     sizeInBits =
         vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
   else
     return std::nullopt;
 } else {
   return std::nullopt;
 }
 // Round sub-byte bit widths up to a whole byte.
 return llvm::divideCeil(sizeInBits, 8);
}
1346 
1347 // Returns the size of the region.
1348 std::optional<int64_t> MemRefRegion::getRegionSize() {
1349  auto memRefType = cast<MemRefType>(memref.getType());
1350 
1351  if (!memRefType.getLayout().isIdentity()) {
1352  LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
1353  return false;
1354  }
1355 
1356  // Compute the extents of the buffer.
1357  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
1358  if (!numElements) {
1359  LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
1360  return std::nullopt;
1361  }
1362  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
1363  if (!eltSize)
1364  return std::nullopt;
1365  return *eltSize * *numElements;
1366 }
1367 
/// Returns the size of memref data in bytes if it's statically shaped,
/// std::nullopt otherwise. If the element of the memref has vector type, takes
/// into account size of the vector as well.
// TODO: improve/complete this when we have target data.
// (The function-name line of the signature is elided from this excerpt.)
std::optional<uint64_t>
 if (!memRefType.hasStaticShape())
   return std::nullopt;
 auto elementType = memRefType.getElementType();
 if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
   return std::nullopt;

 auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
 if (!sizeInBytes)
   return std::nullopt;
 // Multiply the element size by every static dimension extent.
 for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
   sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
 }
 return sizeInBytes;
}
1388 
1389 template <typename LoadOrStoreOp>
1390 LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
1391  bool emitError) {
1392  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
1393  AffineWriteOpInterface>::value,
1394  "argument should be either a AffineReadOpInterface or a "
1395  "AffineWriteOpInterface");
1396 
1397  Operation *op = loadOrStoreOp.getOperation();
1398  MemRefRegion region(op->getLoc());
1399  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
1400  /*addMemRefDimBounds=*/false)))
1401  return success();
1402 
1403  LLVM_DEBUG(llvm::dbgs() << "Memory region");
1404  LLVM_DEBUG(region.getConstraints()->dump());
1405 
1406  bool outOfBounds = false;
1407  unsigned rank = loadOrStoreOp.getMemRefType().getRank();
1408 
1409  // For each dimension, check for out of bounds.
1410  for (unsigned r = 0; r < rank; r++) {
1411  FlatAffineValueConstraints ucst(*region.getConstraints());
1412 
1413  // Intersect memory region with constraint capturing out of bounds (both out
1414  // of upper and out of lower), and check if the constraint system is
1415  // feasible. If it is, there is at least one point out of bounds.
1416  SmallVector<int64_t, 4> ineq(rank + 1, 0);
1417  int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
1418  // TODO: handle dynamic dim sizes.
1419  if (dimSize == -1)
1420  continue;
1421 
1422  // Check for overflow: d_i >= memref dim size.
1423  ucst.addBound(BoundType::LB, r, dimSize);
1424  outOfBounds = !ucst.isEmpty();
1425  if (outOfBounds && emitError) {
1426  loadOrStoreOp.emitOpError()
1427  << "memref out of upper bound access along dimension #" << (r + 1);
1428  }
1429 
1430  // Check for a negative index.
1431  FlatAffineValueConstraints lcst(*region.getConstraints());
1432  llvm::fill(ineq, 0);
1433  // d_i <= -1;
1434  lcst.addBound(BoundType::UB, r, -1);
1435  outOfBounds = !lcst.isEmpty();
1436  if (outOfBounds && emitError) {
1437  loadOrStoreOp.emitOpError()
1438  << "memref out of lower bound access along dimension #" << (r + 1);
1439  }
1440  }
1441  return failure(outOfBounds);
1442 }
1443 
// Explicitly instantiate the template so that the compiler knows we need them!
// (Explicit instantiation emits the definitions in this translation unit so
// uses elsewhere can link against them.)
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
                                      bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
                                      bool emitError);
1451 
1452 // Returns in 'positions' the Block positions of 'op' in each ancestor
1453 // Block from the Block containing operation, stopping at 'limitBlock'.
1454 static void findInstPosition(Operation *op, Block *limitBlock,
1455  SmallVectorImpl<unsigned> *positions) {
1456  Block *block = op->getBlock();
1457  while (block != limitBlock) {
1458  // FIXME: This algorithm is unnecessarily O(n) and should be improved to not
1459  // rely on linear scans.
1460  int instPosInBlock = std::distance(block->begin(), op->getIterator());
1461  positions->push_back(instPosInBlock);
1462  op = block->getParentOp();
1463  block = op->getBlock();
1464  }
1465  std::reverse(positions->begin(), positions->end());
1466 }
1467 
// Returns the Operation in a possibly nested set of Blocks, where the
// position of the operation is represented by 'positions', which has a
// Block position for each level of nesting.
// (The first signature line — the function name and the `positions`
// parameter — is elided from this excerpt.)
                                     unsigned level, Block *block) {
 unsigned i = 0;
 for (auto &op : *block) {
   // Linearly scan to the op at positions[level] within this block.
   if (i != positions[level]) {
     ++i;
     continue;
   }
   // Found the op at this nesting level; if it's the last level, we're done.
   if (level == positions.size() - 1)
     return &op;
   if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
     return getInstAtPosition(positions, level + 1,
                              childAffineForOp.getBody());

   // Otherwise recurse into every block of every region of this op.
   for (auto &region : op.getRegions()) {
     for (auto &b : region)
       if (auto *ret = getInstAtPosition(positions, level + 1, &b))
         return ret;
   }
   return nullptr;
 }
 return nullptr;
}
1494 
// Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
// (The signature lines — taking the IV set `ivs` and constraint system
// `cst` — are elided from this excerpt.) Returns failure if any loop's
// domain cannot be added.
 for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
   auto value = cst->getValue(i);
   if (ivs.count(value) == 0) {
     // Every dim var not in 'ivs' must be an affine.for IV; add its full
     // loop bounds to the system.
     assert(isAffineForInductionVar(value));
     auto loop = getForInductionVarOwner(value);
     if (failed(cst->addAffineForOpDomain(loop)))
       return failure();
   }
 }
 return success();
}
1509 
/// Returns the innermost common loop depth for the set of operations in 'ops'.
// TODO: Move this to LoopUtils.
// (The first signature line — the return type and function name — is elided
// from this excerpt.) If 'surroundingLoops' is non-null, it is populated with
// the common surrounding loops, outermost first.
    ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
 unsigned numOps = ops.size();
 assert(numOps > 0 && "Expected at least one operation");

 // Collect each op's surrounding affine.for nest; the common depth can be at
 // most the shallowest nest.
 std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
 unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
 for (unsigned i = 0; i < numOps; ++i) {
   getAffineForIVs(*ops[i], &loops[i]);
   loopDepthLimit =
       std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
 }

 // Walk depth by depth until two ops disagree on the surrounding loop.
 unsigned loopDepth = 0;
 for (unsigned d = 0; d < loopDepthLimit; ++d) {
   unsigned i;
   for (i = 1; i < numOps; ++i) {
     if (loops[i - 1][d] != loops[i][d])
       return loopDepth;
   }
   if (surroundingLoops)
     surroundingLoops->push_back(loops[i - 1][d]);
   ++loopDepth;
 }
 return loopDepth;
}
1538 
/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
/// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
/// union was computed correctly, an appropriate failure otherwise.
// NOTE(review): several lines of this function are elided from this excerpt
// (the first signature line, the call to the dependence-check utility whose
// result initializes `result`, the declaration of `ops`, and the
// SliceComputationResult return statements on the failure/success paths).
// Only comments were added here; the code text is reproduced as-is.
    ArrayRef<Operation *> opsB, unsigned loopDepth,
    unsigned numCommonLoops, bool isBackwardSlice,
    ComputationSliceState *sliceUnion) {
 // Compute the union of slice bounds between all pairs in 'opsA' and
 // 'opsB' in 'sliceUnionCst'.
 FlatAffineValueConstraints sliceUnionCst;
 assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
 std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
 MemRefAccess srcAccess;
 MemRefAccess dstAccess;
 for (Operation *a : opsA) {
   srcAccess = MemRefAccess(a);
   for (Operation *b : opsB) {
     dstAccess = MemRefAccess(b);
     // Only accesses to the same memref can be dependent.
     if (srcAccess.memref != dstAccess.memref)
       continue;
     // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
     if ((!isBackwardSlice && loopDepth > getNestingDepth(a)) ||
         (isBackwardSlice && loopDepth > getNestingDepth(b))) {
       LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
       // NOTE(review): a failure return is elided here in this excerpt.
     }

     // Read-after-read pairs are only relevant when explicitly allowed below.
     bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
                             isa<AffineReadOpInterface>(dstAccess.opInst);
     FlatAffineValueConstraints dependenceConstraints;
     // Check dependence between 'srcAccess' and 'dstAccess'.
     // NOTE(review): the line initializing `result` from the dependence-check
     // call is elided here in this excerpt.
         srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
         &dependenceConstraints, /*dependenceComponents=*/nullptr,
         /*allowRAR=*/readReadAccesses);
     if (result.value == DependenceResult::Failure) {
       LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
       // NOTE(review): a failure return is elided here in this excerpt.
     }
     if (result.value == DependenceResult::NoDependence)
       continue;
     dependentOpPairs.emplace_back(a, b);

     // Compute slice bounds for 'srcAccess' and 'dstAccess'.
     ComputationSliceState tmpSliceState;
     getComputationSliceState(a, b, dependenceConstraints, loopDepth,
                              isBackwardSlice, &tmpSliceState);

     if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
       // Initialize 'sliceUnionCst' with the bounds computed in previous step.
       if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
         LLVM_DEBUG(llvm::dbgs()
                    << "Unable to compute slice bound constraints\n");
         // NOTE(review): a failure return is elided here in this excerpt.
       }
       assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
       continue;
     }

     // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
     FlatAffineValueConstraints tmpSliceCst;
     if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
       LLVM_DEBUG(llvm::dbgs()
                  << "Unable to compute slice bound constraints\n");
       // NOTE(review): a failure return is elided here in this excerpt.
     }

     // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
     if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {

       // Pre-constraint var alignment: record loop IVs used in each constraint
       // system.
       SmallPtrSet<Value, 8> sliceUnionIVs;
       for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
         sliceUnionIVs.insert(sliceUnionCst.getValue(k));
       SmallPtrSet<Value, 8> tmpSliceIVs;
       for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
         tmpSliceIVs.insert(tmpSliceCst.getValue(k));

       sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);

       // Post-constraint var alignment: add loop IV bounds missing after
       // var alignment to constraint systems. This can occur if one constraint
       // system uses an loop IV that is not used by the other. The call
       // to unionBoundingBox below expects constraints for each Loop IV, even
       // if they are the unsliced full loop bounds added here.
       if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
       // NOTE(review): failure returns for these two conditions are elided
       // here in this excerpt.
       if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
     }
     // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
     if (sliceUnionCst.getNumLocalVars() > 0 ||
         tmpSliceCst.getNumLocalVars() > 0 ||
         failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
       LLVM_DEBUG(llvm::dbgs()
                  << "Unable to compute union bounding box of slice bounds\n");
       // NOTE(review): a failure return is elided here in this excerpt.
     }
   }
 }

 // Empty union.
 if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
   LLVM_DEBUG(llvm::dbgs() << "empty slice union - unexpected\n");
   // NOTE(review): a failure return is elided here in this excerpt.
 }

 // Gather loops surrounding ops from loop nest where slice will be inserted.
 // NOTE(review): the declaration of `ops` is elided here in this excerpt.
 for (auto &dep : dependentOpPairs) {
   ops.push_back(isBackwardSlice ? dep.second : dep.first);
 }
 SmallVector<AffineForOp, 4> surroundingLoops;
 unsigned innermostCommonLoopDepth =
     getInnermostCommonLoopDepth(ops, &surroundingLoops);
 if (loopDepth > innermostCommonLoopDepth) {
   LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
   // NOTE(review): a failure return is elided here in this excerpt.
 }

 // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
 unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();

 // Convert any dst loop IVs which are symbol variables to dim variables.
 sliceUnionCst.convertLoopIVSymbolsToDims();
 sliceUnion->clearBounds();
 sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
 sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());

 // Get slice bounds from slice union constraints 'sliceUnionCst'.
 sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
                              opsA[0]->getContext(), &sliceUnion->lbs,
                              &sliceUnion->ubs);

 // Add slice bound operands of union.
 SmallVector<Value, 4> sliceBoundOperands;
 sliceUnionCst.getValues(numSliceLoopIVs,
                         sliceUnionCst.getNumDimAndSymbolVars(),
                         &sliceBoundOperands);

 // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
 sliceUnion->ivs.clear();
 sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);

 // Set loop nest insertion point to block start at 'loopDepth' for forward
 // slices, while at the end for backward slices.
 sliceUnion->insertPoint =
     isBackwardSlice
         ? surroundingLoops[loopDepth - 1].getBody()->begin()
         : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());

 // Give each bound its own copy of 'sliceBoundOperands' for subsequent
 // canonicalization.
 sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
 sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);

 // Check if the slice computed is valid. Return success only if it is verified
 // that the slice is valid, otherwise return appropriate failure status.
 std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
 if (!isSliceValid) {
   LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
   // NOTE(review): a failure return is elided here in this excerpt.
 }
 if (!*isSliceValid)
 // NOTE(review): the failure return for an invalid slice and the final
 // success return are elided here in this excerpt.

}
1710 
1711 // TODO: extend this to handle multiple result maps.
1712 static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1713  AffineMap ubMap) {
1714  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
1715  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
1716  assert(lbMap.getNumDims() == ubMap.getNumDims());
1717  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1718  AffineExpr lbExpr(lbMap.getResult(0));
1719  AffineExpr ubExpr(ubMap.getResult(0));
1720  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
1721  lbMap.getNumSymbols());
1722  auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
1723  if (!cExpr)
1724  return std::nullopt;
1725  return cExpr.getValue();
1726 }
1727 
1728 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop
1729 // nest surrounding represented by slice loop bounds in 'slice'. Returns true
1730 // on success, false otherwise (if a non-constant trip count was encountered).
1731 // TODO: Make this work with non-unit step loops.
1733  const ComputationSliceState &slice,
1734  llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1735  unsigned numSrcLoopIVs = slice.ivs.size();
1736  // Populate map from AffineForOp -> trip count
1737  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1738  AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1739  auto *op = forOp.getOperation();
1740  AffineMap lbMap = slice.lbs[i];
1741  AffineMap ubMap = slice.ubs[i];
1742  // If lower or upper bound maps are null or provide no results, it implies
1743  // that source loop was not at all sliced, and the entire loop will be a
1744  // part of the slice.
1745  if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
1746  ubMap.getNumResults() == 0) {
1747  // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1748  if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1749  (*tripCountMap)[op] =
1750  forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1751  continue;
1752  }
1753  std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1754  if (maybeConstTripCount.has_value()) {
1755  (*tripCountMap)[op] = *maybeConstTripCount;
1756  continue;
1757  }
1758  return false;
1759  }
1760  std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1761  // Slice bounds are created with a constant ub - lb difference.
1762  if (!tripCount.has_value())
1763  return false;
1764  (*tripCountMap)[op] = *tripCount;
1765  }
1766  return true;
1767 }
1768 
1769 // Return the number of iterations in the given slice.
1771  const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1772  uint64_t iterCount = 1;
1773  for (const auto &count : sliceTripCountMap) {
1774  iterCount *= count.second;
1775  }
1776  return iterCount;
1777 }
1778 
1779 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1780 // Computes slice bounds by projecting out any loop IVs from
1781 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1782 // bounds in 'sliceState' which represent the one loop nest's IVs in terms of
1783 // the other loop nest's IVs, symbols and constants (using 'isBackwardsSlice').
// NOTE(review): the doc extractor dropped original line 1784 here — the
// linkified signature line (`void mlir::affine::getComputationSliceState(`);
// restore it from the original source/header when re-materializing this file.
1785  Operation *depSourceOp, Operation *depSinkOp,
1786  const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth,
1787  bool isBackwardSlice, ComputationSliceState *sliceState) {
1788  // Get loop nest surrounding src operation.
1789  SmallVector<AffineForOp, 4> srcLoopIVs;
1790  getAffineForIVs(*depSourceOp, &srcLoopIVs);
1791  unsigned numSrcLoopIVs = srcLoopIVs.size();
1792 
1793  // Get loop nest surrounding dst operation.
1794  SmallVector<AffineForOp, 4> dstLoopIVs;
1795  getAffineForIVs(*depSinkOp, &dstLoopIVs);
1796  unsigned numDstLoopIVs = dstLoopIVs.size();
1797 
1798  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1799  (isBackwardSlice && loopDepth <= numDstLoopIVs));
1800 
1801  // Project out dimensions other than those up to 'loopDepth'.
     // For a backward slice the source IVs follow the destination IVs in the
     // constraint system, hence the 'numSrcLoopIVs +' offset below.
1802  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1803  unsigned num =
1804  isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1805  FlatAffineValueConstraints sliceCst(dependenceConstraints);
1806  sliceCst.projectOut(pos, num);
1807 
1808  // Add slice loop IV values to 'sliceState'.
1809  unsigned offset = isBackwardSlice ? 0 : loopDepth;
1810  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1811  sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs);
1812 
1813  // Set up lower/upper bound affine maps for the slice.
1814  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
1815  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
1816 
1817  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1818  sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(),
1819  &sliceState->lbs, &sliceState->ubs);
1820 
1821  // Set up bound operands for the slice's lower and upper bounds.
     // Bound operands are every dim/symbol value that is *not* one of the slice
     // loop IVs themselves.
1822  SmallVector<Value, 4> sliceBoundOperands;
1823  unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars();
1824  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
1825  if (i < offset || i >= offset + numSliceLoopIVs)
1826  sliceBoundOperands.push_back(sliceCst.getValue(i));
1827  }
1828 
1829  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1830  // canonicalization.
1831  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1832  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1833 
1834  // Set destination loop nest insertion point to block start at 'dstLoopDepth'.
1835  sliceState->insertPoint =
1836  isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1837  : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1838 
1839  llvm::SmallDenseSet<Value, 8> sequentialLoops;
1840  if (isa<AffineReadOpInterface>(depSourceOp) &&
1841  isa<AffineReadOpInterface>(depSinkOp)) {
1842  // For read-read access pairs, clear any slice bounds on sequential loops.
1843  // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1844  getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
1845  &sequentialLoops);
1846  }
     // Helpers: select the slice-side loop nest / insertion-depth test
     // depending on slice direction.
1847  auto getSliceLoop = [&](unsigned i) {
1848  return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1849  };
1850  auto isInnermostInsertion = [&]() {
1851  return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1852  : loopDepth >= dstLoopIVs.size());
1853  };
1854  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
     // Lazily computed: true iff the slice's total iteration count is 1.
1855  auto srcIsUnitSlice = [&]() {
1856  return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
1857  (getSliceIterationCount(sliceTripCountMap) == 1));
1858  };
1859  // Clear all sliced loop bounds beginning at the first sequential loop, or
1860  // first loop with a slice fusion barrier attribute..
1861 
1862  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
1863  Value iv = getSliceLoop(i).getInductionVar();
1864  if (sequentialLoops.count(iv) == 0 &&
1865  getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1866  continue;
1867  // Skip reset of bounds of reduction loop inserted in the destination loop
1868  // that meets the following conditions:
1869  // 1. Slice is single trip count.
1870  // 2. Loop bounds of the source and destination match.
1871  // 3. Is being inserted at the innermost insertion point.
1872  std::optional<bool> isMaximal = sliceState->isMaximal();
1873  if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
1874  isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1875  continue;
     // Reset this loop's bounds and all deeper ones, then stop scanning.
1876  for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1877  sliceState->lbs[j] = AffineMap();
1878  sliceState->ubs[j] = AffineMap();
1879  }
1880  break;
1881  }
1882 }
1883 
1884 /// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1885 /// updates the slice loop bounds with any non-null bound maps specified in
1886 /// 'sliceState', and inserts this slice into the loop nest surrounding
1887 /// 'dstOpInst' at loop depth 'dstLoopDepth'.
1888 // TODO: extend the slicing utility to compute slices that
1889 // aren't necessarily a one-to-one relation b/w the source and destination. The
1890 // relation between the source and destination could be many-to-many in general.
1891 // TODO: the slice computation is incorrect in the cases
1892 // where the dependence from the source to the destination does not cover the
1893 // entire destination index set. Subtract out the dependent destination
1894 // iterations from destination index set and check for emptiness --- this is one
1895 // solution.
1897  Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
1898  ComputationSliceState *sliceState) {
1899  // Get loop nest surrounding src operation.
1900  SmallVector<AffineForOp, 4> srcLoopIVs;
1901  getAffineForIVs(*srcOpInst, &srcLoopIVs);
1902  unsigned numSrcLoopIVs = srcLoopIVs.size();
1903 
1904  // Get loop nest surrounding dst operation.
1905  SmallVector<AffineForOp, 4> dstLoopIVs;
1906  getAffineForIVs(*dstOpInst, &dstLoopIVs);
1907  unsigned dstLoopIVsSize = dstLoopIVs.size();
1908  if (dstLoopDepth > dstLoopIVsSize) {
1909  dstOpInst->emitError("invalid destination loop depth");
1910  return AffineForOp();
1911  }
1912 
1913  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1914  SmallVector<unsigned, 4> positions;
1915  // TODO: This code is incorrect since srcLoopIVs can be 0-d.
1916  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
1917 
1918  // Clone src loop nest and insert it a the beginning of the operation block
1919  // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1920  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1921  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1922  auto sliceLoopNest =
1923  cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
1924 
1925  Operation *sliceInst =
1926  getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
1927  // Get loop nest surrounding 'sliceInst'.
1928  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
1929  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1930 
1931  // Sanity check.
1932  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1933  (void)sliceSurroundingLoopsSize;
1934  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1935  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1936  (void)sliceLoopLimit;
1937  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1938 
1939  // Update loop bounds for loops in 'sliceLoopNest'.
1940  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1941  auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1942  if (AffineMap lbMap = sliceState->lbs[i])
1943  forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
1944  if (AffineMap ubMap = sliceState->ubs[i])
1945  forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
1946  }
1947  return sliceLoopNest;
1948 }
1949 
1950 // Constructs MemRefAccess populating it with the memref, its indices and
1951 // opinst from 'loadOrStoreOpInst'.
1953  if (auto loadOp = dyn_cast<AffineReadOpInterface>(memOp)) {
1954  memref = loadOp.getMemRef();
1955  opInst = memOp;
1956  llvm::append_range(indices, loadOp.getMapOperands());
1957  } else {
1958  assert(isa<AffineWriteOpInterface>(memOp) &&
1959  "Affine read/write op expected");
1960  auto storeOp = cast<AffineWriteOpInterface>(memOp);
1961  opInst = memOp;
1962  memref = storeOp.getMemRef();
1963  llvm::append_range(indices, storeOp.getMapOperands());
1964  }
1965 }
1966 
1967 unsigned MemRefAccess::getRank() const {
1968  return cast<MemRefType>(memref.getType()).getRank();
1969 }
1970 
1972  return isa<AffineWriteOpInterface>(opInst);
1973 }
1974 
1975 /// Returns the nesting depth of this statement, i.e., the number of loops
1976 /// surrounding this statement.
1978  Operation *currOp = op;
1979  unsigned depth = 0;
1980  while ((currOp = currOp->getParentOp())) {
1981  if (isa<AffineForOp>(currOp))
1982  depth++;
1983  if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
1984  depth += parOp.getNumDims();
1985  }
1986  return depth;
1987 }
1988 
1989 /// Equal if both affine accesses are provably equivalent (at compile
1990 /// time) when considering the memref, the affine maps and their respective
1991 /// operands. The equality of access functions + operands is checked by
1992 /// subtracting fully composed value maps, and then simplifying the difference
1993 /// using the expression flattener.
1994 /// TODO: this does not account for aliasing of memrefs.
1995 bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
1996  if (memref != rhs.memref)
1997  return false;
1998 
1999  AffineValueMap diff, thisMap, rhsMap;
2000  getAccessMap(&thisMap);
2001  rhs.getAccessMap(&rhsMap);
2002  return thisMap == rhsMap;
2003 }
2004 
2006  auto *currOp = op.getParentOp();
2007  AffineForOp currAffineForOp;
2008  // Traverse up the hierarchy collecting all 'affine.for' and affine.parallel
2009  // operation while skipping over 'affine.if' operations.
2010  while (currOp) {
2011  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
2012  ivs.push_back(currAffineForOp.getInductionVar());
2013  else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
2014  llvm::append_range(ivs, parOp.getIVs());
2015  currOp = currOp->getParentOp();
2016  }
2017  std::reverse(ivs.begin(), ivs.end());
2018 }
2019 
2020 /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
2021 /// where each lists loops from outer-most to inner-most in loop nest.
2023  Operation &b) {
2024  SmallVector<Value, 4> loopsA, loopsB;
2025  getAffineIVs(a, loopsA);
2026  getAffineIVs(b, loopsB);
2027 
2028  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
2029  unsigned numCommonLoops = 0;
2030  for (unsigned i = 0; i < minNumLoops; ++i) {
2031  if (loopsA[i] != loopsB[i])
2032  break;
2033  ++numCommonLoops;
2034  }
2035  return numCommonLoops;
2036 }
2037 
2038 static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
2039  Block::iterator start,
2040  Block::iterator end,
2041  int memorySpace) {
2042  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
2043 
2044  // Walk this 'affine.for' operation to gather all memory regions.
2045  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
2046  if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
2047  // Neither load nor a store op.
2048  return WalkResult::advance();
2049  }
2050 
2051  // Compute the memref region symbolic in any IVs enclosing this block.
2052  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
2053  if (failed(
2054  region->compute(opInst,
2055  /*loopDepth=*/getNestingDepth(&*block.begin())))) {
2056  LLVM_DEBUG(opInst->emitError("error obtaining memory region"));
2057  return failure();
2058  }
2059 
2060  auto [it, inserted] = regions.try_emplace(region->memref);
2061  if (inserted) {
2062  it->second = std::move(region);
2063  } else if (failed(it->second->unionBoundingBox(*region))) {
2064  LLVM_DEBUG(opInst->emitWarning(
2065  "getMemoryFootprintBytes: unable to perform a union on a memory "
2066  "region"));
2067  return failure();
2068  }
2069  return WalkResult::advance();
2070  });
2071  if (result.wasInterrupted())
2072  return std::nullopt;
2073 
2074  int64_t totalSizeInBytes = 0;
2075  for (const auto &region : regions) {
2076  std::optional<int64_t> size = region.second->getRegionSize();
2077  if (!size.has_value())
2078  return std::nullopt;
2079  totalSizeInBytes += *size;
2080  }
2081  return totalSizeInBytes;
2082 }
2083 
2084 std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
2085  int memorySpace) {
2086  auto *forInst = forOp.getOperation();
2088  *forInst->getBlock(), Block::iterator(forInst),
2089  std::next(Block::iterator(forInst)), memorySpace);
2090 }
2091 
2092 /// Returns whether a loop is parallel and contains a reduction loop.
2094  SmallVector<LoopReduction> reductions;
2095  if (!isLoopParallel(forOp, &reductions))
2096  return false;
2097  return !reductions.empty();
2098 }
2099 
2100 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
2101 /// at 'forOp'.
2103  AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
2104  forOp->walk([&](Operation *op) {
2105  if (auto innerFor = dyn_cast<AffineForOp>(op))
2106  if (!isLoopParallel(innerFor))
2107  sequentialLoops->insert(innerFor.getInductionVar());
2108  });
2109 }
2110 
// NOTE(review): the doc extractor dropped original line 2111 here — the
// linkified signature line (`IntegerSet mlir::affine::simplifyIntegerSet(`,
// taking the `set` used below); restore it from the original source.
2112  FlatAffineValueConstraints fac(set);
     // An infeasible system simplifies to the canonical empty set.
2113  if (fac.isEmpty())
2114  return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
2115  set.getContext());
// NOTE(review): original line 2116 (a call on `fac`) was also dropped by the
// extractor — presumably a redundancy-removal/simplification step; confirm
// against the original source before re-materializing.
2117 
2118  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
2119  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
2120  return simplifiedSet;
2121 }
2122 
2123 static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
2124  SmallVector<Value> &target) {
2125  target =
2126  llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
2127  return val.has_value() ? *val : Value();
2128  }));
2129 }
2130 
2131 /// Bound an identifier `pos` in a given FlatAffineValueConstraints with
2132 /// constraints drawn from an affine map. Before adding the constraint, the
2133 /// dimensions/symbols of the affine map are aligned with `constraints`.
2134 /// `operands` are the SSA Value operands used with the affine map.
2135 /// Note: This function adds a new symbol column to the `constraints` for each
2136 /// dimension/symbol that exists in the affine map but not in `constraints`.
2137 static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
2138  BoundType type, unsigned pos,
2139  AffineMap map, ValueRange operands) {
2140  SmallVector<Value> dims, syms, newSyms;
2141  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
2142  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
2143 
2144  AffineMap alignedMap =
2145  alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
2146  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
2147  constraints.appendSymbolVar(newSyms[i]);
2148  return constraints.addBound(type, pos, alignedMap);
2149 }
2150 
2151 /// Add `val` to each result of `map`.
2152 static AffineMap addConstToResults(AffineMap map, int64_t val) {
2153  SmallVector<AffineExpr> newResults;
2154  for (AffineExpr r : map.getResults())
2155  newResults.push_back(r + val);
2156  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
2157  map.getContext());
2158 }
2159 
2160 // Attempt to simplify the given min/max operation by proving that its value is
2161 // bounded by the same lower and upper bound.
2162 //
2163 // Bounds are computed by FlatAffineValueConstraints. Invariants required for
2164 // finding/proving bounds should be supplied via `constraints`.
2165 //
2166 // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
2167 // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
2168 // case of `!isMin`) and bind it to `opBound`. SSA values that are used in
2169 // `op` but are not part of `constraints`, are added as extra symbols.
2170 // 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
2171 // * If `isMin`: r_i >= opBound
2172 // * If `isMax`: r_i <= opBound
2173 // If this is the case, ub(op) == lb(op).
2174 // 4. Replace `op` with `opBound`.
2175 //
2176 // In summary, the following constraints are added throughout this function.
2177 // Note: `invar` are dimensions added by the caller to express the invariants.
2178 // (Showing only the case where `isMin`.)
2179 //
2180 // invar | op | opBound | r_i | extra syms... | const | eq/ineq
2181 // ------+-------+---------+-----+---------------+-------+-------------------
2182 // (various eq./ineq. constraining `invar`, added by the caller)
2183 // ... | 0 | 0 | 0 | 0 | ... | ...
2184 // ------+-------+---------+-----+---------------+-------+-------------------
2185 // (various ineq. constraining `op` in terms of `op` operands (`invar` and
2186 // extra `op` operands "extra syms" that are not in `invar`)).
2187 // ... | -1 | 0 | 0 | ... | ... | >= 0
2188 // ------+-------+---------+-----+---------------+-------+-------------------
2189 // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
2190 // ... | 0 | -1 | 0 | ... | ... | = 0
2191 // ------+-------+---------+-----+---------------+-------+-------------------
2192 // (for each `op` map result r_i: set r_i to corresponding map result,
2193 // prove that r_i >= minOpUb via contradiction)
2194 // ... | 0 | 0 | -1 | ... | ... | = 0
2195 // 0 | 0 | 1 | -1 | 0 | -1 | >= 0
2196 //
// NOTE(review): the doc extractor dropped original line 2197 here — the
// linkified signature line of this function (returning FailureOr<AffineValueMap>
// per the final `return AffineValueMap(...)` / `return failure()` paths below);
// restore it from the original source when re-materializing this file.
2198  Operation *op, FlatAffineValueConstraints constraints) {
2199  bool isMin = isa<AffineMinOp>(op);
2200  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
2201  MLIRContext *ctx = op->getContext();
2202  Builder builder(ctx);
2203  AffineMap map =
2204  isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2205  ValueRange operands = op->getOperands();
2206  unsigned numResults = map.getNumResults();
2207 
     // `constraints` is taken by value: the extra columns appended below are
     // local to this analysis and never visible to the caller.
2208  // Add a few extra dimensions.
2209  unsigned dimOp = constraints.appendDimVar(); // `op`
2210  unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
2211  unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults);
2212 
2213  // Add an inequality for each result expr_i of map:
2214  // isMin: op <= expr_i, !isMin: op >= expr_i
2215  auto boundType = isMin ? BoundType::UB : BoundType::LB;
2216  // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
2217  AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
2218  if (failed(
2219  alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
2220  return failure();
2221 
2222  // Try to compute a lower/upper bound for op, expressed in terms of the other
2223  // `dims` and extra symbols.
2224  SmallVector<AffineMap> opLb(1), opUb(1);
2225  constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
2226  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
2227  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
2228  // a TODO of `getSliceBounds` and not handled here.
2229  if (!sliceBound || sliceBound.getNumResults() != 1)
2230  return failure(); // No or multiple bounds found.
2231  // Recover the inclusive UB in the case of an `affine.min`.
2232  AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;
2233 
2234  // Add an equality: Set dimOpBound to computed bound.
2235  // Add back dimension for op. (Was removed by `getSliceBounds`.)
2236  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
2237  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
2238  return failure();
2239 
2240  // If the constraint system is empty, there is an inconsistency. (E.g., this
2241  // can happen if loop lb > ub.)
2242  if (constraints.isEmpty())
2243  return failure();
2244 
2245  // In the case of `isMin` (`!isMin` is inversed):
2246  // Prove that each result of `map` has a lower bound that is equal to (or
2247  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
2248  // can be replaced with the bound. I.e., prove that for each result
2249  // expr_i (represented by dimension r_i):
2250  //
2251  // r_i >= opBound
2252  //
2253  // To prove this inequality, add its negation to the constraint set and prove
2254  // that the constraint set is empty.
2255  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
     // Work on a copy so each result is proven independently.
2256  FlatAffineValueConstraints newConstr(constraints);
2257 
2258  // Add an equality: r_i = expr_i
2259  // Note: These equalities could have been added earlier and used to express
2260  // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
2261  // computes minOpUb in terms of r_i dims, which is not desired.
2262  if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
2263  map.getSubMap({i - resultDimStart}), operands)))
2264  return failure();
2265 
2266  // If `isMin`: Add inequality: r_i < opBound
2267  // equiv.: opBound - r_i - 1 >= 0
2268  // If `!isMin`: Add inequality: r_i > opBound
2269  // equiv.: -opBound + r_i - 1 >= 0
2270  SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
2271  ineq[dimOpBound] = isMin ? 1 : -1;
2272  ineq[i] = isMin ? -1 : 1;
2273  ineq[newConstr.getNumCols() - 1] = -1;
2274  newConstr.addInequality(ineq);
     // Non-empty means the negation is satisfiable — proof failed.
2275  if (!newConstr.isEmpty())
2276  return failure();
2277  }
2278 
2279  // Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
2280  AffineMap newMap = alignedBoundMap;
2281  SmallVector<Value> newOperands;
2282  unpackOptionalValues(constraints.getMaybeValues(), newOperands);
2283  // If dims/symbols have known constant values, use those in order to simplify
2284  // the affine map further.
2285  for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
2286  // Skip unused operands and operands that are already constants.
2287  if (!newOperands[i] || getConstantIntValue(newOperands[i]))
2288  continue;
2289  if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
2290  AffineExpr expr =
2291  i < newMap.getNumDims()
2292  ? builder.getAffineDimExpr(i)
2293  : builder.getAffineSymbolExpr(i - newMap.getNumDims());
2294  newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
2295  newMap.getNumDims(), newMap.getNumSymbols());
2296  }
2297  }
2298  affine::canonicalizeMapAndOperands(&newMap, &newOperands);
2299  return AffineValueMap(newMap, newOperands);
2300 }
2301 
2303  Operation *b) {
2304  Region *aScope = getAffineAnalysisScope(a);
2305  Region *bScope = getAffineAnalysisScope(b);
2306  if (aScope != bScope)
2307  return nullptr;
2308 
2309  // Get the block ancestry of `op` while stopping at the affine scope `aScope`
2310  // and store them in `ancestry`.
2311  auto getBlockAncestry = [&](Operation *op,
2312  SmallVectorImpl<Block *> &ancestry) {
2313  Operation *curOp = op;
2314  do {
2315  ancestry.push_back(curOp->getBlock());
2316  if (curOp->getParentRegion() == aScope)
2317  break;
2318  curOp = curOp->getParentOp();
2319  } while (curOp);
2320  assert(curOp && "can't reach root op without passing through affine scope");
2321  std::reverse(ancestry.begin(), ancestry.end());
2322  };
2323 
2324  SmallVector<Block *, 4> aAncestors, bAncestors;
2325  getBlockAncestry(a, aAncestors);
2326  getBlockAncestry(b, bAncestors);
2327  assert(!aAncestors.empty() && !bAncestors.empty() &&
2328  "at least one Block ancestor expected");
2329 
2330  Block *innermostCommonBlock = nullptr;
2331  for (unsigned a = 0, b = 0, e = aAncestors.size(), f = bAncestors.size();
2332  a < e && b < f; ++a, ++b) {
2333  if (aAncestors[a] != bAncestors[b])
2334  break;
2335  innermostCommonBlock = aAncestors[a];
2336  }
2337  return innermostCommonBlock;
2338 }
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
Definition: Utils.cpp:1712
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
Definition: Utils.cpp:1454
static Node * addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg, DenseMap< Value, SetVector< unsigned >> &memrefAccesses)
Add op to MDG creating a new node and adding its memory accesses (affine or non-affine to memrefAcces...
Definition: Utils.cpp:189
const char *const kSliceFusionBarrierAttrName
Definition: Utils.cpp:1779
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
Definition: Utils.cpp:1496
static void getEffectedValues(Operation *op, SmallVectorImpl< Value > &values)
Returns the values that this op has a memref effect of type EffectTys on, not considering recursive e...
Definition: Utils.cpp:163
MemRefDependenceGraph::Node Node
Definition: Utils.cpp:36
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
Definition: Utils.cpp:1471
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
Definition: Utils.cpp:2038
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
Definition: Utils.cpp:2152
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
Definition: Utils.cpp:2137
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
Definition: Utils.cpp:2123
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:223
unsigned getPosition() const
Definition: AffineExpr.cpp:348
Base type for affine expression.
Definition: AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:339
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
Definition: AffineMap.h:267
unsigned getNumSymbols() const
Definition: AffineMap.cpp:394
unsigned getNumDims() const
Definition: AffineMap.cpp:390
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:403
unsigned getNumResults() const
Definition: AffineMap.cpp:398
unsigned getNumInputs() const
Definition: AffineMap.cpp:399
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:407
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
Definition: AffineMap.cpp:511
void dump() const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Definition: AffineMap.cpp:647
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Definition: Block.h:305
iterator begin()
Definition: Block.h:143
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:31
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:363
AffineExpr getAffineConstantExpr(int64_t constant)
Definition: Builders.cpp:367
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:359
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
std::optional< int64_t > getConstantBoundOnDimSize(MLIRContext *context, unsigned pos, AffineMap *lb=nullptr, AffineMap *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
Returns a non-negative constant bound on the extent (upper bound - lower bound) of the specified vari...
FlatLinearValueConstraints represents an extension of FlatLinearConstraints where each non-local vari...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
SmallVector< std::optional< Value > > getMaybeValues() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associated with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
void addBound(presburger::BoundType type, Value val, int64_t value)
Adds a constant bound for the variable associated with the given Value.
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
An integer set representing a conjunction of one or more affine equalities and inequalities.
Definition: IntegerSet.h:44
unsigned getNumDims() const
Definition: IntegerSet.cpp:15
MLIRContext * getContext() const
Definition: IntegerSet.cpp:57
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
Definition: IntegerSet.h:56
unsigned getNumSymbols() const
Definition: IntegerSet.cpp:16
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:548
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:749
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:385
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:279
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: WalkResult.h:29
static WalkResult advance()
Definition: WalkResult.h:47
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
Definition: Utils.cpp:2111
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:773
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
Definition: Utils.cpp:1544
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
Definition: AffineOps.cpp:2754
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
Definition: Utils.cpp:2093
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:2022
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2758
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
Definition: Utils.cpp:2005
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
Definition: Utils.cpp:2102
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1621
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2746
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
Definition: AffineOps.cpp:276
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
Definition: Utils.cpp:759
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:2084
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
Definition: Utils.cpp:1512
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:413
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
Definition: Utils.cpp:1784
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns the AffineParallelOp that has the provided value among its induction variables, if any.
Definition: AffineOps.cpp:2769
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
Definition: Utils.cpp:1373
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1977
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the number of iterations for the slice trip count map provided.
Definition: Utils.cpp:1770
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
mlir::Block * findInnermostCommonBlockInScope(mlir::Operation *a, mlir::Operation *b)
Find the innermost common Block of a and b in the affine scope that a and b are part of.
Definition: Utils.cpp:2302
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
Definition: Utils.cpp:1732
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
Definition: Utils.cpp:1896
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
Definition: Utils.cpp:2197
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
Definition: Utils.cpp:1329
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
BoundType
The type of bound: equal, lower bound or upper bound.
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:645
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
Definition: Utils.h:310
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
Definition: Utils.cpp:944
SmallVector< Value, 4 > ivs
Definition: Utils.h:313
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
Definition: Utils.cpp:806
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
Definition: Utils.cpp:790
std::vector< SmallVector< Value, 4 > > ubOperands
Definition: Utils.h:321
SmallVector< AffineMap, 4 > ubs
Definition: Utils.h:317
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
Definition: Utils.cpp:1014
SmallVector< AffineMap, 4 > lbs
Definition: Utils.h:315
std::vector< SmallVector< Value, 4 > > lbOperands
Definition: Utils.h:319
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:49
SmallVector< Operation *, 4 > loadOpInsts
Definition: Utils.h:41
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:47
void collect(Operation *opToWalk)
Definition: Utils.cpp:41
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:45
SmallVector< Operation *, 4 > storeOpInsts
Definition: Utils.h:43
Encapsulates a memref load or store access information.
unsigned getRank() const
Definition: Utils.cpp:1967
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
Definition: Utils.cpp:1995
SmallVector< Operation *, 4 > loads
Definition: Utils.h:73
SmallVector< Operation *, 4 > stores
Definition: Utils.h:77
unsigned hasFree(Value memref) const
Definition: Utils.cpp:121
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:75
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:79
unsigned getStoreOpCount(Value memref) const
Definition: Utils.cpp:91
unsigned hasStore(Value memref) const
Returns true if there exists an operation with a write memory effect to memref in this node.
Definition: Utils.cpp:107
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:81
unsigned addNode(Operation *op)
Definition: Utils.cpp:371
bool writesToLiveInOrEscapingMemrefs(unsigned id) const
Definition: Utils.cpp:401
void removeEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:447
void addEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:436
DenseMap< unsigned, Node > nodes
Definition: Utils.h:141
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes) const
Return all nodes which define SSA values used in node 'id'.
Definition: Utils.cpp:535
bool hasDependencePath(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:474
void clearNodeLoadAndStores(unsigned id)
Definition: Utils.cpp:702
const Node * getForOpNode(AffineForOp forOp) const
Definition: Utils.cpp:363
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:549
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
Definition: Utils.cpp:625
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:710
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr) const
Definition: Utils.cpp:525
const Node * getNode(unsigned id) const
Definition: Utils.cpp:356
void removeNode(unsigned id)
Definition: Utils.cpp:378
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:718
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:726
void addToNode(unsigned id, ArrayRef< Operation * > loads, ArrayRef< Operation * > stores, ArrayRef< Operation * > memrefLoads, ArrayRef< Operation * > memrefStores, ArrayRef< Operation * > memrefFrees)
Definition: Utils.cpp:689
unsigned getIncomingMemRefAccesses(unsigned id, Value memref) const
Definition: Utils.cpp:509
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr) const
Definition: Utils.cpp:421
void print(raw_ostream &os) const
Definition: Utils.cpp:741
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...
Definition: Utils.h:481
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
Definition: Utils.cpp:1068
FlatAffineValueConstraints * getConstraints()
Definition: Utils.h:527
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
Definition: Utils.cpp:1064
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true, bool dropLocalVars=true, bool dropOuterIVs=true)
Computes the memory region accessed by this memref with the region represented as constraints symboli...
Definition: Utils.cpp:1165
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Definition: Utils.cpp:1124
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
Definition: Utils.cpp:1348
LogicalResult unionBoundingBox(const MemRefRegion &other)
Definition: Utils.cpp:1143
Value memref
Memref that this region corresponds to.
Definition: Utils.h:562
Enumerates different result statuses of slice computation by computeSliceUnion
Definition: Utils.h:297
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.