//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for non-loop IR
// structures.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/Utils.h"

#include "mlir/Analysis/Presburger/PresburgerRelation.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "analysis-utils"

using namespace mlir;
using namespace affine;
using namespace presburger;

using llvm::SmallDenseMap;

// LoopNestStateCollector walks loop nests and collects load and store
// operations as well as non-affine memory operations (loads, stores, and
// frees) encountered in the nest.
void LoopNestStateCollector::collect(Operation *opToWalk) {
  opToWalk->walk([&](Operation *op) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      forOps.push_back(forOp);
    } else if (isa<AffineReadOpInterface>(op)) {
      loadOpInsts.push_back(op);
    } else if (isa<AffineWriteOpInterface>(op)) {
      storeOpInsts.push_back(op);
    } else {
      auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
      if (!memInterface) {
        if (isMemoryEffectFree(op))
          // This op itself is memory-effect free.
          return;
        // Check operands. Eg. ops like the `call` op are handled here.
        for (Value v : op->getOperands()) {
          if (!isa<MemRefType>(v.getType()))
            continue;
          // Conservatively, we assume the memref is read and written to.
          memrefLoads.push_back(op);
          memrefStores.push_back(op);
        }
      } else {
        // Non-affine loads and stores.
        if (hasEffect<MemoryEffects::Read>(op))
          memrefLoads.push_back(op);
        if (hasEffect<MemoryEffects::Write>(op))
          memrefStores.push_back(op);
        if (hasEffect<MemoryEffects::Free>(op))
          memrefFrees.push_back(op);
      }
    }
  });
}
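
// For illustration, consider a hypothetical nest such as (IR sketch, not from
// any test):
//
//   affine.for %i = 0 to 16 {
//     %v = affine.load %A[%i] : memref<16xf32>    // -> loadOpInsts
//     memref.store %v, %B[%c0] : memref<1xf32>    // -> memrefStores
//     affine.store %v, %C[%i] : memref<16xf32>    // -> storeOpInsts
//   }
//
// Affine accesses are classified via their read/write interfaces, while the
// non-affine memref.store is classified via its MemoryEffectOpInterface write
// effect.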

unsigned Node::getLoadOpCount(Value memref) const {
  unsigned loadOpCount = 0;
  for (Operation *loadOp : loads) {
    // Common case: affine reads.
    if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
      if (memref == affineLoad.getMemRef())
        ++loadOpCount;
    } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
      ++loadOpCount;
    }
  }
  return loadOpCount;
}

// Returns the store op count for 'memref'.
unsigned Node::getStoreOpCount(Value memref) const {
  unsigned storeOpCount = 0;
  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
    // Common case: affine writes.
    if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
      if (memref == affineStore.getMemRef())
        ++storeOpCount;
    } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
                                               memref)) {
      ++storeOpCount;
    }
  }
  return storeOpCount;
}

// Returns true if there exists an operation with a write memory effect on
// 'memref' in this node.
unsigned Node::hasStore(Value memref) const {
  return llvm::any_of(
      llvm::concat<Operation *const>(stores, memrefStores),
      [&](Operation *storeOp) {
        if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
          if (memref == affineStore.getMemRef())
            return true;
        } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
          return true;
        }
        return false;
      });
}

// Returns true if there exists an operation with a free memory effect on
// 'memref' in this node.
unsigned Node::hasFree(Value memref) const {
  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
    return hasEffect<MemoryEffects::Free>(freeOp, memref);
  });
}

// Returns in 'storeOps' all store ops in this node that access 'memref'.
void Node::getStoreOpsForMemref(Value memref,
                                SmallVectorImpl<Operation *> *storeOps) const {
  for (Operation *storeOp : stores) {
    if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
      storeOps->push_back(storeOp);
  }
}

// Returns in 'loadOps' all load ops in this node that access 'memref'.
void Node::getLoadOpsForMemref(Value memref,
                               SmallVectorImpl<Operation *> *loadOps) const {
  for (Operation *loadOp : loads) {
    if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
      loadOps->push_back(loadOp);
  }
}

// Returns in 'loadAndStoreMemrefSet' all memrefs for which this node has at
// least one load and at least one store.
void Node::getLoadAndStoreMemrefSet(
    DenseSet<Value> *loadAndStoreMemrefSet) const {
  llvm::SmallDenseSet<Value, 2> loadMemrefs;
  for (Operation *loadOp : loads) {
    loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
  }
  for (Operation *storeOp : stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
    if (loadMemrefs.count(memref) > 0)
      loadAndStoreMemrefSet->insert(memref);
  }
}

/// Returns the values that this op has a memref effect of type `EffectTys` on,
/// not considering recursive effects.
template <typename... EffectTys>
static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) {
  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
  if (!memOp) {
    if (isMemoryEffectFree(op))
      // No effects.
      return;
    // Memref operands have to be considered as being affected.
    for (Value operand : op->getOperands()) {
      if (isa<MemRefType>(operand.getType()))
        values.push_back(operand);
    }
    return;
  }
  SmallVector<MemoryEffects::EffectInstance, 4> effects;
  memOp.getEffects(effects);
  for (auto &effect : effects) {
    Value effectVal = effect.getValue();
    if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
        isa<MemRefType>(effectVal.getType()))
      values.push_back(effectVal);
  }
}

/// Adds `op` to the MDG, creating a new node, and adds its memory accesses
/// (affine or non-affine) to the 'memrefAccesses' (memref -> list of nodes
/// with accesses) map.
static Node *
addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
             DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
  auto &nodes = mdg.nodes;
  // Create graph node 'id' to represent top-level 'nodeOp' and record all
  // load and store accesses it contains.
  LoopNestStateCollector collector;
  collector.collect(nodeOp);
  unsigned newNodeId = mdg.nextNodeId++;
  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
  for (Operation *op : collector.loadOpInsts) {
    node.loads.push_back(op);
    auto memref = cast<AffineReadOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.storeOpInsts) {
    node.stores.push_back(op);
    auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.memrefLoads) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Read>(op, effectedValues);
    if (llvm::any_of(ValueRange(effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      // We do not know the interaction here.
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefLoads.push_back(op);
  }
  for (Operation *op : collector.memrefStores) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Write>(op, effectedValues);
    if (llvm::any_of(ValueRange(effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefStores.push_back(op);
  }
  for (Operation *op : collector.memrefFrees) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Free>(op, effectedValues);
    if (llvm::any_of(ValueRange(effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefFrees.push_back(op);
  }

  return &node;
}

bool MemRefDependenceGraph::init() {
  LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
  // Map from a memref to the set of ids of the nodes that have ops accessing
  // the memref.
  DenseMap<Value, SetVector<unsigned>> memrefAccesses;

  // Create graph nodes.
  DenseMap<Operation *, unsigned> forToNodeMap;
  for (Operation &op : block) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
      forToNodeMap[&op] = node->id;
    } else if (isa<AffineReadOpInterface>(op)) {
      // Create graph node for top-level load op.
      Node node(nextNodeId++, &op);
      node.loads.push_back(&op);
      auto memref = cast<AffineReadOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (isa<AffineWriteOpInterface>(op)) {
      // Create graph node for top-level store op.
      Node node(nextNodeId++, &op);
      node.stores.push_back(&op);
      auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (op.getNumResults() > 0 && !op.use_empty()) {
      // Create graph node for top-level producer of SSA values, which
      // could be used by loop nest nodes.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (!isMemoryEffectFree(&op) &&
               (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
      // Create graph node for top-level op unless it is known to be
      // memory-effect free. This covers all unknown/unregistered ops,
      // non-affine ops with memory effects, and region-holding ops with a
      // well-defined control flow. During the fusion validity checks, edges
      // to/from these ops get looked at.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
      // Return false if non-handled/unknown region-holding ops are found. We
      // won't know what such ops do or what their regions mean; e.g., it may
      // not be an imperative op.
      LLVM_DEBUG(llvm::dbgs()
                 << "MDG init failed; unknown region-holding op found!\n");
      return false;
    }
    // We don't create nodes for memory-effect-free ops that either have no
    // regions (unless they have results that are used) or implement the
    // region branch op interface.
  }

  LLVM_DEBUG(llvm::dbgs() << "Created " << nodes.size() << " nodes\n");

  // Add dependence edges between nodes which produce SSA values and their
  // users. Load ops can be considered as the ones producing SSA values.
  for (auto &idAndNode : nodes) {
    const Node &node = idAndNode.second;
    // Stores don't define SSA values, skip them.
    if (!node.stores.empty())
      continue;
    Operation *opInst = node.op;
    for (Value value : opInst->getResults()) {
      for (Operation *user : value.getUsers()) {
        // Ignore users outside of the block.
        if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
            &block)
          continue;
        SmallVector<AffineForOp, 4> loops;
        getAffineForIVs(*user, &loops);
        // Find the surrounding affine.for nested immediately within the
        // block.
        auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
          return loop->getBlock() == &block;
        });
        if (it == loops.end())
          continue;
        assert(forToNodeMap.count(*it) > 0 && "missing mapping");
        unsigned userLoopNestId = forToNodeMap[*it];
        addEdge(node.id, userLoopNestId, value);
      }
    }
  }

  // Walk memref access lists and add graph edges between dependent nodes.
  for (auto &memrefAndList : memrefAccesses) {
    unsigned n = memrefAndList.second.size();
    Value srcMemRef = memrefAndList.first;
    // Add edges between all dependent pairs among the node IDs on this memref.
    for (unsigned i = 0; i < n; ++i) {
      unsigned srcId = memrefAndList.second[i];
      Node *srcNode = getNode(srcId);
      bool srcHasStoreOrFree =
          srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
      for (unsigned j = i + 1; j < n; ++j) {
        unsigned dstId = memrefAndList.second[j];
        Node *dstNode = getNode(dstId);
        bool dstHasStoreOrFree =
            dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
        if (srcHasStoreOrFree || dstHasStoreOrFree)
          addEdge(srcId, dstId, srcMemRef);
      }
    }
  }
  return true;
}
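
// For illustration, given a hypothetical block (IR sketch):
//
//   affine.for %i = 0 to 10 {
//     affine.store %cst, %A[%i] : memref<10xf32>
//   }
//   affine.for %j = 0 to 10 {
//     %v = affine.load %A[%j] : memref<10xf32>
//   }
//
// init() creates one node per top-level loop nest; since the first node
// stores to %A and the second loads from it, a memref dependence edge keyed
// on %A is added from the first node to the second.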

// Returns the graph node for 'id'.
const Node *MemRefDependenceGraph::getNode(unsigned id) const {
  auto it = nodes.find(id);
  assert(it != nodes.end());
  return &it->second;
}

// Returns the graph node for 'forOp'.
const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const {
  for (auto &idAndNode : nodes)
    if (idAndNode.second.op == forOp)
      return &idAndNode.second;
  return nullptr;
}

// Adds a node with 'op' to the graph and returns its unique identifier.
unsigned MemRefDependenceGraph::addNode(Operation *op) {
  Node node(nextNodeId++, op);
  nodes.insert({node.id, node});
  return node.id;
}

// Removes node 'id' (and its associated edges) from the graph.
void MemRefDependenceGraph::removeNode(unsigned id) {
  // Remove each edge in 'inEdges[id]'.
  if (inEdges.count(id) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[id];
    for (auto &inEdge : oldInEdges) {
      removeEdge(inEdge.id, id, inEdge.value);
    }
  }
  // Remove each edge in 'outEdges[id]'.
  if (outEdges.contains(id)) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[id];
    for (auto &outEdge : oldOutEdges) {
      removeEdge(id, outEdge.id, outEdge.value);
    }
  }
  // Erase remaining node state.
  inEdges.erase(id);
  outEdges.erase(id);
  nodes.erase(id);
}

// Returns true if node 'id' writes to any memref which escapes (or is an
// argument to) the block. Returns false otherwise.
bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) const {
  const Node *node = getNode(id);
  for (auto *storeOpInst : node->stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
    auto *op = memref.getDefiningOp();
    // Return true if 'memref' is a block argument.
    if (!op)
      return true;
    // Return true if any use of 'memref' does not dereference it in an affine
    // way.
    for (auto *user : memref.getUsers())
      if (!isa<AffineMapAccessInterface>(*user))
        return true;
  }
  return false;
}

// Returns true iff there is an edge from node 'srcId' to node 'dstId' which
// is for 'value' if non-null, or for any value otherwise. Returns false
// otherwise.
bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
                                    Value value) const {
  if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) {
    return false;
  }
  bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) {
    return edge.id == dstId && (!value || edge.value == value);
  });
  bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) {
    return edge.id == srcId && (!value || edge.value == value);
  });
  return hasOutEdge && hasInEdge;
}

// Adds an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
                                    Value value) {
  if (!hasEdge(srcId, dstId, value)) {
    outEdges[srcId].push_back({dstId, value});
    inEdges[dstId].push_back({srcId, value});
    if (isa<MemRefType>(value.getType()))
      memrefEdgeCount[value]++;
  }
}

// Removes an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
                                       Value value) {
  assert(inEdges.count(dstId) > 0);
  assert(outEdges.count(srcId) > 0);
  if (isa<MemRefType>(value.getType())) {
    assert(memrefEdgeCount.count(value) > 0);
    memrefEdgeCount[value]--;
  }
  // Remove 'srcId' from 'inEdges[dstId]'.
  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
    if ((*it).id == srcId && (*it).value == value) {
      inEdges[dstId].erase(it);
      break;
    }
  }
  // Remove 'dstId' from 'outEdges[srcId]'.
  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
    if ((*it).id == dstId && (*it).value == value) {
      outEdges[srcId].erase(it);
      break;
    }
  }
}

// Returns true if there is a path in the dependence graph from node 'srcId'
// to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
// operations that the edges connect are expected to be from the same block.
bool MemRefDependenceGraph::hasDependencePath(unsigned srcId,
                                              unsigned dstId) const {
  // Worklist state is: <node-id, next-output-edge-index-to-visit>
  SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
  worklist.push_back({srcId, 0});
  Operation *dstOp = getNode(dstId)->op;
  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
  while (!worklist.empty()) {
    auto &idAndIndex = worklist.back();
    // Return true if we have reached 'dstId'.
    if (idAndIndex.first == dstId)
      return true;
    // Pop and continue if node has no out edges, or if all out edges have
    // already been visited.
    if (!outEdges.contains(idAndIndex.first) ||
        idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) {
      worklist.pop_back();
      continue;
    }
    // Get graph edge to traverse.
    const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second];
    // Increment next output edge index for 'idAndIndex'.
    ++idAndIndex.second;
    // Add node at 'edge.id' to the worklist. We don't need to consider
    // nodes that are "after" dstId in the containing block; one can't have a
    // path to `dstId` from any of those nodes.
    bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
    if (!afterDst && edge.id != idAndIndex.first)
      worklist.push_back({edge.id, 0});
  }
  return false;
}
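
// For instance, with an edge n0 -> n1 on a memref and an edge n1 -> n2 on an
// SSA value, hasDependencePath(n0, n2) returns true via the transitive path
// through n1, while hasDependencePath(n2, n0) returns false.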

// Returns the input edge count for node 'id' and 'memref' from src nodes
// which access 'memref' with a store operation.
unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
                                                          Value memref) const {
  unsigned inEdgeCount = 0;
  for (const Edge &inEdge : inEdges.lookup(id)) {
    if (inEdge.value == memref) {
      const Node *srcNode = getNode(inEdge.id);
      // Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
      // with a store.
      if (srcNode->getStoreOpCount(memref) > 0)
        ++inEdgeCount;
    }
  }
  return inEdgeCount;
}

// Returns the output edge count for node 'id' and 'memref' (if non-null),
// otherwise returns the total output edge count from node 'id'.
unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id,
                                                Value memref) const {
  unsigned outEdgeCount = 0;
  for (const auto &outEdge : outEdges.lookup(id))
    if (!memref || outEdge.value == memref)
      ++outEdgeCount;
  return outEdgeCount;
}

/// Returns all nodes which define SSA values used in node 'id'.
void MemRefDependenceGraph::gatherDefiningNodes(
    unsigned id, DenseSet<unsigned> &definingNodes) const {
  for (const Edge &edge : inEdges.lookup(id))
    // By definition of edge, if the edge value is a non-memref value,
    // then the dependence is between a graph node which defines an SSA value
    // and another graph node which uses the SSA value.
    if (!isa<MemRefType>(edge.value.getType()))
      definingNodes.insert(edge.id);
}

// Computes and returns an insertion point operation, before which the
// fused <srcId, dstId> loop nest can be inserted while preserving
// dependences. Returns nullptr if no such insertion point is found.
Operation *
MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
                                                      unsigned dstId) const {
  if (!outEdges.contains(srcId))
    return getNode(dstId)->op;

  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
  DenseSet<unsigned> definingNodes;
  gatherDefiningNodes(dstId, definingNodes);
  if (llvm::any_of(definingNodes,
                   [&](unsigned id) { return hasDependencePath(srcId, id); })) {
    LLVM_DEBUG(llvm::dbgs()
               << "Can't fuse: a defining op with a user in the dst "
                  "loop has dependence from the src loop\n");
    return nullptr;
  }

  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
  SmallPtrSet<Operation *, 2> srcDepInsts;
  for (auto &outEdge : outEdges.lookup(srcId))
    if (outEdge.id != dstId)
      srcDepInsts.insert(getNode(outEdge.id)->op);

  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
  SmallPtrSet<Operation *, 2> dstDepInsts;
  for (auto &inEdge : inEdges.lookup(dstId))
    if (inEdge.id != srcId)
      dstDepInsts.insert(getNode(inEdge.id)->op);

  Operation *srcNodeInst = getNode(srcId)->op;
  Operation *dstNodeInst = getNode(dstId)->op;

  // Computing insertion point:
  // *) Walk all operation positions in Block operation list in the
  //    range (src, dst). For each operation 'op' visited in this search:
  //   *) Store in 'firstSrcDepPos' the first position where 'op' has a
  //      dependence edge from 'srcNode'.
  //   *) Store in 'lastDstDepPos' the last position where 'op' has a
  //      dependence edge to 'dstNode'.
  // *) Compare 'firstSrcDepPos' and 'lastDstDepPos' to determine the
  //    operation insertion point (or return nullptr if no such
  //    insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
  SmallVector<Operation *, 2> depInsts;
  std::optional<unsigned> firstSrcDepPos;
  std::optional<unsigned> lastDstDepPos;
  unsigned pos = 0;
  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
       it != Block::iterator(dstNodeInst); ++it) {
    Operation *op = &(*it);
    if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
      firstSrcDepPos = pos;
    if (dstDepInsts.count(op) > 0)
      lastDstDepPos = pos;
    depInsts.push_back(op);
    ++pos;
  }

  if (firstSrcDepPos.has_value()) {
    if (lastDstDepPos.has_value()) {
      if (*firstSrcDepPos <= *lastDstDepPos) {
        // No valid insertion point exists which preserves dependences.
        return nullptr;
      }
    }
    // Return the insertion point at 'firstSrcDepPos'.
    return depInsts[*firstSrcDepPos];
  }
  // No dependence targets in range (or only dst deps in range), return
  // 'dstNodeInst' insertion point.
  return dstNodeInst;
}
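
// A small worked example of the position logic above: with ops P0, P1 between
// src and dst, if P0 depends on src (firstSrcDepPos = 0) and dst depends on
// P1 (lastDstDepPos = 1), then firstSrcDepPos <= lastDstDepPos and no
// insertion point preserves both dependences, so nullptr is returned. If
// instead P1 depends on src and dst depends on P0, firstSrcDepPos = 1 >
// lastDstDepPos = 0, and the fused nest can be inserted right before P1.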

// Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
// taking into account that:
// *) if 'removeSrcId' is true, 'srcId' will be removed after fusion,
// *) memrefs in 'privateMemRefs' have been replaced in node at 'dstId' by a
//    private memref.
void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
                                        const DenseSet<Value> &privateMemRefs,
                                        bool removeSrcId) {
  // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
  if (inEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
    for (auto &inEdge : oldInEdges) {
      // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
      if (!privateMemRefs.contains(inEdge.value))
        addEdge(inEdge.id, dstId, inEdge.value);
    }
  }
  // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
  // If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
  if (outEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
    for (auto &outEdge : oldOutEdges) {
      // Remove any out edges from 'srcId' to 'dstId' across memrefs.
      if (outEdge.id == dstId)
        removeEdge(srcId, outEdge.id, outEdge.value);
      else if (removeSrcId) {
        addEdge(dstId, outEdge.id, outEdge.value);
        removeEdge(srcId, outEdge.id, outEdge.value);
      }
    }
  }
  // Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
  // replaced by a private memref). These edges could come from nodes
  // other than 'srcId' which were removed in the previous step.
  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
    SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
    for (auto &inEdge : oldInEdges)
      if (privateMemRefs.count(inEdge.value) > 0)
        removeEdge(inEdge.id, dstId, inEdge.value);
  }
}

// Updates edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
// of sibling node 'sibId' into node 'dstId'.
void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
  // For each edge in 'inEdges[sibId]':
  // *) Add new edge from source node 'inEdge.id' to 'dstNode'.
  // *) Remove edge from source node 'inEdge.id' to 'sibNode'.
  if (inEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
    for (auto &inEdge : oldInEdges) {
      addEdge(inEdge.id, dstId, inEdge.value);
      removeEdge(inEdge.id, sibId, inEdge.value);
    }
  }

  // For each edge in 'outEdges[sibId]':
  // *) Add new edge from 'dstId' to 'outEdge.id'.
  // *) Remove edge from 'sibId' to 'outEdge.id'.
  if (outEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
    for (auto &outEdge : oldOutEdges) {
      addEdge(dstId, outEdge.id, outEdge.value);
      removeEdge(sibId, outEdge.id, outEdge.value);
    }
  }
}

// Adds the ops in 'loads', 'stores', 'memrefLoads', 'memrefStores', and
// 'memrefFrees' to the node at 'id'.
void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads,
                                      ArrayRef<Operation *> stores,
                                      ArrayRef<Operation *> memrefLoads,
                                      ArrayRef<Operation *> memrefStores,
                                      ArrayRef<Operation *> memrefFrees) {
  Node *node = getNode(id);
  llvm::append_range(node->loads, loads);
  llvm::append_range(node->stores, stores);
  llvm::append_range(node->memrefLoads, memrefLoads);
  llvm::append_range(node->memrefStores, memrefStores);
  llvm::append_range(node->memrefFrees, memrefFrees);
}

// Clears the loads and stores of the node at 'id'.
void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
  Node *node = getNode(id);
  node->loads.clear();
  node->stores.clear();
}

// Calls 'callback' for each input edge incident to node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefInputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (inEdges.count(id) > 0)
    forEachMemRefEdge(inEdges[id], callback);
}

// Calls 'callback' for each output edge from node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefOutputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (outEdges.count(id) > 0)
    forEachMemRefEdge(outEdges[id], callback);
}

// Calls 'callback' for each edge in 'edges' which carries a memref
// dependence.
void MemRefDependenceGraph::forEachMemRefEdge(
    ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
  for (const auto &edge : edges) {
    // Skip if 'edge' is not a memref dependence edge.
    if (!isa<MemRefType>(edge.value.getType()))
      continue;
    assert(nodes.count(edge.id) > 0);
    // Skip if 'edge.id' is not a loop nest.
    if (!isa<AffineForOp>(getNode(edge.id)->op))
      continue;
    // Visit current input edge 'edge'.
    callback(edge);
  }
}

void MemRefDependenceGraph::print(raw_ostream &os) const {
  os << "\nMemRefDependenceGraph\n";
  os << "\nNodes:\n";
  for (const auto &idAndNode : nodes) {
    os << "Node: " << idAndNode.first << "\n";
    auto it = inEdges.find(idAndNode.first);
    if (it != inEdges.end()) {
      for (const auto &e : it->second)
        os << "  InEdge: " << e.id << " " << e.value << "\n";
    }
    it = outEdges.find(idAndNode.first);
    if (it != outEdges.end()) {
      for (const auto &e : it->second)
        os << "  OutEdge: " << e.id << " " << e.value << "\n";
    }
  }
}

void mlir::affine::getAffineForIVs(Operation &op,
                                   SmallVectorImpl<AffineForOp> *loops) {
  auto *currOp = op.getParentOp();
  // Traverse up the hierarchy collecting all 'affine.for' operations while
  // skipping over 'affine.if' operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
      loops->push_back(currAffineForOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(loops->begin(), loops->end());
}
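
// For illustration, given a hypothetical nesting (IR sketch):
//
//   affine.for %i = 0 to 10 {
//     affine.if #set(%i) {
//       affine.for %j = 0 to 10 {
//         "test.op"() : () -> ()   // <-- 'op'
//       }
//     }
//   }
//
// getAffineForIVs(*op, &loops) populates 'loops' with the %i and %j loops,
// outermost first, skipping over the affine.if.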

void mlir::affine::getEnclosingAffineOps(Operation &op,
                                         SmallVectorImpl<Operation *> *ops) {
  ops->clear();
  Operation *currOp = op.getParentOp();

  // Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and
  // `affine.parallel` operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
      ops->push_back(currOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(ops->begin(), ops->end());
}

// Populates 'cst' with FlatAffineValueConstraints which represent the original
// domain of the loop bounds that define 'ivs'.
LogicalResult ComputationSliceState::getSourceAsConstraints(
    FlatAffineValueConstraints &cst) const {
  assert(!ivs.empty() && "Cannot have a slice without its IVs");
  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
                                   /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(cst.addAffineForOpDomain(loop)))
      return failure();
  }
  return success();
}

// Populates 'cst' with FlatAffineValueConstraints which represent slice bounds.
LogicalResult
ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const {
  assert(!lbOperands.empty());
  // Adds src 'ivs' as dimension variables in 'cst'.
  unsigned numDims = ivs.size();
  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
  unsigned numSymbols = lbOperands[0].size();

  SmallVector<Value, 4> values(ivs);
  // Append 'ivs' then 'operands' to 'values'.
  values.append(lbOperands[0].begin(), lbOperands[0].end());
  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);

  // Add loop bound constraints for values which are loop IVs of the destination
  // of fusion and equality constraints for symbols which are constants.
  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
    Value value = values[i];
    assert(cst->containsVar(value) && "value expected to be present");
    if (isValidSymbol(value)) {
      // Check if the symbol is a constant.
      if (std::optional<int64_t> cOp = getConstantIntValue(value))
        cst->addBound(BoundType::EQ, value, cOp.value());
    } else if (auto loop = getForInductionVarOwner(value)) {
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }

  // Add slice bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'.
  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
  assert(succeeded(ret) &&
         "should not fail as we never have semi-affine slice maps");
  (void)ret;
  return success();
}

// Clears state bounds and operand state.
void ComputationSliceState::clearBounds() {
  lbs.clear();
  ubs.clear();
  lbOperands.clear();
  ubOperands.clear();
}

void ComputationSliceState::dump() const {
  llvm::errs() << "\tIVs:\n";
  for (Value iv : ivs)
    llvm::errs() << "\t\t" << iv << "\n";

  llvm::errs() << "\tLBs:\n";
  for (auto en : llvm::enumerate(lbs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value lbOp : lbOperands[en.index()])
      llvm::errs() << "\t\t\t" << lbOp << "\n";
  }

  llvm::errs() << "\tUBs:\n";
  for (auto en : llvm::enumerate(ubs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value ubOp : ubOperands[en.index()])
      llvm::errs() << "\t\t\t" << ubOp << "\n";
  }
}

/// Fast check to determine if the computation slice is maximal. Returns true if
/// each slice dimension maps to an existing dst dimension and both the src
/// and the dst loops for those dimensions have the same bounds. Returns false
/// if the src and the dst loops don't have the same bounds. Returns
/// std::nullopt if none of the above can be proven.
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
         "Unexpected number of lbs, ubs and ivs in slice");

  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
    AffineMap lbMap = lbs[i];
    AffineMap ubMap = ubs[i];

    // Check if this slice is just an equality along this dimension.
    if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
        ubMap.getNumResults() != 1 ||
        lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
        // The condition above will be true for maps describing a single
        // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
        // Make sure we skip those cases by checking that the lb result is not
        // just a constant.
        isa<AffineConstantExpr>(lbMap.getResult(0)))
      return std::nullopt;

    // Limited support: we expect the lb result to be just a loop dimension for
    // now.
    AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
    if (!result)
      return std::nullopt;

    // Retrieve dst loop bounds.
    AffineForOp dstLoop =
        getForInductionVarOwner(lbOperands[i][result.getPosition()]);
    if (!dstLoop)
      return std::nullopt;
    AffineMap dstLbMap = dstLoop.getLowerBoundMap();
    AffineMap dstUbMap = dstLoop.getUpperBoundMap();

    // Retrieve src loop bounds.
    AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
    assert(srcLoop && "Expected affine for");
    AffineMap srcLbMap = srcLoop.getLowerBoundMap();
    AffineMap srcUbMap = srcLoop.getUpperBoundMap();

    // Limited support: we expect simple src and dst loops with a single
    // constant component per bound for now.
    if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
        dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
      return std::nullopt;

    AffineExpr srcLbResult = srcLbMap.getResult(0);
    AffineExpr dstLbResult = dstLbMap.getResult(0);
    AffineExpr srcUbResult = srcUbMap.getResult(0);
    AffineExpr dstUbResult = dstUbMap.getResult(0);
    if (!isa<AffineConstantExpr>(srcLbResult) ||
        !isa<AffineConstantExpr>(srcUbResult) ||
        !isa<AffineConstantExpr>(dstLbResult) ||
        !isa<AffineConstantExpr>(dstUbResult))
      return std::nullopt;

    // Check if src and dst loop bounds are the same. If not, we can guarantee
    // that the slice is not maximal.
    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
        srcLoop.getStep() != dstLoop.getStep())
      return false;
  }

  return true;
}
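
// As an example of the fast check: a slice dimension with bounds
// lb = (d0) -> (d0) and ub = (d0) -> (d0 + 1), where d0 is a dst loop IV,
// describes a single iteration per dst iteration; if the src and dst loops
// both run from 0 to 10 with step 1, the check returns true, while differing
// constant bounds would make it return false.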

/// Returns true if it is deterministically verified that the original iteration
/// space of the slice is contained within the new iteration space that is
/// created after fusing 'this' slice into its destination.
std::optional<bool> ComputationSliceState::isSliceValid() const {
  // Fast check to determine if the slice is valid. If the following conditions
  // are verified to be true, slice is declared valid by the fast check:
  // 1. Each slice loop is a single iteration loop bound in terms of a single
  //    destination loop IV.
  // 2. Loop bounds of the destination loop IV (from above) and those of the
  //    source loop IV are exactly the same.
  // If the fast check is inconclusive or false, we proceed with a more
  // expensive analysis.
  // TODO: Store the result of the fast check, as it might be used again in
  // `canRemoveSrcNodeAfterFusion`.
  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
  if (isValidFastCheck && *isValidFastCheck)
    return true;

  // Create constraints for the source loop nest using which the slice is
  // computed.
  FlatAffineValueConstraints srcConstraints;
  // TODO: Store the source's domain to avoid computation at each depth.
  if (failed(getSourceAsConstraints(srcConstraints))) {
    LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
    return std::nullopt;
  }
  // As the set difference utility currently cannot handle symbols in its
  // operands, validity of the slice cannot be determined.
  if (srcConstraints.getNumSymbolVars() > 0) {
    LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
    return std::nullopt;
  }
  // TODO: Handle local vars in the source domains while using the 'projectOut'
  // utility below. Currently, aligning is not done assuming that there will be
  // no local vars in the source domain.
  if (srcConstraints.getNumLocalVars() != 0) {
    LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
    return std::nullopt;
  }

  // Create constraints for the slice loop nest that would be created if the
  // fusion succeeds.
  FlatAffineValueConstraints sliceConstraints;
  if (failed(getAsConstraints(&sliceConstraints))) {
    LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
    return std::nullopt;
  }

  // Project out every dimension other than the 'ivs' to express the slice's
  // domain completely in terms of the source's IVs.
  sliceConstraints.projectOut(ivs.size(),
                              sliceConstraints.getNumVars() - ivs.size());

  LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
  LLVM_DEBUG(srcConstraints.dump());
  LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
                             "(expressed in terms of its source's IVs):\n");
  LLVM_DEBUG(sliceConstraints.dump());

  // TODO: Store 'srcSet' to avoid recalculating for each depth.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = sliceSet.subtract(srcSet);

  if (!diffSet.isIntegerEmpty()) {
    LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
    return false;
  }
  return true;
}

/// Returns true if the computation slice encloses all the iterations of the
/// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
/// cannot determine if the slice is maximal or not.
std::optional<bool> ComputationSliceState::isMaximal() const {
  // Fast check to determine if the computation slice is maximal. If the result
  // is inconclusive, we proceed with a more expensive analysis.
  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
  if (isMaximalFastCheck)
    return isMaximalFastCheck;

  // Create constraints for the src loop nest being sliced.
  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
                                            /*numSymbols=*/0,
                                            /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(srcConstraints.addAffineForOpDomain(loop)))
      return std::nullopt;
  }

  // Create constraints for the slice using the dst loop nest information. We
  // retrieve existing dst loops from the lbOperands.
  SmallVector<Value> consumerIVs;
  for (Value lbOp : lbOperands[0])
    if (getForInductionVarOwner(lbOp))
      consumerIVs.push_back(lbOp);

  // Add empty IV Values for those new loops that are not equalities and,
  // therefore, are not yet materialized in the IR.
  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
    consumerIVs.push_back(Value());

  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
                                              /*numSymbols=*/0,
                                              /*numLocals=*/0, consumerIVs);

  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
    return std::nullopt;

  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
    // Constraint dims are different. The integer set difference can't be
    // computed so we don't know if the slice is maximal.
    return std::nullopt;

  // Compute the difference between the src loop nest and the slice integer
  // sets.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = srcSet.subtract(sliceSet);
  return diffSet.isIntegerEmpty();
}

unsigned MemRefRegion::getRank() const {
  return cast<MemRefType>(memref.getType()).getRank();
}

std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
    SmallVectorImpl<int64_t> *shape, SmallVectorImpl<AffineMap> *lbs) const {
  auto memRefType = cast<MemRefType>(memref.getType());
  MLIRContext *context = memref.getContext();
  unsigned rank = memRefType.getRank();
  if (shape)
    shape->reserve(rank);

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  // Use a copy of the region constraints that has upper/lower bounds for each
  // memref dimension with static size added to guard against potential
  // over-approximation from projection or union bounding box. We may not add
  // this on the region itself since they might just be redundant constraints
  // that will need non-trivial means to eliminate.
  FlatLinearValueConstraints cstWithShapeBounds(cst);
  for (unsigned r = 0; r < rank; r++) {
    cstWithShapeBounds.addBound(BoundType::LB, r, 0);
    int64_t dimSize = memRefType.getDimSize(r);
    if (ShapedType::isDynamic(dimSize))
      continue;
    cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
  }

  // Find a constant upper bound on the extent of this memref region along
  // each dimension.
  int64_t numElements = 1;
  int64_t diffConstant;
  for (unsigned d = 0; d < rank; d++) {
    AffineMap lb;
    std::optional<int64_t> diff =
        cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb);
    if (diff.has_value()) {
      diffConstant = *diff;
      assert(diffConstant >= 0 && "dim size bound cannot be negative");
    } else {
      // If no constant bound is found, then it can always be bound by the
      // memref's dim size if the latter has a constant size along this dim.
      auto dimSize = memRefType.getDimSize(d);
      if (ShapedType::isDynamic(dimSize))
        return std::nullopt;
      diffConstant = dimSize;
      // Lower bound becomes 0.
      lb = AffineMap::get(/*dimCount=*/0, cstWithShapeBounds.getNumSymbolVars(),
                          /*result=*/getAffineConstantExpr(0, context));
    }
    numElements *= diffConstant;
    // Populate outputs if available.
    if (lbs)
      lbs->push_back(lb);
    if (shape)
      shape->push_back(diffConstant);
  }
  return numElements;
}
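
// For instance, for a region of a memref<256x256xf32> with constraints
// {%i <= d0 <= %i + 7, 0 <= d1 <= 127}, the bounding shape computed above is
// [8, 128] and the returned element count is 1024, with the lower bound map
// for d0 being symbolic in %i.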

void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                                         AffineMap &ubMap) const {
  assert(pos < cst.getNumDimVars() && "invalid position");
  auto memRefType = cast<MemRefType>(memref.getType());
  unsigned rank = memRefType.getRank();

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  auto boundPairs = cst.getLowerAndUpperBound(
      pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
      /*localExprs=*/{}, memRefType.getContext());
  lbMap = boundPairs.first;
  ubMap = boundPairs.second;
  assert(lbMap && "lower bound for a region must exist");
  assert(ubMap && "upper bound for a region must exist");
  assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
  assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}

LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
  assert(memref == other.memref);
  return cst.unionBoundingBox(*other.getConstraints());
}

/// Computes the memory region accessed by this memref with the region
/// represented as constraints symbolic/parametric in 'loopDepth' loops
/// surrounding opInst and any additional Function symbols.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
//   affine.for %i = 0 to 32 {
//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//       load %A[%ii]
//     }
//   }
//
// region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
//
// TODO: extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                    const ComputationSliceState *sliceState,
                                    bool addMemRefDimBounds, bool dropLocalVars,
                                    bool dropOuterIvs) {
  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
         "affine read/write op expected");

  MemRefAccess access(op);
  memref = access.memref;
  write = access.isStore();

  unsigned rank = access.getRank();

  LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
                          << "\ndepth: " << loopDepth << "\n";);

  // 0-d memrefs.
  if (rank == 0) {
    SmallVector<Value, 4> ivs;
    getAffineIVs(*op, ivs);
    assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
    // The first 'loopDepth' IVs are symbols for this region.
    ivs.resize(loopDepth);
    // A 0-d memref has a 0-d region.
    cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
    return success();
  }

  // Build the constraints for this region.
  AffineValueMap accessValueMap;
  access.getAccessMap(&accessValueMap);
  AffineMap accessMap = accessValueMap.getAffineMap();

  unsigned numDims = accessMap.getNumDims();
  unsigned numSymbols = accessMap.getNumSymbols();
  unsigned numOperands = accessValueMap.getNumOperands();
  // Merge operands with slice operands.
  SmallVector<Value, 4> operands;
  operands.resize(numOperands);
  for (unsigned i = 0; i < numOperands; ++i)
    operands[i] = accessValueMap.getOperand(i);

  if (sliceState != nullptr) {
    operands.reserve(operands.size() + sliceState->lbOperands[0].size());
    // Append slice operands to 'operands' as symbols.
    for (auto extraOperand : sliceState->lbOperands[0]) {
      if (!llvm::is_contained(operands, extraOperand)) {
        operands.push_back(extraOperand);
        numSymbols++;
      }
    }
  }
  // We'll first associate the dims and symbols of the access map to the dims
  // and symbols resp. of cst. This will change below once cst is
  // fully constructed.
  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);

  // Add equality constraints.
  // Add inequalities for loop lower/upper bounds.
  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
    auto operand = operands[i];
    if (auto affineFor = getForInductionVarOwner(operand)) {
      // Note that cst can now have more dimensions than accessMap if the
      // bounds expressions involve outer loops or other symbols.
      // TODO: rewrite this to use getInstIndexSet; this way
      // conditionals will be handled when the latter supports it.
      if (failed(cst.addAffineForOpDomain(affineFor)))
        return failure();
    } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
      if (failed(cst.addAffineParallelOpDomain(parallelOp)))
        return failure();
    } else if (isValidSymbol(operand)) {
      // Check if the symbol is a constant.
      Value symbol = operand;
      if (auto constVal = getConstantIntValue(symbol))
        cst.addBound(BoundType::EQ, symbol, constVal.value());
    } else {
      LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
      return failure();
    }
  }

  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
  if (sliceState != nullptr) {
    // Add dim and symbol slice operands.
    for (auto operand : sliceState->lbOperands[0]) {
      if (failed(cst.addInductionVarOrTerminalSymbol(operand)))
        return failure();
    }
    // Add upper/lower bounds from 'sliceState' to 'cst'.
    LogicalResult ret =
        cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
                           sliceState->lbOperands[0]);
    assert(succeeded(ret) &&
           "should not fail as we never have semi-affine slice maps");
    (void)ret;
  }

  // Add access function equalities to connect loop IVs to data dimensions.
  if (failed(cst.composeMap(&accessValueMap))) {
    op->emitError("getMemRefRegion: compose affine map failed");
    LLVM_DEBUG(accessValueMap.getAffineMap().dump());
    return failure();
  }

  // Set all variables appearing after the first 'rank' variables as
  // symbolic variables - so that the ones corresponding to the memref
  // dimensions are the dimensional variables for the memref region.
  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);

  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
  // this memref region is symbolic.
  SmallVector<Value, 4> enclosingIVs;
  getAffineIVs(*op, enclosingIVs);
  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
  enclosingIVs.resize(loopDepth);
  SmallVector<Value, 4> vars;
  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
  for (auto en : llvm::enumerate(vars)) {
    if ((isAffineInductionVar(en.value())) &&
        !llvm::is_contained(enclosingIVs, en.value())) {
      if (dropOuterIvs) {
        cst.projectOut(en.value());
      } else {
        unsigned varPosition;
        cst.findVar(en.value(), &varPosition);
        auto varKind = cst.getVarKindAt(varPosition);
        varPosition -= cst.getNumDimVars();
        cst.convertToLocal(varKind, varPosition, varPosition + 1);
      }
    }
  }

  // Project out any local variables (these would have been added for any
  // mod/divs) if specified.
  if (dropLocalVars)
    cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());

  // Constant fold any symbolic variables.
  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
                           /*num=*/cst.getNumSymbolVars());

  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");

  // Add upper/lower bounds for each memref dimension with static size
  // to guard against potential over-approximation from projection.
  // TODO: Support dynamic memref dimensions.
  if (addMemRefDimBounds) {
    auto memRefType = cast<MemRefType>(memref.getType());
    for (unsigned r = 0; r < rank; r++) {
      cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
      if (memRefType.isDynamicDim(r))
        continue;
      cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
    }
  }
  cst.removeTrivialRedundancy();

  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
  LLVM_DEBUG(cst.dump());
  return success();
}

std::optional<int64_t>
mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
  auto elementType = memRefType.getElementType();

  unsigned sizeInBits;
  if (elementType.isIntOrFloat()) {
    sizeInBits = elementType.getIntOrFloatBitWidth();
  } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
    if (vectorType.getElementType().isIntOrFloat())
      sizeInBits =
          vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
    else
      return std::nullopt;
  } else {
    return std::nullopt;
  }
  return llvm::divideCeil(sizeInBits, 8);
}
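
// E.g., for an element type of vector<4xf32>, the size is 4 x 32 bits =
// 128 bits, i.e., 16 bytes.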

// Returns the size of the region.
std::optional<int64_t> MemRefRegion::getRegionSize() {
  auto memRefType = cast<MemRefType>(memref.getType());

  if (!memRefType.getLayout().isIdentity()) {
    LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
    return std::nullopt;
  }

  // Indices to use for the DmaStart op.
  // Indices for the original memref being DMAed from/to.
  SmallVector<Value, 4> memIndices;
  // Indices for the faster buffer being DMAed into/from.
  SmallVector<Value, 4> bufIndices;

  // Compute the extents of the buffer.
  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
  if (!numElements) {
    LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
    return std::nullopt;
  }
  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!eltSize)
    return std::nullopt;
  return *eltSize * *numElements;
}

/// Returns the size of memref data in bytes if it's statically shaped,
/// std::nullopt otherwise. If the element of the memref has vector type, takes
/// into account size of the vector as well.
// TODO: improve/complete this when we have target data.
std::optional<uint64_t>
mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
  if (!memRefType.hasStaticShape())
    return std::nullopt;
  auto elementType = memRefType.getElementType();
  if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
    return std::nullopt;

  auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!sizeInBytes)
    return std::nullopt;
  for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
    sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
  }
  return sizeInBytes;
}

template <typename LoadOrStoreOp>
LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
                                                    bool emitError) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "argument should be either an AffineReadOpInterface or an "
                "AffineWriteOpInterface");

  Operation *op = loadOrStoreOp.getOperation();
  MemRefRegion region(op->getLoc());
  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
                            /*addMemRefDimBounds=*/false)))
    return success();

  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
  LLVM_DEBUG(region.getConstraints()->dump());

  bool outOfBounds = false;
  unsigned rank = loadOrStoreOp.getMemRefType().getRank();

  // For each dimension, check for out of bounds.
  for (unsigned r = 0; r < rank; r++) {
    FlatAffineValueConstraints ucst(*region.getConstraints());

    // Intersect memory region with constraint capturing out of bounds (both out
    // of upper and out of lower), and check if the constraint system is
    // feasible. If it is, there is at least one point out of bounds.
    int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
    // TODO: handle dynamic dim sizes.
    if (ShapedType::isDynamic(dimSize))
      continue;

    // Check for overflow: d_i >= memref dim size.
    ucst.addBound(BoundType::LB, r, dimSize);
    outOfBounds = !ucst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of upper bound access along dimension #" << (r + 1);
    }

    // Check for a negative index: d_i <= -1.
    FlatAffineValueConstraints lcst(*region.getConstraints());
    lcst.addBound(BoundType::UB, r, -1);
    outOfBounds = !lcst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of lower bound access along dimension #" << (r + 1);
    }
  }
  return failure(outOfBounds);
}

// Explicitly instantiate the template so that the compiler knows we need them!
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
                                      bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
                                      bool emitError);
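
// For illustration, a hypothetical out-of-bounds access such as (IR sketch):
//
//   affine.for %i = 0 to 12 {
//     %v = affine.load %A[%i] : memref<10xf32>
//   }
//
// is flagged with "memref out of upper bound access along dimension #1":
// intersecting the access region {0 <= d0 <= 11} with d0 >= 10 is feasible.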

// Returns in 'positions' the Block positions of 'op' in each ancestor
// Block from the Block containing operation, stopping at 'limitBlock'.
static void findInstPosition(Operation *op, Block *limitBlock,
                             SmallVectorImpl<unsigned> *positions) {
  Block *block = op->getBlock();
  while (block != limitBlock) {
    // FIXME: This algorithm is unnecessarily O(n) and should be improved to not
    // rely on linear scans.
    int instPosInBlock = std::distance(block->begin(), op->getIterator());
    positions->push_back(instPosInBlock);
    op = block->getParentOp();
    block = op->getBlock();
  }
  std::reverse(positions->begin(), positions->end());
}
1476 
1477 // Returns the Operation in a possibly nested set of Blocks, where the
1478 // position of the operation is represented by 'positions', which has a
1479 // Block position for each level of nesting.
1480 static Operation *getInstAtPosition(ArrayRef<unsigned> positions,
1481  unsigned level, Block *block) {
1482  unsigned i = 0;
1483  for (auto &op : *block) {
1484  if (i != positions[level]) {
1485  ++i;
1486  continue;
1487  }
1488  if (level == positions.size() - 1)
1489  return &op;
1490  if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
1491  return getInstAtPosition(positions, level + 1,
1492  childAffineForOp.getBody());
1493 
1494  for (auto &region : op.getRegions()) {
1495  for (auto &b : region)
1496  if (auto *ret = getInstAtPosition(positions, level + 1, &b))
1497  return ret;
1498  }
1499  return nullptr;
1500  }
1501  return nullptr;
1502 }
1503 
1504 // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
1505 static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
1506  FlatAffineValueConstraints *cst) {
1507  for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
1508  auto value = cst->getValue(i);
1509  if (ivs.count(value) == 0) {
1510  assert(isAffineForInductionVar(value));
1511  auto loop = getForInductionVarOwner(value);
1512  if (failed(cst->addAffineForOpDomain(loop)))
1513  return failure();
1514  }
1515  }
1516  return success();
1517 }
1518 
1519 /// Returns the innermost common loop depth for the set of operations in 'ops'.
1520 // TODO: Move this to LoopUtils.
1521 unsigned mlir::affine::getInnermostCommonLoopDepth(
1522  ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
1523  unsigned numOps = ops.size();
1524  assert(numOps > 0 && "Expected at least one operation");
1525 
1526  std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
1527  unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
1528  for (unsigned i = 0; i < numOps; ++i) {
1529  getAffineForIVs(*ops[i], &loops[i]);
1530  loopDepthLimit =
1531  std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
1532  }
1533 
1534  unsigned loopDepth = 0;
1535  for (unsigned d = 0; d < loopDepthLimit; ++d) {
1536  unsigned i;
1537  for (i = 1; i < numOps; ++i) {
1538  if (loops[i - 1][d] != loops[i][d])
1539  return loopDepth;
1540  }
1541  if (surroundingLoops)
1542  surroundingLoops->push_back(loops[i - 1][d]);
1543  ++loopDepth;
1544  }
1545  return loopDepth;
1546 }
1547 
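// Illustrative sketch (not part of the original file; the helper name is
// hypothetical): for two ops that share only their outermost affine.for, the
// returned depth is 1 and 'common' receives that single shared loop.
static unsigned innermostCommonDepth(Operation *a, Operation *b,
                                     SmallVectorImpl<AffineForOp> *common) {
  return getInnermostCommonLoopDepth({a, b}, common);
}
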
1548 /// Computes in 'sliceUnion' the union of all slice bounds computed at
1549 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
1550 /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
1551 /// union was computed correctly, an appropriate failure otherwise.
1552 SliceComputationResult
1553 mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA,
1554  ArrayRef<Operation *> opsB, unsigned loopDepth,
1555  unsigned numCommonLoops, bool isBackwardSlice,
1556  ComputationSliceState *sliceUnion) {
1557  // Compute the union of slice bounds between all pairs in 'opsA' and
1558  // 'opsB' in 'sliceUnionCst'.
1559  FlatAffineValueConstraints sliceUnionCst;
1560  assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
1561  std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
1562  for (Operation *i : opsA) {
1563  MemRefAccess srcAccess(i);
1564  for (Operation *j : opsB) {
1565  MemRefAccess dstAccess(j);
1566  if (srcAccess.memref != dstAccess.memref)
1567  continue;
1568  // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
1569  if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) ||
1570  (isBackwardSlice && loopDepth > getNestingDepth(j))) {
1571  LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
1572  return SliceComputationResult::GenericFailure;
1573  }
1574 
1575  bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
1576  isa<AffineReadOpInterface>(dstAccess.opInst);
1577  FlatAffineValueConstraints dependenceConstraints;
1578  // Check dependence between 'srcAccess' and 'dstAccess'.
1579  DependenceResult result = checkMemrefAccessDependence(
1580  srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
1581  &dependenceConstraints, /*dependenceComponents=*/nullptr,
1582  /*allowRAR=*/readReadAccesses);
1583  if (result.value == DependenceResult::Failure) {
1584  LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
1585  return SliceComputationResult::GenericFailure;
1586  }
1587  if (result.value == DependenceResult::NoDependence)
1588  continue;
1589  dependentOpPairs.emplace_back(i, j);
1590 
1591  // Compute slice bounds for 'srcAccess' and 'dstAccess'.
1592  ComputationSliceState tmpSliceState;
1593  mlir::affine::getComputationSliceState(i, j, dependenceConstraints,
1594  loopDepth, isBackwardSlice,
1595  &tmpSliceState);
1596 
1597  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1598  // Initialize 'sliceUnionCst' with the bounds computed in previous step.
1599  if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
1600  LLVM_DEBUG(llvm::dbgs()
1601  << "Unable to compute slice bound constraints\n");
1602  return SliceComputationResult::GenericFailure;
1603  }
1604  assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
1605  continue;
1606  }
1607 
1608  // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
1609  FlatAffineValueConstraints tmpSliceCst;
1610  if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
1611  LLVM_DEBUG(llvm::dbgs()
1612  << "Unable to compute slice bound constraints\n");
1613  return SliceComputationResult::GenericFailure;
1614  }
1615 
1616  // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
1617  if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
1618 
1619  // Pre-constraint var alignment: record loop IVs used in each constraint
1620  // system.
1621  SmallPtrSet<Value, 8> sliceUnionIVs;
1622  for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
1623  sliceUnionIVs.insert(sliceUnionCst.getValue(k));
1624  SmallPtrSet<Value, 8> tmpSliceIVs;
1625  for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
1626  tmpSliceIVs.insert(tmpSliceCst.getValue(k));
1627 
1628  sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);
1629 
1630  // Post-constraint var alignment: add loop IV bounds missing after
1631  // var alignment to constraint systems. This can occur if one constraint
1632  // system uses a loop IV that is not used by the other. The call
1633  // to unionBoundingBox below expects constraints for each loop IV, even
1634  // if they are the unsliced full loop bounds added here.
1635  if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
1636  return SliceComputationResult::GenericFailure;
1637  if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
1638  return SliceComputationResult::GenericFailure;
1639  }
1640  // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
1641  if (sliceUnionCst.getNumLocalVars() > 0 ||
1642  tmpSliceCst.getNumLocalVars() > 0 ||
1643  failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
1644  LLVM_DEBUG(llvm::dbgs()
1645  << "Unable to compute union bounding box of slice bounds\n");
1646  return SliceComputationResult::GenericFailure;
1647  }
1648  }
1649  }
1650 
1651  // Empty union.
1652  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1653  LLVM_DEBUG(llvm::dbgs() << "empty slice union - unexpected\n");
1654  return SliceComputationResult::GenericFailure;
1655  }
1656 
1657  // Gather loops surrounding ops from loop nest where slice will be inserted.
1658  SmallVector<Operation *, 4> ops;
1659  for (auto &dep : dependentOpPairs) {
1660  ops.push_back(isBackwardSlice ? dep.second : dep.first);
1661  }
1662  SmallVector<AffineForOp, 4> surroundingLoops;
1663  unsigned innermostCommonLoopDepth =
1664  getInnermostCommonLoopDepth(ops, &surroundingLoops);
1665  if (loopDepth > innermostCommonLoopDepth) {
1666  LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
1667  return SliceComputationResult::GenericFailure;
1668  }
1669 
1670  // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
1671  unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
1672 
1673  // Convert any dst loop IVs which are symbol variables to dim variables.
1674  sliceUnionCst.convertLoopIVSymbolsToDims();
1675  sliceUnion->clearBounds();
1676  sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
1677  sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
1678 
1679  // Get slice bounds from slice union constraints 'sliceUnionCst'.
1680  sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
1681  opsA[0]->getContext(), &sliceUnion->lbs,
1682  &sliceUnion->ubs);
1683 
1684  // Add slice bound operands of union.
1685  SmallVector<Value, 4> sliceBoundOperands;
1686  sliceUnionCst.getValues(numSliceLoopIVs,
1687  sliceUnionCst.getNumDimAndSymbolVars(),
1688  &sliceBoundOperands);
1689 
1690  // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
1691  sliceUnion->ivs.clear();
1692  sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);
1693 
1694  // Set loop nest insertion point to block start at 'loopDepth' for forward
1695  // slices, while at the end for backward slices.
1696  sliceUnion->insertPoint =
1697  isBackwardSlice
1698  ? surroundingLoops[loopDepth - 1].getBody()->begin()
1699  : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
1700 
1701  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1702  // canonicalization.
1703  sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1704  sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1705 
1706  // Check if the slice computed is valid. Return success only if it is verified
1707  // that the slice is valid, otherwise return appropriate failure status.
1708  std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
1709  if (!isSliceValid) {
1710  LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
1711  return SliceComputationResult::GenericFailure;
1712  }
1713  if (!*isSliceValid)
1714  return SliceComputationResult::IncorrectSliceFailure;
1715 
1716  return SliceComputationResult::Success;
1717 }
1718 
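// Illustrative sketch (not part of the original file; the helper name is
// hypothetical): how a fusion-style client might request the union of
// backward slices of all source stores with respect to destination loads.
static bool computeBackwardSliceUnion(ArrayRef<Operation *> srcStores,
                                      ArrayRef<Operation *> dstLoads,
                                      unsigned loopDepth,
                                      unsigned numCommonLoops,
                                      ComputationSliceState &sliceUnion) {
  SliceComputationResult result =
      computeSliceUnion(srcStores, dstLoads, loopDepth, numCommonLoops,
                        /*isBackwardSlice=*/true, &sliceUnion);
  return result.value == SliceComputationResult::Success;
}
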
1719 // TODO: extend this to handle multiple result maps.
1720 static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1721  AffineMap ubMap) {
1722  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
1723  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
1724  assert(lbMap.getNumDims() == ubMap.getNumDims());
1725  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1726  AffineExpr lbExpr(lbMap.getResult(0));
1727  AffineExpr ubExpr(ubMap.getResult(0));
1728  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
1729  lbMap.getNumSymbols());
1730  auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
1731  if (!cExpr)
1732  return std::nullopt;
1733  return cExpr.getValue();
1734 }
1735 
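// Worked example for getConstDifference above (illustrative): with
// lbMap = (d0) -> (d0) and ubMap = (d0) -> (d0 + 4), the simplified
// difference is the constant 4. With lbMap = (d0)[s0] -> (d0) and
// ubMap = (d0)[s0] -> (s0), the difference s0 - d0 is not a constant
// expression, so std::nullopt is returned.
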
1736 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for the
1737 // loop nest represented by the slice loop bounds in 'slice'. Returns true
1738 // on success, false otherwise (if a non-constant trip count was encountered).
1739 // TODO: Make this work with non-unit step loops.
1740 bool mlir::affine::buildSliceTripCountMap(
1741  const ComputationSliceState &slice,
1742  llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1743  unsigned numSrcLoopIVs = slice.ivs.size();
1744  // Populate map from AffineForOp -> trip count
1745  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1746  AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1747  auto *op = forOp.getOperation();
1748  AffineMap lbMap = slice.lbs[i];
1749  AffineMap ubMap = slice.ubs[i];
1750  // If the lower or upper bound maps are null or provide no results, it
1751  // implies that the source loop was not sliced at all, and the entire loop
1752  // will be part of the slice.
1753  if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
1754  ubMap.getNumResults() == 0) {
1755  // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1756  if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1757  (*tripCountMap)[op] =
1758  forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1759  continue;
1760  }
1761  std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1762  if (maybeConstTripCount.has_value()) {
1763  (*tripCountMap)[op] = *maybeConstTripCount;
1764  continue;
1765  }
1766  return false;
1767  }
1768  std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1769  // Slice bounds are created with a constant ub - lb difference.
1770  if (!tripCount.has_value())
1771  return false;
1772  (*tripCountMap)[op] = *tripCount;
1773  }
1774  return true;
1775 }
1776 
1777 // Return the number of iterations in the given slice.
1778 uint64_t mlir::affine::getSliceIterationCount(
1779  const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1780  uint64_t iterCount = 1;
1781  for (const auto &count : sliceTripCountMap) {
1782  iterCount *= count.second;
1783  }
1784  return iterCount;
1785 }
1786 
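// Worked example (illustrative): if buildSliceTripCountMap recorded a trip
// count of 4 for an outer sliced loop and 2 for an inner one, then
// getSliceIterationCount returns 4 * 2 = 8, the total number of iterations
// executed by one execution of the slice.
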
1787 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1788 // Computes slice bounds by projecting out any loop IVs from
1789 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1790 // bounds in 'sliceState' which represent one loop nest's IVs in terms of
1791 // the other loop nest's IVs, symbols, and constants (using 'isBackwardSlice').
1792 void mlir::affine::getComputationSliceState(
1793  Operation *depSourceOp, Operation *depSinkOp,
1794  const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth,
1795  bool isBackwardSlice, ComputationSliceState *sliceState) {
1796  // Get loop nest surrounding src operation.
1797  SmallVector<AffineForOp, 4> srcLoopIVs;
1798  getAffineForIVs(*depSourceOp, &srcLoopIVs);
1799  unsigned numSrcLoopIVs = srcLoopIVs.size();
1800 
1801  // Get loop nest surrounding dst operation.
1802  SmallVector<AffineForOp, 4> dstLoopIVs;
1803  getAffineForIVs(*depSinkOp, &dstLoopIVs);
1804  unsigned numDstLoopIVs = dstLoopIVs.size();
1805 
1806  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1807  (isBackwardSlice && loopDepth <= numDstLoopIVs));
1808 
1809  // Project out dimensions other than those up to 'loopDepth'.
1810  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1811  unsigned num =
1812  isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1813  FlatAffineValueConstraints sliceCst(dependenceConstraints);
1814  sliceCst.projectOut(pos, num);
1815 
1816  // Add slice loop IV values to 'sliceState'.
1817  unsigned offset = isBackwardSlice ? 0 : loopDepth;
1818  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1819  sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs);
1820 
1821  // Set up lower/upper bound affine maps for the slice.
1822  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
1823  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
1824 
1825  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1826  sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(),
1827  &sliceState->lbs, &sliceState->ubs);
1828 
1829  // Set up bound operands for the slice's lower and upper bounds.
1830  SmallVector<Value, 4> sliceBoundOperands;
1831  unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars();
1832  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
1833  if (i < offset || i >= offset + numSliceLoopIVs)
1834  sliceBoundOperands.push_back(sliceCst.getValue(i));
1835  }
1836 
1837  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1838  // canonicalization.
1839  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1840  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1841 
1842  // Set destination loop nest insertion point to block start at 'dstLoopDepth'.
1843  sliceState->insertPoint =
1844  isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1845  : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1846 
1847  llvm::SmallDenseSet<Value, 8> sequentialLoops;
1848  if (isa<AffineReadOpInterface>(depSourceOp) &&
1849  isa<AffineReadOpInterface>(depSinkOp)) {
1850  // For read-read access pairs, clear any slice bounds on sequential loops.
1851  // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1852  getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
1853  &sequentialLoops);
1854  }
1855  auto getSliceLoop = [&](unsigned i) {
1856  return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1857  };
1858  auto isInnermostInsertion = [&]() {
1859  return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1860  : loopDepth >= dstLoopIVs.size());
1861  };
1862  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
1863  auto srcIsUnitSlice = [&]() {
1864  return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
1865  (getSliceIterationCount(sliceTripCountMap) == 1));
1866  };
1867  // Clear all sliced loop bounds beginning at the first sequential loop or the
1868  // first loop with a slice fusion barrier attribute.
1869 
1870  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
1871  Value iv = getSliceLoop(i).getInductionVar();
1872  if (sequentialLoops.count(iv) == 0 &&
1873  getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1874  continue;
1875  // Skip resetting the bounds of a reduction loop inserted in the destination
1876  // loop nest when all of the following conditions are met:
1877  // 1. The slice has a single trip count.
1878  // 2. The loop bounds of the source and destination match.
1879  // 3. The slice is being inserted at the innermost insertion point.
1880  std::optional<bool> isMaximal = sliceState->isMaximal();
1881  if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
1882  isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1883  continue;
1884  for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1885  sliceState->lbs[j] = AffineMap();
1886  sliceState->ubs[j] = AffineMap();
1887  }
1888  break;
1889  }
1890 }
1891 
1892 /// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1893 /// updates the slice loop bounds with any non-null bound maps specified in
1894 /// 'sliceState', and inserts this slice into the loop nest surrounding
1895 /// 'dstOpInst' at loop depth 'dstLoopDepth'.
1896 // TODO: extend the slicing utility to compute slices that
1897 // aren't necessarily a one-to-one relation between the source and destination. The
1898 // relation between the source and destination could be many-to-many in general.
1899 // TODO: the slice computation is incorrect in the cases
1900 // where the dependence from the source to the destination does not cover the
1901 // entire destination index set. Subtract out the dependent destination
1902 // iterations from destination index set and check for emptiness --- this is one
1903 // solution.
1904 AffineForOp mlir::affine::insertBackwardComputationSlice(
1905  Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
1906  ComputationSliceState *sliceState) {
1907  // Get loop nest surrounding src operation.
1908  SmallVector<AffineForOp, 4> srcLoopIVs;
1909  getAffineForIVs(*srcOpInst, &srcLoopIVs);
1910  unsigned numSrcLoopIVs = srcLoopIVs.size();
1911 
1912  // Get loop nest surrounding dst operation.
1913  SmallVector<AffineForOp, 4> dstLoopIVs;
1914  getAffineForIVs(*dstOpInst, &dstLoopIVs);
1915  unsigned dstLoopIVsSize = dstLoopIVs.size();
1916  if (dstLoopDepth > dstLoopIVsSize) {
1917  dstOpInst->emitError("invalid destination loop depth");
1918  return AffineForOp();
1919  }
1920 
1921  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1922  SmallVector<unsigned, 4> positions;
1923  // TODO: This code is incorrect since srcLoopIVs can be 0-d.
1924  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
1925 
1926  // Clone src loop nest and insert it at the beginning of the operation block
1927  // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1928  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1929  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1930  auto sliceLoopNest =
1931  cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
1932 
1933  Operation *sliceInst =
1934  getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
1935  // Get loop nest surrounding 'sliceInst'.
1936  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
1937  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1938 
1939  // Sanity check.
1940  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1941  (void)sliceSurroundingLoopsSize;
1942  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1943  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1944  (void)sliceLoopLimit;
1945  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1946 
1947  // Update loop bounds for loops in 'sliceLoopNest'.
1948  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1949  auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1950  if (AffineMap lbMap = sliceState->lbs[i])
1951  forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
1952  if (AffineMap ubMap = sliceState->ubs[i])
1953  forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
1954  }
1955  return sliceLoopNest;
1956 }
1957 
1958 // Constructs a MemRefAccess, populating it with the memref, its indices, and
1959 // opInst from 'loadOrStoreOpInst'.
1960 MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) {
1961  if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
1962  memref = loadOp.getMemRef();
1963  opInst = loadOrStoreOpInst;
1964  llvm::append_range(indices, loadOp.getMapOperands());
1965  } else {
1966  assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
1967  "Affine read/write op expected");
1968  auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
1969  opInst = loadOrStoreOpInst;
1970  memref = storeOp.getMemRef();
1971  llvm::append_range(indices, storeOp.getMapOperands());
1972  }
1973 }
1974 
1975 unsigned MemRefAccess::getRank() const {
1976  return cast<MemRefType>(memref.getType()).getRank();
1977 }
1978 
1979 bool MemRefAccess::isStore() const {
1980  return isa<AffineWriteOpInterface>(opInst);
1981 }
1982 
1983 /// Returns the nesting depth of this operation, i.e., the number of loops
1984 /// surrounding this operation.
1985 unsigned mlir::affine::getNestingDepth(Operation *op) {
1986  Operation *currOp = op;
1987  unsigned depth = 0;
1988  while ((currOp = currOp->getParentOp())) {
1989  if (isa<AffineForOp>(currOp))
1990  depth++;
1991  if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
1992  depth += parOp.getNumDims();
1993  }
1994  return depth;
1995 }
1996 
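// Worked example for getNestingDepth above (illustrative): an op nested in a
// single affine.for that is itself nested in an affine.parallel over two
// dimensions has nesting depth 1 + 2 = 3; affine.if ancestors contribute
// nothing to the depth.
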
1997 /// Equal if both affine accesses are provably equivalent (at compile
1998 /// time) when considering the memref, the affine maps and their respective
1999 /// operands. The equality of access functions + operands is checked by
2000 /// subtracting fully composed value maps, and then simplifying the difference
2001 /// using the expression flattener.
2002 /// TODO: this does not account for aliasing of memrefs.
2003 bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
2004  if (memref != rhs.memref)
2005  return false;
2006 
2007  AffineValueMap diff, thisMap, rhsMap;
2008  getAccessMap(&thisMap);
2009  rhs.getAccessMap(&rhsMap);
2010  return thisMap == rhsMap;
2011 }
2012 
2013 void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
2014  auto *currOp = op.getParentOp();
2015  AffineForOp currAffineForOp;
2016  // Traverse up the hierarchy collecting all 'affine.for' and 'affine.parallel'
2017  // operations while skipping over 'affine.if' operations.
2018  while (currOp) {
2019  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
2020  ivs.push_back(currAffineForOp.getInductionVar());
2021  else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
2022  llvm::append_range(ivs, parOp.getIVs());
2023  currOp = currOp->getParentOp();
2024  }
2025  std::reverse(ivs.begin(), ivs.end());
2026 }
2027 
2028 /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
2029 /// where each lists loops from outer-most to inner-most in loop nest.
2030 unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
2031  Operation &b) {
2032  SmallVector<Value, 4> loopsA, loopsB;
2033  getAffineIVs(a, loopsA);
2034  getAffineIVs(b, loopsB);
2035 
2036  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
2037  unsigned numCommonLoops = 0;
2038  for (unsigned i = 0; i < minNumLoops; ++i) {
2039  if (loopsA[i] != loopsB[i])
2040  break;
2041  ++numCommonLoops;
2042  }
2043  return numCommonLoops;
2044 }
2045 
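// Illustrative sketch (not part of the original file; the helper name is
// hypothetical): clients such as fusion typically feed this count into
// computeSliceUnion as 'numCommonLoops'.
static bool nestedUnderCommonLoop(Operation &a, Operation &b) {
  return getNumCommonSurroundingLoops(a, b) > 0;
}
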
2046 static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
2047  Block::iterator start,
2048  Block::iterator end,
2049  int memorySpace) {
2050  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
2051 
2052  // Walk this 'affine.for' operation to gather all memory regions.
2053  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
2054  if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
2055  // Neither load nor a store op.
2056  return WalkResult::advance();
2057  }
2058 
2059  // Compute the memref region symbolic in any IVs enclosing this block.
2060  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
2061  if (failed(
2062  region->compute(opInst,
2063  /*loopDepth=*/getNestingDepth(&*block.begin())))) {
2064  LLVM_DEBUG(opInst->emitError("error obtaining memory region"));
2065  return failure();
2066  }
2067 
2068  auto [it, inserted] = regions.try_emplace(region->memref);
2069  if (inserted) {
2070  it->second = std::move(region);
2071  } else if (failed(it->second->unionBoundingBox(*region))) {
2072  LLVM_DEBUG(opInst->emitWarning(
2073  "getMemoryFootprintBytes: unable to perform a union on a memory "
2074  "region"));
2075  return failure();
2076  }
2077  return WalkResult::advance();
2078  });
2079  if (result.wasInterrupted())
2080  return std::nullopt;
2081 
2082  int64_t totalSizeInBytes = 0;
2083  for (const auto &region : regions) {
2084  std::optional<int64_t> size = region.second->getRegionSize();
2085  if (!size.has_value())
2086  return std::nullopt;
2087  totalSizeInBytes += *size;
2088  }
2089  return totalSizeInBytes;
2090 }
2091 
2092 std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
2093  int memorySpace) {
2094  auto *forInst = forOp.getOperation();
2095  return ::getMemoryFootprintBytes(
2096  *forInst->getBlock(), Block::iterator(forInst),
2097  std::next(Block::iterator(forInst)), memorySpace);
2098 }
2099 
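// Illustrative sketch (not part of the original file; the helper name and the
// notion of a byte budget are hypothetical): gating a transformation on a
// loop nest's memory footprint, using the default memory space argument
// declared in the header.
static bool footprintFitsBudget(AffineForOp forOp, int64_t budgetBytes) {
  std::optional<int64_t> footprint = getMemoryFootprintBytes(forOp);
  return footprint.has_value() && *footprint <= budgetBytes;
}
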
2100 /// Returns whether a loop is parallel and contains a reduction loop.
2101 bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
2102  SmallVector<LoopReduction> reductions;
2103  if (!isLoopParallel(forOp, &reductions))
2104  return false;
2105  return !reductions.empty();
2106 }
2107 
2108 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
2109 /// at 'forOp'.
2110 void mlir::affine::getSequentialLoops(
2111  AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
2112  forOp->walk([&](Operation *op) {
2113  if (auto innerFor = dyn_cast<AffineForOp>(op))
2114  if (!isLoopParallel(innerFor))
2115  sequentialLoops->insert(innerFor.getInductionVar());
2116  });
2117 }
2118 
2119 IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
2120  FlatAffineValueConstraints fac(set);
2121  if (fac.isEmpty())
2122  return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
2123  set.getContext());
2124  fac.removeTrivialRedundancy();
2125 
2126  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
2127  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
2128  return simplifiedSet;
2129 }
2130 
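// Worked example for simplifyIntegerSet above (illustrative): the set
// (d0) : (d0 >= 0, -d0 + 4 >= 0, d0 - 10 >= 0) has no integer points, so the
// canonical empty set over one dimension and zero symbols is returned.
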
2131 static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
2132  SmallVector<Value> &target) {
2133  target =
2134  llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
2135  return val.has_value() ? *val : Value();
2136  }));
2137 }
2138 
2139 /// Bound an identifier `pos` in a given FlatAffineValueConstraints with
2140 /// constraints drawn from an affine map. Before adding the constraint, the
2141 /// dimensions/symbols of the affine map are aligned with `constraints`.
2142 /// `operands` are the SSA Value operands used with the affine map.
2143 /// Note: This function adds a new symbol column to the `constraints` for each
2144 /// dimension/symbol that exists in the affine map but not in `constraints`.
2145 static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
2146  BoundType type, unsigned pos,
2147  AffineMap map, ValueRange operands) {
2148  SmallVector<Value> dims, syms, newSyms;
2149  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
2150  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
2151 
2152  AffineMap alignedMap =
2153  alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
2154  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
2155  constraints.appendSymbolVar(newSyms[i]);
2156  return constraints.addBound(type, pos, alignedMap);
2157 }
2158 
2159 /// Add `val` to each result of `map`.
2160 static AffineMap addConstToResults(AffineMap map, int64_t val) {
2161  SmallVector<AffineExpr> newResults;
2162  for (AffineExpr r : map.getResults())
2163  newResults.push_back(r + val);
2164  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
2165  map.getContext());
2166 }
2167 
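// Worked example for addConstToResults above (illustrative):
// addConstToResults((d0) -> (d0, 8), 1) yields (d0) -> (d0 + 1, 9). It is
// used below to switch between the inclusive results of affine.min/max maps
// and the exclusive upper bounds expected by the constraint system.
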
2168 // Attempt to simplify the given min/max operation by proving that its value is
2169 // bounded by the same lower and upper bound.
2170 //
2171 // Bounds are computed by FlatAffineValueConstraints. Invariants required for
2172 // finding/proving bounds should be supplied via `constraints`.
2173 //
2174 // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
2175 // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
2176 // case of `!isMin`) and bind it to `opBound`. SSA values that are used in
2177 // `op` but are not part of `constraints`, are added as extra symbols.
2178 // 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
2179 // * If `isMin`: r_i >= opBound
2180 // * If `isMax`: r_i <= opBound
2181 // If this is the case, ub(op) == lb(op).
2182 // 4. Replace `op` with `opBound`.
2183 //
2184 // In summary, the following constraints are added throughout this function.
2185 // Note: `invar` are dimensions added by the caller to express the invariants.
2186 // (Showing only the case where `isMin`.)
2187 //
2188 // invar | op | opBound | r_i | extra syms... | const | eq/ineq
2189 // ------+-------+---------+-----+---------------+-------+-------------------
2190 // (various eq./ineq. constraining `invar`, added by the caller)
2191 // ... | 0 | 0 | 0 | 0 | ... | ...
2192 // ------+-------+---------+-----+---------------+-------+-------------------
2193 // (various ineq. constraining `op` in terms of `op` operands (`invar` and
2194 // extra `op` operands "extra syms" that are not in `invar`)).
2195 // ... | -1 | 0 | 0 | ... | ... | >= 0
2196 // ------+-------+---------+-----+---------------+-------+-------------------
2197 // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
2198 // ... | 0 | -1 | 0 | ... | ... | = 0
2199 // ------+-------+---------+-----+---------------+-------+-------------------
2200 // (for each `op` map result r_i: set r_i to corresponding map result,
2201 // prove that r_i >= minOpUb via contradiction)
2202 // ... | 0 | 0 | -1 | ... | ... | = 0
2203 // 0 | 0 | 1 | -1 | 0 | -1 | >= 0
2204 //
2205 FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
2206  Operation *op, FlatAffineValueConstraints constraints) {
2207  bool isMin = isa<AffineMinOp>(op);
2208  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
2209  MLIRContext *ctx = op->getContext();
2210  Builder builder(ctx);
2211  AffineMap map =
2212  isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2213  ValueRange operands = op->getOperands();
2214  unsigned numResults = map.getNumResults();
2215 
2216  // Add a few extra dimensions.
2217  unsigned dimOp = constraints.appendDimVar(); // `op`
2218  unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
2219  unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults);
2220 
2221  // Add an inequality for each result expr_i of map:
2222  // isMin: op <= expr_i, !isMin: op >= expr_i
2223  auto boundType = isMin ? BoundType::UB : BoundType::LB;
2224  // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
2225  AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
2226  if (failed(
2227  alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
2228  return failure();
2229 
2230  // Try to compute a lower/upper bound for op, expressed in terms of the other
2231  // `dims` and extra symbols.
2232  SmallVector<AffineMap> opLb(1), opUb(1);
2233  constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
2234  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
2235  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
2236  // a TODO of `getSliceBounds` and not handled here.
2237  if (!sliceBound || sliceBound.getNumResults() != 1)
2238  return failure(); // No or multiple bounds found.
2239  // Recover the inclusive UB in the case of an `affine.min`.
2240  AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;
2241 
2242  // Add an equality: Set dimOpBound to computed bound.
2243  // Add back dimension for op. (Was removed by `getSliceBounds`.)
2244  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
2245  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
2246  return failure();
2247 
2248  // If the constraint system is empty, there is an inconsistency. (E.g., this
2249  // can happen if loop lb > ub.)
2250  if (constraints.isEmpty())
2251  return failure();
2252 
2253  // In the case of `isMin` (`!isMin` is inversed):
2254  // Prove that each result of `map` has a lower bound that is equal to (or
2255  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
2256  // can be replaced with the bound. I.e., prove that for each result
2257  // expr_i (represented by dimension r_i):
2258  //
2259  // r_i >= opBound
2260  //
2261  // To prove this inequality, add its negation to the constraint set and prove
2262  // that the constraint set is empty.
2263  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
2264  FlatAffineValueConstraints newConstr(constraints);
2265 
2266  // Add an equality: r_i = expr_i
2267  // Note: These equalities could have been added earlier and used to express
2268  // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
2269  // computes minOpUb in terms of r_i dims, which is not desired.
2270  if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
2271  map.getSubMap({i - resultDimStart}), operands)))
2272  return failure();
2273 
2274  // If `isMin`: Add inequality: r_i < opBound
2275  // equiv.: opBound - r_i - 1 >= 0
2276  // If `!isMin`: Add inequality: r_i > opBound
2277  // equiv.: -opBound + r_i - 1 >= 0
2278  SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
2279  ineq[dimOpBound] = isMin ? 1 : -1;
2280  ineq[i] = isMin ? -1 : 1;
2281  ineq[newConstr.getNumCols() - 1] = -1;
2282  newConstr.addInequality(ineq);
2283  if (!newConstr.isEmpty())
2284  return failure();
2285  }
2286 
2287  // Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
2288  AffineMap newMap = alignedBoundMap;
2289  SmallVector<Value> newOperands;
2290  unpackOptionalValues(constraints.getMaybeValues(), newOperands);
2291  // If dims/symbols have known constant values, use those in order to simplify
2292  // the affine map further.
2293  for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
2294  // Skip unused operands and operands that are already constants.
2295  if (!newOperands[i] || getConstantIntValue(newOperands[i]))
2296  continue;
2297  if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
2298  AffineExpr expr =
2299  i < newMap.getNumDims()
2300  ? builder.getAffineDimExpr(i)
2301  : builder.getAffineSymbolExpr(i - newMap.getNumDims());
2302  newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
2303  newMap.getNumDims(), newMap.getNumSymbols());
2304  }
2305  }
2306  affine::canonicalizeMapAndOperands(&newMap, &newOperands);
2307  return AffineValueMap(newMap, newOperands);
2308 }
2309 
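// Illustrative sketch (not part of the original file; the helper name is
// hypothetical, and it assumes the only invariants needed are the enclosing
// loop's bounds): seeding the constraint system with a loop domain before
// attempting to simplify an affine.min nested inside that loop.
static FailureOr<AffineValueMap> simplifyMinUnderLoop(AffineMinOp minOp,
                                                      AffineForOp forOp) {
  FlatAffineValueConstraints constraints;
  // Register the IV as a dimension, then add its lower/upper bounds.
  constraints.appendDimVar(forOp.getInductionVar());
  if (failed(constraints.addAffineForOpDomain(forOp)))
    return failure();
  return simplifyConstrainedMinMaxOp(minOp, std::move(constraints));
}
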
2310 Block *mlir::affine::findInnermostCommonBlockInScope(Operation *a,
2311  Operation *b) {
2312  Region *aScope = getAffineAnalysisScope(a);
2313  Region *bScope = getAffineAnalysisScope(b);
2314  if (aScope != bScope)
2315  return nullptr;
2316 
2317  // Get the block ancestry of `op` while stopping at the affine scope `aScope`
2318  // and store them in `ancestry`.
2319  auto getBlockAncestry = [&](Operation *op,
2320  SmallVectorImpl<Block *> &ancestry) {
2321  Operation *curOp = op;
2322  do {
2323  ancestry.push_back(curOp->getBlock());
2324  if (curOp->getParentRegion() == aScope)
2325  break;
2326  curOp = curOp->getParentOp();
2327  } while (curOp);
2328  assert(curOp && "can't reach root op without passing through affine scope");
2329  std::reverse(ancestry.begin(), ancestry.end());
2330  };
2331 
2332  SmallVector<Block *, 4> aAncestors, bAncestors;
2333  getBlockAncestry(a, aAncestors);
2334  getBlockAncestry(b, bAncestors);
2335  assert(!aAncestors.empty() && !bAncestors.empty() &&
2336  "at least one Block ancestor expected");
2337 
2338  Block *innermostCommonBlock = nullptr;
2339  for (unsigned a = 0, b = 0, e = aAncestors.size(), f = bAncestors.size();
2340  a < e && b < f; ++a, ++b) {
2341  if (aAncestors[a] != bAncestors[b])
2342  break;
2343  innermostCommonBlock = aAncestors[a];
2344  }
2345  return innermostCommonBlock;
2346 }
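// Worked example for findInnermostCommonBlockInScope above (illustrative):
// for two ops placed in the two regions of an affine.if nested inside an
// affine.for, the innermost common block is the loop body block holding the
// affine.if; for ops in different affine scopes, nullptr is returned.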
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
Definition: Utils.cpp:1720
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
Definition: Utils.cpp:1463
static Node * addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg, DenseMap< Value, SetVector< unsigned >> &memrefAccesses)
Add op to MDG creating a new node and adding its memory accesses (affine or non-affine to memrefAcces...
Definition: Utils.cpp:192
const char *const kSliceFusionBarrierAttrName
Definition: Utils.cpp:1787
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
Definition: Utils.cpp:1505
static void getEffectedValues(Operation *op, SmallVectorImpl< Value > &values)
Returns the values that this op has a memref effect of type EffectTys on, not considering recursive e...
Definition: Utils.cpp:166
MemRefDependenceGraph::Node Node
Definition: Utils.cpp:39
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
Definition: Utils.cpp:1480
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
Definition: Utils.cpp:2046
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
Definition: Utils.cpp:2160
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
Definition: Utils.cpp:2145
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
Definition: Utils.cpp:2131
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:236
unsigned getPosition() const
Definition: AffineExpr.cpp:348
Base type for affine expression.
Definition: AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:343
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
Definition: AffineMap.h:267
unsigned getNumSymbols() const
Definition: AffineMap.cpp:398
unsigned getNumDims() const
Definition: AffineMap.cpp:394
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:407
unsigned getNumResults() const
Definition: AffineMap.cpp:402
unsigned getNumInputs() const
Definition: AffineMap.cpp:403
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:411
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
Definition: AffineMap.cpp:515
void dump() const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Definition: AffineMap.cpp:654
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Definition: Block.h:305
iterator begin()
Definition: Block.h:143
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:33
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:364
AffineExpr getAffineConstantExpr(int64_t constant)
Definition: Builders.cpp:368
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:360
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
std::optional< int64_t > getConstantBoundOnDimSize(MLIRContext *context, unsigned pos, AffineMap *lb=nullptr, AffineMap *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
Returns a non-negative constant bound on the extent (upper bound - lower bound) of the specified vari...
FlatLinearValueConstraints represents an extension of FlatLinearConstraints where each non-local vari...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
SmallVector< std::optional< Value > > getMaybeValues() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
void addBound(presburger::BoundType type, Value val, int64_t value)
Adds a constant bound for the variable associated with the given Value.
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
An integer set representing a conjunction of one or more affine equalities and inequalities.
Definition: IntegerSet.h:44
unsigned getNumDims() const
Definition: IntegerSet.cpp:15
MLIRContext * getContext() const
Definition: IntegerSet.cpp:57
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
Definition: IntegerSet.h:56
unsigned getNumSymbols() const
Definition: IntegerSet.cpp:16
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:544
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:750
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:386
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:798
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:874
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
Definition: Utils.cpp:2119
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:776
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
Definition: Utils.cpp:1553
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
Definition: AffineOps.cpp:2581
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
Definition: Utils.cpp:2101
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:2030
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2585
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
Definition: Utils.cpp:2013
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
Definition: Utils.cpp:2110
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1447
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2573
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
Definition: AffineOps.cpp:273
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
Definition: Utils.cpp:762
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:2092
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
Definition: Utils.cpp:1521
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:402
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
Definition: Utils.cpp:1792
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns true if the provided value is among the induction variables of an AffineParallelOp.
Definition: AffineOps.cpp:2596
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
Definition: Utils.cpp:1382
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1985
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the number of iterations for the slicetripCountMap provided.
Definition: Utils.cpp:1778
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
mlir::Block * findInnermostCommonBlockInScope(mlir::Operation *a, mlir::Operation *b)
Find the innermost common Block of a and b in the affine scope that a and b are part of.
Definition: Utils.cpp:2310
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
Definition: Utils.cpp:1740
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
Definition: Utils.cpp:1904
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
Definition: Utils.cpp:2205
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
Definition: Utils.cpp:1332
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
BoundType
The type of bound: equal, lower bound or upper bound.
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:645
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
Definition: Utils.h:310
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
Definition: Utils.cpp:947
SmallVector< Value, 4 > ivs
Definition: Utils.h:313
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
Definition: Utils.cpp:809
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
Definition: Utils.cpp:793
std::vector< SmallVector< Value, 4 > > ubOperands
Definition: Utils.h:321
SmallVector< AffineMap, 4 > ubs
Definition: Utils.h:317
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
Definition: Utils.cpp:1017
SmallVector< AffineMap, 4 > lbs
Definition: Utils.h:315
std::vector< SmallVector< Value, 4 > > lbOperands
Definition: Utils.h:319
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:49
SmallVector< Operation *, 4 > loadOpInsts
Definition: Utils.h:41
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:47
void collect(Operation *opToWalk)
Definition: Utils.cpp:44
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:45
SmallVector< Operation *, 4 > storeOpInsts
Definition: Utils.h:43
Encapsulates a memref load or store access information.
unsigned getRank() const
Definition: Utils.cpp:1975
MemRefAccess(Operation *opInst)
Constructs a MemRefAccess from a load or store operation.
Definition: Utils.cpp:1960
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
Definition: Utils.cpp:2003
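A minimal sketch comparing two accesses, assuming opA and opB (hypothetical names) are affine load/store operations on the same memref; MemRefAccess is declared in AffineAnalysis.h, with its members defined in Utils.cpp as indexed above:

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"

using namespace mlir;
using namespace mlir::affine;

static bool provablySameElement(Operation *opA, Operation *opB) {
  MemRefAccess accessA(opA);
  MemRefAccess accessB(opB);
  // operator== holds only when the accesses can be proven equivalent at
  // compile time, including that they reference the same memref.
  return accessA == accessB;
}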
SmallVector< Operation *, 4 > loads
Definition: Utils.h:73
SmallVector< Operation *, 4 > stores
Definition: Utils.h:77
unsigned hasFree(Value memref) const
Definition: Utils.cpp:124
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:75
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:79
unsigned getStoreOpCount(Value memref) const
Definition: Utils.cpp:94
unsigned hasStore(Value memref) const
Returns true if there exists an operation with a write memory effect to memref in this node.
Definition: Utils.cpp:110
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:81
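A minimal sketch of the per-node queries, assuming node is a graph node obtained from the MemRefDependenceGraph indexed below:

#include "mlir/Dialect/Affine/Analysis/Utils.h"

using namespace mlir;
using namespace mlir::affine;

static void summarize(const Node &node, Value memref) {
  unsigned numLoads = node.getLoadOpCount(memref);
  unsigned numStores = node.getStoreOpCount(memref);
  // hasStore/hasFree behave as predicates despite the unsigned return type.
  bool written = node.hasStore(memref);
  bool freed = node.hasFree(memref);
  (void)numLoads;
  (void)numStores;
  (void)written;
  (void)freed;
}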
unsigned addNode(Operation *op)
Definition: Utils.cpp:374
bool writesToLiveInOrEscapingMemrefs(unsigned id) const
Definition: Utils.cpp:404
void removeEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:450
void addEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:439
DenseMap< unsigned, Node > nodes
Definition: Utils.h:141
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes) const
Return all nodes which define SSA values used in node 'id'.
Definition: Utils.cpp:538
bool hasDependencePath(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:477
void clearNodeLoadAndStores(unsigned id)
Definition: Utils.cpp:705
const Node * getForOpNode(AffineForOp forOp) const
Definition: Utils.cpp:366
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:552
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
Definition: Utils.cpp:628
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:713
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr) const
Definition: Utils.cpp:528
const Node * getNode(unsigned id) const
Definition: Utils.cpp:359
void removeNode(unsigned id)
Definition: Utils.cpp:381
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:721
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:729
void addToNode(unsigned id, ArrayRef< Operation * > loads, ArrayRef< Operation * > stores, ArrayRef< Operation * > memrefLoads, ArrayRef< Operation * > memrefStores, ArrayRef< Operation * > memrefFrees)
Definition: Utils.cpp:692
unsigned getIncomingMemRefAccesses(unsigned id, Value memref) const
Definition: Utils.cpp:512
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr) const
Definition: Utils.cpp:424
void print(raw_ostream &os) const
Definition: Utils.cpp:744
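A minimal sketch of building the graph and querying it; the Block reference and node ids are hypothetical, and the constructor/init() pairing is assumed from the current header:

#include "mlir/Dialect/Affine/Analysis/Utils.h"

using namespace mlir;
using namespace mlir::affine;

static bool transitivelyDependent(Block &block, unsigned srcId,
                                  unsigned dstId) {
  MemRefDependenceGraph graph(block);
  // init() returns false when the block contains constructs the analysis
  // cannot model; answer conservatively in that case.
  if (!graph.init())
    return true;
  return graph.hasDependencePath(srcId, dstId);
}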
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...
Definition: Utils.h:481
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
Definition: Utils.cpp:1071
FlatAffineValueConstraints * getConstraints()
Definition: Utils.h:527
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
Definition: Utils.cpp:1067
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true, bool dropLocalVars=true, bool dropOuterIVs=true)
Computes the memory region accessed by this memref with the region represented as constraints symboli...
Definition: Utils.cpp:1168
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Definition: Utils.cpp:1127
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
Definition: Utils.cpp:1351
LogicalResult unionBoundingBox(const MemRefRegion &other)
Definition: Utils.cpp:1146
Value memref
Memref that this region corresponds to.
Definition: Utils.h:562
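A minimal sketch of computing the region touched by an affine access op, symbolic in the IVs above loopDepth, and then asking for its footprint; the function name footprintBytes is hypothetical:

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include <optional>

using namespace mlir;
using namespace mlir::affine;

static std::optional<int64_t> footprintBytes(Operation *op,
                                             unsigned loopDepth) {
  MemRefRegion region(op->getLoc());
  if (failed(region.compute(op, loopDepth)))
    return std::nullopt;
  // Size in bytes of the region's constant bounding box, if one exists.
  return region.getRegionSize();
}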
Enumerates the different result statuses of slice computation by computeSliceUnion.
Definition: Utils.h:297
Eliminates the variable at the specified position using Fourier-Motzkin variable elimination.