//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for non-loop IR
// structures.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/Utils.h"

#include "mlir/Analysis/Presburger/PresburgerRelation.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "analysis-utils"

using namespace mlir;
using namespace affine;
using namespace presburger;

using llvm::SmallDenseMap;

using Node = MemRefDependenceGraph::Node;

// LoopNestStateCollector walks loop nests and collects load and store
// operations, and whether or not a region-holding op other than ForOp and IfOp
// was encountered in the loop nest.
void LoopNestStateCollector::collect(Operation *opToWalk) {
  opToWalk->walk([&](Operation *op) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      forOps.push_back(forOp);
    } else if (isa<AffineReadOpInterface>(op)) {
      loadOpInsts.push_back(op);
    } else if (isa<AffineWriteOpInterface>(op)) {
      storeOpInsts.push_back(op);
    } else {
      auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
      if (!memInterface) {
        if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
          // This op itself is memory-effect free.
          return;
        // Check operands. Eg. ops like the `call` op are handled here.
        for (Value v : op->getOperands()) {
          if (!isa<MemRefType>(v.getType()))
            continue;
          // Conservatively, we assume the memref is read and written to.
          memrefLoads.push_back(op);
          memrefStores.push_back(op);
        }
      } else {
        // Non-affine loads and stores.
        if (hasEffect<MemoryEffects::Read>(op))
          memrefLoads.push_back(op);
        if (hasEffect<MemoryEffects::Write>(op))
          memrefStores.push_back(op);
        if (hasEffect<MemoryEffects::Free>(op))
          memrefFrees.push_back(op);
      }
    }
  });
}
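
// Example (hypothetical IR, for illustration only): for a nest such as
//
//   affine.for %i = 0 to 10 {
//     %v = affine.load %A[%i] : memref<10xf32>   // -> loadOpInsts
//     memref.store %v, %B[%i] : memref<10xf32>   // -> memrefStores
//     func.call @f(%C) : (memref<8xf32>) -> ()   // -> memrefLoads and
//                                                //    memrefStores
//   }
//
// the walk records the loop in 'forOps' and the affine read in 'loadOpInsts';
// memref.store implements MemoryEffectOpInterface with a write effect, while
// the call's memref operand is conservatively treated as both read and
// written since the callee's effects are unknown here.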

unsigned Node::getLoadOpCount(Value memref) const {
  unsigned loadOpCount = 0;
  for (Operation *loadOp : loads) {
    // Common case: affine reads.
    if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
      if (memref == affineLoad.getMemRef())
        ++loadOpCount;
    } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
      ++loadOpCount;
    }
  }
  return loadOpCount;
}

// Returns the store op count for 'memref'.
unsigned Node::getStoreOpCount(Value memref) const {
  unsigned storeOpCount = 0;
  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
    // Common case: affine writes.
    if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
      if (memref == affineStore.getMemRef())
        ++storeOpCount;
    } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
                                               memref)) {
      ++storeOpCount;
    }
  }
  return storeOpCount;
}

// Returns true if there exists a store op on 'memref' in this node.
unsigned Node::hasStore(Value memref) const {
  return llvm::any_of(
      llvm::concat<Operation *const>(stores, memrefStores),
      [&](Operation *storeOp) {
        if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
          if (memref == affineStore.getMemRef())
            return true;
        } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
          return true;
        }
        return false;
      });
}

unsigned Node::hasFree(Value memref) const {
  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
    return hasEffect<MemoryEffects::Free>(freeOp, memref);
  });
}

// Returns in 'storeOps' all store ops in this node that access 'memref'.
void Node::getStoreOpsForMemref(Value memref,
                                SmallVectorImpl<Operation *> *storeOps) const {
  for (Operation *storeOp : stores) {
    if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
      storeOps->push_back(storeOp);
  }
}

// Returns in 'loadOps' all load ops in this node that access 'memref'.
void Node::getLoadOpsForMemref(Value memref,
                               SmallVectorImpl<Operation *> *loadOps) const {
  for (Operation *loadOp : loads) {
    if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
      loadOps->push_back(loadOp);
  }
}

// Returns in 'loadAndStoreMemrefSet' all memrefs for which this node
// has at least one load and one store operation.
void Node::getLoadAndStoreMemrefSet(
    DenseSet<Value> *loadAndStoreMemrefSet) const {
  llvm::SmallDenseSet<Value, 2> loadMemrefs;
  for (Operation *loadOp : loads) {
    loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
  }
  for (Operation *storeOp : stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
    if (loadMemrefs.count(memref) > 0)
      loadAndStoreMemrefSet->insert(memref);
  }
}

/// Returns the values that this op has a memref effect of type `EffectTys` on,
/// not considering recursive effects.
template <typename... EffectTys>
static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) {
  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
  if (!memOp) {
    if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
      // No effects.
      return;
    // Memref operands have to be considered as being affected.
    for (Value operand : op->getOperands()) {
      if (isa<MemRefType>(operand.getType()))
        values.push_back(operand);
    }
    return;
  }
  SmallVector<MemoryEffects::EffectInstance> effects;
  memOp.getEffects(effects);
  for (auto &effect : effects) {
    Value effectVal = effect.getValue();
    if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
        isa<MemRefType>(effectVal.getType()))
      values.push_back(effectVal);
  }
}

/// Add `op` to the MDG, creating a new node and adding its memory accesses
/// (affine or non-affine) to the memrefAccesses (memref -> list of nodes with
/// accesses) map.
static Node *
addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
             DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
  auto &nodes = mdg.nodes;
  // Create graph node 'id' to represent top-level 'forOp' and record
  // all loads and store accesses it contains.
  LoopNestStateCollector collector;
  collector.collect(nodeOp);
  unsigned newNodeId = mdg.nextNodeId++;
  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
  for (Operation *op : collector.loadOpInsts) {
    node.loads.push_back(op);
    auto memref = cast<AffineReadOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.storeOpInsts) {
    node.stores.push_back(op);
    auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.memrefLoads) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Read>(op, effectedValues);
    if (llvm::any_of(((ValueRange)effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      // We do not know the interaction here.
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefLoads.push_back(op);
  }
  for (Operation *op : collector.memrefStores) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Write>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefStores.push_back(op);
  }
  for (Operation *op : collector.memrefFrees) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Free>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefFrees.push_back(op);
  }

  return &node;
}

bool MemRefDependenceGraph::init() {
  LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
  // Map from a memref to the set of ids of the nodes that have ops accessing
  // the memref.
  DenseMap<Value, SetVector<unsigned>> memrefAccesses;

  // Create graph nodes.
  DenseMap<Operation *, unsigned> forToNodeMap;
  for (Operation &op : block) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
      forToNodeMap[&op] = node->id;
    } else if (isa<AffineReadOpInterface>(op)) {
      // Create graph node for top-level load op.
      Node node(nextNodeId++, &op);
      node.loads.push_back(&op);
      auto memref = cast<AffineReadOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (isa<AffineWriteOpInterface>(op)) {
      // Create graph node for top-level store op.
      Node node(nextNodeId++, &op);
      node.stores.push_back(&op);
      auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (op.getNumResults() > 0 && !op.use_empty()) {
      // Create graph node for top-level producer of SSA values, which
      // could be used by loop nest nodes.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (!isMemoryEffectFree(&op) &&
               (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
      // Create graph node for top-level op unless it is known to be
      // memory-effect free. This covers all unknown/unregistered ops,
      // non-affine ops with memory effects, and region-holding ops with a
      // well-defined control flow. During the fusion validity checks, edges
      // to/from these ops get looked at.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
      // Return false if non-handled/unknown region-holding ops are found. We
      // won't know what such ops do or what their regions mean; for e.g., it
      // may not be an imperative op.
      LLVM_DEBUG(llvm::dbgs()
                 << "MDG init failed; unknown region-holding op found!\n");
      return false;
    }
    // We aren't creating nodes for memory-effect free ops either with no
    // regions (unless it has results being used) or those with branch op
    // interface.
  }

  LLVM_DEBUG(llvm::dbgs() << "Created " << nodes.size() << " nodes\n");

  // Add dependence edges between nodes which produce SSA values and their
  // users. Load ops can be considered as the ones producing SSA values.
  for (auto &idAndNode : nodes) {
    const Node &node = idAndNode.second;
    // Stores don't define SSA values, skip them.
    if (!node.stores.empty())
      continue;
    Operation *opInst = node.op;
    for (Value value : opInst->getResults()) {
      for (Operation *user : value.getUsers()) {
        // Ignore users outside of the block.
        if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
            &block)
          continue;
        SmallVector<AffineForOp, 4> loops;
        getAffineForIVs(*user, &loops);
        // Find the surrounding affine.for nested immediately within the
        // block.
        auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
          return loop->getBlock() == &block;
        });
        if (it == loops.end())
          continue;
        assert(forToNodeMap.count(*it) > 0 && "missing mapping");
        unsigned userLoopNestId = forToNodeMap[*it];
        addEdge(node.id, userLoopNestId, value);
      }
    }
  }

  // Walk memref access lists and add graph edges between dependent nodes.
  for (auto &memrefAndList : memrefAccesses) {
    unsigned n = memrefAndList.second.size();
    Value srcMemRef = memrefAndList.first;
    // Add edges between all dependent pairs among the node IDs on this memref.
    for (unsigned i = 0; i < n; ++i) {
      unsigned srcId = memrefAndList.second[i];
      Node *srcNode = getNode(srcId);
      bool srcHasStoreOrFree =
          srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
      for (unsigned j = i + 1; j < n; ++j) {
        unsigned dstId = memrefAndList.second[j];
        Node *dstNode = getNode(dstId);
        bool dstHasStoreOrFree =
            dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
        if (srcHasStoreOrFree || dstHasStoreOrFree)
          addEdge(srcId, dstId, srcMemRef);
      }
    }
  }
  return true;
}
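
// Example (hypothetical IR, for illustration only): for a block containing
//
//   affine.for %i = 0 to 16 {        // node 0
//     affine.store %v, %A[%i] : memref<16xf32>
//   }
//   affine.for %i = 0 to 16 {        // node 1
//     %u = affine.load %A[%i] : memref<16xf32>
//   }
//
// init() creates one node per nest and records memrefAccesses[%A] = {0, 1};
// since node 0 stores to %A, the pair-wise walk above adds a single edge
// 0 -> 1 keyed on %A.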

// Returns the graph node for 'id'.
const Node *MemRefDependenceGraph::getNode(unsigned id) const {
  auto it = nodes.find(id);
  assert(it != nodes.end());
  return &it->second;
}

// Returns the graph node for 'forOp'.
const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const {
  for (auto &idAndNode : nodes)
    if (idAndNode.second.op == forOp)
      return &idAndNode.second;
  return nullptr;
}

// Adds a node with 'op' to the graph and returns its unique identifier.
unsigned MemRefDependenceGraph::addNode(Operation *op) {
  Node node(nextNodeId++, op);
  nodes.insert({node.id, node});
  return node.id;
}

// Remove node 'id' (and its associated edges) from graph.
void MemRefDependenceGraph::removeNode(unsigned id) {
  // Remove each edge in 'inEdges[id]'.
  if (inEdges.count(id) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[id];
    for (auto &inEdge : oldInEdges) {
      removeEdge(inEdge.id, id, inEdge.value);
    }
  }
  // Remove each edge in 'outEdges[id]'.
  if (outEdges.contains(id)) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[id];
    for (auto &outEdge : oldOutEdges) {
      removeEdge(id, outEdge.id, outEdge.value);
    }
  }
  // Erase remaining node state.
  inEdges.erase(id);
  outEdges.erase(id);
  nodes.erase(id);
}

// Returns true if node 'id' writes to any memref which escapes (or is an
// argument to) the block. Returns false otherwise.
bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) const {
  const Node *node = getNode(id);
  for (auto *storeOpInst : node->stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
    auto *op = memref.getDefiningOp();
    // Return true if 'memref' is a block argument.
    if (!op)
      return true;
    // Return true if any use of 'memref' does not dereference it in an affine
    // way.
    for (auto *user : memref.getUsers())
      if (!isa<AffineMapAccessInterface>(*user))
        return true;
  }
  return false;
}

// Returns true iff there is an edge from node 'srcId' to node 'dstId' which
// is for 'value' if non-null, or for any value otherwise. Returns false
// otherwise.
bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
                                    Value value) const {
  if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) {
    return false;
  }
  bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) {
    return edge.id == dstId && (!value || edge.value == value);
  });
  bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) {
    return edge.id == srcId && (!value || edge.value == value);
  });
  return hasOutEdge && hasInEdge;
}

// Adds an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
                                    Value value) {
  if (!hasEdge(srcId, dstId, value)) {
    outEdges[srcId].push_back({dstId, value});
    inEdges[dstId].push_back({srcId, value});
    if (isa<MemRefType>(value.getType()))
      memrefEdgeCount[value]++;
  }
}

// Removes an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
                                       Value value) {
  assert(inEdges.count(dstId) > 0);
  assert(outEdges.count(srcId) > 0);
  if (isa<MemRefType>(value.getType())) {
    assert(memrefEdgeCount.count(value) > 0);
    memrefEdgeCount[value]--;
  }
  // Remove 'srcId' from 'inEdges[dstId]'.
  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
    if ((*it).id == srcId && (*it).value == value) {
      inEdges[dstId].erase(it);
      break;
    }
  }
  // Remove 'dstId' from 'outEdges[srcId]'.
  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
    if ((*it).id == dstId && (*it).value == value) {
      outEdges[srcId].erase(it);
      break;
    }
  }
}

// Returns true if there is a path in the dependence graph from node 'srcId'
// to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
// operations that the edges connect are expected to be from the same block.
bool MemRefDependenceGraph::hasDependencePath(unsigned srcId,
                                              unsigned dstId) const {
  // Worklist state is: <node-id, next-output-edge-index-to-visit>
  SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
  worklist.push_back({srcId, 0});
  Operation *dstOp = getNode(dstId)->op;
  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
  while (!worklist.empty()) {
    auto &idAndIndex = worklist.back();
    // Return true if we have reached 'dstId'.
    if (idAndIndex.first == dstId)
      return true;
    // Pop and continue if node has no out edges, or if all out edges have
    // already been visited.
    if (!outEdges.contains(idAndIndex.first) ||
        idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) {
      worklist.pop_back();
      continue;
    }
    // Get graph edge to traverse.
    const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second];
    // Increment next output edge index for 'idAndIndex'.
    ++idAndIndex.second;
    // Add node at 'edge.id' to the worklist. We don't need to consider
    // nodes that are "after" dstId in the containing block; one can't have a
    // path to `dstId` from any of those nodes.
    bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
    if (!afterDst && edge.id != idAndIndex.first)
      worklist.push_back({edge.id, 0});
  }
  return false;
}
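
// Example: with edges 0 -> 1 and 1 -> 2, hasDependencePath(0, 2) finds the
// path through node 1 and returns true, while hasDependencePath(2, 0)
// returns false since only out-edges are traversed.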

// Returns the input edge count for node 'id' and 'memref' from src nodes
// which access 'memref' with a store operation.
unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
                                                          Value memref) const {
  unsigned inEdgeCount = 0;
  for (const Edge &inEdge : inEdges.lookup(id)) {
    if (inEdge.value == memref) {
      const Node *srcNode = getNode(inEdge.id);
      // Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
      // with a store operation.
      if (srcNode->getStoreOpCount(memref) > 0)
        ++inEdgeCount;
    }
  }
  return inEdgeCount;
}

// Returns the output edge count for node 'id' and 'memref' (if non-null),
// otherwise returns the total output edge count from node 'id'.
unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id,
                                                Value memref) const {
  unsigned outEdgeCount = 0;
  for (const auto &outEdge : outEdges.lookup(id))
    if (!memref || outEdge.value == memref)
      ++outEdgeCount;
  return outEdgeCount;
}

/// Return all nodes which define SSA values used in node 'id'.
void MemRefDependenceGraph::gatherDefiningNodes(
    unsigned id, DenseSet<unsigned> &definingNodes) const {
  for (const Edge &edge : inEdges.lookup(id))
    // By definition of edge, if the edge value is a non-memref value,
    // then the dependence is between a graph node which defines an SSA value
    // and another graph node which uses the SSA value.
    if (!isa<MemRefType>(edge.value.getType()))
      definingNodes.insert(edge.id);
}

// Computes and returns an insertion point operation, before which the
// fused <srcId, dstId> loop nest can be inserted while preserving
// dependences. Returns nullptr if no such insertion point is found.
Operation *
MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
                                                      unsigned dstId) const {
  if (!outEdges.contains(srcId))
    return getNode(dstId)->op;

  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
  DenseSet<unsigned> definingNodes;
  gatherDefiningNodes(dstId, definingNodes);
  if (llvm::any_of(definingNodes,
                   [&](unsigned id) { return hasDependencePath(srcId, id); })) {
    LLVM_DEBUG(llvm::dbgs()
               << "Can't fuse: a defining op with a user in the dst "
                  "loop has dependence from the src loop\n");
    return nullptr;
  }

  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
  SmallPtrSet<Operation *, 2> srcDepInsts;
  for (auto &outEdge : outEdges.lookup(srcId))
    if (outEdge.id != dstId)
      srcDepInsts.insert(getNode(outEdge.id)->op);

  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
  SmallPtrSet<Operation *, 2> dstDepInsts;
  for (auto &inEdge : inEdges.lookup(dstId))
    if (inEdge.id != srcId)
      dstDepInsts.insert(getNode(inEdge.id)->op);

  Operation *srcNodeInst = getNode(srcId)->op;
  Operation *dstNodeInst = getNode(dstId)->op;

  // Computing insertion point:
  // *) Walk all operation positions in Block operation list in the
  //    range (src, dst). For each operation 'op' visited in this search:
  //   *) Store in 'firstSrcDepPos' the first position where 'op' has a
  //      dependence edge from 'srcNode'.
  //   *) Store in 'lastDstDepPos' the last position where 'op' has a
  //      dependence edge to 'dstNode'.
  //   *) Compare 'firstSrcDepPos' and 'lastDstDepPos' to determine the
  //      operation insertion point (or return null pointer if no such
  //      insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
  SmallVector<Operation *, 2> depInsts;
  std::optional<unsigned> firstSrcDepPos;
  std::optional<unsigned> lastDstDepPos;
  unsigned pos = 0;
  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
       it != Block::iterator(dstNodeInst); ++it) {
    Operation *op = &(*it);
    if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
      firstSrcDepPos = pos;
    if (dstDepInsts.count(op) > 0)
      lastDstDepPos = pos;
    depInsts.push_back(op);
    ++pos;
  }

  if (firstSrcDepPos.has_value()) {
    if (lastDstDepPos.has_value()) {
      if (*firstSrcDepPos <= *lastDstDepPos) {
        // No valid insertion point exists which preserves dependences.
        return nullptr;
      }
    }
    // Return the insertion point at 'firstSrcDepPos'.
    return depInsts[*firstSrcDepPos];
  }
  // No dependence targets in range (or only dst deps in range), return
  // 'dstNodeInst' insertion point.
  return dstNodeInst;
}
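
// Worked example (hypothetical block order): for "src, op1, op2, dst" with
// edges src -> op2 and op1 -> dst, the walk over (src, dst) records
// depInsts = [op1, op2], firstSrcDepPos = 1 (op2), and lastDstDepPos = 0
// (op1). Since firstSrcDepPos > lastDstDepPos, depInsts[1] (op2) is
// returned: inserting the fused nest before op2 keeps it after op1 and
// before the op that depends on src, preserving both dependences.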

// Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
// taking into account that:
// *) if 'removeSrcId' is true, 'srcId' will be removed after fusion,
// *) memrefs in 'privateMemRefs' have been replaced in node at 'dstId' by a
//    private memref.
void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
                                        const DenseSet<Value> &privateMemRefs,
                                        bool removeSrcId) {
  // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
  if (inEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
    for (auto &inEdge : oldInEdges) {
      // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
      if (!privateMemRefs.contains(inEdge.value))
        addEdge(inEdge.id, dstId, inEdge.value);
    }
  }
  // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
  // If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
  if (outEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
    for (auto &outEdge : oldOutEdges) {
      // Remove any out edges from 'srcId' to 'dstId' across memrefs.
      if (outEdge.id == dstId)
        removeEdge(srcId, outEdge.id, outEdge.value);
      else if (removeSrcId) {
        addEdge(dstId, outEdge.id, outEdge.value);
        removeEdge(srcId, outEdge.id, outEdge.value);
      }
    }
  }
  // Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
  // replaced by a private memref). These edges could come from nodes
  // other than 'srcId' which were removed in the previous step.
  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
    SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
    for (auto &inEdge : oldInEdges)
      if (privateMemRefs.count(inEdge.value) > 0)
        removeEdge(inEdge.id, dstId, inEdge.value);
  }
}

// Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
// of sibling node 'sibId' into node 'dstId'.
void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
  // For each edge in 'inEdges[sibId]':
  // *) Add new edge from source node 'inEdge.id' to 'dstNode'.
  // *) Remove edge from source node 'inEdge.id' to 'sibNode'.
  if (inEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
    for (auto &inEdge : oldInEdges) {
      addEdge(inEdge.id, dstId, inEdge.value);
      removeEdge(inEdge.id, sibId, inEdge.value);
    }
  }

  // For each edge in 'outEdges[sibId]' to node 'id':
  // *) Add new edge from 'dstId' to 'outEdge.id'.
  // *) Remove edge from 'sibId' to 'outEdge.id'.
  if (outEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
    for (auto &outEdge : oldOutEdges) {
      addEdge(dstId, outEdge.id, outEdge.value);
      removeEdge(sibId, outEdge.id, outEdge.value);
    }
  }
}

// Adds ops in 'loads' and 'stores' to node at 'id'.
void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads,
                                      ArrayRef<Operation *> stores,
                                      ArrayRef<Operation *> memrefLoads,
                                      ArrayRef<Operation *> memrefStores,
                                      ArrayRef<Operation *> memrefFrees) {
  Node *node = getNode(id);
  llvm::append_range(node->loads, loads);
  llvm::append_range(node->stores, stores);
  llvm::append_range(node->memrefLoads, memrefLoads);
  llvm::append_range(node->memrefStores, memrefStores);
  llvm::append_range(node->memrefFrees, memrefFrees);
}

void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
  Node *node = getNode(id);
  node->loads.clear();
  node->stores.clear();
}

// Calls 'callback' for each input edge incident to node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefInputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (inEdges.count(id) > 0)
    forEachMemRefEdge(inEdges[id], callback);
}

// Calls 'callback' for each output edge from node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefOutputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (outEdges.count(id) > 0)
    forEachMemRefEdge(outEdges[id], callback);
}

// Calls 'callback' for each edge in 'edges' which carries a memref
// dependence.
void MemRefDependenceGraph::forEachMemRefEdge(
    ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
  for (const auto &edge : edges) {
    // Skip if 'edge' is not a memref dependence edge.
    if (!isa<MemRefType>(edge.value.getType()))
      continue;
    assert(nodes.count(edge.id) > 0);
    // Skip if 'edge.id' is not a loop nest.
    if (!isa<AffineForOp>(getNode(edge.id)->op))
      continue;
    // Visit current input edge 'edge'.
    callback(edge);
  }
}

void MemRefDependenceGraph::print(raw_ostream &os) const {
  os << "\nMemRefDependenceGraph\n";
  os << "\nNodes:\n";
  for (const auto &idAndNode : nodes) {
    os << "Node: " << idAndNode.first << "\n";
    auto it = inEdges.find(idAndNode.first);
    if (it != inEdges.end()) {
      for (const auto &e : it->second)
        os << "  InEdge: " << e.id << " " << e.value << "\n";
    }
    it = outEdges.find(idAndNode.first);
    if (it != outEdges.end()) {
      for (const auto &e : it->second)
        os << "  OutEdge: " << e.id << " " << e.value << "\n";
    }
  }
}

void mlir::affine::getAffineForIVs(Operation &op,
                                   SmallVectorImpl<AffineForOp> *loops) {
  auto *currOp = op.getParentOp();
  AffineForOp currAffineForOp;
  // Traverse up the hierarchy collecting all 'affine.for' operations while
  // skipping over 'affine.if' operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
      loops->push_back(currAffineForOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(loops->begin(), loops->end());
}
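
// Example (hypothetical IR): for an op nested as
//
//   affine.for %i ... {
//     affine.if #set(%i) {
//       affine.for %j ... {
//         <op>
//       }
//     }
//   }
//
// getAffineForIVs(<op>, &loops) sets loops to [%i's loop, %j's loop],
// outermost first, skipping over the affine.if.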

void mlir::affine::getEnclosingAffineOps(Operation &op,
                                         SmallVectorImpl<Operation *> *ops) {
  ops->clear();
  Operation *currOp = op.getParentOp();

  // Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and
  // `affine.parallel` operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
      ops->push_back(currOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(ops->begin(), ops->end());
}

// Populates 'cst' with FlatAffineValueConstraints which represent original
// domain of the loop bounds that define 'ivs'.
LogicalResult ComputationSliceState::getSourceAsConstraints(
    FlatAffineValueConstraints &cst) const {
  assert(!ivs.empty() && "Cannot have a slice without its IVs");
  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
                                   /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(cst.addAffineForOpDomain(loop)))
      return failure();
  }
  return success();
}

// Populates 'cst' with FlatAffineValueConstraints which represent slice bounds.
LogicalResult
ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const {
  assert(!lbOperands.empty());
  // Adds src 'ivs' as dimension variables in 'cst'.
  unsigned numDims = ivs.size();
  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
  unsigned numSymbols = lbOperands[0].size();

  SmallVector<Value, 4> values(ivs);
  // Append 'ivs' then 'operands' to 'values'.
  values.append(lbOperands[0].begin(), lbOperands[0].end());
  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);

  // Add loop bound constraints for values which are loop IVs of the
  // destination of fusion, and equality constraints for symbols which are
  // constants.
  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
    Value value = values[i];
    assert(cst->containsVar(value) && "value expected to be present");
    if (isValidSymbol(value)) {
      // Check if the symbol is a constant.
      if (std::optional<int64_t> cOp = getConstantIntValue(value))
        cst->addBound(BoundType::EQ, value, cOp.value());
    } else if (auto loop = getForInductionVarOwner(value)) {
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }

  // Add slice bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'.
  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
  assert(succeeded(ret) &&
         "should not fail as we never have semi-affine slice maps");
  (void)ret;
  return success();
}

// Clears state bounds and operand state.
void ComputationSliceState::clearBounds() {
  lbs.clear();
  ubs.clear();
  lbOperands.clear();
  ubOperands.clear();
}

void ComputationSliceState::dump() const {
  llvm::errs() << "\tIVs:\n";
  for (Value iv : ivs)
    llvm::errs() << "\t\t" << iv << "\n";

  llvm::errs() << "\tLBs:\n";
  for (auto en : llvm::enumerate(lbs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value lbOp : lbOperands[en.index()])
      llvm::errs() << "\t\t\t" << lbOp << "\n";
  }

  llvm::errs() << "\tUBs:\n";
  for (auto en : llvm::enumerate(ubs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value ubOp : ubOperands[en.index()])
      llvm::errs() << "\t\t\t" << ubOp << "\n";
  }
}

/// Fast check to determine if the computation slice is maximal. Returns true
/// if each slice dimension maps to an existing dst dimension and both the src
/// and the dst loops for those dimensions have the same bounds. Returns false
/// if the src and the dst loops don't have the same bounds. Returns
/// std::nullopt if none of the above can be proven.
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
         "Unexpected number of lbs, ubs and ivs in slice");

  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
    AffineMap lbMap = lbs[i];
    AffineMap ubMap = ubs[i];

    // Check if this slice is just an equality along this dimension.
    if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
        ubMap.getNumResults() != 1 ||
        lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
        // The condition above will be true for maps describing a single
        // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
        // Make sure we skip those cases by checking that the lb result is not
        // just a constant.
        isa<AffineConstantExpr>(lbMap.getResult(0)))
      return std::nullopt;

    // Limited support: we expect the lb result to be just a loop dimension for
    // now.
    AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
    if (!result)
      return std::nullopt;

    // Retrieve dst loop bounds.
    AffineForOp dstLoop =
        getForInductionVarOwner(lbOperands[i][result.getPosition()]);
    if (!dstLoop)
      return std::nullopt;
    AffineMap dstLbMap = dstLoop.getLowerBoundMap();
    AffineMap dstUbMap = dstLoop.getUpperBoundMap();

    // Retrieve src loop bounds.
    AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
    assert(srcLoop && "Expected affine for");
    AffineMap srcLbMap = srcLoop.getLowerBoundMap();
    AffineMap srcUbMap = srcLoop.getUpperBoundMap();

    // Limited support: we expect simple src and dst loops with a single
    // constant component per bound for now.
    if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
        dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
      return std::nullopt;

    AffineExpr srcLbResult = srcLbMap.getResult(0);
    AffineExpr dstLbResult = dstLbMap.getResult(0);
    AffineExpr srcUbResult = srcUbMap.getResult(0);
    AffineExpr dstUbResult = dstUbMap.getResult(0);
    if (!isa<AffineConstantExpr>(srcLbResult) ||
        !isa<AffineConstantExpr>(srcUbResult) ||
        !isa<AffineConstantExpr>(dstLbResult) ||
        !isa<AffineConstantExpr>(dstUbResult))
      return std::nullopt;

    // Check if src and dst loop bounds are the same. If not, we can guarantee
    // that the slice is not maximal.
    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
        srcLoop.getStep() != dstLoop.getStep())
      return false;
  }

  return true;
}
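
// Example (hypothetical slice): lbs[i] = (d0) -> (d0) and
// ubs[i] = (d0) -> (d0 + 1), with d0 bound to a dst loop IV, describe a
// single-iteration slice along dimension i. If that dst loop and the i-th
// src loop both have constant bounds [0, 128) and the same step, the fast
// check proves this dimension maximal; any symbolic or multi-result bound
// makes it return std::nullopt instead.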

/// Returns true if it is deterministically verified that the original
/// iteration space of the slice is contained within the new iteration space
/// that is created after fusing 'this' slice into its destination.
std::optional<bool> ComputationSliceState::isSliceValid() const {
  // Fast check to determine if the slice is valid. If the following conditions
  // are verified to be true, slice is declared valid by the fast check:
  // 1. Each slice loop is a single iteration loop bound in terms of a single
  //    destination loop IV.
  // 2. Loop bounds of the destination loop IV (from above) and those of the
  //    source loop IV are exactly the same.
  // If the fast check is inconclusive or false, we proceed with a more
  // expensive analysis.
  // TODO: Store the result of the fast check, as it might be used again in
  // `canRemoveSrcNodeAfterFusion`.
  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
  if (isValidFastCheck && *isValidFastCheck)
    return true;

  // Create constraints for the source loop nest from which the slice is
  // computed.
  FlatAffineValueConstraints srcConstraints;
  // TODO: Store the source's domain to avoid computation at each depth.
  if (failed(getSourceAsConstraints(srcConstraints))) {
    LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
    return std::nullopt;
  }
  // As the set difference utility currently cannot handle symbols in its
  // operands, validity of the slice cannot be determined.
  if (srcConstraints.getNumSymbolVars() > 0) {
    LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
    return std::nullopt;
  }
  // TODO: Handle local vars in the source domains while using the 'projectOut'
  // utility below. Currently, aligning is not done assuming that there will be
  // no local vars in the source domain.
  if (srcConstraints.getNumLocalVars() != 0) {
    LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
    return std::nullopt;
  }

  // Create constraints for the slice loop nest that would be created if the
  // fusion succeeds.
  FlatAffineValueConstraints sliceConstraints;
  if (failed(getAsConstraints(&sliceConstraints))) {
    LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
    return std::nullopt;
  }

  // Projecting out every dimension other than the 'ivs' to express slice's
  // domain completely in terms of source's IVs.
  sliceConstraints.projectOut(ivs.size(),
                              sliceConstraints.getNumVars() - ivs.size());

  LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
  LLVM_DEBUG(srcConstraints.dump());
  LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
                             "(expressed in terms of its source's IVs):\n");
  LLVM_DEBUG(sliceConstraints.dump());

  // TODO: Store 'srcSet' to avoid recalculating for each depth.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = sliceSet.subtract(srcSet);

  if (!diffSet.isIntegerEmpty()) {
    LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
    return false;
  }
  return true;
}
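
// Example: if the source loop iterates over [0, 128) but the slice bounds,
// re-expressed in terms of the source IVs, cover [0, 132), the difference
// set {128, ..., 131} is non-empty and the slice is reported invalid.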

/// Returns true if the computation slice encloses all the iterations of the
/// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
/// cannot determine if the slice is maximal or not.
std::optional<bool> ComputationSliceState::isMaximal() const {
  // Fast check to determine if the computation slice is maximal. If the result
  // is inconclusive, we proceed with a more expensive analysis.
  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
  if (isMaximalFastCheck)
    return isMaximalFastCheck;

  // Create constraints for the src loop nest being sliced.
  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
                                            /*numSymbols=*/0,
                                            /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(srcConstraints.addAffineForOpDomain(loop)))
      return std::nullopt;
  }

  // Create constraints for the slice using the dst loop nest information. We
  // retrieve existing dst loops from the lbOperands.
  SmallVector<Value> consumerIVs;
  for (Value lbOp : lbOperands[0])
    if (getForInductionVarOwner(lbOp))
      consumerIVs.push_back(lbOp);

  // Add empty IV Values for those new loops that are not equalities and,
  // therefore, are not yet materialized in the IR.
  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
    consumerIVs.push_back(Value());

  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
                                              /*numSymbols=*/0,
                                              /*numLocals=*/0, consumerIVs);

  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
    return std::nullopt;

  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
    // Constraint dims are different. The integer set difference can't be
    // computed so we don't know if the slice is maximal.
    return std::nullopt;

  // Compute the difference between the src loop nest and the slice integer
  // sets.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = srcSet.subtract(sliceSet);
  return diffSet.isIntegerEmpty();
}

unsigned MemRefRegion::getRank() const {
  return cast<MemRefType>(memref.getType()).getRank();
}

std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
    SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs,
    SmallVectorImpl<int64_t> *lbDivisors) const {
  auto memRefType = cast<MemRefType>(memref.getType());
  unsigned rank = memRefType.getRank();
  if (shape)
    shape->reserve(rank);

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  // Use a copy of the region constraints that has upper/lower bounds for each
  // memref dimension with static size added to guard against potential
  // over-approximation from projection or union bounding box. We may not add
  // this on the region itself since they might just be redundant constraints
  // that will need non-trivial means to eliminate.
  FlatAffineValueConstraints cstWithShapeBounds(cst);
  for (unsigned r = 0; r < rank; r++) {
    cstWithShapeBounds.addBound(BoundType::LB, r, 0);
    int64_t dimSize = memRefType.getDimSize(r);
    if (ShapedType::isDynamic(dimSize))
      continue;
    cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
  }

  // Find a constant upper bound on the extent of this memref region along each
  // dimension.
  int64_t numElements = 1;
  int64_t diffConstant;
  int64_t lbDivisor;
  for (unsigned d = 0; d < rank; d++) {
    SmallVector<int64_t, 4> lb;
    std::optional<int64_t> diff =
        cstWithShapeBounds.getConstantBoundOnDimSize64(d, &lb, &lbDivisor);
    if (diff.has_value()) {
      diffConstant = *diff;
      assert(diffConstant >= 0 && "Dim size bound can't be negative");
      assert(lbDivisor > 0);
    } else {
      // If no constant bound is found, then it can always be bound by the
      // memref's dim size if the latter has a constant size along this dim.
      auto dimSize = memRefType.getDimSize(d);
      if (dimSize == ShapedType::kDynamic)
        return std::nullopt;
      diffConstant = dimSize;
      // Lower bound becomes 0.
      lb.resize(cstWithShapeBounds.getNumSymbolVars() + 1, 0);
      lbDivisor = 1;
    }
    numElements *= diffConstant;
    if (lbs) {
      lbs->push_back(lb);
      assert(lbDivisors && "both lbs and lbDivisor or none");
      lbDivisors->push_back(lbDivisor);
    }
    if (shape) {
      shape->push_back(diffConstant);
    }
  }
  return numElements;
}
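
// Example (hypothetical region): for memref<?x8xf32> with constraints
// {%i <= d0 <= %i + 3, 0 <= d1 <= 7}, the constant extent along d0 is 4 and
// along d1 is 8, giving shape [4, 8] and numElements = 32, even though the
// memref's first dimension is dynamic.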

void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                                         AffineMap &ubMap) const {
  assert(pos < cst.getNumDimVars() && "invalid position");
  auto memRefType = cast<MemRefType>(memref.getType());
  unsigned rank = memRefType.getRank();

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  auto boundPairs = cst.getLowerAndUpperBound(
      pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
      /*localExprs=*/{}, memRefType.getContext());
  lbMap = boundPairs.first;
  ubMap = boundPairs.second;
  assert(lbMap && "lower bound for a region must exist");
  assert(ubMap && "upper bound for a region must exist");
  assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
  assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}

LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
  assert(memref == other.memref);
  return cst.unionBoundingBox(*other.getConstraints());
}

/// Computes the memory region accessed by this memref with the region
/// represented as constraints symbolic/parametric in 'loopDepth' loops
/// surrounding opInst and any additional Function symbols.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
//   affine.for %i = 0 to 32 {
//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//       load %A[%ii]
//     }
//   }
//
// region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
//
// TODO: extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                    const ComputationSliceState *sliceState,
                                    bool addMemRefDimBounds) {
  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
         "affine read/write op expected");

  MemRefAccess access(op);
  memref = access.memref;
  write = access.isStore();

  unsigned rank = access.getRank();

  LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
                          << "\ndepth: " << loopDepth << "\n";);

  // 0-d memrefs.
  if (rank == 0) {
    SmallVector<Value, 4> ivs;
    getAffineIVs(*op, ivs);
    assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
    // The first 'loopDepth' IVs are symbols for this region.
    ivs.resize(loopDepth);
    // A 0-d memref has a 0-d region.
    cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
    return success();
  }

  // Build the constraints for this region.
  AffineValueMap accessValueMap;
  access.getAccessMap(&accessValueMap);
  AffineMap accessMap = accessValueMap.getAffineMap();

  unsigned numDims = accessMap.getNumDims();
  unsigned numSymbols = accessMap.getNumSymbols();
  unsigned numOperands = accessValueMap.getNumOperands();
  // Merge operands with slice operands.
  SmallVector<Value, 4> operands;
  operands.resize(numOperands);
  for (unsigned i = 0; i < numOperands; ++i)
    operands[i] = accessValueMap.getOperand(i);

  if (sliceState != nullptr) {
    operands.reserve(operands.size() + sliceState->lbOperands[0].size());
    // Append slice operands to 'operands' as symbols.
    for (auto extraOperand : sliceState->lbOperands[0]) {
      if (!llvm::is_contained(operands, extraOperand)) {
        operands.push_back(extraOperand);
        numSymbols++;
      }
    }
  }
  // We'll first associate the dims and symbols of the access map to the dims
  // and symbols resp. of cst. This will change below once cst is
  // fully constructed.
  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);

  // Add equality constraints.
  // Add inequalities for loop lower/upper bounds.
  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
    auto operand = operands[i];
    if (auto affineFor = getForInductionVarOwner(operand)) {
      // Note that cst can now have more dimensions than accessMap if the
      // bounds expressions involve outer loops or other symbols.
      // TODO: rewrite this to use getInstIndexSet; this way
      // conditionals will be handled when the latter supports it.
      if (failed(cst.addAffineForOpDomain(affineFor)))
        return failure();
    } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
      if (failed(cst.addAffineParallelOpDomain(parallelOp)))
        return failure();
    } else if (isValidSymbol(operand)) {
      // Check if the symbol is a constant.
      Value symbol = operand;
      if (auto constVal = getConstantIntValue(symbol))
        cst.addBound(BoundType::EQ, symbol, constVal.value());
    } else {
      LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
      return failure();
    }
  }

  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
  if (sliceState != nullptr) {
    // Add dim and symbol slice operands.
    for (auto operand : sliceState->lbOperands[0]) {
      cst.addInductionVarOrTerminalSymbol(operand);
    }
    // Add upper/lower bounds from 'sliceState' to 'cst'.
    LogicalResult ret =
        cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
                           sliceState->lbOperands[0]);
    assert(succeeded(ret) &&
           "should not fail as we never have semi-affine slice maps");
    (void)ret;
  }

  // Add access function equalities to connect loop IVs to data dimensions.
  if (failed(cst.composeMap(&accessValueMap))) {
    op->emitError("getMemRefRegion: compose affine map failed");
    LLVM_DEBUG(accessValueMap.getAffineMap().dump());
    return failure();
  }

  // Set all variables appearing after the first 'rank' variables as
  // symbolic variables - so that the ones corresponding to the memref
  // dimensions are the dimensional variables for the memref region.
  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);

  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
  // this memref region is symbolic.
  SmallVector<Value, 4> enclosingIVs;
  getAffineIVs(*op, enclosingIVs);
  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
  enclosingIVs.resize(loopDepth);
  SmallVector<Value, 4> vars;
  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
  for (Value var : vars) {
    if ((isAffineInductionVar(var)) && !llvm::is_contained(enclosingIVs, var)) {
      cst.projectOut(var);
    }
  }

  // Project out any local variables (these would have been added for any
  // mod/divs).
  cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());

  // Constant fold any symbolic variables.
  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
                           /*num=*/cst.getNumSymbolVars());

  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");

  // Add upper/lower bounds for each memref dimension with static size
  // to guard against potential over-approximation from projection.
  // TODO: Support dynamic memref dimensions.
  if (addMemRefDimBounds) {
    auto memRefType = cast<MemRefType>(memref.getType());
    for (unsigned r = 0; r < rank; r++) {
      cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
      if (memRefType.isDynamicDim(r))
        continue;
      cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
    }
  }
  cst.removeTrivialRedundancy();

  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
  LLVM_DEBUG(cst.dump());
  return success();
}

std::optional<int64_t>
mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
  auto elementType = memRefType.getElementType();

  unsigned sizeInBits;
  if (elementType.isIntOrFloat()) {
    sizeInBits = elementType.getIntOrFloatBitWidth();
  } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
    if (vectorType.getElementType().isIntOrFloat())
      sizeInBits =
          vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
    else
      return std::nullopt;
  } else {
    return std::nullopt;
  }
  return llvm::divideCeil(sizeInBits, 8);
}
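
// Example: for an element type of vector<4xf32>, sizeInBits = 4 * 32 = 128,
// so the element size is divideCeil(128, 8) = 16 bytes; for i1 it is
// divideCeil(1, 8) = 1 byte.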

// Returns the size of the region.
std::optional<int64_t> MemRefRegion::getRegionSize() {
  auto memRefType = cast<MemRefType>(memref.getType());

  if (!memRefType.getLayout().isIdentity()) {
    LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
    return std::nullopt;
  }

  // Indices to use for the DmaStart op.
  // Indices for the original memref being DMAed from/to.
  SmallVector<Value, 4> memIndices;
  // Indices for the faster buffer being DMAed into/from.
  SmallVector<Value, 4> bufIndices;

  // Compute the extents of the buffer.
  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
  if (!numElements) {
    LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
    return std::nullopt;
  }
  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!eltSize)
    return std::nullopt;
  return *eltSize * *numElements;
}

/// Returns the size of memref data in bytes if it's statically shaped,
/// std::nullopt otherwise. If the element of the memref has vector type, takes
/// into account size of the vector as well.
// TODO: improve/complete this when we have target data.
std::optional<uint64_t>
mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
  if (!memRefType.hasStaticShape())
    return std::nullopt;
  auto elementType = memRefType.getElementType();
  if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
    return std::nullopt;

  auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!sizeInBytes)
    return std::nullopt;
  for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
    sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
  }
  return sizeInBytes;
}
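
// Example: memref<4x8xf32> is statically shaped, so the size is
// 4 (bytes per f32) * 4 * 8 = 128 bytes; memref<?x8xf32> returns
// std::nullopt.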

template <typename LoadOrStoreOp>
LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
                                                    bool emitError) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "argument should be either an AffineReadOpInterface or an "
                "AffineWriteOpInterface");

  Operation *op = loadOrStoreOp.getOperation();
  MemRefRegion region(op->getLoc());
  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
                            /*addMemRefDimBounds=*/false)))
    return success();

  LLVM_DEBUG(llvm::dbgs() << "Memory region");
  LLVM_DEBUG(region.getConstraints()->dump());

  bool outOfBounds = false;
  unsigned rank = loadOrStoreOp.getMemRefType().getRank();

  // For each dimension, check for out of bounds.
  for (unsigned r = 0; r < rank; r++) {
    FlatAffineValueConstraints ucst(*region.getConstraints());

    // Intersect memory region with constraint capturing out of bounds (both
    // out of upper and out of lower), and check if the constraint system is
    // feasible. If it is, there is at least one point out of bounds.
    SmallVector<int64_t, 4> ineq(rank + 1, 0);
    int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
    // TODO: handle dynamic dim sizes.
    if (dimSize == -1)
      continue;

    // Check for overflow: d_i >= memref dim size.
    ucst.addBound(BoundType::LB, r, dimSize);
    outOfBounds = !ucst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of upper bound access along dimension #" << (r + 1);
    }

    // Check for a negative index.
    FlatAffineValueConstraints lcst(*region.getConstraints());
    std::fill(ineq.begin(), ineq.end(), 0);
    // d_i <= -1;
    lcst.addBound(BoundType::UB, r, -1);
    outOfBounds = !lcst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of lower bound access along dimension #" << (r + 1);
    }
  }
  return failure(outOfBounds);
}
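
// Example (hypothetical IR): for
//
//   affine.for %i = 0 to 12 {
//     %v = affine.load %A[%i] : memref<10xf32>
//   }
//
// the region along dimension 0 is [0, 11]; adding the bound d0 >= 10 leaves
// the feasible set {10, 11}, so an out-of-upper-bound error is emitted for
// dimension #1.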

// Explicitly instantiate the template so that the compiler knows we need them!
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
                                      bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
                                      bool emitError);

// Returns in 'positions' the Block positions of 'op' in each ancestor
// Block from the Block containing operation, stopping at 'limitBlock'.
static void findInstPosition(Operation *op, Block *limitBlock,
                             SmallVectorImpl<unsigned> *positions) {
  Block *block = op->getBlock();
  while (block != limitBlock) {
    // FIXME: This algorithm is unnecessarily O(n) and should be improved to
    // not rely on linear scans.
    int instPosInBlock = std::distance(block->begin(), op->getIterator());
    positions->push_back(instPosInBlock);
    op = block->getParentOp();
    block = op->getBlock();
  }
  std::reverse(positions->begin(), positions->end());
}
1469 
1470 // Returns the Operation in a possibly nested set of Blocks, where the
1471 // position of the operation is represented by 'positions', which has a
1472 // Block position for each level of nesting.
1474  unsigned level, Block *block) {
1475  unsigned i = 0;
1476  for (auto &op : *block) {
1477  if (i != positions[level]) {
1478  ++i;
1479  continue;
1480  }
1481  if (level == positions.size() - 1)
1482  return &op;
1483  if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
1484  return getInstAtPosition(positions, level + 1,
1485  childAffineForOp.getBody());
1486 
1487  for (auto &region : op.getRegions()) {
1488  for (auto &b : region)
1489  if (auto *ret = getInstAtPosition(positions, level + 1, &b))
1490  return ret;
1491  }
1492  return nullptr;
1493  }
1494  return nullptr;
1495 }
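// Example (an illustrative sketch): the two helpers above round-trip. Given
//   ^bb0: op0, affine.for %i { op1, op2 }
// findInstPosition(op2, ^bb0, &positions) yields positions = {1, 1} (the
// affine.for sits at index 1 in ^bb0; op2 at index 1 in the loop body), and
// getInstAtPosition({1, 1}, /*level=*/0, ^bb0) recovers op2.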
1496 
1497 // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
1498 static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
1499  FlatAffineValueConstraints *cst) {
1500  for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
1501  auto value = cst->getValue(i);
1502  if (ivs.count(value) == 0) {
1503  assert(isAffineForInductionVar(value));
1504  auto loop = getForInductionVarOwner(value);
1505  if (failed(cst->addAffineForOpDomain(loop)))
1506  return failure();
1507  }
1508  }
1509  return success();
1510 }
1511 
1512 /// Returns the innermost common loop depth for the set of operations in 'ops'.
1513 // TODO: Move this to LoopUtils.
1514 unsigned mlir::affine::getInnermostCommonLoopDepth(
1515  ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
1516  unsigned numOps = ops.size();
1517  assert(numOps > 0 && "Expected at least one operation");
1518 
1519  std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
1520  unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
1521  for (unsigned i = 0; i < numOps; ++i) {
1522  getAffineForIVs(*ops[i], &loops[i]);
1523  loopDepthLimit =
1524  std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
1525  }
1526 
1527  unsigned loopDepth = 0;
1528  for (unsigned d = 0; d < loopDepthLimit; ++d) {
1529  unsigned i;
1530  for (i = 1; i < numOps; ++i) {
1531  if (loops[i - 1][d] != loops[i][d])
1532  return loopDepth;
1533  }
1534  if (surroundingLoops)
1535  surroundingLoops->push_back(loops[i - 1][d]);
1536  ++loopDepth;
1537  }
1538  return loopDepth;
1539 }
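// Example (an illustrative sketch): for
//   affine.for %i {
//     affine.for %j { <opA> }
//     affine.for %k { <opB> }
//   }
// getInnermostCommonLoopDepth({opA, opB}, &loops) returns 1 and populates
// 'loops' with only the %i loop, since the nests diverge below depth 1.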
1540 
1541 /// Computes in 'sliceUnion' the union of all slice bounds computed at
1542 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
1543 /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
1544 /// union was computed correctly, an appropriate failure otherwise.
1545 SliceComputationResult
1546 mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA,
1547  ArrayRef<Operation *> opsB, unsigned loopDepth,
1548  unsigned numCommonLoops, bool isBackwardSlice,
1549  ComputationSliceState *sliceUnion) {
1550  // Compute the union of slice bounds between all pairs in 'opsA' and
1551  // 'opsB' in 'sliceUnionCst'.
1552  FlatAffineValueConstraints sliceUnionCst;
1553  assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
1554  std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
1555  for (Operation *i : opsA) {
1556  MemRefAccess srcAccess(i);
1557  for (Operation *j : opsB) {
1558  MemRefAccess dstAccess(j);
1559  if (srcAccess.memref != dstAccess.memref)
1560  continue;
1561  // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
1562  if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) ||
1563  (isBackwardSlice && loopDepth > getNestingDepth(j))) {
1564  LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
1565  return SliceComputationResult::GenericFailure;
1566  }
1567 
1568  bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
1569  isa<AffineReadOpInterface>(dstAccess.opInst);
1570  FlatAffineValueConstraints dependenceConstraints;
1571  // Check dependence between 'srcAccess' and 'dstAccess'.
1572  DependenceResult result = checkMemrefAccessDependence(
1573  srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
1574  &dependenceConstraints, /*dependenceComponents=*/nullptr,
1575  /*allowRAR=*/readReadAccesses);
1576  if (result.value == DependenceResult::Failure) {
1577  LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
1578  return SliceComputationResult::GenericFailure;
1579  }
1580  if (result.value == DependenceResult::NoDependence)
1581  continue;
1582  dependentOpPairs.emplace_back(i, j);
1583 
1584  // Compute slice bounds for 'srcAccess' and 'dstAccess'.
1585  ComputationSliceState tmpSliceState;
1586  mlir::affine::getComputationSliceState(i, j, dependenceConstraints,
1587  loopDepth, isBackwardSlice,
1588  &tmpSliceState);
1589 
1590  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1591  // Initialize 'sliceUnionCst' with the bounds computed in previous step.
1592  if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
1593  LLVM_DEBUG(llvm::dbgs()
1594  << "Unable to compute slice bound constraints\n");
1595  return SliceComputationResult::GenericFailure;
1596  }
1597  assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
1598  continue;
1599  }
1600 
1601  // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
1602  FlatAffineValueConstraints tmpSliceCst;
1603  if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
1604  LLVM_DEBUG(llvm::dbgs()
1605  << "Unable to compute slice bound constraints\n");
1606  return SliceComputationResult::GenericFailure;
1607  }
1608 
1609  // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
1610  if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
1611 
1612  // Pre-constraint var alignment: record loop IVs used in each constraint
1613  // system.
1614  SmallPtrSet<Value, 8> sliceUnionIVs;
1615  for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
1616  sliceUnionIVs.insert(sliceUnionCst.getValue(k));
1617  SmallPtrSet<Value, 8> tmpSliceIVs;
1618  for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
1619  tmpSliceIVs.insert(tmpSliceCst.getValue(k));
1620 
1621  sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);
1622 
1623  // Post-constraint var alignment: add loop IV bounds missing after
1624  // var alignment to constraint systems. This can occur if one constraint
1625  // system uses a loop IV that is not used by the other. The call
1626  // to unionBoundingBox below expects constraints for each Loop IV, even
1627  // if they are the unsliced full loop bounds added here.
1628  if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
1629  return SliceComputationResult::GenericFailure;
1630  if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
1631  return SliceComputationResult::GenericFailure;
1632  }
1633  // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
1634  if (sliceUnionCst.getNumLocalVars() > 0 ||
1635  tmpSliceCst.getNumLocalVars() > 0 ||
1636  failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
1637  LLVM_DEBUG(llvm::dbgs()
1638  << "Unable to compute union bounding box of slice bounds\n");
1639  return SliceComputationResult::GenericFailure;
1640  }
1641  }
1642  }
1643 
1644  // Empty union.
1645  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1646  LLVM_DEBUG(llvm::dbgs() << "empty slice union - unexpected\n");
1647  return SliceComputationResult::GenericFailure;
1648  }
1649 
1650  // Gather loops surrounding ops from loop nest where slice will be inserted.
1651  SmallVector<Operation *, 4> ops;
1652  for (auto &dep : dependentOpPairs) {
1653  ops.push_back(isBackwardSlice ? dep.second : dep.first);
1654  }
1655  SmallVector<AffineForOp, 4> surroundingLoops;
1656  unsigned innermostCommonLoopDepth =
1657  getInnermostCommonLoopDepth(ops, &surroundingLoops);
1658  if (loopDepth > innermostCommonLoopDepth) {
1659  LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
1660  return SliceComputationResult::GenericFailure;
1661  }
1662 
1663  // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
1664  unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
1665 
1666  // Convert any dst loop IVs which are symbol variables to dim variables.
1667  sliceUnionCst.convertLoopIVSymbolsToDims();
1668  sliceUnion->clearBounds();
1669  sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
1670  sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
1671 
1672  // Get slice bounds from slice union constraints 'sliceUnionCst'.
1673  sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
1674  opsA[0]->getContext(), &sliceUnion->lbs,
1675  &sliceUnion->ubs);
1676 
1677  // Add slice bound operands of union.
1678  SmallVector<Value, 4> sliceBoundOperands;
1679  sliceUnionCst.getValues(numSliceLoopIVs,
1680  sliceUnionCst.getNumDimAndSymbolVars(),
1681  &sliceBoundOperands);
1682 
1683  // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
1684  sliceUnion->ivs.clear();
1685  sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);
1686 
1687  // Set the loop nest insertion point to the block start at 'loopDepth' for
1688  // backward slices, and to the block end for forward slices.
1689  sliceUnion->insertPoint =
1690  isBackwardSlice
1691  ? surroundingLoops[loopDepth - 1].getBody()->begin()
1692  : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
1693 
1694  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1695  // canonicalization.
1696  sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1697  sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1698 
1699  // Check if the slice computed is valid. Return success only if it is verified
1700  // that the slice is valid, otherwise return appropriate failure status.
1701  std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
1702  if (!isSliceValid) {
1703  LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
1704  return SliceComputationResult::GenericFailure;
1705  }
1706  if (!*isSliceValid)
1707  return SliceComputationResult::IncorrectSliceFailure;
1708 
1709  return SliceComputationResult::Success;
1710 }
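// Example (a minimal usage sketch; `storeOp` and `loadOp` are hypothetical
// affine ops on the same memref in sibling loop nests):
//
//   ComputationSliceState sliceUnion;
//   SliceComputationResult res =
//       computeSliceUnion(/*opsA=*/{storeOp}, /*opsB=*/{loadOp},
//                         /*loopDepth=*/1, /*numCommonLoops=*/0,
//                         /*isBackwardSlice=*/true, &sliceUnion);
//   if (res.value == SliceComputationResult::Success) {
//     // 'sliceUnion' now bounds the source iterations the load depends on.
//   }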
1711 
1712 // TODO: extend this to handle multiple result maps.
1713 static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1714  AffineMap ubMap) {
1715  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
1716  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
1717  assert(lbMap.getNumDims() == ubMap.getNumDims());
1718  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1719  AffineExpr lbExpr(lbMap.getResult(0));
1720  AffineExpr ubExpr(ubMap.getResult(0));
1721  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
1722  lbMap.getNumSymbols());
1723  auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
1724  if (!cExpr)
1725  return std::nullopt;
1726  return cExpr.getValue();
1727 }
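// Example: for lbMap = (d0) -> (d0) and ubMap = (d0) -> (d0 + 4), the
// difference simplifies to the constant 4. For ubMap = (d0) -> (d0 + d0),
// the difference is d0, which is not constant, so std::nullopt is returned.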
1728 
1729 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for the
1730 // loop nest represented by the slice loop bounds in 'slice'. Returns true
1731 // on success, false otherwise (if a non-constant trip count was encountered).
1732 // TODO: Make this work with non-unit step loops.
1733 bool mlir::affine::buildSliceTripCountMap(
1734  const ComputationSliceState &slice,
1735  llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1736  unsigned numSrcLoopIVs = slice.ivs.size();
1737  // Populate map from AffineForOp -> trip count
1738  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1739  AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1740  auto *op = forOp.getOperation();
1741  AffineMap lbMap = slice.lbs[i];
1742  AffineMap ubMap = slice.ubs[i];
1743  // If the lower or upper bound map is null or provides no results, the
1744  // source loop was not sliced at all, and the entire loop becomes part
1745  // of the slice.
1746  if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
1747  ubMap.getNumResults() == 0) {
1748  // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1749  if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1750  (*tripCountMap)[op] =
1751  forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1752  continue;
1753  }
1754  std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1755  if (maybeConstTripCount.has_value()) {
1756  (*tripCountMap)[op] = *maybeConstTripCount;
1757  continue;
1758  }
1759  return false;
1760  }
1761  std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1762  // Slice bounds are created with a constant ub - lb difference.
1763  if (!tripCount.has_value())
1764  return false;
1765  (*tripCountMap)[op] = *tripCount;
1766  }
1767  return true;
1768 }
1769 
1770 // Return the number of iterations in the given slice.
1771 uint64_t mlir::affine::getSliceIterationCount(
1772  const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1773  uint64_t iterCount = 1;
1774  for (const auto &count : sliceTripCountMap) {
1775  iterCount *= count.second;
1776  }
1777  return iterCount;
1778 }
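// Example (an illustrative sketch): for a slice with bounds
//   lbs[0] = (d0) -> (d0), ubs[0] = (d0) -> (d0 + 2)
//   lbs[1] = (d0) -> (0),  ubs[1] = (d0) -> (4)
// buildSliceTripCountMap records trip counts {2, 4} for the two sliced loops,
// and getSliceIterationCount then returns 2 * 4 = 8.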
1779 
1780 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1781 // Computes slice bounds by projecting out any loop IVs from
1782 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1783 // bounds in 'sliceState' which represent one loop nest's IVs in terms of
1784 // the other loop nest's IVs, symbols, and constants (per 'isBackwardSlice').
1785 void mlir::affine::getComputationSliceState(
1786  Operation *depSourceOp, Operation *depSinkOp,
1787  const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth,
1788  bool isBackwardSlice, ComputationSliceState *sliceState) {
1789  // Get loop nest surrounding src operation.
1790  SmallVector<AffineForOp, 4> srcLoopIVs;
1791  getAffineForIVs(*depSourceOp, &srcLoopIVs);
1792  unsigned numSrcLoopIVs = srcLoopIVs.size();
1793 
1794  // Get loop nest surrounding dst operation.
1795  SmallVector<AffineForOp, 4> dstLoopIVs;
1796  getAffineForIVs(*depSinkOp, &dstLoopIVs);
1797  unsigned numDstLoopIVs = dstLoopIVs.size();
1798 
1799  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1800  (isBackwardSlice && loopDepth <= numDstLoopIVs));
1801 
1802  // Project out dimensions other than those up to 'loopDepth'.
1803  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1804  unsigned num =
1805  isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1806  FlatAffineValueConstraints sliceCst(dependenceConstraints);
1807  sliceCst.projectOut(pos, num);
1808 
1809  // Add slice loop IV values to 'sliceState'.
1810  unsigned offset = isBackwardSlice ? 0 : loopDepth;
1811  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1812  sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs);
1813 
1814  // Set up lower/upper bound affine maps for the slice.
1815  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
1816  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
1817 
1818  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1819  sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(),
1820  &sliceState->lbs, &sliceState->ubs);
1821 
1822  // Set up bound operands for the slice's lower and upper bounds.
1823  SmallVector<Value, 4> sliceBoundOperands;
1824  unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars();
1825  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
1826  if (i < offset || i >= offset + numSliceLoopIVs)
1827  sliceBoundOperands.push_back(sliceCst.getValue(i));
1828  }
1829 
1830  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1831  // canonicalization.
1832  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1833  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1834 
1835  // Set the insertion point: begin of the dst loop body at 'loopDepth' for backward slices, end of the src loop body for forward slices.
1836  sliceState->insertPoint =
1837  isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1838  : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1839 
1840  llvm::SmallDenseSet<Value, 8> sequentialLoops;
1841  if (isa<AffineReadOpInterface>(depSourceOp) &&
1842  isa<AffineReadOpInterface>(depSinkOp)) {
1843  // For read-read access pairs, clear any slice bounds on sequential loops.
1844  // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1845  getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
1846  &sequentialLoops);
1847  }
1848  auto getSliceLoop = [&](unsigned i) {
1849  return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1850  };
1851  auto isInnermostInsertion = [&]() {
1852  return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1853  : loopDepth >= dstLoopIVs.size());
1854  };
1855  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
1856  auto srcIsUnitSlice = [&]() {
1857  return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
1858  (getSliceIterationCount(sliceTripCountMap) == 1));
1859  };
1860  // Clear all sliced loop bounds beginning at the first sequential loop, or
1861 // the first loop with a slice fusion barrier attribute.
1862 
1863  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
1864  Value iv = getSliceLoop(i).getInductionVar();
1865  if (sequentialLoops.count(iv) == 0 &&
1866  getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1867  continue;
1868  // Skip resetting the bounds of a reduction loop inserted in the destination
1869  // loop nest when all of the following conditions hold:
1870  // 1. Slice is single trip count.
1871  // 2. Loop bounds of the source and destination match.
1872  // 3. Is being inserted at the innermost insertion point.
1873  std::optional<bool> isMaximal = sliceState->isMaximal();
1874  if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
1875  isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1876  continue;
1877  for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1878  sliceState->lbs[j] = AffineMap();
1879  sliceState->ubs[j] = AffineMap();
1880  }
1881  break;
1882  }
1883 }
1884 
1885 /// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1886 /// updates the slice loop bounds with any non-null bound maps specified in
1887 /// 'sliceState', and inserts this slice into the loop nest surrounding
1888 /// 'dstOpInst' at loop depth 'dstLoopDepth'.
1889 // TODO: extend the slicing utility to compute slices that
1890 // aren't necessarily a one-to-one relation between the source and destination. The
1891 // relation between the source and destination could be many-to-many in general.
1892 // TODO: the slice computation is incorrect in the cases
1893 // where the dependence from the source to the destination does not cover the
1894 // entire destination index set. Subtract out the dependent destination
1895 // iterations from destination index set and check for emptiness --- this is one
1896 // solution.
1897 AffineForOp mlir::affine::insertBackwardComputationSlice(
1898  Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
1899  ComputationSliceState *sliceState) {
1900  // Get loop nest surrounding src operation.
1901  SmallVector<AffineForOp, 4> srcLoopIVs;
1902  getAffineForIVs(*srcOpInst, &srcLoopIVs);
1903  unsigned numSrcLoopIVs = srcLoopIVs.size();
1904 
1905  // Get loop nest surrounding dst operation.
1906  SmallVector<AffineForOp, 4> dstLoopIVs;
1907  getAffineForIVs(*dstOpInst, &dstLoopIVs);
1908  unsigned dstLoopIVsSize = dstLoopIVs.size();
1909  if (dstLoopDepth > dstLoopIVsSize) {
1910  dstOpInst->emitError("invalid destination loop depth");
1911  return AffineForOp();
1912  }
1913 
1914  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1915  SmallVector<unsigned, 4> positions;
1916  // TODO: This code is incorrect since srcLoopIVs can be 0-d.
1917  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
1918 
1919  // Clone the src loop nest and insert it at the beginning of the operation block
1920  // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1921  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1922  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1923  auto sliceLoopNest =
1924  cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
1925 
1926  Operation *sliceInst =
1927  getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
1928  // Get loop nest surrounding 'sliceInst'.
1929  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
1930  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1931 
1932  // Sanity check.
1933  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1934  (void)sliceSurroundingLoopsSize;
1935  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1936  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1937  (void)sliceLoopLimit;
1938  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1939 
1940  // Update loop bounds for loops in 'sliceLoopNest'.
1941  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1942  auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1943  if (AffineMap lbMap = sliceState->lbs[i])
1944  forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
1945  if (AffineMap ubMap = sliceState->ubs[i])
1946  forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
1947  }
1948  return sliceLoopNest;
1949 }
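// Example (an illustrative sketch): for
//   affine.for %i = 0 to 100 { affine.store %v, %A[%i] }  // source nest
//   affine.for %j = 0 to 100 { %u = affine.load %A[%j] }  // destination nest
// a backward slice at dstLoopDepth = 1 clones the store's loop into the body
// of the %j loop; with slice bounds [%j, %j + 1), the cloned loop computes
// exactly the value consumed by iteration %j.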
1950 
1951 // Constructs MemRefAccess populating it with the memref, its indices and
1952 // opInst from 'loadOrStoreOpInst'.
1953 MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) {
1954  if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
1955  memref = loadOp.getMemRef();
1956  opInst = loadOrStoreOpInst;
1957  llvm::append_range(indices, loadOp.getMapOperands());
1958  } else {
1959  assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
1960  "Affine read/write op expected");
1961  auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
1962  opInst = loadOrStoreOpInst;
1963  memref = storeOp.getMemRef();
1964  llvm::append_range(indices, storeOp.getMapOperands());
1965  }
1966 }
1967 
1968 unsigned MemRefAccess::getRank() const {
1969  return cast<MemRefType>(memref.getType()).getRank();
1970 }
1971 
1972 bool MemRefAccess::isStore() const {
1973  return isa<AffineWriteOpInterface>(opInst);
1974 }
1975 
1976 /// Returns the nesting depth of this operation, i.e., the number of loops
1977 /// surrounding it.
1978 unsigned mlir::affine::getNestingDepth(Operation *op) {
1979  Operation *currOp = op;
1980  unsigned depth = 0;
1981  while ((currOp = currOp->getParentOp())) {
1982  if (isa<AffineForOp>(currOp))
1983  depth++;
1984  }
1985  return depth;
1986 }
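// Example: in
//   affine.for %i { affine.for %j { %v = affine.load %A[%i, %j] } }
// getNestingDepth of the load is 2; only 'affine.for' ancestors count, so an
// enclosing affine.if would not increase the depth.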
1987 
1988 /// Equal if both affine accesses are provably equivalent (at compile
1989 /// time) when considering the memref, the affine maps and their respective
1990 /// operands. The equality of access functions + operands is checked by
1991 /// subtracting fully composed value maps, and then simplifying the difference
1992 /// using the expression flattener.
1993 /// TODO: this does not account for aliasing of memrefs.
1994 bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
1995  if (memref != rhs.memref)
1996  return false;
1997 
1998  AffineValueMap diff, thisMap, rhsMap;
1999  getAccessMap(&thisMap);
2000  rhs.getAccessMap(&rhsMap);
2001  return thisMap == rhsMap;
2002 }
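// Example: the accesses
//   %j = affine.apply affine_map<(d0) -> (d0)>(%i)
//   %v0 = affine.load %A[%j]
//   %v1 = affine.load %A[%i]
// compare equal: composing the affine.apply into the access map yields
// identical access functions on the same memref.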
2003 
2004 void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
2005  auto *currOp = op.getParentOp();
2006  AffineForOp currAffineForOp;
2007  // Traverse up the hierarchy collecting all 'affine.for' and 'affine.parallel'
2008  // operations while skipping over 'affine.if' operations.
2009  while (currOp) {
2010  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
2011  ivs.push_back(currAffineForOp.getInductionVar());
2012  else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
2013  llvm::append_range(ivs, parOp.getIVs());
2014  currOp = currOp->getParentOp();
2015  }
2016  std::reverse(ivs.begin(), ivs.end());
2017 }
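// Example: for
//   affine.for %i { affine.parallel (%p) { affine.if ... { <op> } } }
// getAffineIVs(<op>, ivs) sets ivs = [%i, %p]; the affine.if contributes no
// IVs and is skipped.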
2018 
2019 /// Returns the number of surrounding loops common to both 'a' and 'b',
2020 /// matching loops from outermost to innermost in the loop nests.
2021 unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
2022  Operation &b) {
2023  SmallVector<Value, 4> loopsA, loopsB;
2024  getAffineIVs(a, loopsA);
2025  getAffineIVs(b, loopsB);
2026 
2027  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
2028  unsigned numCommonLoops = 0;
2029  for (unsigned i = 0; i < minNumLoops; ++i) {
2030  if (loopsA[i] != loopsB[i])
2031  break;
2032  ++numCommonLoops;
2033  }
2034  return numCommonLoops;
2035 }
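// Example: reusing the nest from getInnermostCommonLoopDepth above, two ops
// placed under sibling %j and %k loops that share an outer %i loop have
// getNumCommonSurroundingLoops == 1: the IV lists [%i, %j] and [%i, %k]
// match only in their first entry.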
2036 
2037 static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
2038  Block::iterator start,
2039  Block::iterator end,
2040  int memorySpace) {
2041  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
2042 
2043  // Walk the block range to gather all memory regions.
2044  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
2045  if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
2046  // Neither a load nor a store op.
2047  return WalkResult::advance();
2048  }
2049 
2050  // Compute the memref region symbolic in any IVs enclosing this block.
2051  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
2052  if (failed(
2053  region->compute(opInst,
2054  /*loopDepth=*/getNestingDepth(&*block.begin())))) {
2055  LLVM_DEBUG(opInst->emitError("error obtaining memory region"));
2056  return failure();
2057  }
2058 
2059  auto [it, inserted] = regions.try_emplace(region->memref);
2060  if (inserted) {
2061  it->second = std::move(region);
2062  } else if (failed(it->second->unionBoundingBox(*region))) {
2063  LLVM_DEBUG(opInst->emitWarning(
2064  "getMemoryFootprintBytes: unable to perform a union on a memory "
2065  "region"));
2066  return failure();
2067  }
2068  return WalkResult::advance();
2069  });
2070  if (result.wasInterrupted())
2071  return std::nullopt;
2072 
2073  int64_t totalSizeInBytes = 0;
2074  for (const auto &region : regions) {
2075  std::optional<int64_t> size = region.second->getRegionSize();
2076  if (!size.has_value())
2077  return std::nullopt;
2078  totalSizeInBytes += *size;
2079  }
2080  return totalSizeInBytes;
2081 }
2082 
2083 std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
2084  int memorySpace) {
2085  auto *forInst = forOp.getOperation();
2086  return ::getMemoryFootprintBytes(
2087  *forInst->getBlock(), Block::iterator(forInst),
2088  std::next(Block::iterator(forInst)), memorySpace);
2089 }
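// Example (an illustrative sketch, assuming the default memory space): for
//   affine.for %i = 0 to 1024 { %v = affine.load %A[%i] : memref<1024xf32> }
// the single region covers all 1024 f32 elements, so the footprint is
// 1024 * 4 = 4096 bytes.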
2090 
2091 /// Returns whether a loop is parallel and contains a reduction loop.
2092 bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
2093  SmallVector<LoopReduction> reductions;
2094  if (!isLoopParallel(forOp, &reductions))
2095  return false;
2096  return !reductions.empty();
2097 }
2098 
2099 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
2100 /// at 'forOp'.
2101 void mlir::affine::getSequentialLoops(
2102  AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
2103  forOp->walk([&](Operation *op) {
2104  if (auto innerFor = dyn_cast<AffineForOp>(op))
2105  if (!isLoopParallel(innerFor))
2106  sequentialLoops->insert(innerFor.getInductionVar());
2107  });
2108 }
2109 
2110 IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
2111  FlatAffineValueConstraints fac(set);
2112  if (fac.isEmpty())
2113  return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
2114  set.getContext());
2115  fac.removeTrivialRedundancy();
2116 
2117  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
2118  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
2119  return simplifiedSet;
2120 }
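// Example: the set (d0) : (d0 >= 0, -d0 - 1 >= 0) contains no integer points,
// so it is simplified to the canonical empty set produced by
// IntegerSet::getEmptySet, i.e. a single infeasible constraint.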
2121 
2122 static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
2123  SmallVector<Value> &target) {
2124  target =
2125  llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
2126  return val.has_value() ? *val : Value();
2127  }));
2128 }
2129 
2130 /// Bound an identifier `pos` in a given FlatAffineValueConstraints with
2131 /// constraints drawn from an affine map. Before adding the constraint, the
2132 /// dimensions/symbols of the affine map are aligned with `constraints`.
2133 /// `operands` are the SSA Value operands used with the affine map.
2134 /// Note: This function adds a new symbol column to the `constraints` for each
2135 /// dimension/symbol that exists in the affine map but not in `constraints`.
2136 static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
2137  BoundType type, unsigned pos,
2138  AffineMap map, ValueRange operands) {
2139  SmallVector<Value> dims, syms, newSyms;
2140  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
2141  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
2142 
2143  AffineMap alignedMap =
2144  alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
2145  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
2146  constraints.appendSymbolVar(newSyms[i]);
2147  return constraints.addBound(type, pos, alignedMap);
2148 }
2149 
2150 /// Add `val` to each result of `map`.
2151 static AffineMap addConstToResults(AffineMap map, int64_t val) {
2152  SmallVector<AffineExpr> newResults;
2153  for (AffineExpr r : map.getResults())
2154  newResults.push_back(r + val);
2155  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
2156  map.getContext());
2157 }
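// Example: addConstToResults((d0, d1) -> (d0, d1 + 2), /*val=*/1) yields
// (d0, d1) -> (d0 + 1, d1 + 3).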
2158 
2159 // Attempt to simplify the given min/max operation by proving that its value is
2160 // bounded by the same lower and upper bound.
2161 //
2162 // Bounds are computed by FlatAffineValueConstraints. Invariants required for
2163 // finding/proving bounds should be supplied via `constraints`.
2164 //
2165 // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
2166 // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
2167 // case of `!isMin`) and bind it to `opBound`. SSA values that are used in
2168 // `op` but are not part of `constraints`, are added as extra symbols.
2169 // 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
2170 // * If `isMin`: r_i >= opBound
2171 // * If `isMax`: r_i <= opBound
2172 // If this is the case, ub(op) == lb(op).
2173 // 4. Replace `op` with `opBound`.
2174 //
2175 // In summary, the following constraints are added throughout this function.
2176 // Note: `invar` are dimensions added by the caller to express the invariants.
2177 // (Showing only the case where `isMin`.)
2178 //
2179 // invar | op | opBound | r_i | extra syms... | const | eq/ineq
2180 // ------+-------+---------+-----+---------------+-------+-------------------
2181 // (various eq./ineq. constraining `invar`, added by the caller)
2182 // ... | 0 | 0 | 0 | 0 | ... | ...
2183 // ------+-------+---------+-----+---------------+-------+-------------------
2184 // (various ineq. constraining `op` in terms of `op` operands (`invar` and
2185 // extra `op` operands "extra syms" that are not in `invar`)).
2186 // ... | -1 | 0 | 0 | ... | ... | >= 0
2187 // ------+-------+---------+-----+---------------+-------+-------------------
2188 // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
2189 // ... | 0 | -1 | 0 | ... | ... | = 0
2190 // ------+-------+---------+-----+---------------+-------+-------------------
2191 // (for each `op` map result r_i: set r_i to corresponding map result,
2192 // prove that r_i >= minOpUb via contradiction)
2193 // ... | 0 | 0 | -1 | ... | ... | = 0
2194 // 0 | 0 | 1 | -1 | 0 | -1 | >= 0
2195 //
2196 FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
2197  Operation *op, FlatAffineValueConstraints constraints) {
2198  bool isMin = isa<AffineMinOp>(op);
2199  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
2200  MLIRContext *ctx = op->getContext();
2201  Builder builder(ctx);
2202  AffineMap map =
2203  isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2204  ValueRange operands = op->getOperands();
2205  unsigned numResults = map.getNumResults();
2206 
2207  // Add a few extra dimensions.
2208  unsigned dimOp = constraints.appendDimVar(); // `op`
2209  unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
2210  unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults);
2211 
2212  // Add an inequality for each result expr_i of map:
2213  // isMin: op <= expr_i, !isMin: op >= expr_i
2214  auto boundType = isMin ? BoundType::UB : BoundType::LB;
2215  // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
2216  AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
2217  if (failed(
2218  alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
2219  return failure();
2220 
2221  // Try to compute a lower/upper bound for op, expressed in terms of the other
2222  // `dims` and extra symbols.
2223  SmallVector<AffineMap> opLb(1), opUb(1);
2224  constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
2225  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
2226  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
2227  // a TODO of `getSliceBounds` and not handled here.
2228  if (!sliceBound || sliceBound.getNumResults() != 1)
2229  return failure(); // No or multiple bounds found.
2230  // Recover the inclusive UB in the case of an `affine.min`.
2231  AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;
2232 
2233  // Add an equality: Set dimOpBound to computed bound.
2234  // Add back dimension for op. (Was removed by `getSliceBounds`.)
2235  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
2236  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
2237  return failure();
2238 
2239  // If the constraint system is empty, there is an inconsistency. (E.g., this
2240  // can happen if loop lb > ub.)
2241  if (constraints.isEmpty())
2242  return failure();
2243 
2244  // In the case of `isMin` (the `!isMin` case is symmetric):
2245  // Prove that each result of `map` has a lower bound that is equal to (or
2246  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
2247  // can be replaced with the bound. I.e., prove that for each result
2248  // expr_i (represented by dimension r_i):
2249  //
2250  // r_i >= opBound
2251  //
2252  // To prove this inequality, add its negation to the constraint set and prove
2253  // that the constraint set is empty.
2254  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
2255  FlatAffineValueConstraints newConstr(constraints);
2256 
2257  // Add an equality: r_i = expr_i
2258  // Note: These equalities could have been added earlier and used to express
2259  // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
2260  // computes minOpUb in terms of r_i dims, which is not desired.
2261  if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
2262  map.getSubMap({i - resultDimStart}), operands)))
2263  return failure();
2264 
2265  // If `isMin`: Add inequality: r_i < opBound
2266  // equiv.: opBound - r_i - 1 >= 0
2267  // If `!isMin`: Add inequality: r_i > opBound
2268  // equiv.: -opBound + r_i - 1 >= 0
2269  SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
2270  ineq[dimOpBound] = isMin ? 1 : -1;
2271  ineq[i] = isMin ? -1 : 1;
2272  ineq[newConstr.getNumCols() - 1] = -1;
2273  newConstr.addInequality(ineq);
2274  if (!newConstr.isEmpty())
2275  return failure();
2276  }
2277 
2278  // Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
2279  AffineMap newMap = alignedBoundMap;
2280  SmallVector<Value> newOperands;
2281  unpackOptionalValues(constraints.getMaybeValues(), newOperands);
2282  // If dims/symbols have known constant values, use those in order to simplify
2283  // the affine map further.
2284  for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
2285  // Skip unused operands and operands that are already constants.
2286  if (!newOperands[i] || getConstantIntValue(newOperands[i]))
2287  continue;
2288  if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
2289  AffineExpr expr =
2290  i < newMap.getNumDims()
2291  ? builder.getAffineDimExpr(i)
2292  : builder.getAffineSymbolExpr(i - newMap.getNumDims());
2293  newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
2294  newMap.getNumDims(), newMap.getNumSymbols());
2295  }
2296  }
2297  affine::canonicalizeMapAndOperands(&newMap, &newOperands);
2298  return AffineValueMap(newMap, newOperands);
2299 }
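// Example (an illustrative sketch): given constraints proving %x <= 100,
//   %m = affine.min affine_map<(d0) -> (d0, 100)>(%x)
// has the upper bound %x, and both results are provably >= %x under the
// constraints, so the op simplifies to the map (d0) -> (d0) applied to %x.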
2300 
2301 Block *mlir::affine::findInnermostCommonBlockInScope(Operation *a,
2302  Operation *b) {
2303  Region *aScope = mlir::affine::getAffineScope(a);
2304  Region *bScope = mlir::affine::getAffineScope(b);
2305  if (aScope != bScope)
2306  return nullptr;
2307 
2308  // Get the block ancestry of `op` while stopping at the affine scope `aScope`
2309  // and store it in `ancestry`.
2310  auto getBlockAncestry = [&](Operation *op,
2311  SmallVectorImpl<Block *> &ancestry) {
2312  Operation *curOp = op;
2313  do {
2314  ancestry.push_back(curOp->getBlock());
2315  if (curOp->getParentRegion() == aScope)
2316  break;
2317  curOp = curOp->getParentOp();
2318  } while (curOp);
2319  assert(curOp && "can't reach root op without passing through affine scope");
2320  std::reverse(ancestry.begin(), ancestry.end());
2321  };
2322 
2323  SmallVector<Block *, 4> aAncestors, bAncestors;
2324  getBlockAncestry(a, aAncestors);
2325  getBlockAncestry(b, bAncestors);
2326  assert(!aAncestors.empty() && !bAncestors.empty() &&
2327  "at least one Block ancestor expected");
2328 
2329  Block *innermostCommonBlock = nullptr;
2330  for (unsigned a = 0, b = 0, e = aAncestors.size(), f = bAncestors.size();
2331  a < e && b < f; ++a, ++b) {
2332  if (aAncestors[a] != bAncestors[b])
2333  break;
2334  innermostCommonBlock = aAncestors[a];
2335  }
2336  return innermostCommonBlock;
2337 }
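// Example: in
//   affine.for %i {   // body block B
//     affine.for %j { <opA> }
//     <opB>
//   }
// findInnermostCommonBlockInScope(opA, opB) returns B: it is the deepest
// block whose ancestry chains for both ops coincide.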
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
Definition: Utils.cpp:1713
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
Definition: Utils.cpp:1456
static Node * addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg, DenseMap< Value, SetVector< unsigned >> &memrefAccesses)
Add op to MDG creating a new node and adding its memory accesses (affine or non-affine to memrefAcces...
Definition: Utils.cpp:192
const char *const kSliceFusionBarrierAttrName
Definition: Utils.cpp:1780
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
Definition: Utils.cpp:1498
static void getEffectedValues(Operation *op, SmallVectorImpl< Value > &values)
Returns the values that this op has a memref effect of type EffectTys on, not considering recursive e...
Definition: Utils.cpp:166
MemRefDependenceGraph::Node Node
Definition: Utils.cpp:39
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
Definition: Utils.cpp:1473
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
Definition: Utils.cpp:2037
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
Definition: Utils.cpp:2151
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
Definition: Utils.cpp:2136
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
Definition: Utils.cpp:2122
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:236
unsigned getPosition() const
Definition: AffineExpr.cpp:348
Base type for affine expression.
Definition: AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:343
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
Definition: AffineMap.h:267
unsigned getNumSymbols() const
Definition: AffineMap.cpp:398
unsigned getNumDims() const
Definition: AffineMap.cpp:394
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:407
unsigned getNumResults() const
Definition: AffineMap.cpp:402
unsigned getNumInputs() const
Definition: AffineMap.cpp:403
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:411
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
Definition: AffineMap.cpp:515
void dump() const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Definition: AffineMap.cpp:654
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Definition: Block.h:305
iterator begin()
Definition: Block.h:143
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:33
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:364
AffineExpr getAffineConstantExpr(int64_t constant)
Definition: Builders.cpp:368
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:360
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
SmallVector< std::optional< Value > > getMaybeValues() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
An integer set representing a conjunction of one or more affine equalities and inequalities.
Definition: IntegerSet.h:44
unsigned getNumDims() const
Definition: IntegerSet.cpp:15
MLIRContext * getContext() const
Definition: IntegerSet.cpp:57
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
Definition: IntegerSet.h:56
unsigned getNumSymbols() const
Definition: IntegerSet.cpp:16
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:544
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:750
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:386
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:798
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:874
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBoundOnDimSize64(unsigned pos, SmallVectorImpl< int64_t > *lb=nullptr, int64_t *boundFloorDivisor=nullptr, SmallVectorImpl< int64_t > *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
The same, but casts to int64_t.
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
Definition: Utils.cpp:2110
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:776
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
Definition: Utils.cpp:1546
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
Definition: AffineOps.cpp:2569
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
Definition: Utils.cpp:2092
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:2021
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2573
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
Definition: Utils.cpp:2004
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
Definition: Utils.cpp:2101
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1439
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2561
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
Definition: Utils.cpp:762
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:2083
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
Definition: Utils.cpp:1514
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:392
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
Definition: Utils.cpp:1785
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns true if the provided value is among the induction variables of an AffineParallelOp.
Definition: AffineOps.cpp:2584
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
Definition: Utils.cpp:1375
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1978
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the number of iterations for the slicetripCountMap provided.
Definition: Utils.cpp:1771
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
Region * getAffineScope(Operation *op)
Returns the closest region enclosing op that is held by an operation with trait AffineScope; nullptr ...
Definition: AffineOps.cpp:263
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
mlir::Block * findInnermostCommonBlockInScope(mlir::Operation *a, mlir::Operation *b)
Find the innermost common Block of a and b in the affine scope that a and b are part of.
Definition: Utils.cpp:2301
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
Definition: Utils.cpp:1733
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
Definition: Utils.cpp:1897
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
Definition: Utils.cpp:2196
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
Definition: Utils.cpp:1325
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
BoundType
The type of bound: equal, lower bound or upper bound.
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
Definition: Utils.h:310
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
Definition: Utils.cpp:947
SmallVector< Value, 4 > ivs
Definition: Utils.h:313
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
Definition: Utils.cpp:809
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
Definition: Utils.cpp:793
std::vector< SmallVector< Value, 4 > > ubOperands
Definition: Utils.h:321
SmallVector< AffineMap, 4 > ubs
Definition: Utils.h:317
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
Definition: Utils.cpp:1017
SmallVector< AffineMap, 4 > lbs
Definition: Utils.h:315
std::vector< SmallVector< Value, 4 > > lbOperands
Definition: Utils.h:319
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:49
SmallVector< Operation *, 4 > loadOpInsts
Definition: Utils.h:41
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:47
void collect(Operation *opToWalk)
Definition: Utils.cpp:44
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:45
SmallVector< Operation *, 4 > storeOpInsts
Definition: Utils.h:43
Encapsulates a memref load or store access information.
unsigned getRank() const
Definition: Utils.cpp:1968
MemRefAccess(Operation *opInst)
Constructs a MemRefAccess from a load or store operation.
Definition: Utils.cpp:1953
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
Definition: Utils.cpp:1994
SmallVector< Operation *, 4 > loads
Definition: Utils.h:73
SmallVector< Operation *, 4 > stores
Definition: Utils.h:77
unsigned hasFree(Value memref) const
Definition: Utils.cpp:124
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:75
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:79
unsigned getStoreOpCount(Value memref) const
Definition: Utils.cpp:94
unsigned hasStore(Value memref) const
Returns true if there exists an operation with a write memory effect to memref in this node.
Definition: Utils.cpp:110
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:81
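A small sketch of querying a node's per-memref write information; 'countWrites' is an illustrative name.

#include "mlir/Dialect/Affine/Analysis/Utils.h"

// Illustrative only: count writes to 'memref' inside one graph node.
unsigned countWrites(const mlir::affine::Node &node, mlir::Value memref) {
  // hasStore covers affine writes as well as generic write effects.
  if (!node.hasStore(memref))
    return 0;
  return node.getStoreOpCount(memref);
}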
unsigned addNode(Operation *op)
Definition: Utils.cpp:374
bool writesToLiveInOrEscapingMemrefs(unsigned id) const
Definition: Utils.cpp:404
void removeEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:450
void addEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:439
DenseMap< unsigned, Node > nodes
Definition: Utils.h:141
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes) const
Return all nodes which define SSA values used in node 'id'.
Definition: Utils.cpp:538
bool hasDependencePath(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:477
void clearNodeLoadAndStores(unsigned id)
Definition: Utils.cpp:705
const Node * getForOpNode(AffineForOp forOp) const
Definition: Utils.cpp:366
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:552
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
Definition: Utils.cpp:628
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:713
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr) const
Definition: Utils.cpp:528
const Node * getNode(unsigned id) const
Definition: Utils.cpp:359
void removeNode(unsigned id)
Definition: Utils.cpp:381
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:721
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:729
void addToNode(unsigned id, ArrayRef< Operation * > loads, ArrayRef< Operation * > stores, ArrayRef< Operation * > memrefLoads, ArrayRef< Operation * > memrefStores, ArrayRef< Operation * > memrefFrees)
Definition: Utils.cpp:692
unsigned getIncomingMemRefAccesses(unsigned id, Value memref) const
Definition: Utils.cpp:512
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr) const
Definition: Utils.cpp:424
void print(raw_ostream &os) const
Definition: Utils.cpp:744
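A sketch of common read-only queries against an already-initialized graph (construction and initialization are out of scope here); 'reportPath' is an illustrative name.

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: probe reachability and node contents by id.
void reportPath(const mlir::affine::MemRefDependenceGraph &mdg,
                unsigned srcId, unsigned dstId) {
  // hasDependencePath searches memref and SSA dependence edges.
  if (mdg.hasDependencePath(srcId, dstId))
    llvm::outs() << "node " << srcId << " reaches node " << dstId << "\n";
  // getNode expects a valid node id.
  const mlir::affine::Node *src = mdg.getNode(srcId);
  llvm::outs() << "src holds " << src->stores.size() << " affine stores\n";
}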
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this memref and the index information of its uses.
Definition: Utils.h:481
FlatAffineValueConstraints * getConstraints()
Definition: Utils.h:520
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
Definition: Utils.cpp:1067
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, std::vector< SmallVector< int64_t, 4 >> *lbs=nullptr, SmallVectorImpl< int64_t > *lbDivisors=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constant (always possible for static shapes), std::nullopt otherwise.
Definition: Utils.cpp:1071
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Definition: Utils.cpp:1132
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
Definition: Utils.cpp:1344
LogicalResult unionBoundingBox(const MemRefRegion &other)
Definition: Utils.cpp:1151
Value memref
Memref that this region corresponds to.
Definition: Utils.h:568
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true)
Computes the memory region accessed by this memref with the region represented as constraints symbolic/parametric in 'loopDepth' loops surrounding opInst and any additional Function symbols.
Definition: Utils.cpp:1173
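The usual compute-then-size pattern, as a sketch; 'footprintOf' is an illustrative name, the operation is assumed to be an affine load or store, and the Location-taking constructor is assumed from the class's public interface.

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Support/LLVM.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

// Illustrative only: bound the data footprint of one affine access.
void footprintOf(mlir::Operation *loadOrStore) {
  mlir::affine::MemRefRegion region(loadOrStore->getLoc());
  // loopDepth = 0 keeps no surrounding loop IVs symbolic, so the region
  // covers all iterations of the enclosing nest.
  if (mlir::failed(region.compute(loadOrStore, /*loopDepth=*/0)))
    return;
  if (std::optional<int64_t> bytes = region.getRegionSize())
    llvm::outs() << "footprint: " << *bytes << " bytes\n";
}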
Enumerates different result statuses of slice computation by computeSliceUnion.
Definition: Utils.h:297
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.