//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for non-loop IR
// structures.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/Utils.h"

#include "mlir/Analysis/Presburger/PresburgerRelation.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "analysis-utils"

using namespace mlir;
using namespace affine;
using namespace presburger;

using llvm::SmallDenseMap;

using Node = MemRefDependenceGraph::Node;

// LoopNestStateCollector walks loop nests and collects load and store
// operations, and whether or not a region-holding op other than ForOp and
// IfOp was encountered in the loop nest.
void LoopNestStateCollector::collect(Operation *opToWalk) {
  opToWalk->walk([&](Operation *op) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      forOps.push_back(forOp);
    } else if (isa<AffineReadOpInterface>(op)) {
      loadOpInsts.push_back(op);
    } else if (isa<AffineWriteOpInterface>(op)) {
      storeOpInsts.push_back(op);
    } else {
      auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
      if (!memInterface) {
        if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
          // This op itself is memory-effect free.
          return;
        // Check operands. E.g., ops like the `call` op are handled here.
        for (Value v : op->getOperands()) {
          if (!isa<MemRefType>(v.getType()))
            continue;
          // Conservatively, we assume the memref is read and written to.
          memrefLoads.push_back(op);
          memrefStores.push_back(op);
        }
      } else {
        // Non-affine loads and stores.
        if (hasEffect<MemoryEffects::Read>(op))
          memrefLoads.push_back(op);
        if (hasEffect<MemoryEffects::Write>(op))
          memrefStores.push_back(op);
        if (hasEffect<MemoryEffects::Free>(op))
          memrefFrees.push_back(op);
      }
    }
  });
}
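
// For illustration (hypothetical IR; %A, %B, and %c0 are assumed to be
// defined elsewhere): walking a nest such as
//
//   affine.for %i = 0 to 16 {
//     %v = affine.load %A[%i] : memref<16xf32>
//     memref.store %v, %B[%c0] : memref<1xf32>
//   }
//
// records the affine.for in 'forOps', the affine.load in 'loadOpInsts', and
// the non-affine memref.store in 'memrefStores'.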

unsigned Node::getLoadOpCount(Value memref) const {
  unsigned loadOpCount = 0;
  for (Operation *loadOp : loads) {
    // Common case: affine reads.
    if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
      if (memref == affineLoad.getMemRef())
        ++loadOpCount;
    } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
      ++loadOpCount;
    }
  }
  return loadOpCount;
}

// Returns the store op count for 'memref'.
unsigned Node::getStoreOpCount(Value memref) const {
  unsigned storeOpCount = 0;
  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
    // Common case: affine writes.
    if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
      if (memref == affineStore.getMemRef())
        ++storeOpCount;
    } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
                                               memref)) {
      ++storeOpCount;
    }
  }
  return storeOpCount;
}

// Returns true if there exists an operation with a write memory effect on
// 'memref' in this node.
unsigned Node::hasStore(Value memref) const {
  return llvm::any_of(
      llvm::concat<Operation *const>(stores, memrefStores),
      [&](Operation *storeOp) {
        if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
          if (memref == affineStore.getMemRef())
            return true;
        } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
          return true;
        }
        return false;
      });
}

unsigned Node::hasFree(Value memref) const {
  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
    return hasEffect<MemoryEffects::Free>(freeOp, memref);
  });
}

// Returns, in 'storeOps', all store ops in this node that access 'memref'.
void Node::getStoreOpsForMemref(Value memref,
                                SmallVectorImpl<Operation *> *storeOps) const {
  for (Operation *storeOp : stores) {
    if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
      storeOps->push_back(storeOp);
  }
}

// Returns, in 'loadOps', all load ops in this node that access 'memref'.
void Node::getLoadOpsForMemref(Value memref,
                               SmallVectorImpl<Operation *> *loadOps) const {
  for (Operation *loadOp : loads) {
    if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
      loadOps->push_back(loadOp);
  }
}

// Returns, in 'loadAndStoreMemrefSet', all memrefs for which this node
// has at least one load and at least one store operation.
void Node::getLoadAndStoreMemrefSet(
    DenseSet<Value> *loadAndStoreMemrefSet) const {
  llvm::SmallDenseSet<Value, 2> loadMemrefs;
  for (Operation *loadOp : loads) {
    loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
  }
  for (Operation *storeOp : stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
    if (loadMemrefs.count(memref) > 0)
      loadAndStoreMemrefSet->insert(memref);
  }
}

/// Returns the values that this op has a memref effect of type `EffectTys` on,
/// not considering recursive effects.
template <typename... EffectTys>
static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) {
  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
  if (!memOp) {
    if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
      // No effects.
      return;
    // Memref operands have to be considered as being affected.
    for (Value operand : op->getOperands()) {
      if (isa<MemRefType>(operand.getType()))
        values.push_back(operand);
    }
    return;
  }
  SmallVector<SideEffects::EffectInstance<MemoryEffects::Effect>> effects;
  memOp.getEffects(effects);
  for (auto &effect : effects) {
    Value effectVal = effect.getValue();
    if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
        isa<MemRefType>(effectVal.getType()))
      values.push_back(effectVal);
  }
}

/// Add `op` to the MDG, creating a new node and adding its memory accesses
/// (affine or non-affine) to the memrefAccesses (memref -> list of nodes with
/// accesses) map.
static Node *
addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
             DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
  auto &nodes = mdg.nodes;
  // Create graph node 'id' to represent top-level 'forOp' and record
  // all loads and store accesses it contains.
  LoopNestStateCollector collector;
  collector.collect(nodeOp);
  unsigned newNodeId = mdg.nextNodeId++;
  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
  for (Operation *op : collector.loadOpInsts) {
    node.loads.push_back(op);
    auto memref = cast<AffineReadOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.storeOpInsts) {
    node.stores.push_back(op);
    auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
    memrefAccesses[memref].insert(node.id);
  }
  for (Operation *op : collector.memrefLoads) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Read>(op, effectedValues);
    if (llvm::any_of(((ValueRange)effectedValues).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      // We do not know the interaction here.
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefLoads.push_back(op);
  }
  for (Operation *op : collector.memrefStores) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Write>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefStores.push_back(op);
  }
  for (Operation *op : collector.memrefFrees) {
    SmallVector<Value> effectedValues;
    getEffectedValues<MemoryEffects::Free>(op, effectedValues);
    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
                     [](Type type) { return !isa<MemRefType>(type); }))
      return nullptr;
    for (Value memref : effectedValues)
      memrefAccesses[memref].insert(node.id);
    node.memrefFrees.push_back(op);
  }

  return &node;
}

bool MemRefDependenceGraph::init() {
  LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
  // Map from a memref to the set of ids of the nodes that have ops accessing
  // the memref.
  DenseMap<Value, SetVector<unsigned>> memrefAccesses;

  // Create graph nodes.
  DenseMap<Operation *, unsigned> forToNodeMap;
  for (Operation &op : block) {
    if (auto forOp = dyn_cast<AffineForOp>(op)) {
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
      forToNodeMap[&op] = node->id;
    } else if (isa<AffineReadOpInterface>(op)) {
      // Create graph node for top-level load op.
      Node node(nextNodeId++, &op);
      node.loads.push_back(&op);
      auto memref = cast<AffineReadOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (isa<AffineWriteOpInterface>(op)) {
      // Create graph node for top-level store op.
      Node node(nextNodeId++, &op);
      node.stores.push_back(&op);
      auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
      memrefAccesses[memref].insert(node.id);
      nodes.insert({node.id, node});
    } else if (op.getNumResults() > 0 && !op.use_empty()) {
      // Create graph node for top-level producer of SSA values, which
      // could be used by loop nest nodes.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (!isMemoryEffectFree(&op) &&
               (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
      // Create graph node for top-level op unless it is known to be
      // memory-effect free. This covers all unknown/unregistered ops,
      // non-affine ops with memory effects, and region-holding ops with a
      // well-defined control flow. During the fusion validity checks, edges
      // to/from these ops get looked at.
      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
      if (!node)
        return false;
    } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
      // Return false if non-handled/unknown region-holding ops are found. We
      // won't know what such ops do or what their regions mean; e.g., they
      // may not be imperative ops.
      LLVM_DEBUG(llvm::dbgs()
                 << "MDG init failed; unknown region-holding op found!\n");
      return false;
    }
    // We don't create nodes for memory-effect-free ops that either have no
    // regions (unless their results are used) or implement the region branch
    // op interface.
  }

  LLVM_DEBUG(llvm::dbgs() << "Created " << nodes.size() << " nodes\n");

  // Add dependence edges between nodes which produce SSA values and their
  // users. Load ops can be considered as the ones producing SSA values.
  for (auto &idAndNode : nodes) {
    const Node &node = idAndNode.second;
    // Stores don't define SSA values, skip them.
    if (!node.stores.empty())
      continue;
    Operation *opInst = node.op;
    for (Value value : opInst->getResults()) {
      for (Operation *user : value.getUsers()) {
        // Ignore users outside of the block.
        if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
            &block)
          continue;
        SmallVector<AffineForOp, 4> loops;
        getAffineForIVs(*user, &loops);
        // Find the surrounding affine.for nested immediately within the
        // block.
        auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
          return loop->getBlock() == &block;
        });
        if (it == loops.end())
          continue;
        assert(forToNodeMap.count(*it) > 0 && "missing mapping");
        unsigned userLoopNestId = forToNodeMap[*it];
        addEdge(node.id, userLoopNestId, value);
      }
    }
  }

  // Walk memref access lists and add graph edges between dependent nodes.
  for (auto &memrefAndList : memrefAccesses) {
    unsigned n = memrefAndList.second.size();
    Value srcMemRef = memrefAndList.first;
    // Add edges between all dependent pairs among the node IDs on this memref.
    for (unsigned i = 0; i < n; ++i) {
      unsigned srcId = memrefAndList.second[i];
      Node *srcNode = getNode(srcId);
      bool srcHasStoreOrFree =
          srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
      for (unsigned j = i + 1; j < n; ++j) {
        unsigned dstId = memrefAndList.second[j];
        Node *dstNode = getNode(dstId);
        bool dstHasStoreOrFree =
            dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
        if (srcHasStoreOrFree || dstHasStoreOrFree)
          addEdge(srcId, dstId, srcMemRef);
      }
    }
  }
  return true;
}
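
// For illustration (hypothetical IR, not from the upstream tests): for a
// block holding
//
//   affine.for %i = 0 to 8 {
//     %v = affine.load %A[%i] : memref<8xf32>
//     affine.store %v, %B[%i] : memref<8xf32>
//   }
//   affine.for %i = 0 to 8 {
//     %w = affine.load %B[%i] : memref<8xf32>
//     affine.store %w, %C[%i] : memref<8xf32>
//   }
//
// init() creates one node per loop nest and adds a dependence edge on %B from
// the first node to the second, since the first stores to %B while the second
// loads from it. %A and %C are each accessed by a single node, so they
// contribute no edges.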

// Returns the graph node for 'id'.
const Node *MemRefDependenceGraph::getNode(unsigned id) const {
  auto it = nodes.find(id);
  assert(it != nodes.end());
  return &it->second;
}

// Returns the graph node for 'forOp'.
const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const {
  for (auto &idAndNode : nodes)
    if (idAndNode.second.op == forOp)
      return &idAndNode.second;
  return nullptr;
}

// Adds a node with 'op' to the graph and returns its unique identifier.
unsigned MemRefDependenceGraph::addNode(Operation *op) {
  Node node(nextNodeId++, op);
  nodes.insert({node.id, node});
  return node.id;
}

// Remove node 'id' (and its associated edges) from graph.
void MemRefDependenceGraph::removeNode(unsigned id) {
  // Remove each edge in 'inEdges[id]'.
  if (inEdges.count(id) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[id];
    for (auto &inEdge : oldInEdges) {
      removeEdge(inEdge.id, id, inEdge.value);
    }
  }
  // Remove each edge in 'outEdges[id]'.
  if (outEdges.contains(id)) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[id];
    for (auto &outEdge : oldOutEdges) {
      removeEdge(id, outEdge.id, outEdge.value);
    }
  }
  // Erase remaining node state.
  inEdges.erase(id);
  outEdges.erase(id);
  nodes.erase(id);
}

// Returns true if node 'id' writes to any memref which escapes (or is an
// argument to) the block. Returns false otherwise.
bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(
    unsigned id) const {
  const Node *node = getNode(id);
  for (auto *storeOpInst : node->stores) {
    auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
    auto *op = memref.getDefiningOp();
    // Return true if 'memref' is a block argument.
    if (!op)
      return true;
    // Return true if any use of 'memref' does not dereference it in an affine
    // way.
    for (auto *user : memref.getUsers())
      if (!isa<AffineMapAccessInterface>(*user))
        return true;
  }
  return false;
}

// Returns true iff there is an edge from node 'srcId' to node 'dstId' which
// is for 'value' if non-null, or for any value otherwise.
bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
                                    Value value) const {
  if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) {
    return false;
  }
  bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) {
    return edge.id == dstId && (!value || edge.value == value);
  });
  bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) {
    return edge.id == srcId && (!value || edge.value == value);
  });
  return hasOutEdge && hasInEdge;
}

// Adds an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
                                    Value value) {
  if (!hasEdge(srcId, dstId, value)) {
    outEdges[srcId].push_back({dstId, value});
    inEdges[dstId].push_back({srcId, value});
    if (isa<MemRefType>(value.getType()))
      memrefEdgeCount[value]++;
  }
}

// Removes an edge from node 'srcId' to node 'dstId' for 'value'.
void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
                                       Value value) {
  assert(inEdges.count(dstId) > 0);
  assert(outEdges.count(srcId) > 0);
  if (isa<MemRefType>(value.getType())) {
    assert(memrefEdgeCount.count(value) > 0);
    memrefEdgeCount[value]--;
  }
  // Remove 'srcId' from 'inEdges[dstId]'.
  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
    if ((*it).id == srcId && (*it).value == value) {
      inEdges[dstId].erase(it);
      break;
    }
  }
  // Remove 'dstId' from 'outEdges[srcId]'.
  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
    if ((*it).id == dstId && (*it).value == value) {
      outEdges[srcId].erase(it);
      break;
    }
  }
}

// Returns true if there is a path in the dependence graph from node 'srcId'
// to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
// operations that the edges connect are expected to be from the same block.
bool MemRefDependenceGraph::hasDependencePath(unsigned srcId,
                                              unsigned dstId) const {
  // Worklist state is: <node-id, next-output-edge-index-to-visit>
  SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
  worklist.push_back({srcId, 0});
  Operation *dstOp = getNode(dstId)->op;
  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
  while (!worklist.empty()) {
    auto &idAndIndex = worklist.back();
    // Return true if we have reached 'dstId'.
    if (idAndIndex.first == dstId)
      return true;
    // Pop and continue if node has no out edges, or if all out edges have
    // already been visited.
    if (!outEdges.contains(idAndIndex.first) ||
        idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) {
      worklist.pop_back();
      continue;
    }
    // Get graph edge to traverse.
    const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second];
    // Increment next output edge index for 'idAndIndex'.
    ++idAndIndex.second;
    // Add node at 'edge.id' to the worklist. We don't need to consider
    // nodes that are "after" dstId in the containing block; one can't have a
    // path to `dstId` from any of those nodes.
    bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
    if (!afterDst && edge.id != idAndIndex.first)
      worklist.push_back({edge.id, 0});
  }
  return false;
}
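
// For illustration (hypothetical node ids): with edges 0 -> 1 and 1 -> 2,
// hasDependencePath(0, 2) returns true via the DFS above, while
// hasDependencePath(2, 0) returns false since node 2 has no out edges.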

// Returns the input edge count for node 'id' and 'memref' from src nodes
// which access 'memref' with a store operation.
unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
                                                          Value memref) const {
  unsigned inEdgeCount = 0;
  for (const Edge &inEdge : inEdges.lookup(id)) {
    if (inEdge.value == memref) {
      const Node *srcNode = getNode(inEdge.id);
      // Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
      // with a store operation.
      if (srcNode->getStoreOpCount(memref) > 0)
        ++inEdgeCount;
    }
  }
  return inEdgeCount;
}

// Returns the output edge count for node 'id' and 'memref' (if non-null),
// otherwise returns the total output edge count from node 'id'.
unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id,
                                                Value memref) const {
  unsigned outEdgeCount = 0;
  for (const auto &outEdge : outEdges.lookup(id))
    if (!memref || outEdge.value == memref)
      ++outEdgeCount;
  return outEdgeCount;
}

/// Return all nodes which define SSA values used in node 'id'.
void MemRefDependenceGraph::gatherDefiningNodes(
    unsigned id, DenseSet<unsigned> &definingNodes) const {
  for (const Edge &edge : inEdges.lookup(id))
    // By definition of edge, if the edge value is a non-memref value,
    // then the dependence is between a graph node which defines an SSA value
    // and another graph node which uses the SSA value.
    if (!isa<MemRefType>(edge.value.getType()))
      definingNodes.insert(edge.id);
}

// Computes and returns an insertion point operation, before which the
// fused <srcId, dstId> loop nest can be inserted while preserving
// dependences. Returns nullptr if no such insertion point is found.
Operation *
MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
                                                      unsigned dstId) const {
  if (!outEdges.contains(srcId))
    return getNode(dstId)->op;

  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
  DenseSet<unsigned> definingNodes;
  gatherDefiningNodes(dstId, definingNodes);
  if (llvm::any_of(definingNodes,
                   [&](unsigned id) { return hasDependencePath(srcId, id); })) {
    LLVM_DEBUG(llvm::dbgs()
               << "Can't fuse: a defining op with a user in the dst "
                  "loop has dependence from the src loop\n");
    return nullptr;
  }

  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
  SmallPtrSet<Operation *, 2> srcDepInsts;
  for (auto &outEdge : outEdges.lookup(srcId))
    if (outEdge.id != dstId)
      srcDepInsts.insert(getNode(outEdge.id)->op);

  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
  SmallPtrSet<Operation *, 2> dstDepInsts;
  for (auto &inEdge : inEdges.lookup(dstId))
    if (inEdge.id != srcId)
      dstDepInsts.insert(getNode(inEdge.id)->op);

  Operation *srcNodeInst = getNode(srcId)->op;
  Operation *dstNodeInst = getNode(dstId)->op;

  // Computing insertion point:
  // *) Walk all operation positions in Block operation list in the
  //    range (src, dst). For each operation 'op' visited in this search:
  //   *) Store in 'firstSrcDepPos' the first position where 'op' has a
  //      dependence edge from 'srcNode'.
  //   *) Store in 'lastDstDepPos' the last position where 'op' has a
  //      dependence edge to 'dstNode'.
  //   *) Compare 'firstSrcDepPos' and 'lastDstDepPos' to determine the
  //      operation insertion point (or return null pointer if no such
  //      insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
  SmallVector<Operation *, 2> depInsts;
  std::optional<unsigned> firstSrcDepPos;
  std::optional<unsigned> lastDstDepPos;
  unsigned pos = 0;
  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
       it != Block::iterator(dstNodeInst); ++it) {
    Operation *op = &(*it);
    if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
      firstSrcDepPos = pos;
    if (dstDepInsts.count(op) > 0)
      lastDstDepPos = pos;
    depInsts.push_back(op);
    ++pos;
  }

  if (firstSrcDepPos.has_value()) {
    if (lastDstDepPos.has_value()) {
      if (*firstSrcDepPos <= *lastDstDepPos) {
        // No valid insertion point exists which preserves dependences.
        return nullptr;
      }
    }
    // Return the insertion point at 'firstSrcDepPos'.
    return depInsts[*firstSrcDepPos];
  }
  // No dependence targets in range (or only dst deps in range), return
  // 'dstNodeInst' insertion point.
  return dstNodeInst;
}
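
// For illustration (hypothetical schematic): with block order src, A, B, dst,
// where A depends on src and dst depends on B, the walk above computes
// firstSrcDepPos = 0 (A) and lastDstDepPos = 1 (B). Since firstSrcDepPos <=
// lastDstDepPos, no insertion point preserves both dependences and nullptr is
// returned. If instead only A depended on src, the result would be A itself,
// i.e., the fused nest would be inserted right before A.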

// Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
// taking into account that:
// *) if 'removeSrcId' is true, 'srcId' will be removed after fusion,
// *) memrefs in 'privateMemRefs' have been replaced in node at 'dstId' by a
//    private memref.
void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
                                        const DenseSet<Value> &privateMemRefs,
                                        bool removeSrcId) {
  // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
  if (inEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
    for (auto &inEdge : oldInEdges) {
      // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
      if (!privateMemRefs.contains(inEdge.value))
        addEdge(inEdge.id, dstId, inEdge.value);
    }
  }
  // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
  // If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
  if (outEdges.count(srcId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
    for (auto &outEdge : oldOutEdges) {
      // Remove any out edges from 'srcId' to 'dstId' across memrefs.
      if (outEdge.id == dstId)
        removeEdge(srcId, outEdge.id, outEdge.value);
      else if (removeSrcId) {
        addEdge(dstId, outEdge.id, outEdge.value);
        removeEdge(srcId, outEdge.id, outEdge.value);
      }
    }
  }
  // Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
  // replaced by a private memref). These edges could come from nodes
  // other than 'srcId' which were removed in the previous step.
  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
    SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
    for (auto &inEdge : oldInEdges)
      if (privateMemRefs.count(inEdge.value) > 0)
        removeEdge(inEdge.id, dstId, inEdge.value);
  }
}

// Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
// of sibling node 'sibId' into node 'dstId'.
void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
  // For each edge in 'inEdges[sibId]':
  // *) Add new edge from source node 'inEdge.id' to 'dstNode'.
  // *) Remove edge from source node 'inEdge.id' to 'sibNode'.
  if (inEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
    for (auto &inEdge : oldInEdges) {
      addEdge(inEdge.id, dstId, inEdge.value);
      removeEdge(inEdge.id, sibId, inEdge.value);
    }
  }

  // For each edge in 'outEdges[sibId]':
  // *) Add new edge from 'dstId' to 'outEdge.id'.
  // *) Remove edge from 'sibId' to 'outEdge.id'.
  if (outEdges.count(sibId) > 0) {
    SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
    for (auto &outEdge : oldOutEdges) {
      addEdge(dstId, outEdge.id, outEdge.value);
      removeEdge(sibId, outEdge.id, outEdge.value);
    }
  }
}

// Adds ops in 'loads' and 'stores' (and the non-affine 'memrefLoads',
// 'memrefStores', and 'memrefFrees') to the node at 'id'.
void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads,
                                      ArrayRef<Operation *> stores,
                                      ArrayRef<Operation *> memrefLoads,
                                      ArrayRef<Operation *> memrefStores,
                                      ArrayRef<Operation *> memrefFrees) {
  Node *node = getNode(id);
  llvm::append_range(node->loads, loads);
  llvm::append_range(node->stores, stores);
  llvm::append_range(node->memrefLoads, memrefLoads);
  llvm::append_range(node->memrefStores, memrefStores);
  llvm::append_range(node->memrefFrees, memrefFrees);
}

void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
  Node *node = getNode(id);
  node->loads.clear();
  node->stores.clear();
}

// Calls 'callback' for each input edge incident to node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefInputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (inEdges.count(id) > 0)
    forEachMemRefEdge(inEdges[id], callback);
}

// Calls 'callback' for each output edge from node 'id' which carries a
// memref dependence.
void MemRefDependenceGraph::forEachMemRefOutputEdge(
    unsigned id, const std::function<void(Edge)> &callback) {
  if (outEdges.count(id) > 0)
    forEachMemRefEdge(outEdges[id], callback);
}

// Calls 'callback' for each edge in 'edges' which carries a memref
// dependence.
void MemRefDependenceGraph::forEachMemRefEdge(
    ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
  for (const auto &edge : edges) {
    // Skip if 'edge' is not a memref dependence edge.
    if (!isa<MemRefType>(edge.value.getType()))
      continue;
    assert(nodes.count(edge.id) > 0);
    // Skip if 'edge.id' is not a loop nest.
    if (!isa<AffineForOp>(getNode(edge.id)->op))
      continue;
    // Visit current input edge 'edge'.
    callback(edge);
  }
}

void MemRefDependenceGraph::print(raw_ostream &os) const {
  os << "\nMemRefDependenceGraph\n";
  os << "\nNodes:\n";
  for (const auto &idAndNode : nodes) {
    os << "Node: " << idAndNode.first << "\n";
    auto it = inEdges.find(idAndNode.first);
    if (it != inEdges.end()) {
      for (const auto &e : it->second)
        os << "  InEdge: " << e.id << " " << e.value << "\n";
    }
    it = outEdges.find(idAndNode.first);
    if (it != outEdges.end()) {
      for (const auto &e : it->second)
        os << "  OutEdge: " << e.id << " " << e.value << "\n";
    }
  }
}

void mlir::affine::getAffineForIVs(Operation &op,
                                   SmallVectorImpl<AffineForOp> *loops) {
  auto *currOp = op.getParentOp();
  // Traverse up the hierarchy collecting all 'affine.for' operations while
  // skipping over 'affine.if' operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
      loops->push_back(currAffineForOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(loops->begin(), loops->end());
}

void mlir::affine::getEnclosingAffineOps(Operation &op,
                                         SmallVectorImpl<Operation *> *ops) {
  ops->clear();
  Operation *currOp = op.getParentOp();

  // Traverse up the hierarchy collecting all 'affine.for', 'affine.if', and
  // 'affine.parallel' operations.
  while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
    if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
      ops->push_back(currOp);
    currOp = currOp->getParentOp();
  }
  std::reverse(ops->begin(), ops->end());
}
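
// For illustration (hypothetical nesting): for an op nested as
//
//   affine.for %i ... {
//     affine.if ... {
//       affine.for %j ... {
//         <op>
//       }
//     }
//   }
//
// getAffineForIVs collects the %i and %j loops (outermost first), while
// getEnclosingAffineOps additionally collects the affine.if between them.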

// Populates 'cst' with FlatAffineValueConstraints which represent the original
// domain of the loop bounds that define 'ivs'.
LogicalResult ComputationSliceState::getSourceAsConstraints(
    FlatAffineValueConstraints &cst) const {
  assert(!ivs.empty() && "Cannot have a slice without its IVs");
  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
                                   /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(cst.addAffineForOpDomain(loop)))
      return failure();
  }
  return success();
}

// Populates 'cst' with FlatAffineValueConstraints which represent the slice
// bounds.
LogicalResult
ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const {
  assert(!lbOperands.empty());
  // Adds src 'ivs' as dimension variables in 'cst'.
  unsigned numDims = ivs.size();
  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
  unsigned numSymbols = lbOperands[0].size();

  SmallVector<Value, 4> values(ivs);
  // Append 'ivs' then 'operands' to 'values'.
  values.append(lbOperands[0].begin(), lbOperands[0].end());
  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);

  // Add loop bound constraints for values which are loop IVs of the
  // destination of fusion and equality constraints for symbols which are
  // constants.
  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
    Value value = values[i];
    assert(cst->containsVar(value) && "value expected to be present");
    if (isValidSymbol(value)) {
      // Check if the symbol is a constant.
      if (std::optional<int64_t> cOp = getConstantIntValue(value))
        cst->addBound(BoundType::EQ, value, cOp.value());
    } else if (auto loop = getForInductionVarOwner(value)) {
      if (failed(cst->addAffineForOpDomain(loop)))
        return failure();
    }
  }

  // Add slice bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'.
  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
  assert(succeeded(ret) &&
         "should not fail as we never have semi-affine slice maps");
  (void)ret;
  return success();
}

// Clears slice bound and operand state.
void ComputationSliceState::clearBounds() {
  lbs.clear();
  ubs.clear();
  lbOperands.clear();
  ubOperands.clear();
}

void ComputationSliceState::dump() const {
  llvm::errs() << "\tIVs:\n";
  for (Value iv : ivs)
    llvm::errs() << "\t\t" << iv << "\n";

  llvm::errs() << "\tLBs:\n";
  for (auto en : llvm::enumerate(lbs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value lbOp : lbOperands[en.index()])
      llvm::errs() << "\t\t\t" << lbOp << "\n";
  }

  llvm::errs() << "\tUBs:\n";
  for (auto en : llvm::enumerate(ubs)) {
    llvm::errs() << "\t\t" << en.value() << "\n";
    llvm::errs() << "\t\tOperands:\n";
    for (Value ubOp : ubOperands[en.index()])
      llvm::errs() << "\t\t\t" << ubOp << "\n";
  }
}

/// Fast check to determine if the computation slice is maximal. Returns true
/// if each slice dimension maps to an existing dst dimension and both the src
/// and the dst loops for those dimensions have the same bounds. Returns false
/// if the src and the dst loops don't have the same bounds. Returns
/// std::nullopt if none of the above can be proven.
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
         "Unexpected number of lbs, ubs and ivs in slice");

  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
    AffineMap lbMap = lbs[i];
    AffineMap ubMap = ubs[i];

    // Check if this slice is just an equality along this dimension.
    if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
        ubMap.getNumResults() != 1 ||
        lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
        // The condition above will be true for maps describing a single
        // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
        // Make sure we skip those cases by checking that the lb result is not
        // just a constant.
        isa<AffineConstantExpr>(lbMap.getResult(0)))
      return std::nullopt;

    // Limited support: we expect the lb result to be just a loop dimension for
    // now.
    AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
    if (!result)
      return std::nullopt;

    // Retrieve dst loop bounds.
    AffineForOp dstLoop =
        getForInductionVarOwner(lbOperands[i][result.getPosition()]);
    if (!dstLoop)
      return std::nullopt;
    AffineMap dstLbMap = dstLoop.getLowerBoundMap();
    AffineMap dstUbMap = dstLoop.getUpperBoundMap();

    // Retrieve src loop bounds.
    AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
    assert(srcLoop && "Expected affine for");
    AffineMap srcLbMap = srcLoop.getLowerBoundMap();
    AffineMap srcUbMap = srcLoop.getUpperBoundMap();

    // Limited support: we expect simple src and dst loops with a single
    // constant component per bound for now.
    if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
        dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
      return std::nullopt;

    AffineExpr srcLbResult = srcLbMap.getResult(0);
    AffineExpr dstLbResult = dstLbMap.getResult(0);
    AffineExpr srcUbResult = srcUbMap.getResult(0);
    AffineExpr dstUbResult = dstUbMap.getResult(0);
    if (!isa<AffineConstantExpr>(srcLbResult) ||
        !isa<AffineConstantExpr>(srcUbResult) ||
        !isa<AffineConstantExpr>(dstLbResult) ||
        !isa<AffineConstantExpr>(dstUbResult))
      return std::nullopt;

    // Check if src and dst loop bounds are the same. If not, we can guarantee
    // that the slice is not maximal.
    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
        srcLoop.getStep() != dstLoop.getStep())
      return false;
  }

  return true;
}
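
// For illustration (hypothetical bounds): a slice dimension with
// lbMap = (d0) -> (d0) and ubMap = (d0) -> (d0 + 1) is a single-iteration
// slice per dst IV. If for every such dimension the src and dst loops have
// identical constant bounds and steps, the fast check returns true; if any
// constant bounds differ, it returns false; anything more complex yields
// std::nullopt.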

/// Returns true if it is deterministically verified that the original
/// iteration space of the slice is contained within the new iteration space
/// that is created after fusing 'this' slice into its destination.
std::optional<bool> ComputationSliceState::isSliceValid() const {
  // Fast check to determine if the slice is valid. If the following conditions
  // are verified to be true, slice is declared valid by the fast check:
  // 1. Each slice loop is a single iteration loop bound in terms of a single
  //    destination loop IV.
  // 2. Loop bounds of the destination loop IV (from above) and those of the
  //    source loop IV are exactly the same.
  // If the fast check is inconclusive or false, we proceed with a more
  // expensive analysis.
  // TODO: Store the result of the fast check, as it might be used again in
  // `canRemoveSrcNodeAfterFusion`.
  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
  if (isValidFastCheck && *isValidFastCheck)
    return true;

  // Create constraints for the source loop nest from which the slice is
  // computed.
  FlatAffineValueConstraints srcConstraints;
  // TODO: Store the source's domain to avoid computation at each depth.
  if (failed(getSourceAsConstraints(srcConstraints))) {
    LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
    return std::nullopt;
  }
  // As the set difference utility currently cannot handle symbols in its
  // operands, validity of the slice cannot be determined.
  if (srcConstraints.getNumSymbolVars() > 0) {
    LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
    return std::nullopt;
  }
  // TODO: Handle local vars in the source domains while using the 'projectOut'
  // utility below. Currently, aligning is not done assuming that there will be
  // no local vars in the source domain.
  if (srcConstraints.getNumLocalVars() != 0) {
    LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
    return std::nullopt;
  }

  // Create constraints for the slice loop nest that would be created if the
  // fusion succeeds.
  FlatAffineValueConstraints sliceConstraints;
  if (failed(getAsConstraints(&sliceConstraints))) {
    LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
    return std::nullopt;
  }

  // Project out every dimension other than the 'ivs' to express the slice's
  // domain completely in terms of the source's IVs.
  sliceConstraints.projectOut(ivs.size(),
                              sliceConstraints.getNumVars() - ivs.size());

  LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
  LLVM_DEBUG(srcConstraints.dump());
  LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
                             "(expressed in terms of its source's IVs):\n");
  LLVM_DEBUG(sliceConstraints.dump());

  // TODO: Store 'srcSet' to avoid recalculating for each depth.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = sliceSet.subtract(srcSet);

  if (!diffSet.isIntegerEmpty()) {
    LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
    return false;
  }
  return true;
}

/// Returns true if the computation slice encloses all the iterations of the
/// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
/// cannot determine if the slice is maximal or not.
std::optional<bool> ComputationSliceState::isMaximal() const {
  // Fast check to determine if the computation slice is maximal. If the result
  // is inconclusive, we proceed with a more expensive analysis.
  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
  if (isMaximalFastCheck)
    return isMaximalFastCheck;

  // Create constraints for the src loop nest being sliced.
  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
                                            /*numSymbols=*/0,
                                            /*numLocals=*/0, ivs);
  for (Value iv : ivs) {
    AffineForOp loop = getForInductionVarOwner(iv);
    assert(loop && "Expected affine for");
    if (failed(srcConstraints.addAffineForOpDomain(loop)))
      return std::nullopt;
  }

  // Create constraints for the slice using the dst loop nest information. We
  // retrieve existing dst loops from the lbOperands.
  SmallVector<Value> consumerIVs;
  for (Value lbOp : lbOperands[0])
    if (getForInductionVarOwner(lbOp))
      consumerIVs.push_back(lbOp);

  // Add empty IV Values for those new loops that are not equalities and,
  // therefore, are not yet materialized in the IR.
  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
    consumerIVs.push_back(Value());

  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
                                              /*numSymbols=*/0,
                                              /*numLocals=*/0, consumerIVs);

  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
    return std::nullopt;

  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
    // Constraint dims are different. The integer set difference can't be
    // computed so we don't know if the slice is maximal.
    return std::nullopt;

  // Compute the difference between the src loop nest and the slice integer
  // sets.
  PresburgerSet srcSet(srcConstraints);
  PresburgerSet sliceSet(sliceConstraints);
  PresburgerSet diffSet = srcSet.subtract(sliceSet);
  return diffSet.isIntegerEmpty();
}

unsigned MemRefRegion::getRank() const {
  return cast<MemRefType>(memref.getType()).getRank();
}

std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
    SmallVectorImpl<int64_t> *shape, SmallVectorImpl<AffineMap> *lbs) const {
  auto memRefType = cast<MemRefType>(memref.getType());
  MLIRContext *context = memref.getContext();
  unsigned rank = memRefType.getRank();
  if (shape)
    shape->reserve(rank);

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  // Use a copy of the region constraints that has upper/lower bounds for each
  // memref dimension with static size added to guard against potential
  // over-approximation from projection or union bounding box. We may not add
  // this on the region itself since they might just be redundant constraints
  // that will need non-trivial means to eliminate.
  FlatLinearValueConstraints cstWithShapeBounds(cst);
  for (unsigned r = 0; r < rank; r++) {
    cstWithShapeBounds.addBound(BoundType::LB, r, 0);
    int64_t dimSize = memRefType.getDimSize(r);
    if (ShapedType::isDynamic(dimSize))
      continue;
    cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
  }

  // Find a constant upper bound on the extent of this memref region along
  // each dimension.
  int64_t numElements = 1;
  int64_t diffConstant;
  for (unsigned d = 0; d < rank; d++) {
    AffineMap lb;
    std::optional<int64_t> diff =
        cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb);
    if (diff.has_value()) {
      diffConstant = *diff;
      assert(diffConstant >= 0 && "dim size bound cannot be negative");
    } else {
      // If no constant bound is found, then it can always be bound by the
      // memref's dim size if the latter has a constant size along this dim.
      auto dimSize = memRefType.getDimSize(d);
      if (ShapedType::isDynamic(dimSize))
        return std::nullopt;
      diffConstant = dimSize;
      // Lower bound becomes 0.
      lb = AffineMap::get(/*dimCount=*/0, cstWithShapeBounds.getNumSymbolVars(),
                          /*result=*/getAffineConstantExpr(0, context));
    }
    numElements *= diffConstant;
    // Populate outputs if available.
    if (lbs)
      lbs->push_back(lb);
    if (shape)
      shape->push_back(diffConstant);
  }
  return numElements;
}
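
// For illustration (hypothetical region): for a region on memref<64x64xf32>
// constrained by %i <= d0 <= %i + 7 and 0 <= d1 <= 63, the constant extent
// along d0 is 8 and along d1 is 64, so this returns 8 * 64 = 512 elements
// with shape [8, 64].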

void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
                                         AffineMap &ubMap) const {
  assert(pos < cst.getNumDimVars() && "invalid position");
  auto memRefType = cast<MemRefType>(memref.getType());
  unsigned rank = memRefType.getRank();

  assert(rank == cst.getNumDimVars() && "inconsistent memref region");

  auto boundPairs = cst.getLowerAndUpperBound(
      pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
      /*localExprs=*/{}, memRefType.getContext());
  lbMap = boundPairs.first;
  ubMap = boundPairs.second;
  assert(lbMap && "lower bound for a region must exist");
  assert(ubMap && "upper bound for a region must exist");
  assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
  assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}

LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
  assert(memref == other.memref);
  return cst.unionBoundingBox(*other.getConstraints());
}

/// Computes the memory region accessed by this memref with the region
/// represented as constraints symbolic/parametric in 'loopDepth' loops
/// surrounding opInst and any additional Function symbols.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
//   affine.for %i = 0 to 32 {
//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//       load %A[%ii]
//     }
//   }
//
// region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
//
// TODO: extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
                                    const ComputationSliceState *sliceState,
                                    bool addMemRefDimBounds, bool dropLocalVars,
                                    bool dropOuterIvs) {
  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
         "affine read/write op expected");

  MemRefAccess access(op);
  memref = access.memref;
  write = access.isStore();

  unsigned rank = access.getRank();

  LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
                          << "\ndepth: " << loopDepth << "\n";);

  // 0-d memrefs.
  if (rank == 0) {
    SmallVector<Value, 4> ivs;
    getAffineIVs(*op, ivs);
    assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
    // The first 'loopDepth' IVs are symbols for this region.
    ivs.resize(loopDepth);
    // A 0-d memref has a 0-d region.
    cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
    return success();
  }

  // Build the constraints for this region.
  AffineValueMap accessValueMap;
  access.getAccessMap(&accessValueMap);
  AffineMap accessMap = accessValueMap.getAffineMap();

  unsigned numDims = accessMap.getNumDims();
  unsigned numSymbols = accessMap.getNumSymbols();
  unsigned numOperands = accessValueMap.getNumOperands();
  // Merge operands with slice operands.
  SmallVector<Value, 4> operands;
  operands.resize(numOperands);
  for (unsigned i = 0; i < numOperands; ++i)
    operands[i] = accessValueMap.getOperand(i);

  if (sliceState != nullptr) {
    operands.reserve(operands.size() + sliceState->lbOperands[0].size());
    // Append slice operands to 'operands' as symbols.
    for (auto extraOperand : sliceState->lbOperands[0]) {
      if (!llvm::is_contained(operands, extraOperand)) {
        operands.push_back(extraOperand);
        numSymbols++;
      }
    }
  }
  // We'll first associate the dims and symbols of the access map to the dims
  // and symbols resp. of cst. This will change below once cst is
  // fully constructed.
  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);

  // Add equality constraints.
  // Add inequalities for loop lower/upper bounds.
  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
    auto operand = operands[i];
    if (auto affineFor = getForInductionVarOwner(operand)) {
      // Note that cst can now have more dimensions than accessMap if the
      // bounds expressions involve outer loops or other symbols.
      // TODO: rewrite this to use getInstIndexSet; this way
      // conditionals will be handled when the latter supports it.
      if (failed(cst.addAffineForOpDomain(affineFor)))
        return failure();
    } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
      if (failed(cst.addAffineParallelOpDomain(parallelOp)))
        return failure();
    } else if (isValidSymbol(operand)) {
      // Check if the symbol is a constant.
      Value symbol = operand;
      if (auto constVal = getConstantIntValue(symbol))
        cst.addBound(BoundType::EQ, symbol, constVal.value());
    } else {
      LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
      return failure();
    }
  }

  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
  if (sliceState != nullptr) {
    // Add dim and symbol slice operands.
    for (auto operand : sliceState->lbOperands[0]) {
      if (failed(cst.addInductionVarOrTerminalSymbol(operand)))
        return failure();
    }
    // Add upper/lower bounds from 'sliceState' to 'cst'.
    LogicalResult ret =
        cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
                           sliceState->lbOperands[0]);
    assert(succeeded(ret) &&
           "should not fail as we never have semi-affine slice maps");
    (void)ret;
  }

  // Add access function equalities to connect loop IVs to data dimensions.
  if (failed(cst.composeMap(&accessValueMap))) {
    op->emitError("getMemRefRegion: compose affine map failed");
    LLVM_DEBUG(accessValueMap.getAffineMap().dump());
    return failure();
  }

  // Set all variables appearing after the first 'rank' variables as
  // symbolic variables - so that the ones corresponding to the memref
  // dimensions are the dimensional variables for the memref region.
  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);

  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
  // this memref region is symbolic.
  SmallVector<Value, 4> enclosingIVs;
  getAffineIVs(*op, enclosingIVs);
  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
  enclosingIVs.resize(loopDepth);
  SmallVector<Value, 4> vars;
  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
  for (auto en : llvm::enumerate(vars)) {
    if ((isAffineInductionVar(en.value())) &&
        !llvm::is_contained(enclosingIVs, en.value())) {
      if (dropOuterIvs) {
        cst.projectOut(en.value());
      } else {
        unsigned varPosition;
        cst.findVar(en.value(), &varPosition);
        auto varKind = cst.getVarKindAt(varPosition);
        varPosition -= cst.getNumDimVars();
        cst.convertToLocal(varKind, varPosition, varPosition + 1);
      }
    }
  }

  // Project out any local variables (these would have been added for any
  // mod/divs) if specified.
  if (dropLocalVars)
    cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());

  // Constant fold any symbolic variables.
  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
                           /*num=*/cst.getNumSymbolVars());

  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");

  // Add upper/lower bounds for each memref dimension with static size
  // to guard against potential over-approximation from projection.
  // TODO: Support dynamic memref dimensions.
  if (addMemRefDimBounds) {
    auto memRefType = cast<MemRefType>(memref.getType());
    for (unsigned r = 0; r < rank; r++) {
      cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
      if (memRefType.isDynamicDim(r))
        continue;
      cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
    }
  }
  cst.removeTrivialRedundancy();

  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
  LLVM_DEBUG(cst.dump());
  return success();
}

std::optional<int64_t>
mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
  auto elementType = memRefType.getElementType();

  unsigned sizeInBits;
  if (elementType.isIntOrFloat()) {
    sizeInBits = elementType.getIntOrFloatBitWidth();
  } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
    if (vectorType.getElementType().isIntOrFloat())
      sizeInBits =
          vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
    else
      return std::nullopt;
  } else {
    return std::nullopt;
  }
  return llvm::divideCeil(sizeInBits, 8);
}

// Returns the size of the region in bytes.
std::optional<int64_t> MemRefRegion::getRegionSize() {
  auto memRefType = cast<MemRefType>(memref.getType());

  if (!memRefType.getLayout().isIdentity()) {
    LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
    return std::nullopt;
  }

  // Compute the extents of the buffer.
  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
  if (!numElements) {
    LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
    return std::nullopt;
  }
  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!eltSize)
    return std::nullopt;
  return *eltSize * *numElements;
}

/// Returns the size of memref data in bytes if it's statically shaped,
/// std::nullopt otherwise. If the element of the memref has vector type, takes
/// into account the size of the vector as well.
// TODO: improve/complete this when we have target data.
std::optional<uint64_t>
mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
  if (!memRefType.hasStaticShape())
    return std::nullopt;
  auto elementType = memRefType.getElementType();
  if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
    return std::nullopt;

  auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
  if (!sizeInBytes)
    return std::nullopt;
  for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
    sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
  }
  return sizeInBytes;
}
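
// For illustration: memref<4x8xf32> is statically shaped and f32 occupies
// 4 bytes, so this returns 4 * 8 * 4 = 128 bytes. For memref<4xvector<8xf32>>
// the element size is 8 * 4 = 32 bytes, also giving 128 bytes. A dynamic
// shape such as memref<?xf32> yields std::nullopt.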

template <typename LoadOrStoreOp>
LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
                                                    bool emitError) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "argument should be either an AffineReadOpInterface or an "
                "AffineWriteOpInterface");

  Operation *op = loadOrStoreOp.getOperation();
  MemRefRegion region(op->getLoc());
  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
                            /*addMemRefDimBounds=*/false)))
    return success();

  LLVM_DEBUG(llvm::dbgs() << "Memory region");
  LLVM_DEBUG(region.getConstraints()->dump());

  bool outOfBounds = false;
  unsigned rank = loadOrStoreOp.getMemRefType().getRank();

  // For each dimension, check for out of bounds.
  for (unsigned r = 0; r < rank; r++) {
    FlatAffineValueConstraints ucst(*region.getConstraints());

    // Intersect memory region with constraint capturing out of bounds (both
    // out of upper and out of lower), and check if the constraint system is
    // feasible. If it is, there is at least one point out of bounds.
    SmallVector<int64_t, 4> ineq(rank + 1, 0);
    int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
    // TODO: handle dynamic dim sizes.
    if (dimSize == -1)
      continue;

    // Check for overflow: d_i >= memref dim size.
    ucst.addBound(BoundType::LB, r, dimSize);
    outOfBounds = !ucst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of upper bound access along dimension #" << (r + 1);
    }

    // Check for a negative index.
    FlatAffineValueConstraints lcst(*region.getConstraints());
    std::fill(ineq.begin(), ineq.end(), 0);
    // d_i <= -1;
    lcst.addBound(BoundType::UB, r, -1);
    outOfBounds = !lcst.isEmpty();
    if (outOfBounds && emitError) {
      loadOrStoreOp.emitOpError()
          << "memref out of lower bound access along dimension #" << (r + 1);
    }
  }
  return failure(outOfBounds);
}
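
// For illustration (hypothetical IR): for
//
//   affine.for %i = 0 to 10 {
//     %v = affine.load %A[%i] : memref<8xf32>
//   }
//
// the region along dimension 0 is [0, 9]; intersecting it with d0 >= 8 is
// feasible, so an out-of-upper-bound error is emitted for dimension #1.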

// Explicitly instantiate the template so that the compiler knows we need them!
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
                                      bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
                                      bool emitError);

// Returns in 'positions' the Block positions of 'op' in each ancestor
// Block from the Block containing 'op', stopping at 'limitBlock'.
static void findInstPosition(Operation *op, Block *limitBlock,
                             SmallVectorImpl<unsigned> *positions) {
  Block *block = op->getBlock();
  while (block != limitBlock) {
    // FIXME: This algorithm is unnecessarily O(n) and should be improved to
    // not rely on linear scans.
    int instPosInBlock = std::distance(block->begin(), op->getIterator());
    positions->push_back(instPosInBlock);
    op = block->getParentOp();
    block = op->getBlock();
  }
  std::reverse(positions->begin(), positions->end());
}

// Returns the Operation in a possibly nested set of Blocks, where the
// position of the operation is represented by 'positions', which has a
// Block position for each level of nesting.
static Operation *getInstAtPosition(ArrayRef<unsigned> positions,
                                    unsigned level, Block *block) {
  unsigned i = 0;
  for (auto &op : *block) {
    if (i != positions[level]) {
      ++i;
      continue;
    }
    if (level == positions.size() - 1)
      return &op;
    if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
      return getInstAtPosition(positions, level + 1,
                               childAffineForOp.getBody());

    for (auto &region : op.getRegions()) {
      for (auto &b : region)
        if (auto *ret = getInstAtPosition(positions, level + 1, &b))
          return ret;
    }
    return nullptr;
  }
  return nullptr;
}
1497 
1498 // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
1499 static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
1500  FlatAffineValueConstraints *cst) {
1501  for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
1502  auto value = cst->getValue(i);
1503  if (ivs.count(value) == 0) {
1504  assert(isAffineForInductionVar(value));
1505  auto loop = getForInductionVarOwner(value);
1506  if (failed(cst->addAffineForOpDomain(loop)))
1507  return failure();
1508  }
1509  }
1510  return success();
1511 }
1512 
1513 /// Returns the innermost common loop depth for the set of operations in 'ops'.
1514 // TODO: Move this to LoopUtils.
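// For example, for opA nested under loops (%i, %j) and opB nested under
// (%i, %j, %k), the innermost common loop depth is 2, and the loops of %i and
// %j are appended to 'surroundingLoops' if it is non-null.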
1515 unsigned mlir::affine::getInnermostCommonLoopDepth(
1516  ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
1517  unsigned numOps = ops.size();
1518  assert(numOps > 0 && "Expected at least one operation");
1519 
1520  std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
1521  unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
1522  for (unsigned i = 0; i < numOps; ++i) {
1523  getAffineForIVs(*ops[i], &loops[i]);
1524  loopDepthLimit =
1525  std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
1526  }
1527 
1528  unsigned loopDepth = 0;
1529  for (unsigned d = 0; d < loopDepthLimit; ++d) {
1530  unsigned i;
1531  for (i = 1; i < numOps; ++i) {
1532  if (loops[i - 1][d] != loops[i][d])
1533  return loopDepth;
1534  }
1535  if (surroundingLoops)
1536  surroundingLoops->push_back(loops[i - 1][d]);
1537  ++loopDepth;
1538  }
1539  return loopDepth;
1540 }
1541 
1542 /// Computes in 'sliceUnion' the union of all slice bounds computed at
1543 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
1544 /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
1545 /// union was computed correctly, an appropriate failure otherwise.
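/// For example, given affine loads of the same memref in two sibling loop
/// nests, the per-pair backward-slice bounds computed at 'loopDepth' are
/// merged below via unionBoundingBox into one set of bounds on the source IVs
/// that covers all dependent pairs.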
1546 SliceComputationResult
1547 mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA,
1548  ArrayRef<Operation *> opsB, unsigned loopDepth,
1549  unsigned numCommonLoops, bool isBackwardSlice,
1550  ComputationSliceState *sliceUnion) {
1551  // Compute the union of slice bounds between all pairs in 'opsA' and
1552  // 'opsB' in 'sliceUnionCst'.
1553  FlatAffineValueConstraints sliceUnionCst;
1554  assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
1555  std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
1556  for (Operation *i : opsA) {
1557  MemRefAccess srcAccess(i);
1558  for (Operation *j : opsB) {
1559  MemRefAccess dstAccess(j);
1560  if (srcAccess.memref != dstAccess.memref)
1561  continue;
1562  // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
1563  if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) ||
1564  (isBackwardSlice && loopDepth > getNestingDepth(j))) {
1565  LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
1566  return SliceComputationResult::GenericFailure;
1567  }
1568 
1569  bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
1570  isa<AffineReadOpInterface>(dstAccess.opInst);
1571  FlatAffineValueConstraints dependenceConstraints;
1572  // Check dependence between 'srcAccess' and 'dstAccess'.
1573  DependenceResult result = checkMemrefAccessDependence(
1574  srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
1575  &dependenceConstraints, /*dependenceComponents=*/nullptr,
1576  /*allowRAR=*/readReadAccesses);
1577  if (result.value == DependenceResult::Failure) {
1578  LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
1579  return SliceComputationResult::GenericFailure;
1580  }
1581  if (result.value == DependenceResult::NoDependence)
1582  continue;
1583  dependentOpPairs.emplace_back(i, j);
1584 
1585  // Compute slice bounds for 'srcAccess' and 'dstAccess'.
1586  ComputationSliceState tmpSliceState;
1587  mlir::affine::getComputationSliceState(i, j, dependenceConstraints,
1588  loopDepth, isBackwardSlice,
1589  &tmpSliceState);
1590 
1591  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1592  // Initialize 'sliceUnionCst' with the bounds computed in previous step.
1593  if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
1594  LLVM_DEBUG(llvm::dbgs()
1595  << "Unable to compute slice bound constraints\n");
1596  return SliceComputationResult::GenericFailure;
1597  }
1598  assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
1599  continue;
1600  }
1601 
1602  // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
1603  FlatAffineValueConstraints tmpSliceCst;
1604  if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
1605  LLVM_DEBUG(llvm::dbgs()
1606  << "Unable to compute slice bound constraints\n");
1607  return SliceComputationResult::GenericFailure;
1608  }
1609 
1610  // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
1611  if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
1612 
1613  // Pre-constraint var alignment: record loop IVs used in each constraint
1614  // system.
1615  SmallPtrSet<Value, 8> sliceUnionIVs;
1616  for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
1617  sliceUnionIVs.insert(sliceUnionCst.getValue(k));
1618  SmallPtrSet<Value, 8> tmpSliceIVs;
1619  for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
1620  tmpSliceIVs.insert(tmpSliceCst.getValue(k));
1621 
1622  sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);
1623 
1624  // Post-constraint var alignment: add loop IV bounds missing after
1625  // var alignment to constraint systems. This can occur if one constraint
1626  // system uses a loop IV that is not used by the other. The call
1627  // to unionBoundingBox below expects constraints for each loop IV, even
1628  // if they are the unsliced full loop bounds added here.
1629  if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
1630  return SliceComputationResult::GenericFailure;
1631  if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
1632  return SliceComputationResult::GenericFailure;
1633  }
1634  // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
1635  if (sliceUnionCst.getNumLocalVars() > 0 ||
1636  tmpSliceCst.getNumLocalVars() > 0 ||
1637  failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
1638  LLVM_DEBUG(llvm::dbgs()
1639  << "Unable to compute union bounding box of slice bounds\n");
1640  return SliceComputationResult::GenericFailure;
1641  }
1642  }
1643  }
1644 
1645  // Empty union.
1646  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1647  LLVM_DEBUG(llvm::dbgs() << "empty slice union - unexpected\n");
1648  return SliceComputationResult::GenericFailure;
1649  }
1650 
1651  // Gather loops surrounding ops from loop nest where slice will be inserted.
1652  SmallVector<Operation *, 4> ops;
1653  for (auto &dep : dependentOpPairs) {
1654  ops.push_back(isBackwardSlice ? dep.second : dep.first);
1655  }
1656  SmallVector<AffineForOp, 4> surroundingLoops;
1657  unsigned innermostCommonLoopDepth =
1658  getInnermostCommonLoopDepth(ops, &surroundingLoops);
1659  if (loopDepth > innermostCommonLoopDepth) {
1660  LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
1661  return SliceComputationResult::GenericFailure;
1662  }
1663 
1664  // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
1665  unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
1666 
1667  // Convert any dst loop IVs which are symbol variables to dim variables.
1668  sliceUnionCst.convertLoopIVSymbolsToDims();
1669  sliceUnion->clearBounds();
1670  sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
1671  sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
1672 
1673  // Get slice bounds from slice union constraints 'sliceUnionCst'.
1674  sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
1675  opsA[0]->getContext(), &sliceUnion->lbs,
1676  &sliceUnion->ubs);
1677 
1678  // Add slice bound operands of union.
1679  SmallVector<Value, 4> sliceBoundOperands;
1680  sliceUnionCst.getValues(numSliceLoopIVs,
1681  sliceUnionCst.getNumDimAndSymbolVars(),
1682  &sliceBoundOperands);
1683 
1684  // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
1685  sliceUnion->ivs.clear();
1686  sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);
1687 
1688  // Set loop nest insertion point to block start at 'loopDepth' for forward
1689  // slices, while at the end for backward slices.
1690  sliceUnion->insertPoint =
1691  isBackwardSlice
1692  ? surroundingLoops[loopDepth - 1].getBody()->begin()
1693  : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
1694 
1695  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1696  // canonicalization.
1697  sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1698  sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1699 
1700  // Check whether the computed slice is valid. Return success only if the
1701  // slice is verified to be valid; otherwise return an appropriate failure.
1702  std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
1703  if (!isSliceValid) {
1704  LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
1705  return SliceComputationResult::GenericFailure;
1706  }
1707  if (!*isSliceValid)
1708  return SliceComputationResult::IncorrectSliceFailure;
1709 
1710  return SliceComputationResult::Success;
1711 }
1712 
1713 // TODO: extend this to handle multiple result maps.
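// For example, lbMap = (d0) -> (d0) and ubMap = (d0) -> (d0 + 4) yield the
// constant difference 4, while ubMap = (d0) -> (2 * d0) yields a non-constant
// difference (d0) and std::nullopt is returned.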
1714 static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1715  AffineMap ubMap) {
1716  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
1717  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
1718  assert(lbMap.getNumDims() == ubMap.getNumDims());
1719  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1720  AffineExpr lbExpr(lbMap.getResult(0));
1721  AffineExpr ubExpr(ubMap.getResult(0));
1722  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
1723  lbMap.getNumSymbols());
1724  auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
1725  if (!cExpr)
1726  return std::nullopt;
1727  return cExpr.getValue();
1728 }
1729 
1730 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for the
1731 // loop nest represented by the slice loop bounds in 'slice'. Returns true on
1732 // success, false otherwise (if a non-constant trip count was encountered).
1733 // TODO: Make this work with non-unit step loops.
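// For example, a slice with lbs[i] = (d0) -> (d0) and ubs[i] = (d0) -> (d0 + 2)
// contributes a constant trip count of 2 for the i-th sliced loop; a
// non-constant difference (e.g., ubs[i] = (d0) -> (2 * d0)) makes this return
// false.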
1734 bool mlir::affine::buildSliceTripCountMap(
1735  const ComputationSliceState &slice,
1736  llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1737  unsigned numSrcLoopIVs = slice.ivs.size();
1738  // Populate map from AffineForOp -> trip count
1739  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1740  AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1741  auto *op = forOp.getOperation();
1742  AffineMap lbMap = slice.lbs[i];
1743  AffineMap ubMap = slice.ubs[i];
1744  // If the lower or upper bound map is null or provides no results, the
1745  // source loop was not sliced at all, and the entire loop is part of the
1746  // slice.
1747  if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
1748  ubMap.getNumResults() == 0) {
1749  // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1750  if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1751  (*tripCountMap)[op] =
1752  forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1753  continue;
1754  }
1755  std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1756  if (maybeConstTripCount.has_value()) {
1757  (*tripCountMap)[op] = *maybeConstTripCount;
1758  continue;
1759  }
1760  return false;
1761  }
1762  std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1763  // Slice bounds are created with a constant ub - lb difference.
1764  if (!tripCount.has_value())
1765  return false;
1766  (*tripCountMap)[op] = *tripCount;
1767  }
1768  return true;
1769 }
1770 
1771 // Return the number of iterations in the given slice.
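// For example, for a map with trip counts {loop0: 4, loop1: 2}, this returns
// 4 * 2 = 8 iterations.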
1772 uint64_t mlir::affine::getSliceIterationCount(
1773  const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1774  uint64_t iterCount = 1;
1775  for (const auto &count : sliceTripCountMap) {
1776  iterCount *= count.second;
1777  }
1778  return iterCount;
1779 }
1780 
1781 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1782 // Computes slice bounds by projecting out any loop IVs from
1783 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1784 // bounds in 'sliceState' which represent one loop nest's IVs in terms of
1785 // the other loop nest's IVs, symbols, and constants (using 'isBackwardSlice').
1786 void mlir::affine::getComputationSliceState(
1787  Operation *depSourceOp, Operation *depSinkOp,
1788  const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth,
1789  bool isBackwardSlice, ComputationSliceState *sliceState) {
1790  // Get loop nest surrounding src operation.
1791  SmallVector<AffineForOp, 4> srcLoopIVs;
1792  getAffineForIVs(*depSourceOp, &srcLoopIVs);
1793  unsigned numSrcLoopIVs = srcLoopIVs.size();
1794 
1795  // Get loop nest surrounding dst operation.
1796  SmallVector<AffineForOp, 4> dstLoopIVs;
1797  getAffineForIVs(*depSinkOp, &dstLoopIVs);
1798  unsigned numDstLoopIVs = dstLoopIVs.size();
1799 
1800  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1801  (isBackwardSlice && loopDepth <= numDstLoopIVs));
1802 
1803  // Project out dimensions other than those up to 'loopDepth'.
1804  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1805  unsigned num =
1806  isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1807  FlatAffineValueConstraints sliceCst(dependenceConstraints);
1808  sliceCst.projectOut(pos, num);
1809 
1810  // Add slice loop IV values to 'sliceState'.
1811  unsigned offset = isBackwardSlice ? 0 : loopDepth;
1812  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1813  sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs);
1814 
1815  // Set up lower/upper bound affine maps for the slice.
1816  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
1817  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
1818 
1819  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1820  sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(),
1821  &sliceState->lbs, &sliceState->ubs);
1822 
1823  // Set up bound operands for the slice's lower and upper bounds.
1824  SmallVector<Value, 4> sliceBoundOperands;
1825  unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars();
1826  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
1827  if (i < offset || i >= offset + numSliceLoopIVs)
1828  sliceBoundOperands.push_back(sliceCst.getValue(i));
1829  }
1830 
1831  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1832  // canonicalization.
1833  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1834  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1835 
1836  // Set destination loop nest insertion point to block start at 'dstLoopDepth'.
1837  sliceState->insertPoint =
1838  isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1839  : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1840 
1841  llvm::SmallDenseSet<Value, 8> sequentialLoops;
1842  if (isa<AffineReadOpInterface>(depSourceOp) &&
1843  isa<AffineReadOpInterface>(depSinkOp)) {
1844  // For read-read access pairs, clear any slice bounds on sequential loops.
1845  // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1846  getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
1847  &sequentialLoops);
1848  }
1849  auto getSliceLoop = [&](unsigned i) {
1850  return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1851  };
1852  auto isInnermostInsertion = [&]() {
1853  return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1854  : loopDepth >= dstLoopIVs.size());
1855  };
1856  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
1857  auto srcIsUnitSlice = [&]() {
1858  return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
1859  (getSliceIterationCount(sliceTripCountMap) == 1));
1860  };
1861  // Clear all sliced loop bounds beginning at the first sequential loop or the
1862  // first loop with a slice fusion barrier attribute.
1863 
1864  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
1865  Value iv = getSliceLoop(i).getInductionVar();
1866  if (sequentialLoops.count(iv) == 0 &&
1867  getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1868  continue;
1869  // Skip resetting the bounds of a reduction loop inserted in the destination
1870  // loop nest when all of the following conditions hold:
1871  // 1. The slice has a single trip count.
1872  // 2. The loop bounds of the source and destination match.
1873  // 3. The slice is being inserted at the innermost insertion point.
1874  std::optional<bool> isMaximal = sliceState->isMaximal();
1875  if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
1876  isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1877  continue;
1878  for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1879  sliceState->lbs[j] = AffineMap();
1880  sliceState->ubs[j] = AffineMap();
1881  }
1882  break;
1883  }
1884 }
1885 
1886 /// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1887 /// updates the slice loop bounds with any non-null bound maps specified in
1888 /// 'sliceState', and inserts this slice into the loop nest surrounding
1889 /// 'dstOpInst' at loop depth 'dstLoopDepth'.
1890 // TODO: extend the slicing utility to compute slices that
1891 // aren't necessarily a one-to-one relation between the source and destination;
1892 // the relation could be many-to-many in general.
1893 // TODO: the slice computation is incorrect in the cases
1894 // where the dependence from the source to the destination does not cover the
1895 // entire destination index set. Subtract out the dependent destination
1896 // iterations from destination index set and check for emptiness --- this is one
1897 // solution.
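// For example, with dstLoopDepth == 1, the source nest is cloned at the top of
// the body of the outermost destination loop, and the cloned loops' bounds are
// then narrowed using any non-null maps in 'sliceState'.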
1898 AffineForOp mlir::affine::insertBackwardComputationSlice(
1899  Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
1900  ComputationSliceState *sliceState) {
1901  // Get loop nest surrounding src operation.
1902  SmallVector<AffineForOp, 4> srcLoopIVs;
1903  getAffineForIVs(*srcOpInst, &srcLoopIVs);
1904  unsigned numSrcLoopIVs = srcLoopIVs.size();
1905 
1906  // Get loop nest surrounding dst operation.
1907  SmallVector<AffineForOp, 4> dstLoopIVs;
1908  getAffineForIVs(*dstOpInst, &dstLoopIVs);
1909  unsigned dstLoopIVsSize = dstLoopIVs.size();
1910  if (dstLoopDepth > dstLoopIVsSize) {
1911  dstOpInst->emitError("invalid destination loop depth");
1912  return AffineForOp();
1913  }
1914 
1915  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1916  SmallVector<unsigned, 4> positions;
1917  // TODO: This code is incorrect since srcLoopIVs can be 0-d.
1918  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
1919 
1920 // Clone src loop nest and insert it at the beginning of the operation block
1921  // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1922  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1923  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1924  auto sliceLoopNest =
1925  cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
1926 
1927  Operation *sliceInst =
1928  getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
1929  // Get loop nest surrounding 'sliceInst'.
1930  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
1931  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1932 
1933  // Sanity check.
1934  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1935  (void)sliceSurroundingLoopsSize;
1936  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1937  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1938  (void)sliceLoopLimit;
1939  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1940 
1941  // Update loop bounds for loops in 'sliceLoopNest'.
1942  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1943  auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1944  if (AffineMap lbMap = sliceState->lbs[i])
1945  forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
1946  if (AffineMap ubMap = sliceState->ubs[i])
1947  forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
1948  }
1949  return sliceLoopNest;
1950 }
1951 
1952 // Constructs a MemRefAccess, populating the memref, its access indices, and
1953 // 'opInst' from 'loadOrStoreOpInst'.
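// For example, for `affine.load %A[%i, %j]`, this sets memref to %A, indices
// to {%i, %j}, and opInst to the load operation itself.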
1954 MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) {
1955  if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
1956  memref = loadOp.getMemRef();
1957  opInst = loadOrStoreOpInst;
1958  llvm::append_range(indices, loadOp.getMapOperands());
1959  } else {
1960  assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
1961  "Affine read/write op expected");
1962  auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
1963  opInst = loadOrStoreOpInst;
1964  memref = storeOp.getMemRef();
1965  llvm::append_range(indices, storeOp.getMapOperands());
1966  }
1967 }
1968 
1969 unsigned MemRefAccess::getRank() const {
1970  return cast<MemRefType>(memref.getType()).getRank();
1971 }
1972 
1973 bool MemRefAccess::isStore() const {
1974  return isa<AffineWriteOpInterface>(opInst);
1975 }
1976 
1977 /// Returns the nesting depth of this operation, i.e., the number of loops
1978 /// surrounding this operation.
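/// For example, an operation inside `affine.parallel (%i, %j)` which is itself
/// nested in `affine.for %k` has nesting depth 3, since the parallel op
/// contributes one level per dimension.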
1979 unsigned mlir::affine::getNestingDepth(Operation *op) {
1980  Operation *currOp = op;
1981  unsigned depth = 0;
1982  while ((currOp = currOp->getParentOp())) {
1983  if (isa<AffineForOp>(currOp))
1984  depth++;
1985  if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
1986  depth += parOp.getNumDims();
1987  }
1988  return depth;
1989 }
1990 
1991 /// Equal if both affine accesses are provably equivalent (at compile
1992 /// time) when considering the memref, the affine maps and their respective
1993 /// operands. The equality of access functions + operands is checked by
1994 /// subtracting fully composed value maps, and then simplifying the difference
1995 /// using the expression flattener.
1996 /// TODO: this does not account for aliasing of memrefs.
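/// For example, two `affine.load %A[%i + 1]` ops with the same operand %i
/// compare equal, whereas `%A[%i]` and `%A[%j]` with distinct IVs do not,
/// since their access-function difference does not simplify to zero.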
1997 bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
1998  if (memref != rhs.memref)
1999  return false;
2000 
2001  AffineValueMap diff, thisMap, rhsMap;
2002  getAccessMap(&thisMap);
2003  rhs.getAccessMap(&rhsMap);
2004  return thisMap == rhsMap;
2005 }
2006 
2007 void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
2008  auto *currOp = op.getParentOp();
2009  AffineForOp currAffineForOp;
2010  // Traverse up the hierarchy collecting all 'affine.for' and
2011  // 'affine.parallel' operations while skipping over 'affine.if' operations.
2012  while (currOp) {
2013  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
2014  ivs.push_back(currAffineForOp.getInductionVar());
2015  else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
2016  llvm::append_range(ivs, parOp.getIVs());
2017  currOp = currOp->getParentOp();
2018  }
2019  std::reverse(ivs.begin(), ivs.end());
2020 }
2021 
2022 /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
2023 /// where each lists loops from outer-most to inner-most in loop nest.
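/// For example, loopsA = {%i, %j, %k} and loopsB = {%i, %j, %m} share the
/// two-loop prefix {%i, %j}, so 2 is returned.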
2024 unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
2025  Operation &b) {
2026  SmallVector<Value, 4> loopsA, loopsB;
2027  getAffineIVs(a, loopsA);
2028  getAffineIVs(b, loopsB);
2029 
2030  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
2031  unsigned numCommonLoops = 0;
2032  for (unsigned i = 0; i < minNumLoops; ++i) {
2033  if (loopsA[i] != loopsB[i])
2034  break;
2035  ++numCommonLoops;
2036  }
2037  return numCommonLoops;
2038 }
2039 
2040 static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
2041  Block::iterator start,
2042  Block::iterator end,
2043  int memorySpace) {
2044  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
2045 
2046  // Walk this 'affine.for' operation to gather all memory regions.
2047  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
2048  if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
2049  // Neither a load nor a store op.
2050  return WalkResult::advance();
2051  }
2052 
2053  // Compute the memref region symbolic in any IVs enclosing this block.
2054  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
2055  if (failed(
2056  region->compute(opInst,
2057  /*loopDepth=*/getNestingDepth(&*block.begin())))) {
2058  LLVM_DEBUG(opInst->emitError("error obtaining memory region"));
2059  return failure();
2060  }
2061 
2062  auto [it, inserted] = regions.try_emplace(region->memref);
2063  if (inserted) {
2064  it->second = std::move(region);
2065  } else if (failed(it->second->unionBoundingBox(*region))) {
2066  LLVM_DEBUG(opInst->emitWarning(
2067  "getMemoryFootprintBytes: unable to perform a union on a memory "
2068  "region"));
2069  return failure();
2070  }
2071  return WalkResult::advance();
2072  });
2073  if (result.wasInterrupted())
2074  return std::nullopt;
2075 
2076  int64_t totalSizeInBytes = 0;
2077  for (const auto &region : regions) {
2078  std::optional<int64_t> size = region.second->getRegionSize();
2079  if (!size.has_value())
2080  return std::nullopt;
2081  totalSizeInBytes += *size;
2082  }
2083  return totalSizeInBytes;
2084 }
2085 
2086 std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
2087  int memorySpace) {
2088  auto *forInst = forOp.getOperation();
2089  return ::getMemoryFootprintBytes(
2090  *forInst->getBlock(), Block::iterator(forInst),
2091  std::next(Block::iterator(forInst)), memorySpace);
2092 }
2093 
2094 /// Returns whether a loop is parallel and contains a reduction loop.
2095 bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
2096  SmallVector<LoopReduction> reductions;
2097  if (!isLoopParallel(forOp, &reductions))
2098  return false;
2099  return !reductions.empty();
2100 }
2101 
2102 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
2103 /// at 'forOp'.
2104 void mlir::affine::getSequentialLoops(
2105  AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
2106  forOp->walk([&](Operation *op) {
2107  if (auto innerFor = dyn_cast<AffineForOp>(op))
2108  if (!isLoopParallel(innerFor))
2109  sequentialLoops->insert(innerFor.getInductionVar());
2110  });
2111 }
2112 
2113 IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
2114  FlatAffineValueConstraints fac(set);
2115  if (fac.isEmpty())
2116  return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
2117  set.getContext());
2118  fac.removeTrivialRedundancy();
2119 
2120  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
2121  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
2122  return simplifiedSet;
2123 }
2124 
2125 static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
2126  SmallVector<Value> &target) {
2127  target =
2128  llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
2129  return val.has_value() ? *val : Value();
2130  }));
2131 }
2132 
2133 /// Bound an identifier `pos` in a given FlatAffineValueConstraints with
2134 /// constraints drawn from an affine map. Before adding the constraint, the
2135 /// dimensions/symbols of the affine map are aligned with `constraints`.
2136 /// `operands` are the SSA Value operands used with the affine map.
2137 /// Note: This function adds a new symbol column to the `constraints` for each
2138 /// dimension/symbol that exists in the affine map but not in `constraints`.
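/// For example (hypothetical values), if `constraints` holds one dim for %i
/// and no symbols, and `map` is (d0)[s0] -> (d0 + s0) with operands (%i, %n),
/// a new symbol column for %n is appended before the bound is added.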
2139 static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
2140  BoundType type, unsigned pos,
2141  AffineMap map, ValueRange operands) {
2142  SmallVector<Value> dims, syms, newSyms;
2143  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
2144  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
2145 
2146  AffineMap alignedMap =
2147  alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
2148  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
2149  constraints.appendSymbolVar(newSyms[i]);
2150  return constraints.addBound(type, pos, alignedMap);
2151 }
2152 
2153 /// Add `val` to each result of `map`.
2154 static AffineMap addConstToResults(AffineMap map, int64_t val) {
2155  SmallVector<AffineExpr> newResults;
2156  for (AffineExpr r : map.getResults())
2157  newResults.push_back(r + val);
2158  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
2159  map.getContext());
2160 }
2161 
2162 // Attempt to simplify the given min/max operation by proving that its value is
2163 // bounded by the same lower and upper bound.
2164 //
2165 // Bounds are computed by FlatAffineValueConstraints. Invariants required for
2166 // finding/proving bounds should be supplied via `constraints`.
2167 //
2168 // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
2169 // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
2170 // case of `!isMin`) and bind it to `opBound`. SSA values that are used in
2171 // `op` but are not part of `constraints` are added as extra symbols.
2172 // 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
2173 // * If `isMin`: r_i >= opBound
2174 // * If `!isMin`: r_i <= opBound
2175 // If this is the case, ub(op) == lb(op).
2176 // 4. Replace `op` with `opBound`.
2177 //
2178 // In summary, the following constraints are added throughout this function.
2179 // Note: `invar` are dimensions added by the caller to express the invariants.
2180 // (Showing only the case where `isMin`.)
2181 //
2182 // invar | op | opBound | r_i | extra syms... | const | eq/ineq
2183 // ------+-------+---------+-----+---------------+-------+-------------------
2184 // (various eq./ineq. constraining `invar`, added by the caller)
2185 // ... | 0 | 0 | 0 | 0 | ... | ...
2186 // ------+-------+---------+-----+---------------+-------+-------------------
2187 // (various ineq. constraining `op` in terms of `op` operands (`invar` and
2188 // extra `op` operands "extra syms" that are not in `invar`)).
2189 // ... | -1 | 0 | 0 | ... | ... | >= 0
2190 // ------+-------+---------+-----+---------------+-------+-------------------
2191 // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
2192 // ... | 0 | -1 | 0 | ... | ... | = 0
2193 // ------+-------+---------+-----+---------------+-------+-------------------
2194 // (for each `op` map result r_i: set r_i to corresponding map result,
2195 // prove that r_i >= minOpUb via contradiction)
2196 // ... | 0 | 0 | -1 | ... | ... | = 0
2197 // 0 | 0 | 1 | -1 | 0 | -1 | >= 0
2198 //
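// Illustrative sketch (hypothetical IR): for
//   %m = affine.min affine_map<(d0) -> (d0, 10)>(%i)
// with the invariant 0 <= %i <= 5 encoded in `constraints`, the computed upper
// bound of the op is %i, both results are provably >= that bound (10 >= %i
// since %i <= 5), and the op simplifies to the single-result map (d0) -> (d0)
// with operand %i.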
2199 FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
2200  Operation *op, FlatAffineValueConstraints constraints) {
2201  bool isMin = isa<AffineMinOp>(op);
2202  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
2203  MLIRContext *ctx = op->getContext();
2204  Builder builder(ctx);
2205  AffineMap map =
2206  isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2207  ValueRange operands = op->getOperands();
2208  unsigned numResults = map.getNumResults();
2209 
2210  // Add a few extra dimensions.
2211  unsigned dimOp = constraints.appendDimVar(); // `op`
2212  unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
2213  unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults);
2214 
2215  // Add an inequality for each result expr_i of map:
2216  // isMin: op <= expr_i, !isMin: op >= expr_i
2217  auto boundType = isMin ? BoundType::UB : BoundType::LB;
2218  // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
2219  AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
2220  if (failed(
2221  alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
2222  return failure();
2223 
2224  // Try to compute a lower/upper bound for op, expressed in terms of the other
2225  // `dims` and extra symbols.
2226  SmallVector<AffineMap> opLb(1), opUb(1);
2227  constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
2228  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
2229  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
2230  // a TODO of `getSliceBounds` and not handled here.
2231  if (!sliceBound || sliceBound.getNumResults() != 1)
2232  return failure(); // No or multiple bounds found.
2233  // Recover the inclusive UB in the case of an `affine.min`.
2234  AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;
2235 
2236  // Add an equality: Set dimOpBound to computed bound.
2237  // Add back dimension for op. (Was removed by `getSliceBounds`.)
2238  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
2239  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
2240  return failure();
2241 
2242  // If the constraint system is empty, there is an inconsistency. (E.g., this
2243  // can happen if loop lb > ub.)
2244  if (constraints.isEmpty())
2245  return failure();
2246 
2247 // In the case of `isMin` (the `!isMin` case is inverted):
2248  // Prove that each result of `map` has a lower bound that is equal to (or
2249  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
2250  // can be replaced with the bound. I.e., prove that for each result
2251  // expr_i (represented by dimension r_i):
2252  //
2253  // r_i >= opBound
2254  //
2255  // To prove this inequality, add its negation to the constraint set and prove
2256  // that the constraint set is empty.
2257  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
2258  FlatAffineValueConstraints newConstr(constraints);
2259 
2260  // Add an equality: r_i = expr_i
2261  // Note: These equalities could have been added earlier and used to express
2262  // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
2263  // computes minOpUb in terms of r_i dims, which is not desired.
2264  if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
2265  map.getSubMap({i - resultDimStart}), operands)))
2266  return failure();
2267 
2268  // If `isMin`: Add inequality: r_i < opBound
2269  // equiv.: opBound - r_i - 1 >= 0
2270  // If `!isMin`: Add inequality: r_i > opBound
2271  // equiv.: -opBound + r_i - 1 >= 0
2272  SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
2273  ineq[dimOpBound] = isMin ? 1 : -1;
2274  ineq[i] = isMin ? -1 : 1;
2275  ineq[newConstr.getNumCols() - 1] = -1;
2276  newConstr.addInequality(ineq);
2277  if (!newConstr.isEmpty())
2278  return failure();
2279  }
2280 
2281 // Lower and upper bounds of `op` are equal. Replace `op` with its bound.
2282  AffineMap newMap = alignedBoundMap;
2283  SmallVector<Value> newOperands;
2284  unpackOptionalValues(constraints.getMaybeValues(), newOperands);
2285  // If dims/symbols have known constant values, use those in order to simplify
2286  // the affine map further.
2287  for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
2288  // Skip unused operands and operands that are already constants.
2289  if (!newOperands[i] || getConstantIntValue(newOperands[i]))
2290  continue;
2291  if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
2292  AffineExpr expr =
2293  i < newMap.getNumDims()
2294  ? builder.getAffineDimExpr(i)
2295  : builder.getAffineSymbolExpr(i - newMap.getNumDims());
2296  newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
2297  newMap.getNumDims(), newMap.getNumSymbols());
2298  }
2299  }
2300  affine::canonicalizeMapAndOperands(&newMap, &newOperands);
2301  return AffineValueMap(newMap, newOperands);
2302 }
2303 
2304 Block *mlir::affine::findInnermostCommonBlockInScope(Operation *a,
2305  Operation *b) {
2306  Region *aScope = getAffineAnalysisScope(a);
2307  Region *bScope = getAffineAnalysisScope(b);
2308  if (aScope != bScope)
2309  return nullptr;
2310 
2311  // Get the block ancestry of `op` while stopping at the affine scope `aScope`
2312  // and store it in `ancestry`.
2313  auto getBlockAncestry = [&](Operation *op,
2314  SmallVectorImpl<Block *> &ancestry) {
2315  Operation *curOp = op;
2316  do {
2317  ancestry.push_back(curOp->getBlock());
2318  if (curOp->getParentRegion() == aScope)
2319  break;
2320  curOp = curOp->getParentOp();
2321  } while (curOp);
2322  assert(curOp && "can't reach root op without passing through affine scope");
2323  std::reverse(ancestry.begin(), ancestry.end());
2324  };
2325 
2326  SmallVector<Block *, 4> aAncestors, bAncestors;
2327  getBlockAncestry(a, aAncestors);
2328  getBlockAncestry(b, bAncestors);
2329  assert(!aAncestors.empty() && !bAncestors.empty() &&
2330  "at least one Block ancestor expected");
2331 
2332  Block *innermostCommonBlock = nullptr;
2333  for (unsigned a = 0, b = 0, e = aAncestors.size(), f = bAncestors.size();
2334  a < e && b < f; ++a, ++b) {
2335  if (aAncestors[a] != bAncestors[b])
2336  break;
2337  innermostCommonBlock = aAncestors[a];
2338  }
2339  return innermostCommonBlock;
2340 }
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
Definition: Utils.cpp:1714
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
Definition: Utils.cpp:1457
static Node * addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg, DenseMap< Value, SetVector< unsigned >> &memrefAccesses)
Add op to MDG creating a new node and adding its memory accesses (affine or non-affine to memrefAcces...
Definition: Utils.cpp:192
const char *const kSliceFusionBarrierAttrName
Definition: Utils.cpp:1781
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
Definition: Utils.cpp:1499
static void getEffectedValues(Operation *op, SmallVectorImpl< Value > &values)
Returns the values that this op has a memref effect of type EffectTys on, not considering recursive e...
Definition: Utils.cpp:166
MemRefDependenceGraph::Node Node
Definition: Utils.cpp:39
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
Definition: Utils.cpp:1474
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
Definition: Utils.cpp:2040
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
Definition: Utils.cpp:2154
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
Definition: Utils.cpp:2139
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
Definition: Utils.cpp:2125
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:223
unsigned getPosition() const
Definition: AffineExpr.cpp:348
Base type for affine expression.
Definition: AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:343
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
Definition: AffineMap.h:267
unsigned getNumSymbols() const
Definition: AffineMap.cpp:398
unsigned getNumDims() const
Definition: AffineMap.cpp:394
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:407
unsigned getNumResults() const
Definition: AffineMap.cpp:402
unsigned getNumInputs() const
Definition: AffineMap.cpp:403
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:411
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
Definition: AffineMap.cpp:515
void dump() const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Definition: AffineMap.cpp:651
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
RetT walk(FnT &&callback)
Walk all nested operations, blocks (including this block) or regions, depending on the type of callba...
Definition: Block.h:305
iterator begin()
Definition: Block.h:143
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:33
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:366
AffineExpr getAffineConstantExpr(int64_t constant)
Definition: Builders.cpp:370
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:362
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
std::optional< int64_t > getConstantBoundOnDimSize(MLIRContext *context, unsigned pos, AffineMap *lb=nullptr, AffineMap *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
Returns a non-negative constant bound on the extent (upper bound - lower bound) of the specified vari...
FlatLinearValueConstraints represents an extension of FlatLinearConstraints where each non-local vari...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
SmallVector< std::optional< Value > > getMaybeValues() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
void addBound(presburger::BoundType type, Value val, int64_t value)
Adds a constant bound for the variable associated with the given Value.
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
An integer set representing a conjunction of one or more affine equalities and inequalities.
Definition: IntegerSet.h:44
unsigned getNumDims() const
Definition: IntegerSet.cpp:15
MLIRContext * getContext() const
Definition: IntegerSet.cpp:57
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
Definition: IntegerSet.h:56
unsigned getNumSymbols() const
Definition: IntegerSet.cpp:16
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:551
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:749
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:386
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:280
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:33
static WalkResult advance()
Definition: Visitors.h:51
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
Definition: Utils.cpp:2113
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:776
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
Definition: Utils.cpp:1547
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
Definition: AffineOps.cpp:2650
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
Definition: Utils.cpp:2095
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:2024
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2654
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
Definition: Utils.cpp:2007
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
Definition: Utils.cpp:2104
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1516
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2642
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
Definition: AffineOps.cpp:273
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
Definition: Utils.cpp:762
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:2086
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
Definition: Utils.cpp:1515
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:410
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
Definition: Utils.cpp:1786
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns true if the provided value is among the induction variables of an AffineParallelOp.
Definition: AffineOps.cpp:2665
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
Definition: Utils.cpp:1376
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1979
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the number of iterations for the slicetripCountMap provided.
Definition: Utils.cpp:1772
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
mlir::Block * findInnermostCommonBlockInScope(mlir::Operation *a, mlir::Operation *b)
Find the innermost common Block of a and b in the affine scope that a and b are part of.
Definition: Utils.cpp:2304
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
Definition: Utils.cpp:1734
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
Definition: Utils.cpp:1898
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
Definition: Utils.cpp:2199
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
Definition: Utils.cpp:1332
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
llvm::TypeSize divideCeil(llvm::TypeSize numerator, uint64_t denominator)
Divides the known min value of the numerator by the denominator and rounds the result up to the next ...
BoundType
The type of bound: equal, lower bound or upper bound.
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:645
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
Definition: Utils.h:310
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
Definition: Utils.cpp:947
SmallVector< Value, 4 > ivs
Definition: Utils.h:313
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
Definition: Utils.cpp:809
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
Definition: Utils.cpp:793
std::vector< SmallVector< Value, 4 > > ubOperands
Definition: Utils.h:321
SmallVector< AffineMap, 4 > ubs
Definition: Utils.h:317
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
Definition: Utils.cpp:1017
SmallVector< AffineMap, 4 > lbs
Definition: Utils.h:315
std::vector< SmallVector< Value, 4 > > lbOperands
Definition: Utils.h:319
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:49
SmallVector< Operation *, 4 > loadOpInsts
Definition: Utils.h:41
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:47
void collect(Operation *opToWalk)
Definition: Utils.cpp:44
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:45
SmallVector< Operation *, 4 > storeOpInsts
Definition: Utils.h:43
Encapsulates a memref load or store access information.
unsigned getRank() const
Definition: Utils.cpp:1969
MemRefAccess(Operation *opInst)
Constructs a MemRefAccess from a load or store operation.
Definition: Utils.cpp:1954
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
Definition: Utils.cpp:1997
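A sketch of the equivalence check above; the helper name accessSameElement is hypothetical, and both arguments are assumed to be affine load/store operations.

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"

// Hypothetical helper: operator== holds only when the two affine accesses
// are provably equivalent at compile time, including targeting the same
// memref.
static bool accessSameElement(mlir::Operation *a, mlir::Operation *b) {
  mlir::affine::MemRefAccess accessA(a), accessB(b);
  return accessA == accessB;
}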
SmallVector< Operation *, 4 > loads
Definition: Utils.h:73
SmallVector< Operation *, 4 > stores
Definition: Utils.h:77
unsigned hasFree(Value memref) const
Definition: Utils.cpp:124
SmallVector< Operation *, 4 > memrefLoads
Definition: Utils.h:75
SmallVector< Operation *, 4 > memrefStores
Definition: Utils.h:79
unsigned getStoreOpCount(Value memref) const
Definition: Utils.cpp:94
unsigned hasStore(Value memref) const
Returns true if there exists an operation with a write memory effect to memref in this node.
Definition: Utils.cpp:110
SmallVector< Operation *, 4 > memrefFrees
Definition: Utils.h:81
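A sketch of the per-node queries above; it assumes 'node' comes from an initialized dependence graph, and the helper name printNodeStats is hypothetical. The counts cover both affine and non-affine accesses tracked by the node.

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: summarize one node's traffic on 'memref'.
static void printNodeStats(const mlir::affine::Node &node,
                           mlir::Value memref) {
  llvm::errs() << "loads: " << node.getLoadOpCount(memref)
               << ", stores: " << node.getStoreOpCount(memref)
               << ", written: " << (node.hasStore(memref) ? "yes" : "no")
               << "\n";
}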
unsigned addNode(Operation *op)
Definition: Utils.cpp:374
bool writesToLiveInOrEscapingMemrefs(unsigned id) const
Definition: Utils.cpp:404
void removeEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:450
void addEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:439
DenseMap< unsigned, Node > nodes
Definition: Utils.h:141
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes) const
Returns all nodes that define SSA values used in node 'id'.
Definition: Utils.cpp:538
bool hasDependencePath(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:477
void clearNodeLoadAndStores(unsigned id)
Definition: Utils.cpp:705
const Node * getForOpNode(AffineForOp forOp) const
Definition: Utils.cpp:366
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) const
Definition: Utils.cpp:552
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
Definition: Utils.cpp:628
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:713
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr) const
Definition: Utils.cpp:528
const Node * getNode(unsigned id) const
Definition: Utils.cpp:359
void removeNode(unsigned id)
Definition: Utils.cpp:381
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:721
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:729
void addToNode(unsigned id, ArrayRef< Operation * > loads, ArrayRef< Operation * > stores, ArrayRef< Operation * > memrefLoads, ArrayRef< Operation * > memrefStores, ArrayRef< Operation * > memrefFrees)
Definition: Utils.cpp:692
unsigned getIncomingMemRefAccesses(unsigned id, Value memref) const
Definition: Utils.cpp:512
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr) const
Definition: Utils.cpp:424
void print(raw_ostream &os) const
Definition: Utils.cpp:744
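A sketch tying several of the graph methods above together; the helper name analyzeBlock is hypothetical, and the two node ids are placeholders assumed to be valid after construction.

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: build the graph over 'block', then dump it if a
// transitive dependence path connects the two (assumed-valid) node ids.
static void analyzeBlock(mlir::Block &block, unsigned srcId, unsigned dstId) {
  mlir::affine::MemRefDependenceGraph graph(block);
  if (!graph.init())
    return; // The block holds constructs the graph cannot model.
  if (graph.hasDependencePath(srcId, dstId))
    graph.print(llvm::errs());
}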
A region of a memref's data space; this is typically constructed by analyzing load/store ops on this...
Definition: Utils.h:481
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
Definition: Utils.cpp:1071
FlatAffineValueConstraints * getConstraints()
Definition: Utils.h:527
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
Definition: Utils.cpp:1067
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true, bool dropLocalVars=true, bool dropOuterIVs=true)
Computes the memory region accessed by this memref with the region represented as constraints symboli...
Definition: Utils.cpp:1168
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Definition: Utils.cpp:1127
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
Definition: Utils.cpp:1351
LogicalResult unionBoundingBox(const MemRefRegion &other)
Definition: Utils.cpp:1146
Value memref
Memref that this region corresponds to.
Definition: Utils.h:562
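A sketch of the typical compute-then-measure flow above; the helper name regionSizeInBytes is hypothetical, and loopDepth=0 asks for the region at the outermost level.

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include <optional>

// Hypothetical helper: region accessed by 'loadOrStoreOp', in bytes, when
// it can be bounded by a known constant.
static std::optional<int64_t>
regionSizeInBytes(mlir::Operation *loadOrStoreOp) {
  mlir::affine::MemRefRegion region(loadOrStoreOp->getLoc());
  if (mlir::failed(region.compute(loadOrStoreOp, /*loopDepth=*/0)))
    return std::nullopt; // Region could not be computed.
  return region.getRegionSize();
}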
Enumerates the different result statuses of slice computation by computeSliceUnion.
Definition: Utils.h:297
Eliminates the variable at the specified position using Fourier-Motzkin variable elimination.