MLIR  18.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous analysis routines for non-loop IR
10 // structures.
11 //
12 //===----------------------------------------------------------------------===//
13 
22 #include "mlir/IR/IntegerSet.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <optional>
27 
28 #define DEBUG_TYPE "analysis-utils"
29 
30 using namespace mlir;
31 using namespace affine;
32 using namespace presburger;
33 
34 using llvm::SmallDenseMap;
35 
37 
38 // LoopNestStateCollector walks loop nests and collects load and store
39 // operations, and whether or not a region holding op other than ForOp and IfOp
40 // was encountered in the loop nest.
// NOTE(review): the defining signature line (presumably
// LoopNestStateCollector::collect(Operation *opToWalk)) was lost in
// extraction; body kept byte-identical.
42  opToWalk->walk([&](Operation *op) {
// Record affine.for ops; flag any other region-holding op that is not an
// affine.if; otherwise collect affine load/store ops.
43  if (isa<AffineForOp>(op))
44  forOps.push_back(cast<AffineForOp>(op));
45  else if (op->getNumRegions() != 0 && !isa<AffineIfOp>(op))
46  hasNonAffineRegionOp = true;
47  else if (isa<AffineReadOpInterface>(op))
48  loadOpInsts.push_back(op);
49  else if (isa<AffineWriteOpInterface>(op))
50  storeOpInsts.push_back(op);
51  });
52 }
53 
54 // Returns the load op count for 'memref'.
55 unsigned Node::getLoadOpCount(Value memref) const {
56  unsigned loadOpCount = 0;
57  for (Operation *loadOp : loads) {
58  if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
59  ++loadOpCount;
60  }
61  return loadOpCount;
62 }
63 
64 // Returns the store op count for 'memref'.
65 unsigned Node::getStoreOpCount(Value memref) const {
66  unsigned storeOpCount = 0;
67  for (Operation *storeOp : stores) {
68  if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
69  ++storeOpCount;
70  }
71  return storeOpCount;
72 }
73 
74 // Returns all store ops in 'storeOps' which access 'memref'.
75 void Node::getStoreOpsForMemref(Value memref,
76  SmallVectorImpl<Operation *> *storeOps) const {
77  for (Operation *storeOp : stores) {
78  if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
79  storeOps->push_back(storeOp);
80  }
81 }
82 
83 // Returns all load ops in 'loadOps' which access 'memref'.
84 void Node::getLoadOpsForMemref(Value memref,
85  SmallVectorImpl<Operation *> *loadOps) const {
86  for (Operation *loadOp : loads) {
87  if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
88  loadOps->push_back(loadOp);
89  }
90 }
91 
92 // Returns all memrefs in 'loadAndStoreMemrefSet' for which this node
93 // has at least one load and store operation.
94 void Node::getLoadAndStoreMemrefSet(
95  DenseSet<Value> *loadAndStoreMemrefSet) const {
96  llvm::SmallDenseSet<Value, 2> loadMemrefs;
97  for (Operation *loadOp : loads) {
98  loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
99  }
100  for (Operation *storeOp : stores) {
101  auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
102  if (loadMemrefs.count(memref) > 0)
103  loadAndStoreMemrefSet->insert(memref);
104  }
105 }
106 
107 // Returns the graph node for 'id'.
// NOTE(review): the signature line (presumably
// Node *MemRefDependenceGraph::getNode(unsigned id)) was lost in extraction.
// Asserts that 'id' is present in the node map.
109  auto it = nodes.find(id);
110  assert(it != nodes.end());
111  return &it->second;
112 }
113 
114 // Returns the graph node for 'forOp'.
// NOTE(review): the signature line (presumably
// Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp)) was lost in
// extraction. Linear scan over all nodes; returns nullptr when absent.
116  for (auto &idAndNode : nodes)
117  if (idAndNode.second.op == forOp)
118  return &idAndNode.second;
119  return nullptr;
120 }
121 
122 // Adds a node with 'op' to the graph and returns its unique identifier.
// NOTE(review): the signature line (presumably
// unsigned MemRefDependenceGraph::addNode(Operation *op)) was lost in
// extraction. Ids are allocated monotonically from 'nextNodeId'.
124  Node node(nextNodeId++, op);
125  nodes.insert({node.id, node});
126  return node.id;
127 }
128 
129 // Remove node 'id' (and its associated edges) from graph.
// NOTE(review): the signature line (presumably
// void MemRefDependenceGraph::removeNode(unsigned id)) was lost in extraction.
131  // Remove each edge in 'inEdges[id]'.
132  if (inEdges.count(id) > 0) {
// Iterate over a copy: removeEdge() mutates 'inEdges[id]' while we loop.
133  SmallVector<Edge, 2> oldInEdges = inEdges[id];
134  for (auto &inEdge : oldInEdges) {
135  removeEdge(inEdge.id, id, inEdge.value);
136  }
137  }
138  // Remove each edge in 'outEdges[id]'.
139  if (outEdges.count(id) > 0) {
// Same copy-then-iterate pattern for the out-edge list.
140  SmallVector<Edge, 2> oldOutEdges = outEdges[id];
141  for (auto &outEdge : oldOutEdges) {
142  removeEdge(id, outEdge.id, outEdge.value);
143  }
144  }
145  // Erase remaining node state.
146  inEdges.erase(id);
147  outEdges.erase(id);
148  nodes.erase(id);
149 }
150 
151 // Returns true if node 'id' writes to any memref which escapes (or is an
152 // argument to) the block. Returns false otherwise.
// NOTE(review): the signature line (presumably
// bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id))
// was lost in extraction.
154  Node *node = getNode(id);
155  for (auto *storeOpInst : node->stores) {
156  auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
157  auto *op = memref.getDefiningOp();
158  // Return true if 'memref' is a block argument (i.e., live-in).
159  if (!op)
160  return true;
161  // Return true if any use of 'memref' does not dereference it in an affine
162  // way.
163  for (auto *user : memref.getUsers())
164  if (!isa<AffineMapAccessInterface>(*user))
165  return true;
166  }
167  return false;
168 }
169 
170 // Returns true iff there is an edge from node 'srcId' to node 'dstId' which
171 // is for 'value' if non-null, or for any value otherwise. Returns false
172 // otherwise.
173 bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
174  Value value) {
175  if (outEdges.count(srcId) == 0 || inEdges.count(dstId) == 0) {
176  return false;
177  }
178  bool hasOutEdge = llvm::any_of(outEdges[srcId], [=](Edge &edge) {
179  return edge.id == dstId && (!value || edge.value == value);
180  });
181  bool hasInEdge = llvm::any_of(inEdges[dstId], [=](Edge &edge) {
182  return edge.id == srcId && (!value || edge.value == value);
183  });
184  return hasOutEdge && hasInEdge;
185 }
186 
187 // Adds an edge from node 'srcId' to node 'dstId' for 'value'.
188 void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
189  Value value) {
190  if (!hasEdge(srcId, dstId, value)) {
191  outEdges[srcId].push_back({dstId, value});
192  inEdges[dstId].push_back({srcId, value});
193  if (isa<MemRefType>(value.getType()))
194  memrefEdgeCount[value]++;
195  }
196 }
197 
198 // Removes an edge from node 'srcId' to node 'dstId' for 'value'.
199 void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
200  Value value) {
201  assert(inEdges.count(dstId) > 0);
202  assert(outEdges.count(srcId) > 0);
203  if (isa<MemRefType>(value.getType())) {
204  assert(memrefEdgeCount.count(value) > 0);
205  memrefEdgeCount[value]--;
206  }
207  // Remove 'srcId' from 'inEdges[dstId]'.
208  for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
209  if ((*it).id == srcId && (*it).value == value) {
210  inEdges[dstId].erase(it);
211  break;
212  }
213  }
214  // Remove 'dstId' from 'outEdges[srcId]'.
215  for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
216  if ((*it).id == dstId && (*it).value == value) {
217  outEdges[srcId].erase(it);
218  break;
219  }
220  }
221 }
222 
223 // Returns true if there is a path in the dependence graph from node 'srcId'
224 // to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
225 // operations that the edges connected are expected to be from the same block.
226 bool MemRefDependenceGraph::hasDependencePath(unsigned srcId, unsigned dstId) {
227  // Worklist state is: <node-id, next-output-edge-index-to-visit>
// NOTE(review): the declaration of 'worklist' (a SmallVector of the pairs
// described above) was lost in extraction.
229  worklist.push_back({srcId, 0});
230  Operation *dstOp = getNode(dstId)->op;
231  // Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
232  while (!worklist.empty()) {
233  auto &idAndIndex = worklist.back();
234  // Return true if we have reached 'dstId'.
235  if (idAndIndex.first == dstId)
236  return true;
237  // Pop and continue if node has no out edges, or if all out edges have
238  // already been visited.
239  if (outEdges.count(idAndIndex.first) == 0 ||
240  idAndIndex.second == outEdges[idAndIndex.first].size()) {
241  worklist.pop_back();
242  continue;
243  }
244  // Get graph edge to traverse.
245  Edge edge = outEdges[idAndIndex.first][idAndIndex.second];
246  // Increment next output edge index for 'idAndIndex'.
247  ++idAndIndex.second;
248  // Add node at 'edge.id' to the worklist. We don't need to consider
249  // nodes that are "after" dstId in the containing block; one can't have a
250  // path to `dstId` from any of those nodes.
251  bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
252  if (!afterDst && edge.id != idAndIndex.first)
253  worklist.push_back({edge.id, 0});
254  }
255  return false;
256 }
257 
258 // Returns the input edge count for node 'id' and 'memref' from src nodes
259 // which access 'memref' with a store operation.
// NOTE(review): the first signature line (presumably unsigned
// MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,) was lost in
// extraction.
261  Value memref) {
262  unsigned inEdgeCount = 0;
263  if (inEdges.count(id) > 0)
264  for (auto &inEdge : inEdges[id])
265  if (inEdge.value == memref) {
266  Node *srcNode = getNode(inEdge.id);
267  // Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
268  if (srcNode->getStoreOpCount(memref) > 0)
269  ++inEdgeCount;
270  }
271  return inEdgeCount;
272 }
273 
274 // Returns the output edge count for node 'id' and 'memref' (if non-null),
275 // otherwise returns the total output edge count from node 'id'.
276 unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id, Value memref) {
277  unsigned outEdgeCount = 0;
278  if (outEdges.count(id) > 0)
279  for (auto &outEdge : outEdges[id])
280  if (!memref || outEdge.value == memref)
281  ++outEdgeCount;
282  return outEdgeCount;
283 }
284 
285 /// Return all nodes which define SSA values used in node 'id'.
// NOTE(review): the signature line (presumably void
// MemRefDependenceGraph::gatherDefiningNodes(...)) was lost in extraction.
287  unsigned id, DenseSet<unsigned> &definingNodes) {
288  for (MemRefDependenceGraph::Edge edge : inEdges[id])
289  // By definition of edge, if the edge value is a non-memref value,
290  // then the dependence is between a graph node which defines an SSA value
291  // and another graph node which uses the SSA value.
292  if (!isa<MemRefType>(edge.value.getType()))
293  definingNodes.insert(edge.id);
294 }
295 
296 // Computes and returns an insertion point operation, before which the
297 // the fused <srcId, dstId> loop nest can be inserted while preserving
298 // dependences. Returns nullptr if no such insertion point is found.
299 Operation *
// NOTE(review): the line naming this member (presumably
// MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,)
// was lost in extraction.
301  unsigned dstId) {
302  if (outEdges.count(srcId) == 0)
303  return getNode(dstId)->op;
304 
305  // Skip if there is any defining node of 'dstId' that depends on 'srcId'.
306  DenseSet<unsigned> definingNodes;
307  gatherDefiningNodes(dstId, definingNodes);
308  if (llvm::any_of(definingNodes,
309  [&](unsigned id) { return hasDependencePath(srcId, id); })) {
310  LLVM_DEBUG(llvm::dbgs()
311  << "Can't fuse: a defining op with a user in the dst "
312  "loop has dependence from the src loop\n");
313  return nullptr;
314  }
315 
316  // Build set of insts in range (srcId, dstId) which depend on 'srcId'.
317  SmallPtrSet<Operation *, 2> srcDepInsts;
318  for (auto &outEdge : outEdges[srcId])
319  if (outEdge.id != dstId)
320  srcDepInsts.insert(getNode(outEdge.id)->op);
321 
322  // Build set of insts in range (srcId, dstId) on which 'dstId' depends.
323  SmallPtrSet<Operation *, 2> dstDepInsts;
324  for (auto &inEdge : inEdges[dstId])
325  if (inEdge.id != srcId)
326  dstDepInsts.insert(getNode(inEdge.id)->op);
327 
328  Operation *srcNodeInst = getNode(srcId)->op;
329  Operation *dstNodeInst = getNode(dstId)->op;
330 
331  // Computing insertion point:
332  // *) Walk all operation positions in Block operation list in the
333  // range (src, dst). For each operation 'op' visited in this search:
334  // *) Store in 'firstSrcDepPos' the first position where 'op' has a
335  // dependence edge from 'srcNode'.
336  // *) Store in 'lastDstDepPost' the last position where 'op' has a
337  // dependence edge to 'dstNode'.
338  // *) Compare 'firstSrcDepPos' and 'lastDstDepPost' to determine the
339  // operation insertion point (or return null pointer if no such
340  // insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
// NOTE(review): the declaration of 'depInsts' (presumably a
// SmallVector<Operation *> recording the walked ops) was lost in extraction.
342  std::optional<unsigned> firstSrcDepPos;
343  std::optional<unsigned> lastDstDepPos;
344  unsigned pos = 0;
345  for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
346  it != Block::iterator(dstNodeInst); ++it) {
347  Operation *op = &(*it);
348  if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
349  firstSrcDepPos = pos;
350  if (dstDepInsts.count(op) > 0)
351  lastDstDepPos = pos;
352  depInsts.push_back(op);
353  ++pos;
354  }
355 
356  if (firstSrcDepPos.has_value()) {
357  if (lastDstDepPos.has_value()) {
358  if (*firstSrcDepPos <= *lastDstDepPos) {
359  // No valid insertion point exists which preserves dependences.
360  return nullptr;
361  }
362  }
363  // Return the insertion point at 'firstSrcDepPos'.
364  return depInsts[*firstSrcDepPos];
365  }
366  // No dependence targets in range (or only dst deps in range), return
367  // 'dstNodInst' insertion point.
368  return dstNodeInst;
369 }
370 
371 // Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
372 // taking into account that:
373 // *) if 'removeSrcId' is true, 'srcId' will be removed after fusion,
374 // *) memrefs in 'privateMemRefs' has been replaced in node at 'dstId' by a
375 // private memref.
376 void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
377  const DenseSet<Value> &privateMemRefs,
378  bool removeSrcId) {
379  // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
380  if (inEdges.count(srcId) > 0) {
381  SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
382  for (auto &inEdge : oldInEdges) {
383  // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
384  if (privateMemRefs.count(inEdge.value) == 0)
385  addEdge(inEdge.id, dstId, inEdge.value);
386  }
387  }
388  // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
389  // If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
390  if (outEdges.count(srcId) > 0) {
391  SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
392  for (auto &outEdge : oldOutEdges) {
393  // Remove any out edges from 'srcId' to 'dstId' across memrefs.
394  if (outEdge.id == dstId)
395  removeEdge(srcId, outEdge.id, outEdge.value);
396  else if (removeSrcId) {
397  addEdge(dstId, outEdge.id, outEdge.value);
398  removeEdge(srcId, outEdge.id, outEdge.value);
399  }
400  }
401  }
402  // Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
403  // replaced by a private memref). These edges could come from nodes
404  // other than 'srcId' which were removed in the previous step.
405  if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
406  SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
407  for (auto &inEdge : oldInEdges)
408  if (privateMemRefs.count(inEdge.value) > 0)
409  removeEdge(inEdge.id, dstId, inEdge.value);
410  }
411 }
412 
413 // Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
414 // of sibling node 'sibId' into node 'dstId'.
415 void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
416  // For each edge in 'inEdges[sibId]':
417  // *) Add new edge from source node 'inEdge.id' to 'dstNode'.
418  // *) Remove edge from source node 'inEdge.id' to 'sibNode'.
419  if (inEdges.count(sibId) > 0) {
420  SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
421  for (auto &inEdge : oldInEdges) {
422  addEdge(inEdge.id, dstId, inEdge.value);
423  removeEdge(inEdge.id, sibId, inEdge.value);
424  }
425  }
426 
427  // For each edge in 'outEdges[sibId]' to node 'id'
428  // *) Add new edge from 'dstId' to 'outEdge.id'.
429  // *) Remove edge from 'sibId' to 'outEdge.id'.
430  if (outEdges.count(sibId) > 0) {
431  SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
432  for (auto &outEdge : oldOutEdges) {
433  addEdge(dstId, outEdge.id, outEdge.value);
434  removeEdge(sibId, outEdge.id, outEdge.value);
435  }
436  }
437 }
438 
439 // Adds ops in 'loads' and 'stores' to node at 'id'.
// NOTE(review): the first signature line (presumably void
// MemRefDependenceGraph::addToNode() was lost in extraction.
441  unsigned id, const SmallVectorImpl<Operation *> &loads,
442  const SmallVectorImpl<Operation *> &stores) {
443  Node *node = getNode(id);
444  llvm::append_range(node->loads, loads);
445  llvm::append_range(node->stores, stores);
446 }
447 
// NOTE(review): the signature line (presumably void
// MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id)) was lost in
// extraction. Drops all recorded load/store ops from node 'id'.
449  Node *node = getNode(id);
450  node->loads.clear();
451  node->stores.clear();
452 }
453 
454 // Calls 'callback' for each input edge incident to node 'id' which carries a
455 // memref dependence.
// NOTE(review): the signature line (presumably void
// MemRefDependenceGraph::forEachMemRefInputEdge() was lost in extraction.
457  unsigned id, const std::function<void(Edge)> &callback) {
458  if (inEdges.count(id) > 0)
459  forEachMemRefEdge(inEdges[id], callback);
460 }
461 
462 // Calls 'callback' for each output edge from node 'id' which carries a
463 // memref dependence.
// NOTE(review): the signature line (presumably void
// MemRefDependenceGraph::forEachMemRefOutputEdge() was lost in extraction.
465  unsigned id, const std::function<void(Edge)> &callback) {
466  if (outEdges.count(id) > 0)
467  forEachMemRefEdge(outEdges[id], callback);
468 }
469 
470 // Calls 'callback' for each edge in 'edges' which carries a memref
471 // dependence.
// NOTE(review): the signature line (presumably void
// MemRefDependenceGraph::forEachMemRefEdge() was lost in extraction.
473  ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
474  for (const auto &edge : edges) {
475  // Skip if 'edge' is not a memref dependence edge.
476  if (!isa<MemRefType>(edge.value.getType()))
477  continue;
478  assert(nodes.count(edge.id) > 0);
479  // Skip if 'edge.id' is not a loop nest.
480  if (!isa<AffineForOp>(getNode(edge.id)->op))
481  continue;
482  // Visit current input edge 'edge'.
483  callback(edge);
484  }
485 }
486 
487 void MemRefDependenceGraph::print(raw_ostream &os) const {
488  os << "\nMemRefDependenceGraph\n";
489  os << "\nNodes:\n";
490  for (const auto &idAndNode : nodes) {
491  os << "Node: " << idAndNode.first << "\n";
492  auto it = inEdges.find(idAndNode.first);
493  if (it != inEdges.end()) {
494  for (const auto &e : it->second)
495  os << " InEdge: " << e.id << " " << e.value << "\n";
496  }
497  it = outEdges.find(idAndNode.first);
498  if (it != outEdges.end()) {
499  for (const auto &e : it->second)
500  os << " OutEdge: " << e.id << " " << e.value << "\n";
501  }
502  }
503 }
504 
// NOTE(review): the defining signature (presumably
// mlir::affine::getAffineForIVs(Operation &op,
// SmallVectorImpl<AffineForOp> *loops)) was lost in extraction.
507  auto *currOp = op.getParentOp();
508  AffineForOp currAffineForOp;
// NOTE(review): 'currAffineForOp' above is dead — the declaration inside the
// 'if' below shadows it; consider removing the outer variable.
509  // Traverse up the hierarchy collecting all 'affine.for' operation while
510  // skipping over 'affine.if' operations.
511  while (currOp) {
512  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
513  loops->push_back(currAffineForOp);
514  currOp = currOp->getParentOp();
515  }
// The walk above is innermost-to-outermost; restore outermost-first order.
516  std::reverse(loops->begin(), loops->end());
517 }
518 
// NOTE(review): the signature lines (presumably
// mlir::affine::getEnclosingAffineOps(Operation &op,
// SmallVectorImpl<Operation *> *ops)) were lost in extraction.
521  ops->clear();
522  Operation *currOp = op.getParentOp();
523 
524  // Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and
525  // affine.parallel operations.
526  while (currOp) {
527  if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
528  ops->push_back(currOp);
529  currOp = currOp->getParentOp();
530  }
// The walk above is innermost-to-outermost; restore outermost-first order.
531  std::reverse(ops->begin(), ops->end());
532 }
533 
534 // Populates 'cst' with FlatAffineValueConstraints which represent original
535 // domain of the loop bounds that define 'ivs'.
// NOTE(review): the first signature line (presumably LogicalResult
// ComputationSliceState::getSourceAsConstraints() was lost in extraction.
537  FlatAffineValueConstraints &cst) const {
538  assert(!ivs.empty() && "Cannot have a slice without its IVs");
539  cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0,
540  /*numLocals=*/0, ivs);
// Accumulate each source loop's bound constraints into 'cst'; fails if any
// loop domain cannot be added.
541  for (Value iv : ivs) {
542  AffineForOp loop = getForInductionVarOwner(iv);
543  assert(loop && "Expected affine for");
544  if (failed(cst.addAffineForOpDomain(loop)))
545  return failure();
546  }
547  return success();
548 }
549 
550 // Populates 'cst' with FlatAffineValueConstraints which represent slice bounds.
// NOTE(review): the signature lines (presumably LogicalResult
// ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst))
// were lost in extraction.
553  assert(!lbOperands.empty());
554  // Adds src 'ivs' as dimension variables in 'cst'.
555  unsigned numDims = ivs.size();
556  // Adds operands (dst ivs and symbols) as symbols in 'cst'.
557  unsigned numSymbols = lbOperands[0].size();
558 
559  SmallVector<Value, 4> values(ivs);
560  // Append 'ivs' then 'operands' to 'values'.
561  values.append(lbOperands[0].begin(), lbOperands[0].end());
562  *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);
563 
564  // Add loop bound constraints for values which are loop IVs of the destination
565  // of fusion and equality constraints for symbols which are constants.
566  for (unsigned i = numDims, end = values.size(); i < end; ++i) {
567  Value value = values[i];
568  assert(cst->containsVar(value) && "value expected to be present");
569  if (isValidSymbol(value)) {
570  // Check if the symbol is a constant.
571  if (std::optional<int64_t> cOp = getConstantIntValue(value))
572  cst->addBound(BoundType::EQ, value, cOp.value());
573  } else if (auto loop = getForInductionVarOwner(value)) {
574  if (failed(cst->addAffineForOpDomain(loop)))
575  return failure();
576  }
577  }
578 
579  // Add slices bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'
580  LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
581  assert(succeeded(ret) &&
582  "should not fail as we never have semi-affine slice maps");
583  (void)ret;
584  return success();
585 }
586 
587 // Clears state bounds and operand state.
// NOTE(review): the signature line (presumably
// ComputationSliceState::ClearBounds()) was lost in extraction. Note that
// 'ivs' is not cleared here — only the bound maps and their operands.
589  lbs.clear();
590  ubs.clear();
591  lbOperands.clear();
592  ubOperands.clear();
593 }
594 
// NOTE(review): the signature line (presumably
// ComputationSliceState::dump()) was lost in extraction. Prints the slice's
// IVs and its lower/upper bound maps with their operands to stderr.
596  llvm::errs() << "\tIVs:\n";
597  for (Value iv : ivs)
598  llvm::errs() << "\t\t" << iv << "\n";
599 
600  llvm::errs() << "\tLBs:\n";
601  for (auto en : llvm::enumerate(lbs)) {
602  llvm::errs() << "\t\t" << en.value() << "\n";
603  llvm::errs() << "\t\tOperands:\n";
604  for (Value lbOp : lbOperands[en.index()])
605  llvm::errs() << "\t\t\t" << lbOp << "\n";
606  }
607 
608  llvm::errs() << "\tUBs:\n";
609  for (auto en : llvm::enumerate(ubs)) {
610  llvm::errs() << "\t\t" << en.value() << "\n";
611  llvm::errs() << "\t\tOperands:\n";
612  for (Value ubOp : ubOperands[en.index()])
613  llvm::errs() << "\t\t\t" << ubOp << "\n";
614  }
615 }
616 
617 /// Fast check to determine if the computation slice is maximal. Returns true if
618 /// each slice dimension maps to an existing dst dimension and both the src
619 /// and the dst loops for those dimensions have the same bounds. Returns false
620 /// if both the src and the dst loops don't have the same bounds. Returns
621 /// std::nullopt if none of the above can be proven.
622 std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
623  assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
624  "Unexpected number of lbs, ubs and ivs in slice");
625 
626  for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
627  AffineMap lbMap = lbs[i];
628  AffineMap ubMap = ubs[i];
629 
630  // Check if this slice is just an equality along this dimension.
631  if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
632  ubMap.getNumResults() != 1 ||
633  lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
634  // The condition above will be true for maps describing a single
635  // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
636  // Make sure we skip those cases by checking that the lb result is not
637  // just a constant.
638  lbMap.getResult(0).isa<AffineConstantExpr>())
639  return std::nullopt;
640 
641  // Limited support: we expect the lb result to be just a loop dimension for
642  // now.
643  AffineDimExpr result = lbMap.getResult(0).dyn_cast<AffineDimExpr>();
644  if (!result)
645  return std::nullopt;
646 
647  // Retrieve dst loop bounds.
648  AffineForOp dstLoop =
649  getForInductionVarOwner(lbOperands[i][result.getPosition()]);
650  if (!dstLoop)
651  return std::nullopt;
652  AffineMap dstLbMap = dstLoop.getLowerBoundMap();
653  AffineMap dstUbMap = dstLoop.getUpperBoundMap();
654 
655  // Retrieve src loop bounds.
656  AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
657  assert(srcLoop && "Expected affine for");
658  AffineMap srcLbMap = srcLoop.getLowerBoundMap();
659  AffineMap srcUbMap = srcLoop.getUpperBoundMap();
660 
661  // Limited support: we expect simple src and dst loops with a single
662  // constant component per bound for now.
663  if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
664  dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
665  return std::nullopt;
666 
667  AffineExpr srcLbResult = srcLbMap.getResult(0);
668  AffineExpr dstLbResult = dstLbMap.getResult(0);
669  AffineExpr srcUbResult = srcUbMap.getResult(0);
670  AffineExpr dstUbResult = dstUbMap.getResult(0);
671  if (!srcLbResult.isa<AffineConstantExpr>() ||
672  !srcUbResult.isa<AffineConstantExpr>() ||
673  !dstLbResult.isa<AffineConstantExpr>() ||
674  !dstUbResult.isa<AffineConstantExpr>())
675  return std::nullopt;
676 
677  // Check if src and dst loop bounds are the same. If not, we can guarantee
678  // that the slice is not maximal.
679  if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
680  srcLoop.getStep() != dstLoop.getStep())
681  return false;
682  }
683 
684  return true;
685 }
686 
687 /// Returns true if it is deterministically verified that the original iteration
688 /// space of the slice is contained within the new iteration space that is
689 /// created after fusing 'this' slice into its destination.
690 std::optional<bool> ComputationSliceState::isSliceValid() const {
691  // Fast check to determine if the slice is valid. If the following conditions
692  // are verified to be true, slice is declared valid by the fast check:
693  // 1. Each slice loop is a single iteration loop bound in terms of a single
694  // destination loop IV.
695  // 2. Loop bounds of the destination loop IV (from above) and those of the
696  // source loop IV are exactly the same.
697  // If the fast check is inconclusive or false, we proceed with a more
698  // expensive analysis.
699  // TODO: Store the result of the fast check, as it might be used again in
700  // `canRemoveSrcNodeAfterFusion`.
701  std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
702  if (isValidFastCheck && *isValidFastCheck)
703  return true;
704 
705  // Create constraints for the source loop nest using which slice is computed.
706  FlatAffineValueConstraints srcConstraints;
707  // TODO: Store the source's domain to avoid computation at each depth.
708  if (failed(getSourceAsConstraints(srcConstraints))) {
709  LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
710  return std::nullopt;
711  }
712  // As the set difference utility currently cannot handle symbols in its
713  // operands, validity of the slice cannot be determined.
714  if (srcConstraints.getNumSymbolVars() > 0) {
715  LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
716  return std::nullopt;
717  }
718  // TODO: Handle local vars in the source domains while using the 'projectOut'
719  // utility below. Currently, aligning is not done assuming that there will be
720  // no local vars in the source domain.
721  if (srcConstraints.getNumLocalVars() != 0) {
722  LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
723  return std::nullopt;
724  }
725 
726  // Create constraints for the slice loop nest that would be created if the
727  // fusion succeeds.
728  FlatAffineValueConstraints sliceConstraints;
729  if (failed(getAsConstraints(&sliceConstraints))) {
730  LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
731  return std::nullopt;
732  }
733 
734  // Projecting out every dimension other than the 'ivs' to express slice's
735  // domain completely in terms of source's IVs.
736  sliceConstraints.projectOut(ivs.size(),
737  sliceConstraints.getNumVars() - ivs.size());
738 
739  LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
740  LLVM_DEBUG(srcConstraints.dump());
741  LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
742  "(expressed in terms of its source's IVs):\n");
743  LLVM_DEBUG(sliceConstraints.dump());
744 
745  // TODO: Store 'srcSet' to avoid recalculating for each depth.
746  PresburgerSet srcSet(srcConstraints);
747  PresburgerSet sliceSet(sliceConstraints);
// The slice is valid iff its domain is contained in the source domain, i.e.
// slice \ source is empty.
748  PresburgerSet diffSet = sliceSet.subtract(srcSet);
749 
750  if (!diffSet.isIntegerEmpty()) {
751  LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
752  return false;
753  }
754  return true;
755 }
756 
757 /// Returns true if the computation slice encloses all the iterations of the
758 /// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
759 /// cannot determine if the slice is maximal or not.
760 std::optional<bool> ComputationSliceState::isMaximal() const {
761  // Fast check to determine if the computation slice is maximal. If the result
762  // is inconclusive, we proceed with a more expensive analysis.
763  std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
764  if (isMaximalFastCheck)
765  return isMaximalFastCheck;
766 
767  // Create constraints for the src loop nest being sliced.
768  FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(),
769  /*numSymbols=*/0,
770  /*numLocals=*/0, ivs);
771  for (Value iv : ivs) {
772  AffineForOp loop = getForInductionVarOwner(iv);
773  assert(loop && "Expected affine for");
774  if (failed(srcConstraints.addAffineForOpDomain(loop)))
775  return std::nullopt;
776  }
777 
778  // Create constraints for the slice using the dst loop nest information. We
779  // retrieve existing dst loops from the lbOperands.
780  SmallVector<Value> consumerIVs;
781  for (Value lbOp : lbOperands[0])
782  if (getForInductionVarOwner(lbOp))
783  consumerIVs.push_back(lbOp);
784 
785  // Add empty IV Values for those new loops that are not equalities and,
786  // therefore, are not yet materialized in the IR.
787  for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
788  consumerIVs.push_back(Value());
789 
790  FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(),
791  /*numSymbols=*/0,
792  /*numLocals=*/0, consumerIVs);
793 
794  if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
795  return std::nullopt;
796 
797  if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
798  // Constraint dims are different. The integer set difference can't be
799  // computed so we don't know if the slice is maximal.
800  return std::nullopt;
801 
802  // Compute the difference between the src loop nest and the slice integer
803  // sets. The slice is maximal iff src \ slice is empty.
804  PresburgerSet srcSet(srcConstraints);
805  PresburgerSet sliceSet(sliceConstraints);
806  PresburgerSet diffSet = srcSet.subtract(sliceSet);
807  return diffSet.isIntegerEmpty();
808 }
809 
810 unsigned MemRefRegion::getRank() const {
811  return cast<MemRefType>(memref.getType()).getRank();
812 }
813 
// NOTE(review): the first signature line (presumably std::optional<int64_t>
// MemRefRegion::getConstantBoundingSizeAndShape() was lost in extraction.
// Returns the number of elements in a constant bounding box of the region, or
// std::nullopt when no constant bound exists along some dynamic dimension.
815  SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs,
816  SmallVectorImpl<int64_t> *lbDivisors) const {
817  auto memRefType = cast<MemRefType>(memref.getType());
818  unsigned rank = memRefType.getRank();
819  if (shape)
820  shape->reserve(rank);
821 
822  assert(rank == cst.getNumDimVars() && "inconsistent memref region");
823 
824  // Use a copy of the region constraints that has upper/lower bounds for each
825  // memref dimension with static size added to guard against potential
826  // over-approximation from projection or union bounding box. We may not add
827  // this on the region itself since they might just be redundant constraints
828  // that will need non-trivials means to eliminate.
829  FlatAffineValueConstraints cstWithShapeBounds(cst);
830  for (unsigned r = 0; r < rank; r++) {
831  cstWithShapeBounds.addBound(BoundType::LB, r, 0);
832  int64_t dimSize = memRefType.getDimSize(r);
833  if (ShapedType::isDynamic(dimSize))
834  continue;
835  cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
836  }
837 
838  // Find a constant upper bound on the extent of this memref region along each
839  // dimension.
840  int64_t numElements = 1;
841  int64_t diffConstant;
842  int64_t lbDivisor;
843  for (unsigned d = 0; d < rank; d++) {
// NOTE(review): the declaration of 'lb' (presumably a SmallVector<int64_t>
// written by getConstantBoundOnDimSize64 below) was lost in extraction.
845  std::optional<int64_t> diff =
846  cstWithShapeBounds.getConstantBoundOnDimSize64(d, &lb, &lbDivisor);
847  if (diff.has_value()) {
848  diffConstant = *diff;
849  assert(diffConstant >= 0 && "Dim size bound can't be negative");
850  assert(lbDivisor > 0);
851  } else {
852  // If no constant bound is found, then it can always be bound by the
853  // memref's dim size if the latter has a constant size along this dim.
854  auto dimSize = memRefType.getDimSize(d);
855  if (dimSize == ShapedType::kDynamic)
856  return std::nullopt;
857  diffConstant = dimSize;
858  // Lower bound becomes 0.
859  lb.resize(cstWithShapeBounds.getNumSymbolVars() + 1, 0);
860  lbDivisor = 1;
861  }
862  numElements *= diffConstant;
863  if (lbs) {
864  lbs->push_back(lb);
865  assert(lbDivisors && "both lbs and lbDivisor or none");
866  lbDivisors->push_back(lbDivisor);
867  }
868  if (shape) {
869  shape->push_back(diffConstant);
870  }
871  }
872  return numElements;
873 }
874 
876  AffineMap &ubMap) const {
877  assert(pos < cst.getNumDimVars() && "invalid position");
878  auto memRefType = cast<MemRefType>(memref.getType());
879  unsigned rank = memRefType.getRank();
880 
881  assert(rank == cst.getNumDimVars() && "inconsistent memref region");
882 
883  auto boundPairs = cst.getLowerAndUpperBound(
884  pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(),
885  /*localExprs=*/{}, memRefType.getContext());
886  lbMap = boundPairs.first;
887  ubMap = boundPairs.second;
888  assert(lbMap && "lower bound for a region must exist");
889  assert(ubMap && "upper bound for a region must exist");
890  assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
891  assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
892 }
893 
895  assert(memref == other.memref);
896  return cst.unionBoundingBox(*other.getConstraints());
897 }
898 
899 /// Computes the memory region accessed by this memref with the region
900 /// represented as constraints symbolic/parametric in 'loopDepth' loops
901 /// surrounding opInst and any additional Function symbols.
902 // For example, the memref region for this load operation at loopDepth = 1 will
903 // be as below:
904 //
905 // affine.for %i = 0 to 32 {
906 // affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
907 // load %A[%ii]
908 // }
909 // }
910 //
911 // region: {memref = %A, write = false, {%i <= m0 <= %i + 7} }
912 // The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
913 //
914 // TODO: extend this to any other memref dereferencing ops
915 // (dma_start, dma_wait).
917  const ComputationSliceState *sliceState,
918  bool addMemRefDimBounds) {
919  assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
920  "affine read/write op expected");
921 
922  MemRefAccess access(op);
923  memref = access.memref;
924  write = access.isStore();
925 
926  unsigned rank = access.getRank();
927 
928  LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
929  << "\ndepth: " << loopDepth << "\n";);
930 
931  // 0-d memrefs.
932  if (rank == 0) {
934  getAffineIVs(*op, ivs);
935  assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
936  // The first 'loopDepth' IVs are symbols for this region.
937  ivs.resize(loopDepth);
938  // A 0-d memref has a 0-d region.
939  cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs);
940  return success();
941  }
942 
943  // Build the constraints for this region.
944  AffineValueMap accessValueMap;
945  access.getAccessMap(&accessValueMap);
946  AffineMap accessMap = accessValueMap.getAffineMap();
947 
948  unsigned numDims = accessMap.getNumDims();
949  unsigned numSymbols = accessMap.getNumSymbols();
950  unsigned numOperands = accessValueMap.getNumOperands();
951  // Merge operands with slice operands.
952  SmallVector<Value, 4> operands;
953  operands.resize(numOperands);
954  for (unsigned i = 0; i < numOperands; ++i)
955  operands[i] = accessValueMap.getOperand(i);
956 
957  if (sliceState != nullptr) {
958  operands.reserve(operands.size() + sliceState->lbOperands[0].size());
959  // Append slice operands to 'operands' as symbols.
960  for (auto extraOperand : sliceState->lbOperands[0]) {
961  if (!llvm::is_contained(operands, extraOperand)) {
962  operands.push_back(extraOperand);
963  numSymbols++;
964  }
965  }
966  }
967  // We'll first associate the dims and symbols of the access map to the dims
968  // and symbols resp. of cst. This will change below once cst is
969  // fully constructed out.
970  cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);
971 
972  // Add equality constraints.
973  // Add inequalities for loop lower/upper bounds.
974  for (unsigned i = 0; i < numDims + numSymbols; ++i) {
975  auto operand = operands[i];
976  if (auto affineFor = getForInductionVarOwner(operand)) {
977  // Note that cst can now have more dimensions than accessMap if the
978  // bounds expressions involve outer loops or other symbols.
979  // TODO: rewrite this to use getInstIndexSet; this way
980  // conditionals will be handled when the latter supports it.
981  if (failed(cst.addAffineForOpDomain(affineFor)))
982  return failure();
983  } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
984  if (failed(cst.addAffineParallelOpDomain(parallelOp)))
985  return failure();
986  } else if (isValidSymbol(operand)) {
987  // Check if the symbol is a constant.
988  Value symbol = operand;
989  if (auto constVal = getConstantIntValue(symbol))
990  cst.addBound(BoundType::EQ, symbol, constVal.value());
991  } else {
992  LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
993  return failure();
994  }
995  }
996 
997  // Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
998  if (sliceState != nullptr) {
999  // Add dim and symbol slice operands.
1000  for (auto operand : sliceState->lbOperands[0]) {
1001  cst.addInductionVarOrTerminalSymbol(operand);
1002  }
1003  // Add upper/lower bounds from 'sliceState' to 'cst'.
1004  LogicalResult ret =
1005  cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
1006  sliceState->lbOperands[0]);
1007  assert(succeeded(ret) &&
1008  "should not fail as we never have semi-affine slice maps");
1009  (void)ret;
1010  }
1011 
1012  // Add access function equalities to connect loop IVs to data dimensions.
1013  if (failed(cst.composeMap(&accessValueMap))) {
1014  op->emitError("getMemRefRegion: compose affine map failed");
1015  LLVM_DEBUG(accessValueMap.getAffineMap().dump());
1016  return failure();
1017  }
1018 
1019  // Set all variables appearing after the first 'rank' variables as
1020  // symbolic variables - so that the ones corresponding to the memref
1021  // dimensions are the dimensional variables for the memref region.
1022  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);
1023 
1024  // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
1025  // this memref region is symbolic.
1026  SmallVector<Value, 4> enclosingIVs;
1027  getAffineIVs(*op, enclosingIVs);
1028  assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
1029  enclosingIVs.resize(loopDepth);
1030  SmallVector<Value, 4> vars;
1031  cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
1032  for (Value var : vars) {
1033  if ((isAffineInductionVar(var)) && !llvm::is_contained(enclosingIVs, var)) {
1034  cst.projectOut(var);
1035  }
1036  }
1037 
1038  // Project out any local variables (these would have been added for any
1039  // mod/divs).
1040  cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());
1041 
1042  // Constant fold any symbolic variables.
1043  cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(),
1044  /*num=*/cst.getNumSymbolVars());
1045 
1046  assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");
1047 
1048  // Add upper/lower bounds for each memref dimension with static size
1049  // to guard against potential over-approximation from projection.
1050  // TODO: Support dynamic memref dimensions.
1051  if (addMemRefDimBounds) {
1052  auto memRefType = cast<MemRefType>(memref.getType());
1053  for (unsigned r = 0; r < rank; r++) {
1054  cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0);
1055  if (memRefType.isDynamicDim(r))
1056  continue;
1057  cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1);
1058  }
1059  }
1060  cst.removeTrivialRedundancy();
1061 
1062  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
1063  LLVM_DEBUG(cst.dump());
1064  return success();
1065 }
1066 
1067 std::optional<int64_t>
1069  auto elementType = memRefType.getElementType();
1070 
1071  unsigned sizeInBits;
1072  if (elementType.isIntOrFloat()) {
1073  sizeInBits = elementType.getIntOrFloatBitWidth();
1074  } else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
1075  if (vectorType.getElementType().isIntOrFloat())
1076  sizeInBits =
1077  vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
1078  else
1079  return std::nullopt;
1080  } else {
1081  return std::nullopt;
1082  }
1083  return llvm::divideCeil(sizeInBits, 8);
1084 }
1085 
1086 // Returns the size of the region.
1087 std::optional<int64_t> MemRefRegion::getRegionSize() {
1088  auto memRefType = cast<MemRefType>(memref.getType());
1089 
1090  if (!memRefType.getLayout().isIdentity()) {
1091  LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
1092  return false;
1093  }
1094 
1095  // Indices to use for the DmaStart op.
1096  // Indices for the original memref being DMAed from/to.
1097  SmallVector<Value, 4> memIndices;
1098  // Indices for the faster buffer being DMAed into/from.
1099  SmallVector<Value, 4> bufIndices;
1100 
1101  // Compute the extents of the buffer.
1102  std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
1103  if (!numElements) {
1104  LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
1105  return std::nullopt;
1106  }
1107  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
1108  if (!eltSize)
1109  return std::nullopt;
1110  return *eltSize * *numElements;
1111 }
1112 
1113 /// Returns the size of memref data in bytes if it's statically shaped,
1114 /// std::nullopt otherwise. If the element of the memref has vector type, takes
1115 /// into account size of the vector as well.
1116 // TODO: improve/complete this when we have target data.
1117 std::optional<uint64_t>
1119  if (!memRefType.hasStaticShape())
1120  return std::nullopt;
1121  auto elementType = memRefType.getElementType();
1122  if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
1123  return std::nullopt;
1124 
1125  auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
1126  if (!sizeInBytes)
1127  return std::nullopt;
1128  for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
1129  sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
1130  }
1131  return sizeInBytes;
1132 }
1133 
1134 template <typename LoadOrStoreOp>
1135 LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
1136  bool emitError) {
1137  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
1138  AffineWriteOpInterface>::value,
1139  "argument should be either a AffineReadOpInterface or a "
1140  "AffineWriteOpInterface");
1141 
1142  Operation *op = loadOrStoreOp.getOperation();
1143  MemRefRegion region(op->getLoc());
1144  if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr,
1145  /*addMemRefDimBounds=*/false)))
1146  return success();
1147 
1148  LLVM_DEBUG(llvm::dbgs() << "Memory region");
1149  LLVM_DEBUG(region.getConstraints()->dump());
1150 
1151  bool outOfBounds = false;
1152  unsigned rank = loadOrStoreOp.getMemRefType().getRank();
1153 
1154  // For each dimension, check for out of bounds.
1155  for (unsigned r = 0; r < rank; r++) {
1156  FlatAffineValueConstraints ucst(*region.getConstraints());
1157 
1158  // Intersect memory region with constraint capturing out of bounds (both out
1159  // of upper and out of lower), and check if the constraint system is
1160  // feasible. If it is, there is at least one point out of bounds.
1161  SmallVector<int64_t, 4> ineq(rank + 1, 0);
1162  int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
1163  // TODO: handle dynamic dim sizes.
1164  if (dimSize == -1)
1165  continue;
1166 
1167  // Check for overflow: d_i >= memref dim size.
1168  ucst.addBound(BoundType::LB, r, dimSize);
1169  outOfBounds = !ucst.isEmpty();
1170  if (outOfBounds && emitError) {
1171  loadOrStoreOp.emitOpError()
1172  << "memref out of upper bound access along dimension #" << (r + 1);
1173  }
1174 
1175  // Check for a negative index.
1176  FlatAffineValueConstraints lcst(*region.getConstraints());
1177  std::fill(ineq.begin(), ineq.end(), 0);
1178  // d_i <= -1;
1179  lcst.addBound(BoundType::UB, r, -1);
1180  outOfBounds = !lcst.isEmpty();
1181  if (outOfBounds && emitError) {
1182  loadOrStoreOp.emitOpError()
1183  << "memref out of lower bound access along dimension #" << (r + 1);
1184  }
1185  }
1186  return failure(outOfBounds);
1187 }
1188 
1189 // Explicitly instantiate the template so that the compiler knows we need them!
// These are the only two instantiations the static_assert in the definition
// permits (AffineReadOpInterface and AffineWriteOpInterface).
1190 template LogicalResult
1191 mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
1192  bool emitError);
1193 template LogicalResult
1194 mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
1195  bool emitError);
1196 
1197 // Returns in 'positions' the Block positions of 'op' in each ancestor
1198 // Block from the Block containing operation, stopping at 'limitBlock'.
1199 static void findInstPosition(Operation *op, Block *limitBlock,
1200  SmallVectorImpl<unsigned> *positions) {
1201  Block *block = op->getBlock();
1202  while (block != limitBlock) {
1203  // FIXME: This algorithm is unnecessarily O(n) and should be improved to not
1204  // rely on linear scans.
1205  int instPosInBlock = std::distance(block->begin(), op->getIterator());
1206  positions->push_back(instPosInBlock);
1207  op = block->getParentOp();
1208  block = op->getBlock();
1209  }
1210  std::reverse(positions->begin(), positions->end());
1211 }
1212 
1213 // Returns the Operation in a possibly nested set of Blocks, where the
1214 // position of the operation is represented by 'positions', which has a
1215 // Block position for each level of nesting.
1217  unsigned level, Block *block) {
1218  unsigned i = 0;
1219  for (auto &op : *block) {
1220  if (i != positions[level]) {
1221  ++i;
1222  continue;
1223  }
1224  if (level == positions.size() - 1)
1225  return &op;
1226  if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
1227  return getInstAtPosition(positions, level + 1,
1228  childAffineForOp.getBody());
1229 
1230  for (auto &region : op.getRegions()) {
1231  for (auto &b : region)
1232  if (auto *ret = getInstAtPosition(positions, level + 1, &b))
1233  return ret;
1234  }
1235  return nullptr;
1236  }
1237  return nullptr;
1238 }
1239 
1240 // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
1243  for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
1244  auto value = cst->getValue(i);
1245  if (ivs.count(value) == 0) {
1246  assert(isAffineForInductionVar(value));
1247  auto loop = getForInductionVarOwner(value);
1248  if (failed(cst->addAffineForOpDomain(loop)))
1249  return failure();
1250  }
1251  }
1252  return success();
1253 }
1254 
1255 /// Returns the innermost common loop depth for the set of operations in 'ops'.
1256 // TODO: Move this to LoopUtils.
1258  ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
1259  unsigned numOps = ops.size();
1260  assert(numOps > 0 && "Expected at least one operation");
1261 
1262  std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
1263  unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
1264  for (unsigned i = 0; i < numOps; ++i) {
1265  getAffineForIVs(*ops[i], &loops[i]);
1266  loopDepthLimit =
1267  std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
1268  }
1269 
1270  unsigned loopDepth = 0;
1271  for (unsigned d = 0; d < loopDepthLimit; ++d) {
1272  unsigned i;
1273  for (i = 1; i < numOps; ++i) {
1274  if (loops[i - 1][d] != loops[i][d])
1275  return loopDepth;
1276  }
1277  if (surroundingLoops)
1278  surroundingLoops->push_back(loops[i - 1][d]);
1279  ++loopDepth;
1280  }
1281  return loopDepth;
1282 }
1283 
1284 /// Computes in 'sliceUnion' the union of all slice bounds computed at
1285 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
1286 /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
1287 /// union was computed correctly, an appropriate failure otherwise.
1290  ArrayRef<Operation *> opsB, unsigned loopDepth,
1291  unsigned numCommonLoops, bool isBackwardSlice,
1292  ComputationSliceState *sliceUnion) {
1293  // Compute the union of slice bounds between all pairs in 'opsA' and
1294  // 'opsB' in 'sliceUnionCst'.
1295  FlatAffineValueConstraints sliceUnionCst;
1296  assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
1297  std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
1298  for (auto *i : opsA) {
1299  MemRefAccess srcAccess(i);
1300  for (auto *j : opsB) {
1301  MemRefAccess dstAccess(j);
1302  if (srcAccess.memref != dstAccess.memref)
1303  continue;
1304  // Check if 'loopDepth' exceeds nesting depth of src/dst ops.
1305  if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) ||
1306  (isBackwardSlice && loopDepth > getNestingDepth(j))) {
1307  LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
1309  }
1310 
1311  bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
1312  isa<AffineReadOpInterface>(dstAccess.opInst);
1313  FlatAffineValueConstraints dependenceConstraints;
1314  // Check dependence between 'srcAccess' and 'dstAccess'.
1316  srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1,
1317  &dependenceConstraints, /*dependenceComponents=*/nullptr,
1318  /*allowRAR=*/readReadAccesses);
1319  if (result.value == DependenceResult::Failure) {
1320  LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
1322  }
1323  if (result.value == DependenceResult::NoDependence)
1324  continue;
1325  dependentOpPairs.emplace_back(i, j);
1326 
1327  // Compute slice bounds for 'srcAccess' and 'dstAccess'.
1328  ComputationSliceState tmpSliceState;
1329  mlir::affine::getComputationSliceState(i, j, &dependenceConstraints,
1330  loopDepth, isBackwardSlice,
1331  &tmpSliceState);
1332 
1333  if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
1334  // Initialize 'sliceUnionCst' with the bounds computed in previous step.
1335  if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
1336  LLVM_DEBUG(llvm::dbgs()
1337  << "Unable to compute slice bound constraints\n");
1339  }
1340  assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
1341  continue;
1342  }
1343 
1344  // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
1345  FlatAffineValueConstraints tmpSliceCst;
1346  if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
1347  LLVM_DEBUG(llvm::dbgs()
1348  << "Unable to compute slice bound constraints\n");
1350  }
1351 
1352  // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
1353  if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
1354 
1355  // Pre-constraint var alignment: record loop IVs used in each constraint
1356  // system.
1357  SmallPtrSet<Value, 8> sliceUnionIVs;
1358  for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
1359  sliceUnionIVs.insert(sliceUnionCst.getValue(k));
1360  SmallPtrSet<Value, 8> tmpSliceIVs;
1361  for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
1362  tmpSliceIVs.insert(tmpSliceCst.getValue(k));
1363 
1364  sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst);
1365 
1366  // Post-constraint var alignment: add loop IV bounds missing after
1367  // var alignment to constraint systems. This can occur if one constraint
1368  // system uses an loop IV that is not used by the other. The call
1369  // to unionBoundingBox below expects constraints for each Loop IV, even
1370  // if they are the unsliced full loop bounds added here.
1371  if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
1373  if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
1375  }
1376  // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
1377  if (sliceUnionCst.getNumLocalVars() > 0 ||
1378  tmpSliceCst.getNumLocalVars() > 0 ||
1379  failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
1380  LLVM_DEBUG(llvm::dbgs()
1381  << "Unable to compute union bounding box of slice bounds\n");
1383  }
1384  }
1385  }
1386 
1387  // Empty union.
1388  if (sliceUnionCst.getNumDimAndSymbolVars() == 0)
1390 
1391  // Gather loops surrounding ops from loop nest where slice will be inserted.
1393  for (auto &dep : dependentOpPairs) {
1394  ops.push_back(isBackwardSlice ? dep.second : dep.first);
1395  }
1396  SmallVector<AffineForOp, 4> surroundingLoops;
1397  unsigned innermostCommonLoopDepth =
1398  getInnermostCommonLoopDepth(ops, &surroundingLoops);
1399  if (loopDepth > innermostCommonLoopDepth) {
1400  LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
1402  }
1403 
1404  // Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
1405  unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
1406 
1407  // Convert any dst loop IVs which are symbol variables to dim variables.
1408  sliceUnionCst.convertLoopIVSymbolsToDims();
1409  sliceUnion->clearBounds();
1410  sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
1411  sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
1412 
1413  // Get slice bounds from slice union constraints 'sliceUnionCst'.
1414  sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs,
1415  opsA[0]->getContext(), &sliceUnion->lbs,
1416  &sliceUnion->ubs);
1417 
1418  // Add slice bound operands of union.
1419  SmallVector<Value, 4> sliceBoundOperands;
1420  sliceUnionCst.getValues(numSliceLoopIVs,
1421  sliceUnionCst.getNumDimAndSymbolVars(),
1422  &sliceBoundOperands);
1423 
1424  // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
1425  sliceUnion->ivs.clear();
1426  sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);
1427 
1428  // Set loop nest insertion point to block start at 'loopDepth'.
1429  sliceUnion->insertPoint =
1430  isBackwardSlice
1431  ? surroundingLoops[loopDepth - 1].getBody()->begin()
1432  : std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
1433 
1434  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1435  // canonicalization.
1436  sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1437  sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1438 
1439  // Check if the slice computed is valid. Return success only if it is verified
1440  // that the slice is valid, otherwise return appropriate failure status.
1441  std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
1442  if (!isSliceValid) {
1443  LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
1445  }
1446  if (!*isSliceValid)
1448 
1450 }
1451 
1452 // TODO: extend this to handle multiple result maps.
1453 static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1454  AffineMap ubMap) {
1455  assert(lbMap.getNumResults() == 1 && "expected single result bound map");
1456  assert(ubMap.getNumResults() == 1 && "expected single result bound map");
1457  assert(lbMap.getNumDims() == ubMap.getNumDims());
1458  assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1459  AffineExpr lbExpr(lbMap.getResult(0));
1460  AffineExpr ubExpr(ubMap.getResult(0));
1461  auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
1462  lbMap.getNumSymbols());
1463  auto cExpr = loopSpanExpr.dyn_cast<AffineConstantExpr>();
1464  if (!cExpr)
1465  return std::nullopt;
1466  return cExpr.getValue();
1467 }
1468 
1469 // Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop
1470 // nest surrounding represented by slice loop bounds in 'slice'. Returns true
1471 // on success, false otherwise (if a non-constant trip count was encountered).
1472 // TODO: Make this work with non-unit step loops.
1474  const ComputationSliceState &slice,
1475  llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
1476  unsigned numSrcLoopIVs = slice.ivs.size();
1477  // Populate map from AffineForOp -> trip count
1478  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1479  AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1480  auto *op = forOp.getOperation();
1481  AffineMap lbMap = slice.lbs[i];
1482  AffineMap ubMap = slice.ubs[i];
1483  // If lower or upper bound maps are null or provide no results, it implies
1484  // that source loop was not at all sliced, and the entire loop will be a
1485  // part of the slice.
1486  if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
1487  ubMap.getNumResults() == 0) {
1488  // The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1489  if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1490  (*tripCountMap)[op] =
1491  forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1492  continue;
1493  }
1494  std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1495  if (maybeConstTripCount.has_value()) {
1496  (*tripCountMap)[op] = *maybeConstTripCount;
1497  continue;
1498  }
1499  return false;
1500  }
1501  std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1502  // Slice bounds are created with a constant ub - lb difference.
1503  if (!tripCount.has_value())
1504  return false;
1505  (*tripCountMap)[op] = *tripCount;
1506  }
1507  return true;
1508 }
1509 
1510 // Return the number of iterations in the given slice.
1512  const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
1513  uint64_t iterCount = 1;
1514  for (const auto &count : sliceTripCountMap) {
1515  iterCount *= count.second;
1516  }
1517  return iterCount;
1518 }
1519 
// Name of the attribute that marks a loop as a fusion barrier for slicing:
// slice bounds beginning at a loop carrying this attribute are cleared (see
// the bound-reset loop in getComputationSliceState below).
1520 const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1521 // Computes slice bounds by projecting out any loop IVs from
1522 // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1523 // bounds in 'sliceState' which represent the one loop nest's IVs in terms of
1524 // the other loop nest's IVs, symbols and constants (using 'isBackwardsSlice').
1526  Operation *depSourceOp, Operation *depSinkOp,
1527  FlatAffineValueConstraints *dependenceConstraints, unsigned loopDepth,
1528  bool isBackwardSlice, ComputationSliceState *sliceState) {
1529  // Get loop nest surrounding src operation.
1530  SmallVector<AffineForOp, 4> srcLoopIVs;
1531  getAffineForIVs(*depSourceOp, &srcLoopIVs);
1532  unsigned numSrcLoopIVs = srcLoopIVs.size();
1533 
1534  // Get loop nest surrounding dst operation.
1535  SmallVector<AffineForOp, 4> dstLoopIVs;
1536  getAffineForIVs(*depSinkOp, &dstLoopIVs);
1537  unsigned numDstLoopIVs = dstLoopIVs.size();
1538 
1539  assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
1540  (isBackwardSlice && loopDepth <= numDstLoopIVs));
1541 
1542  // Project out dimensions other than those up to 'loopDepth'.
1543  unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1544  unsigned num =
1545  isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1546  dependenceConstraints->projectOut(pos, num);
1547 
1548  // Add slice loop IV values to 'sliceState'.
1549  unsigned offset = isBackwardSlice ? 0 : loopDepth;
1550  unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1551  dependenceConstraints->getValues(offset, offset + numSliceLoopIVs,
1552  &sliceState->ivs);
1553 
1554  // Set up lower/upper bound affine maps for the slice.
1555  sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
1556  sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
1557 
1558  // Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1559  dependenceConstraints->getSliceBounds(offset, numSliceLoopIVs,
1560  depSourceOp->getContext(),
1561  &sliceState->lbs, &sliceState->ubs);
1562 
1563  // Set up bound operands for the slice's lower and upper bounds.
1564  SmallVector<Value, 4> sliceBoundOperands;
1565  unsigned numDimsAndSymbols = dependenceConstraints->getNumDimAndSymbolVars();
1566  for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
1567  if (i < offset || i >= offset + numSliceLoopIVs) {
1568  sliceBoundOperands.push_back(dependenceConstraints->getValue(i));
1569  }
1570  }
1571 
1572  // Give each bound its own copy of 'sliceBoundOperands' for subsequent
1573  // canonicalization.
1574  sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1575  sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
1576 
1577  // Set destination loop nest insertion point to block start at 'dstLoopDepth'.
1578  sliceState->insertPoint =
1579  isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
1580  : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
1581 
1582  llvm::SmallDenseSet<Value, 8> sequentialLoops;
1583  if (isa<AffineReadOpInterface>(depSourceOp) &&
1584  isa<AffineReadOpInterface>(depSinkOp)) {
1585  // For read-read access pairs, clear any slice bounds on sequential loops.
1586  // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1587  getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
1588  &sequentialLoops);
1589  }
1590  auto getSliceLoop = [&](unsigned i) {
1591  return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1592  };
1593  auto isInnermostInsertion = [&]() {
1594  return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1595  : loopDepth >= dstLoopIVs.size());
1596  };
1597  llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
1598  auto srcIsUnitSlice = [&]() {
1599  return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
1600  (getSliceIterationCount(sliceTripCountMap) == 1));
1601  };
1602  // Clear all sliced loop bounds beginning at the first sequential loop, or
1603  // first loop with a slice fusion barrier attribute..
1604 
1605  for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
1606  Value iv = getSliceLoop(i).getInductionVar();
1607  if (sequentialLoops.count(iv) == 0 &&
1608  getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1609  continue;
1610  // Skip reset of bounds of reduction loop inserted in the destination loop
1611  // that meets the following conditions:
1612  // 1. Slice is single trip count.
1613  // 2. Loop bounds of the source and destination match.
1614  // 3. Is being inserted at the innermost insertion point.
1615  std::optional<bool> isMaximal = sliceState->isMaximal();
1616  if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
1617  isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
1618  continue;
1619  for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1620  sliceState->lbs[j] = AffineMap();
1621  sliceState->ubs[j] = AffineMap();
1622  }
1623  break;
1624  }
1625 }
1626 
1627 /// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1628 /// updates the slice loop bounds with any non-null bound maps specified in
1629 /// 'sliceState', and inserts this slice into the loop nest surrounding
1630 /// 'dstOpInst' at loop depth 'dstLoopDepth'.
1631 // TODO: extend the slicing utility to compute slices that
1632 // aren't necessarily a one-to-one relation b/w the source and destination. The
1633 // relation between the source and destination could be many-to-many in general.
1634 // TODO: the slice computation is incorrect in the cases
1635 // where the dependence from the source to the destination does not cover the
1636 // entire destination index set. Subtract out the dependent destination
1637 // iterations from destination index set and check for emptiness --- this is one
1638 // solution.
1640  Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
1641  ComputationSliceState *sliceState) {
1642  // Get loop nest surrounding src operation.
1643  SmallVector<AffineForOp, 4> srcLoopIVs;
1644  getAffineForIVs(*srcOpInst, &srcLoopIVs);
1645  unsigned numSrcLoopIVs = srcLoopIVs.size();
1646 
1647  // Get loop nest surrounding dst operation.
1648  SmallVector<AffineForOp, 4> dstLoopIVs;
1649  getAffineForIVs(*dstOpInst, &dstLoopIVs);
1650  unsigned dstLoopIVsSize = dstLoopIVs.size();
1651  if (dstLoopDepth > dstLoopIVsSize) {
1652  dstOpInst->emitError("invalid destination loop depth");
1653  return AffineForOp();
1654  }
1655 
1656  // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1657  SmallVector<unsigned, 4> positions;
1658  // TODO: This code is incorrect since srcLoopIVs can be 0-d.
1659  findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
1660 
1661  // Clone src loop nest and insert it a the beginning of the operation block
1662  // of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1663  auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
1664  OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1665  auto sliceLoopNest =
1666  cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
1667 
1668  Operation *sliceInst =
1669  getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody());
1670  // Get loop nest surrounding 'sliceInst'.
1671  SmallVector<AffineForOp, 4> sliceSurroundingLoops;
1672  getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1673 
1674  // Sanity check.
1675  unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1676  (void)sliceSurroundingLoopsSize;
1677  assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1678  unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1679  (void)sliceLoopLimit;
1680  assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1681 
1682  // Update loop bounds for loops in 'sliceLoopNest'.
1683  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
1684  auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1685  if (AffineMap lbMap = sliceState->lbs[i])
1686  forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
1687  if (AffineMap ubMap = sliceState->ubs[i])
1688  forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
1689  }
1690  return sliceLoopNest;
1691 }
1692 
1693 // Constructs MemRefAccess populating it with the memref, its indices and
1694 // opinst from 'loadOrStoreOpInst'.
1696  if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
1697  memref = loadOp.getMemRef();
1698  opInst = loadOrStoreOpInst;
1699  llvm::append_range(indices, loadOp.getMapOperands());
1700  } else {
1701  assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
1702  "Affine read/write op expected");
1703  auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
1704  opInst = loadOrStoreOpInst;
1705  memref = storeOp.getMemRef();
1706  llvm::append_range(indices, storeOp.getMapOperands());
1707  }
1708 }
1709 
1710 unsigned MemRefAccess::getRank() const {
1711  return cast<MemRefType>(memref.getType()).getRank();
1712 }
1713 
/// Returns true if this access is a store, i.e., the underlying op implements
/// AffineWriteOpInterface; otherwise the access is a load.
bool MemRefAccess::isStore() const {
  return isa<AffineWriteOpInterface>(opInst);
}
1717 
1718 /// Returns the nesting depth of this statement, i.e., the number of loops
1719 /// surrounding this statement.
1721  Operation *currOp = op;
1722  unsigned depth = 0;
1723  while ((currOp = currOp->getParentOp())) {
1724  if (isa<AffineForOp>(currOp))
1725  depth++;
1726  }
1727  return depth;
1728 }
1729 
1730 /// Equal if both affine accesses are provably equivalent (at compile
1731 /// time) when considering the memref, the affine maps and their respective
1732 /// operands. The equality of access functions + operands is checked by
1733 /// subtracting fully composed value maps, and then simplifying the difference
1734 /// using the expression flattener.
1735 /// TODO: this does not account for aliasing of memrefs.
1736 bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
1737  if (memref != rhs.memref)
1738  return false;
1739 
1740  AffineValueMap diff, thisMap, rhsMap;
1741  getAccessMap(&thisMap);
1742  rhs.getAccessMap(&rhsMap);
1743  AffineValueMap::difference(thisMap, rhsMap, &diff);
1744  return llvm::all_of(diff.getAffineMap().getResults(),
1745  [](AffineExpr e) { return e == 0; });
1746 }
1747 
1749  auto *currOp = op.getParentOp();
1750  AffineForOp currAffineForOp;
1751  // Traverse up the hierarchy collecting all 'affine.for' and affine.parallel
1752  // operation while skipping over 'affine.if' operations.
1753  while (currOp) {
1754  if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
1755  ivs.push_back(currAffineForOp.getInductionVar());
1756  else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
1757  llvm::append_range(ivs, parOp.getIVs());
1758  currOp = currOp->getParentOp();
1759  }
1760  std::reverse(ivs.begin(), ivs.end());
1761 }
1762 
1763 /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
1764 /// where each lists loops from outer-most to inner-most in loop nest.
1766  Operation &b) {
1767  SmallVector<Value, 4> loopsA, loopsB;
1768  getAffineIVs(a, loopsA);
1769  getAffineIVs(b, loopsB);
1770 
1771  unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
1772  unsigned numCommonLoops = 0;
1773  for (unsigned i = 0; i < minNumLoops; ++i) {
1774  if (loopsA[i] != loopsB[i])
1775  break;
1776  ++numCommonLoops;
1777  }
1778  return numCommonLoops;
1779 }
1780 
/// Computes the total memory footprint in bytes touched by affine load/store
/// ops within the half-open op range [start, end) of 'block'. Regions accessed
/// on the same memref are merged via a bounding-box union before summing.
/// Returns std::nullopt if any region cannot be computed or has a size that is
/// not statically known.
/// NOTE(review): 'memorySpace' is not referenced in this body — all regions
/// are counted regardless of memory space; confirm this is intended.
static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
                                                      Block::iterator start,
                                                      Block::iterator end,
                                                      int memorySpace) {
  // Maps each accessed memref to the bounding-box union of all its regions.
  SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;

  // Walk this 'affine.for' operation to gather all memory regions.
  auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
    if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
      // Neither load nor a store op.
      return WalkResult::advance();
    }

    // Compute the memref region symbolic in any IVs enclosing this block.
    auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
    if (failed(
            region->compute(opInst,
                            /*loopDepth=*/getNestingDepth(&*block.begin())))) {
      // Emitting the diagnostic interrupts the walk, which the caller below
      // turns into a std::nullopt result.
      return opInst->emitError("error obtaining memory region\n");
    }

    // First access to this memref: record the region. Otherwise merge it into
    // the existing one via a bounding-box union.
    auto it = regions.find(region->memref);
    if (it == regions.end()) {
      regions[region->memref] = std::move(region);
    } else if (failed(it->second->unionBoundingBox(*region))) {
      return opInst->emitWarning(
          "getMemoryFootprintBytes: unable to perform a union on a memory "
          "region");
    }
    return WalkResult::advance();
  });
  if (result.wasInterrupted())
    return std::nullopt;

  // Sum per-memref region sizes; bail out if any size is unknown statically.
  int64_t totalSizeInBytes = 0;
  for (const auto &region : regions) {
    std::optional<int64_t> size = region.second->getRegionSize();
    if (!size.has_value())
      return std::nullopt;
    totalSizeInBytes += *size;
  }
  return totalSizeInBytes;
}
1824 
/// Public entry point: computes the memory footprint of 'forOp' by delegating
/// to the static block-range overload over the singleton range containing
/// exactly the 'affine.for' operation.
std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
                                                             int memorySpace) {
  auto *forInst = forOp.getOperation();
  return ::getMemoryFootprintBytes(
      *forInst->getBlock(), Block::iterator(forInst),
      std::next(Block::iterator(forInst)), memorySpace);
}
1832 
1833 /// Returns whether a loop is parallel and contains a reduction loop.
1835  SmallVector<LoopReduction> reductions;
1836  if (!isLoopParallel(forOp, &reductions))
1837  return false;
1838  return !reductions.empty();
1839 }
1840 
1841 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
1842 /// at 'forOp'.
1844  AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
1845  forOp->walk([&](Operation *op) {
1846  if (auto innerFor = dyn_cast<AffineForOp>(op))
1847  if (!isLoopParallel(innerFor))
1848  sequentialLoops->insert(innerFor.getInductionVar());
1849  });
1850 }
1851 
1853  FlatAffineValueConstraints fac(set);
1854  if (fac.isEmpty())
1855  return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
1856  set.getContext());
1858 
1859  auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
1860  assert(simplifiedSet && "guaranteed to succeed while roundtripping");
1861  return simplifiedSet;
1862 }
1863 
1864 static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
1865  SmallVector<Value> &target) {
1866  target =
1867  llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
1868  return val.has_value() ? *val : Value();
1869  }));
1870 }
1871 
/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
/// constraints drawn from an affine map. Before adding the constraint, the
/// dimensions/symbols of the affine map are aligned with `constraints`.
/// `operands` are the SSA Value operands used with the affine map.
/// Note: This function adds a new symbol column to the `constraints` for each
/// dimension/symbol that exists in the affine map but not in `constraints`.
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
                                      BoundType type, unsigned pos,
                                      AffineMap map, ValueRange operands) {
  // Unpack the Values currently bound to the constraint system's dims/symbols.
  SmallVector<Value> dims, syms, newSyms;
  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);

  // Align `map` with those dims/symbols; operands not already tracked are
  // appended to `newSyms` (as a superset of `syms`).
  AffineMap alignedMap =
      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
  // Add a symbol column for each newly discovered operand.
  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
    constraints.appendSymbolVar(newSyms[i]);
  return constraints.addBound(type, pos, alignedMap);
}
1891 
1892 /// Add `val` to each result of `map`.
1893 static AffineMap addConstToResults(AffineMap map, int64_t val) {
1894  SmallVector<AffineExpr> newResults;
1895  for (AffineExpr r : map.getResults())
1896  newResults.push_back(r + val);
1897  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
1898  map.getContext());
1899 }
1900 
// Attempt to simplify the given min/max operation by proving that its value is
// bounded by the same lower and upper bound.
//
// Bounds are computed by FlatAffineValueConstraints. Invariants required for
// finding/proving bounds should be supplied via `constraints`.
//
// 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
// 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
//    case of `!isMin`) and bind it to `opBound`. SSA values that are used in
//    `op` but are not part of `constraints`, are added as extra symbols.
// 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
//    * If `isMin`: r_i >= opBound
//    * If `isMax`: r_i <= opBound
//    If this is the case, ub(op) == lb(op).
// 4. Replace `op` with `opBound`.
//
// In summary, the following constraints are added throughout this function.
// Note: `invar` are dimensions added by the caller to express the invariants.
// (Showing only the case where `isMin`.)
//
//  invar |    op | opBound | r_i | extra syms... | const |           eq/ineq
//  ------+-------+---------+-----+---------------+-------+-------------------
//   (various eq./ineq. constraining `invar`, added by the caller)
//    ... |     0 |       0 |   0 |             0 |   ... |               ...
//  ------+-------+---------+-----+---------------+-------+-------------------
//   (various ineq. constraining `op` in terms of `op` operands (`invar` and
//    extra `op` operands "extra syms" that are not in `invar`)).
//    ... |    -1 |       0 |   0 |           ... |   ... |              >= 0
//  ------+-------+---------+-----+---------------+-------+-------------------
//   (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
//    ... |     0 |      -1 |   0 |           ... |   ... |               = 0
//  ------+-------+---------+-----+---------------+-------+-------------------
//   (for each `op` map result r_i: set r_i to corresponding map result,
//    prove that r_i >= minOpUb via contradiction)
//    ... |     0 |       0 |  -1 |           ... |   ... |               = 0
//      0 |     0 |       1 |  -1 |             0 |    -1 |              >= 0
//
FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
    Operation *op, FlatAffineValueConstraints constraints) {
  bool isMin = isa<AffineMinOp>(op);
  assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
  MLIRContext *ctx = op->getContext();
  Builder builder(ctx);
  AffineMap map =
      isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
  ValueRange operands = op->getOperands();
  unsigned numResults = map.getNumResults();

  // Add a few extra dimensions.
  unsigned dimOp = constraints.appendDimVar();      // `op`
  unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
  unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults);

  // Add an inequality for each result expr_i of map:
  // isMin: op <= expr_i, !isMin: op >= expr_i
  auto boundType = isMin ? BoundType::UB : BoundType::LB;
  // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
  AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
  if (failed(
          alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
    return failure();

  // Try to compute a lower/upper bound for op, expressed in terms of the other
  // `dims` and extra symbols.
  SmallVector<AffineMap> opLb(1), opUb(1);
  constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
  AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
  // TODO: `getSliceBounds` may return multiple bounds at the moment. This is
  // a TODO of `getSliceBounds` and not handled here.
  if (!sliceBound || sliceBound.getNumResults() != 1)
    return failure(); // No or multiple bounds found.
  // Recover the inclusive UB in the case of an `affine.min`.
  AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;

  // Add an equality: Set dimOpBound to computed bound.
  // Add back dimension for op. (Was removed by `getSliceBounds`.)
  AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp);
  if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
    return failure();

  // If the constraint system is empty, there is an inconsistency. (E.g., this
  // can happen if loop lb > ub.)
  if (constraints.isEmpty())
    return failure();

  // In the case of `isMin` (`!isMin` is inversed):
  // Prove that each result of `map` has a lower bound that is equal to (or
  // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
  // can be replaced with the bound. I.e., prove that for each result
  // expr_i (represented by dimension r_i):
  //
  // r_i >= opBound
  //
  // To prove this inequality, add its negation to the constraint set and prove
  // that the constraint set is empty.
  for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
    // Work on a copy so each proof-by-contradiction starts from the same
    // base constraint system.
    FlatAffineValueConstraints newConstr(constraints);

    // Add an equality: r_i = expr_i
    // Note: These equalities could have been added earlier and used to express
    // minOp <= expr_i. However, then we run the risk that `getSliceBounds`
    // computes minOpUb in terms of r_i dims, which is not desired.
    if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
                                map.getSubMap({i - resultDimStart}), operands)))
      return failure();

    // If `isMin`:  Add inequality: r_i < opBound
    //              equiv.: opBound - r_i - 1 >= 0
    // If `!isMin`: Add inequality: r_i > opBound
    //              equiv.: -opBound + r_i - 1 >= 0
    SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
    ineq[dimOpBound] = isMin ? 1 : -1;
    ineq[i] = isMin ? -1 : 1;
    ineq[newConstr.getNumCols() - 1] = -1;
    newConstr.addInequality(ineq);
    // A non-empty system means the negation is satisfiable, so the bound
    // cannot be proven for this result.
    if (!newConstr.isEmpty())
      return failure();
  }

  // Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
  AffineMap newMap = alignedBoundMap;
  SmallVector<Value> newOperands;
  unpackOptionalValues(constraints.getMaybeValues(), newOperands);
  // If dims/symbols have known constant values, use those in order to simplify
  // the affine map further.
  for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
    // Skip unused operands and operands that are already constants.
    if (!newOperands[i] || getConstantIntValue(newOperands[i]))
      continue;
    if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
      AffineExpr expr =
          i < newMap.getNumDims()
              ? builder.getAffineDimExpr(i)
              : builder.getAffineSymbolExpr(i - newMap.getNumDims());
      newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
                              newMap.getNumDims(), newMap.getNumSymbols());
    }
  }
  affine::canonicalizeMapAndOperands(&newMap, &newOperands);
  return AffineValueMap(newMap, newOperands);
}
static std::optional< uint64_t > getConstDifference(AffineMap lbMap, AffineMap ubMap)
Definition: Utils.cpp:1453
static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl< unsigned > *positions)
Definition: Utils.cpp:1199
const char *const kSliceFusionBarrierAttrName
Definition: Utils.cpp:1520
static LogicalResult addMissingLoopIVBounds(SmallPtrSet< Value, 8 > &ivs, FlatAffineValueConstraints *cst)
Definition: Utils.cpp:1241
MemRefDependenceGraph::Node Node
Definition: Utils.cpp:36
static Operation * getInstAtPosition(ArrayRef< unsigned > positions, unsigned level, Block *block)
Definition: Utils.cpp:1216
static std::optional< int64_t > getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace)
Definition: Utils.cpp:1781
static AffineMap addConstToResults(AffineMap map, int64_t val)
Add val to each result of map.
Definition: Utils.cpp:1893
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands)
Bound an identifier pos in a given FlatAffineValueConstraints with constraints drawn from an affine m...
Definition: Utils.cpp:1878
static void unpackOptionalValues(ArrayRef< std::optional< Value >> source, SmallVector< Value > &target)
Definition: Utils.cpp:1864
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
An integer constant appearing in affine expression.
Definition: AffineExpr.h:232
int64_t getValue() const
Definition: AffineExpr.cpp:519
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:216
unsigned getPosition() const
Definition: AffineExpr.cpp:325
Base type for affine expression.
Definition: AffineExpr.h:68
constexpr bool isa() const
Definition: AffineExpr.h:272
U dyn_cast() const
Definition: AffineExpr.h:283
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:44
MLIRContext * getContext() const
Definition: AffineMap.cpp:284
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineMap shiftDims(unsigned shift, unsigned offset=0) const
Replace dims[offset ...
Definition: AffineMap.h:241
unsigned getNumSymbols() const
Definition: AffineMap.cpp:341
unsigned getNumDims() const
Definition: AffineMap.cpp:337
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:350
unsigned getNumResults() const
Definition: AffineMap.cpp:345
unsigned getNumInputs() const
Definition: AffineMap.cpp:346
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:354
AffineMap replace(AffineExpr expr, AffineExpr replacement, unsigned numResultDims, unsigned numResultSyms) const
Sparse replace method.
Definition: AffineMap.cpp:454
void dump() const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Definition: AffineMap.cpp:570
Block represents an ordered list of Operations.
Definition: Block.h:30
OpListType::iterator iterator
Definition: Block.h:133
RetT walk(FnT &&callback)
Walk the operations in this block.
Definition: Block.h:280
iterator begin()
Definition: Block.h:136
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:357
AffineExpr getAffineConstantExpr(int64_t constant)
Definition: Builders.cpp:361
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:353
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
IntegerSet getAsIntegerSet(MLIRContext *context) const
Returns the constraint system as an integer set.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, SmallVectorImpl< AffineMap > *lbMaps, SmallVectorImpl< AffineMap > *ubMaps, bool closedUB=false)
Computes the lower and upper bounds of the first num dimensional variables (starting at offset) as an...
LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other)
Updates the constraints to be the smallest bounding (enclosing) box that contains the points of this ...
void mergeAndAlignVarsWithOther(unsigned offset, FlatLinearValueConstraints *other)
Merge and align the variables of this and other starting at offset, so that both constraint systems g...
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool containsVar(Value val) const
Returns true if a variable with the specified Value exists, false otherwise.
ArrayRef< std::optional< Value > > getMaybeValues() const
bool areVarsAlignedWithOther(const FlatLinearConstraints &other)
Returns true if this constraint system and other are in the same space, i.e., if they are associated ...
void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const
Returns the Values associated with variables in range [start, end).
An integer set representing a conjunction of one or more affine equalities and inequalities.
Definition: IntegerSet.h:44
unsigned getNumDims() const
Definition: IntegerSet.cpp:15
MLIRContext * getContext() const
Definition: IntegerSet.cpp:57
static IntegerSet getEmptySet(unsigned numDims, unsigned numSymbols, MLIRContext *context)
Definition: IntegerSet.h:56
unsigned getNumSymbols() const
Definition: IntegerSet.cpp:16
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:206
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:528
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:385
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Definition: Operation.cpp:279
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:776
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:652
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:655
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:372
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:93
Type getType() const
Return the type of this value.
Definition: Value.h:122
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:34
static WalkResult advance()
Definition: Visitors.h:52
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
Value getOperand(unsigned i) const
unsigned getNumOperands() const
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
void convertLoopIVSymbolsToDims()
Changes all symbol variables which are loop IVs to dim variables.
LogicalResult addDomainFromSliceMaps(ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds constraints (lower and upper bounds) for each loop in the loop nest described by the bound maps ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
LogicalResult addSliceBounds(ArrayRef< Value > values, ArrayRef< AffineMap > lbMaps, ArrayRef< AffineMap > ubMaps, ArrayRef< Value > operands)
Adds slice lower bounds represented by lower bounds in lbMaps and upper bounds in ubMaps to each vari...
void addInequality(ArrayRef< MPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
void removeTrivialRedundancy()
Removes duplicate constraints, trivially true constraints, and constraints that can be detected as re...
std::optional< int64_t > getConstantBoundOnDimSize64(unsigned pos, SmallVectorImpl< int64_t > *lb=nullptr, int64_t *boundFloorDivisor=nullptr, SmallVectorImpl< int64_t > *ub=nullptr, unsigned *minLbPos=nullptr, unsigned *minUbPos=nullptr) const
The same, but casts to int64_t.
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
bool isIntegerEmpty() const
Return true if all the sets in the union are known to be integer empty false otherwise.
PresburgerSet subtract(const PresburgerRelation &set) const
std::optional< uint64_t > getConstantTripCount(AffineForOp forOp)
Returns the trip count of the loop if it's a constant, std::nullopt otherwise.
IntegerSet simplifyIntegerSet(IntegerSet set)
Simplify the integer set by simplifying the underlying affine expressions by flattening and some simp...
Definition: Utils.cpp:1852
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost.
Definition: Utils.cpp:519
SliceComputationResult computeSliceUnion(ArrayRef< Operation * > opsA, ArrayRef< Operation * > opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion)
Computes in 'sliceUnion' the union of all slice bounds computed at 'loopDepth' between all dependent ...
Definition: Utils.cpp:1289
bool isAffineInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp or AffineParallelOp.
Definition: AffineOps.cpp:2650
bool isLoopParallelAndContainsReduction(AffineForOp forOp)
Returns whether a loop is a parallel loop and contains a reduction loop.
Definition: Utils.cpp:1834
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:1765
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2654
void getAffineIVs(Operation &op, SmallVectorImpl< Value > &ivs)
Populates 'ivs' with IVs of the surrounding affine.for and affine.parallel ops ordered from the outer...
Definition: Utils.cpp:1748
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, FlatAffineValueConstraints *dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState)
Computes the computation slice loop bounds for one loop nest as affine maps of the other loop nest's ...
Definition: Utils.cpp:1525
void getSequentialLoops(AffineForOp forOp, llvm::SmallDenseSet< Value, 8 > *sequentialLoops)
Returns in 'sequentialLoops' all sequential loops in loop nest rooted at 'forOp'.
Definition: Utils.cpp:1843
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1514
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2642
void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)
Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....
Definition: Utils.cpp:505
std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)
Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...
Definition: Utils.cpp:1825
unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)
Returns the innermost common loop depth for the set of operations in 'ops'.
Definition: Utils.cpp:1257
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:363
AffineParallelOp getAffineParallelInductionVarOwner(Value val)
Returns true if the provided value is among the induction variables of an AffineParallelOp.
Definition: AffineOps.cpp:2665
std::optional< uint64_t > getIntOrFloatMemRefSizeInBytes(MemRefType memRefType)
Returns the size of a memref with element type int or float in bytes if it's statically shaped,...
Definition: Utils.cpp:1118
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1720
uint64_t getSliceIterationCount(const llvm::SmallDenseMap< Operation *, uint64_t, 8 > &sliceTripCountMap)
Return the total number of iterations for the slice, given the trip count map provided.
Definition: Utils.cpp:1511
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if 'forOp' is a parallel loop.
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, bool emitError=true)
Checks a load or store op for an out of bound access; returns failure if the access is out of bounds ...
bool buildSliceTripCountMap(const ComputationSliceState &slice, llvm::SmallDenseMap< Operation *, uint64_t, 8 > *tripCountMap)
Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop nest surrounding represe...
Definition: Utils.cpp:1473
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState)
Creates a clone of the computation contained in the loop nest surrounding 'srcOpInst',...
Definition: Utils.cpp:1639
FailureOr< AffineValueMap > simplifyConstrainedMinMaxOp(Operation *op, FlatAffineValueConstraints constraints)
Try to simplify the given affine.min or affine.max op to an affine map with a single result and opera...
Definition: Utils.cpp:1938
std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)
Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...
Definition: Utils.cpp:1068
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
BoundType
The type of bound: equal, lower bound or upper bound.
This header declares functions that assist transformations in the MemRef dialect.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, ValueRange dims, ValueRange syms, SmallVector< Value > *newSyms=nullptr)
Re-indexes the dimensions and symbols of an affine map with given operands values to align with dims ...
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)
Simplify an affine expression by flattening and some amount of simple analysis.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...
Definition: Utils.h:260
std::optional< bool > isSliceValid() const
Checks the validity of the slice computed.
Definition: Utils.cpp:690
SmallVector< Value, 4 > ivs
Definition: Utils.h:263
LogicalResult getAsConstraints(FlatAffineValueConstraints *cst) const
Definition: Utils.cpp:552
LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst) const
Adds to 'cst' constraints which represent the original loop bounds on 'ivs' in 'this'.
Definition: Utils.cpp:536
std::vector< SmallVector< Value, 4 > > ubOperands
Definition: Utils.h:271
SmallVector< AffineMap, 4 > ubs
Definition: Utils.h:267
std::optional< bool > isMaximal() const
Returns true if the computation slice encloses all the iterations of the sliced loop nest.
Definition: Utils.cpp:760
SmallVector< AffineMap, 4 > lbs
Definition: Utils.h:265
std::vector< SmallVector< Value, 4 > > lbOperands
Definition: Utils.h:269
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
void collect(Operation *opToWalk)
Definition: Utils.cpp:41
Encapsulates a memref load or store access information.
unsigned getRank() const
Definition: Utils.cpp:1710
MemRefAccess(Operation *opInst)
Constructs a MemRefAccess from a load or store operation.
Definition: Utils.cpp:1695
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
bool operator==(const MemRefAccess &rhs) const
Equal if both affine accesses can be proved to be equivalent at compile time (considering the memrefs...
Definition: Utils.cpp:1736
SmallVector< Operation *, 4 > loads
Definition: Utils.h:65
SmallVector< Operation *, 4 > stores
Definition: Utils.h:67
unsigned getStoreOpCount(Value memref) const
Definition: Utils.cpp:65
unsigned addNode(Operation *op)
Definition: Utils.cpp:123
unsigned getIncomingMemRefAccesses(unsigned id, Value memref)
Definition: Utils.cpp:260
void removeEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:199
void addEdge(unsigned srcId, unsigned dstId, Value value)
Definition: Utils.cpp:188
Node * getForOpNode(AffineForOp forOp)
Definition: Utils.cpp:115
bool hasDependencePath(unsigned srcId, unsigned dstId)
Definition: Utils.cpp:226
void clearNodeLoadAndStores(unsigned id)
Definition: Utils.cpp:448
void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)
Definition: Utils.cpp:376
bool writesToLiveInOrEscapingMemrefs(unsigned id)
Definition: Utils.cpp:153
void addToNode(unsigned id, const SmallVectorImpl< Operation * > &loads, const SmallVectorImpl< Operation * > &stores)
Definition: Utils.cpp:440
void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:456
unsigned getOutEdgeCount(unsigned id, Value memref=nullptr)
Definition: Utils.cpp:276
void removeNode(unsigned id)
Definition: Utils.cpp:130
Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId)
Definition: Utils.cpp:300
void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:464
void forEachMemRefEdge(ArrayRef< Edge > edges, const std::function< void(Edge)> &callback)
Definition: Utils.cpp:472
bool hasEdge(unsigned srcId, unsigned dstId, Value value=nullptr)
Definition: Utils.cpp:173
void gatherDefiningNodes(unsigned id, DenseSet< unsigned > &definingNodes)
Return all nodes which define SSA values used in node 'id'.
Definition: Utils.cpp:286
Node * getNode(unsigned id)
Definition: Utils.cpp:108
void print(raw_ostream &os) const
Definition: Utils.cpp:487
A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...
Definition: Utils.h:432
FlatAffineValueConstraints * getConstraints()
Definition: Utils.h:471
unsigned getRank() const
Returns the rank of the memref that this region corresponds to.
Definition: Utils.cpp:810
std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, std::vector< SmallVector< int64_t, 4 >> *lbs=nullptr, SmallVectorImpl< int64_t > *lbDivisors=nullptr) const
Returns a constant upper bound on the number of elements in this region if bounded by a known constan...
Definition: Utils.cpp:814
void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const
Gets the lower and upper bound map for the dimensional variable at pos.
Definition: Utils.cpp:875
std::optional< int64_t > getRegionSize()
Returns the size of this MemRefRegion in bytes.
Definition: Utils.cpp:1087
LogicalResult unionBoundingBox(const MemRefRegion &other)
Definition: Utils.cpp:894
Value memref
Memref that this region corresponds to.
Definition: Utils.h:519
LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true)
Computes the memory region accessed by this memref with the region represented as constraints symboli...
Definition: Utils.cpp:916
Enumerates different result statuses of slice computation by computeSliceUnion
Definition: Utils.h:247
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.