//===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"

#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
using namespace affine;
using namespace presburger;

/// Get the value that is being reduced by `pos`-th reduction in the loop if
/// such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  std::optional<arith::AtomicRMWKind> maybeKind =
      llvm::TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(
          combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case(
              [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
          .Case(
              [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions this is
            // currently not detecting; add those when the need arises.
            return std::nullopt;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}
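
// As an illustration (an assumed example, not part of the original source),
// a loop like the following is matched above with kind ==
// arith::AtomicRMWKind::addf, since the single combiner feeding the yield is
// an arith.addf:
//
//   %res = affine.for %i = 0 to 128 iter_args(%acc = %cst) -> (f32) {
//     %v = affine.load %buf[%i] : memref<128xf32>
//     %sum = arith.addf %acc, %v : f32
//     affine.yield %sum : f32
//   }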

/// Populate `supportedReductions` with descriptors of the supported reductions.
void mlir::affine::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp` is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
bool mlir::affine::isLoopParallel(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if
    // the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}
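
// A minimal caller sketch (assumed usage, not from this file): gather the
// parallelizable reductions while testing the loop, then rewrite only on
// success.
//
//   SmallVector<LoopReduction> reductions;
//   if (isLoopParallel(forOp, &reductions)) {
//     // `forOp` can be converted to an affine.parallel op; each entry in
//     // `reductions` names the combiner kind and iter_arg position to use.
//   }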

/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
/// allocated by an operation nested within `enclosingOp`.
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
  Operation *defOp = v.getDefiningOp();
  if (!defOp)
    return false;

  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
      enclosingOp->isProperAncestor(defOp))
    return true;

  // Aliasing ops.
  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}

bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
  // Any memref-typed iteration arguments are treated as serializing.
  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
    return false;

  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
      // Memrefs that are allocated inside `forOp` need not be considered.
      if (!isLocallyDefined(readOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
      // Filter out stores the same way as above.
      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
               !hasSingleEffect<MemoryEffects::Allocate>(op) &&
               !isMemoryEffectFree(op)) {
      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
      // as long as they don't escape the loop (which has been checked above).
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      DependenceResult result =
          checkMemrefAccessDependence(srcAccess, dstAccess, depth);
      if (!noDependence(result))
        return false;
    }
  }
  return true;
}

/// Returns in 'affineApplyOps' the sequence of AffineApplyOp operations that
/// are reachable via a search starting from 'operands' and ending at operands
/// that are not defined by AffineApplyOps.
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::affine::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The SSA value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during the DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e., a block argument), which is a terminator for the
    // search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-visit: add 'opInst' to the reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: push the operand of 'opInst' at 'operandIndex' onto the
      // worklist and advance the index.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      ++state.operandIndex;
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting this AffineApplyOp's operands; pop it off
      // the stack.
      worklist.pop_back();
    }
  }
}
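
// For example (an assumed snippet, not from this file), given:
//
//   %a = affine.apply affine_map<(d0) -> (d0 + 1)>(%iv)
//   %b = affine.apply affine_map<(d0) -> (d0 * 2)>(%a)
//
// calling getReachableAffineApplyOps({%b}, ops) pre-visits %b's defining op
// and then chases its operand to %a's defining op, so `ops` ends up holding
// both affine.apply ops in that DFS pre-order.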

// Builds a system of constraints with dimensional variables corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints. (E.g., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride).)
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
                                        FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<Operation *, 8> loopOps;
  size_t numDims = 0;
  for (Operation *op : ops) {
    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
                                 "parallel ops");
      return failure();
    }
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      loopOps.push_back(forOp);
      // An AffineForOp has a single induction variable.
      numDims += 1;
    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
      loopOps.push_back(parallelOp);
      numDims += parallelOp.getNumDims();
    }
  }
  extractInductionVars(loopOps, indices);
  // Reset while associating Values in 'indices' to the domain.
  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
                                       /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
        return failure();
  }
  return success();
}
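
// As a small worked example (assumed, not from this file), for the nest:
//
//   affine.for %i = 0 to %N {
//     affine.for %j = 0 to 8 { ... }
//   }
//
// the domain built above has dimension variables (%i, %j), symbol %N, and the
// constraints  i >= 0,  N - 1 - i >= 0,  j >= 0,  7 - j >= 0.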

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional variables in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'srcDomain'/'dstDomain'.
// Loops common to 'src'/'dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
         !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
        (!isAffineForInductionVar(dstDomain.getValue(i)) &&
         !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
/// `opB` should be in the same affine scope. Returns nullptr if such a block
/// does not exist (when the two ops are in different blocks of an op starting
/// an `AffineScope`).
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
  // Get the chain of ancestor blocks for the given op. The chain extends up
  // to and including an op that starts an affine scope.
  auto getChainOfAncestorBlocks =
      [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
        Block *currBlock = op->getBlock();
        // Loop terminates when currBlock is nullptr or its parent operation
        // holds an affine scope.
        while (currBlock &&
               !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
          ancestorBlocks.push_back(currBlock);
          currBlock = currBlock->getParentOp()->getBlock();
        }
        assert(currBlock &&
               "parent op starting an affine scope is always expected");
        ancestorBlocks.push_back(currBlock);
      };

  // Find the closest common block.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
  getChainOfAncestorBlocks(opB, dstAncestorBlocks);

  Block *commonBlock = nullptr;
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

/// Returns true if the ancestor operation of 'srcAccess' appears before the
/// ancestor operation of 'dstAccess' in their common ancestral block. The
/// operations for `srcAccess` and `dstAccess` are expected to be in the same
/// affine scope and have a common surrounding block within it.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
                                                const MemRefAccess &dstAccess) {
  // Get the Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  Block *commonBlock =
      getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
  assert(commonBlock &&
         "ops expected to have a common surrounding block in affine scope");

  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcOp && "src access op must lie in common block");
  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstOp && "dest access op must lie in common block");

  // Determine whether dstOp comes after srcOp.
  return srcOp->isBeforeInBlock(dstOp);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'srcDomain'/'dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and
//    j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                                   const FlatAffineValueConstraints &dstDomain,
                                   unsigned loopDepth,
                                   IntegerRelation *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimVars();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}
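
// Concretely (an illustrative worked example, not in the original source):
// with two common loops (i, j), numSrcDims == 2, and loopDepth == 2, the rows
// added above over columns [i, j, i', j', const] are
//
//   -1  0  1  0   0   = 0    (i' == i)
//    0 -1  0  1  -1  >= 0    (j' >= j + 1)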

// Computes distance and direction vectors in 'dependences', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    IntegerPolyhedron *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for the requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
                              /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting the newly
  // introduced variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimVars();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on the eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.value_or(std::numeric_limits<int64_t>::min());
    auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.value_or(std::numeric_limits<int64_t>::max());
  }
}
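
// For instance (an illustrative note, not from the original source): if the
// only dependence carried by a single common loop runs from iteration i to
// iteration i + 2, the projected system pins the difference variable to
// exactly 2, so that component gets lb == ub == 2; when no constant bound
// exists, the int64_t min/max sentinels assigned above are kept.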

LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
  // Create set corresponding to domain of access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get the access relation from the access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  // Merge and align domain ids of `rel` with ids of `domain`. Since the
  // domain of the access map is a subset of the domain of access, the domain
  // ids of `rel` are guaranteed to be a subset of ids of `domain`.
  unsigned inserts = 0;
  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
    const Identifier domainIdi = Identifier(domain.getValue(i));
    const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
    const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
    const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
    if (itr != findEnd) {
      rel.swapVar(i, i + std::distance(findBegin, itr));
    } else {
      ++inserts;
      rel.insertVar(VarKind::SetDim, i);
      rel.setId(VarKind::SetDim, i, domainIdi);
    }
  }

  // Append domain constraints to `rel`.
  IntegerRelation domainRel = domain;
  if (rel.getSpace().isUsingIds() && !domainRel.getSpace().isUsingIds())
    domainRel.resetIds();
  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
  domainRel.mergeAndAlignSymbols(rel);
  domainRel.mergeLocalVars(rel);
  rel.append(domainRel);

  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
                     VarKind::Domain);

  return success();
}

// Populates 'accessMap' with a composition of the AffineApplyOps reachable
// from the indices of the MemRefAccess.
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get the affine map from the AffineLoad/Store op.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check is comprised of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some
//    array reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation between the iteration domain of `srcAccess`
//    and the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//   [src-dim-variables, dst-dim-variables, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref %m, and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,  %M,  const]
//       2        -4        -1        0         1    0      0     = 0
//       0         3         0       -1         0   -1      0     = 0
//       1         0         0        0         0    0      0    >= 0
//      -1         0         0        0         0    0    100    >= 0
//       0         1         0        0         0    0      0    >= 0
//       0        -1         0        0         0    0     50    >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,  %K,  const]
//       7         9        -1        0        -1    0      0     = 0
//       0        11         0       -1         0   -1      0     = 0
//       1         0         0        0         0    0      0    >= 0
//      -1         0         0        0         0    0    100    >= 0
//       0         1         0        0         0    0      0    >= 0
//       0        -1         0        0         0    0     50    >= 0
//
// The equalities in the above relations correspond to the access maps, while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,  %N,  %K,  const]
//       2        -4        -7        -9        1    1    0      0     = 0
//       0         3         0       -11       -1    0    1      0     = 0
//       1         0         0         0        0    0    0      0    >= 0
//      -1         0         0         0        0    0    0    100    >= 0
//       0         1         0         0        0    0    0      0    >= 0
//       0        -1         0         0        0    0    0     50    >= 0
//       0         0         1         0        0    0    0      0    >= 0
//       0         0        -1         0        0    0    0    100    >= 0
//       0         0         0         1        0    0    0      0    >= 0
//       0         0         0        -1        0    0    0     50    >= 0
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::affine::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n");
  LLVM_DEBUG(srcAccess.opInst->dump());
  LLVM_DEBUG(dstAccess.opInst->dump());

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if one of these accesses is not an
  // AffineWriteOpInterface.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // We can't analyze further if the ops lie in different affine scopes or have
  // no common block in an affine scope.
  if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
    return DependenceResult::Failure;
  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
    return DependenceResult::Failure;

  // Create an access relation from each MemRefAccess.
  PresburgerSpace space = PresburgerSpace::getRelationSpace();
  IntegerRelation srcRel(space), dstRel(space);
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
  // can exist irrespective of lexicographic ordering b/w src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between the iteration domain of
  // `srcAccess` and the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  dstRel.mergeAndCompose(srcRel);
  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
                        VarKind::Range, 0);
  IntegerPolyhedron dependenceDomain(dstRel);

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceDomain.isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector, if requested.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
                           dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceDomain.dump());

  FlatAffineValueConstraints result(dependenceDomain);
  if (dependenceConstraints)
    *dependenceConstraints = result;
  return DependenceResult::HasDependence;
}
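
// A minimal caller sketch (assumed usage, not from this file): test a pair of
// accesses at a given depth and read off the per-loop dependence components;
// `storeOp` and `loadOp` are hypothetical affine store/load ops.
//
//   MemRefAccess src(storeOp), dst(loadOp);
//   SmallVector<DependenceComponent, 2> comps;
//   DependenceResult res = checkMemrefAccessDependence(
//       src, dst, /*loopDepth=*/1, /*dependenceConstraints=*/nullptr, &comps);
//   if (hasDependence(res)) {
//     // comps[k] bounds the difference of the k-th common loop's IVs.
//   }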

/// Gathers dependence components for dependences between all ops in the loop
/// nest rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
void mlir::affine::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in the loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}