MLIR  19.0.0git
AffineAnalysis.cpp
Go to the documentation of this file.
1 //===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous analysis routines for affine structures
10 // (expressions, maps, sets), and other utilities relying on such analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
22 #include "mlir/IR/BuiltinOps.h"
23 #include "mlir/IR/IntegerSet.h"
26 #include "llvm/ADT/TypeSwitch.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <optional>
30 
31 #define DEBUG_TYPE "affine-analysis"
32 
33 using namespace mlir;
34 using namespace affine;
35 using namespace presburger;
36 
37 /// Get the value that is being reduced by `pos`-th reduction in the loop if
38 /// such a reduction can be performed by affine parallel loops. This assumes
39 /// floating-point operations are commutative. On success, `kind` will be the
40 /// reduction kind suitable for use in affine parallel loop builder. If the
41 /// reduction is not supported, returns null.
42 static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
43  arith::AtomicRMWKind &kind) {
44  SmallVector<Operation *> combinerOps;
45  Value reducedVal =
46  matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
47  if (!reducedVal)
48  return nullptr;
49 
50  // Expected only one combiner operation.
51  if (combinerOps.size() > 1)
52  return nullptr;
53 
54  Operation *combinerOp = combinerOps.back();
55  std::optional<arith::AtomicRMWKind> maybeKind =
57  .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
58  .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
59  .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
60  .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
61  .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
62  .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
63  .Case(
64  [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
65  .Case(
66  [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
67  .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
68  .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
69  .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
70  .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
71  .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
72  // TODO: AtomicRMW supports other kinds of reductions this is
73  // currently not detecting, add those when the need arises.
74  return std::nullopt;
75  });
76  if (!maybeKind)
77  return nullptr;
78 
79  kind = *maybeKind;
80  return reducedVal;
81 }
82 
83 /// Populate `supportedReductions` with descriptors of the supported reductions.
85  AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
86  unsigned numIterArgs = forOp.getNumIterOperands();
87  if (numIterArgs == 0)
88  return;
89  supportedReductions.reserve(numIterArgs);
90  for (unsigned i = 0; i < numIterArgs; ++i) {
91  arith::AtomicRMWKind kind;
92  if (Value value = getSupportedReduction(forOp, i, kind))
93  supportedReductions.emplace_back(LoopReduction{kind, i, value});
94  }
95 }
96 
97 /// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
98 /// provided, populates it with descriptors of the parallelizable reductions and
99 /// treats them as not preventing parallelization.
101  AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
102  unsigned numIterArgs = forOp.getNumIterOperands();
103 
104  // Loop is not parallel if it has SSA loop-carried dependences and reduction
105  // detection is not requested.
106  if (numIterArgs > 0 && !parallelReductions)
107  return false;
108 
109  // Find supported reductions of requested.
110  if (parallelReductions) {
111  getSupportedReductions(forOp, *parallelReductions);
112  // Return later to allow for identifying all parallel reductions even if the
113  // loop is not parallel.
114  if (parallelReductions->size() != numIterArgs)
115  return false;
116  }
117 
118  // Check memory dependences.
119  return isLoopMemoryParallel(forOp);
120 }
121 
122 /// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
123 /// allocated by an operation nested within `enclosingOp`.
124 static bool isLocallyDefined(Value v, Operation *enclosingOp) {
125  Operation *defOp = v.getDefiningOp();
126  if (!defOp)
127  return false;
128 
129  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
130  enclosingOp->isProperAncestor(defOp))
131  return true;
132 
133  // Aliasing ops.
134  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
135  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
136 }
137 
138 bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
139  // Any memref-typed iteration arguments are treated as serializing.
140  if (llvm::any_of(forOp.getResultTypes(),
141  [](Type type) { return isa<BaseMemRefType>(type); }))
142  return false;
143 
144  // Collect all load and store ops in loop nest rooted at 'forOp'.
145  SmallVector<Operation *, 8> loadAndStoreOps;
146  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
147  if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
148  // Memrefs that are allocated inside `forOp` need not be considered.
149  if (!isLocallyDefined(readOp.getMemRef(), forOp))
150  loadAndStoreOps.push_back(op);
151  } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
152  // Filter out stores the same way as above.
153  if (!isLocallyDefined(writeOp.getMemRef(), forOp))
154  loadAndStoreOps.push_back(op);
155  } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
156  !hasSingleEffect<MemoryEffects::Allocate>(op) &&
157  !isMemoryEffectFree(op)) {
158  // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
159  // as long as they don't escape the loop (which has been checked above).
160  return WalkResult::interrupt();
161  }
162 
163  return WalkResult::advance();
164  });
165 
166  // Stop early if the loop has unknown ops with side effects.
167  if (walkResult.wasInterrupted())
168  return false;
169 
170  // Dep check depth would be number of enclosing loops + 1.
171  unsigned depth = getNestingDepth(forOp) + 1;
172 
173  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
174  for (auto *srcOp : loadAndStoreOps) {
175  MemRefAccess srcAccess(srcOp);
176  for (auto *dstOp : loadAndStoreOps) {
177  MemRefAccess dstAccess(dstOp);
178  DependenceResult result =
179  checkMemrefAccessDependence(srcAccess, dstAccess, depth);
181  return false;
182  }
183  }
184  return true;
185 }
186 
187 /// Returns the sequence of AffineApplyOp Operations operation in
188 /// 'affineApplyOps', which are reachable via a search starting from 'operands',
189 /// and ending at operands which are not defined by AffineApplyOps.
190 // TODO: Add a method to AffineApplyOp which forward substitutes the
191 // AffineApplyOp into any user AffineApplyOps.
193  ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
194  struct State {
195  // The ssa value for this node in the DFS traversal.
196  Value value;
197  // The operand index of 'value' to explore next during DFS traversal.
198  unsigned operandIndex;
199  };
200  SmallVector<State, 4> worklist;
201  for (auto operand : operands) {
202  worklist.push_back({operand, 0});
203  }
204 
205  while (!worklist.empty()) {
206  State &state = worklist.back();
207  auto *opInst = state.value.getDefiningOp();
208  // Note: getDefiningOp will return nullptr if the operand is not an
209  // Operation (i.e. block argument), which is a terminator for the search.
210  if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
211  worklist.pop_back();
212  continue;
213  }
214 
215  if (state.operandIndex == 0) {
216  // Pre-Visit: Add 'opInst' to reachable sequence.
217  affineApplyOps.push_back(opInst);
218  }
219  if (state.operandIndex < opInst->getNumOperands()) {
220  // Visit: Add next 'affineApplyOp' operand to worklist.
221  // Get next operand to visit at 'operandIndex'.
222  auto nextOperand = opInst->getOperand(state.operandIndex);
223  // Increment 'operandIndex' in 'state'.
224  ++state.operandIndex;
225  // Add 'nextOperand' to worklist.
226  worklist.push_back({nextOperand, 0});
227  } else {
228  // Post-visit: done visiting operands AffineApplyOp, pop off stack.
229  worklist.pop_back();
230  }
231  }
232 }
233 
234 // Builds a system of constraints with dimensional variables corresponding to
235 // the loop IVs of the forOps appearing in that order. Any symbols founds in
236 // the bound operands are added as symbols in the system. Returns failure for
237 // the yet unimplemented cases.
238 // TODO: Handle non-unit steps through local variables or stride information in
239 // FlatAffineValueConstraints. (For eg., by using iv - lb % step = 0 and/or by
240 // introducing a method in FlatAffineValueConstraints
241 // setExprStride(ArrayRef<int64_t> expr, int64_t stride)
243  FlatAffineValueConstraints *domain) {
244  SmallVector<Value, 4> indices;
246  size_t numDims = 0;
247  for (Operation *op : ops) {
248  if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
249  LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
250  "parallel ops");
251  return failure();
252  }
253  if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
254  loopOps.push_back(forOp);
255  // An AffineForOp retains only 1 induction variable.
256  numDims += 1;
257  } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
258  loopOps.push_back(parallelOp);
259  numDims += parallelOp.getNumDims();
260  }
261  }
262  extractInductionVars(loopOps, indices);
263  // Reset while associating Values in 'indices' to the domain.
264  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
265  /*numLocals=*/0, indices);
266  for (Operation *op : ops) {
267  // Add constraints from forOp's bounds.
268  if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
269  if (failed(domain->addAffineForOpDomain(forOp)))
270  return failure();
271  } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
272  domain->addAffineIfOpDomain(ifOp);
273  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
274  if (failed(domain->addAffineParallelOpDomain(parallelOp)))
275  return failure();
276  }
277  return success();
278 }
279 
280 /// Computes the iteration domain for 'op' and populates 'indexSet', which
281 /// encapsulates the constraints involving loops surrounding 'op' and
282 /// potentially involving any Function symbols. The dimensional variables in
283 /// 'indexSet' correspond to the loops surrounding 'op' from outermost to
284 /// innermost.
286  FlatAffineValueConstraints *indexSet) {
288  getEnclosingAffineOps(*op, &ops);
289  return getIndexSet(ops, indexSet);
290 }
291 
292 // Returns the number of outer loop common to 'src/dstDomain'.
293 // Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
294 static unsigned
296  const FlatAffineValueConstraints &dstDomain,
297  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
298  // Find the number of common loops shared by src and dst accesses.
299  unsigned minNumLoops =
300  std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
301  unsigned numCommonLoops = 0;
302  for (unsigned i = 0; i < minNumLoops; ++i) {
303  if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
304  !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
305  (!isAffineForInductionVar(dstDomain.getValue(i)) &&
306  !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
307  srcDomain.getValue(i) != dstDomain.getValue(i))
308  break;
309  if (commonLoops != nullptr)
310  commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
311  ++numCommonLoops;
312  }
313  if (commonLoops != nullptr)
314  assert(commonLoops->size() == numCommonLoops);
315  return numCommonLoops;
316 }
317 
318 /// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
319 /// `opB` should be in the same affine scope. Returns nullptr if such a block
320 /// does not exist (when the two ops are in different blocks of an op starting
321 /// an `AffineScope`).
323  // Get the chain of ancestor blocks for the given `MemRefAccess` instance. The
324  // chain extends up to and includnig an op that starts an affine scope.
325  auto getChainOfAncestorBlocks =
326  [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
327  Block *currBlock = op->getBlock();
328  // Loop terminates when the currBlock is nullptr or its parent operation
329  // holds an affine scope.
330  while (currBlock &&
331  !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
332  ancestorBlocks.push_back(currBlock);
333  currBlock = currBlock->getParentOp()->getBlock();
334  }
335  assert(currBlock &&
336  "parent op starting an affine scope is always expected");
337  ancestorBlocks.push_back(currBlock);
338  };
339 
340  // Find the closest common block.
341  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
342  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
343  getChainOfAncestorBlocks(opB, dstAncestorBlocks);
344 
345  Block *commonBlock = nullptr;
346  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
347  i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
348  i--, j--)
349  commonBlock = srcAncestorBlocks[i];
350 
351  return commonBlock;
352 }
353 
354 /// Returns true if the ancestor operation of 'srcAccess' appears before the
355 /// ancestor operation of 'dstAccess' in their common ancestral block. The
356 /// operations for `srcAccess` and `dstAccess` are expected to be in the same
357 /// affine scope and have a common surrounding block within it.
359  const MemRefAccess &dstAccess) {
360  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
361  Block *commonBlock =
362  getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
363  assert(commonBlock &&
364  "ops expected to have a common surrounding block in affine scope");
365 
366  // Check the dominance relationship between the respective ancestors of the
367  // src and dst in the Block of the innermost among the common loops.
368  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
369  assert(srcOp && "src access op must lie in common block");
370  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
371  assert(dstOp && "dest access op must lie in common block");
372 
373  // Determine whether dstOp comes after srcOp.
374  return srcOp->isBeforeInBlock(dstOp);
375 }
376 
377 // Adds ordering constraints to 'dependenceDomain' based on number of loops
378 // common to 'src/dstDomain' and requested 'loopDepth'.
379 // Note that 'loopDepth' cannot exceed the number of common loops plus one.
380 // EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
381 // *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
382 // *) If 'loopDepth == 2' then two constraints are added: i == i' and j' > j + 1
383 // *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
384 static void
386  const FlatAffineValueConstraints &dstDomain,
387  unsigned loopDepth,
388  FlatAffineValueConstraints *dependenceDomain) {
389  unsigned numCols = dependenceDomain->getNumCols();
390  SmallVector<int64_t, 4> eq(numCols);
391  unsigned numSrcDims = srcDomain.getNumDimVars();
392  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
393  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
394  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
395  std::fill(eq.begin(), eq.end(), 0);
396  eq[i] = -1;
397  eq[i + numSrcDims] = 1;
398  if (i == loopDepth - 1) {
399  eq[numCols - 1] = -1;
400  dependenceDomain->addInequality(eq);
401  } else {
402  dependenceDomain->addEquality(eq);
403  }
404  }
405 }
406 
407 // Computes distance and direction vectors in 'dependences', by adding
408 // variables to 'dependenceDomain' which represent the difference of the IVs,
409 // eliminating all other variables, and reading off distance vectors from
410 // equality constraints (if possible), and direction vectors from inequalities.
412  const FlatAffineValueConstraints &srcDomain,
413  const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
414  FlatAffineValueConstraints *dependenceDomain,
415  SmallVector<DependenceComponent, 2> *dependenceComponents) {
416  // Find the number of common loops shared by src and dst accesses.
417  SmallVector<AffineForOp, 4> commonLoops;
418  unsigned numCommonLoops =
419  getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
420  if (numCommonLoops == 0)
421  return;
422  // Compute direction vectors for requested loop depth.
423  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
424  // Add new variables to 'dependenceDomain' to represent the direction
425  // constraints for each shared loop.
426  dependenceDomain->insertDimVar(/*pos=*/0, /*num=*/numCommonLoops);
427 
428  // Add equality constraints for each common loop, setting newly introduced
429  // variable at column 'j' to the 'dst' IV minus the 'src IV.
431  eq.resize(dependenceDomain->getNumCols());
432  unsigned numSrcDims = srcDomain.getNumDimVars();
433  // Constraint variables format:
434  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
435  for (unsigned j = 0; j < numCommonLoops; ++j) {
436  std::fill(eq.begin(), eq.end(), 0);
437  eq[j] = 1;
438  eq[j + numCommonLoops] = 1;
439  eq[j + numCommonLoops + numSrcDims] = -1;
440  dependenceDomain->addEquality(eq);
441  }
442 
443  // Eliminate all variables other than the direction variables just added.
444  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);
445 
446  // Scan each common loop variable column and set direction vectors based
447  // on eliminated constraint system.
448  dependenceComponents->resize(numCommonLoops);
449  for (unsigned j = 0; j < numCommonLoops; ++j) {
450  (*dependenceComponents)[j].op = commonLoops[j].getOperation();
451  auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
452  (*dependenceComponents)[j].lb =
453  lbConst.value_or(std::numeric_limits<int64_t>::min());
454  auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
455  (*dependenceComponents)[j].ub =
456  ubConst.value_or(std::numeric_limits<int64_t>::max());
457  }
458 }
459 
461  // Create set corresponding to domain of access.
463  if (failed(getOpIndexSet(opInst, &domain)))
464  return failure();
465 
466  // Get access relation from access map.
467  AffineValueMap accessValueMap;
468  getAccessMap(&accessValueMap);
469  if (failed(getRelationFromMap(accessValueMap, rel)))
470  return failure();
471 
472  FlatAffineRelation domainRel(rel.getNumDomainDims(), /*numRangeDims=*/0,
473  domain);
474 
475  // Merge and align domain ids of `ret` and ids of `domain`. Since the domain
476  // of the access map is a subset of the domain of access, the domain ids of
477  // `ret` are guranteed to be a subset of ids of `domain`.
478  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
479  unsigned loc;
480  if (rel.findVar(domain.getValue(i), &loc)) {
481  rel.swapVar(i, loc);
482  } else {
483  rel.insertDomainVar(i);
484  rel.setValue(i, domain.getValue(i));
485  }
486  }
487 
488  // Append domain constraints to `rel`.
489  domainRel.appendRangeVar(rel.getNumRangeDims());
490  domainRel.mergeSymbolVars(rel);
491  domainRel.mergeLocalVars(rel);
492  rel.append(domainRel);
493 
494  return success();
495 }
496 
497 // Populates 'accessMap' with composition of AffineApplyOps reachable from
498 // indices of MemRefAccess.
500  // Get affine map from AffineLoad/Store.
501  AffineMap map;
502  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
503  map = loadOp.getAffineMap();
504  else
505  map = cast<AffineWriteOpInterface>(opInst).getAffineMap();
506 
507  SmallVector<Value, 8> operands(indices.begin(), indices.end());
508  fullyComposeAffineMapAndOperands(&map, &operands);
509  map = simplifyAffineMap(map);
510  canonicalizeMapAndOperands(&map, &operands);
511  accessMap->reset(map, operands);
512 }
513 
514 // Builds a flat affine constraint system to check if there exists a dependence
515 // between memref accesses 'srcAccess' and 'dstAccess'.
516 // Returns 'NoDependence' if the accesses can be definitively shown not to
517 // access the same element.
518 // Returns 'HasDependence' if the accesses do access the same element.
519 // Returns 'Failure' if an error or unsupported case was encountered.
520 // If a dependence exists, returns in 'dependenceComponents' a direction
521 // vector for the dependence, with a component for each loop IV in loops
522 // common to both accesses (see Dependence in AffineAnalysis.h for details).
523 //
524 // The memref access dependence check is comprised of the following steps:
525 // *) Build access relation for each access. An access relation maps elements
526 // of an iteration domain to the element(s) of an array domain accessed by
527 // that iteration of the associated statement through some array reference.
528 // *) Compute the dependence relation by composing access relation of
529 // `srcAccess` with the inverse of access relation of `dstAccess`.
530 // Doing this builds a relation between iteration domain of `srcAccess`
531 // to the iteration domain of `dstAccess` which access the same memory
532 // location.
533 // *) Add ordering constraints for `srcAccess` to be accessed before
534 // `dstAccess`.
535 //
536 // This method builds a constraint system with the following column format:
537 //
538 // [src-dim-variables, dst-dim-variables, symbols, constant]
539 //
540 // For example, given the following MLIR code with "source" and "destination"
541 // accesses to the same memref label, and symbols %M, %N, %K:
542 //
543 // affine.for %i0 = 0 to 100 {
544 // affine.for %i1 = 0 to 50 {
545 // %a0 = affine.apply
546 // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
547 // // Source memref access.
548 // store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
549 // }
550 // }
551 //
552 // affine.for %i2 = 0 to 100 {
553 // affine.for %i3 = 0 to 50 {
554 // %a1 = affine.apply
555 // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
556 // // Destination memref access.
557 // %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
558 // }
559 // }
560 //
561 // The access relation for `srcAccess` would be the following:
562 //
563 // [src_dim0, src_dim1, mem_dim0, mem_dim1, %N, %M, const]
564 // 2 -4 -1 0 1 0 0 = 0
565 // 0 3 0 -1 0 -1 0 = 0
566 // 1 0 0 0 0 0 0 >= 0
567 // -1 0 0 0 0 0 100 >= 0
568 // 0 1 0 0 0 0 0 >= 0
569 // 0 -1 0 0 0 0 50 >= 0
570 //
571 // The access relation for `dstAccess` would be the following:
572 //
573 // [dst_dim0, dst_dim1, mem_dim0, mem_dim1, %M, %K, const]
574 // 7 9 -1 0 -1 0 0 = 0
575 // 0 11 0 -1 0 -1 0 = 0
576 // 1 0 0 0 0 0 0 >= 0
577 // -1 0 0 0 0 0 100 >= 0
578 // 0 1 0 0 0 0 0 >= 0
579 // 0 -1 0 0 0 0 50 >= 0
580 //
581 // The equalities in the above relations correspond to the access maps while
582 // the inequalities corresspond to the iteration domain constraints.
583 //
584 // The dependence relation formed:
585 //
586 // [src_dim0, src_dim1, dst_dim0, dst_dim1, %M, %N, %K, const]
587 // 2 -4 -7 -9 1 1 0 0 = 0
588 // 0 3 0 -11 -1 0 1 0 = 0
589 // 1 0 0 0 0 0 0 0 >= 0
590 // -1 0 0 0 0 0 0 100 >= 0
591 // 0 1 0 0 0 0 0 0 >= 0
592 // 0 -1 0 0 0 0 0 50 >= 0
593 // 0 0 1 0 0 0 0 0 >= 0
594 // 0 0 -1 0 0 0 0 100 >= 0
595 // 0 0 0 1 0 0 0 0 >= 0
596 // 0 0 0 -1 0 0 0 50 >= 0
597 //
598 //
599 // TODO: Support AffineExprs mod/floordiv/ceildiv.
601  const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
602  unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
603  SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
604  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
605  << Twine(loopDepth) << " between:\n";);
606  LLVM_DEBUG(srcAccess.opInst->dump());
607  LLVM_DEBUG(dstAccess.opInst->dump());
608 
609  // Return 'NoDependence' if these accesses do not access the same memref.
610  if (srcAccess.memref != dstAccess.memref)
612 
613  // Return 'NoDependence' if one of these accesses is not an
614  // AffineWriteOpInterface.
615  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
616  !isa<AffineWriteOpInterface>(dstAccess.opInst))
618 
619  // We can't analyze further if the ops lie in different affine scopes or have
620  // no common block in an affine scope.
621  if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
623  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
625 
626  // Create access relation from each MemRefAccess.
627  FlatAffineRelation srcRel, dstRel;
628  if (failed(srcAccess.getAccessRelation(srcRel)))
630  if (failed(dstAccess.getAccessRelation(dstRel)))
632 
633  FlatAffineValueConstraints srcDomain = srcRel.getDomainSet();
634  FlatAffineValueConstraints dstDomain = dstRel.getDomainSet();
635 
636  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
637  // operation of 'srcAccess' does not properly dominate the ancestor
638  // operation of 'dstAccess' in the same common operation block.
639  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
640  // can exist irrespective of lexicographic ordering b/w src and dst.
641  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
642  assert(loopDepth <= numCommonLoops + 1);
643  if (!allowRAR && loopDepth > numCommonLoops &&
644  !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
646  }
647 
648  // Compute the dependence relation by composing `srcRel` with the inverse of
649  // `dstRel`. Doing this builds a relation between iteration domain of
650  // `srcAccess` to the iteration domain of `dstAccess` which access the same
651  // memory locations.
652  dstRel.inverse();
653  dstRel.compose(srcRel);
654 
655  // Add 'src' happens before 'dst' ordering constraints.
656  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dstRel);
657 
658  // Return 'NoDependence' if the solution space is empty: no dependence.
659  if (dstRel.isEmpty())
661 
662  // Compute dependence direction vector and return true.
663  if (dependenceComponents != nullptr)
664  computeDirectionVector(srcDomain, dstDomain, loopDepth, &dstRel,
665  dependenceComponents);
666 
667  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
668  LLVM_DEBUG(dstRel.dump());
669 
670  if (dependenceConstraints)
671  *dependenceConstraints = dstRel;
673 }
674 
675 /// Gathers dependence components for dependences between all ops in loop nest
676 /// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
678  AffineForOp forOp, unsigned maxLoopDepth,
679  std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
680  // Collect all load and store ops in loop nest rooted at 'forOp'.
681  SmallVector<Operation *, 8> loadAndStoreOps;
682  forOp->walk([&](Operation *op) {
683  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
684  loadAndStoreOps.push_back(op);
685  });
686 
687  unsigned numOps = loadAndStoreOps.size();
688  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
689  for (unsigned i = 0; i < numOps; ++i) {
690  auto *srcOp = loadAndStoreOps[i];
691  MemRefAccess srcAccess(srcOp);
692  for (unsigned j = 0; j < numOps; ++j) {
693  auto *dstOp = loadAndStoreOps[j];
694  MemRefAccess dstAccess(dstOp);
695 
697  // TODO: Explore whether it would be profitable to pre-compute and store
698  // deps instead of repeatedly checking.
700  srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
701  &depComps);
702  if (hasDependence(result))
703  depCompsVec->push_back(depComps);
704  }
705  }
706  }
707 }
static LogicalResult getOpIndexSet(Operation *op, FlatAffineValueConstraints *indexSet)
Computes the iteration domain for 'op' and populates 'indexSet', which encapsulates the constraints i...
static void computeDirectionVector(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, unsigned loopDepth, FlatAffineValueConstraints *dependenceDomain, SmallVector< DependenceComponent, 2 > *dependenceComponents)
static Value getSupportedReduction(AffineForOp forOp, unsigned pos, arith::AtomicRMWKind &kind)
Get the value that is being reduced by pos-th reduction in the loop if such a reduction can be perfor...
static unsigned getNumCommonLoops(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, SmallVectorImpl< AffineForOp > *commonLoops=nullptr)
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess)
Returns true if the ancestor operation of 'srcAccess' appears before the ancestor operation of 'dstAc...
static bool isLocallyDefined(Value v, Operation *enclosingOp)
Returns true if v is allocated locally to enclosingOp – i.e., it is allocated by an operation nested ...
static Block * getCommonBlockInAffineScope(Operation *opA, Operation *opB)
Returns the closest surrounding block common to opA and opB.
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, unsigned loopDepth, FlatAffineValueConstraints *dependenceDomain)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
Definition: AffineMap.h:47
Block represents an ordered list of Operations.
Definition: Block.h:30
Operation * findAncestorOpInBlock(Operation &op)
Returns 'op' if 'op' lies in this block, or otherwise finds the ancestor operation of 'op' that lies ...
Definition: Block.cpp:73
void push_back(Operation *op)
Definition: Block.h:146
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
void swapVar(unsigned posA, unsigned posB) override
Swap the posA^th variable with the posB^th variable.
unsigned insertDimVar(unsigned pos, ValueRange vals)
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
void mergeSymbolVars(FlatLinearValueConstraints &other)
Merge and align symbols of this and other such that both get union of of symbols that are unique.
void projectOut(Value val)
Projects out the variable that is associate with Value.
bool findVar(Value val, unsigned *pos, unsigned offset=0) const
Looks up the position of the variable with the specified Value starting with variables at offset offs...
void setValue(unsigned pos, Value val)
Sets the Value associated with the pos^th variable.
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:745
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation...
Definition: Operation.cpp:386
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
bool isProperAncestor(Operation *other)
Return true if this operation is a proper ancestor of the other operation.
Definition: Operation.cpp:219
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:34
static WalkResult advance()
Definition: Visitors.h:52
static WalkResult interrupt()
Definition: Visitors.h:51
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
void reset(AffineMap map, ValueRange operands, ValueRange results={})
A FlatAffineRelation represents a set of ordered pairs (domain -> range) where "domain" and "range" a...
void compose(const FlatAffineRelation &other)
Given affine relation other: (domainOther -> rangeOther), this operation takes the composition of oth...
unsigned getNumDomainDims() const
Returns the number of variables corresponding to domain/range of relation.
void inverse()
Swap domain and range of the relation.
FlatAffineValueConstraints getDomainSet() const
Returns a set corresponding to the domain/range of the affine relation.
void insertDomainVar(unsigned pos, unsigned num=1)
Insert num variables of the specified kind after the pos variable of that kind.
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
void addAffineIfOpDomain(AffineIfOp ifOp)
Adds constraints imposed by the affine.if operation.
LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp)
Add constraints (lower and upper bounds) for the specified 'affine.parallel' operation's Value using ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
void addInequality(ArrayRef< MPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
void addEquality(ArrayRef< MPInt > eq)
Adds an equality from the coefficients specified in eq.
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
void append(const IntegerRelation &other)
Appends constraints from other into this.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
unsigned getNumCols() const
Returns the number of columns in the constraint system.
unsigned mergeLocalVars(IntegerRelation &other)
Adds additional local vars to the sets such that they both have the union of the local vars in each s...
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector< SmallVector< DependenceComponent, 2 >> *depCompsVec)
Returns in 'depCompsVec', dependence components for dependences between all load and store ops in loo...
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
Definition: AffineOps.cpp:1125
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:647
LogicalResult getIndexSet(MutableArrayRef< Operation * > ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
LogicalResult getRelationFromMap(AffineMap &map, FlatAffineRelation &rel)
Builds a relation from the given AffineMap/AffineValueMap map, containing all pairs of the form opera...
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2554
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1426
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2542
bool isLoopMemoryParallel(AffineForOp forOp)
Returns true if `forOp` doesn't have memory dependences preventing parallelization.
void extractInductionVars(ArrayRef< Operation * > affineOps, SmallVectorImpl< Value > &ivs)
Extracts the induction variables from a list of either AffineForOp or AffineParallelOp and places the...
Definition: AffineOps.cpp:2585
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1848
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if `forOp` is a parallel loop.
Region * getAffineScope(Operation *op)
Returns the closest region enclosing op that is held by an operation with trait AffineScope; nullptr ...
Definition: AffineOps.cpp:254
bool isAffineParallelInductionVar(Value val)
Returns true if val is the induction variable of an AffineParallelOp.
Definition: AffineOps.cpp:2546
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Definition: AffineMap.cpp:736
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
A description of a (parallelizable) reduction in an affine loop.
Encapsulates a memref load or store access information.
LogicalResult getAccessRelation(FlatAffineRelation &accessRel) const
Creates an access relation for the access.
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.