MLIR  22.0.0git
AffineAnalysis.cpp
Go to the documentation of this file.
1 //===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous analysis routines for affine structures
10 // (expressions, maps, sets), and other utilities relying on such analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
24 #include "llvm/ADT/TypeSwitch.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <optional>
28 
29 #define DEBUG_TYPE "affine-analysis"
30 
31 using namespace mlir;
32 using namespace affine;
33 using namespace presburger;
34 
35 /// Get the value that is being reduced by `pos`-th reduction in the loop if
36 /// such a reduction can be performed by affine parallel loops. This assumes
37 /// floating-point operations are commutative. On success, `kind` will be the
38 /// reduction kind suitable for use in affine parallel loop builder. If the
39 /// reduction is not supported, returns null.
40 static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
41  arith::AtomicRMWKind &kind) {
42  SmallVector<Operation *> combinerOps;
43  Value reducedVal =
44  matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
45  if (!reducedVal)
46  return nullptr;
47 
48  // Expected only one combiner operation.
49  if (combinerOps.size() > 1)
50  return nullptr;
51 
52  Operation *combinerOp = combinerOps.back();
53  std::optional<arith::AtomicRMWKind> maybeKind =
55  .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
56  .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
57  .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
58  .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
59  .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
60  .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
61  .Case(
62  [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
63  .Case(
64  [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
65  .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
66  .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
67  .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
68  .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
69  .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
70  // TODO: AtomicRMW supports other kinds of reductions this is
71  // currently not detecting, add those when the need arises.
72  return std::nullopt;
73  });
74  if (!maybeKind)
75  return nullptr;
76 
77  kind = *maybeKind;
78  return reducedVal;
79 }
80 
81 /// Populate `supportedReductions` with descriptors of the supported reductions.
83  AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
84  unsigned numIterArgs = forOp.getNumIterOperands();
85  if (numIterArgs == 0)
86  return;
87  supportedReductions.reserve(numIterArgs);
88  for (unsigned i = 0; i < numIterArgs; ++i) {
89  arith::AtomicRMWKind kind;
90  if (Value value = getSupportedReduction(forOp, i, kind))
91  supportedReductions.emplace_back(LoopReduction{kind, i, value});
92  }
93 }
94 
95 /// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
96 /// provided, populates it with descriptors of the parallelizable reductions and
97 /// treats them as not preventing parallelization.
99  AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
100  unsigned numIterArgs = forOp.getNumIterOperands();
101 
102  // Loop is not parallel if it has SSA loop-carried dependences and reduction
103  // detection is not requested.
104  if (numIterArgs > 0 && !parallelReductions)
105  return false;
106 
107  // Find supported reductions of requested.
108  if (parallelReductions) {
109  getSupportedReductions(forOp, *parallelReductions);
110  // Return later to allow for identifying all parallel reductions even if the
111  // loop is not parallel.
112  if (parallelReductions->size() != numIterArgs)
113  return false;
114  }
115 
116  // Check memory dependences.
117  return isLoopMemoryParallel(forOp);
118 }
119 
120 /// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
121 /// allocated by an operation nested within `enclosingOp`.
122 static bool isLocallyDefined(Value v, Operation *enclosingOp) {
123  Operation *defOp = v.getDefiningOp();
124  if (!defOp)
125  return false;
126 
127  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
128  enclosingOp->isProperAncestor(defOp))
129  return true;
130 
131  // Aliasing ops.
132  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
133  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
134 }
135 
136 bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
137  // Any memref-typed iteration arguments are treated as serializing.
138  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
139  return false;
140 
141  // Collect all load and store ops in loop nest rooted at 'forOp'.
142  SmallVector<Operation *, 8> loadAndStoreOps;
143  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
144  if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
145  // Memrefs that are allocated inside `forOp` need not be considered.
146  if (!isLocallyDefined(readOp.getMemRef(), forOp))
147  loadAndStoreOps.push_back(op);
148  } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
149  // Filter out stores the same way as above.
150  if (!isLocallyDefined(writeOp.getMemRef(), forOp))
151  loadAndStoreOps.push_back(op);
152  } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
153  !hasSingleEffect<MemoryEffects::Allocate>(op) &&
154  !isMemoryEffectFree(op)) {
155  // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
156  // as long as they don't escape the loop (which has been checked above).
157  return WalkResult::interrupt();
158  }
159 
160  return WalkResult::advance();
161  });
162 
163  // Stop early if the loop has unknown ops with side effects.
164  if (walkResult.wasInterrupted())
165  return false;
166 
167  // Dep check depth would be number of enclosing loops + 1.
168  unsigned depth = getNestingDepth(forOp) + 1;
169 
170  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
171  for (auto *srcOp : loadAndStoreOps) {
172  MemRefAccess srcAccess(srcOp);
173  for (auto *dstOp : loadAndStoreOps) {
174  MemRefAccess dstAccess(dstOp);
175  DependenceResult result =
176  checkMemrefAccessDependence(srcAccess, dstAccess, depth);
178  return false;
179  }
180  }
181  return true;
182 }
183 
184 /// Returns the sequence of AffineApplyOp Operations operation in
185 /// 'affineApplyOps', which are reachable via a search starting from 'operands',
186 /// and ending at operands which are not defined by AffineApplyOps.
187 // TODO: Add a method to AffineApplyOp which forward substitutes the
188 // AffineApplyOp into any user AffineApplyOps.
190  ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
191  struct State {
192  // The ssa value for this node in the DFS traversal.
193  Value value;
194  // The operand index of 'value' to explore next during DFS traversal.
195  unsigned operandIndex;
196  };
197  SmallVector<State, 4> worklist;
198  for (auto operand : operands) {
199  worklist.push_back({operand, 0});
200  }
201 
202  while (!worklist.empty()) {
203  State &state = worklist.back();
204  auto *opInst = state.value.getDefiningOp();
205  // Note: getDefiningOp will return nullptr if the operand is not an
206  // Operation (i.e. block argument), which is a terminator for the search.
207  if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
208  worklist.pop_back();
209  continue;
210  }
211 
212  if (state.operandIndex == 0) {
213  // Pre-Visit: Add 'opInst' to reachable sequence.
214  affineApplyOps.push_back(opInst);
215  }
216  if (state.operandIndex < opInst->getNumOperands()) {
217  // Visit: Add next 'affineApplyOp' operand to worklist.
218  // Get next operand to visit at 'operandIndex'.
219  auto nextOperand = opInst->getOperand(state.operandIndex);
220  // Increment 'operandIndex' in 'state'.
221  ++state.operandIndex;
222  // Add 'nextOperand' to worklist.
223  worklist.push_back({nextOperand, 0});
224  } else {
225  // Post-visit: done visiting operands AffineApplyOp, pop off stack.
226  worklist.pop_back();
227  }
228  }
229 }
230 
231 // Builds a system of constraints with dimensional variables corresponding to
232 // the loop IVs of the forOps appearing in that order. Any symbols founds in
233 // the bound operands are added as symbols in the system. Returns failure for
234 // the yet unimplemented cases.
235 // TODO: Handle non-unit steps through local variables or stride information in
236 // FlatAffineValueConstraints. (For eg., by using iv - lb % step = 0 and/or by
237 // introducing a method in FlatAffineValueConstraints
238 // setExprStride(ArrayRef<int64_t> expr, int64_t stride)
240  FlatAffineValueConstraints *domain) {
241  SmallVector<Value, 4> indices;
243  size_t numDims = 0;
244  for (Operation *op : ops) {
245  if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
246  LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
247  "parallel ops");
248  return failure();
249  }
250  if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
251  loopOps.push_back(forOp);
252  // An AffineForOp retains only 1 induction variable.
253  numDims += 1;
254  } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
255  loopOps.push_back(parallelOp);
256  numDims += parallelOp.getNumDims();
257  }
258  }
259  extractInductionVars(loopOps, indices);
260  // Reset while associating Values in 'indices' to the domain.
261  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
262  /*numLocals=*/0, indices);
263  for (Operation *op : ops) {
264  // Add constraints from forOp's bounds.
265  if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
266  if (failed(domain->addAffineForOpDomain(forOp)))
267  return failure();
268  } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
269  domain->addAffineIfOpDomain(ifOp);
270  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
271  if (failed(domain->addAffineParallelOpDomain(parallelOp)))
272  return failure();
273  }
274  return success();
275 }
276 
277 /// Computes the iteration domain for 'op' and populates 'indexSet', which
278 /// encapsulates the constraints involving loops surrounding 'op' and
279 /// potentially involving any Function symbols. The dimensional variables in
280 /// 'indexSet' correspond to the loops surrounding 'op' from outermost to
281 /// innermost.
282 static LogicalResult getOpIndexSet(Operation *op,
283  FlatAffineValueConstraints *indexSet) {
285  getEnclosingAffineOps(*op, &ops);
286  return getIndexSet(ops, indexSet);
287 }
288 
289 // Returns the number of outer loop common to 'src/dstDomain'.
290 // Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
291 static unsigned
293  const FlatAffineValueConstraints &dstDomain,
294  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
295  // Find the number of common loops shared by src and dst accesses.
296  unsigned minNumLoops =
297  std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
298  unsigned numCommonLoops = 0;
299  for (unsigned i = 0; i < minNumLoops; ++i) {
300  if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
301  !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
302  (!isAffineForInductionVar(dstDomain.getValue(i)) &&
303  !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
304  srcDomain.getValue(i) != dstDomain.getValue(i))
305  break;
306  if (commonLoops != nullptr)
307  commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
308  ++numCommonLoops;
309  }
310  if (commonLoops != nullptr)
311  assert(commonLoops->size() == numCommonLoops);
312  return numCommonLoops;
313 }
314 
315 /// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
316 /// `opB` should be in the same affine scope. Returns nullptr if such a block
317 /// does not exist (when the two ops are in different blocks of an op starting
318 /// an `AffineScope`).
320  // Get the chain of ancestor blocks for the given `MemRefAccess` instance. The
321  // chain extends up to and includnig an op that starts an affine scope.
322  auto getChainOfAncestorBlocks =
323  [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
324  Block *currBlock = op->getBlock();
325  // Loop terminates when the currBlock is nullptr or its parent operation
326  // holds an affine scope.
327  while (currBlock &&
328  !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
329  ancestorBlocks.push_back(currBlock);
330  currBlock = currBlock->getParentOp()->getBlock();
331  }
332  assert(currBlock &&
333  "parent op starting an affine scope is always expected");
334  ancestorBlocks.push_back(currBlock);
335  };
336 
337  // Find the closest common block.
338  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
339  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
340  getChainOfAncestorBlocks(opB, dstAncestorBlocks);
341 
342  Block *commonBlock = nullptr;
343  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
344  i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
345  i--, j--)
346  commonBlock = srcAncestorBlocks[i];
347 
348  return commonBlock;
349 }
350 
351 /// Returns true if the ancestor operation of 'srcAccess' appears before the
352 /// ancestor operation of 'dstAccess' in their common ancestral block. The
353 /// operations for `srcAccess` and `dstAccess` are expected to be in the same
354 /// affine scope and have a common surrounding block within it.
356  const MemRefAccess &dstAccess) {
357  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
358  Block *commonBlock =
359  getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
360  assert(commonBlock &&
361  "ops expected to have a common surrounding block in affine scope");
362 
363  // Check the dominance relationship between the respective ancestors of the
364  // src and dst in the Block of the innermost among the common loops.
365  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
366  assert(srcOp && "src access op must lie in common block");
367  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
368  assert(dstOp && "dest access op must lie in common block");
369 
370  // Determine whether dstOp comes after srcOp.
371  return srcOp->isBeforeInBlock(dstOp);
372 }
373 
374 // Adds ordering constraints to 'dependenceDomain' based on number of loops
375 // common to 'src/dstDomain' and requested 'loopDepth'.
376 // Note that 'loopDepth' cannot exceed the number of common loops plus one.
377 // EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
378 // *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
379 // *) If 'loopDepth == 2' then two constraints are added: i == i' and j' > j + 1
380 // *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
382  const FlatAffineValueConstraints &dstDomain,
383  unsigned loopDepth,
384  IntegerRelation *dependenceDomain) {
385  unsigned numCols = dependenceDomain->getNumCols();
386  SmallVector<int64_t, 4> eq(numCols);
387  unsigned numSrcDims = srcDomain.getNumDimVars();
388  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
389  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
390  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
391  llvm::fill(eq, 0);
392  eq[i] = -1;
393  eq[i + numSrcDims] = 1;
394  if (i == loopDepth - 1) {
395  eq[numCols - 1] = -1;
396  dependenceDomain->addInequality(eq);
397  } else {
398  dependenceDomain->addEquality(eq);
399  }
400  }
401 }
402 
403 // Computes distance and direction vectors in 'dependences', by adding
404 // variables to 'dependenceDomain' which represent the difference of the IVs,
405 // eliminating all other variables, and reading off distance vectors from
406 // equality constraints (if possible), and direction vectors from inequalities.
408  const FlatAffineValueConstraints &srcDomain,
409  const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
410  IntegerPolyhedron *dependenceDomain,
411  SmallVector<DependenceComponent, 2> *dependenceComponents) {
412  // Find the number of common loops shared by src and dst accesses.
413  SmallVector<AffineForOp, 4> commonLoops;
414  unsigned numCommonLoops =
415  getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
416  if (numCommonLoops == 0)
417  return;
418  // Compute direction vectors for requested loop depth.
419  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
420  // Add new variables to 'dependenceDomain' to represent the direction
421  // constraints for each shared loop.
422  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
423  /*num=*/numCommonLoops);
424 
425  // Add equality constraints for each common loop, setting newly introduced
426  // variable at column 'j' to the 'dst' IV minus the 'src IV.
428  eq.resize(dependenceDomain->getNumCols());
429  unsigned numSrcDims = srcDomain.getNumDimVars();
430  // Constraint variables format:
431  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
432  for (unsigned j = 0; j < numCommonLoops; ++j) {
433  llvm::fill(eq, 0);
434  eq[j] = 1;
435  eq[j + numCommonLoops] = 1;
436  eq[j + numCommonLoops + numSrcDims] = -1;
437  dependenceDomain->addEquality(eq);
438  }
439 
440  // Eliminate all variables other than the direction variables just added.
441  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);
442 
443  // Scan each common loop variable column and set direction vectors based
444  // on eliminated constraint system.
445  dependenceComponents->resize(numCommonLoops);
446  for (unsigned j = 0; j < numCommonLoops; ++j) {
447  (*dependenceComponents)[j].op = commonLoops[j].getOperation();
448  auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
449  (*dependenceComponents)[j].lb =
450  lbConst.value_or(std::numeric_limits<int64_t>::min());
451  auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
452  (*dependenceComponents)[j].ub =
453  ubConst.value_or(std::numeric_limits<int64_t>::max());
454  }
455 }
456 
458  // Create set corresponding to domain of access.
460  if (failed(getOpIndexSet(opInst, &domain)))
461  return failure();
462 
463  // Get access relation from access map.
464  AffineValueMap accessValueMap;
465  getAccessMap(&accessValueMap);
466  if (failed(getRelationFromMap(accessValueMap, rel)))
467  return failure();
468 
469  // Merge and align domain ids of `rel` with ids of `domain`. Since the domain
470  // of the access map is a subset of the domain of access, the domain ids of
471  // `rel` are guranteed to be a subset of ids of `domain`.
472  unsigned inserts = 0;
473  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
474  const Identifier domainIdi = Identifier(domain.getValue(i));
475  const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
476  const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
477  const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
478  if (itr != findEnd) {
479  rel.swapVar(i, i + std::distance(findBegin, itr));
480  } else {
481  ++inserts;
482  rel.insertVar(VarKind::SetDim, i);
483  rel.setId(VarKind::SetDim, i, domainIdi);
484  }
485  }
486 
487  // Append domain constraints to `rel`.
488  IntegerRelation domainRel = domain;
489  // For 0-d spaces, there will be no IDs. Enable if that's the case.
490  if (!domainRel.getSpace().isUsingIds())
491  domainRel.resetIds();
492  if (!rel.getSpace().isUsingIds())
493  rel.resetIds();
494  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
495  domainRel.mergeAndAlignSymbols(rel);
496  domainRel.mergeLocalVars(rel);
497  rel.append(domainRel);
498 
499  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
500  VarKind::Domain);
501 
502  return success();
503 }
504 
505 // Populates 'accessMap' with composition of AffineApplyOps reachable from
506 // indices of MemRefAccess.
508  // Get affine map from AffineLoad/Store.
509  AffineMap map;
510  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
511  map = loadOp.getAffineMap();
512  else
513  map = cast<AffineWriteOpInterface>(opInst).getAffineMap();
514 
515  SmallVector<Value, 8> operands(indices.begin(), indices.end());
516  fullyComposeAffineMapAndOperands(&map, &operands);
517  map = simplifyAffineMap(map);
518  canonicalizeMapAndOperands(&map, &operands);
519  accessMap->reset(map, operands);
520 }
521 
522 // Builds a flat affine constraint system to check if there exists a dependence
523 // between memref accesses 'srcAccess' and 'dstAccess'.
524 // Returns 'NoDependence' if the accesses can be definitively shown not to
525 // access the same element.
526 // Returns 'HasDependence' if the accesses do access the same element.
527 // Returns 'Failure' if an error or unsupported case was encountered.
528 // If a dependence exists, returns in 'dependenceComponents' a direction
529 // vector for the dependence, with a component for each loop IV in loops
530 // common to both accesses (see Dependence in AffineAnalysis.h for details).
531 //
532 // The memref access dependence check is comprised of the following steps:
533 // *) Build access relation for each access. An access relation maps elements
534 // of an iteration domain to the element(s) of an array domain accessed by
535 // that iteration of the associated statement through some array reference.
536 // *) Compute the dependence relation by composing access relation of
537 // `srcAccess` with the inverse of access relation of `dstAccess`.
538 // Doing this builds a relation between iteration domain of `srcAccess`
539 // to the iteration domain of `dstAccess` which access the same memory
540 // location.
541 // *) Add ordering constraints for `srcAccess` to be accessed before
542 // `dstAccess`.
543 //
544 // This method builds a constraint system with the following column format:
545 //
546 // [src-dim-variables, dst-dim-variables, symbols, constant]
547 //
548 // For example, given the following MLIR code with "source" and "destination"
549 // accesses to the same memref label, and symbols %M, %N, %K:
550 //
551 // affine.for %i0 = 0 to 100 {
552 // affine.for %i1 = 0 to 50 {
553 // %a0 = affine.apply
554 // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
555 // // Source memref access.
556 // store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
557 // }
558 // }
559 //
560 // affine.for %i2 = 0 to 100 {
561 // affine.for %i3 = 0 to 50 {
562 // %a1 = affine.apply
563 // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
564 // // Destination memref access.
565 // %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
566 // }
567 // }
568 //
569 // The access relation for `srcAccess` would be the following:
570 //
571 // [src_dim0, src_dim1, mem_dim0, mem_dim1, %N, %M, const]
572 // 2 -4 -1 0 1 0 0 = 0
573 // 0 3 0 -1 0 -1 0 = 0
574 // 1 0 0 0 0 0 0 >= 0
575 // -1 0 0 0 0 0 100 >= 0
576 // 0 1 0 0 0 0 0 >= 0
577 // 0 -1 0 0 0 0 50 >= 0
578 //
579 // The access relation for `dstAccess` would be the following:
580 //
581 // [dst_dim0, dst_dim1, mem_dim0, mem_dim1, %M, %K, const]
582 // 7 9 -1 0 -1 0 0 = 0
583 // 0 11 0 -1 0 -1 0 = 0
584 // 1 0 0 0 0 0 0 >= 0
585 // -1 0 0 0 0 0 100 >= 0
586 // 0 1 0 0 0 0 0 >= 0
587 // 0 -1 0 0 0 0 50 >= 0
588 //
589 // The equalities in the above relations correspond to the access maps while
590 // the inequalities corresspond to the iteration domain constraints.
591 //
592 // The dependence relation formed:
593 //
594 // [src_dim0, src_dim1, dst_dim0, dst_dim1, %M, %N, %K, const]
595 // 2 -4 -7 -9 1 1 0 0 = 0
596 // 0 3 0 -11 -1 0 1 0 = 0
597 // 1 0 0 0 0 0 0 0 >= 0
598 // -1 0 0 0 0 0 0 100 >= 0
599 // 0 1 0 0 0 0 0 0 >= 0
600 // 0 -1 0 0 0 0 0 50 >= 0
601 // 0 0 1 0 0 0 0 0 >= 0
602 // 0 0 -1 0 0 0 0 100 >= 0
603 // 0 0 0 1 0 0 0 0 >= 0
604 // 0 0 0 -1 0 0 0 50 >= 0
605 //
606 //
607 // TODO: Support AffineExprs mod/floordiv/ceildiv.
609  const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
610  unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
611  SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
612  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
613  << Twine(loopDepth) << " between:\n";);
614  LLVM_DEBUG(srcAccess.opInst->dump());
615  LLVM_DEBUG(dstAccess.opInst->dump());
616 
617  // Return 'NoDependence' if these accesses do not access the same memref.
618  if (srcAccess.memref != dstAccess.memref)
620 
621  // Return 'NoDependence' if one of these accesses is not an
622  // AffineWriteOpInterface.
623  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
624  !isa<AffineWriteOpInterface>(dstAccess.opInst))
626 
627  // We can't analyze further if the ops lie in different affine scopes or have
628  // no common block in an affine scope.
629  if (getAffineAnalysisScope(srcAccess.opInst) !=
630  getAffineAnalysisScope(dstAccess.opInst))
632  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
634 
635  // Create access relation from each MemRefAccess.
636  PresburgerSpace space = PresburgerSpace::getRelationSpace();
637  IntegerRelation srcRel(space), dstRel(space);
638  if (failed(srcAccess.getAccessRelation(srcRel)))
640  if (failed(dstAccess.getAccessRelation(dstRel)))
642 
643  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
644  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());
645 
646  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
647  // operation of 'srcAccess' does not properly dominate the ancestor
648  // operation of 'dstAccess' in the same common operation block.
649  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
650  // can exist irrespective of lexicographic ordering b/w src and dst.
651  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
652  assert(loopDepth <= numCommonLoops + 1);
653  if (!allowRAR && loopDepth > numCommonLoops &&
654  !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
656  }
657 
658  // Compute the dependence relation by composing `srcRel` with the inverse of
659  // `dstRel`. Doing this builds a relation between iteration domain of
660  // `srcAccess` to the iteration domain of `dstAccess` which access the same
661  // memory locations.
662  dstRel.inverse();
663  // For 0-d spaces, there will be no IDs. Enable if that's the case.
664  if (!dstRel.getSpace().isUsingIds())
665  dstRel.resetIds();
666  if (!srcRel.getSpace().isUsingIds())
667  srcRel.resetIds();
668  dstRel.mergeAndCompose(srcRel);
669  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
670  VarKind::Range, 0);
671  IntegerPolyhedron dependenceDomain(dstRel);
672 
673  // Add 'src' happens before 'dst' ordering constraints.
674  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);
675 
676  // Return 'NoDependence' if the solution space is empty: no dependence.
677  if (dependenceDomain.isEmpty())
679 
680  // Compute dependence direction vector and return true.
681  if (dependenceComponents != nullptr)
682  computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
683  dependenceComponents);
684 
685  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
686  LLVM_DEBUG(dependenceDomain.dump());
687 
688  FlatAffineValueConstraints result(dependenceDomain);
689  if (dependenceConstraints)
690  *dependenceConstraints = result;
692 }
693 
694 /// Gathers dependence components for dependences between all ops in loop nest
695 /// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
697  AffineForOp forOp, unsigned maxLoopDepth,
698  std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
699  // Collect all load and store ops in loop nest rooted at 'forOp'.
700  SmallVector<Operation *, 8> loadAndStoreOps;
701  forOp->walk([&](Operation *op) {
702  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
703  loadAndStoreOps.push_back(op);
704  });
705 
706  unsigned numOps = loadAndStoreOps.size();
707  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
708  for (unsigned i = 0; i < numOps; ++i) {
709  auto *srcOp = loadAndStoreOps[i];
710  MemRefAccess srcAccess(srcOp);
711  for (unsigned j = 0; j < numOps; ++j) {
712  auto *dstOp = loadAndStoreOps[j];
713  MemRefAccess dstAccess(dstOp);
714 
716  // TODO: Explore whether it would be profitable to pre-compute and store
717  // deps instead of repeatedly checking.
719  srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
720  &depComps);
721  if (hasDependence(result))
722  depCompsVec->push_back(depComps);
723  }
724  }
725  }
726 }
static LogicalResult getOpIndexSet(Operation *op, FlatAffineValueConstraints *indexSet)
Computes the iteration domain for 'op' and populates 'indexSet', which encapsulates the constraints i...
static Value getSupportedReduction(AffineForOp forOp, unsigned pos, arith::AtomicRMWKind &kind)
Get the value that is being reduced by pos-th reduction in the loop if such a reduction can be perfor...
static void computeDirectionVector(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, unsigned loopDepth, IntegerPolyhedron *dependenceDomain, SmallVector< DependenceComponent, 2 > *dependenceComponents)
static unsigned getNumCommonLoops(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, SmallVectorImpl< AffineForOp > *commonLoops=nullptr)
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess)
Returns true if the ancestor operation of 'srcAccess' appears before the ancestor operation of 'dstAc...
static bool isLocallyDefined(Value v, Operation *enclosingOp)
Returns true if v is allocated locally to enclosingOp – i.e., it is allocated by an operation nested ...
static Block * getCommonBlockInAffineScope(Operation *opA, Operation *opB)
Returns the closest surrounding block common to opA and opB.
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, unsigned loopDepth, IntegerRelation *dependenceDomain)
union mlir::linalg::@1227::ArityGroupAndKind::Kind kind
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
Block represents an ordered list of Operations.
Definition: Block.h:33
Operation * findAncestorOpInBlock(Operation &op)
Returns 'op' if 'op' lies in this block, or otherwise finds the ancestor operation of 'op' that lies ...
Definition: Block.cpp:74
void push_back(Operation *op)
Definition: Block.h:149
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:31
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
A trait of region holding operations that defines a new scope for polyhedral optimization purposes.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:749
bool isBeforeInBlock(Operation *other)
Given an operation 'other' that is within the same parent block, return whether the current operation is before 'other' in the operation list of the parent block.
Definition: Operation.cpp:385
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
bool isProperAncestor(Operation *other)
Return true if this operation is a proper ancestor of the other operation.
Definition: Operation.cpp:218
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: WalkResult.h:29
static WalkResult advance()
Definition: WalkResult.h:47
static WalkResult interrupt()
Definition: WalkResult.h:46
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
void reset(AffineMap map, ValueRange operands, ValueRange results={})
unsigned getNumResults() const
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Affine dialect ops.
void addAffineIfOpDomain(AffineIfOp ifOp)
Adds constraints imposed by the affine.if operation.
LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp)
Add constraints (lower and upper bounds) for the specified 'affine.parallel' operation's Value using ...
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
An Identifier stores a pointer to an object, such as a Value or an Operation.
An IntegerPolyhedron represents the set of points from a PresburgerSpace that satisfy a list of affine constraints.
unsigned insertVar(VarKind kind, unsigned pos, unsigned num=1) override
Insert num variables of the specified kind at position pos.
An IntegerRelation represents the set of points from a PresburgerSpace that satisfy a list of affine ...
void setId(VarKind kind, unsigned i, Identifier id)
Set the identifier for the ith variable of the specified kind of the IntegerRelation's PresburgerSpac...
virtual void swapVar(unsigned posA, unsigned posB)
Swap the posA^th variable with the posB^th variable.
ArrayRef< Identifier > getIds(VarKind kind)
Get the identifiers for the variables of specified varKind.
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
void convertVarKind(VarKind srcKind, unsigned varStart, unsigned varLimit, VarKind dstKind, unsigned pos)
Converts variables of kind srcKind in the range [varStart, varLimit) to variables of kind dstKind.
unsigned appendVar(VarKind kind, unsigned num=1)
Append num variables of the specified kind after the last variable of that kind.
virtual unsigned insertVar(VarKind kind, unsigned pos, unsigned num=1)
Insert num variables of the specified kind at position pos.
void inverse()
Invert the relation i.e., swap its domain and range.
void append(const IntegerRelation &other)
Appends constraints from other into this.
void addEquality(ArrayRef< DynamicAPInt > eq)
Adds an equality from the coefficients specified in eq.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on each equality constraint, and checking for invalid constraints.
unsigned getNumCols() const
Returns the number of columns in the constraint system.
void mergeAndCompose(const IntegerRelation &other)
Given a relation other: (A -> B), this operation merges the symbol and local variables and then takes...
IntegerPolyhedron getDomainSet() const
Return a set corresponding to all points in the domain of the relation.
void projectOut(unsigned pos, unsigned num)
Projects out (aka eliminates) num variables starting at position pos.
void addInequality(ArrayRef< DynamicAPInt > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
void mergeAndAlignSymbols(IntegerRelation &other)
Merge and align symbol variables of this and other with respect to identifiers.
unsigned mergeLocalVars(IntegerRelation &other)
Adds additional local vars to the sets such that they both have the union of the local vars in each set.
const PresburgerSpace & getSpace() const
Returns a reference to the underlying space.
PresburgerSpace is the space of all possible values of a tuple of integer valued variables/variables.
bool isUsingIds() const
Returns if identifiers are being used.
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector< SmallVector< DependenceComponent, 2 >> *depCompsVec)
Returns in 'depCompsVec', dependence components for dependences between all load and store ops in loo...
void getEnclosingAffineOps(Operation &op, SmallVectorImpl< Operation * > *ops)
Populates 'ops' with affine operations enclosing op ordered from outermost to innermost while stoppin...
Definition: Utils.cpp:770
LogicalResult getIndexSet(MutableArrayRef< Operation * > ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2799
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1617
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
bool isAffineForInductionVar(Value val)
Returns true if the provided value is the induction variable of an AffineForOp.
Definition: AffineOps.cpp:2787
bool isLoopMemoryParallel(AffineForOp forOp)
Returns true if 'forOp' doesn't have memory dependences preventing parallelization.
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
Definition: AffineOps.cpp:275
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands, bool composeAffineMin=false)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
Definition: AffineOps.cpp:1258
LogicalResult getRelationFromMap(AffineMap &map, presburger::IntegerRelation &rel)
Builds a relation from the given AffineMap/AffineValueMap map, containing all pairs of the form opera...
void extractInductionVars(ArrayRef< Operation * > affineOps, SmallVectorImpl< Value > &ivs)
Extracts the induction variables from a list of either AffineForOp or AffineParallelOp and places the...
Definition: AffineOps.cpp:2830
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence result.
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation.
Definition: Utils.cpp:1974
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if 'forOp' is a parallel loop.
bool isAffineParallelInductionVar(Value val)
Returns true if val is the induction variable of an AffineParallelOp.
Definition: AffineOps.cpp:2791
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Definition: AffineMap.cpp:766
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs, and the position of the candidate reduction argument within the list, redPos.
Checks whether two accesses to the same memref access the same element.
enum mlir::affine::DependenceResult::ResultEnum value
A description of a (parallelizable) reduction in an affine loop.
Encapsulates a memref load or store access information.
LogicalResult getAccessRelation(presburger::IntegerRelation &accessRel) const
Creates an access relation for the access.
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.