MLIR  16.0.0git
AffineAnalysis.cpp
Go to the documentation of this file.
1 //===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous analysis routines for affine structures
10 // (expressions, maps, sets), and other utilities relying on such analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
22 #include "mlir/IR/BuiltinOps.h"
23 #include "mlir/IR/IntegerSet.h"
26 #include "llvm/ADT/TypeSwitch.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 
30 #define DEBUG_TYPE "affine-analysis"
31 
32 using namespace mlir;
33 using namespace presburger;
34 
35 /// Get the value that is being reduced by `pos`-th reduction in the loop if
36 /// such a reduction can be performed by affine parallel loops. This assumes
37 /// floating-point operations are commutative. On success, `kind` will be the
38 /// reduction kind suitable for use in affine parallel loop builder. If the
39 /// reduction is not supported, returns null.
40 static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
41  arith::AtomicRMWKind &kind) {
42  SmallVector<Operation *> combinerOps;
43  Value reducedVal =
44  matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
45  if (!reducedVal)
46  return nullptr;
47 
48  // Expected only one combiner operation.
49  if (combinerOps.size() > 1)
50  return nullptr;
51 
52  Operation *combinerOp = combinerOps.back();
55  .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
56  .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
57  .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
58  .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
59  .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
60  .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
61  .Case([](arith::MinFOp) { return arith::AtomicRMWKind::minf; })
62  .Case([](arith::MaxFOp) { return arith::AtomicRMWKind::maxf; })
63  .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
64  .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
65  .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
66  .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
67  .Default([](Operation *) -> Optional<arith::AtomicRMWKind> {
68  // TODO: AtomicRMW supports other kinds of reductions this is
69  // currently not detecting, add those when the need arises.
70  return llvm::None;
71  });
72  if (!maybeKind)
73  return nullptr;
74 
75  kind = *maybeKind;
76  return reducedVal;
77 }
78 
79 /// Populate `supportedReductions` with descriptors of the supported reductions.
81  AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
82  unsigned numIterArgs = forOp.getNumIterOperands();
83  if (numIterArgs == 0)
84  return;
85  supportedReductions.reserve(numIterArgs);
86  for (unsigned i = 0; i < numIterArgs; ++i) {
87  arith::AtomicRMWKind kind;
88  if (Value value = getSupportedReduction(forOp, i, kind))
89  supportedReductions.emplace_back(LoopReduction{kind, i, value});
90  }
91 }
92 
93 /// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
94 /// provided, populates it with descriptors of the parallelizable reductions and
95 /// treats them as not preventing parallelization.
96 bool mlir::isLoopParallel(AffineForOp forOp,
97  SmallVectorImpl<LoopReduction> *parallelReductions) {
98  unsigned numIterArgs = forOp.getNumIterOperands();
99 
100  // Loop is not parallel if it has SSA loop-carried dependences and reduction
101  // detection is not requested.
102  if (numIterArgs > 0 && !parallelReductions)
103  return false;
104 
105  // Find supported reductions of requested.
106  if (parallelReductions) {
107  getSupportedReductions(forOp, *parallelReductions);
108  // Return later to allow for identifying all parallel reductions even if the
109  // loop is not parallel.
110  if (parallelReductions->size() != numIterArgs)
111  return false;
112  }
113 
114  // Check memory dependences.
115  return isLoopMemoryParallel(forOp);
116 }
117 
118 /// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
119 /// allocated by an operation nested within `enclosingOp`.
120 static bool isLocallyDefined(Value v, Operation *enclosingOp) {
121  Operation *defOp = v.getDefiningOp();
122  if (!defOp)
123  return false;
124 
125  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
126  enclosingOp->isProperAncestor(defOp))
127  return true;
128 
129  // Aliasing ops.
130  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
131  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
132 }
133 
134 bool mlir::isLoopMemoryParallel(AffineForOp forOp) {
135  // Any memref-typed iteration arguments are treated as serializing.
136  if (llvm::any_of(forOp.getResultTypes(),
137  [](Type type) { return type.isa<BaseMemRefType>(); }))
138  return false;
139 
140  // Collect all load and store ops in loop nest rooted at 'forOp'.
141  SmallVector<Operation *, 8> loadAndStoreOps;
142  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
143  if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
144  // Memrefs that are allocated inside `forOp` need not be considered.
145  if (!isLocallyDefined(readOp.getMemRef(), forOp))
146  loadAndStoreOps.push_back(op);
147  } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
148  // Filter out stores the same way as above.
149  if (!isLocallyDefined(writeOp.getMemRef(), forOp))
150  loadAndStoreOps.push_back(op);
151  } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
152  !hasSingleEffect<MemoryEffects::Allocate>(op) &&
153  !MemoryEffectOpInterface::hasNoEffect(op)) {
154  // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
155  // as long as they don't escape the loop (which has been checked above).
156  return WalkResult::interrupt();
157  }
158 
159  return WalkResult::advance();
160  });
161 
162  // Stop early if the loop has unknown ops with side effects.
163  if (walkResult.wasInterrupted())
164  return false;
165 
166  // Dep check depth would be number of enclosing loops + 1.
167  unsigned depth = getNestingDepth(forOp) + 1;
168 
169  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
170  for (auto *srcOp : loadAndStoreOps) {
171  MemRefAccess srcAccess(srcOp);
172  for (auto *dstOp : loadAndStoreOps) {
173  MemRefAccess dstAccess(dstOp);
174  FlatAffineValueConstraints dependenceConstraints;
176  srcAccess, dstAccess, depth, &dependenceConstraints,
177  /*dependenceComponents=*/nullptr);
179  return false;
180  }
181  }
182  return true;
183 }
184 
185 /// Returns the sequence of AffineApplyOp Operations operation in
186 /// 'affineApplyOps', which are reachable via a search starting from 'operands',
187 /// and ending at operands which are not defined by AffineApplyOps.
188 // TODO: Add a method to AffineApplyOp which forward substitutes the
189 // AffineApplyOp into any user AffineApplyOps.
191  ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
192  struct State {
193  // The ssa value for this node in the DFS traversal.
194  Value value;
195  // The operand index of 'value' to explore next during DFS traversal.
196  unsigned operandIndex;
197  };
198  SmallVector<State, 4> worklist;
199  for (auto operand : operands) {
200  worklist.push_back({operand, 0});
201  }
202 
203  while (!worklist.empty()) {
204  State &state = worklist.back();
205  auto *opInst = state.value.getDefiningOp();
206  // Note: getDefiningOp will return nullptr if the operand is not an
207  // Operation (i.e. block argument), which is a terminator for the search.
208  if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
209  worklist.pop_back();
210  continue;
211  }
212 
213  if (state.operandIndex == 0) {
214  // Pre-Visit: Add 'opInst' to reachable sequence.
215  affineApplyOps.push_back(opInst);
216  }
217  if (state.operandIndex < opInst->getNumOperands()) {
218  // Visit: Add next 'affineApplyOp' operand to worklist.
219  // Get next operand to visit at 'operandIndex'.
220  auto nextOperand = opInst->getOperand(state.operandIndex);
221  // Increment 'operandIndex' in 'state'.
222  ++state.operandIndex;
223  // Add 'nextOperand' to worklist.
224  worklist.push_back({nextOperand, 0});
225  } else {
226  // Post-visit: done visiting operands AffineApplyOp, pop off stack.
227  worklist.pop_back();
228  }
229  }
230 }
231 
232 // Builds a system of constraints with dimensional variables corresponding to
233 // the loop IVs of the forOps appearing in that order. Any symbols founds in
234 // the bound operands are added as symbols in the system. Returns failure for
235 // the yet unimplemented cases.
236 // TODO: Handle non-unit steps through local variables or stride information in
237 // FlatAffineValueConstraints. (For eg., by using iv - lb % step = 0 and/or by
238 // introducing a method in FlatAffineValueConstraints
239 // setExprStride(ArrayRef<int64_t> expr, int64_t stride)
241  FlatAffineValueConstraints *domain) {
242  SmallVector<Value, 4> indices;
244 
245  for (Operation *op : ops) {
246  assert((isa<AffineForOp, AffineIfOp>(op)) &&
247  "ops should have either AffineForOp or AffineIfOp");
248  if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
249  forOps.push_back(forOp);
250  }
251  extractForInductionVars(forOps, &indices);
252  // Reset while associated Values in 'indices' to the domain.
253  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
254  for (Operation *op : ops) {
255  // Add constraints from forOp's bounds.
256  if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
257  if (failed(domain->addAffineForOpDomain(forOp)))
258  return failure();
259  } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
260  domain->addAffineIfOpDomain(ifOp);
261  }
262  }
263  return success();
264 }
265 
266 /// Computes the iteration domain for 'op' and populates 'indexSet', which
267 /// encapsulates the constraints involving loops surrounding 'op' and
268 /// potentially involving any Function symbols. The dimensional variables in
269 /// 'indexSet' correspond to the loops surrounding 'op' from outermost to
270 /// innermost.
272  FlatAffineValueConstraints *indexSet) {
275  return getIndexSet(ops, indexSet);
276 }
277 
278 // Returns the number of outer loop common to 'src/dstDomain'.
279 // Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
280 static unsigned
282  const FlatAffineValueConstraints &dstDomain,
283  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
284  // Find the number of common loops shared by src and dst accesses.
285  unsigned minNumLoops =
286  std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
287  unsigned numCommonLoops = 0;
288  for (unsigned i = 0; i < minNumLoops; ++i) {
289  if (!isForInductionVar(srcDomain.getValue(i)) ||
290  !isForInductionVar(dstDomain.getValue(i)) ||
291  srcDomain.getValue(i) != dstDomain.getValue(i))
292  break;
293  if (commonLoops != nullptr)
294  commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
295  ++numCommonLoops;
296  }
297  if (commonLoops != nullptr)
298  assert(commonLoops->size() == numCommonLoops);
299  return numCommonLoops;
300 }
301 
302 /// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
303 /// `opB` should be in the same affine scope and thus such a block is guaranteed
304 /// to exist.
306  // Get the chain of ancestor blocks for the given `MemRefAccess` instance. The
307  // chain extends up to and includnig an op that starts an affine scope.
308  auto getChainOfAncestorBlocks =
309  [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
310  Block *currBlock = op->getBlock();
311  // Loop terminates when the currBlock is nullptr or its parent operation
312  // holds an affine scope.
313  while (currBlock &&
314  !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
315  ancestorBlocks.push_back(currBlock);
316  currBlock = currBlock->getParentOp()->getBlock();
317  }
318  assert(currBlock &&
319  "parent op starting an affine scope is always expected");
320  ancestorBlocks.push_back(currBlock);
321  };
322 
323  // Find the closest common block including those in AffineIf.
324  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
325  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
326  getChainOfAncestorBlocks(opB, dstAncestorBlocks);
327 
328  Block *commonBlock = nullptr;
329  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
330  i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
331  i--, j--)
332  commonBlock = srcAncestorBlocks[i];
333  // This is guaranteed since both ops are from the same affine scope.
334  assert(commonBlock && "ops expected to have a common surrounding block");
335  return commonBlock;
336 }
337 
338 /// Returns true if the ancestor operation of 'srcAccess' appears before the
339 /// ancestor operation of 'dstAccess' in their common ancestral block. The
340 /// operations for `srcAccess` and `dstAccess` are expected to be in the same
341 /// affine scope.
343  const MemRefAccess &dstAccess) {
344  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
345  auto *commonBlock = getCommonBlock(srcAccess.opInst, dstAccess.opInst);
346  // Check the dominance relationship between the respective ancestors of the
347  // src and dst in the Block of the innermost among the common loops.
348  auto *srcInst = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
349  assert(srcInst && "src access op must lie in common block");
350  auto *dstInst = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
351  assert(dstInst && "dest access op must lie in common block");
352 
353  // Determine whether dstInst comes after srcInst.
354  return srcInst->isBeforeInBlock(dstInst);
355 }
356 
357 // Adds ordering constraints to 'dependenceDomain' based on number of loops
358 // common to 'src/dstDomain' and requested 'loopDepth'.
359 // Note that 'loopDepth' cannot exceed the number of common loops plus one.
360 // EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
361 // *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
362 // *) If 'loopDepth == 2' then two constraints are added: i == i' and j' > j + 1
363 // *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
364 static void
366  const FlatAffineValueConstraints &dstDomain,
367  unsigned loopDepth,
368  FlatAffineValueConstraints *dependenceDomain) {
369  unsigned numCols = dependenceDomain->getNumCols();
370  SmallVector<int64_t, 4> eq(numCols);
371  unsigned numSrcDims = srcDomain.getNumDimVars();
372  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
373  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
374  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
375  std::fill(eq.begin(), eq.end(), 0);
376  eq[i] = -1;
377  eq[i + numSrcDims] = 1;
378  if (i == loopDepth - 1) {
379  eq[numCols - 1] = -1;
380  dependenceDomain->addInequality(eq);
381  } else {
382  dependenceDomain->addEquality(eq);
383  }
384  }
385 }
386 
387 // Computes distance and direction vectors in 'dependences', by adding
388 // variables to 'dependenceDomain' which represent the difference of the IVs,
389 // eliminating all other variables, and reading off distance vectors from
390 // equality constraints (if possible), and direction vectors from inequalities.
392  const FlatAffineValueConstraints &srcDomain,
393  const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
394  FlatAffineValueConstraints *dependenceDomain,
395  SmallVector<DependenceComponent, 2> *dependenceComponents) {
396  // Find the number of common loops shared by src and dst accesses.
397  SmallVector<AffineForOp, 4> commonLoops;
398  unsigned numCommonLoops =
399  getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
400  if (numCommonLoops == 0)
401  return;
402  // Compute direction vectors for requested loop depth.
403  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
404  // Add new variables to 'dependenceDomain' to represent the direction
405  // constraints for each shared loop.
406  dependenceDomain->insertDimVar(/*pos=*/0, /*num=*/numCommonLoops);
407 
408  // Add equality constraints for each common loop, setting newly introduced
409  // variable at column 'j' to the 'dst' IV minus the 'src IV.
411  eq.resize(dependenceDomain->getNumCols());
412  unsigned numSrcDims = srcDomain.getNumDimVars();
413  // Constraint variables format:
414  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
415  for (unsigned j = 0; j < numCommonLoops; ++j) {
416  std::fill(eq.begin(), eq.end(), 0);
417  eq[j] = 1;
418  eq[j + numCommonLoops] = 1;
419  eq[j + numCommonLoops + numSrcDims] = -1;
420  dependenceDomain->addEquality(eq);
421  }
422 
423  // Eliminate all variables other than the direction variables just added.
424  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);
425 
426  // Scan each common loop variable column and set direction vectors based
427  // on eliminated constraint system.
428  dependenceComponents->resize(numCommonLoops);
429  for (unsigned j = 0; j < numCommonLoops; ++j) {
430  (*dependenceComponents)[j].op = commonLoops[j].getOperation();
431  auto lbConst = dependenceDomain->getConstantBound(IntegerPolyhedron::LB, j);
432  (*dependenceComponents)[j].lb =
433  lbConst.value_or(std::numeric_limits<int64_t>::min());
434  auto ubConst = dependenceDomain->getConstantBound(IntegerPolyhedron::UB, j);
435  (*dependenceComponents)[j].ub =
436  ubConst.value_or(std::numeric_limits<int64_t>::max());
437  }
438 }
439 
441  // Create set corresponding to domain of access.
443  if (failed(getOpIndexSet(opInst, &domain)))
444  return failure();
445 
446  // Get access relation from access map.
447  AffineValueMap accessValueMap;
448  getAccessMap(&accessValueMap);
449  if (failed(getRelationFromMap(accessValueMap, rel)))
450  return failure();
451 
452  FlatAffineRelation domainRel(rel.getNumDomainDims(), /*numRangeDims=*/0,
453  domain);
454 
455  // Merge and align domain ids of `ret` and ids of `domain`. Since the domain
456  // of the access map is a subset of the domain of access, the domain ids of
457  // `ret` are guranteed to be a subset of ids of `domain`.
458  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
459  unsigned loc;
460  if (rel.findVar(domain.getValue(i), &loc)) {
461  rel.swapVar(i, loc);
462  } else {
463  rel.insertDomainVar(i);
464  rel.setValue(i, domain.getValue(i));
465  }
466  }
467 
468  // Append domain constraints to `rel`.
469  domainRel.appendRangeVar(rel.getNumRangeDims());
470  domainRel.mergeSymbolVars(rel);
471  domainRel.mergeLocalVars(rel);
472  rel.append(domainRel);
473 
474  return success();
475 }
476 
477 // Populates 'accessMap' with composition of AffineApplyOps reachable from
478 // indices of MemRefAccess.
480  // Get affine map from AffineLoad/Store.
481  AffineMap map;
482  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
483  map = loadOp.getAffineMap();
484  else
485  map = cast<AffineWriteOpInterface>(opInst).getAffineMap();
486 
487  SmallVector<Value, 8> operands(indices.begin(), indices.end());
488  fullyComposeAffineMapAndOperands(&map, &operands);
489  map = simplifyAffineMap(map);
490  canonicalizeMapAndOperands(&map, &operands);
491  accessMap->reset(map, operands);
492 }
493 
494 // Builds a flat affine constraint system to check if there exists a dependence
495 // between memref accesses 'srcAccess' and 'dstAccess'.
496 // Returns 'NoDependence' if the accesses can be definitively shown not to
497 // access the same element.
498 // Returns 'HasDependence' if the accesses do access the same element.
499 // Returns 'Failure' if an error or unsupported case was encountered.
500 // If a dependence exists, returns in 'dependenceComponents' a direction
501 // vector for the dependence, with a component for each loop IV in loops
502 // common to both accesses (see Dependence in AffineAnalysis.h for details).
503 //
504 // The memref access dependence check is comprised of the following steps:
505 // *) Build access relation for each access. An access relation maps elements
506 // of an iteration domain to the element(s) of an array domain accessed by
507 // that iteration of the associated statement through some array reference.
508 // *) Compute the dependence relation by composing access relation of
509 // `srcAccess` with the inverse of access relation of `dstAccess`.
510 // Doing this builds a relation between iteration domain of `srcAccess`
511 // to the iteration domain of `dstAccess` which access the same memory
512 // location.
513 // *) Add ordering constraints for `srcAccess` to be accessed before
514 // `dstAccess`.
515 //
516 // This method builds a constraint system with the following column format:
517 //
518 // [src-dim-variables, dst-dim-variables, symbols, constant]
519 //
520 // For example, given the following MLIR code with "source" and "destination"
521 // accesses to the same memref label, and symbols %M, %N, %K:
522 //
523 // affine.for %i0 = 0 to 100 {
524 // affine.for %i1 = 0 to 50 {
525 // %a0 = affine.apply
526 // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
527 // // Source memref access.
528 // store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
529 // }
530 // }
531 //
532 // affine.for %i2 = 0 to 100 {
533 // affine.for %i3 = 0 to 50 {
534 // %a1 = affine.apply
535 // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
536 // // Destination memref access.
537 // %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
538 // }
539 // }
540 //
541 // The access relation for `srcAccess` would be the following:
542 //
543 // [src_dim0, src_dim1, mem_dim0, mem_dim1, %N, %M, const]
544 // 2 -4 -1 0 1 0 0 = 0
545 // 0 3 0 -1 0 -1 0 = 0
546 // 1 0 0 0 0 0 0 >= 0
547 // -1 0 0 0 0 0 100 >= 0
548 // 0 1 0 0 0 0 0 >= 0
549 // 0 -1 0 0 0 0 50 >= 0
550 //
551 // The access relation for `dstAccess` would be the following:
552 //
553 // [dst_dim0, dst_dim1, mem_dim0, mem_dim1, %M, %K, const]
554 // 7 9 -1 0 -1 0 0 = 0
555 // 0 11 0 -1 0 -1 0 = 0
556 // 1 0 0 0 0 0 0 >= 0
557 // -1 0 0 0 0 0 100 >= 0
558 // 0 1 0 0 0 0 0 >= 0
559 // 0 -1 0 0 0 0 50 >= 0
560 //
561 // The equalities in the above relations correspond to the access maps while
562 // the inequalities corresspond to the iteration domain constraints.
563 //
564 // The dependence relation formed:
565 //
566 // [src_dim0, src_dim1, dst_dim0, dst_dim1, %M, %N, %K, const]
567 // 2 -4 -7 -9 1 1 0 0 = 0
568 // 0 3 0 -11 -1 0 1 0 = 0
569 // 1 0 0 0 0 0 0 0 >= 0
570 // -1 0 0 0 0 0 0 100 >= 0
571 // 0 1 0 0 0 0 0 0 >= 0
572 // 0 -1 0 0 0 0 0 50 >= 0
573 // 0 0 1 0 0 0 0 0 >= 0
574 // 0 0 -1 0 0 0 0 100 >= 0
575 // 0 0 0 1 0 0 0 0 >= 0
576 // 0 0 0 -1 0 0 0 50 >= 0
577 //
578 //
579 // TODO: Support AffineExprs mod/floordiv/ceildiv.
581  const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
582  unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
583  SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
584  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
585  << Twine(loopDepth) << " between:\n";);
586  LLVM_DEBUG(srcAccess.opInst->dump(););
587  LLVM_DEBUG(dstAccess.opInst->dump(););
588 
589  // Return 'NoDependence' if these accesses do not access the same memref.
590  if (srcAccess.memref != dstAccess.memref)
592 
593  // Return 'NoDependence' if one of these accesses is not an
594  // AffineWriteOpInterface.
595  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
596  !isa<AffineWriteOpInterface>(dstAccess.opInst))
598 
599  // We can't analyze further if the ops lie in different affine scopes.
600  if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
602 
603  // Create access relation from each MemRefAccess.
604  FlatAffineRelation srcRel, dstRel;
605  if (failed(srcAccess.getAccessRelation(srcRel)))
607  if (failed(dstAccess.getAccessRelation(dstRel)))
609 
610  FlatAffineValueConstraints srcDomain = srcRel.getDomainSet();
611  FlatAffineValueConstraints dstDomain = dstRel.getDomainSet();
612 
613  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
614  // operation of 'srcAccess' does not properly dominate the ancestor
615  // operation of 'dstAccess' in the same common operation block.
616  // Note: this check is skipped if 'allowRAR' is true, because because RAR
617  // deps can exist irrespective of lexicographic ordering b/w src and dst.
618  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
619  assert(loopDepth <= numCommonLoops + 1);
620  if (!allowRAR && loopDepth > numCommonLoops &&
621  !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
623  }
624 
625  // Compute the dependence relation by composing `srcRel` with the inverse of
626  // `dstRel`. Doing this builds a relation between iteration domain of
627  // `srcAccess` to the iteration domain of `dstAccess` which access the same
628  // memory locations.
629  dstRel.inverse();
630  dstRel.compose(srcRel);
631  *dependenceConstraints = dstRel;
632 
633  // Add 'src' happens before 'dst' ordering constraints.
634  addOrderingConstraints(srcDomain, dstDomain, loopDepth,
635  dependenceConstraints);
636 
637  // Return 'NoDependence' if the solution space is empty: no dependence.
638  if (dependenceConstraints->isEmpty())
640 
641  // Compute dependence direction vector and return true.
642  if (dependenceComponents != nullptr)
643  computeDirectionVector(srcDomain, dstDomain, loopDepth,
644  dependenceConstraints, dependenceComponents);
645 
646  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
647  LLVM_DEBUG(dependenceConstraints->dump());
649 }
650 
651 /// Gathers dependence components for dependences between all ops in loop nest
652 /// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
654  AffineForOp forOp, unsigned maxLoopDepth,
655  std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
656  // Collect all load and store ops in loop nest rooted at 'forOp'.
657  SmallVector<Operation *, 8> loadAndStoreOps;
658  forOp->walk([&](Operation *op) {
659  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
660  loadAndStoreOps.push_back(op);
661  });
662 
663  unsigned numOps = loadAndStoreOps.size();
664  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
665  for (unsigned i = 0; i < numOps; ++i) {
666  auto *srcOp = loadAndStoreOps[i];
667  MemRefAccess srcAccess(srcOp);
668  for (unsigned j = 0; j < numOps; ++j) {
669  auto *dstOp = loadAndStoreOps[j];
670  MemRefAccess dstAccess(dstOp);
671 
672  FlatAffineValueConstraints dependenceConstraints;
674  // TODO: Explore whether it would be profitable to pre-compute and store
675  // deps instead of repeatedly checking.
677  srcAccess, dstAccess, d, &dependenceConstraints, &depComps);
678  if (hasDependence(result))
679  depCompsVec->push_back(depComps);
680  }
681  }
682  }
683 }
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints, SmallVector< DependenceComponent, 2 > *dependenceComponents, bool allowRAR=false)
Include the generated interface declarations.
void getDependenceComponents(AffineForOp forOp, unsigned maxLoopDepth, std::vector< SmallVector< DependenceComponent, 2 >> *depCompsVec)
Returns in 'depCompsVec', dependence components for dependences between all load and store ops in loo...
unsigned getNumDomainDims() const
Returns the number of variables corresponding to domain/range of relation.
void reset(unsigned numReservedInequalities, unsigned numReservedEqualities, unsigned numReservedCols, unsigned numDims, unsigned numSymbols, unsigned numLocals=0)
Clears any existing data and reserves memory for the specified constraints.
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:28
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
Block represents an ordered list of Operations.
Definition: Block.h:29
void addEquality(ArrayRef< int64_t > eq)
Adds an equality from the coefficients specified in eq.
A FlatAffineRelation represents a set of ordered pairs (domain -> range) where "domain" and "range" a...
A trait of region holding operations that defines a new scope for polyhedral optimization purposes...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
Definition: LogicalResult.h:72
void addAffineIfOpDomain(AffineIfOp ifOp)
Adds constraints imposed by the affine.if operation.
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if `forOp' is a parallel loop.
void getAccessMap(AffineValueMap *accessMap) const
Populates 'accessMap' with composition of AffineApplyOps reachable from 'indices'.
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:144
Checks whether two accesses to the same memref access the same element.
LogicalResult getRelationFromMap(AffineMap &map, FlatAffineRelation &rel)
Builds a relation from the given AffineMap/AffineValueMap map, containing all pairs of the form opera...
void extractForInductionVars(ArrayRef< AffineForOp > forInsts, SmallVectorImpl< Value > *ivs)
Extracts the induction variables from a list of AffineForOps and places them in the output argument i...
Definition: AffineOps.cpp:2157
static constexpr const bool value
bool isLoopMemoryParallel(AffineForOp forOp)
Returns true if `forOp' doesn't have memory dependences preventing parallelization.
unsigned insertDimVar(unsigned pos, unsigned num=1)
Insert variables of the specified kind at position pos.
void addInequality(ArrayRef< int64_t > inEq)
Adds an inequality (>= 0) from the coefficients specified in inEq.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
void swapVar(unsigned posA, unsigned posB) override
Swap the posA^th variable with the posB^th variable.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess)
Returns true if the ancestor operation of &#39;srcAccess&#39; appears before the ancestor operation of &#39;dstAc...
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
Definition: AffineOps.cpp:701
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:528
enum mlir::DependenceResult::ResultEnum value
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1066
static WalkResult advance()
Definition: Visitors.h:51
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation *> &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
LogicalResult getAccessRelation(FlatAffineRelation &accessRel) const
Creates an access relation for the access.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued...
Definition: AffineMap.h:42
bool isForInductionVar(Value val)
Returns true if the provided value is the induction variable of a AffineForOp.
Definition: AffineOps.cpp:2138
static WalkResult interrupt()
Definition: Visitors.h:50
A utility result that is used to signal how to proceed with an ongoing walk:
Definition: Visitors.h:34
void getSupportedReductions(AffineForOp forOp, SmallVectorImpl< LoopReduction > &supportedReductions)
Populate supportedReductions with descriptors of the supported reductions.
static Value getSupportedReduction(AffineForOp forOp, unsigned pos, arith::AtomicRMWKind &kind)
Get the value that is being reduced by pos-th reduction in the loop if such a reduction can be perfor...
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.
bool findVar(Value val, unsigned *pos) const
Looks up the position of the variable with the specified Value.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:72
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
void projectOut(Value val)
Projects out the variable that is associated with the given Value.
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: AffineOps.cpp:2144
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
void append(const IntegerRelation &other)
Appends constraints from other into this.
static unsigned getNumCommonLoops(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, SmallVectorImpl< AffineForOp > *commonLoops=nullptr)
void getEnclosingAffineForAndIfOps(Operation &op, SmallVectorImpl< Operation *> *ops)
Populates 'ops' with IVs of the loops surrounding op, along with affine.if operations interleaved bet...
Definition: Utils.cpp:51
static void computeDirectionVector(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, unsigned loopDepth, FlatAffineValueConstraints *dependenceDomain, SmallVector< DependenceComponent, 2 > *dependenceComponents)
Region * getAffineScope(Operation *op)
Returns the closest region enclosing op that is held by an operation with trait AffineScope; nullptr ...
Definition: AffineOps.cpp:246
void push_back(Operation *op)
Definition: Block.h:140
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Definition: AffineMap.cpp:634
LogicalResult getIndexSet(MutableArrayRef< Operation *> ops, FlatAffineValueConstraints *domain)
Builds a system of constraints with dimensional variables corresponding to the loop IVs of the forOps...
This class provides a shared interface for ranked and unranked memref types.
Definition: BuiltinTypes.h:112
FlatAffineValueConstraints represents an extension of IntegerPolyhedron where each non-local variable...
static Block * getCommonBlock(Operation *opA, Operation *opB)
Returns the closest surrounding block common to opA and opB.
bool isEmpty() const
Checks for emptiness by performing variable elimination on all variables, running the GCD test on eac...
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
unsigned getNumCols() const
Returns the number of columns in the constraint system.
LogicalResult addAffineForOpDomain(AffineForOp forOp)
Adds constraints (lower and upper bounds) for the specified 'affine.for' operation's Value using IR i...
void insertDomainVar(unsigned pos, unsigned num=1)
Insert num variables of the specified kind after the pos variable of that kind.
static bool isLocallyDefined(Value v, Operation *enclosingOp)
Returns true if v is allocated locally to enclosingOp – i.e., it is allocated by an operation nested...
void inverse()
Swap domain and range of the relation.
void setValue(unsigned pos, Value val)
Sets the Value associated with the pos^th variable.
FlatAffineValueConstraints getDomainSet() const
Returns a set corresponding to the domain/range of the affine relation.
Encapsulates a memref load or store access information.
static LogicalResult getOpIndexSet(Operation *op, FlatAffineValueConstraints *indexSet)
Computes the iteration domain for 'op' and populates 'indexSet', which encapsulates the constraints i...
unsigned getNumRangeDims() const
Value getValue(unsigned pos) const
Returns the Value associated with the pos^th variable.
A description of a (parallelizable) reduction in an affine loop.
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes...
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation *> &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain, const FlatAffineValueConstraints &dstDomain, unsigned loopDepth, FlatAffineValueConstraints *dependenceDomain)
unsigned getNestingDepth(Operation *op)
Returns the nesting depth of this operation, i.e., the number of loops surrounding this operation...
Definition: Utils.cpp:1241
void compose(const FlatAffineRelation &other)
Given affine relation other: (domainOther -> rangeOther), this operation takes the composition of oth...
void reset(AffineMap map, ValueRange operands, ValueRange results={})
bool isProperAncestor(Operation *other)
Return true if this operation is a proper ancestor of the other operation.
Definition: Operation.cpp:172
Optional< int64_t > getConstantBound(BoundType type, unsigned pos) const
Returns the constant bound for the pos^th variable if there is one; None otherwise.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Operation * opInst