MLIR  14.0.0git
AffineAnalysis.cpp
//===- AffineAnalysis.cpp - Affine structures analysis routines ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;

/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  Optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, Optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case([](arith::MinFOp) { return arith::AtomicRMWKind::minf; })
          .Case([](arith::MaxFOp) { return arith::AtomicRMWKind::maxf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> Optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that this is
            // currently not detecting; add those when the need arises.
            return llvm::None;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported reductions.
void mlir::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp` is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
bool mlir::isLoopParallel(AffineForOp forOp,
                          SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if
    // the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}
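
// Illustrative sketch, not part of the upstream file: one way a
// parallelization pass might drive the entry points above. The helper name
// `collectParallelLoops` and its signature are hypothetical.
static void collectParallelLoops(FuncOp func,
                                 SmallVectorImpl<AffineForOp> &parallelLoops) {
  func.walk([&](AffineForOp forOp) {
    // Each LoopReduction records the reduction kind, the iter_arg position,
    // and the value being reduced; a loop with iter_args can still be
    // parallel if every loop-carried value is a supported reduction.
    SmallVector<LoopReduction> reductions;
    if (isLoopParallel(forOp, &reductions))
      parallelLoops.push_back(forOp);
  });
}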

/// Returns true if `forOp` doesn't have memory dependences preventing
/// parallelization. This function doesn't check iter_args and should be used
/// only as a building block for full parallel-checking functions.
bool mlir::isLoopMemoryParallel(AffineForOp forOp) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
    else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
             !MemoryEffectOpInterface::hasNoEffect(op))
      return WalkResult::interrupt();

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      FlatAffineValueConstraints dependenceConstraints;
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, dstAccess, depth, &dependenceConstraints,
          /*dependenceComponents=*/nullptr);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns in 'affineApplyOps' the sequence of AffineApplyOp operations that
/// are reachable via a search starting from 'operands', ending at operands
/// that are not defined by AffineApplyOps.
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The SSA value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. a block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-visit: add 'opInst' to the reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: add the next AffineApplyOp operand to the worklist.
      // Get next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting the AffineApplyOp's operands, pop it off the
      // stack.
      worklist.pop_back();
    }
  }
}
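
// Illustrative sketch, not part of the upstream file: dumping the affine.apply
// chain that feeds the index operands of a memref access, e.g. before
// composing it into the access map. The helper name `dumpReachableApplyOps`
// is hypothetical.
static void dumpReachableApplyOps(const MemRefAccess &access) {
  SmallVector<Operation *, 4> applyOps;
  getReachableAffineApplyOps(access.indices, applyOps);
  LLVM_DEBUG({
    llvm::dbgs() << "affine.apply ops feeding the access:\n";
    for (Operation *applyOp : applyOps)
      applyOp->dump();
  });
}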

// Builds a system of constraints with dimensional identifiers corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints (for e.g., by using (iv - lb) % step = 0 and/or
// by introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)).
LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
                                FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<AffineForOp, 8> forOps;

  for (Operation *op : ops) {
    assert((isa<AffineForOp, AffineIfOp>(op)) &&
           "ops should be AffineForOp or AffineIfOp");
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
      forOps.push_back(forOp);
  }
  extractForInductionVars(forOps, &indices);
  // Reset the domain, associating the Values in 'indices' with its dimensions.
  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    }
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional identifiers in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineForAndIfOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimIds(), dstDomain.getNumDimIds());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if (!isForInductionVar(srcDomain.getValue(i)) ||
        !isForInductionVar(dstDomain.getValue(i)) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
static Block *getCommonBlock(const MemRefAccess &srcAccess,
                             const MemRefAccess &dstAccess,
                             const FlatAffineValueConstraints &srcDomain,
                             unsigned numCommonLoops) {
  // Get the chain of ancestor blocks for the given `MemRefAccess` instance.
  // The search terminates when either an op with the `AffineScope` trait or
  // `endBlock` is reached.
  auto getChainOfAncestorBlocks = [&](const MemRefAccess &access,
                                      SmallVector<Block *, 4> &ancestorBlocks,
                                      Block *endBlock = nullptr) {
    Block *currBlock = access.opInst->getBlock();
    // The loop terminates when currBlock is nullptr or equals endBlock, or
    // when its parent operation holds an affine scope.
    while (currBlock && currBlock != endBlock &&
           !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
      ancestorBlocks.push_back(currBlock);
      currBlock = currBlock->getParentOp()->getBlock();
    }
  };

  if (numCommonLoops == 0) {
    Block *block = srcAccess.opInst->getBlock();
    while (!llvm::isa<FuncOp>(block->getParentOp())) {
      block = block->getParentOp()->getBlock();
    }
    return block;
  }
  Value commonForIV = srcDomain.getValue(numCommonLoops - 1);
  AffineForOp forOp = getForInductionVarOwner(commonForIV);
  assert(forOp && "commonForValue was not an induction variable");

  // Find the closest common block, including those in AffineIf.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(srcAccess, srcAncestorBlocks, forOp.getBody());
  getChainOfAncestorBlocks(dstAccess, dstAncestorBlocks, forOp.getBody());

  Block *commonBlock = forOp.getBody();
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

// Returns true if the ancestor operation of 'srcAccess' appears before the
// ancestor operation of 'dstAccess' in their common ancestral block. Returns
// false otherwise.
// Note that because 'srcAccess' or 'dstAccess' may be nested in conditionals,
// the function is named 'srcAppearsBeforeDstInAncestralBlock'. Note that
// 'numCommonLoops' is the number of contiguous surrounding outer loops.
static bool srcAppearsBeforeDstInAncestralBlock(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    const FlatAffineValueConstraints &srcDomain, unsigned numCommonLoops) {
  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  auto *commonBlock =
      getCommonBlock(srcAccess, dstAccess, srcDomain, numCommonLoops);
  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  auto *srcInst = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcInst != nullptr);
  auto *dstInst = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstInst != nullptr);

  // Determine whether dstInst comes after srcInst.
  return srcInst->isBeforeInBlock(dstInst);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'src/dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void
addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                       const FlatAffineValueConstraints &dstDomain,
                       unsigned loopDepth,
                       FlatAffineValueConstraints *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimIds();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependenceComponents', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    FlatAffineValueConstraints *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumIds();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertDimId(/*pos=*/0, /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting the newly
  // introduced variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimIds();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on the eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst =
        dependenceDomain->getConstantBound(FlatAffineConstraints::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.getValueOr(std::numeric_limits<int64_t>::min());
    auto ubConst =
        dependenceDomain->getConstantBound(FlatAffineConstraints::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.getValueOr(std::numeric_limits<int64_t>::max());
  }
}
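
// Illustrative sketch, not part of the upstream file: how the per-loop bounds
// produced by computeDirectionVector are usually read. A component whose lower
// and upper bounds coincide is an exact dependence distance for that loop;
// otherwise only a range (direction) is known. The helper name
// `printDependenceComponents` is hypothetical.
static void printDependenceComponents(ArrayRef<DependenceComponent> depComps) {
  for (const DependenceComponent &comp : depComps) {
    int64_t lb = comp.lb.getValueOr(std::numeric_limits<int64_t>::min());
    int64_t ub = comp.ub.getValueOr(std::numeric_limits<int64_t>::max());
    if (lb == ub)
      llvm::dbgs() << "exact distance " << lb << "\n";
    else
      llvm::dbgs() << "distance in [" << lb << ", " << ub << "]\n";
  }
}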

LogicalResult MemRefAccess::getAccessRelation(FlatAffineRelation &rel) const {
  // Create set corresponding to domain of access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get access relation from access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  FlatAffineRelation domainRel(rel.getNumDomainDims(), /*numRangeDims=*/0,
                               domain);

  // Merge and align the domain ids of `rel` with the ids of `domain`. Since
  // the domain of the access map is a subset of the domain of access, the
  // domain ids of `rel` are guaranteed to be a subset of the ids of `domain`.
  for (unsigned i = 0, e = domain.getNumDimIds(); i < e; ++i) {
    unsigned loc;
    if (rel.findId(domain.getValue(i), &loc)) {
      rel.swapId(i, loc);
    } else {
      rel.insertDomainId(i);
      rel.setValue(i, domain.getValue(i));
    }
  }

  // Append domain constraints to `rel`.
  domainRel.appendRangeId(rel.getNumRangeDims());
  domainRel.mergeSymbolIds(rel);
  domainRel.mergeLocalIds(rel);
  rel.append(domainRel);

  return success();
}
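
// Illustrative sketch, not part of the upstream file: the relation built above
// can be split back into the two pieces the surrounding comments talk about,
// the iteration domain and the set of accessed memref elements. The helper
// name `dumpAccessRelation` is hypothetical; it assumes getRangeSet() mirrors
// the getDomainSet() accessor used elsewhere in this file.
static LogicalResult dumpAccessRelation(Operation *loadOrStoreOp) {
  MemRefAccess access(loadOrStoreOp);
  FlatAffineRelation accessRel;
  if (failed(access.getAccessRelation(accessRel)))
    return failure();
  LLVM_DEBUG({
    llvm::dbgs() << "Iteration domain of the access:\n";
    accessRel.getDomainSet().dump();
    llvm::dbgs() << "Accessed memref elements:\n";
    accessRel.getRangeSet().dump();
  });
  return success();
}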

// Populates 'accessMap' with the composition of AffineApplyOps reachable from
// the indices of the MemRefAccess.
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get affine map from AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check is comprised of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some
//    array reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation between the iteration domain of `srcAccess`
//    and the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//   [src-dim-identifiers, dst-dim-identifiers, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref '%m', and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,  %M, const]
//       2        -4        -1         0        1    0     0     =  0
//       0         3         0        -1        0   -1     0     =  0
//       1         0         0         0        0    0     0     >= 0
//      -1         0         0         0        0    0   100     >= 0
//       0         1         0         0        0    0     0     >= 0
//       0        -1         0         0        0    0    50     >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,  %K, const]
//       7         9        -1         0       -1    0     0     =  0
//       0        11         0        -1        0   -1     0     =  0
//       1         0         0         0        0    0     0     >= 0
//      -1         0         0         0        0    0   100     >= 0
//       0         1         0         0        0    0     0     >= 0
//       0        -1         0         0        0    0    50     >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,  %N,  %K, const]
//       2        -4        -7        -9        1    1    0     0     =  0
//       0         3         0       -11       -1    0    1     0     =  0
//       1         0         0         0        0    0    0     0     >= 0
//      -1         0         0         0        0    0    0   100     >= 0
//       0         1         0         0        0    0    0     0     >= 0
//       0        -1         0         0        0    0    0    50     >= 0
//       0         0         1         0        0    0    0     0     >= 0
//       0         0        -1         0        0    0    0   100     >= 0
//       0         0         0         1        0    0    0     0     >= 0
//       0         0         0        -1        0    0    0    50     >= 0
//
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump(););
  LLVM_DEBUG(dstAccess.opInst->dump(););

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if one of these accesses is not an
  // AffineWriteOpInterface.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // Create an access relation from each MemRefAccess.
  FlatAffineRelation srcRel, dstRel;
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain = srcRel.getDomainSet();
  FlatAffineValueConstraints dstDomain = dstRel.getDomainSet();

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps can
  // exist irrespective of lexicographic ordering between src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess, srcDomain,
                                           numCommonLoops)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between the iteration domain of
  // `srcAccess` and the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  dstRel.compose(srcRel);
  *dependenceConstraints = dstRel;

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth,
                         dependenceConstraints);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceConstraints->isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector and return HasDependence.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth,
                           dependenceConstraints, dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceConstraints->dump());
  return DependenceResult::HasDependence;
}
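
// Illustrative sketch, not part of the upstream file: querying the dependence
// between two accesses at every depth allowed by the assertion above, i.e.
// depths 1 through numCommonLoops + 1. The helper name `reportDependences` is
// hypothetical; getNumCommonSurroundingLoops is assumed to come from
// "mlir/Analysis/Utils.h".
static void reportDependences(Operation *srcOp, Operation *dstOp) {
  MemRefAccess srcAccess(srcOp), dstAccess(dstOp);
  unsigned numCommonLoops = getNumCommonSurroundingLoops(*srcOp, *dstOp);
  for (unsigned depth = 1; depth <= numCommonLoops + 1; ++depth) {
    FlatAffineValueConstraints dependenceConstraints;
    SmallVector<DependenceComponent, 2> depComps;
    DependenceResult result = checkMemrefAccessDependence(
        srcAccess, dstAccess, depth, &dependenceConstraints, &depComps);
    if (hasDependence(result))
      LLVM_DEBUG(llvm::dbgs() << "dependence found at depth " << depth << "\n");
  }
}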

/// Gathers dependence components for dependences between all ops in the loop
/// nest rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
void mlir::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        FlatAffineValueConstraints dependenceConstraints;
        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, &dependenceConstraints, &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}