//===- LoopAnalysis.cpp - Misc loop analysis routines //-------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "llvm/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
#include <numeric>
#include <optional>
#include <type_traits>
using namespace mlir;
using namespace mlir::affine;

#define DEBUG_TYPE "affine-loop-analysis"

/// Returns the trip count of the loop as an affine map, through `tripCountMap`,
/// if the trip count is expressible as an affine expression, and a null map
/// otherwise. The trip count expression is simplified before returning. This
/// method only utilizes map composition to construct lower and upper bounds
/// before computing the trip count expressions.
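///
/// Illustrative example (hypothetical IR, symbolic bounds assumed): for
///   affine.for %i = %lb to %ub step 4 { ... }
/// the computed trip count map is roughly ()[s0, s1] -> ((s1 - s0) ceildiv 4)
/// with operands (%lb, %ub); the exact dim/symbol positions and operand order
/// depend on the loop's bound maps and operands.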
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(
        llvm::divideCeilSigned(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // The difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine the constant trip
/// count in non-trivial cases.
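///
/// For example (illustrative), 'affine.for %i = 0 to 10 step 2' has a constant
/// trip count of 5; a loop whose upper bound is a min of several constant
/// expressions yields the minimum of the corresponding trip counts; if any
/// result of the trip count map is non-constant, std::nullopt is returned.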
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCountMapAndOperands),
/// and this method is thus able to determine non-trivial divisors.
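///
/// For example (illustrative), a loop with a constant trip count of 128 yields
/// 128; a symbolic trip count expression such as s0 * 4 yields 4 (its largest
/// known divisor); a zero-trip-count result contributes 2^64 - 1 to the GCD.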
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}

/// Given an affine.for `iv` and an access `index` of type index, returns
/// `true` if `index` is independent of `iv` and false otherwise.
///
/// Prerequisite: `iv` and `index` are of the proper types.
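///
/// For example (illustrative), an index %idx defined by
///   %idx = affine.apply affine_map<(d0) -> (d0 floordiv 8)>(%j)
/// is invariant with respect to an unrelated IV %i but not with respect to
/// %j; the affine.apply chain is composed before the check is made.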
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}

// Pre-requisite: Loop bounds should be in canonical form.
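// For example (illustrative), an access 'affine.load %A[%i, %c0]' nested
// inside 'affine.for %j' is invariant with respect to %j: after the access
// map is composed and canonicalized, %j does not appear among its operands.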
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the template so that the compiler knows we need them.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

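/// Returns the subset of `indices` that are invariant with respect to `iv`,
/// i.e., that do not (transitively) depend on it.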
DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (Value index : indices) {
    if (isAccessIndexInvariant(iv, index))
      res.insert(index);
  }
  return res;
}

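// Illustrative example: for 'affine.load %A[%i, %j]' on a memref<?x?xf32> with
// the default identity layout, the access is contiguous along the loop
// defining %j with *memRefDim set to 0 (the fastest-varying memref dimension),
// and along the loop defining %i with *memRefDim set to 1; if the IV appears
// in two or more result positions of the access map, the function returns
// false.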
// TODO: check access stride.
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

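/// Checks whether `loop` is structurally vectorizable and, through
/// `memRefDim`, reports the unique memref dimension along which the contiguous
/// load/store accesses vary. For example (illustrative), a loop whose only
/// access is 'affine.load %A[%i, %j]' taken along the loop defining %j is
/// vectorizable with *memRefDim == 0, assuming the body passes the structural
/// checks above; if two accesses vary along different memref dimensions, the
/// loop is rejected.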
bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
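//
// For example (illustrative), if an operation defining %v is assigned shift 0
// while an affine.store that uses %v is assigned shift 1, the shift set is
// rejected; assigning both the same shift keeps the def and its use together
// and is accepted.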
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}

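/// Checks whether hyper-rectangular loop tiling of the nest represented by
/// `loops` is valid: tiling is rejected if some dependence component at any
/// depth has an entirely negative range (for example, lb = -3, ub = -1), since
/// the default hyper-rectangular tiling would reverse that dependence.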
bool mlir::affine::isTilingValid(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no original loops provided");

  // We first find out all dependences we intend to check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  loops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = loops.size();
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);

        // Skip if there is no dependence in this case.
        if (!hasDependence(result))
          continue;

        // Check whether there is any negative direction vector in the
        // dependence components found above, which means that the dependence
        // is violated by the default hyper-rectangular tiling method.
        LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
                                   "for dependence at depth: "
                                << Twine(d) << " between:\n";);
        LLVM_DEBUG(srcAccess.opInst->dump());
        LLVM_DEBUG(dstAccess.opInst->dump());
        for (const DependenceComponent &depComp : depComps) {
          if (depComp.lb.has_value() && depComp.ub.has_value() &&
              *depComp.lb < *depComp.ub && *depComp.ub < 0) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Dependence component lb = " << Twine(*depComp.lb)
                       << " ub = " << Twine(*depComp.ub)
                       << " is negative at depth: " << Twine(d)
                       << " and thus violates the legality rule.\n");
            return false;
          }
        }
      }
    }
  }

  return true;
}