//===- LoopAnalysis.cpp - Misc loop analysis routines //-------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "llvm/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
#include <numeric>
#include <optional>
#include <type_traits>

using namespace mlir;
using namespace mlir::affine;

#define DEBUG_TYPE "affine-loop-analysis"

/// Returns the trip count of the loop as an affine map, along with its
/// operands in `tripCountOperands`, if the trip count is expressible as an
/// affine expression; otherwise sets `tripCountMap` to a null map. The trip
/// count expression is simplified before returning. This method only uses map
/// composition to construct lower and upper bounds before computing the trip
/// count expressions.
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(
        llvm::divideCeilSigned(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
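
// Example (illustrative): for a constant-bound loop such as
//   affine.for %i = 0 to 10 step 3 { ... }
// the loop span is 10 and the function produces the constant map () -> (4)
// (ceildiv of 10 by the step 3) with an empty operand list. For a loop with a
// symbolic upper bound, the resulting map instead expresses
// (ub - lb) ceildiv step over the bound operands.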

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine the constant trip
/// count in non-trivial cases.
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}
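
// Example (illustrative): if the trip count map computed above has the two
// constant results (8, 12) -- e.g. from an upper bound that is a min of two
// constant expressions -- the returned trip count is min(8, 12) = 8. If any
// result is non-constant (say s0 + 1), std::nullopt is returned.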

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCountMapAndOperands),
/// and this method is thus able to determine non-trivial divisors.
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}
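
// Example (illustrative): for a loop whose trip count map is
// ()[s0] -> (s0 * 4), the trip count itself is unknown but its largest known
// divisor is 4, so 4 is returned; for a single constant trip count of 12 the
// result is simply 12.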

/// Given an affine.for `iv` and an access `index` of type index, returns `true`
/// if `index` is independent of `iv` and false otherwise.
///
/// Prerequisite: `iv` and `index` must be of the proper types.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}
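
// Example (illustrative): with
//   affine.for %i = ... {
//     %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%j)
//     ... access using %idx ...
//   }
// composing the affine.apply shows %idx is a function of %j only, so
// isAccessIndexInvariant(%i, %idx) is true; if %idx were computed from %i, the
// composed map would depend on %i and the result would be false.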

// Pre-requisite: Loop bounds should be in canonical form.
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the template so that the compiler knows we need them.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}
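
// Example (illustrative): for an access A[%i][%j] inside 'affine.for %i', the
// call getInvariantAccesses(%i, {%i, %j}) returns {%j}, since only the second
// index is independent of the induction variable %i.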

// TODO: check access stride.
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface loadOp,
                                               int *memRefDim);
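
// Example (illustrative): for affine.load %A[%i, %j] on a rank-2
// identity-layout memref, a call with iv = %j sets *memRefDim = 0 (the varying
// index is the fastest-varying memref dimension), while iv = %i sets
// *memRefDim = 1. An access such as %A[%i + %j, %i] has two results varying
// with %i, so the function returns false for iv = %i.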

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; every load/store must be
    // vectorizable for the loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}
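
// Illustrative note: a loop body containing an affine.if, an op with an
// unknown region, a matched vector transfer, or a load/store on a memref of
// vector elements is rejected above; otherwise each load/store must also pass
// the optional `isVectorizableOp` predicate for the loop to be considered
// vectorizable.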

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}
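
// Usage sketch (illustrative): callers typically combine the structural check
// with the contiguity information, e.g.
//   int memRefDim;
//   NestedPattern pattern = ...; // matcher for pre-existing vector transfers
//   if (isVectorizableLoopBody(forOp, &memRefDim, pattern) && memRefDim == 0) {
//     // All contiguous accesses vary along the fastest-varying dimension.
//   }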

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}
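
// Example (illustrative): for a body
//   %v = <op A>        // shift 1
//   <op B> uses %v     // shift 0
// the def %v and its use carry different shift factors, so the function
// returns false; assigning both operations the same shift keeps the shifted
// schedule valid and the function returns true.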

bool mlir::affine::isTilingValid(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no original loops provided");

  // First collect all the load and store ops whose dependences we intend to
  // check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  loops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = loops.size();
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);

        // Skip if there is no dependence in this case.
        if (!hasDependence(result))
          continue;

        // Check whether there is any negative direction vector in the
        // dependence components found above, which would mean the dependence
        // is violated by the default hyper-rectangular tiling method.
        LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
                                   "for dependence at depth: "
                                << Twine(d) << " between:\n";);
        LLVM_DEBUG(srcAccess.opInst->dump());
        LLVM_DEBUG(dstAccess.opInst->dump());
        for (const DependenceComponent &depComp : depComps) {
          if (depComp.lb.has_value() && depComp.ub.has_value() &&
              *depComp.lb < *depComp.ub && *depComp.ub < 0) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Dependence component lb = " << Twine(*depComp.lb)
                       << " ub = " << Twine(*depComp.ub)
                       << " is negative at depth: " << Twine(d)
                       << " and thus violates the legality rule.\n");
            return false;
          }
        }
      }
    }
  }

  return true;
}
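
// Illustrative note: rectangular tiling amounts to strip-mining each loop and
// moving the resulting point loops inwards, so it is valid only when no
// dependence has a strictly negative component at some depth (e.g. a direction
// vector like (1, -1) across a 2-d nest); such nests make this function return
// false, while dependence-free or forward-only nests return true.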