//===- LoopAnalysis.cpp - Misc loop analysis routines //-------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <numeric>
#include <optional>
#include <type_traits>

using namespace mlir;
using namespace mlir::affine;
/// Computes the trip count of the loop as an affine map, if the trip count is
/// expressible as an affine expression, and sets `tripCountMap` to a null map
/// otherwise. The trip count expression is simplified before returning. This
/// method only utilizes map composition to construct lower and upper bounds
/// before computing the trip count expressions.
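///
/// For example (illustrative names), for a loop such as
///   affine.for %i = 0 to min affine_map<()[s0] -> (s0, 128)>()[%n] step 4
/// the resulting trip count map would be roughly ()[s0] -> (s0 ceildiv 4, 32),
/// with one result per upper bound expression and %n as its only operand.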
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // The difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine constant trip counts
/// in non-trivial cases.
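///
/// For instance, if the trip count map has multiple results and all of them
/// fold to constants (say 64 and 100 for an upper bound that is a min of two
/// expressions), the minimum of the per-result trip counts, 64, is returned;
/// if any result remains symbolic, std::nullopt is returned.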
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
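///
/// For example, with a lower bound of 0, a step of 1, and an upper bound of
/// min(4 * %n, 128), the per-result divisors would be 4 and 128, so the
/// returned value would be gcd(4, 128) = 4.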
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}

/// Given an affine.for `iv` and an access `index` of type index, returns `true`
/// if `index` is independent of `iv` and false otherwise.
///
/// Prerequisites: `iv` is an affine.for induction variable and `index` is of
/// 'index' type.
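///
/// For example, if `index` were defined as
///   %index = affine.apply affine_map<(d0) -> (d0 + 1)>(%iv)
/// then, after composing the affine.apply ops below, the single result is a
/// function of `iv`, so the access index would not be invariant.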
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}

// Prerequisite: Loop bounds should be in canonical form.
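//
// The access is treated as invariant when, after composing any affine.apply
// ops feeding the access map, `forOp`'s induction variable no longer appears
// among the map operands. For example, a load at [%c0, %k] with %k defined
// outside `forOp` would be reported as invariant with respect to `forOp`.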
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the templates so that the compiler knows we need
// these instantiations.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

DenseSet<Value, DenseMapInfo<Value>>
mlir::affine::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

// TODO: check access stride.
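//
// For a memref with the default identity layout, `memRefDim` encodes which
// memref dimension varies with `iv`, counted from the fastest-varying one. For
// example, a rank-2 access A[%c, %iv] would yield *memRefDim == 0 (unit
// stride), while A[%iv, %c] would yield *memRefDim == 1.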
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}
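
// For example, a loop body that contains an affine.if, an op with an
// unrecognized region, or a load/store whose memref has a vector element type
// would be rejected by the structural checks above.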

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}
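
// For example, in a loop over %j, accesses A[%i, %j] and B[%i, %j] agree on
// the varying memref dimension (both report memRefDim == 0), so the overload
// above would consider them compatible; mixing A[%i, %j] with C[%j, %i] would
// not be vectorizable, since the accesses vary along different dimensions.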

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
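//
// For example, with shifts = [0, 1, 1], if the op at index 1 defines a value
// consumed by the op at index 2, the shift factors match (1 == 1) and the
// shifts are valid; shifts = [0, 1, 2] for the same def/use pair would not be.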
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}