//===- LoopAnalysis.cpp - Misc loop analysis routines ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <numeric>
#include <optional>
#include <type_traits>

using namespace mlir;
using namespace mlir::affine;

/// Returns the trip count of the loop as an affine map, along with its
/// corresponding operands, if the trip count is expressible as an affine
/// expression; sets `*tripCountMap` to a null AffineMap otherwise. The trip
/// count expression is simplified before returning. This method only utilizes
/// map composition to construct lower and upper bounds before computing the
/// trip count expressions.
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStep();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
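
// Usage sketch (illustrative, not part of the original file): for a
// constant-bounds loop such as `affine.for %i = 0 to 10 step 3`, the constant
// path above yields the single-result constant map () -> (4), since
// ceilDiv(10 - 0, 3) == 4. A hypothetical caller could retrieve it as:
//
//   SmallVector<Value, 4> tripCountOperands;
//   AffineMap tripCountMap;
//   getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
//   if (tripCountMap && tripCountMap.isSingleConstant()) {
//     int64_t tripCount = tripCountMap.getSingleConstantResult();
//     // ... use tripCount ...
//   }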

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCount) and is able to determine constant trip count in non-trivial
/// cases.
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}
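
// Worked example (illustrative, not part of the original file): when the
// loop's upper bound is a min of several expressions, the trip count map has
// one result per bound expression. If those results are the constants (8, 6),
// the loop runs min(8, 6) = 6 iterations and 6 is returned; if any result is
// non-constant, e.g. (8, s0 * 2), std::nullopt is returned instead.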

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; return its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}
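
// Worked example (illustrative, not part of the original file): if the trip
// count map has the two constant results (8, 12), the individual largest
// divisors are 8 and 12, so gcd(8, 12) = 4 is returned. For a symbolic result
// such as s0 * 4, AffineExpr::getLargestKnownDivisor() reports 4, so a loop
// whose trip count is 4 * s0 can still, e.g., be unrolled by a factor of 4.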

/// Given an induction variable `iv` of type AffineForOp and an access `index`
/// of type index, returns `true` if `index` is independent of `iv` and
/// false otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' comes from the fact that
/// the composition of AffineApplyOp needs to be canonicalized by construction
/// to avoid writing code that composes arbitrary numbers of AffineApplyOps
/// everywhere. To achieve this, at the very least, the compose-affine-apply
/// pass must have been run.
///
/// Prerequisites:
///   1. `iv` and `index` of the proper type;
///   2. at most one reachable AffineApplyOp from `index`.
///
/// Returns false in cases with more than one AffineApplyOp; this is
/// conservative.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) &&
         "iv must be an AffineForOp induction variable");
  assert(isa<IndexType>(index.getType()) && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at most "
        "one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
}
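
// Example (illustrative, not part of the original file): with %i the loop's
// induction variable and %j some other index value,
//
//   %idx = affine.apply affine_map<(d0) -> (d0 mod 128)>(%j)
//
// isAccessIndexInvariant(%i, %idx) returns true: the single reachable
// AffineApplyOp is a function of %j only. Passing %i itself as `index`
// returns false via the pointer-equality test above.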

DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}
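
// Usage sketch (hypothetical caller, not part of the original file): given an
// affine load with indices {%i, %j} inside a loop whose induction variable is
// %i, the returned set contains only %j.
//
//   SmallVector<Value> indices(loadOp.getIndices().begin(),
//                              loadOp.getIndices().end());
//   DenseSet<Value> invariant =
//       getInvariantAccesses(forOp.getInductionVar(), indices);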

/// Given:
///   1. an induction variable `iv` of type AffineForOp;
///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
/// convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
///      identity layout map.
///
/// Currently only supports no layout map or an identity layout map in the
/// MemRef. Returns false if the MemRef has a non-identity layout map; this is
/// conservative.
///
// TODO: check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(
      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
      "Must be called on either AffineLoadOp or AffineStoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather the map operands used in result expr 'i' into 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}
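
// Worked example (illustrative, not part of the original file): for a loop
// over %j and an access A[%i][%j] into a rank-2 identity-layout memref, only
// result expression 1 varies with %j, so uniqueVaryingIndexAlongIv == 1 and
// *memRefDim = 2 - (1 + 1) = 0; i.e., the access is contiguous along the
// fastest-varying (innermost) memref dimension. An access A[%j][%i] yields
// *memRefDim = 1 instead, and A[%j][%j] fails with two varying indices.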

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; every load/store must be
    // vectorizable for the loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
                                                  &thisOpMemRefDim)
                             : isContiguousAccess(loop.getInductionVar(), store,
                                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}
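
// Usage sketch (hypothetical caller, not part of the original file): a
// vectorization driver can use the memRefDim overload above to both test the
// loop and learn which memref dimension the accesses agree on.
//
//   int memRefDim = -1;
//   if (isVectorizableLoopBody(loop, &memRefDim, vectorTransferPattern)) {
//     // All load/store accesses are contiguous (or invariant) along `loop`,
//     // and all contiguous accesses agree on the dimension `memRefDim`.
//   }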

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}
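
// Worked example (illustrative, not part of the original file): for a body
//
//   %0 = ...        // op #0
//   use(%0)         // op #1
//
// shifts {1, 1} are valid because the def and its use shift together, while
// shifts {1, 0} are rejected: op #1 uses op #0's result but would be shifted
// by a different factor, so the check above returns false.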