//===- LoopAnalysis.cpp - Misc loop analysis routines --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <type_traits>

using namespace mlir;

/// Returns the trip count of the loop as an affine expression if the latter is
/// expressible as an affine expression, and sets `*tripCountMap` to a null map
/// otherwise. The trip count expression is simplified before returning. This
/// method only utilizes map composition to construct lower and upper bounds
/// before computing the trip count expressions.
void mlir::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStep();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
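
// A minimal usage sketch of the above API; `forOp` is an assumed in-scope
// AffineForOp rather than a name from this file:
//
//   AffineMap tripCountMap;
//   SmallVector<Value, 4> tripCountOperands;
//   getTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
//   if (!tripCountMap)
//     return; // Trip count is not expressible as an affine map.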

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine constant trip count in
/// non-trivial cases.
Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return None;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.hasValue())
        tripCount = std::min(tripCount.getValue(),
                             static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return None;
  }
  return tripCount;
}
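
// A sketch of a typical caller, e.g. a full-unroll heuristic; `forOp` and
// `kFullUnrollLimit` are illustrative names, and loopUnrollFull refers to the
// unrolling utility from the affine loop utilities:
//
//   if (Optional<uint64_t> tripCount = getConstantTripCount(forOp))
//     if (*tripCount <= kFullUnrollLimit)
//       (void)loopUnrollFull(forOp);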

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.hasValue())
      gcd = llvm::GreatestCommonDivisor64(gcd.getValue(), thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.hasValue() && "value expected per above logic");
  return gcd.getValue();
}
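
// For example, with a zero lower bound, unit step, and upper bound map
// (d0) -> (16 * d0), the single trip count expression is 16 * d0, so the
// divisor returned is 16 even though the trip count itself is not a known
// constant.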

/// Given the induction variable `iv` of an AffineForOp and an access `index`
/// of type index, returns `true` if `index` is independent of `iv` and
/// false otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' restriction comes from the
/// fact that compositions of AffineApplyOps need to be canonicalized by
/// construction to avoid writing code that composes arbitrary numbers of
/// AffineApplyOps everywhere. To achieve this, at the very least, the
/// compose-affine-apply pass must have been run.
///
/// Prerequisites:
/// 1. `iv` and `index` are of the proper type;
/// 2. at most one AffineApplyOp is reachable from `index`.
///
/// Returns false in cases with more than one AffineApplyOp; this is
/// conservative.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isForInductionVar(iv) && "iv must be an AffineForOp induction var");
  assert(index.getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at most "
        "one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
}

DenseSet<Value> mlir::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}
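
// For example, for accesses %A[%i, %j] and %B[%j] inside a loop over %i (with
// %j not derived from %i), passing indices {%i, %j} returns {%j}: only %j is
// invariant along %i.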

/// Given:
/// 1. an induction variable `iv` of type AffineForOp;
/// 2. a `memoryOp` of type const LoadOp& or const StoreOp&;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written to `memRefDim` (or -1 to
/// convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
/// 1. `memRefDim` != nullptr;
/// 2. `iv` of the proper type;
/// 3. the MemRef accessed by `memoryOp` has no layout map or at most an
///    identity layout map.
///
/// Currently only supports no layout map or an identity layout map in the
/// MemRef. Returns false if the MemRef has a non-identity layout map or more
/// than one layout map; this is conservative.
///
// TODO: check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(
      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
      "Must be called on either LoadOp or StoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}
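
// For example, for a rank-2 access %A[%i, %j], checking along the loop over %j
// finds only the innermost result expression varying, so *memRefDim is set to
// 0 (dimensions are counted from the fastest-varying one); checking along %i
// would set it to 1.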

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}
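
// Note that the helper above is purely structural: it rejects loops whose
// bodies contain affine.if ops, ops holding unknown regions, or ops matched by
// `vectorTransferMatcher`, and then applies the optional per-op condition to
// every affine load/store it finds.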

bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
                                  NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
                                                  &thisOpMemRefDim)
                             : isContiguousAccess(loop.getInductionVar(), store,
                                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::isVectorizableLoopBody(AffineForOp loop,
                                  NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}
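
// A usage sketch; `loop` is an assumed in-scope AffineForOp, and the filter
// below treats no operation as a vector transfer:
//
//   NestedPattern noTransfers =
//       matcher::Op([](Operation &op) { return false; });
//   int memRefDim = -1;
//   bool vectorizable = isVectorizableLoopBody(loop, &memRefDim, noTransfers);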

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}
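
// For example, for a body containing `%v = affine.load ...` followed by
// `affine.store %v, ...`, shifting both ops by the same amount is valid, while
// shifting them by different amounts is rejected: a 'def' and its uses must
// share the same shift factor.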