//===- LoopAnalysis.cpp - Misc loop analysis routines //-------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <numeric>
#include <type_traits>

using namespace mlir;

/// Returns the trip count of the loop as an affine map (via `tripCountMap`),
/// along with its operands, if it is expressible as one; otherwise sets the
/// map to a null map. The trip count expression is simplified before
/// returning. This method only utilizes map composition to construct lower
/// and upper bounds before computing the trip count expressions.
void mlir::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStep();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
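
// For illustration, consider a hypothetical loop `affine.for %i = 0 to %N
// step 4`: the single lower bound expression (0) is subtracted from the upper
// bound expression and the difference is ceil-divided by the step, yielding a
// trip count map roughly of the form ()[s0] -> (s0 ceildiv 4) with %N as the
// sole operand. With constant bounds, e.g. `affine.for %i = 0 to 10 step 4`,
// the early exit above directly produces the constant map () -> (3).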

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCount) and is able to determine constant trip count in non-trivial
/// cases.
Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.has_value())
        tripCount = std::min(tripCount.value(),
                             static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}
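
// For illustration, a typical caller pattern (a sketch only; kUnrollThreshold
// is a hypothetical constant and loopUnrollFull is declared in the affine
// LoopUtils header):
//
//   if (Optional<uint64_t> tripCount = getConstantTripCount(forOp))
//     if (*tripCount <= kUnrollThreshold)
//       (void)loopUnrollFull(forOp);
//
// A loop such as `affine.for %i = 0 to 128 step 2` yields 64, while a loop
// whose upper bound is a min of a symbol and a constant yields std::nullopt
// because one of the trip count expressions is not constant.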

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; return its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(gcd.value(), thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return gcd.value();
}
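
// For illustration: for `affine.for %i = 0 to 128 step 2` the trip count map
// is the constant 64, so 64 is returned; for a hypothetical symbolic bound
// such as `affine.for %i = 0 to affine_map<()[s0] -> (s0 * 8)>()[%N]` the trip
// count expression is s0 * 8 and getLargestKnownDivisor() yields 8. Callers
// such as the unrolling utilities can use this to pick factors that are
// guaranteed to divide the trip count evenly.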

/// Given an induction variable `iv` of type AffineForOp and an access `index`
/// of type index, returns `true` if `index` is independent of `iv` and false
/// otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' restriction comes from the
/// fact that the composition of AffineApplyOps needs to be canonicalized by
/// construction to avoid writing code that composes arbitrary numbers of
/// AffineApplyOps everywhere. To achieve this, at the very least, the
/// compose-affine-apply pass must have been run.
///
/// Prerequisites:
///   1. `iv` and `index` of the proper type;
///   2. at most one reachable AffineApplyOp from `index`.
///
/// Returns false in cases with more than one AffineApplyOp; this is
/// conservative.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isForInductionVar(iv) && "iv must be a AffineForOp");
  assert(index.getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at most "
        "one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
}
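
// For illustration, in the hypothetical nest below, with %j as `iv`, the index
// %i is invariant (no AffineApplyOp is reachable from it and it is a different
// Value than %j), whereas %idx is not, since its single reachable
// AffineApplyOp has a value map that is a function of %j:
//
//   affine.for %i = 0 to 100 {
//     affine.for %j = 0 to 100 {
//       %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%j)
//       %0 = memref.load %A[%i]   : memref<100xf32>
//       %1 = memref.load %A[%idx] : memref<100xf32>
//     }
//   }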

DenseSet<Value> mlir::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}
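
// Continuing the sketch above: getInvariantAccesses(%j, {%i, %idx, %j}) would
// return a set containing only %i, since %idx is derived from %j through an
// AffineApplyOp and %j is the induction variable itself.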

/// Given:
///   1. an induction variable `iv` of type AffineForOp;
///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
/// determines whether `memoryOp` has a contiguous access along `iv`.
/// Contiguous is defined as either invariant or varying only along a unique
/// MemRef dim. Upon success, the unique MemRef dim is written in `memRefDim`
/// (or -1 to convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
///      identity layout map.
///
/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
/// Returns false if the MemRef has a non-identity layoutMap. This is
/// conservative.
///
// TODO: check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(
      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
      "Must be called on either LoadOp or StoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}
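
// For illustration, for a hypothetical loop over %j accessing a 64x128 memref
// (%c0 is a constant index defined above the loop):
//
//   affine.for %j = 0 to 128 {
//     %0 = affine.load %A[%c0, %j] : memref<64x128xf32>  // varies in result 1
//     %1 = affine.load %A[%j, %c0] : memref<64x128xf32>  // varies in result 0
//   }
//
// the first access is contiguous along %j with *memRefDim == 0 (the fastest
// varying memref dimension), the second one gets *memRefDim == 1, and an
// access such as %A[%j, %j] makes the function return false because two
// results vary along %j.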

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}
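
// For illustration, the hypothetical loop below is rejected by the structural
// checks above regardless of its memory accesses, because of the enclosed
// affine.if; a body containing ops matched by `vectorTransferMatcher` (e.g. a
// matcher for pre-existing vector transfer ops) or a load/store of vector
// elements is rejected for the same structural reasons:
//
//   affine.for %i = 0 to 128 {
//     affine.if #set0(%i) {
//       ...
//     }
//   }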

bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
                                  NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
                                                  &thisOpMemRefDim)
                             : isContiguousAccess(loop.getInductionVar(), store,
                                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}
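
// For illustration, a caller can combine the structural check with the
// contiguity information it computes (a sketch; the matcher below is an
// assumption about how a vectorizer might filter pre-existing vector
// transfers):
//
//   NestedPattern vecTransfers = matcher::Op([](Operation &op) {
//     return isa<vector::TransferReadOp, vector::TransferWriteOp>(op);
//   });
//   int memRefDim = -1;
//   if (isVectorizableLoopBody(loop, &memRefDim, vecTransfers) &&
//       memRefDim == 0) {
//     // Every access is contiguous along the fastest varying memref
//     // dimension, so vectorizing along `loop` gives unit-stride accesses.
//   }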

bool mlir::isVectorizableLoopBody(AffineForOp loop,
                                  NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}
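
// For illustration, for a hypothetical body where a load defines %v and a
// store uses it, equal shift factors such as {1, 1} for the two operations
// are valid, while {0, 1} is not: the use would be shifted relative to its
// def, and materializing the shifted bodies would then violate SSA dominance.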