1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
22 #include "mlir/IR/Dominance.h"
23 #include "mlir/IR/IntegerSet.h"
25 
26 #define DEBUG_TYPE "affine-utils"
27 
28 using namespace mlir;
29 
30 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
31 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
32 /// the rest of the op.
33 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
34  if (elseBlock)
35  assert(ifOp.hasElse() && "else block expected");
36 
37  Block *destBlock = ifOp->getBlock();
38  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
39  destBlock->getOperations().splice(
40  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
41  std::prev(srcBlock->end()));
42  ifOp.erase();
43 }
44 
45 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
46 /// on. The `ifOp` could be hoisted and placed right before such an operation.
47 /// This method assumes that the ifOp has been canonicalized (to be correct and
48 /// effective).
49 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
50  // Walk up the parents past all for/parallel ops that this conditional is invariant on.
51  auto ifOperands = ifOp.getOperands();
52  auto *res = ifOp.getOperation();
53  while (!isa<FuncOp>(res->getParentOp())) {
54  auto *parentOp = res->getParentOp();
55  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
56  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
57  break;
 58  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
 59  if (llvm::any_of(parallelOp.getIVs(),
 60  [&](Value iv) { return llvm::is_contained(ifOperands, iv); }))
 61  break;
62  } else if (!isa<AffineIfOp>(parentOp)) {
63  // Won't walk up past anything other than affine.for/if ops.
64  break;
65  }
66  // You can always hoist up past any affine.if ops.
67  res = parentOp;
68  }
69  return res;
70 }
71 
72 /// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
73 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
74 /// otherwise the same `ifOp`.
75 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
76  // No hoisting to do.
77  if (hoistOverOp == ifOp)
78  return ifOp;
79 
80  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
81  // the else block. Then drop the else block of the original 'if' in the 'then'
82  // branch while promoting its then block, and analogously drop the 'then'
83  // block of the original 'if' from the 'else' branch while promoting its else
84  // block.
85  BlockAndValueMapping operandMap;
86  OpBuilder b(hoistOverOp);
87  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
88  ifOp.getOperands(),
89  /*elseBlock=*/true);
90 
91  // Create a clone of hoistOverOp to use for the else branch of the hoisted
92  // conditional. The else block may get optimized away if empty.
93  Operation *hoistOverOpClone = nullptr;
94  // We use this unique name to identify/find `ifOp`'s clone in the else
95  // version.
96  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
97  operandMap.clear();
98  b.setInsertionPointAfter(hoistOverOp);
99  // We'll set an attribute to identify this op in a clone of this sub-tree.
100  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
101  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
102 
103  // Promote the 'then' block of the original affine.if in the then version.
104  promoteIfBlock(ifOp, /*elseBlock=*/false);
105 
106  // Move the then version to the hoisted if op's 'then' block.
107  auto *thenBlock = hoistedIfOp.getThenBlock();
108  thenBlock->getOperations().splice(thenBlock->begin(),
109  hoistOverOp->getBlock()->getOperations(),
110  Block::iterator(hoistOverOp));
111 
112  // Find the clone of the original affine.if op in the else version.
113  AffineIfOp ifCloneInElse;
114  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
115  if (!ifClone->getAttr(idForIfOp))
116  return WalkResult::advance();
117  ifCloneInElse = ifClone;
118  return WalkResult::interrupt();
119  });
120  assert(ifCloneInElse && "if op clone should exist");
121  // For the else block, promote the else block of the original 'if' if it had
122  // one; otherwise, the op itself is to be erased.
123  if (!ifCloneInElse.hasElse())
124  ifCloneInElse.erase();
125  else
126  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
127 
128  // Move the else version into the else block of the hoisted if op.
129  auto *elseBlock = hoistedIfOp.getElseBlock();
130  elseBlock->getOperations().splice(
131  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
132  Block::iterator(hoistOverOpClone));
133 
134  return hoistedIfOp;
135 }
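
// For illustration only (not part of the original source), a sketch of the
// rewrite these helpers perform when the `if` condition is invariant on the
// surrounding loop. The set #set, %n and "work" are hypothetical.
//
// Before:
//   affine.for %i = 0 to 10 {
//     affine.if #set()[%n] {
//       "work"(%i) : (index) -> ()
//     }
//   }
// After (the loop is cloned; the clone in the else branch keeps only the code
// outside the original `affine.if` and is usually removed by canonicalization):
//   affine.if #set()[%n] {
//     affine.for %i = 0 to 10 {
//       "work"(%i) : (index) -> ()
//     }
//   } else {
//     affine.for %i = 0 to 10 {
//     }
//   }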
136 
137 LogicalResult
138 mlir::affineParallelize(AffineForOp forOp,
139  ArrayRef<LoopReduction> parallelReductions) {
140  // Fail early if there are iter arguments that are not reductions.
141  unsigned numReductions = parallelReductions.size();
142  if (numReductions != forOp.getNumIterOperands())
143  return failure();
144 
145  Location loc = forOp.getLoc();
146  OpBuilder outsideBuilder(forOp);
147  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
148  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
149  AffineMap upperBoundMap = forOp.getUpperBoundMap();
150  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
151 
152  // Creating empty 1-D affine.parallel op.
153  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
154  parallelReductions, [](const LoopReduction &red) { return red.value; }));
155  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
156  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
157  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
158  loc, ValueRange(reducedValues).getTypes(), reductionKinds,
159  llvm::makeArrayRef(lowerBoundMap), lowerBoundOperands,
160  llvm::makeArrayRef(upperBoundMap), upperBoundOperands,
161  llvm::makeArrayRef(forOp.getStep()));
162  // Steal the body of the old affine for op.
163  newPloop.region().takeBody(forOp.region());
164  Operation *yieldOp = &newPloop.getBody()->back();
165 
166  // Handle the initial values of reductions because the parallel loop always
167  // starts from the neutral value.
168  SmallVector<Value> newResults;
169  newResults.reserve(numReductions);
170  for (unsigned i = 0; i < numReductions; ++i) {
171  Value init = forOp.getIterOperands()[i];
172  // This works because we are only handling single-op reductions at the
173  // moment. A switch on reduction kind or a mechanism to collect operations
174  // participating in the reduction will be necessary for multi-op reductions.
175  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
176  assert(reductionOp && "yielded value is expected to be produced by an op");
177  outsideBuilder.getInsertionBlock()->getOperations().splice(
178  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
179  reductionOp);
180  reductionOp->setOperands({init, newPloop->getResult(i)});
181  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
182  }
183 
184  // Update the loop terminator to yield reduced values bypassing the reduction
185  // operation itself (now moved outside of the loop) and erase the block
186  // arguments that correspond to reductions. Note that the loop always has one
187  // "main" induction variable when coming from a non-parallel for.
188  unsigned numIVs = 1;
189  yieldOp->setOperands(reducedValues);
190  newPloop.getBody()->eraseArguments(
191  llvm::to_vector<4>(llvm::seq<unsigned>(numIVs, numReductions + numIVs)));
192 
193  forOp.erase();
194  return success();
195 }
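
// Illustrative sketch (not from the original source) of the rewrite performed
// by affineParallelize above for a single add reduction; %buf and %init are
// hypothetical values.
//
// Before:
//   %r = affine.for %i = 0 to 128 iter_args(%acc = %init) -> (f32) {
//     %v = affine.load %buf[%i] : memref<128xf32>
//     %s = arith.addf %acc, %v : f32
//     affine.yield %s : f32
//   }
// After (the reduction op is moved out of the loop and combines the parallel
// loop's reduced result with the original initial value):
//   %red = affine.parallel (%i) = (0) to (128) reduce ("addf") -> (f32) {
//     %v = affine.load %buf[%i] : memref<128xf32>
//     affine.yield %v : f32
//   }
//   %r = arith.addf %init, %red : f32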
196 
197 // Returns success if any hoisting happened.
198 LogicalResult mlir::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
199  // Bail out early if the ifOp returns a result. TODO: Consider how to
200  // properly support this case.
201  if (ifOp.getNumResults() != 0)
202  return failure();
203 
204  // Apply canonicalization patterns and folding - this is necessary for the
205  // hoisting check to be correct (operands should be composed), and to be more
206  // effective (no unused operands). Since the pattern rewriter's folding is
207  // entangled with application of patterns, we may fold/end up erasing the op,
208  // in which case we return with `folded` being set.
209  RewritePatternSet patterns(ifOp.getContext());
210  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
211  bool erased;
212  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
213  (void)applyOpPatternsAndFold(ifOp, frozenPatterns, &erased);
214  if (erased) {
215  if (folded)
216  *folded = true;
217  return failure();
218  }
219  if (folded)
220  *folded = false;
221 
222  // The folding above should have ensured this, but the affine.if's
223  // canonicalization is missing composition of affine.applys into it.
224  assert(llvm::all_of(ifOp.getOperands(),
225  [](Value v) {
226  return isTopLevelValue(v) || isForInductionVar(v);
227  }) &&
228  "operands not composed");
229 
230  // We are going to hoist as high as possible.
231  // TODO: this could be customized in the future.
232  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
233 
234  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
235  // Nothing to hoist over.
236  if (hoistedIfOp == ifOp)
237  return failure();
238 
239  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
240  // a sequence of affine.fors that are all perfectly nested).
 241  (void)applyPatternsAndFoldGreedily(
 242  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
243  frozenPatterns);
244 
245  return success();
246 }
247 
248 // Return the min expr after replacing the given dim.
249 AffineExpr mlir::substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
250  AffineExpr max, bool positivePath) {
251  if (e == dim)
252  return positivePath ? min : max;
253  if (auto bin = e.dyn_cast<AffineBinaryOpExpr>()) {
254  AffineExpr lhs = bin.getLHS();
255  AffineExpr rhs = bin.getRHS();
256  if (bin.getKind() == mlir::AffineExprKind::Add)
257  return substWithMin(lhs, dim, min, max, positivePath) +
258  substWithMin(rhs, dim, min, max, positivePath);
259 
260  auto c1 = bin.getLHS().dyn_cast<AffineConstantExpr>();
261  auto c2 = bin.getRHS().dyn_cast<AffineConstantExpr>();
262  if (c1 && c1.getValue() < 0)
263  return getAffineBinaryOpExpr(
264  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
265  if (c2 && c2.getValue() < 0)
266  return getAffineBinaryOpExpr(
267  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
268  return getAffineBinaryOpExpr(
269  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
270  substWithMin(rhs, dim, min, max, positivePath));
271  }
272  return e;
273 }
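
// Illustrative example (not from the original source): substituting `dim` with
// the range [min, max] in `s0 - dim`, i.e. s0 + (-1) * dim. The negative
// coefficient flips the path, so on the positive path `dim` is replaced by
// `max`, yielding `s0 - max`, the smallest value the expression can take.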
274 
275 void mlir::normalizeAffineParallel(AffineParallelOp op) {
276  // Loops with min/max in bounds are not normalized at the moment.
277  if (op.hasMinMaxBounds())
278  return;
279 
280  AffineMap lbMap = op.lowerBoundsMap();
281  SmallVector<int64_t, 8> steps = op.getSteps();
282  // No need to do any work if the parallel op is already normalized.
283  bool isAlreadyNormalized =
284  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
285  int64_t step = std::get<0>(tuple);
286  auto lbExpr =
287  std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
288  return lbExpr && lbExpr.getValue() == 0 && step == 1;
289  });
290  if (isAlreadyNormalized)
291  return;
292 
293  AffineValueMap ranges;
294  AffineValueMap::difference(op.getUpperBoundsValueMap(),
295  op.getLowerBoundsValueMap(), &ranges);
296  auto builder = OpBuilder::atBlockBegin(op.getBody());
297  auto zeroExpr = builder.getAffineConstantExpr(0);
298  SmallVector<AffineExpr, 8> lbExprs;
299  SmallVector<AffineExpr, 8> ubExprs;
300  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
301  int64_t step = steps[i];
302 
303  // Adjust the lower bound to be 0.
304  lbExprs.push_back(zeroExpr);
305 
306  // Adjust the upper bound expression: 'range / step'.
307  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
308  ubExprs.push_back(ubExpr);
309 
310  // Adjust the corresponding IV: 'lb + i * step'.
311  BlockArgument iv = op.getBody()->getArgument(i);
312  AffineExpr lbExpr = lbMap.getResult(i);
313  unsigned nDims = lbMap.getNumDims();
314  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
315  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
316  /*symbolCount=*/lbMap.getNumSymbols(), expr);
317 
318  // Use an 'affine.apply' op that will be simplified later in subsequent
319  // canonicalizations.
320  OperandRange lbOperands = op.getLowerBoundsOperands();
321  OperandRange dimOperands = lbOperands.take_front(nDims);
322  OperandRange symbolOperands = lbOperands.drop_front(nDims);
323  SmallVector<Value, 8> applyOperands{dimOperands};
324  applyOperands.push_back(iv);
325  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
326  auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
327  iv.replaceAllUsesExcept(apply, apply);
328  }
329 
330  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
331  op.setSteps(newSteps);
332  auto newLowerMap = AffineMap::get(
333  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
334  op.setLowerBounds({}, newLowerMap);
335  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
336  ubExprs, op.getContext());
337  op.setUpperBounds(ranges.getOperands(), newUpperMap);
338 }
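
// Illustrative sketch (not from the original source) of the normalization
// performed above on a 1-d loop with constant bounds:
//
// Before:
//   affine.parallel (%i) = (2) to (18) step (4) {
//     "use"(%i) : (index) -> ()
//   }
// After (range = 18 - 2 = 16, new upper bound = 16 ceildiv 4 = 4; the original
// IV value is recovered with an affine.apply inside the body):
//   affine.parallel (%ii) = (0) to (4) {
//     %i = affine.apply affine_map<(d0) -> (d0 * 4 + 2)>(%ii)
//     "use"(%i) : (index) -> ()
//   }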
339 
340 /// Normalizes affine.for ops. If the affine.for op has only a single
341 /// iteration, it is simply promoted; otherwise it is normalized in the traditional
342 /// way, by converting the lower bound to zero and loop step to one. The upper
343 /// bound is set to the trip count of the loop. For now, original loops must
344 /// have lower bound with a single result only. There is no such restriction on
345 /// upper bounds.
346 void mlir::normalizeAffineFor(AffineForOp op) {
347  if (succeeded(promoteIfSingleIteration(op)))
348  return;
349 
350  // Check if the for op is already normalized.
351  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
352  (op.getStep() == 1))
353  return;
354 
355  // Check if the lower bound has a single result only. Loops with a max lower
356  // bound can't be normalized without additional support like
357  // affine.execute_region's. If the lower bound does not have a single result
358  // then skip this op.
359  if (op.getLowerBoundMap().getNumResults() != 1)
360  return;
361 
362  Location loc = op.getLoc();
363  OpBuilder opBuilder(op);
364  int64_t origLoopStep = op.getStep();
365 
366  // Calculate upperBound for normalized loop.
367  SmallVector<Value, 4> ubOperands;
368  AffineBound lb = op.getLowerBound();
369  AffineBound ub = op.getUpperBound();
370  ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands());
371  AffineMap origLbMap = lb.getMap();
372  AffineMap origUbMap = ub.getMap();
373 
374  // Add dimension operands from upper/lower bound.
375  for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
376  ubOperands.push_back(ub.getOperand(j));
377  for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j)
378  ubOperands.push_back(lb.getOperand(j));
379 
380  // Add symbol operands from upper/lower bound.
381  for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
382  ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
383  for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
384  ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
385 
386  // Add original result expressions from lower/upper bound map.
387  SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(),
388  origLbMap.getResults().end());
389  SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(),
390  origUbMap.getResults().end());
391  SmallVector<AffineExpr, 4> newUbExprs;
392 
393  // The original upperBound can have more than one result. For the new
394  // upperBound of this loop, take difference of all possible combinations of
395  // the ub results and the lb result, and ceildiv with the loop step. For example,
396  //
397  // affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0)
398  // will have an upperBound map as,
399  // affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv
400  // 1)>(%i0)
401  //
402  // Insert all combinations of upper/lower bound results.
403  for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) {
404  newUbExprs.push_back(
405  (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep));
406  }
407 
408  // Construct newUbMap.
409  AffineMap newUbMap =
410  AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(),
411  origLbMap.getNumSymbols() + origUbMap.getNumSymbols(),
412  newUbExprs, opBuilder.getContext());
413 
414  // Normalize the loop.
415  op.setUpperBound(ubOperands, newUbMap);
416  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
417  op.setStep(1);
418 
419  // Calculate the Value of new loopIV. Create affine.apply for the value of
420  // the loopIV in normalized loop.
421  opBuilder.setInsertionPointToStart(op.getBody());
422  SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
423  lb.getOperands().begin() +
424  lb.getMap().getNumDims());
425  // Add an extra dim operand for loopIV.
426  lbOperands.push_back(op.getInductionVar());
427  // Add symbol operands from lower bound.
428  for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
429  lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
430 
431  AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims());
432  AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0);
433  AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1,
434  origLbMap.getNumSymbols(), newIVExpr);
435  Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands);
436  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
437 }
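
// Illustrative sketch (not from the original source) of normalizeAffineFor:
//
// Before:
//   affine.for %i = 5 to 29 step 3 {
//     "use"(%i) : (index) -> ()
//   }
// After ((29 - 5) ceildiv 3 = 8; the original IV is rebuilt via affine.apply):
//   affine.for %ii = 0 to 8 {
//     %i = affine.apply affine_map<(d0) -> (d0 * 3 + 5)>(%ii)
//     "use"(%i) : (index) -> ()
//   }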
438 
439 /// Ensure that all operations that could be executed after `start`
440 /// (noninclusive) and prior to `memOp` (e.g. on a control flow/op path
441 /// between the operations) do not have the potential memory effect
442 /// `EffectType` on `memOp`. `memOp` is an operation that reads or writes to
443 /// a memref. For example, if `EffectType` is MemoryEffects::Write, this method
444 /// will check if there is no write to the memory between `start` and `memOp`
445 /// that would change the read within `memOp`.
446 template <typename EffectType, typename T>
447 static bool hasNoInterveningEffect(Operation *start, T memOp) {
448  Value memref = memOp.getMemRef();
449  bool isOriginalAllocation = memref.getDefiningOp<memref::AllocaOp>() ||
450  memref.getDefiningOp<memref::AllocOp>();
451 
452  // A boolean representing whether an intervening operation could have impacted
453  // memOp.
454  bool hasSideEffect = false;
455 
456  // Check whether the effect on memOp can be caused by a given operation op.
457  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
458  // If the effect has already been found, exit early.
459  if (hasSideEffect)
460  return;
461 
462  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
463  SmallVector<MemoryEffects::EffectInstance, 1> effects;
464  memEffect.getEffects(effects);
465 
466  bool opMayHaveEffect = false;
467  for (auto effect : effects) {
468  // If op causes EffectType on a potentially aliasing location for
469  // memOp, mark as having the effect.
470  if (isa<EffectType>(effect.getEffect())) {
471  if (isOriginalAllocation && effect.getValue() &&
472  (effect.getValue().getDefiningOp<memref::AllocaOp>() ||
473  effect.getValue().getDefiningOp<memref::AllocOp>())) {
474  if (effect.getValue() != memref)
475  continue;
476  }
477  opMayHaveEffect = true;
478  break;
479  }
480  }
481 
482  if (!opMayHaveEffect)
483  return;
484 
485  // If the side effect comes from an affine read or write, try to
486  // prove the side effecting `op` cannot reach `memOp`.
487  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
488  MemRefAccess srcAccess(op);
489  MemRefAccess destAccess(memOp);
490  // Dependence analysis is only correct if both ops operate on the same
491  // memref.
492  if (srcAccess.memref == destAccess.memref) {
493  FlatAffineValueConstraints dependenceConstraints;
494 
495  // Number of loops containing the start op and the ending operation.
496  unsigned minSurroundingLoops =
497  getNumCommonSurroundingLoops(*start, *memOp);
498 
499  // Number of loops containing the operation `op` which has the
500  // potential memory side effect and can occur on a path between
501  // `start` and `memOp`.
502  unsigned nsLoops = getNumCommonSurroundingLoops(*op, *memOp);
503 
504  // For ease, let's consider the case that `op` is a store and we're
505  // looking for other potential stores (e.g `op`) that overwrite memory
506  // after `start`, and before being read in `memOp`. In this case, we
507  // only need to consider other potential stores with depth >
508  // minSurroundingLoops since `start` would overwrite any store with a
509  // smaller number of surrounding loops before.
510  unsigned d;
511  for (d = nsLoops + 1; d > minSurroundingLoops; d--) {
512  DependenceResult result = checkMemrefAccessDependence(
513  srcAccess, destAccess, d, &dependenceConstraints,
514  /*dependenceComponents=*/nullptr);
515  if (hasDependence(result)) {
516  hasSideEffect = true;
517  return;
518  }
519  }
520 
521  // No side effect was seen, simply return.
522  return;
523  }
524  }
525  hasSideEffect = true;
526  return;
527  }
528 
529  if (op->hasTrait<OpTrait::HasRecursiveSideEffects>()) {
530  // Recurse into the regions for this op and check whether the internal
531  // operations may have the side effect `EffectType` on memOp.
532  for (Region &region : op->getRegions())
533  for (Block &block : region)
534  for (Operation &op : block)
535  checkOperation(&op);
536  return;
537  }
538 
539  // Otherwise, conservatively assume generic operations have the effect
 540  // on `memOp`.
541  hasSideEffect = true;
542  };
543 
544  // Check all paths from ancestor op `parent` to the operation `to` for the
545  // effect. It is known that `to` must be contained within `parent`.
546  auto until = [&](Operation *parent, Operation *to) {
547  // TODO check only the paths from `parent` to `to`.
 548  // Currently we fall back and check the entire parent op, rather than
 549  // just the paths from `parent` to `to`, stopping after reaching `to`.
550  // This is conservatively correct, but could be made more aggressive.
551  assert(parent->isAncestor(to));
552  checkOperation(parent);
553  };
554 
555  // Check for all paths from operation `from` to operation `untilOp` for the
556  // given memory effect.
557  std::function<void(Operation *, Operation *)> recur =
558  [&](Operation *from, Operation *untilOp) {
559  assert(
560  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
561  "Checking for side effect between two operations without a common "
562  "ancestor");
563 
564  // If the operations are in different regions, recursively consider all
 565  // paths from `from` to the parent of `to` and all paths from the parent
566  // of `to` to `to`.
567  if (from->getParentRegion() != untilOp->getParentRegion()) {
568  recur(from, untilOp->getParentOp());
569  until(untilOp->getParentOp(), untilOp);
570  return;
571  }
572 
573  // Now, assuming that `from` and `to` exist in the same region, perform
574  // a CFG traversal to check all the relevant operations.
575 
576  // Additional blocks to consider.
577  SmallVector<Block *, 2> todoBlocks;
578  {
 579  // First consider the parent block of `from` and check all operations
580  // after `from`.
581  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
582  iter != end && &*iter != untilOp; ++iter) {
583  checkOperation(&*iter);
584  }
585 
586  // If the parent of `from` doesn't contain `to`, add the successors
587  // to the list of blocks to check.
588  if (untilOp->getBlock() != from->getBlock())
589  for (Block *succ : from->getBlock()->getSuccessors())
590  todoBlocks.push_back(succ);
591  }
592 
 593  SmallPtrSet<Block *, 4> done;
 594  // Traverse the CFG until hitting `to`.
595  while (!todoBlocks.empty()) {
596  Block *blk = todoBlocks.pop_back_val();
597  if (done.count(blk))
598  continue;
599  done.insert(blk);
600  for (auto &op : *blk) {
601  if (&op == untilOp)
602  break;
603  checkOperation(&op);
604  if (&op == blk->getTerminator())
605  for (Block *succ : blk->getSuccessors())
606  todoBlocks.push_back(succ);
607  }
608  }
609  };
610  recur(start, memOp);
611  return !hasSideEffect;
612 }
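
// Illustrative example (not from the original source) for the check above,
// with EffectType = MemoryEffects::Write, `start` being the first store and
// `memOp` the load; %m is a hypothetical memref:
//
//   affine.store %v0, %m[%i] : memref<10xf32>   // start
//   affine.store %v1, %m[%i] : memref<10xf32>   // intervening write
//   %r = affine.load %m[%i] : memref<10xf32>    // memOp
//
// The intermediate store has a Write effect on the same location, so the
// function returns false; without it, the function would return true.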
613 
614 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
615 /// which the load is guaranteed to retrieve. This check involves three
616 /// components: 1) the store and load must be to the same location, 2) the store
617 /// must dominate (and therefore must always occur prior to) the load, and 3) no
618 /// other operations will overwrite the memory loaded between the given load
619 /// and store. If such a value exists, the replaced `loadOp` will be added to
620 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
621 static LogicalResult forwardStoreToLoad(
622  AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
623  SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo) {
624 
625  // The store op candidate for forwarding that satisfies all conditions
626  // to replace the load, if any.
627  Operation *lastWriteStoreOp = nullptr;
628 
629  for (auto *user : loadOp.getMemRef().getUsers()) {
630  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
631  if (!storeOp)
632  continue;
633  MemRefAccess srcAccess(storeOp);
634  MemRefAccess destAccess(loadOp);
635 
636  // 1. Check if the store and the load have mathematically equivalent
637  // affine access functions; this implies that they statically refer to the
638  // same single memref element. As an example this filters out cases like:
639  // store %A[%i0 + 1]
640  // load %A[%i0]
641  // store %A[%M]
642  // load %A[%N]
643  // Use the AffineValueMap difference based memref access equality checking.
644  if (srcAccess != destAccess)
645  continue;
646 
647  // 2. The store has to dominate the load op to be a candidate.
648  if (!domInfo.dominates(storeOp, loadOp))
649  continue;
650 
651  // 3. Ensure there is no intermediate operation which could replace the
652  // value in memory.
653  if (!hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp))
654  continue;
655 
656  // We now have a candidate for forwarding.
657  assert(lastWriteStoreOp == nullptr &&
658  "multiple simultaneous replacement stores");
659  lastWriteStoreOp = storeOp;
660  }
661 
662  if (!lastWriteStoreOp)
663  return failure();
664 
665  // Perform the actual store to load forwarding.
666  Value storeVal =
667  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
668  // Check if 2 values have the same shape. This is needed for affine vector
669  // loads and stores.
670  if (storeVal.getType() != loadOp.getValue().getType())
671  return failure();
672  loadOp.getValue().replaceAllUsesWith(storeVal);
673  // Record the memref for a later sweep to optimize away.
674  memrefsToErase.insert(loadOp.getMemRef());
675  // Record this to erase later.
676  loadOpsToErase.push_back(loadOp);
677  return success();
678 }
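
// Illustrative sketch (not from the original source) of store-to-load
// forwarding; %m and %v are hypothetical values:
//
//   affine.store %v, %m[%i] : memref<100xf32>
//   %x = affine.load %m[%i] : memref<100xf32>
//   "use"(%x) : (f32) -> ()
//
// becomes (the load is recorded in `loadOpsToErase` and removed later):
//
//   affine.store %v, %m[%i] : memref<100xf32>
//   "use"(%v) : (f32) -> ()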
679 
680 // This attempts to find stores which have no impact on the final result.
681 // A writing op writeA will be eliminated if there exists an op writeB such that
682 // 1) writeA and writeB have mathematically equivalent affine access functions.
683 // 2) writeB postdominates writeA.
684 // 3) There is no potential read between writeA and writeB.
685 static void findUnusedStore(AffineWriteOpInterface writeA,
686  SmallVectorImpl<Operation *> &opsToErase,
687  SmallPtrSetImpl<Value> &memrefsToErase,
688  PostDominanceInfo &postDominanceInfo) {
689 
690  for (Operation *user : writeA.getMemRef().getUsers()) {
691  // Only consider writing operations.
692  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
693  if (!writeB)
694  continue;
695 
696  // The operations must be distinct.
697  if (writeB == writeA)
698  continue;
699 
700  // Both operations must lie in the same region.
701  if (writeB->getParentRegion() != writeA->getParentRegion())
702  continue;
703 
704  // Both operations must write to the same memory.
705  MemRefAccess srcAccess(writeB);
706  MemRefAccess destAccess(writeA);
707 
708  if (srcAccess != destAccess)
709  continue;
710 
711  // writeB must postdominate writeA.
712  if (!postDominanceInfo.postDominates(writeB, writeA))
713  continue;
714 
715  // There cannot be an operation which reads from memory between
716  // the two writes.
717  if (!hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB))
718  continue;
719 
720  opsToErase.push_back(writeA);
721  break;
722  }
723 }
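
// Illustrative example (not from the original source): the first store below
// is dead because the second one post-dominates it, writes to the same
// element, and no read occurs in between, so it is added to `opsToErase`:
//
//   affine.store %v0, %m[%i] : memref<100xf32>
//   affine.store %v1, %m[%i] : memref<100xf32>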
724 
725 // The load to load forwarding / redundant load elimination is similar to the
726 // store to load forwarding.
727 // loadA will be replaced with loadB if:
728 // 1) loadA and loadB have mathematically equivalent affine access functions.
729 // 2) loadB dominates loadA.
730 // 3) There is no write between loadA and loadB.
731 static void loadCSE(AffineReadOpInterface loadA,
732  SmallVectorImpl<Operation *> &loadOpsToErase,
733  DominanceInfo &domInfo) {
 734  SmallVector<AffineReadOpInterface, 4> loadCandidates;
 735  for (auto *user : loadA.getMemRef().getUsers()) {
736  auto loadB = dyn_cast<AffineReadOpInterface>(user);
737  if (!loadB || loadB == loadA)
738  continue;
739 
740  MemRefAccess srcAccess(loadB);
741  MemRefAccess destAccess(loadA);
742 
743  // 1. The accesses have to be to the same location.
744  if (srcAccess != destAccess) {
745  continue;
746  }
747 
748  // 2. loadB has to dominate loadA to be a candidate.
749  if (!domInfo.dominates(loadB, loadA))
750  continue;
751 
752  // 3. There is no write between loadA and loadB.
753  if (!hasNoInterveningEffect<MemoryEffects::Write>(loadB.getOperation(),
754  loadA))
755  continue;
756 
757  // Check if two values have the same shape. This is needed for affine vector
758  // loads.
759  if (loadB.getValue().getType() != loadA.getValue().getType())
760  continue;
761 
762  loadCandidates.push_back(loadB);
763  }
764 
765  // Of the legal load candidates, use the one that dominates all others
766  // to minimize the subsequent need for loadCSE.
767  Value loadB;
768  for (AffineReadOpInterface option : loadCandidates) {
769  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
770  return depStore == option ||
771  domInfo.dominates(option.getOperation(),
772  depStore.getOperation());
773  })) {
774  loadB = option.getValue();
775  break;
776  }
777  }
778 
779  if (loadB) {
780  loadA.getValue().replaceAllUsesWith(loadB);
781  // Record this to erase later.
782  loadOpsToErase.push_back(loadA);
783  }
784 }
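
// Illustrative example (not from the original source) of load CSE: the second
// load below is redundant and its uses are replaced with the first load's
// result, provided no write to %m[%i] occurs in between:
//
//   %a = affine.load %m[%i] : memref<100xf32>
//   %b = affine.load %m[%i] : memref<100xf32>   // replaced by %a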
785 
786 // The store to load forwarding and load CSE rely on three conditions:
787 //
788 // 1) store/load providing a replacement value and load being replaced need to
789 // have mathematically equivalent affine access functions (checked after full
790 // composition of load/store operands); this implies that they access the same
791 // single memref element for all iterations of the common surrounding loop,
792 //
793 // 2) the store/load op should dominate the load op,
794 //
795 // 3) no operation that may write to memory read by the load being replaced can
796 // occur after executing the instruction (load or store) providing the
797 // replacement value and before the load being replaced (thus potentially
798 // allowing overwriting the memory read by the load).
799 //
800 // The above conditions are simple to check, sufficient, and powerful for most
801 // cases in practice --- sufficient, but not necessary --- since they
802 // don't reason about loops that are guaranteed to execute at least once or
803 // multiple sources to forward from.
804 //
805 // TODO: more forwarding can be done when support for
806 // loop/conditional live-out SSA values is available.
807 // TODO: do general dead store elimination for memref's. This pass
808 // currently only eliminates the stores if no other loads/uses (other
809 // than dealloc) remain.
810 //
811 void mlir::affineScalarReplace(FuncOp f, DominanceInfo &domInfo,
812  PostDominanceInfo &postDomInfo) {
813  // Load op's whose results were replaced by those forwarded from stores.
814  SmallVector<Operation *, 8> opsToErase;
815 
816  // A list of memref's that are potentially dead / could be eliminated.
817  SmallPtrSet<Value, 4> memrefsToErase;
818 
819  // Walk all load's and perform store to load forwarding.
820  f.walk([&](AffineReadOpInterface loadOp) {
821  if (failed(
822  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo))) {
823  loadCSE(loadOp, opsToErase, domInfo);
824  }
825  });
826 
827  // Erase all load op's whose results were replaced with store fwd'ed ones.
828  for (auto *op : opsToErase)
829  op->erase();
830  opsToErase.clear();
831 
832  // Walk all store's and perform unused store elimination
833  f.walk([&](AffineWriteOpInterface storeOp) {
834  findUnusedStore(storeOp, opsToErase, memrefsToErase, postDomInfo);
835  });
836  // Erase all store op's which don't impact the program
837  for (auto *op : opsToErase)
838  op->erase();
839 
840  // Check if the store fwd'ed memrefs are now left with only stores and can
841  // thus be completely deleted. Note: the canonicalize pass should be able
842  // to do this as well, but we'll do it here since we collected these anyway.
843  for (auto memref : memrefsToErase) {
844  // If the memref hasn't been alloc'ed in this function, skip.
845  Operation *defOp = memref.getDefiningOp();
846  if (!defOp || !isa<memref::AllocOp>(defOp))
847  // TODO: if the memref was returned by a 'call' operation, we
848  // could still erase it if the call had no side-effects.
849  continue;
850  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
851  return !isa<AffineWriteOpInterface, memref::DeallocOp>(ownerOp);
852  }))
853  continue;
854 
855  // Erase all stores, the dealloc, and the alloc on the memref.
856  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
857  user->erase();
858  defOp->erase();
859  }
860 }
861 
862 // Perform the replacement in `op`.
863 LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
864  Operation *op,
865  ArrayRef<Value> extraIndices,
866  AffineMap indexRemap,
867  ArrayRef<Value> extraOperands,
868  ArrayRef<Value> symbolOperands,
869  bool allowNonDereferencingOps) {
870  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
871  (void)newMemRefRank; // unused in opt mode
872  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
873  (void)oldMemRefRank; // unused in opt mode
874  if (indexRemap) {
875  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
876  "symbolic operand count mismatch");
877  assert(indexRemap.getNumInputs() ==
878  extraOperands.size() + oldMemRefRank + symbolOperands.size());
879  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
880  } else {
881  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
882  }
883 
884  // Assert same elemental type.
885  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
886  newMemRef.getType().cast<MemRefType>().getElementType());
887 
888  SmallVector<unsigned, 2> usePositions;
889  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
890  if (opEntry.value() == oldMemRef)
891  usePositions.push_back(opEntry.index());
892  }
893 
894  // If memref doesn't appear, nothing to do.
895  if (usePositions.empty())
896  return success();
897 
898  if (usePositions.size() > 1) {
899  // TODO: extend it for this case when needed (rare).
900  assert(false && "multiple dereferencing uses in a single op not supported");
901  return failure();
902  }
903 
904  unsigned memRefOperandPos = usePositions.front();
905 
906  OpBuilder builder(op);
907  // The following checks if op is dereferencing memref and performs the access
908  // index rewrites.
909  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
910  if (!affMapAccInterface) {
911  if (!allowNonDereferencingOps) {
912  // Failure: memref used in a non-dereferencing context (potentially
913  // escapes); no replacement in these cases unless allowNonDereferencingOps
914  // is set.
915  return failure();
916  }
917  op->setOperand(memRefOperandPos, newMemRef);
918  return success();
919  }
920  // Perform index rewrites for the dereferencing op and then replace the op
921  NamedAttribute oldMapAttrPair =
922  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
923  AffineMap oldMap = oldMapAttrPair.getValue().cast<AffineMapAttr>().getValue();
924  unsigned oldMapNumInputs = oldMap.getNumInputs();
925  SmallVector<Value, 4> oldMapOperands(
926  op->operand_begin() + memRefOperandPos + 1,
927  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
928 
929  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
930  SmallVector<Value, 4> oldMemRefOperands;
931  SmallVector<Value, 4> affineApplyOps;
932  oldMemRefOperands.reserve(oldMemRefRank);
933  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
934  for (auto resultExpr : oldMap.getResults()) {
935  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
936  oldMap.getNumSymbols(), resultExpr);
937  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
938  oldMapOperands);
939  oldMemRefOperands.push_back(afOp);
940  affineApplyOps.push_back(afOp);
941  }
942  } else {
943  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
944  }
945 
946  // Construct new indices as a remap of the old ones if a remapping has been
947  // provided. The indices of a memref come right after it, i.e.,
948  // at position memRefOperandPos + 1.
949  SmallVector<Value, 4> remapOperands;
950  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
951  symbolOperands.size());
952  remapOperands.append(extraOperands.begin(), extraOperands.end());
953  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
954  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
955 
956  SmallVector<Value, 4> remapOutputs;
957  remapOutputs.reserve(oldMemRefRank);
958 
959  if (indexRemap &&
960  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
961  // Remapped indices.
962  for (auto resultExpr : indexRemap.getResults()) {
963  auto singleResMap = AffineMap::get(
964  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
965  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
966  remapOperands);
967  remapOutputs.push_back(afOp);
968  affineApplyOps.push_back(afOp);
969  }
970  } else {
971  // No remapping specified.
972  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
973  }
974 
975  SmallVector<Value, 4> newMapOperands;
976  newMapOperands.reserve(newMemRefRank);
977 
978  // Prepend 'extraIndices' in 'newMapOperands'.
979  for (Value extraIndex : extraIndices) {
980  assert(extraIndex.getDefiningOp()->getNumResults() == 1 &&
981  "single result op's expected to generate these indices");
982  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
983  "invalid memory op index");
984  newMapOperands.push_back(extraIndex);
985  }
986 
987  // Append 'remapOutputs' to 'newMapOperands'.
988  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
989 
990  // Create new fully composed AffineMap for new op to be created.
991  assert(newMapOperands.size() == newMemRefRank);
992  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
993  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
994  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
995  newMap = simplifyAffineMap(newMap);
996  canonicalizeMapAndOperands(&newMap, &newMapOperands);
997  // Remove any affine.apply's that became dead as a result of composition.
998  for (Value value : affineApplyOps)
999  if (value.use_empty())
1000  value.getDefiningOp()->erase();
1001 
1002  OperationState state(op->getLoc(), op->getName());
1003  // Construct the new operation using this memref.
1004  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1005  // Insert the non-memref operands.
1006  state.operands.append(op->operand_begin(),
1007  op->operand_begin() + memRefOperandPos);
1008  // Insert the new memref value.
1009  state.operands.push_back(newMemRef);
1010 
1011  // Insert the new memref map operands.
1012  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1013 
1014  // Insert the remaining operands unmodified.
1015  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1016  oldMapNumInputs,
1017  op->operand_end());
1018 
1019  // Result types don't change. Both memref's are of the same elemental type.
1020  state.types.reserve(op->getNumResults());
1021  for (auto result : op->getResults())
1022  state.types.push_back(result.getType());
1023 
1024  // Add attribute for 'newMap', other Attributes do not change.
1025  auto newMapAttr = AffineMapAttr::get(newMap);
1026  for (auto namedAttr : op->getAttrs()) {
1027  if (namedAttr.getName() == oldMapAttrPair.getName())
1028  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1029  else
1030  state.attributes.push_back(namedAttr);
1031  }
1032 
1033  // Create the new operation.
1034  auto *repOp = builder.createOperation(state);
1035  op->replaceAllUsesWith(repOp);
1036  op->erase();
1037 
1038  return success();
1039 }
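
// Illustrative sketch (not from the original source) of a use of the
// replacement above: remapping a 1-d memref into a hypothetical 2-d tiled
// memref with indexRemap = affine_map<(d0) -> (d0 floordiv 32, d0 mod 32)>.
//
//   %v = affine.load %old[%i] : memref<256xf32>
// becomes (after composition of the generated affine.apply ops)
//   %v = affine.load %new[%i floordiv 32, %i mod 32] : memref<8x32xf32>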
1040 
1041 LogicalResult mlir::replaceAllMemRefUsesWith(
1042  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1043  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1044  ArrayRef<Value> symbolOperands, Operation *domOpFilter,
1045  Operation *postDomOpFilter, bool allowNonDereferencingOps,
1046  bool replaceInDeallocOp) {
1047  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
1048  (void)newMemRefRank; // unused in opt mode
1049  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
1050  (void)oldMemRefRank;
1051  if (indexRemap) {
1052  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1053  "symbol operand count mismatch");
1054  assert(indexRemap.getNumInputs() ==
1055  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1056  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1057  } else {
1058  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1059  }
1060 
1061  // Assert same elemental type.
1062  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
1063  newMemRef.getType().cast<MemRefType>().getElementType());
1064 
1065  std::unique_ptr<DominanceInfo> domInfo;
1066  std::unique_ptr<PostDominanceInfo> postDomInfo;
1067  if (domOpFilter)
1068  domInfo =
1069  std::make_unique<DominanceInfo>(domOpFilter->getParentOfType<FuncOp>());
1070 
1071  if (postDomOpFilter)
1072  postDomInfo = std::make_unique<PostDominanceInfo>(
1073  postDomOpFilter->getParentOfType<FuncOp>());
1074 
1075  // Walk all uses of old memref; collect ops to perform replacement. We use a
1076  // DenseSet since an operation could potentially have multiple uses of a
1077  // memref (although rare), and the replacement later is going to erase ops.
1078  DenseSet<Operation *> opsToReplace;
1079  for (auto *op : oldMemRef.getUsers()) {
1080  // Skip this use if it's not dominated by domOpFilter.
1081  if (domOpFilter && !domInfo->dominates(domOpFilter, op))
1082  continue;
1083 
1084  // Skip this use if it's not post-dominated by postDomOpFilter.
1085  if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
1086  continue;
1087 
1088  // Skip dealloc's - no replacement is necessary, and a memref replacement
1089  // at other uses doesn't hurt these dealloc's.
1090  if (isa<memref::DeallocOp>(op) && !replaceInDeallocOp)
1091  continue;
1092 
1093  // Check if the memref was used in a non-dereferencing context. It is fine
1094  // for the memref to be used in a non-dereferencing way outside of the
1095  // region where this replacement is happening.
1096  if (!isa<AffineMapAccessInterface>(*op)) {
1097  if (!allowNonDereferencingOps) {
1098  LLVM_DEBUG(llvm::dbgs()
1099  << "Memref replacement failed: non-dereferencing memref op: \n"
1100  << *op << '\n');
1101  return failure();
1102  }
1103  // Non-dereferencing ops with the MemRefsNormalizable trait are
1104  // supported for replacement.
1105  if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
1106  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1107  "memrefs normalizable trait: \n"
1108  << *op << '\n');
1109  return failure();
1110  }
1111  }
1112 
1113  // We'll first collect and then replace --- since replacement erases the op
1114  // that has the use, and that op could be postDomFilter or domFilter itself!
1115  opsToReplace.insert(op);
1116  }
1117 
1118  for (auto *op : opsToReplace) {
1119  if (failed(replaceAllMemRefUsesWith(
1120  oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
1121  symbolOperands, allowNonDereferencingOps)))
1122  llvm_unreachable("memref replacement guaranteed to succeed here");
1123  }
1124 
1125  return success();
1126 }
1127 
1128 /// Given an operation, inserts one or more single result affine
1129 /// apply operations, results of which are exclusively used by this
1130 /// operation. The operands of these newly created affine apply ops are
1131 /// guaranteed to be loop iterators or terminal symbols of a function.
1132 ///
1133 /// Before
1134 ///
1135 /// affine.for %i = 0 to #map(%N)
1136 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1137 /// "send"(%idx, %A, ...)
1138 /// "compute"(%idx)
1139 ///
1140 /// After
1141 ///
1142 /// affine.for %i = 0 to #map(%N)
1143 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1144 /// "send"(%idx, %A, ...)
1145 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1146 /// "compute"(%idx_)
1147 ///
1148 /// This allows applying different transformations on send and compute (e.g.,
1149 /// different shifts/delays).
1150 ///
1151 /// Returns nullptr either if none of opInst's operands were the result of an
1152 /// affine.apply and thus there was no affine computation slice to create, or if
1153 /// all the affine.apply op's supplying operands to this opInst did not have any
1154 /// uses besides this opInst; otherwise returns the list of affine.apply
1155 /// operations created in output argument `sliceOps`.
1156 void mlir::createAffineComputationSlice(
1157  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1158  // Collect all operands that are results of affine apply ops.
1159  SmallVector<Value, 4> subOperands;
1160  subOperands.reserve(opInst->getNumOperands());
1161  for (auto operand : opInst->getOperands())
1162  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1163  subOperands.push_back(operand);
1164 
1165  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1166  SmallVector<Operation *, 4> affineApplyOps;
1167  getReachableAffineApplyOps(subOperands, affineApplyOps);
1168  // Skip transforming if there are no affine maps to compose.
1169  if (affineApplyOps.empty())
1170  return;
1171 
1172  // Check if all uses of the affine apply op's lie only in this op, in
1173  // which case there would be nothing to do.
1174  bool localized = true;
1175  for (auto *op : affineApplyOps) {
1176  for (auto result : op->getResults()) {
1177  for (auto *user : result.getUsers()) {
1178  if (user != opInst) {
1179  localized = false;
1180  break;
1181  }
1182  }
1183  }
1184  }
1185  if (localized)
1186  return;
1187 
1188  OpBuilder builder(opInst);
1189  SmallVector<Value, 4> composedOpOperands(subOperands);
1190  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1191  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1192 
1193  // Create an affine.apply for each of the map results.
1194  sliceOps->reserve(composedMap.getNumResults());
1195  for (auto resultExpr : composedMap.getResults()) {
1196  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1197  composedMap.getNumSymbols(), resultExpr);
1198  sliceOps->push_back(builder.create<AffineApplyOp>(
1199  opInst->getLoc(), singleResMap, composedOpOperands));
1200  }
1201 
1202  // Construct the new operands that include the results from the composed
1203  // affine apply op above instead of existing ones (subOperands). So, they
1204  // differ from opInst's operands only for those operands in 'subOperands', for
1205  // which they will be replaced by the corresponding one from 'sliceOps'.
1206  SmallVector<Value, 4> newOperands(opInst->getOperands());
1207  for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
1208  // Replace the subOperands from among the new operands.
1209  unsigned j, f;
1210  for (j = 0, f = subOperands.size(); j < f; j++) {
1211  if (newOperands[i] == subOperands[j])
1212  break;
1213  }
1214  if (j < subOperands.size()) {
1215  newOperands[i] = (*sliceOps)[j];
1216  }
1217  }
1218  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) {
1219  opInst->setOperand(idx, newOperands[idx]);
1220  }
1221 }
1222 
1223 /// Enum to set patterns of affine expr in tiled-layout map.
1224 /// TileFloorDiv: <dim expr> div <tile size>
1225 /// TileMod: <dim expr> mod <tile size>
1226 /// TileNone: None of the above
1227 /// Example:
1228 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1229 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1230 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1231 /// "d0 mod 128" and "d1 mod 256" ==> TileMod
1232 enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1233 
1234 /// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
1235 /// being floordiv'ed by respective tile sizes appear in a mod with the same
1236 /// tile sizes, and no other expression involves those k dimensions. This
1237 /// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
1238 /// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
1239 /// tiled layout, an empty vector is returned.
1240 static LogicalResult getTileSizePos(
1241  AffineMap map,
1242  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1243  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1244  // `floordiv` and its position in `map` output.
1245  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1246  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1247  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1248  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
1249  unsigned pos = 0;
1250  for (AffineExpr expr : map.getResults()) {
1251  if (expr.getKind() == AffineExprKind::FloorDiv) {
1252  AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
1253  if (binaryExpr.getRHS().isa<AffineConstantExpr>())
1254  floordivExprs.emplace_back(
1255  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1256  }
1257  pos++;
1258  }
1259  // Not tiled layout if `floordivExprs` is empty.
1260  if (floordivExprs.empty()) {
1261  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1262  return success();
1263  }
1264 
1265  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1266  // not tiled layout.
1267  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1268  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1269  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1270  unsigned floordivPos = std::get<2>(fexpr);
1271 
1272  // Walk the affine exprs of the `map` output except `fexpr`, and check if LHS
1273  // and RHS of `fexpr` are used in the LHS and RHS of a `mod`. If the LHS of
1274  // `fexpr` is used in any other expr, the map is not tiled. Non-tiled examples:
1275  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1276  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1277  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1278  // 256)>
1279  bool found = false;
1280  pos = 0;
1281  for (AffineExpr expr : map.getResults()) {
1282  bool notTiled = false;
1283  if (pos != floordivPos) {
1284  expr.walk([&](AffineExpr e) {
1285  if (e == floordivExprLHS) {
1286  if (expr.getKind() == AffineExprKind::Mod) {
1287  AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
1288  // If LHS and RHS of `mod` are the same with those of floordiv.
1289  if (floordivExprLHS == binaryExpr.getLHS() &&
1290  floordivExprRHS == binaryExpr.getRHS()) {
1291  // Save tile size (RHS of `mod`), and position of `floordiv` and
1292  // `mod` if same expr with `mod` is not found yet.
1293  if (!found) {
1294  tileSizePos.emplace_back(
1295  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1296  found = true;
1297  } else {
1298  // Non tiled layout: has multiple `mod` with the same LHS.
1299  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1300  // mod 256, d2 mod 256)>
1301  notTiled = true;
1302  }
1303  } else {
1304  // Non tiled layout: RHS of `mod` is different from `floordiv`.
1305  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1306  // mod 128)>
1307  notTiled = true;
1308  }
1309  } else {
1310  // Non tiled layout: LHS is the same, but not `mod`.
1311  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1312  // floordiv 256)>
1313  notTiled = true;
1314  }
1315  }
1316  });
1317  }
1318  if (notTiled) {
1319  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1320  return success();
1321  }
1322  pos++;
1323  }
1324  }
1325  return success();
1326 }
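
// Illustrative example (not from the original source) of getTileSizePos output
// for #tiled_2d_128x256 = affine_map<(d0, d1)
//     -> (d0 floordiv 128, d1 floordiv 256, d0 mod 128, d1 mod 256)>:
// tileSizePos = {(128, /*floordiv pos=*/0, /*mod pos=*/2),
//                (256, /*floordiv pos=*/1, /*mod pos=*/3)}.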
1327 
1328 /// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1329 /// after normalization. Dimensions that include dynamic dimensions in the map
1330 /// output will become dynamic dimensions. Return true if `dim` is dynamic
1331 /// dimension.
1332 ///
1333 /// Example:
1334 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1335 ///
1336 /// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output are dynamic.
1337 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1338 static bool
1339 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1340  SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
1341  MLIRContext *context) {
1342  bool isDynamicDim = false;
1343  AffineExpr expr = layoutMap.getResults()[dim];
1344  // Check if affine expr of the dimension includes dynamic dimension of input
1345  // memrefType.
1346  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
1347  if (e.isa<AffineDimExpr>()) {
1348  for (unsigned dm : inMemrefTypeDynDims) {
1349  if (e == getAffineDimExpr(dm, context)) {
1350  isDynamicDim = true;
1351  }
1352  }
1353  }
1354  });
1355  return isDynamicDim;
1356 }
1357 
1358 /// Create affine expr to calculate dimension size for a tiled-layout map.
1359 static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1360  TileExprPattern pat) {
1361  // Create map output for the patterns.
1362  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1363  // "mod <tile size>" ==> "<tile size>"
1364  AffineExpr newMapOutput;
1365  AffineBinaryOpExpr binaryExpr = nullptr;
1366  switch (pat) {
1367  case TileExprPattern::TileMod:
1368  binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
1369  newMapOutput = binaryExpr.getRHS();
1370  break;
1371  case TileExprPattern::TileFloorDiv:
1372  binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
1373  newMapOutput = getAffineBinaryOpExpr(
1374  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1375  break;
1376  default:
1377  newMapOutput = oldMapOutput;
1378  }
1379  return newMapOutput;
1380 }
1381 
1382 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1383 /// create `newDynamicSizes` from them by using AffineApplyOp.
1384 ///
1385 /// Steps for normalizing dynamic memrefs for a tiled layout map
1386 /// Example:
1387 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1388 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1389 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1390 ///
1391 /// (Before this function)
1392 /// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
1393 /// single layout map is supported.
1394 ///
1395 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1396 /// is memref<4x?x?xf32> in the above example.
1397 ///
1398 /// (In this function)
1399 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1400 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1401 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1402 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1403 /// - New map in the above example
1404 /// #map0 = affine_map<(d0, d1) -> (d0)>
1405 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1406 /// #map2 = affine_map<(d0, d1) -> (32)>
1407 ///
 1408 /// 4. Create AffineApplyOps to apply the new maps. The outputs of the
 1409 /// AffineApplyOps are used as the dynamicSizes of the new AllocOp.
1410 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1411 /// %c4 = arith.constant 4 : index
1412 /// %1 = affine.apply #map1(%c4, %0)
1413 /// %2 = affine.apply #map2(%c4, %0)
1414 static void createNewDynamicSizes(MemRefType oldMemRefType,
1415  MemRefType newMemRefType, AffineMap map,
1416  memref::AllocOp *allocOp, OpBuilder b,
1417  SmallVectorImpl<Value> &newDynamicSizes) {
1418  // Create new input for AffineApplyOp.
1419  SmallVector<Value, 4> inAffineApply;
1420  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1421  unsigned dynIdx = 0;
1422  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1423  if (oldMemRefShape[d] < 0) {
1424  // Use dynamicSizes of allocOp for dynamic dimension.
1425  inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
1426  dynIdx++;
1427  } else {
1428  // Create ConstantOp for static dimension.
1429  Attribute constantAttr =
1430  b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1431  inAffineApply.emplace_back(
1432  b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
1433  }
1434  }
1435 
 1436  // Create a new map to calculate each dimension size of the new memref for
 1437  // each original map output. Only for dynamic dimensions of `newMemRefType`.
1438  unsigned newDimIdx = 0;
1439  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
 1440  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
 1441  (void)getTileSizePos(map, tileSizePos);
1442  for (AffineExpr expr : map.getResults()) {
1443  if (newMemRefShape[newDimIdx] < 0) {
 1444  // Create new maps to calculate each dimension size of the new memref.
 1445  TileExprPattern pat = TileExprPattern::TileNone;
 1446  for (auto pos : tileSizePos) {
 1447  if (newDimIdx == std::get<1>(pos))
 1448  pat = TileExprPattern::TileFloorDiv;
 1449  else if (newDimIdx == std::get<2>(pos))
 1450  pat = TileExprPattern::TileMod;
 1451  }
1452  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1453  AffineMap newMap =
1454  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1455  Value affineApp =
1456  b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
1457  newDynamicSizes.emplace_back(affineApp);
1458  }
1459  newDimIdx++;
1460  }
1461 }
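// Continuing the example in the comment above: the affine.apply results %1 and
// %2 are appended to `newDynamicSizes`, so the caller can rebuild the alloc
// roughly as (sketch, SSA names assumed):
//   %3 = memref.alloc(%1, %2) : memref<4x?x?xf32>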
1462 
 1463  // TODO: Works for static memrefs and dynamic memrefs with a tiled-layout map.
1464 LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
1465  MemRefType memrefType = allocOp->getType();
1466  OpBuilder b(*allocOp);
1467 
1468  // Fetch a new memref type after normalizing the old memref to have an
1469  // identity map layout.
1470  MemRefType newMemRefType =
1471  normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
1472  if (newMemRefType == memrefType)
1473  // Either memrefType already had an identity map or the map couldn't be
1474  // transformed to an identity map.
1475  return failure();
1476 
1477  Value oldMemRef = allocOp->getResult();
1478 
1479  SmallVector<Value, 4> symbolOperands(allocOp->symbolOperands());
1480  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1481  memref::AllocOp newAlloc;
 1482  // Check if `layoutMap` is a tiled layout. Only a single layout map is
1483  // supported for normalizing dynamic memrefs.
 1484  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
 1485  (void)getTileSizePos(layoutMap, tileSizePos);
1486  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1487  MemRefType oldMemRefType = oldMemRef.getType().cast<MemRefType>();
1488  SmallVector<Value, 4> newDynamicSizes;
1489  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1490  newDynamicSizes);
1491  // Add the new dynamic sizes in new AllocOp.
1492  newAlloc =
1493  b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1494  newDynamicSizes, allocOp->alignmentAttr());
1495  } else {
1496  newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1497  allocOp->alignmentAttr());
1498  }
1499  // Replace all uses of the old memref.
1500  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1501  /*extraIndices=*/{},
1502  /*indexRemap=*/layoutMap,
1503  /*extraOperands=*/{},
1504  /*symbolOperands=*/symbolOperands,
1505  /*domOpFilter=*/nullptr,
1506  /*postDomOpFilter=*/nullptr,
1507  /*allowNonDereferencingOps=*/true))) {
1508  // If it failed (due to escapes for example), bail out.
1509  newAlloc.erase();
1510  return failure();
1511  }
 1512  // Replace any uses of the original alloc op and erase it. All remaining uses
 1513  // have to be dealloc's; replaceAllMemRefUsesWith above would've failed otherwise.
1514  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
1515  return isa<memref::DeallocOp>(op);
1516  }));
1517  oldMemRef.replaceAllUsesWith(newAlloc);
1518  allocOp->erase();
1519  return success();
1520 }
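// A hypothetical driver sketch for the function above. Since normalizeMemRef
// erases the alloc on success, collect the allocs first and then rewrite them
// (`funcOp` is an assumed enclosing FuncOp):
//   SmallVector<memref::AllocOp, 4> allocOps;
//   funcOp.walk([&](memref::AllocOp op) { allocOps.push_back(op); });
//   for (memref::AllocOp op : allocOps)
//     (void)normalizeMemRef(&op); // on failure the original alloc is untouched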
1521 
1522 MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
1523  unsigned numSymbolicOperands) {
1524  unsigned rank = memrefType.getRank();
1525  if (rank == 0)
1526  return memrefType;
1527 
1528  if (memrefType.getLayout().isIdentity()) {
 1529  // Either no layout map is associated with this memref or the memref has
 1530  // a trivial (identity) layout map.
1531  return memrefType;
1532  }
1533  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1534 
1535  // We don't do any checks for one-to-one'ness; we assume that it is
1536  // one-to-one.
1537 
1538  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1539  // for now.
1540  // TODO: Normalize the other types of dynamic memrefs.
 1541  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
 1542  (void)getTileSizePos(layoutMap, tileSizePos);
1543  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1544  return memrefType;
1545 
1546  // We have a single map that is not an identity map. Create a new memref
1547  // with the right shape and an identity layout map.
1548  ArrayRef<int64_t> shape = memrefType.getShape();
 1549  // FlatAffineConstraints may later use the symbolic operands.
1550  FlatAffineConstraints fac(rank, numSymbolicOperands);
1551  SmallVector<unsigned, 4> memrefTypeDynDims;
1552  for (unsigned d = 0; d < rank; ++d) {
 1553  // Use the constraint system only for static dimensions.
1554  if (shape[d] > 0) {
 1555  fac.addBound(FlatAffineConstraints::LB, d, 0);
 1556  fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
1557  } else {
1558  memrefTypeDynDims.emplace_back(d);
1559  }
1560  }
1561  // We compose this map with the original index (logical) space to derive
1562  // the upper bounds for the new index space.
1563  unsigned newRank = layoutMap.getNumResults();
1564  if (failed(fac.composeMatchingMap(layoutMap)))
1565  return memrefType;
1566  // TODO: Handle semi-affine maps.
1567  // Project out the old data dimensions.
1568  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
1569  SmallVector<int64_t, 4> newShape(newRank);
1570  for (unsigned d = 0; d < newRank; ++d) {
1571  // Check if each dimension of normalized memrefType is dynamic.
1572  bool isDynDim = isNormalizedMemRefDynamicDim(
1573  d, layoutMap, memrefTypeDynDims, b.getContext());
1574  if (isDynDim) {
1575  newShape[d] = -1;
1576  } else {
1577  // The lower bound for the shape is always zero.
1578  auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
1579  // For a static memref and an affine map with no symbols, this is
1580  // always bounded.
1581  assert(ubConst.hasValue() && "should always have an upper bound");
1582  if (ubConst.getValue() < 0)
1583  // This is due to an invalid map that maps to a negative space.
1584  return memrefType;
 1585  // For a static dimension, the normalized size is the constant upper bound plus one.
1586  newShape[d] = ubConst.getValue() + 1;
1587  }
1588  }
1589 
1590  // Create the new memref type after trivializing the old layout map.
1591  MemRefType newMemRefType =
1592  MemRefType::Builder(memrefType)
1593  .setShape(newShape)
1594  .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
1595 
1596  return newMemRefType;
1597 }
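// Worked example (assumed input type): for
//   memref<64xf32, affine_map<(d0) -> (d0 floordiv 32, d0 mod 32)>>
// the constraint system bounds d0 to [0, 63]; after composing the layout map
// and projecting out d0, the two results have constant upper bounds 1 and 31,
// so the normalized type is memref<2x32xf32> with an identity layout.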