1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Dialect/Affine/Utils.h"
15 
16 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
17 #include "mlir/Dialect/Affine/Analysis/Utils.h"
18 #include "mlir/Dialect/Affine/IR/AffineOps.h"
19 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
20 #include "mlir/Dialect/Affine/LoopUtils.h"
21 #include "mlir/Dialect/Arith/Utils/Utils.h"
22 #include "mlir/Dialect/Func/IR/FuncOps.h"
23 #include "mlir/Dialect/MemRef/IR/MemRef.h"
24 #include "mlir/IR/AffineExprVisitor.h"
25 #include "mlir/IR/Dominance.h"
26 #include "mlir/IR/IRMapping.h"
28 #include "mlir/IR/IntegerSet.h"
29 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
30 #include "llvm/Support/LogicalResult.h"
31 #include <optional>
32 
33 #define DEBUG_TYPE "affine-utils"
34 
35 using namespace mlir;
36 using namespace affine;
37 using namespace presburger;
38 
39 namespace {
40 /// Visits affine expressions recursively and builds the sequence of operations
41 /// that corresponds to them. Visitation functions return a Value of the
42 /// expression subtree they visited or `nullptr` on error.
43 class AffineApplyExpander
44  : public AffineExprVisitor<AffineApplyExpander, Value> {
45 public:
46  /// This internal class expects arguments to be non-null; checks must be
47  /// performed at the call site.
48  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
49  ValueRange symbolValues, Location loc)
50  : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
51  loc(loc) {}
52 
53  template <typename OpTy>
54  Value buildBinaryExpr(AffineBinaryOpExpr expr) {
55  auto lhs = visit(expr.getLHS());
56  auto rhs = visit(expr.getRHS());
57  if (!lhs || !rhs)
58  return nullptr;
59  auto op = builder.create<OpTy>(loc, lhs, rhs);
60  return op.getResult();
61  }
62 
63  Value visitAddExpr(AffineBinaryOpExpr expr) {
64  return buildBinaryExpr<arith::AddIOp>(expr);
65  }
66 
67  Value visitMulExpr(AffineBinaryOpExpr expr) {
68  return buildBinaryExpr<arith::MulIOp>(expr);
69  }
70 
71  /// Euclidean modulo operation: negative RHS is not allowed.
72  /// Remainder of the euclidean integer division is always non-negative.
73  ///
74  /// Implemented as
75  ///
76  /// a mod b =
77  /// let remainder = srem a, b;
78  /// negative = a < 0 in
79  /// select negative, remainder + b, remainder.
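 /// For example (illustrative values): (-7) mod 3 lowers to srem(-7, 3) = -1;
 /// the remainder is negative, so the result is corrected to -1 + 3 = 2.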
80  Value visitModExpr(AffineBinaryOpExpr expr) {
81  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
82  if (rhsConst.getValue() <= 0) {
83  emitError(loc, "modulo by non-positive value is not supported");
84  return nullptr;
85  }
86  }
87 
88  auto lhs = visit(expr.getLHS());
89  auto rhs = visit(expr.getRHS());
90  assert(lhs && rhs && "unexpected affine expr lowering failure");
91 
92  Value remainder = builder.create<arith::RemSIOp>(loc, lhs, rhs);
93  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
94  Value isRemainderNegative = builder.create<arith::CmpIOp>(
95  loc, arith::CmpIPredicate::slt, remainder, zeroCst);
96  Value correctedRemainder =
97  builder.create<arith::AddIOp>(loc, remainder, rhs);
98  Value result = builder.create<arith::SelectOp>(
99  loc, isRemainderNegative, correctedRemainder, remainder);
100  return result;
101  }
102 
103  /// Floor division operation (rounds towards negative infinity).
104  ///
105  /// For positive divisors, it can be implemented without branching and with a
106  /// single division operation as
107  ///
108  /// a floordiv b =
109  /// let negative = a < 0 in
110  /// let absolute = negative ? -a - 1 : a in
111  /// let quotient = absolute / b in
112  /// negative ? -quotient - 1 : quotient
113  ///
114  /// Note: this lowering does not use arith.floordivsi because the lowering of
115  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
116  /// not one but two arith.divsi. That could be changed to one divsi, but one
117  /// way or another, going through arith.floordivsi will result in more complex
118  /// IR because arith.floordivsi is more general than affine floordiv in that
119  /// it supports negative RHS.
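 /// For example (illustrative values): (-7) floordiv 2 takes the negative path:
 /// dividend = -1 - (-7) = 6, quotient = 3, result = -1 - 3 = -4 = floor(-3.5).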
120  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
121  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
122  if (rhsConst.getValue() <= 0) {
123  emitError(loc, "division by non-positive value is not supported");
124  return nullptr;
125  }
126  }
127  auto lhs = visit(expr.getLHS());
128  auto rhs = visit(expr.getRHS());
129  assert(lhs && rhs && "unexpected affine expr lowering failure");
130 
131  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
132  Value noneCst = builder.create<arith::ConstantIndexOp>(loc, -1);
133  Value negative = builder.create<arith::CmpIOp>(
134  loc, arith::CmpIPredicate::slt, lhs, zeroCst);
135  Value negatedDecremented = builder.create<arith::SubIOp>(loc, noneCst, lhs);
136  Value dividend =
137  builder.create<arith::SelectOp>(loc, negative, negatedDecremented, lhs);
138  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
139  Value correctedQuotient =
140  builder.create<arith::SubIOp>(loc, noneCst, quotient);
141  Value result = builder.create<arith::SelectOp>(loc, negative,
142  correctedQuotient, quotient);
143  return result;
144  }
145 
146  /// Ceiling division operation (rounds towards positive infinity).
147  ///
148  /// For positive divisors, it can be implemented without branching and with a
149  /// single division operation as
150  ///
151  /// a ceildiv b =
152  /// let negative = a <= 0 in
153  /// let absolute = negative ? -a : a - 1 in
154  /// let quotient = absolute / b in
155  /// negative ? -quotient : quotient + 1
156  ///
157  /// Note: not using arith.ceildivsi for the same reason as explained in the
158  /// visitFloorDivExpr comment.
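 /// For example (illustrative values): 7 ceildiv 2 yields (7 - 1) / 2 + 1 = 4,
 /// while (-7) ceildiv 2 takes the non-positive path: 7 / 2 = 3, negated to -3.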
159  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
160  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
161  if (rhsConst.getValue() <= 0) {
162  emitError(loc, "division by non-positive value is not supported");
163  return nullptr;
164  }
165  }
166  auto lhs = visit(expr.getLHS());
167  auto rhs = visit(expr.getRHS());
168  assert(lhs && rhs && "unexpected affine expr lowering failure");
169 
170  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
171  Value oneCst = builder.create<arith::ConstantIndexOp>(loc, 1);
172  Value nonPositive = builder.create<arith::CmpIOp>(
173  loc, arith::CmpIPredicate::sle, lhs, zeroCst);
174  Value negated = builder.create<arith::SubIOp>(loc, zeroCst, lhs);
175  Value decremented = builder.create<arith::SubIOp>(loc, lhs, oneCst);
176  Value dividend =
177  builder.create<arith::SelectOp>(loc, nonPositive, negated, decremented);
178  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
179  Value negatedQuotient =
180  builder.create<arith::SubIOp>(loc, zeroCst, quotient);
181  Value incrementedQuotient =
182  builder.create<arith::AddIOp>(loc, quotient, oneCst);
183  Value result = builder.create<arith::SelectOp>(
184  loc, nonPositive, negatedQuotient, incrementedQuotient);
185  return result;
186  }
187 
188  Value visitConstantExpr(AffineConstantExpr expr) {
189  auto op = builder.create<arith::ConstantIndexOp>(loc, expr.getValue());
190  return op.getResult();
191  }
192 
193  Value visitDimExpr(AffineDimExpr expr) {
194  assert(expr.getPosition() < dimValues.size() &&
195  "affine dim position out of range");
196  return dimValues[expr.getPosition()];
197  }
198 
199  Value visitSymbolExpr(AffineSymbolExpr expr) {
200  assert(expr.getPosition() < symbolValues.size() &&
201  "symbol dim position out of range");
202  return symbolValues[expr.getPosition()];
203  }
204 
205 private:
206  OpBuilder &builder;
207  ValueRange dimValues;
208  ValueRange symbolValues;
209 
210  Location loc;
211 };
212 } // namespace
213 
214 /// Create a sequence of operations that implement the `expr` applied to the
215 /// given dimension and symbol values.
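/// For example (illustrative), expanding `d0 + s0 * 2` emits an
/// arith.constant 2, an arith.muli of the symbol value with that constant, and
/// an arith.addi of the dim value with the product.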
216 Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
217  AffineExpr expr,
218  ValueRange dimValues,
219  ValueRange symbolValues) {
220  return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
221 }
222 
223 /// Create a sequence of operations that implement the `affineMap` applied to
224 /// the given `operands` (as if it were an AffineApplyOp).
225 std::optional<SmallVector<Value, 8>>
226 mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
227  AffineMap affineMap, ValueRange operands) {
228  auto numDims = affineMap.getNumDims();
229  auto expanded = llvm::to_vector<8>(
230  llvm::map_range(affineMap.getResults(),
231  [numDims, &builder, loc, operands](AffineExpr expr) {
232  return expandAffineExpr(builder, loc, expr,
233  operands.take_front(numDims),
234  operands.drop_front(numDims));
235  }));
236  if (llvm::all_of(expanded, [](Value v) { return v; }))
237  return expanded;
238  return std::nullopt;
239 }
240 
241 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
242 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
243 /// the rest of the op.
244 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
245  if (elseBlock)
246  assert(ifOp.hasElse() && "else block expected");
247 
248  Block *destBlock = ifOp->getBlock();
249  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
250  destBlock->getOperations().splice(
251  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
252  std::prev(srcBlock->end()));
253  ifOp.erase();
254 }
255 
256 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
257 /// on. The `ifOp` could be hoisted and placed right before such an operation.
258 /// This method assumes that the ifOp has been canonicalized (to be correct and
259 /// effective).
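/// For illustration: for an affine.if nested under loops on %i (outer) and %j
/// (inner) whose operands use only %i, the op is invariant on the %j loop but
/// not on the %i loop, so the affine.for on %j is returned.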
260 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
261  // Walk up the parents past all for ops that this conditional is invariant on.
262  auto ifOperands = ifOp.getOperands();
263  auto *res = ifOp.getOperation();
264  while (!isa<func::FuncOp>(res->getParentOp())) {
265  auto *parentOp = res->getParentOp();
266  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
267  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
268  break;
269  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
270  for (auto iv : parallelOp.getIVs())
271  if (llvm::is_contained(ifOperands, iv))
272  break;
273  } else if (!isa<AffineIfOp>(parentOp)) {
274  // Won't walk up past anything other than affine.for/if ops.
275  break;
276  }
277  // You can always hoist up past any affine.if ops.
278  res = parentOp;
279  }
280  return res;
281 }
282 
283 /// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
284 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
285 /// otherwise the same `ifOp`.
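/// Schematically (illustrative IR), hoisting over an enclosing loop:
///   affine.for %i ... { affine.if #set(...) { "T" } else { "E" } }
/// becomes
///   affine.if #set(...) { affine.for %i ... { "T" } }
///   else { affine.for %i ... { "E" } }
/// with empty dead blocks cleaned up by later canonicalization.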
286 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
287  // No hoisting to do.
288  if (hoistOverOp == ifOp)
289  return ifOp;
290 
291  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
292  // the else block. Then drop the else block of the original 'if' in the 'then'
293  // branch while promoting its then block, and analogously drop the 'then'
294  // block of the original 'if' from the 'else' branch while promoting its else
295  // block.
296  IRMapping operandMap;
297  OpBuilder b(hoistOverOp);
298  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
299  ifOp.getOperands(),
300  /*elseBlock=*/true);
301 
302  // Create a clone of hoistOverOp to use for the else branch of the hoisted
303  // conditional. The else block may get optimized away if empty.
304  Operation *hoistOverOpClone = nullptr;
305  // We use this unique name to identify/find `ifOp`'s clone in the else
306  // version.
307  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
308  operandMap.clear();
309  b.setInsertionPointAfter(hoistOverOp);
310  // We'll set an attribute to identify this op in a clone of this sub-tree.
311  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
312  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
313 
314  // Promote the 'then' block of the original affine.if in the then version.
315  promoteIfBlock(ifOp, /*elseBlock=*/false);
316 
317  // Move the then version to the hoisted if op's 'then' block.
318  auto *thenBlock = hoistedIfOp.getThenBlock();
319  thenBlock->getOperations().splice(thenBlock->begin(),
320  hoistOverOp->getBlock()->getOperations(),
321  Block::iterator(hoistOverOp));
322 
323  // Find the clone of the original affine.if op in the else version.
324  AffineIfOp ifCloneInElse;
325  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
326  if (!ifClone->getAttr(idForIfOp))
327  return WalkResult::advance();
328  ifCloneInElse = ifClone;
329  return WalkResult::interrupt();
330  });
331  assert(ifCloneInElse && "if op clone should exist");
332  // For the else block, promote the else block of the original 'if' if it had
333  // one; otherwise, the op itself is to be erased.
334  if (!ifCloneInElse.hasElse())
335  ifCloneInElse.erase();
336  else
337  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
338 
339  // Move the else version into the else block of the hoisted if op.
340  auto *elseBlock = hoistedIfOp.getElseBlock();
341  elseBlock->getOperations().splice(
342  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
343  Block::iterator(hoistOverOpClone));
344 
345  return hoistedIfOp;
346 }
347 
348 LogicalResult
349 mlir::affine::affineParallelize(AffineForOp forOp,
350  ArrayRef<LoopReduction> parallelReductions,
351  AffineParallelOp *resOp) {
352  // Fail early if there are iter arguments that are not reductions.
353  unsigned numReductions = parallelReductions.size();
354  if (numReductions != forOp.getNumIterOperands())
355  return failure();
356 
357  Location loc = forOp.getLoc();
358  OpBuilder outsideBuilder(forOp);
359  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
360  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
361  AffineMap upperBoundMap = forOp.getUpperBoundMap();
362  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
363 
364  // Creating empty 1-D affine.parallel op.
365  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
366  parallelReductions, [](const LoopReduction &red) { return red.value; }));
367  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
368  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
369  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
370  loc, ValueRange(reducedValues).getTypes(), reductionKinds,
371  llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
372  llvm::ArrayRef(upperBoundMap), upperBoundOperands,
373  llvm::ArrayRef(forOp.getStepAsInt()));
374  // Steal the body of the old affine for op.
375  newPloop.getRegion().takeBody(forOp.getRegion());
376  Operation *yieldOp = &newPloop.getBody()->back();
377 
378  // Handle the initial values of reductions because the parallel loop always
379  // starts from the neutral value.
380  SmallVector<Value> newResults;
381  newResults.reserve(numReductions);
382  for (unsigned i = 0; i < numReductions; ++i) {
383  Value init = forOp.getInits()[i];
384  // This works because we are only handling single-op reductions at the
385  // moment. A switch on reduction kind or a mechanism to collect operations
386  // participating in the reduction will be necessary for multi-op reductions.
387  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
388  assert(reductionOp && "yielded value is expected to be produced by an op");
389  outsideBuilder.getInsertionBlock()->getOperations().splice(
390  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
391  reductionOp);
392  reductionOp->setOperands({init, newPloop->getResult(i)});
393  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
394  }
395 
396  // Update the loop terminator to yield reduced values bypassing the reduction
397  // operation itself (now moved outside of the loop) and erase the block
398  // arguments that correspond to reductions. Note that the loop always has one
399  // "main" induction variable when coming from a non-parallel for.
400  unsigned numIVs = 1;
401  yieldOp->setOperands(reducedValues);
402  newPloop.getBody()->eraseArguments(numIVs, numReductions);
403 
404  forOp.erase();
405  if (resOp)
406  *resOp = newPloop;
407  return success();
408 }
409 
410 // Returns success if any hoisting happened.
411 LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
412  // Bail out early if the ifOp returns a result. TODO: Consider how to
413  // properly support this case.
414  if (ifOp.getNumResults() != 0)
415  return failure();
416 
417  // Apply canonicalization patterns and folding - this is necessary for the
418  // hoisting check to be correct (operands should be composed), and to be more
419  // effective (no unused operands). Since the pattern rewriter's folding is
420  // entangled with application of patterns, we may fold/end up erasing the op,
421  // in which case we return with `folded` being set.
422  RewritePatternSet patterns(ifOp.getContext());
423  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
424  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
425  GreedyRewriteConfig config;
426  config.strictMode = GreedyRewriteStrictness::ExistingAndNewOps;
427  bool erased;
428  (void)applyOpPatternsGreedily(ifOp.getOperation(), frozenPatterns, config,
429  /*changed=*/nullptr, &erased);
430  if (erased) {
431  if (folded)
432  *folded = true;
433  return failure();
434  }
435  if (folded)
436  *folded = false;
437 
438  // The folding above should have ensured this, but the affine.if's
439  // canonicalization is missing composition of affine.applys into it.
440  assert(llvm::all_of(ifOp.getOperands(),
441  [](Value v) {
442  return isTopLevelValue(v) || isAffineForInductionVar(v);
443  }) &&
444  "operands not composed");
445 
446  // We are going to hoist as high as possible.
447  // TODO: this could be customized in the future.
448  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
449 
450  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
451  // Nothing to hoist over.
452  if (hoistedIfOp == ifOp)
453  return failure();
454 
455  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
456  // a sequence of affine.fors that are all perfectly nested).
457  (void)applyPatternsGreedily(
458  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
459  frozenPatterns);
460 
461  return success();
462 }
463 
464 // Returns `e` with `dim` replaced by `min` on positive paths and by `max` on negative paths.
465 AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
466  AffineExpr min, AffineExpr max,
467  bool positivePath) {
468  if (e == dim)
469  return positivePath ? min : max;
470  if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
471  AffineExpr lhs = bin.getLHS();
472  AffineExpr rhs = bin.getRHS();
473  if (bin.getKind() == mlir::AffineExprKind::Add)
474  return substWithMin(lhs, dim, min, max, positivePath) +
475  substWithMin(rhs, dim, min, max, positivePath);
476 
477  auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
478  auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
479  if (c1 && c1.getValue() < 0)
480  return getAffineBinaryOpExpr(
481  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
482  if (c2 && c2.getValue() < 0)
483  return getAffineBinaryOpExpr(
484  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
485  return getAffineBinaryOpExpr(
486  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
487  substWithMin(rhs, dim, min, max, positivePath));
488  }
489  return e;
490 }
491 
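// For illustration (schematic IR): an affine.parallel from (%lb) to (%ub) with
// step 4 is rewritten to iterate from (0) to (ceildiv(%ub - %lb, 4)) with step
// 1; the original IV is recovered inside the body as %lb + 4 * %iv through an
// affine.apply that later canonicalization folds away.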
492 void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
493  // Loops with min/max in bounds are not normalized at the moment.
494  if (op.hasMinMaxBounds())
495  return;
496 
497  AffineMap lbMap = op.getLowerBoundsMap();
498  SmallVector<int64_t, 8> steps = op.getSteps();
499  // No need to do any work if the parallel op is already normalized.
500  bool isAlreadyNormalized =
501  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
502  int64_t step = std::get<0>(tuple);
503  auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
504  return lbExpr && lbExpr.getValue() == 0 && step == 1;
505  });
506  if (isAlreadyNormalized)
507  return;
508 
509  AffineValueMap ranges;
510  AffineValueMap::difference(op.getUpperBoundsValueMap(),
511  op.getLowerBoundsValueMap(), &ranges);
512  auto builder = OpBuilder::atBlockBegin(op.getBody());
513  auto zeroExpr = builder.getAffineConstantExpr(0);
514  SmallVector<AffineExpr, 8> lbExprs;
515  SmallVector<AffineExpr, 8> ubExprs;
516  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
517  int64_t step = steps[i];
518 
519  // Adjust the lower bound to be 0.
520  lbExprs.push_back(zeroExpr);
521 
522  // Adjust the upper bound expression: 'range / step'.
523  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
524  ubExprs.push_back(ubExpr);
525 
526  // Adjust the corresponding IV: 'lb + i * step'.
527  BlockArgument iv = op.getBody()->getArgument(i);
528  AffineExpr lbExpr = lbMap.getResult(i);
529  unsigned nDims = lbMap.getNumDims();
530  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
531  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
532  /*symbolCount=*/lbMap.getNumSymbols(), expr);
533 
534  // Use an 'affine.apply' op that will be simplified later in subsequent
535  // canonicalizations.
536  OperandRange lbOperands = op.getLowerBoundsOperands();
537  OperandRange dimOperands = lbOperands.take_front(nDims);
538  OperandRange symbolOperands = lbOperands.drop_front(nDims);
539  SmallVector<Value, 8> applyOperands{dimOperands};
540  applyOperands.push_back(iv);
541  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
542  auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
543  iv.replaceAllUsesExcept(apply, apply);
544  }
545 
546  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
547  op.setSteps(newSteps);
548  auto newLowerMap = AffineMap::get(
549  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
550  op.setLowerBounds({}, newLowerMap);
551  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
552  ubExprs, op.getContext());
553  op.setUpperBounds(ranges.getOperands(), newUpperMap);
554 }
555 
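// For illustration (schematic IR): `affine.for %i = %lb to %ub step 4` is
// rewritten to `affine.for %i = 0 to ceildiv(%ub - %lb, 4) step 1`, and an
// affine.apply at the top of the body computes the original IV value as
// %lb + 4 * %i for all existing uses.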
556 LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
557  bool promoteSingleIter) {
558  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
559  return success();
560 
561  // Check if the for op is already normalized.
562  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
563  (op.getStep() == 1))
564  return success();
565 
566  // Check if the lower bound has a single result only. Loops with a max lower
567  // bound can't be normalized without additional support like
568  // affine.execute_region's. If the lower bound does not have a single result
569  // then skip this op.
570  if (op.getLowerBoundMap().getNumResults() != 1)
571  return failure();
572 
573  Location loc = op.getLoc();
574  OpBuilder opBuilder(op);
575  int64_t origLoopStep = op.getStepAsInt();
576 
577  // Construct the new upper bound value map.
578  AffineMap oldLbMap = op.getLowerBoundMap();
579  // The upper bound can have multiple results. To use
580  // AffineValueMap::difference, we need to have the same number of results in
581  // both lower and upper bound maps. So, we just create a value map for the
582  // lower bound with the only available lower bound result repeated to pad up
583  // to the number of upper bound results.
584  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
585  op.getLowerBoundMap().getResult(0));
586  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
587  AffineMap paddedLbMap =
588  AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
589  op.getContext());
590  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
591  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
592  AffineValueMap newUbValueMap;
593  // Compute the `upper bound - lower bound`.
594  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
595  (void)newUbValueMap.canonicalize();
596 
597  // Scale down the upper bound value map by the loop step.
598  unsigned numResult = newUbValueMap.getNumResults();
599  SmallVector<AffineExpr> scaleDownExprs(numResult);
600  for (unsigned i = 0; i < numResult; ++i)
601  scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
602  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
603  // step). Where `n` is the number of results in the upper bound map.
604  AffineMap scaleDownMap =
605  AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
606  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
607 
608  // Set the newly created upper bound map and operands.
609  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
610  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
611  op.setStep(1);
612 
613  // Calculate the Value of new loopIV. Create affine.apply for the value of
614  // the loopIV in normalized loop.
615  opBuilder.setInsertionPointToStart(op.getBody());
616  // Construct an affine.apply op mapping the new IV to the old IV.
617  AffineMap scaleIvMap =
618  AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
619  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
620  AffineValueMap newIvToOldIvMap;
621  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
622  (void)newIvToOldIvMap.canonicalize();
623  auto newIV = opBuilder.create<AffineApplyOp>(
624  loc, newIvToOldIvMap.getAffineMap(), newIvToOldIvMap.getOperands());
625  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
626  return success();
627 }
628 
629 /// Returns true if the memory operation of `destAccess` depends on `srcAccess`
630 /// inside of the innermost common surrounding affine loop between the two
631 /// accesses.
632 static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
633  const MemRefAccess &destAccess) {
634  // Affine dependence analysis is possible only if both ops are in the same
635  // AffineScope.
636  if (getAffineScope(srcAccess.opInst) != getAffineScope(destAccess.opInst))
637  return false;
638 
639  unsigned nsLoops =
640  getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
641  DependenceResult result =
642  checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
643  return hasDependence(result);
644 }
645 
646 /// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
647 /// scope of the outermost `minSurroundingLoops` loops that surround them.
648 /// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
649 static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
650  unsigned minSurroundingLoops) {
651  MemRefAccess srcAccess(srcMemOp);
652  MemRefAccess destAccess(destMemOp);
653 
654  // Affine dependence analysis here is applicable only if both ops operate on
655  // the same memref and if `srcMemOp` and `destMemOp` are in the same
656  // AffineScope. Also, we can only check if our affine scope is isolated from
657  // above; otherwise, values can come from outside of the affine scope, which
658  // the check below cannot analyze.
659  Region *srcScope = getAffineScope(srcMemOp);
660  if (srcAccess.memref == destAccess.memref &&
661  srcScope == getAffineScope(destMemOp)) {
662  unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
663  FlatAffineValueConstraints dependenceConstraints;
664  for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
665  DependenceResult result = checkMemrefAccessDependence(
666  srcAccess, destAccess, d, &dependenceConstraints,
667  /*dependenceComponents=*/nullptr);
668  // A dependence failure or the presence of a dependence implies a
669  // side effect.
670  if (!noDependence(result))
671  return true;
672  }
673  // No side effect was seen.
674  return false;
675  }
676  // TODO: Check here if the memrefs alias: there is no side effect if
677  // `srcAccess.memref` and `destAccess.memref` don't alias.
678  return true;
679 }
680 
681 template <typename EffectType, typename T>
682 bool mlir::affine::hasNoInterveningEffect(
683  Operation *start, T memOp,
684  llvm::function_ref<bool(Value, Value)> mayAlias) {
685  // A boolean representing whether an intervening operation could have impacted
686  // memOp.
687  bool hasSideEffect = false;
688 
689  // Check whether the effect on memOp can be caused by a given operation op.
690  Value memref = memOp.getMemRef();
691  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
692  // If the effect has already been found, exit early.
693  if (hasSideEffect)
694  return;
695 
696  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
697  SmallVector<MemoryEffects::EffectInstance, 1> effects;
698  memEffect.getEffects(effects);
699 
700  bool opMayHaveEffect = false;
701  for (auto effect : effects) {
702  // If op causes EffectType on a potentially aliasing location for
703  // memOp, mark as having the effect.
704  if (isa<EffectType>(effect.getEffect())) {
705  if (effect.getValue() && effect.getValue() != memref &&
706  !mayAlias(effect.getValue(), memref))
707  continue;
708  opMayHaveEffect = true;
709  break;
710  }
711  }
712 
713  if (!opMayHaveEffect)
714  return;
715 
716  // If the side effect comes from an affine read or write, try to
717  // prove the side effecting `op` cannot reach `memOp`.
718  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
719  // For ease, let's consider the case that `op` is a store and
720  // we're looking for other potential stores that overwrite memory after
721  // `start`, and before being read in `memOp`. In this case, we only
722  // need to consider other potential stores with depth >
723  // minSurroundingLoops since `start` would overwrite any store with a
724  // smaller number of surrounding loops before.
725  unsigned minSurroundingLoops =
726  getNumCommonSurroundingLoops(*start, *memOp);
727  if (mayHaveEffect(op, memOp, minSurroundingLoops))
728  hasSideEffect = true;
729  return;
730  }
731 
732  // We have an op with a memory effect and we cannot prove if it
733  // intervenes.
734  hasSideEffect = true;
735  return;
736  }
737 
738  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
739  // Recurse into the regions for this op and check whether the internal
740  // operations may have the side effect `EffectType` on memOp.
741  for (Region &region : op->getRegions())
742  for (Block &block : region)
743  for (Operation &op : block)
744  checkOperation(&op);
745  return;
746  }
747 
748  // Otherwise, conservatively assume generic operations have the effect
749  // on `memOp`.
750  hasSideEffect = true;
751  };
752 
753  // Check all paths from ancestor op `parent` to the operation `to` for the
754  // effect. It is known that `to` must be contained within `parent`.
755  auto until = [&](Operation *parent, Operation *to) {
756  // TODO check only the paths from `parent` to `to`.
757  // Currently we fallback and check the entire parent op, rather than
758  // just the paths from the parent path, stopping after reaching `to`.
759  // This is conservatively correct, but could be made more aggressive.
760  assert(parent->isAncestor(to));
761  checkOperation(parent);
762  };
763 
764  // Check for all paths from operation `from` to operation `untilOp` for the
765  // given memory effect.
766  std::function<void(Operation *, Operation *)> recur =
767  [&](Operation *from, Operation *untilOp) {
768  assert(
769  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
770  "Checking for side effect between two operations without a common "
771  "ancestor");
772 
773  // If the operations are in different regions, recursively consider all
774  // paths from `from` to the parent of `to` and all paths from the parent
775  // of `to` to `to`.
776  if (from->getParentRegion() != untilOp->getParentRegion()) {
777  recur(from, untilOp->getParentOp());
778  until(untilOp->getParentOp(), untilOp);
779  return;
780  }
781 
782  // Now, assuming that `from` and `to` exist in the same region, perform
783  // a CFG traversal to check all the relevant operations.
784 
785  // Additional blocks to consider.
786  SmallVector<Block *, 2> todoBlocks;
787  {
788  // First consider the parent block of `from` and check all operations
789  // after `from`.
790  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
791  iter != end && &*iter != untilOp; ++iter) {
792  checkOperation(&*iter);
793  }
794 
795  // If the parent of `from` doesn't contain `to`, add the successors
796  // to the list of blocks to check.
797  if (untilOp->getBlock() != from->getBlock())
798  for (Block *succ : from->getBlock()->getSuccessors())
799  todoBlocks.push_back(succ);
800  }
801 
802  SmallPtrSet<Block *, 4> done;
803  // Traverse the CFG until hitting `to`.
804  while (!todoBlocks.empty()) {
805  Block *blk = todoBlocks.pop_back_val();
806  if (done.count(blk))
807  continue;
808  done.insert(blk);
809  for (auto &op : *blk) {
810  if (&op == untilOp)
811  break;
812  checkOperation(&op);
813  if (&op == blk->getTerminator())
814  for (Block *succ : blk->getSuccessors())
815  todoBlocks.push_back(succ);
816  }
817  }
818  };
819  recur(start, memOp);
820  return !hasSideEffect;
821 }
822 
823 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
824 /// which the load is guaranteed to retrieve. This check involves three
825 /// components: 1) The store and load must be on the same location 2) The store
826 /// must dominate (and therefore must always occur prior to) the load 3) No
827 /// other operations will overwrite the memory loaded between the given load
828 /// and store. If such a value exists, the replaced `loadOp` will be added to
829 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
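/// Schematically (illustrative IR):
///   affine.store %v, %A[%i]
///   ...                        // no op that may write to %A[%i] in between
///   %x = affine.load %A[%i]    // all uses of %x are replaced by %v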
830 static void forwardStoreToLoad(
831  AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
832  SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
833  llvm::function_ref<bool(Value, Value)> mayAlias) {
834 
835  // The store op candidate for forwarding that satisfies all conditions
836  // to replace the load, if any.
837  Operation *lastWriteStoreOp = nullptr;
838 
839  for (auto *user : loadOp.getMemRef().getUsers()) {
840  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
841  if (!storeOp)
842  continue;
843  MemRefAccess srcAccess(storeOp);
844  MemRefAccess destAccess(loadOp);
845 
846  // 1. Check if the store and the load have mathematically equivalent
847  // affine access functions; this implies that they statically refer to the
848  // same single memref element. As an example this filters out cases like:
849  // store %A[%i0 + 1]
850  // load %A[%i0]
851  // store %A[%M]
852  // load %A[%N]
853  // Use the AffineValueMap difference based memref access equality checking.
854  if (srcAccess != destAccess)
855  continue;
856 
857  // 2. The store has to dominate the load op to be candidate.
858  if (!domInfo.dominates(storeOp, loadOp))
859  continue;
860 
861  // 3. The store must reach the load. Access function equivalence only
862  // guarantees this for accesses in the same block. The load could be in a
863  // nested block that is unreachable.
864  if (!mustReachAtInnermost(srcAccess, destAccess))
865  continue;
866 
867  // 4. Ensure there is no intermediate operation which could replace the
868  // value in memory.
869  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
870  mayAlias))
871  continue;
872 
873  // We now have a candidate for forwarding.
874  assert(lastWriteStoreOp == nullptr &&
875  "multiple simultaneous replacement stores");
876  lastWriteStoreOp = storeOp;
877  }
878 
879  if (!lastWriteStoreOp)
880  return;
881 
882  // Perform the actual store to load forwarding.
883  Value storeVal =
884  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
885  // Check if 2 values have the same shape. This is needed for affine vector
886  // loads and stores.
887  if (storeVal.getType() != loadOp.getValue().getType())
888  return;
889  loadOp.getValue().replaceAllUsesWith(storeVal);
890  // Record the memref for a later sweep to optimize away.
891  memrefsToErase.insert(loadOp.getMemRef());
892  // Record this to erase later.
893  loadOpsToErase.push_back(loadOp);
894 }
895 
896 template bool
897  mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Write,
898  affine::AffineReadOpInterface>(
899  mlir::Operation *, affine::AffineReadOpInterface,
900  llvm::function_ref<bool(Value, Value)>);
901 
902 // This attempts to find stores which have no impact on the final result.
903 // A writing op writeA will be eliminated if there exists an op writeB such that
904 // 1) writeA and writeB have mathematically equivalent affine access functions.
905 // 2) writeB postdominates writeA.
906 // 3) There is no potential read between writeA and writeB.
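// Schematically (illustrative IR):
//   affine.store %v0, %A[%i]    // writeA: dead, queued for erasure
//   ...                         // no op that may read %A[%i] in between
//   affine.store %v1, %A[%i]    // writeB: postdominates and overwrites writeA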
907 static void findUnusedStore(AffineWriteOpInterface writeA,
908  SmallVectorImpl<Operation *> &opsToErase,
909  PostDominanceInfo &postDominanceInfo,
910  llvm::function_ref<bool(Value, Value)> mayAlias) {
911 
912  for (Operation *user : writeA.getMemRef().getUsers()) {
913  // Only consider writing operations.
914  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
915  if (!writeB)
916  continue;
917 
918  // The operations must be distinct.
919  if (writeB == writeA)
920  continue;
921 
922  // Both operations must lie in the same region.
923  if (writeB->getParentRegion() != writeA->getParentRegion())
924  continue;
925 
926  // Both operations must write to the same memory.
927  MemRefAccess srcAccess(writeB);
928  MemRefAccess destAccess(writeA);
929 
930  if (srcAccess != destAccess)
931  continue;
932 
933  // writeB must postdominate writeA.
934  if (!postDominanceInfo.postDominates(writeB, writeA))
935  continue;
936 
937  // There cannot be an operation which reads from memory between
938  // the two writes.
939  if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
940  mayAlias))
941  continue;
942 
943  opsToErase.push_back(writeA);
944  break;
945  }
946 }
947 
948 // The load to load forwarding / redundant load elimination is similar to the
949 // store to load forwarding.
950 // loadA will be replaced with loadB if:
951 // 1) loadA and loadB have mathematically equivalent affine access functions.
952 // 2) loadB dominates loadA.
953 // 3) There is no write between loadA and loadB.
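// Schematically (illustrative IR):
//   %x = affine.load %A[%i]     // loadB
//   ...                         // no op that may write %A[%i] in between
//   %y = affine.load %A[%i]     // loadA: uses of %y are replaced by %x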
954 static void loadCSE(AffineReadOpInterface loadA,
955  SmallVectorImpl<Operation *> &loadOpsToErase,
956  DominanceInfo &domInfo,
957  llvm::function_ref<bool(Value, Value)> mayAlias) {
958  SmallVector<AffineReadOpInterface, 4> loadCandidates;
959  for (auto *user : loadA.getMemRef().getUsers()) {
960  auto loadB = dyn_cast<AffineReadOpInterface>(user);
961  if (!loadB || loadB == loadA)
962  continue;
963 
964  MemRefAccess srcAccess(loadB);
965  MemRefAccess destAccess(loadA);
966 
967  // 1. The accesses should be to the same location.
968  if (srcAccess != destAccess) {
969  continue;
970  }
971 
972  // 2. loadB should dominate loadA.
973  if (!domInfo.dominates(loadB, loadA))
974  continue;
975 
976  // 3. There should not be a write between loadA and loadB.
977  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
978  loadB.getOperation(), loadA, mayAlias))
979  continue;
980 
981  // Check if two values have the same shape. This is needed for affine vector
982  // loads.
983  if (loadB.getValue().getType() != loadA.getValue().getType())
984  continue;
985 
986  loadCandidates.push_back(loadB);
987  }
988 
989  // Of the legal load candidates, use the one that dominates all others
990  // to minimize the subsequent need to loadCSE
991  Value loadB;
992  for (AffineReadOpInterface option : loadCandidates) {
993  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
994  return depStore == option ||
995  domInfo.dominates(option.getOperation(),
996  depStore.getOperation());
997  })) {
998  loadB = option.getValue();
999  break;
1000  }
1001  }
1002 
1003  if (loadB) {
1004  loadA.getValue().replaceAllUsesWith(loadB);
1005  // Record this to erase later.
1006  loadOpsToErase.push_back(loadA);
1007  }
1008 }
1009 
1010 // The store to load forwarding and load CSE rely on three conditions:
1011 //
1012 // 1) store/load providing a replacement value and load being replaced need to
1013 // have mathematically equivalent affine access functions (checked after full
1014 // composition of load/store operands); this implies that they access the same
1015 // single memref element for all iterations of the common surrounding loop,
1016 //
1017 // 2) the store/load op should dominate the load op,
1018 //
1019 // 3) no operation that may write to memory read by the load being replaced can
1020 // occur after executing the instruction (load or store) providing the
1021 // replacement value and before the load being replaced (thus potentially
1022 // allowing overwriting the memory read by the load).
1023 //
1024 // The above conditions are simple to check and powerful for most cases in
1025 // practice - they are sufficient, but not necessary - since they
1026 // don't reason about loops that are guaranteed to execute at least once or
1027 // multiple sources to forward from.
1028 //
1029 // TODO: more forwarding can be done when support for
1030 // loop/conditional live-out SSA values is available.
1031 // TODO: do general dead store elimination for memref's. This pass
1032 // currently eliminates the stores only if no other loads/uses (other
1033 // than dealloc) remain.
1034 //
1035 void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1036  PostDominanceInfo &postDomInfo,
1037  AliasAnalysis &aliasAnalysis) {
1038  // Load op's whose results were replaced by those forwarded from stores.
1039  SmallVector<Operation *, 8> opsToErase;
1040 
1041  // A list of memref's that are potentially dead / could be eliminated.
1042  SmallPtrSet<Value, 4> memrefsToErase;
1043 
1044  auto mayAlias = [&](Value val1, Value val2) -> bool {
1045  return !aliasAnalysis.alias(val1, val2).isNo();
1046  };
1047 
1048  // Walk all loads and perform store to load forwarding.
1049  f.walk([&](AffineReadOpInterface loadOp) {
1050  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1051  });
1052  for (auto *op : opsToErase)
1053  op->erase();
1054  opsToErase.clear();
1055 
1056  // Walk all stores and perform unused store elimination.
1057  f.walk([&](AffineWriteOpInterface storeOp) {
1058  findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1059  });
1060  for (auto *op : opsToErase)
1061  op->erase();
1062  opsToErase.clear();
1063 
1064  // Check if the store fwd'ed memrefs are now left with only stores and
1065  // deallocs and can thus be completely deleted. Note: the canonicalize pass
1066  // should be able to do this as well, but we'll do it here since we collected
1067  // these anyway.
1068  for (auto memref : memrefsToErase) {
1069  // If the memref hasn't been locally alloc'ed, skip.
1070  Operation *defOp = memref.getDefiningOp();
1071  if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1072  // TODO: if the memref was returned by a 'call' operation, we
1073  // could still erase it if the call had no side-effects.
1074  continue;
1075  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1076  return !isa<AffineWriteOpInterface>(ownerOp) &&
1077  !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1078  }))
1079  continue;
1080 
1081  // Erase all stores, the dealloc, and the alloc on the memref.
1082  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1083  user->erase();
1084  defOp->erase();
1085  }
1086 
1087  // To eliminate as many loads as possible, run load CSE after eliminating
1088  // stores. Otherwise, some stores are wrongly seen as having an intervening
1089  // effect.
1090  f.walk([&](AffineReadOpInterface loadOp) {
1091  loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1092  });
1093  for (auto *op : opsToErase)
1094  op->erase();
1095 }
1096 
1097 // Private helper function to transform memref.load with reduced rank.
1098 // This function will modify the indices of the memref.load to match the
1099 // newMemRef.
1100 static LogicalResult transformMemRefLoadWithReducedRank(
1101  Operation *op, Value oldMemRef, Value newMemRef, unsigned memRefOperandPos,
1102  ArrayRef<Value> extraIndices, ArrayRef<Value> extraOperands,
1103  ArrayRef<Value> symbolOperands, AffineMap indexRemap) {
1104  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1105  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1106  unsigned oldMapNumInputs = oldMemRefRank;
1107  SmallVector<Value, 4> oldMapOperands(
1108  op->operand_begin() + memRefOperandPos + 1,
1109  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1110  SmallVector<Value, 4> oldMemRefOperands;
1111  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1112  SmallVector<Value, 4> remapOperands;
1113  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1114  symbolOperands.size());
1115  remapOperands.append(extraOperands.begin(), extraOperands.end());
1116  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1117  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1118 
1119  SmallVector<Value, 4> remapOutputs;
1120  remapOutputs.reserve(oldMemRefRank);
1121  SmallVector<Value, 4> affineApplyOps;
1122 
1123  OpBuilder builder(op);
1124 
1125  if (indexRemap &&
1126  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1127  // Remapped indices.
1128  for (auto resultExpr : indexRemap.getResults()) {
1129  auto singleResMap = AffineMap::get(
1130  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1131  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1132  remapOperands);
1133  remapOutputs.push_back(afOp);
1134  affineApplyOps.push_back(afOp);
1135  }
1136  } else {
1137  // No remapping specified.
1138  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1139  }
1140 
1141  SmallVector<Value, 4> newMapOperands;
1142  newMapOperands.reserve(newMemRefRank);
1143 
1144  // Prepend 'extraIndices' in 'newMapOperands'.
1145  for (Value extraIndex : extraIndices) {
1146  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1147  "invalid memory op index");
1148  newMapOperands.push_back(extraIndex);
1149  }
1150 
1151  // Append 'remapOutputs' to 'newMapOperands'.
1152  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1153 
1154  // Create new fully composed AffineMap for new op to be created.
1155  assert(newMapOperands.size() == newMemRefRank);
1156 
1157  OperationState state(op->getLoc(), op->getName());
1158  // Construct the new operation using this memref.
1159  state.operands.reserve(newMapOperands.size() + extraIndices.size());
1160  state.operands.push_back(newMemRef);
1161 
1162  // Insert the new memref map operands.
1163  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1164 
1165  state.types.reserve(op->getNumResults());
1166  for (auto result : op->getResults())
1167  state.types.push_back(result.getType());
1168 
1169  // Copy over the attributes from the old operation to the new operation.
1170  for (auto namedAttr : op->getAttrs()) {
1171  state.attributes.push_back(namedAttr);
1172  }
1173 
1174  // Create the new operation.
1175  auto *repOp = builder.create(state);
1176  op->replaceAllUsesWith(repOp);
1177  op->erase();
1178 
1179  return success();
1180 }
1181 // Perform the replacement in `op`.
1182 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1183  Value oldMemRef, Value newMemRef, Operation *op,
1184  ArrayRef<Value> extraIndices, AffineMap indexRemap,
1185  ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1186  bool allowNonDereferencingOps) {
1187  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1188  (void)newMemRefRank; // unused in opt mode
1189  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1190  (void)oldMemRefRank; // unused in opt mode
1191  if (indexRemap) {
1192  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1193  "symbolic operand count mismatch");
1194  assert(indexRemap.getNumInputs() ==
1195  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1196  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1197  } else {
1198  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1199  }
1200 
1201  // Assert same elemental type.
1202  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1203  cast<MemRefType>(newMemRef.getType()).getElementType());
1204 
1205  SmallVector<unsigned, 2> usePositions;
1206  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1207  if (opEntry.value() == oldMemRef)
1208  usePositions.push_back(opEntry.index());
1209  }
1210 
1211  // If memref doesn't appear, nothing to do.
1212  if (usePositions.empty())
1213  return success();
1214 
1215  if (usePositions.size() > 1) {
1216  // TODO: extend it for this case when needed (rare).
1217  assert(false && "multiple dereferencing uses in a single op not supported");
1218  return failure();
1219  }
1220 
1221  unsigned memRefOperandPos = usePositions.front();
1222 
1223  OpBuilder builder(op);
1224  // The following checks if op is dereferencing memref and performs the access
1225  // index rewrites.
1226  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1227  if (!affMapAccInterface) {
1228  if (!allowNonDereferencingOps) {
1229  // Failure: memref used in a non-dereferencing context (potentially
1230  // escapes); no replacement in these cases unless allowNonDereferencingOps
1231  // is set.
1232  return failure();
1233  }
1234 
1235  // Check if it is a memref.load
1236  auto memrefLoad = dyn_cast<memref::LoadOp>(op);
1237  bool isReductionLike =
1238  indexRemap.getNumResults() < indexRemap.getNumInputs();
1239  if (!memrefLoad || !isReductionLike) {
1240  op->setOperand(memRefOperandPos, newMemRef);
1241  return success();
1242  }
1243 
1244  return transformMemRefLoadWithReducedRank(
1245  op, oldMemRef, newMemRef, memRefOperandPos, extraIndices, extraOperands,
1246  symbolOperands, indexRemap);
1247  }
1248  // Perform index rewrites for the dereferencing op and then replace the op
1249  NamedAttribute oldMapAttrPair =
1250  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1251  AffineMap oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1252  unsigned oldMapNumInputs = oldMap.getNumInputs();
1253  SmallVector<Value, 4> oldMapOperands(
1254  op->operand_begin() + memRefOperandPos + 1,
1255  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1256 
1257  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1258  SmallVector<Value, 4> oldMemRefOperands;
1259  SmallVector<Value, 4> affineApplyOps;
1260  oldMemRefOperands.reserve(oldMemRefRank);
1261  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1262  for (auto resultExpr : oldMap.getResults()) {
1263  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1264  oldMap.getNumSymbols(), resultExpr);
1265  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1266  oldMapOperands);
1267  oldMemRefOperands.push_back(afOp);
1268  affineApplyOps.push_back(afOp);
1269  }
1270  } else {
1271  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1272  }
1273 
1274  // Construct new indices as a remap of the old ones if a remapping has been
1275  // provided. The indices of a memref come right after it, i.e.,
1276  // at position memRefOperandPos + 1.
1277  SmallVector<Value, 4> remapOperands;
1278  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1279  symbolOperands.size());
1280  remapOperands.append(extraOperands.begin(), extraOperands.end());
1281  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1282  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1283 
1284  SmallVector<Value, 4> remapOutputs;
1285  remapOutputs.reserve(oldMemRefRank);
1286 
1287  if (indexRemap &&
1288  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1289  // Remapped indices.
1290  for (auto resultExpr : indexRemap.getResults()) {
1291  auto singleResMap = AffineMap::get(
1292  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1293  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1294  remapOperands);
1295  remapOutputs.push_back(afOp);
1296  affineApplyOps.push_back(afOp);
1297  }
1298  } else {
1299  // No remapping specified.
1300  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1301  }
1302 
1303  SmallVector<Value, 4> newMapOperands;
1304  newMapOperands.reserve(newMemRefRank);
1305 
1306  // Prepend 'extraIndices' in 'newMapOperands'.
1307  for (Value extraIndex : extraIndices) {
1308  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1309  "invalid memory op index");
1310  newMapOperands.push_back(extraIndex);
1311  }
1312 
1313  // Append 'remapOutputs' to 'newMapOperands'.
1314  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1315 
1316  // Create new fully composed AffineMap for new op to be created.
1317  assert(newMapOperands.size() == newMemRefRank);
1318  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1319  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1320  newMap = simplifyAffineMap(newMap);
1321  canonicalizeMapAndOperands(&newMap, &newMapOperands);
1322  // Remove any affine.apply's that became dead as a result of composition.
1323  for (Value value : affineApplyOps)
1324  if (value.use_empty())
1325  value.getDefiningOp()->erase();
1326 
1327  OperationState state(op->getLoc(), op->getName());
1328  // Construct the new operation using this memref.
1329  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1330  // Insert the non-memref operands.
1331  state.operands.append(op->operand_begin(),
1332  op->operand_begin() + memRefOperandPos);
1333  // Insert the new memref value.
1334  state.operands.push_back(newMemRef);
1335 
1336  // Insert the new memref map operands.
1337  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1338 
1339  // Insert the remaining operands unmodified.
1340  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1341  oldMapNumInputs,
1342  op->operand_end());
1343 
1344  // Result types don't change. Both memref's are of the same elemental type.
1345  state.types.reserve(op->getNumResults());
1346  for (auto result : op->getResults())
1347  state.types.push_back(result.getType());
1348 
1349  // Add attribute for 'newMap', other Attributes do not change.
1350  auto newMapAttr = AffineMapAttr::get(newMap);
1351  for (auto namedAttr : op->getAttrs()) {
1352  if (namedAttr.getName() == oldMapAttrPair.getName())
1353  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1354  else
1355  state.attributes.push_back(namedAttr);
1356  }
1357 
1358  // Create the new operation.
1359  auto *repOp = builder.create(state);
1360  op->replaceAllUsesWith(repOp);
1361  op->erase();
1362 
1363  return success();
1364 }
1365 
1366 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1367  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1368  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1369  ArrayRef<Value> symbolOperands, Operation *domOpFilter,
1370  Operation *postDomOpFilter, bool allowNonDereferencingOps,
1371  bool replaceInDeallocOp) {
1372  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1373  (void)newMemRefRank; // unused in opt mode
1374  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1375  (void)oldMemRefRank;
1376  if (indexRemap) {
1377  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1378  "symbol operand count mismatch");
1379  assert(indexRemap.getNumInputs() ==
1380  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1381  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1382  } else {
1383  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1384  }
1385 
1386  // Assert same elemental type.
1387  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1388  cast<MemRefType>(newMemRef.getType()).getElementType());
1389 
1390  std::unique_ptr<DominanceInfo> domInfo;
1391  std::unique_ptr<PostDominanceInfo> postDomInfo;
1392  if (domOpFilter)
1393  domInfo = std::make_unique<DominanceInfo>(
1394  domOpFilter->getParentOfType<FunctionOpInterface>());
1395 
1396  if (postDomOpFilter)
1397  postDomInfo = std::make_unique<PostDominanceInfo>(
1398  postDomOpFilter->getParentOfType<FunctionOpInterface>());
1399 
1400  // Walk all uses of old memref; collect ops to perform replacement. We use a
1401  // DenseSet since an operation could potentially have multiple uses of a
1402  // memref (although rare), and the replacement later is going to erase ops.
1403  DenseSet<Operation *> opsToReplace;
1404  for (auto *op : oldMemRef.getUsers()) {
1405  // Skip this use if it's not dominated by domOpFilter.
1406  if (domOpFilter && !domInfo->dominates(domOpFilter, op))
1407  continue;
1408 
1409  // Skip this use if it's not post-dominated by postDomOpFilter.
1410  if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
1411  continue;
1412 
1413  // Skip deallocs - no replacement is necessary, and a memref replacement
1414  // at other uses doesn't hurt these deallocs.
1415  if (hasSingleEffect<MemoryEffects::Free>(op, oldMemRef) &&
1416  !replaceInDeallocOp)
1417  continue;
1418 
1419  // Check if the memref was used in a non-dereferencing context. It is fine
1420  // for the memref to be used in a non-dereferencing way outside of the
1421  // region where this replacement is happening.
1422  if (!isa<AffineMapAccessInterface>(*op)) {
1423  if (!allowNonDereferencingOps) {
1424  LLVM_DEBUG(llvm::dbgs()
1425  << "Memref replacement failed: non-dereferencing memref op: \n"
1426  << *op << '\n');
1427  return failure();
1428  }
1429  // Non-dereferencing ops with the MemRefsNormalizable trait are
1430  // supported for replacement.
1431  if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
1432  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1433  "memrefs normalizable trait: \n"
1434  << *op << '\n');
1435  return failure();
1436  }
1437  }
1438 
1439  // We'll first collect and then replace --- since replacement erases the op
1440  // that has the use, and that op could be `postDomOpFilter` or `domOpFilter` itself!
1441  opsToReplace.insert(op);
1442  }
1443 
1444  for (auto *op : opsToReplace) {
1445  if (failed(replaceAllMemRefUsesWith(
1446  oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
1447  symbolOperands, allowNonDereferencingOps)))
1448  llvm_unreachable("memref replacement guaranteed to succeed here");
1449  }
1450 
1451  return success();
1452 }
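// Illustrative sketch of the effect (assuming a hypothetical double-buffering
// use, with `%ibuf` as the single extra index and no index remap): each
// dereferencing use of the old memref is rewritten by the per-op overload
// invoked in the loop above, with the extra indices prepended.
//
//   // Before: %A : memref<128xf32>, extraIndices = {%ibuf}
//   affine.load %A[%i] : memref<128xf32>
//   // After:  %Abuf : memref<2x128xf32>
//   affine.load %Abuf[%ibuf, %i] : memref<2x128xf32>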
1453 
1454 /// Given an operation, inserts one or more single result affine
1455 /// apply operations, results of which are exclusively used by this
1456 /// operation. The operands of these newly created affine apply ops are
1457 /// guaranteed to be loop iterators or terminal symbols of a function.
1458 ///
1459 /// Before
1460 ///
1461 /// affine.for %i = 0 to #map(%N)
1462 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1463 /// "send"(%idx, %A, ...)
1464 /// "compute"(%idx)
1465 ///
1466 /// After
1467 ///
1468 /// affine.for %i = 0 to #map(%N)
1469 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1470 /// "send"(%idx, %A, ...)
1471 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1472 /// "compute"(%idx_)
1473 ///
1474 /// This allows applying different transformations on send and compute (e.g.,
1475 /// different shifts/delays).
1476 ///
1477 /// Leaves `sliceOps` empty either if none of opInst's operands were the result
1478 /// of an affine.apply (and thus there was no affine computation slice to
1479 /// create), or if all the affine.apply ops supplying operands to this opInst
1480 /// had no uses besides this opInst; otherwise returns the list of affine.apply
1481 /// operations created in the output argument `sliceOps`.
1482 void mlir::affine::createAffineComputationSlice(
1483  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1484  // Collect all operands that are results of affine apply ops.
1485  SmallVector<Value, 4> subOperands;
1486  subOperands.reserve(opInst->getNumOperands());
1487  for (auto operand : opInst->getOperands())
1488  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1489  subOperands.push_back(operand);
1490 
1491  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1492  SmallVector<Operation *, 4> affineApplyOps;
1493  getReachableAffineApplyOps(subOperands, affineApplyOps);
1494  // Skip transforming if there are no affine maps to compose.
1495  if (affineApplyOps.empty())
1496  return;
1497 
1498  // Check if all uses of the affine apply ops lie only in this op, in
1499  // which case there would be nothing to do.
1500  bool localized = true;
1501  for (auto *op : affineApplyOps) {
1502  for (auto result : op->getResults()) {
1503  for (auto *user : result.getUsers()) {
1504  if (user != opInst) {
1505  localized = false;
1506  break;
1507  }
1508  }
1509  }
1510  }
1511  if (localized)
1512  return;
1513 
1514  OpBuilder builder(opInst);
1515  SmallVector<Value, 4> composedOpOperands(subOperands);
1516  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1517  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1518 
1519  // Create an affine.apply for each of the map results.
1520  sliceOps->reserve(composedMap.getNumResults());
1521  for (auto resultExpr : composedMap.getResults()) {
1522  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1523  composedMap.getNumSymbols(), resultExpr);
1524  sliceOps->push_back(builder.create<AffineApplyOp>(
1525  opInst->getLoc(), singleResMap, composedOpOperands));
1526  }
1527 
1528  // Construct the new operands that include the results from the composed
1529  // affine apply ops above instead of the existing ones (subOperands). They
1530  // differ from opInst's operands only at the positions occupied by
1531  // 'subOperands', which are replaced by the corresponding results from 'sliceOps'.
1532  SmallVector<Value, 4> newOperands(opInst->getOperands());
1533  for (Value &operand : newOperands) {
1534  // Replace the subOperands from among the new operands.
1535  unsigned j, f;
1536  for (j = 0, f = subOperands.size(); j < f; j++) {
1537  if (operand == subOperands[j])
1538  break;
1539  }
1540  if (j < subOperands.size())
1541  operand = (*sliceOps)[j];
1542  }
1543  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1544  opInst->setOperand(idx, newOperands[idx]);
1545 }
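// Minimal caller-side sketch (hypothetical `computeOp`, not part of this
// file): after slicing, `computeOp` consumes its own private affine.apply
// results, so it can be shifted or delayed independently of other users.
//
//   SmallVector<AffineApplyOp, 4> sliceOps;
//   createAffineComputationSlice(computeOp, &sliceOps);
//   if (!sliceOps.empty()) {
//     // Transform the apply ops feeding only `computeOp`, e.g. add a shift.
//   }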
1546 
1547 /// Enum to set patterns of affine expr in tiled-layout map.
1548 /// TileFloorDiv: <dim expr> div <tile size>
1549 /// TileMod: <dim expr> mod <tile size>
1550 /// TileNone: None of the above
1551 /// Example:
1552 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1553 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1554 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1555 /// "d0 mod 128" and "d1 mod 256" ==> TileMod
1556 enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1557 
1558 /// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
1559 /// being floordiv'ed by respective tile sizes appear in a mod with the same
1560 /// tile sizes, and no other expression involves those k dimensions. This
1561 /// function stores a vector of tuples (`tileSizePos`) including the AffineExpr
1562 /// for the tile size and the positions of the corresponding `floordiv` and
1563 /// `mod`. If it is not a tiled layout, `tileSizePos` is left empty.
1564 static LogicalResult getTileSizePos(
1565  AffineMap map,
1566  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1567  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1568  // `floordiv` and its position in `map` output.
1569  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1570  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1571  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1572  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
1573  unsigned pos = 0;
1574  for (AffineExpr expr : map.getResults()) {
1575  if (expr.getKind() == AffineExprKind::FloorDiv) {
1576  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1577  if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1578  floordivExprs.emplace_back(
1579  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1580  }
1581  pos++;
1582  }
1583  // Not tiled layout if `floordivExprs` is empty.
1584  if (floordivExprs.empty()) {
1585  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1586  return success();
1587  }
1588 
1589  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1590  // not a tiled layout.
1591  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1592  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1593  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1594  unsigned floordivPos = std::get<2>(fexpr);
1595 
1596  // Walk each affine expr of the `map` output except `fexpr`, and check if the
1597  // LHS and RHS of `fexpr` are used as the LHS and RHS of a `mod`. If the LHS of
1598  // `fexpr` is used in any other expr, the map is not a tiled layout. Examples of non-tiled layouts:
1599  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1600  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1601  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1602  // 256)>
1603  bool found = false;
1604  pos = 0;
1605  for (AffineExpr expr : map.getResults()) {
1606  bool notTiled = false;
1607  if (pos != floordivPos) {
1608  expr.walk([&](AffineExpr e) {
1609  if (e == floordivExprLHS) {
1610  if (expr.getKind() == AffineExprKind::Mod) {
1611  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1612  // If LHS and RHS of `mod` are the same with those of floordiv.
1613  if (floordivExprLHS == binaryExpr.getLHS() &&
1614  floordivExprRHS == binaryExpr.getRHS()) {
1615  // Save tile size (RHS of `mod`), and position of `floordiv` and
1616  // `mod` if same expr with `mod` is not found yet.
1617  if (!found) {
1618  tileSizePos.emplace_back(
1619  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1620  found = true;
1621  } else {
1622  // Non tiled layout: multiple `mod`s with the same LHS.
1623  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1624  // mod 256, d2 mod 256)>
1625  notTiled = true;
1626  }
1627  } else {
1628  // Non tiled layout: RHS of `mod` is different from `floordiv`.
1629  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1630  // mod 128)>
1631  notTiled = true;
1632  }
1633  } else {
1634  // Non tiled layout: LHS is the same, but not `mod`.
1635  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1636  // floordiv 256)>
1637  notTiled = true;
1638  }
1639  }
1640  });
1641  }
1642  if (notTiled) {
1643  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1644  return success();
1645  }
1646  pos++;
1647  }
1648  }
1649  return success();
1650 }
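// Worked example of the logic above: for
//   #tiled_2d_128x256 = affine_map<(d0, d1)
//       -> (d0 floordiv 128, d1 floordiv 256, d0 mod 128, d1 mod 256)>
// `floordivExprs` is {(d0, 128, 0), (d1, 256, 1)}; each floordiv is matched by
// exactly one mod with the same LHS and RHS, so `tileSizePos` becomes
// {(128, 0, 2), (256, 1, 3)}, i.e. (tile size, floordiv position, mod position).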
1651 
1652 /// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1653 /// after normalization. Dimensions that include dynamic dimensions in the map
1654 /// output will become dynamic dimensions. Return true if `dim` is a dynamic
1655 /// dimension.
1656 ///
1657 /// Example:
1658 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1659 ///
1660 /// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output are dynamic.
1661 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1662 static bool
1663 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1664  SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1665  AffineExpr expr = layoutMap.getResults()[dim];
1666  // Check if affine expr of the dimension includes dynamic dimension of input
1667  // memrefType.
1668  MLIRContext *context = layoutMap.getContext();
1669  return expr
1670  .walk([&](AffineExpr e) {
1671  if (isa<AffineDimExpr>(e) &&
1672  llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1673  return e == getAffineDimExpr(dim, context);
1674  }))
1675  return WalkResult::interrupt();
1676  return WalkResult::advance();
1677  })
1678  .wasInterrupted();
1679 }
1680 
1681 /// Create affine expr to calculate dimension size for a tiled-layout map.
1682 static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1683  TileExprPattern pat) {
1684  // Create map output for the patterns.
1685  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1686  // "mod <tile size>" ==> "<tile size>"
1687  AffineExpr newMapOutput;
1688  AffineBinaryOpExpr binaryExpr = nullptr;
1689  switch (pat) {
1690  case TileExprPattern::TileMod:
1691  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1692  newMapOutput = binaryExpr.getRHS();
1693  break;
1694  case TileExprPattern::TileFloorDiv:
1695  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1696  newMapOutput = getAffineBinaryOpExpr(
1697  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1698  break;
1699  default:
1700  newMapOutput = oldMapOutput;
1701  }
1702  return newMapOutput;
1703 }
1704 
1705 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1706 /// create `newDynamicSizes` from them by using AffineApplyOp.
1707 ///
1708 /// Steps for normalizing dynamic memrefs for a tiled layout map
1709 /// Example:
1710 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1711 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1712 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1713 ///
1714 /// (Before this function)
1715 /// 1. Check if `map` (#map0) is a tiled layout using `getTileSizePos()`. Only a
1716 /// single layout map is supported.
1717 ///
1718 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1719 /// is memref<4x?x?xf32> in the above example.
1720 ///
1721 /// (In this function)
1722 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1723 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1724 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1725 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1726 /// - New map in the above example
1727 /// #map0 = affine_map<(d0, d1) -> (d0)>
1728 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1729 /// #map2 = affine_map<(d0, d1) -> (32)>
1730 ///
1731 /// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1732 /// is used in dynamicSizes of new AllocOp.
1733 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1734 /// %c4 = arith.constant 4 : index
1735 /// %1 = affine.apply #map1(%c4, %0)
1736 /// %2 = affine.apply #map2(%c4, %0)
1737 static void createNewDynamicSizes(MemRefType oldMemRefType,
1738  MemRefType newMemRefType, AffineMap map,
1739  memref::AllocOp *allocOp, OpBuilder b,
1740  SmallVectorImpl<Value> &newDynamicSizes) {
1741  // Create new input for AffineApplyOp.
1742  SmallVector<Value, 4> inAffineApply;
1743  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1744  unsigned dynIdx = 0;
1745  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1746  if (oldMemRefShape[d] < 0) {
1747  // Use dynamicSizes of allocOp for dynamic dimension.
1748  inAffineApply.emplace_back(allocOp->getDynamicSizes()[dynIdx]);
1749  dynIdx++;
1750  } else {
1751  // Create ConstantOp for static dimension.
1752  auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1753  inAffineApply.emplace_back(
1754  b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
1755  }
1756  }
1757 
1758  // Create a new map to calculate each dimension size of the new memref for
1759  // each original map output. Only for dynamic dimensions of `newMemRefType`.
1760  unsigned newDimIdx = 0;
1761  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1762  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1763  (void)getTileSizePos(map, tileSizePos);
1764  for (AffineExpr expr : map.getResults()) {
1765  if (newMemRefShape[newDimIdx] < 0) {
1766  // Create new maps to calculate each dimension size of new memref.
1767  TileExprPattern pat = TileExprPattern::TileNone;
1768  for (auto pos : tileSizePos) {
1769  if (newDimIdx == std::get<1>(pos))
1770  pat = TileExprPattern::TileFloorDiv;
1771  else if (newDimIdx == std::get<2>(pos))
1772  pat = TileExprPattern::TileMod;
1773  }
1774  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1775  AffineMap newMap =
1776  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1777  Value affineApp =
1778  b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
1779  newDynamicSizes.emplace_back(affineApp);
1780  }
1781  newDimIdx++;
1782  }
1783 }
1784 
1785 // TODO: Currently works for static memrefs with a single layout map.
1786 LogicalResult mlir::affine::normalizeMemRef(memref::AllocOp *allocOp) {
1787  MemRefType memrefType = allocOp->getType();
1788  OpBuilder b(*allocOp);
1789 
1790  // Fetch a new memref type after normalizing the old memref to have an
1791  // identity map layout.
1792  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1793  if (newMemRefType == memrefType)
1794  // Either memrefType already had an identity map or the map couldn't be
1795  // transformed to an identity map.
1796  return failure();
1797 
1798  Value oldMemRef = allocOp->getResult();
1799 
1800  SmallVector<Value, 4> symbolOperands(allocOp->getSymbolOperands());
1801  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1802  memref::AllocOp newAlloc;
1803  // Check if `layoutMap` is a tiled layout. Only a single layout map is
1804  // supported for normalizing dynamic memrefs.
1805  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1806  (void)getTileSizePos(layoutMap, tileSizePos);
1807  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1808  MemRefType oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1809  SmallVector<Value, 4> newDynamicSizes;
1810  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1811  newDynamicSizes);
1812  // Add the new dynamic sizes in new AllocOp.
1813  newAlloc =
1814  b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1815  newDynamicSizes, allocOp->getAlignmentAttr());
1816  } else {
1817  newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1818  allocOp->getAlignmentAttr());
1819  }
1820  // Replace all uses of the old memref.
1821  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1822  /*extraIndices=*/{},
1823  /*indexRemap=*/layoutMap,
1824  /*extraOperands=*/{},
1825  /*symbolOperands=*/symbolOperands,
1826  /*domOpFilter=*/nullptr,
1827  /*postDomOpFilter=*/nullptr,
1828  /*allowNonDereferencingOps=*/true))) {
1829  // If it failed (due to escapes for example), bail out.
1830  newAlloc.erase();
1831  return failure();
1832  }
1833  // Replace any uses of the original alloc op and erase it. All remaining uses
1834  // have to be deallocs; replaceAllMemRefUsesWith above would've failed otherwise.
1835  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1836  return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1837  }));
1838  oldMemRef.replaceAllUsesWith(newAlloc);
1839  allocOp->erase();
1840  return success();
1841 }
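// Illustrative before/after IR (a sketch, assuming a transposed layout):
//
//   // Before
//   %0 = memref.alloc() : memref<16x32xf32, affine_map<(d0, d1) -> (d1, d0)>>
//   affine.load %0[%i, %j] : memref<16x32xf32, affine_map<(d0, d1) -> (d1, d0)>>
//
//   // After normalizeMemRef
//   %0 = memref.alloc() : memref<32x16xf32>
//   affine.load %0[%j, %i] : memref<32x16xf32>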
1842 
1843 MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1844  unsigned rank = memrefType.getRank();
1845  if (rank == 0)
1846  return memrefType;
1847 
1848  if (memrefType.getLayout().isIdentity()) {
1849  // Either no map is associated with this memref or this memref has
1850  // a trivial (identity) map.
1851  return memrefType;
1852  }
1853  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1854  unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1855 
1856  // We don't do any checks for one-to-one-ness; we assume that it is
1857  // one-to-one.
1858 
1859  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1860  // for now.
1861  // TODO: Normalize the other types of dynamic memrefs.
1862  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1863  (void)getTileSizePos(layoutMap, tileSizePos);
1864  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1865  return memrefType;
1866 
1867  // We have a single map that is not an identity map. Create a new memref
1868  // with the right shape and an identity layout map.
1869  ArrayRef<int64_t> shape = memrefType.getShape();
1870  // FlatAffineValueConstraints may later on use symbolicOperands.
1871  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1872  SmallVector<unsigned, 4> memrefTypeDynDims;
1873  for (unsigned d = 0; d < rank; ++d) {
1874  // Use constraint system only in static dimensions.
1875  if (shape[d] > 0) {
1876  fac.addBound(BoundType::LB, d, 0);
1877  fac.addBound(BoundType::UB, d, shape[d] - 1);
1878  } else {
1879  memrefTypeDynDims.emplace_back(d);
1880  }
1881  }
1882  // We compose this map with the original index (logical) space to derive
1883  // the upper bounds for the new index space.
1884  unsigned newRank = layoutMap.getNumResults();
1885  if (failed(fac.composeMatchingMap(layoutMap)))
1886  return memrefType;
1887  // TODO: Handle semi-affine maps.
1888  // Project out the old data dimensions.
1889  fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1890  SmallVector<int64_t, 4> newShape(newRank);
1891  MLIRContext *context = memrefType.getContext();
1892  for (unsigned d = 0; d < newRank; ++d) {
1893  // Check if this dimension is dynamic.
1894  if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1895  newShape[d] = ShapedType::kDynamic;
1896  continue;
1897  }
1898  // The lower bound for the shape is always zero.
1899  std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1900  // For a static memref and an affine map with no symbols, this is
1901  // always bounded. However, when we have symbols, we may not be able to
1902  // obtain a constant upper bound. Also, mapping to a negative space is
1903  // invalid for normalization.
1904  if (!ubConst.has_value() || *ubConst < 0) {
1905  LLVM_DEBUG(llvm::dbgs()
1906  << "can't normalize map due to unknown/invalid upper bound");
1907  return memrefType;
1908  }
1909  // Static dimension: its size is the constant upper bound plus one.
1910  newShape[d] = *ubConst + 1;
1911  }
1912 
1913  // Create the new memref type after trivializing the old layout map.
1914  auto newMemRefType =
1915  MemRefType::Builder(memrefType)
1916  .setShape(newShape)
1917  .setLayout(AffineMapAttr::get(
1918  AffineMap::getMultiDimIdentityMap(newRank, context)));
1919  return newMemRefType;
1920 }
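// Worked example of the shape computation above (static case, reusing the
// #map0 from the earlier comment): for
//   memref<4x64xf32, affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>>
// the constraints give 0 <= d0 <= 3 and 0 <= d1 <= 63, the composed results
// have constant upper bounds 3, 1 and 31, and the normalized type is
// memref<4x2x32xf32> with an identity layout.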
1921 
1922 DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1923  Value rhs) {
1924  DivModValue result;
1925  AffineExpr d0, d1;
1926  bindDims(b.getContext(), d0, d1);
1927  result.quotient =
1928  affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1929  result.remainder =
1930  affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1931  return result;
1932 }
1933 
1934 /// Create an affine map that computes `lhs` * `rhs`, composing in any other
1935 /// affine maps.
1936 static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1937  Location loc,
1938  OpFoldResult lhs,
1939  OpFoldResult rhs) {
1940  AffineExpr s0, s1;
1941  bindSymbols(b.getContext(), s0, s1);
1942  return makeComposedFoldedAffineApply(b, loc, s0 * s1, {lhs, rhs});
1943 }
1944 
1945 FailureOr<SmallVector<Value>>
1946 mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1947  ArrayRef<Value> basis, bool hasOuterBound) {
1948  if (hasOuterBound)
1949  basis = basis.drop_front();
1950 
1951  // Note: the divisors are backwards due to the scan.
1952  SmallVector<Value> divisors;
1953  OpFoldResult basisProd = b.getIndexAttr(1);
1954  for (OpFoldResult basisElem : llvm::reverse(basis)) {
1955  FailureOr<OpFoldResult> nextProd =
1956  composedAffineMultiply(b, loc, basisElem, basisProd);
1957  if (failed(nextProd))
1958  return failure();
1959  basisProd = *nextProd;
1960  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1961  }
1962 
1963  SmallVector<Value> results;
1964  results.reserve(divisors.size() + 1);
1965  Value residual = linearIndex;
1966  for (Value divisor : llvm::reverse(divisors)) {
1967  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1968  results.push_back(divMod.quotient);
1969  residual = divMod.remainder;
1970  }
1971  results.push_back(residual);
1972  return results;
1973 }
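// Worked example (assuming `hasOuterBound` is true): with basis = {2, 3, 5},
// the outer bound 2 is dropped and the reversed scan yields divisors {5, 15}.
// A linear index of 23 then delinearizes to
//   23 floordiv 15 = 1, (23 mod 15) floordiv 5 = 1, (23 mod 15) mod 5 = 3,
// i.e. the multi-index (1, 1, 3), since 1*15 + 1*5 + 3 = 23.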
1974 
1975 FailureOr<SmallVector<Value>>
1976 mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1977  ArrayRef<OpFoldResult> basis,
1978  bool hasOuterBound) {
1979  if (hasOuterBound)
1980  basis = basis.drop_front();
1981 
1982  // Note: the divisors are backwards due to the scan.
1983  SmallVector<Value> divisors;
1984  OpFoldResult basisProd = b.getIndexAttr(1);
1985  for (OpFoldResult basisElem : llvm::reverse(basis)) {
1986  FailureOr<OpFoldResult> nextProd =
1987  composedAffineMultiply(b, loc, basisElem, basisProd);
1988  if (failed(nextProd))
1989  return failure();
1990  basisProd = *nextProd;
1991  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1992  }
1993 
1994  SmallVector<Value> results;
1995  results.reserve(divisors.size() + 1);
1996  Value residual = linearIndex;
1997  for (Value divisor : llvm::reverse(divisors)) {
1998  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1999  results.push_back(divMod.quotient);
2000  residual = divMod.remainder;
2001  }
2002  results.push_back(residual);
2003  return results;
2004 }
2005 
2006 OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
2007  ArrayRef<OpFoldResult> basis,
2008  ImplicitLocOpBuilder &builder) {
2009  return linearizeIndex(builder, builder.getLoc(), multiIndex, basis);
2010 }
2011 
2012 OpFoldResult mlir::affine::linearizeIndex(OpBuilder &builder, Location loc,
2013  ArrayRef<OpFoldResult> multiIndex,
2014  ArrayRef<OpFoldResult> basis) {
2015  assert(multiIndex.size() == basis.size() ||
2016  multiIndex.size() == basis.size() + 1);
2017  SmallVector<AffineExpr> basisAffine;
2018 
2019  // Add a fake initial size in order to make the later index linearization
2020  // computations line up if an outer bound is not provided.
2021  if (multiIndex.size() == basis.size() + 1)
2022  basisAffine.push_back(getAffineConstantExpr(1, builder.getContext()));
2023 
2024  for (size_t i = 0; i < basis.size(); ++i) {
2025  basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
2026  }
2027 
2028  SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
2029  SmallVector<OpFoldResult> strides;
2030  strides.reserve(stridesAffine.size());
2031  llvm::transform(stridesAffine, std::back_inserter(strides),
2032  [&builder, &basis, loc](AffineExpr strideExpr) {
2033  return affine::makeComposedFoldedAffineApply(
2034  builder, loc, strideExpr, basis);
2035  });
2036 
2037  auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2038  OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
2039  return affine::makeComposedFoldedAffineApply(builder, loc, linearIndexExpr,
2040  multiIndexAndStrides);
2041 }
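// Worked example for the linearization above: with basis = {2, 3, 5}, the
// symbolic strides fold to (15, 5, 1), so the multi-index (1, 1, 3) maps to
// 0 + 1*15 + 1*5 + 3*1 = 23, the inverse of the delinearization example
// earlier.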
static void createNewDynamicSizes(MemRefType oldMemRefType, MemRefType newMemRefType, AffineMap map, memref::AllocOp *allocOp, OpBuilder b, SmallVectorImpl< Value > &newDynamicSizes)
Create new maps to calculate each dimension size of newMemRefType, and create newDynamicSizes from th...
Definition: Utils.cpp:1737
static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp, unsigned minSurroundingLoops)
Returns true if srcMemOp may have an effect on destMemOp within the scope of the outermost minSurroun...
Definition: Utils.cpp:649
static LogicalResult getTileSizePos(AffineMap map, SmallVectorImpl< std::tuple< AffineExpr, unsigned, unsigned >> &tileSizePos)
Check if map is a tiled layout.
Definition: Utils.cpp:1564
TileExprPattern
Enum to set patterns of affine expr in tiled-layout map.
Definition: Utils.cpp:1556
@ TileFloorDiv
Definition: Utils.cpp:1556
@ TileNone
Definition: Utils.cpp:1556
@ TileMod
Definition: Utils.cpp:1556
static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock)
Promotes the then or the else block of ifOp (depending on whether elseBlock is false or true) into if...
Definition: Utils.cpp:244
static bool isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap, SmallVectorImpl< unsigned > &inMemrefTypeDynDims)
Check if dim dimension of memrefType with layoutMap becomes dynamic after normalization.
Definition: Utils.cpp:1663
LogicalResult transformMemRefLoadWithReducedRank(Operation *op, Value oldMemRef, Value newMemRef, unsigned memRefOperandPos, ArrayRef< Value > extraIndices, ArrayRef< Value > extraOperands, ArrayRef< Value > symbolOperands, AffineMap indexRemap)
Definition: Utils.cpp:1100
static FailureOr< OpFoldResult > composedAffineMultiply(OpBuilder &b, Location loc, OpFoldResult lhs, OpFoldResult rhs)
Create an affine map that computes lhs * rhs, composing in any other affine maps.
Definition: Utils.cpp:1936
static void loadCSE(AffineReadOpInterface loadA, SmallVectorImpl< Operation * > &loadOpsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition: Utils.cpp:954
static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput, TileExprPattern pat)
Create affine expr to calculate dimension size for a tiled-layout map.
Definition: Utils.cpp:1682
static Operation * getOutermostInvariantForOp(AffineIfOp ifOp)
Returns the outermost affine.for/parallel op that the ifOp is invariant on.
Definition: Utils.cpp:260
static void findUnusedStore(AffineWriteOpInterface writeA, SmallVectorImpl< Operation * > &opsToErase, PostDominanceInfo &postDominanceInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition: Utils.cpp:907
static bool mustReachAtInnermost(const MemRefAccess &srcAccess, const MemRefAccess &destAccess)
Returns true if the memory operation of destAccess depends on srcAccess inside of the innermost commo...
Definition: Utils.cpp:632
static void forwardStoreToLoad(AffineReadOpInterface loadOp, SmallVectorImpl< Operation * > &loadOpsToErase, SmallPtrSetImpl< Value > &memrefsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Attempt to eliminate loadOp by replacing it with a value stored into memory which the load is guarant...
Definition: Utils.cpp:830
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Definition: PDL.cpp:63
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Affine binary operation expression.
Definition: AffineExpr.h:227
AffineExpr getLHS() const
Definition: AffineExpr.cpp:340
AffineExpr getRHS() const
Definition: AffineExpr.cpp:343
An integer constant appearing in affine expression.
Definition: AffineExpr.h:252
int64_t getValue() const
Definition: AffineExpr.cpp:633
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:236
unsigned getPosition() const
Definition: AffineExpr.cpp:348
See documentation for AffineExprVisitorBase.
Base type for affine expression.
Definition: AffineExpr.h:68
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:917
RetT walk(FnT &&callback) const
Walk all of the AffineExpr's in this expression in postorder.
Definition: AffineExpr.h:130
AffineExpr ceilDiv(uint64_t v) const
Definition: AffineExpr.cpp:964
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:343
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
Definition: AffineMap.cpp:334
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
Definition: AffineMap.cpp:398
unsigned getNumDims() const
Definition: AffineMap.cpp:394
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:407
unsigned getNumResults() const
Definition: AffineMap.cpp:402
unsigned getNumInputs() const
Definition: AffineMap.cpp:403
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:411
AffineMap compose(AffineMap map) const
Returns the AffineMap resulting from composing this with map.
Definition: AffineMap.cpp:556
A symbolic identifier appearing in an affine expression.
Definition: AffineExpr.h:244
unsigned getPosition() const
Definition: AffineExpr.cpp:623
This class represents the main alias analysis interface in MLIR.
AliasResult alias(Value lhs, Value rhs)
Given two values, return their aliasing behavior.
bool isNo() const
Returns if this result indicates no possibility of aliasing.
Definition: AliasAnalysis.h:59
This class represents an argument of a Block.
Definition: Value.h:319
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
SuccessorRange getSuccessors()
Definition: Block.h:267
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
OpListType & getOperations()
Definition: Block.h:137
iterator end()
Definition: Block.h:144
iterator begin()
Definition: Block.h:143
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:148
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition: Builders.cpp:268
AffineMap getMultiDimIdentityMap(unsigned rank)
Definition: Builders.cpp:427
BoolAttr getBoolAttr(bool value)
Definition: Builders.cpp:140
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:302
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:404
AffineMap getConstantAffineMap(int64_t val)
Returns a single constant result affine map with 0 dimensions and 0 symbols.
Definition: Builders.cpp:418
MLIRContext * getContext() const
Definition: Builders.h:56
IndexType getIndexType()
Definition: Builders.cpp:95
A class for computing basic dominance information.
Definition: Dominance.h:140
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
Definition: Dominance.h:160
LogicalResult composeMatchingMap(AffineMap other)
Composes an affine map whose dimensions and symbols match one to one with the dimensions and symbols ...
void projectOut(Value val)
Projects out the variable that is associate with Value.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
void clear()
Clears all mappings held by the mapper.
Definition: IRMapping.h:79
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Location getLoc() const
Accessors for the implied location.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This is a builder type that keeps local references to arguments.
Definition: BuiltinTypes.h:213
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
Definition: BuiltinTypes.h:234
Builder & setShape(ArrayRef< int64_t > newShape)
Definition: BuiltinTypes.h:224
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
This class helps build Operations.
Definition: Builders.h:216
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition: Builders.h:249
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition: Builders.h:454
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:588
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:440
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:497
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Definition: Builders.h:421
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition: Builders.h:451
This class represents a single result from folding an operation.
Definition: OpDefinition.h:268
This trait indicates that the memory effects of an operation includes the effects of operations neste...
This class provides the API for ops that are known to be isolated from above.
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
void setOperand(unsigned idx, Value value)
Definition: Operation.h:351
operand_iterator operand_begin()
Definition: Operation.h:374
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
operand_iterator operand_end()
Definition: Operation.h:375
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:687
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
bool isAncestor(Operation *other)
Return true if this operation is an ancestor of the other operation.
Definition: Operation.h:263
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Definition: Operation.cpp:237
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:874
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
A class for computing basic postdominance information.
Definition: Dominance.h:197
bool postDominates(Operation *a, Operation *b) const
Return true if operation A postdominates operation B.
Definition: Dominance.h:208
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
bool isAncestor(Region *other)
Return true if this region is ancestor of the other region.
Definition: Region.h:222
void takeBody(Region &other)
Takes body of another region (that region will have no body after this operation completes).
Definition: Region.h:241
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition: Value.cpp:61
void replaceAllUsesWith(Value newValue)
Replace all uses of 'this' value with the new value, updating anything in the IR that uses 'this' to ...
Definition: Value.h:173
user_range getUsers() const
Definition: Value.h:228
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
static WalkResult advance()
Definition: Visitors.h:51
static WalkResult interrupt()
Definition: Visitors.h:50
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
LogicalResult canonicalize()
Attempts to canonicalize the map and operands.
Definition: AffineOps.cpp:3956
unsigned getNumSymbols() const
ArrayRef< Value > getOperands() const
AffineExpr getResult(unsigned i)
unsigned getNumResults() const
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
std::optional< SmallVector< Value, 8 > > expandAffineMap(OpBuilder &builder, Location loc, AffineMap affineMap, ValueRange operands)
Create a sequence of operations that implement the affineMap applied to the given operands (as it it ...
Definition: Utils.cpp:226
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
Definition: AffineOps.cpp:1134
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo, PostDominanceInfo &postDomInfo, AliasAnalysis &analysis)
Replace affine store and load accesses by scalars by forwarding stores to loads and eliminate invaria...
Definition: Utils.cpp:1035
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
Definition: LoopUtils.cpp:118
bool isValidDim(Value value)
Returns true if the given Value can be used as a dimension id in the region of the closest surroundin...
Definition: AffineOps.cpp:278
Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr, ValueRange dimValues, ValueRange symbolValues)
Emit code that computes the given affine expression using standard arithmetic operations applied to t...
Definition: Utils.cpp:216
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:1893
void normalizeAffineParallel(AffineParallelOp op)
Normalize a affine.parallel op so that lower bounds are 0 and steps are 1.
Definition: Utils.cpp:492
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
LogicalResult affineParallelize(AffineForOp forOp, ArrayRef< LoopReduction > parallelReductions={}, AffineParallelOp *resOp=nullptr)
Replaces a parallel affine.for op with a 1-d affine.parallel op.
Definition: Utils.cpp:349
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1144
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1435
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
LogicalResult normalizeAffineFor(AffineForOp op, bool promoteSingleIter=false)
Normalize an affine.for op.
Definition: Utils.cpp:556
LogicalResult normalizeMemRef(memref::AllocOp *op)
Rewrites the memref defined by this alloc op to have an identity layout map and updates all its index...
Definition: Utils.cpp:1786
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:392
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1194
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
MemRefType normalizeMemRefType(MemRefType memrefType)
Normalizes memrefType so that the affine layout map of the memref is transformed to an identity map w...
Definition: Utils.cpp:1843
FailureOr< SmallVector< Value > > delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex, ArrayRef< Value > basis, bool hasOuterBound=true)
Generate the IR to delinearize linearIndex given the basis and return the multi-index.
Definition: Utils.cpp:1946
OpFoldResult linearizeIndex(ArrayRef< OpFoldResult > multiIndex, ArrayRef< OpFoldResult > basis, ImplicitLocOpBuilder &builder)
Definition: Utils.cpp:2006
Region * getAffineScope(Operation *op)
Returns the closest region enclosing op that is held by an operation with trait AffineScope; nullptr ...
Definition: AffineOps.cpp:263
DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs)
Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
Definition: Utils.cpp:1922
bool hasNoInterveningEffect(Operation *start, T memOp, llvm::function_ref< bool(Value, Value)> mayAlias)
Ensure that all operations that could be executed after start (noninclusive) and prior to memOp (e....
Definition: Utils.cpp:682
void createAffineComputationSlice(Operation *opInst, SmallVectorImpl< AffineApplyOp > *sliceOps)
Given an operation, inserts one or more single result affine apply operations, results of which are e...
Definition: Utils.cpp:1482
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded=nullptr)
Hoists out affine.if/else to as high as possible, i.e., past all invariant affine....
Definition: Utils.cpp:411
bool noDependence(DependenceResult result)
Returns true if the provided DependenceResult corresponds to the absence of a dependence.
AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min, AffineExpr max, bool positivePath=true)
Traverse e and return an AffineExpr where all occurrences of dim have been replaced by either:
Definition: Utils.cpp:465
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...
Definition: Utils.cpp:1366
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Definition: AffineMap.cpp:773
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:348
std::pair< AffineExpr, SmallVector< OpFoldResult > > computeLinearIndex(OpFoldResult sourceOffset, ArrayRef< OpFoldResult > strides, ArrayRef< OpFoldResult > indices)
Compute linear index from provided strides and indices, assuming strided layout.
const FrozenRewritePatternSet GreedyRewriteConfig config
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
Definition: IndexingUtils.h:47
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
@ CeilDiv
RHS of ceildiv is always a constant or a symbolic expression.
@ Mod
RHS of mod is always a constant or a symbolic expression with a positive value.
@ FloorDiv
RHS of floordiv is always a constant or a symbolic expression.
AffineExpr getAffineBinaryOpExpr(AffineExprKind kind, AffineExpr lhs, AffineExpr rhs)
Definition: AffineExpr.cpp:70
const FrozenRewritePatternSet & patterns
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:362
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:112
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:641
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
@ ExistingOps
Only pre-existing ops are processed.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
Definition: AffineExpr.cpp:617
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Definition: AffineExpr.cpp:627
The following effect indicates that the operation reads from some resource.
This represents an operation in an abstracted form, suitable for use with the builder APIs.
Checks whether two accesses to the same memref access the same element.
Holds the result of (div a, b) and (mod a, b).
Definition: Utils.h:301
A description of a (parallelizable) reduction in an affine loop.
arith::AtomicRMWKind kind
Reduction kind.
Value value
The value being reduced.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.