1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
25 #include "mlir/IR/Dominance.h"
26 #include "mlir/IR/IRMapping.h"
27 #include "mlir/IR/IntegerSet.h"
29 #include <optional>
30 
31 #define DEBUG_TYPE "affine-utils"
32 
33 using namespace mlir;
34 using namespace affine;
35 using namespace presburger;
36 
37 namespace {
38 /// Visit affine expressions recursively and build the sequence of operations
39 /// that correspond to them. Visitation functions return the Value of the
40 /// expression subtree they visited or `nullptr` on error.
41 class AffineApplyExpander
42  : public AffineExprVisitor<AffineApplyExpander, Value> {
43 public:
44  /// This internal class expects arguments to be non-null; checks must be
45  /// performed at the call site.
46  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
47  ValueRange symbolValues, Location loc)
48  : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
49  loc(loc) {}
50 
51  template <typename OpTy>
52  Value buildBinaryExpr(AffineBinaryOpExpr expr,
53  arith::IntegerOverflowFlags overflowFlags =
54  arith::IntegerOverflowFlags::none) {
55  auto lhs = visit(expr.getLHS());
56  auto rhs = visit(expr.getRHS());
57  if (!lhs || !rhs)
58  return nullptr;
59  auto op = OpTy::create(builder, loc, lhs, rhs, overflowFlags);
60  return op.getResult();
61  }
62 
63  Value visitAddExpr(AffineBinaryOpExpr expr) {
64  return buildBinaryExpr<arith::AddIOp>(expr);
65  }
66 
67  Value visitMulExpr(AffineBinaryOpExpr expr) {
68  return buildBinaryExpr<arith::MulIOp>(expr,
69  arith::IntegerOverflowFlags::nsw);
70  }
71 
72  /// Euclidean modulo operation: negative RHS is not allowed.
73  /// The remainder of the Euclidean integer division is always non-negative.
74  ///
75  /// Implemented as
76  ///
77  /// a mod b =
78  /// let remainder = srem a, b;
79  /// negative = a < 0 in
80  /// select negative, remainder + b, remainder.
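 ///
 /// Worked example (illustrative): a = -7, b = 3: `srem` yields -1; since
 /// -1 < 0, the result is -1 + 3 = 2, the Euclidean remainder.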
81  Value visitModExpr(AffineBinaryOpExpr expr) {
82  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
83  if (rhsConst.getValue() <= 0) {
84  emitError(loc, "modulo by non-positive value is not supported");
85  return nullptr;
86  }
87  }
88 
89  auto lhs = visit(expr.getLHS());
90  auto rhs = visit(expr.getRHS());
91  assert(lhs && rhs && "unexpected affine expr lowering failure");
92 
93  Value remainder = arith::RemSIOp::create(builder, loc, lhs, rhs);
94  Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
95  Value isRemainderNegative = arith::CmpIOp::create(
96  builder, loc, arith::CmpIPredicate::slt, remainder, zeroCst);
97  Value correctedRemainder =
98  arith::AddIOp::create(builder, loc, remainder, rhs);
99  Value result = arith::SelectOp::create(builder, loc, isRemainderNegative,
100  correctedRemainder, remainder);
101  return result;
102  }
103 
104  /// Floor division operation (rounds towards negative infinity).
105  ///
106  /// For positive divisors, it can be implemented without branching and with a
107  /// single division operation as
108  ///
109  /// a floordiv b =
110  /// let negative = a < 0 in
111  /// let absolute = negative ? -a - 1 : a in
112  /// let quotient = absolute / b in
113  /// negative ? -quotient - 1 : quotient
114  ///
115  /// Note: this lowering does not use arith.floordivsi because the lowering of
116  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
117  /// not one but two arith.divsi. That could be changed to one divsi, but one
118  /// way or another, going through arith.floordivsi will result in more complex
119  /// IR because arith.floordivsi is more general than affine floordiv in that
120  /// it supports negative RHS.
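 ///
 /// Worked example (illustrative): a = -7, b = 3 gives negative = true,
 /// absolute = 6, quotient = 2, result = -2 - 1 = -3 = floor(-7/3);
 /// a = 7, b = 3 gives result = 7 / 3 = 2.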
121  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
122  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
123  if (rhsConst.getValue() <= 0) {
124  emitError(loc, "division by non-positive value is not supported");
125  return nullptr;
126  }
127  }
128  auto lhs = visit(expr.getLHS());
129  auto rhs = visit(expr.getRHS());
130  assert(lhs && rhs && "unexpected affine expr lowering failure");
131 
132  Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
133  Value noneCst = arith::ConstantIndexOp::create(builder, loc, -1);
134  Value negative = arith::CmpIOp::create(
135  builder, loc, arith::CmpIPredicate::slt, lhs, zeroCst);
136  Value negatedDecremented =
137  arith::SubIOp::create(builder, loc, noneCst, lhs);
138  Value dividend = arith::SelectOp::create(builder, loc, negative,
139  negatedDecremented, lhs);
140  Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
141  Value correctedQuotient =
142  arith::SubIOp::create(builder, loc, noneCst, quotient);
143  Value result = arith::SelectOp::create(builder, loc, negative,
144  correctedQuotient, quotient);
145  return result;
146  }
147 
148  /// Ceiling division operation (rounds towards positive infinity).
149  ///
150  /// For positive divisors, it can be implemented without branching and with a
151  /// single division operation as
152  ///
153  /// a ceildiv b =
154  /// let negative = a <= 0 in
155  /// let absolute = negative ? -a : a - 1 in
156  /// let quotient = absolute / b in
157  /// negative ? -quotient : quotient + 1
158  ///
159  /// Note: not using arith.ceildivsi for the same reason as explained in the
160  /// visitFloorDivExpr comment.
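 ///
 /// Worked example (illustrative): a = -7, b = 3 gives negative = true,
 /// absolute = 7, quotient = 2, result = -2 = ceil(-7/3); a = 7, b = 3 gives
 /// absolute = 6, quotient = 2, result = 3 = ceil(7/3).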
161  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
162  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
163  if (rhsConst.getValue() <= 0) {
164  emitError(loc, "division by non-positive value is not supported");
165  return nullptr;
166  }
167  }
168  auto lhs = visit(expr.getLHS());
169  auto rhs = visit(expr.getRHS());
170  assert(lhs && rhs && "unexpected affine expr lowering failure");
171 
172  Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
173  Value oneCst = arith::ConstantIndexOp::create(builder, loc, 1);
174  Value nonPositive = arith::CmpIOp::create(
175  builder, loc, arith::CmpIPredicate::sle, lhs, zeroCst);
176  Value negated = arith::SubIOp::create(builder, loc, zeroCst, lhs);
177  Value decremented = arith::SubIOp::create(builder, loc, lhs, oneCst);
178  Value dividend = arith::SelectOp::create(builder, loc, nonPositive, negated,
179  decremented);
180  Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
181  Value negatedQuotient =
182  arith::SubIOp::create(builder, loc, zeroCst, quotient);
183  Value incrementedQuotient =
184  arith::AddIOp::create(builder, loc, quotient, oneCst);
185  Value result = arith::SelectOp::create(
186  builder, loc, nonPositive, negatedQuotient, incrementedQuotient);
187  return result;
188  }
189 
190  Value visitConstantExpr(AffineConstantExpr expr) {
191  auto op = arith::ConstantIndexOp::create(builder, loc, expr.getValue());
192  return op.getResult();
193  }
194 
195  Value visitDimExpr(AffineDimExpr expr) {
196  assert(expr.getPosition() < dimValues.size() &&
197  "affine dim position out of range");
198  return dimValues[expr.getPosition()];
199  }
200 
201  Value visitSymbolExpr(AffineSymbolExpr expr) {
202  assert(expr.getPosition() < symbolValues.size() &&
203  "symbol dim position out of range");
204  return symbolValues[expr.getPosition()];
205  }
206 
207 private:
208  OpBuilder &builder;
209  ValueRange dimValues;
210  ValueRange symbolValues;
211 
212  Location loc;
213 };
214 } // namespace
215 
216 /// Create a sequence of operations that implement the `expr` applied to the
217 /// given dimension and symbol values.
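/// For example (illustrative), expanding d0 + s0 * 2 with dimValues = {%i} and
/// symbolValues = {%n} emits roughly:
///   %c2 = arith.constant 2 : index
///   %0  = arith.muli %n, %c2 overflow<nsw> : index
///   %1  = arith.addi %i, %0 : index
/// and returns %1.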
218 mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
219  AffineExpr expr,
220  ValueRange dimValues,
221  ValueRange symbolValues) {
222  return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
223 }
224 
225 /// Create a sequence of operations that implement the `affineMap` applied to
226 /// the given `operands` (as if it were an AffineApplyOp).
227 std::optional<SmallVector<Value, 8>>
228 mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
229  AffineMap affineMap, ValueRange operands) {
230  auto numDims = affineMap.getNumDims();
231  auto expanded = llvm::to_vector<8>(
232  llvm::map_range(affineMap.getResults(),
233  [numDims, &builder, loc, operands](AffineExpr expr) {
234  return expandAffineExpr(builder, loc, expr,
235  operands.take_front(numDims),
236  operands.drop_front(numDims));
237  }));
238  if (llvm::all_of(expanded, [](Value v) { return v; }))
239  return expanded;
240  return std::nullopt;
241 }
242 
243 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
244 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
245 /// the rest of the op.
246 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
247  if (elseBlock)
248  assert(ifOp.hasElse() && "else block expected");
249 
250  Block *destBlock = ifOp->getBlock();
251  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
252  destBlock->getOperations().splice(
253  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
254  std::prev(srcBlock->end()));
255  ifOp.erase();
256 }
257 
258 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
259 /// on. The `ifOp` could be hoisted and placed right before such an operation.
260 /// This method assumes that the ifOp has been canonicalized (to be correct and
261 /// effective).
262 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
263  // Walk up the parents past all for ops that this conditional is invariant on.
264  auto ifOperands = ifOp.getOperands();
265  Operation *res = ifOp;
266  while (!res->getParentOp()->hasTrait<OpTrait::IsIsolatedFromAbove>()) {
267  auto *parentOp = res->getParentOp();
268  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
269  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
270  break;
271  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
272  if (llvm::any_of(parallelOp.getIVs(), [&](Value iv) {
273  return llvm::is_contained(ifOperands, iv);
274  }))
275  break;
276  } else if (!isa<AffineIfOp>(parentOp)) {
277  // Won't walk up past anything other than affine.for/if ops.
278  break;
279  }
280  // You can always hoist up past any affine.if ops.
281  res = parentOp;
282  }
283  return res;
284 }
285 
286 /// A helper for the mechanics of mlir::affine::hoistAffineIfOp. Hoists `ifOp` just over
287 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
288 /// otherwise the same `ifOp`.
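/// For instance (illustrative sketch), when `hoistOverOp` is a loop that the
/// condition is invariant on:
///
///   affine.for %i = 0 to %n {
///     affine.if #set(%m) { "S1"() : () -> () } else { "S2"() : () -> () }
///   }
///
/// becomes
///
///   affine.if #set(%m) {
///     affine.for %i = 0 to %n { "S1"() : () -> () }
///   } else {
///     affine.for %i = 0 to %n { "S2"() : () -> () }
///   }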
289 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
290  // No hoisting to do.
291  if (hoistOverOp == ifOp)
292  return ifOp;
293 
294  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
295  // the else block. Then drop the else block of the original 'if' in the 'then'
296  // branch while promoting its then block, and analogously drop the 'then'
297  // block of the original 'if' from the 'else' branch while promoting its else
298  // block.
299  IRMapping operandMap;
300  OpBuilder b(hoistOverOp);
301  auto hoistedIfOp = AffineIfOp::create(b, ifOp.getLoc(), ifOp.getIntegerSet(),
302  ifOp.getOperands(),
303  /*elseBlock=*/true);
304 
305  // Create a clone of hoistOverOp to use for the else branch of the hoisted
306  // conditional. The else block may get optimized away if empty.
307  Operation *hoistOverOpClone = nullptr;
308  // We use this unique name to identify/find `ifOp`'s clone in the else
309  // version.
310  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
311  operandMap.clear();
312  b.setInsertionPointAfter(hoistOverOp);
313  // We'll set an attribute to identify this op in a clone of this sub-tree.
314  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
315  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
316 
317  // Promote the 'then' block of the original affine.if in the then version.
318  promoteIfBlock(ifOp, /*elseBlock=*/false);
319 
320  // Move the then version to the hoisted if op's 'then' block.
321  auto *thenBlock = hoistedIfOp.getThenBlock();
322  thenBlock->getOperations().splice(thenBlock->begin(),
323  hoistOverOp->getBlock()->getOperations(),
324  Block::iterator(hoistOverOp));
325 
326  // Find the clone of the original affine.if op in the else version.
327  AffineIfOp ifCloneInElse;
328  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
329  if (!ifClone->getAttr(idForIfOp))
330  return WalkResult::advance();
331  ifCloneInElse = ifClone;
332  return WalkResult::interrupt();
333  });
334  assert(ifCloneInElse && "if op clone should exist");
335  // For the else block, promote the else block of the original 'if' if it had
336  // one; otherwise, the op itself is to be erased.
337  if (!ifCloneInElse.hasElse())
338  ifCloneInElse.erase();
339  else
340  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
341 
342  // Move the else version into the else block of the hoisted if op.
343  auto *elseBlock = hoistedIfOp.getElseBlock();
344  elseBlock->getOperations().splice(
345  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
346  Block::iterator(hoistOverOpClone));
347 
348  return hoistedIfOp;
349 }
350 
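// For example (illustrative), with an "add" LoopReduction registered for the
// single iter_arg, the loop
//
//   %res = affine.for %i = 0 to 128 iter_args(%acc = %init) -> (f32) {
//     %v = affine.load %A[%i] : memref<128xf32>
//     %sum = arith.addf %acc, %v : f32
//     affine.yield %sum : f32
//   }
//
// is rewritten into
//
//   %red = affine.parallel (%i) = (0) to (128) reduce ("addf") -> (f32) {
//     %v = affine.load %A[%i] : memref<128xf32>
//     affine.yield %v : f32
//   }
//   %res = arith.addf %init, %red : f32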
351 LogicalResult
352 mlir::affine::affineParallelize(AffineForOp forOp,
353  ArrayRef<LoopReduction> parallelReductions,
354  AffineParallelOp *resOp) {
355  // Fail early if there are iter arguments that are not reductions.
356  unsigned numReductions = parallelReductions.size();
357  if (numReductions != forOp.getNumIterOperands())
358  return failure();
359 
360  Location loc = forOp.getLoc();
361  OpBuilder outsideBuilder(forOp);
362  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
363  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
364  AffineMap upperBoundMap = forOp.getUpperBoundMap();
365  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
366 
367  // Creating empty 1-D affine.parallel op.
368  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
369  parallelReductions, [](const LoopReduction &red) { return red.value; }));
370  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
371  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
372  AffineParallelOp newPloop = AffineParallelOp::create(
373  outsideBuilder, loc, ValueRange(reducedValues).getTypes(), reductionKinds,
374  llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
375  llvm::ArrayRef(upperBoundMap), upperBoundOperands,
376  llvm::ArrayRef(forOp.getStepAsInt()));
377  // Steal the body of the old affine for op.
378  newPloop.getRegion().takeBody(forOp.getRegion());
379  Operation *yieldOp = &newPloop.getBody()->back();
380 
381  // Handle the initial values of reductions because the parallel loop always
382  // starts from the neutral value.
383  SmallVector<Value> newResults;
384  newResults.reserve(numReductions);
385  for (unsigned i = 0; i < numReductions; ++i) {
386  Value init = forOp.getInits()[i];
387  // This works because we are only handling single-op reductions at the
388  // moment. A switch on reduction kind or a mechanism to collect operations
389  // participating in the reduction will be necessary for multi-op reductions.
390  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
391  assert(reductionOp && "yielded value is expected to be produced by an op");
392  outsideBuilder.getInsertionBlock()->getOperations().splice(
393  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
394  reductionOp);
395  reductionOp->setOperands({init, newPloop->getResult(i)});
396  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
397  }
398 
399  // Update the loop terminator to yield reduced values bypassing the reduction
400  // operation itself (now moved outside of the loop) and erase the block
401  // arguments that correspond to reductions. Note that the loop always has one
402  // "main" induction variable whenc coming from a non-parallel for.
403  unsigned numIVs = 1;
404  yieldOp->setOperands(reducedValues);
405  newPloop.getBody()->eraseArguments(numIVs, numReductions);
406 
407  forOp.erase();
408  if (resOp)
409  *resOp = newPloop;
410  return success();
411 }
412 
413 // Returns success if any hoisting happened.
414 LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
415  // Bail out early if the ifOp returns a result. TODO: Consider how to
416  // properly support this case.
417  if (ifOp.getNumResults() != 0)
418  return failure();
419 
420  // Apply canonicalization patterns and folding - this is necessary for the
421  // hoisting check to be correct (operands should be composed), and to be more
422  // effective (no unused operands). Since the pattern rewriter's folding is
423  // entangled with application of patterns, we may fold/end up erasing the op,
424  // in which case we return with `folded` being set.
425  RewritePatternSet patterns(ifOp.getContext());
426  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
427  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
428  bool erased;
429  (void)applyOpPatternsGreedily(
430  ifOp.getOperation(), frozenPatterns,
431  GreedyRewriteConfig(),
432  /*changed=*/nullptr, &erased);
433  if (erased) {
434  if (folded)
435  *folded = true;
436  return failure();
437  }
438  if (folded)
439  *folded = false;
440 
441  // The folding above should have ensured this.
442  assert(llvm::all_of(ifOp.getOperands(),
443  [](Value v) {
444  return isTopLevelValue(v) || isAffineInductionVar(v);
445  }) &&
446  "operands not composed");
447 
448  // We are going to hoist as high as possible.
449  // TODO: this could be customized in the future.
450  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
451 
452  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
453  // Nothing to hoist over.
454  if (hoistedIfOp == ifOp)
455  return failure();
456 
457  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
458  // a sequence of affine.fors that are all perfectly nested).
459  (void)applyPatternsGreedily(
460  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
461  frozenPatterns);
462 
463  return success();
464 }
465 
466 // Return the min expr after replacing the given dim.
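// For example (illustrative), substWithMin(10 - d0, d0, 0, 7) yields
// 10 - 7 = 3: d0 appears with a negative coefficient, so it is substituted
// with `max`, keeping the result a lower bound of the expression over
// [min, max].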
467 AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
468  AffineExpr min, AffineExpr max,
469  bool positivePath) {
470  if (e == dim)
471  return positivePath ? min : max;
472  if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
473  AffineExpr lhs = bin.getLHS();
474  AffineExpr rhs = bin.getRHS();
475  if (bin.getKind() == mlir::AffineExprKind::Add)
476  return substWithMin(lhs, dim, min, max, positivePath) +
477  substWithMin(rhs, dim, min, max, positivePath);
478 
479  auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
480  auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
481  if (c1 && c1.getValue() < 0)
482  return getAffineBinaryOpExpr(
483  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
484  if (c2 && c2.getValue() < 0)
485  return getAffineBinaryOpExpr(
486  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
487  return getAffineBinaryOpExpr(
488  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
489  substWithMin(rhs, dim, min, max, positivePath));
490  }
491  return e;
492 }
493 
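// For example (illustrative), affine.parallel (%i) = (2) to (130) step (32)
// becomes affine.parallel (%i) = (0) to (4), with
//   %orig_i = affine.apply affine_map<(d0) -> (d0 * 32 + 2)>(%i)
// inserted at the top of the body and all other uses of %i replaced by
// %orig_i.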
494 void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
495  // Loops with min/max in bounds are not normalized at the moment.
496  if (op.hasMinMaxBounds())
497  return;
498 
499  AffineMap lbMap = op.getLowerBoundsMap();
500  SmallVector<int64_t, 8> steps = op.getSteps();
501  // No need to do any work if the parallel op is already normalized.
502  bool isAlreadyNormalized =
503  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
504  int64_t step = std::get<0>(tuple);
505  auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
506  return lbExpr && lbExpr.getValue() == 0 && step == 1;
507  });
508  if (isAlreadyNormalized)
509  return;
510 
511  AffineValueMap ranges;
512  AffineValueMap::difference(op.getUpperBoundsValueMap(),
513  op.getLowerBoundsValueMap(), &ranges);
514  auto builder = OpBuilder::atBlockBegin(op.getBody());
515  auto zeroExpr = builder.getAffineConstantExpr(0);
516  SmallVector<AffineExpr, 8> lbExprs;
517  SmallVector<AffineExpr, 8> ubExprs;
518  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
519  int64_t step = steps[i];
520 
521  // Adjust the lower bound to be 0.
522  lbExprs.push_back(zeroExpr);
523 
524  // Adjust the upper bound expression: 'range / step'.
525  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
526  ubExprs.push_back(ubExpr);
527 
528  // Adjust the corresponding IV: 'lb + i * step'.
529  BlockArgument iv = op.getBody()->getArgument(i);
530  AffineExpr lbExpr = lbMap.getResult(i);
531  unsigned nDims = lbMap.getNumDims();
532  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
533  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
534  /*symbolCount=*/lbMap.getNumSymbols(), expr);
535 
536  // Use an 'affine.apply' op that will be simplified later in subsequent
537  // canonicalizations.
538  OperandRange lbOperands = op.getLowerBoundsOperands();
539  OperandRange dimOperands = lbOperands.take_front(nDims);
540  OperandRange symbolOperands = lbOperands.drop_front(nDims);
541  SmallVector<Value, 8> applyOperands{dimOperands};
542  applyOperands.push_back(iv);
543  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
544  auto apply =
545  AffineApplyOp::create(builder, op.getLoc(), map, applyOperands);
546  iv.replaceAllUsesExcept(apply, apply);
547  }
548 
549  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
550  op.setSteps(newSteps);
551  auto newLowerMap = AffineMap::get(
552  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
553  op.setLowerBounds({}, newLowerMap);
554  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
555  ubExprs, op.getContext());
556  op.setUpperBounds(ranges.getOperands(), newUpperMap);
557 }
558 
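// For example (illustrative), affine.for %i = 5 to 20 step 3 becomes
// affine.for %i = 0 to 5 (since ceildiv(20 - 5, 3) = 5), with
//   %orig_i = affine.apply affine_map<(d0) -> (d0 * 3 + 5)>(%i)
// inserted at the top of the body and all other uses of the old induction
// variable replaced by %orig_i.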
559 LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
560  bool promoteSingleIter) {
561  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
562  return success();
563 
564  // Check if the for op is already normalized.
565  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
566  (op.getStep() == 1))
567  return success();
568 
569  // Check if the lower bound has a single result only. Loops with a max lower
570  // bound can't be normalized without additional support like
571  // affine.execute_region's. If the lower bound does not have a single result
572  // then skip this op.
573  if (op.getLowerBoundMap().getNumResults() != 1)
574  return failure();
575 
576  Location loc = op.getLoc();
577  OpBuilder opBuilder(op);
578  int64_t origLoopStep = op.getStepAsInt();
579 
580  // Construct the new upper bound value map.
581  AffineMap oldLbMap = op.getLowerBoundMap();
582  // The upper bound can have multiple results. To use
583  // AffineValueMap::difference, we need to have the same number of results in
584  // both lower and upper bound maps. So, we just create a value map for the
585  // lower bound with the only available lower bound result repeated to pad up
586  // to the number of upper bound results.
587  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
588  op.getLowerBoundMap().getResult(0));
589  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
590  AffineMap paddedLbMap =
591  AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
592  op.getContext());
593  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
594  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
595  AffineValueMap newUbValueMap;
596  // Compute the `upper bound - lower bound`.
597  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
598  (void)newUbValueMap.canonicalize();
599 
600  // Scale down the upper bound value map by the loop step.
601  unsigned numResult = newUbValueMap.getNumResults();
602  SmallVector<AffineExpr> scaleDownExprs(numResult);
603  for (unsigned i = 0; i < numResult; ++i)
604  scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
605  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
606  // step). Where `n` is the number of results in the upper bound map.
607  AffineMap scaleDownMap =
608  AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
609  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
610 
611  // Set the newly created upper bound map and operands.
612  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
613  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
614  op.setStep(1);
615 
616  // Calculate the Value of new loopIV. Create affine.apply for the value of
617  // the loopIV in normalized loop.
618  opBuilder.setInsertionPointToStart(op.getBody());
619  // Construct an affine.apply op mapping the new IV to the old IV.
620  AffineMap scaleIvMap =
621  AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
622  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
623  AffineValueMap newIvToOldIvMap;
624  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
625  (void)newIvToOldIvMap.canonicalize();
626  auto newIV =
627  AffineApplyOp::create(opBuilder, loc, newIvToOldIvMap.getAffineMap(),
628  newIvToOldIvMap.getOperands());
629  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
630  return success();
631 }
632 
633 /// Returns true if the memory operation of `destAccess` depends on `srcAccess`
634 /// inside of the innermost common surrounding affine loop between the two
635 /// accesses.
636 static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
637  const MemRefAccess &destAccess) {
638  // Affine dependence analysis is possible only if both ops are in the same
639  // AffineScope.
640  if (getAffineAnalysisScope(srcAccess.opInst) !=
641  getAffineAnalysisScope(destAccess.opInst))
642  return false;
643 
644  unsigned nsLoops =
645  getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
646  DependenceResult result =
647  checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
648  return hasDependence(result);
649 }
650 
651 /// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
652 /// scope of the outermost `minSurroundingLoops` loops that surround them.
653 /// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
654 static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
655  unsigned minSurroundingLoops) {
656  MemRefAccess srcAccess(srcMemOp);
657  MemRefAccess destAccess(destMemOp);
658 
659  // Affine dependence analysis here is applicable only if both ops operate on
660  // the same memref and if `srcMemOp` and `destMemOp` are in the same
661  // AffineScope. Also, we can only check if our affine scope is isolated from
662  // above; otherwise, values can come from outside of the affine scope, and
663  // the check below cannot analyze them.
664  Region *srcScope = getAffineAnalysisScope(srcMemOp);
665  if (srcAccess.memref == destAccess.memref &&
666  srcScope == getAffineAnalysisScope(destMemOp)) {
667  unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
668  FlatAffineValueConstraints dependenceConstraints;
669  for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
670  DependenceResult result = checkMemrefAccessDependence(
671  srcAccess, destAccess, d, &dependenceConstraints,
672  /*dependenceComponents=*/nullptr);
673  // A dependence failure or the presence of a dependence implies a
674  // side effect.
675  if (!noDependence(result))
676  return true;
677  }
678  // No side effect was seen.
679  return false;
680  }
681  // TODO: Check here if the memrefs alias: there is no side effect if
682  // `srcAccess.memref` and `destAccess.memref` don't alias.
683  return true;
684 }
685 
686 template <typename EffectType, typename T>
687 bool mlir::affine::hasNoInterveningEffect(
688  Operation *start, T memOp,
689  llvm::function_ref<bool(Value, Value)> mayAlias) {
690  // A boolean representing whether an intervening operation could have impacted
691  // memOp.
692  bool hasSideEffect = false;
693 
694  // Check whether the effect on memOp can be caused by a given operation op.
695  Value memref = memOp.getMemRef();
696  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
697  // If the effect has already been found, exit early.
698  if (hasSideEffect)
699  return;
700 
701  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
702  SmallVector<MemoryEffects::EffectInstance, 1> effects;
703  memEffect.getEffects(effects);
704 
705  bool opMayHaveEffect = false;
706  for (auto effect : effects) {
707  // If op causes EffectType on a potentially aliasing location for
708  // memOp, mark as having the effect.
709  if (isa<EffectType>(effect.getEffect())) {
710  if (effect.getValue() && effect.getValue() != memref &&
711  !mayAlias(effect.getValue(), memref))
712  continue;
713  opMayHaveEffect = true;
714  break;
715  }
716  }
717 
718  if (!opMayHaveEffect)
719  return;
720 
721  // If the side effect comes from an affine read or write, try to
722  // prove the side effecting `op` cannot reach `memOp`.
723  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
724  // For ease, let's consider the case that `op` is a store and
725  // we're looking for other potential stores that overwrite memory after
726  // `start`, and before being read in `memOp`. In this case, we only
727  // need to consider other potential stores with depth >
728  // minSurroundingLoops since `start` would overwrite any store with a
729  // smaller number of surrounding loops before.
730  unsigned minSurroundingLoops =
731  getNumCommonSurroundingLoops(*start, *memOp);
732  if (mayHaveEffect(op, memOp, minSurroundingLoops))
733  hasSideEffect = true;
734  return;
735  }
736 
737  // We have an op with a memory effect and we cannot prove if it
738  // intervenes.
739  hasSideEffect = true;
740  return;
741  }
742 
743  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
744  // Recurse into the regions for this op and check whether the internal
745  // operations may have the side effect `EffectType` on memOp.
746  for (Region &region : op->getRegions())
747  for (Block &block : region)
748  for (Operation &op : block)
749  checkOperation(&op);
750  return;
751  }
752 
753  // Otherwise, conservatively assume the generic operation has the effect
754  // on `memOp`.
755  hasSideEffect = true;
756  };
757 
758  // Check all paths from ancestor op `parent` to the operation `to` for the
759  // effect. It is known that `to` must be contained within `parent`.
760  auto until = [&](Operation *parent, Operation *to) {
761  // TODO check only the paths from `parent` to `to`.
762  // Currently we fall back and check the entire parent op, rather than
763  // just the paths from `parent` to `to`, stopping after reaching `to`.
764  // This is conservatively correct, but could be made more aggressive.
765  assert(parent->isAncestor(to));
766  checkOperation(parent);
767  };
768 
769  // Check for all paths from operation `from` to operation `untilOp` for the
770  // given memory effect.
771  std::function<void(Operation *, Operation *)> recur =
772  [&](Operation *from, Operation *untilOp) {
773  assert(
774  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
775  "Checking for side effect between two operations without a common "
776  "ancestor");
777 
778  // If the operations are in different regions, recursively consider all
779  // path from `from` to the parent of `to` and all paths from the parent
780  // of `to` to `to`.
781  if (from->getParentRegion() != untilOp->getParentRegion()) {
782  recur(from, untilOp->getParentOp());
783  until(untilOp->getParentOp(), untilOp);
784  return;
785  }
786 
787  // Now, assuming that `from` and `to` exist in the same region, perform
788  // a CFG traversal to check all the relevant operations.
789 
790  // Additional blocks to consider.
791  SmallVector<Block *, 2> todoBlocks;
792  {
793  // First consider the parent block of `from` and check all operations
794  // after `from`.
795  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
796  iter != end && &*iter != untilOp; ++iter) {
797  checkOperation(&*iter);
798  }
799 
800  // If the parent of `from` doesn't contain `to`, add the successors
801  // to the list of blocks to check.
802  if (untilOp->getBlock() != from->getBlock())
803  for (Block *succ : from->getBlock()->getSuccessors())
804  todoBlocks.push_back(succ);
805  }
806 
807  SmallPtrSet<Block *, 4> done;
808  // Traverse the CFG until hitting `to`.
809  while (!todoBlocks.empty()) {
810  Block *blk = todoBlocks.pop_back_val();
811  if (done.count(blk))
812  continue;
813  done.insert(blk);
814  for (auto &op : *blk) {
815  if (&op == untilOp)
816  break;
817  checkOperation(&op);
818  if (&op == blk->getTerminator())
819  for (Block *succ : blk->getSuccessors())
820  todoBlocks.push_back(succ);
821  }
822  }
823  };
824  recur(start, memOp);
825  return !hasSideEffect;
826 }
827 
828 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
829 /// which the load is guaranteed to retrieve. This check involves three
830 /// components: 1) the store and load must be to the same location, 2) the store
831 /// must dominate (and therefore must always occur prior to) the load, and 3) no
832 /// other operations will overwrite the memory loaded between the given load
833 /// and store. If such a value exists, the replaced `loadOp` will be added to
834 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
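/// For example (illustrative):
///
///   affine.store %v, %A[%i] : memref<100xf32>
///   ... (no operation that may write to %A[%i] in between) ...
///   %u = affine.load %A[%i] : memref<100xf32>
///
/// Uses of %u are replaced by %v and the load is queued for erasure.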
835 static void forwardStoreToLoad(
836  AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
837  SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
838  llvm::function_ref<bool(Value, Value)> mayAlias) {
839 
840  // The store op candidate for forwarding that satisfies all conditions
841  // to replace the load, if any.
842  Operation *lastWriteStoreOp = nullptr;
843 
844  for (auto *user : loadOp.getMemRef().getUsers()) {
845  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
846  if (!storeOp)
847  continue;
848  MemRefAccess srcAccess(storeOp);
849  MemRefAccess destAccess(loadOp);
850 
851  // 1. Check if the store and the load have mathematically equivalent
852  // affine access functions; this implies that they statically refer to the
853  // same single memref element. As an example this filters out cases like:
854  // store %A[%i0 + 1]
855  // load %A[%i0]
856  // store %A[%M]
857  // load %A[%N]
858  // Use the AffineValueMap difference based memref access equality checking.
859  if (srcAccess != destAccess)
860  continue;
861 
862  // 2. The store has to dominate the load op to be a candidate.
863  if (!domInfo.dominates(storeOp, loadOp))
864  continue;
865 
866  // 3. The store must reach the load. Access function equivalence only
867  // guarantees this for accesses in the same block. The load could be in a
868  // nested block that is unreachable.
869  if (!mustReachAtInnermost(srcAccess, destAccess))
870  continue;
871 
872  // 4. Ensure there is no intermediate operation which could replace the
873  // value in memory.
874  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
875  mayAlias))
876  continue;
877 
878  // We now have a candidate for forwarding.
879  assert(lastWriteStoreOp == nullptr &&
880  "multiple simultaneous replacement stores");
881  lastWriteStoreOp = storeOp;
882  }
883 
884  if (!lastWriteStoreOp)
885  return;
886 
887  // Perform the actual store to load forwarding.
888  Value storeVal =
889  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
890  // Check if the two values have the same shape. This is needed for affine vector
891  // loads and stores.
892  if (storeVal.getType() != loadOp.getValue().getType())
893  return;
894  loadOp.getValue().replaceAllUsesWith(storeVal);
895  // Record the memref for a later sweep to optimize away.
896  memrefsToErase.insert(loadOp.getMemRef());
897  // Record this to erase later.
898  loadOpsToErase.push_back(loadOp);
899 }
900 
901 template bool
902 mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Write,
903  affine::AffineReadOpInterface>(
904  mlir::Operation *, affine::AffineReadOpInterface,
905  llvm::function_ref<bool(Value, Value)>);
906 
907 // This attempts to find stores which have no impact on the final result.
908 // A writing op writeA will be eliminated if there exists an op writeB such that
909 // 1) writeA and writeB have mathematically equivalent affine access functions.
910 // 2) writeB postdominates writeA.
911 // 3) There is no potential read between writeA and writeB.
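// For example (illustrative), in
//   affine.store %v0, %A[%i] : memref<100xf32>
//   affine.store %v1, %A[%i] : memref<100xf32>
// with no read of %A[%i] in between, the first store is dead and is queued
// for erasure.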
912 static void findUnusedStore(AffineWriteOpInterface writeA,
913  SmallVectorImpl<Operation *> &opsToErase,
914  PostDominanceInfo &postDominanceInfo,
915  llvm::function_ref<bool(Value, Value)> mayAlias) {
916 
917  for (Operation *user : writeA.getMemRef().getUsers()) {
918  // Only consider writing operations.
919  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
920  if (!writeB)
921  continue;
922 
923  // The operations must be distinct.
924  if (writeB == writeA)
925  continue;
926 
927  // Both operations must lie in the same region.
928  if (writeB->getParentRegion() != writeA->getParentRegion())
929  continue;
930 
931  // Both operations must write to the same memory.
932  MemRefAccess srcAccess(writeB);
933  MemRefAccess destAccess(writeA);
934 
935  if (srcAccess != destAccess)
936  continue;
937 
938  // writeB must postdominate writeA.
939  if (!postDominanceInfo.postDominates(writeB, writeA))
940  continue;
941 
942  // There cannot be an operation which reads from memory between
943  // the two writes.
944  if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
945  mayAlias))
946  continue;
947 
948  opsToErase.push_back(writeA);
949  break;
950  }
951 }
952 
953 // The load to load forwarding / redundant load elimination is similar to the
954 // store to load forwarding.
955 // loadA will be replaced with loadB if:
956 // 1) loadA and loadB have mathematically equivalent affine access functions.
957 // 2) loadB dominates loadA.
958 // 3) There is no write between loadA and loadB.
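// For example (illustrative), in
//   %x = affine.load %A[%i] : memref<100xf32>
//   ... (no operation that may write to %A[%i]) ...
//   %y = affine.load %A[%i] : memref<100xf32>
// uses of %y are replaced by %x and the second load is queued for erasure.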
959 static void loadCSE(AffineReadOpInterface loadA,
960  SmallVectorImpl<Operation *> &loadOpsToErase,
961  DominanceInfo &domInfo,
962  llvm::function_ref<bool(Value, Value)> mayAlias) {
963  SmallVector<AffineReadOpInterface, 4> loadCandidates;
964  for (auto *user : loadA.getMemRef().getUsers()) {
965  auto loadB = dyn_cast<AffineReadOpInterface>(user);
966  if (!loadB || loadB == loadA)
967  continue;
968 
969  MemRefAccess srcAccess(loadB);
970  MemRefAccess destAccess(loadA);
971 
972  // 1. The accesses should be to the same location.
973  if (srcAccess != destAccess) {
974  continue;
975  }
976 
977  // 2. loadB should dominate loadA.
978  if (!domInfo.dominates(loadB, loadA))
979  continue;
980 
981  // 3. There should not be a write between loadA and loadB.
982  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
983  loadB.getOperation(), loadA, mayAlias))
984  continue;
985 
986  // Check if two values have the same shape. This is needed for affine vector
987  // loads.
988  if (loadB.getValue().getType() != loadA.getValue().getType())
989  continue;
990 
991  loadCandidates.push_back(loadB);
992  }
993 
994  // Of the legal load candidates, use the one that dominates all others
995  // to minimize the subsequent need for load CSE.
996  Value loadB;
997  for (AffineReadOpInterface option : loadCandidates) {
998  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
999  return depStore == option ||
1000  domInfo.dominates(option.getOperation(),
1001  depStore.getOperation());
1002  })) {
1003  loadB = option.getValue();
1004  break;
1005  }
1006  }
1007 
1008  if (loadB) {
1009  loadA.getValue().replaceAllUsesWith(loadB);
1010  // Record this to erase later.
1011  loadOpsToErase.push_back(loadA);
1012  }
1013 }
1014 
1015 // The store to load forwarding and load CSE rely on three conditions:
1016 //
1017 // 1) store/load providing a replacement value and load being replaced need to
1018 // have mathematically equivalent affine access functions (checked after full
1019 // composition of load/store operands); this implies that they access the same
1020 // single memref element for all iterations of the common surrounding loop,
1021 //
1022 // 2) the store/load op should dominate the load op,
1023 //
1024 // 3) no operation that may write to memory read by the load being replaced can
1025 // occur after executing the instruction (load or store) providing the
1026 // replacement value and before the load being replaced (thus potentially
1027 // allowing overwriting the memory read by the load).
1028 //
1029 // The above conditions are simple to check, sufficient, and powerful for most
1030 // cases in practice - they are sufficient, but not necessary --- since they
1031 // don't reason about loops that are guaranteed to execute at least once or
1032 // multiple sources to forward from.
1033 //
1034 // TODO: more forwarding can be done when support for
1035 // loop/conditional live-out SSA values is available.
1036 // TODO: do general dead store elimination for memref's. This pass
1037 // currently eliminates the stores only if no other loads/uses (other
1038 // than dealloc) remain.
1039 //
1040 void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1041  PostDominanceInfo &postDomInfo,
1042  AliasAnalysis &aliasAnalysis) {
1043  // Load op's whose results were replaced by those forwarded from stores.
1044  SmallVector<Operation *, 8> opsToErase;
1045 
1046  // A list of memref's that are potentially dead / could be eliminated.
1047  SmallPtrSet<Value, 4> memrefsToErase;
1048 
1049  auto mayAlias = [&](Value val1, Value val2) -> bool {
1050  return !aliasAnalysis.alias(val1, val2).isNo();
1051  };
1052 
1053  // Walk all load's and perform store to load forwarding.
1054  f.walk([&](AffineReadOpInterface loadOp) {
1055  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1056  });
1057  for (auto *op : opsToErase)
1058  op->erase();
1059  opsToErase.clear();
1060 
1061  // Walk all store's and perform unused store elimination
1062  f.walk([&](AffineWriteOpInterface storeOp) {
1063  findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1064  });
1065  for (auto *op : opsToErase)
1066  op->erase();
1067  opsToErase.clear();
1068 
1069  // Check if the store fwd'ed memrefs are now left with only stores and
1070  // deallocs and can thus be completely deleted. Note: the canonicalize pass
1071  // should be able to do this as well, but we'll do it here since we collected
1072  // these anyway.
1073  for (auto memref : memrefsToErase) {
1074  // If the memref hasn't been locally alloc'ed, skip.
1075  Operation *defOp = memref.getDefiningOp();
1076  if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1077  // TODO: if the memref was returned by a 'call' operation, we
1078  // could still erase it if the call had no side-effects.
1079  continue;
1080  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1081  return !isa<AffineWriteOpInterface>(ownerOp) &&
1082  !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1083  }))
1084  continue;
1085 
1086  // Erase all stores, the dealloc, and the alloc on the memref.
1087  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1088  user->erase();
1089  defOp->erase();
1090  }
1091 
1092  // To eliminate as many loads as possible, run load CSE after eliminating
1093  // stores. Otherwise, some stores are wrongly seen as having an intervening
1094  // effect.
1095  f.walk([&](AffineReadOpInterface loadOp) {
1096  loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1097  });
1098  for (auto *op : opsToErase)
1099  op->erase();
1100 }
1101 
1102 // Checks if `op` is non-dereferencing.
1103 // TODO: This hardcoded check will be removed once the right interface is added.
1104 static bool isDereferencingOp(Operation *op) {
1105  return isa<AffineMapAccessInterface, memref::LoadOp, memref::StoreOp>(op);
1106 }
1107 
1108 // Perform the replacement in `op`.
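// For example (illustrative), replacing %old : memref<64x64xf32> with
// %new : memref<4x4x16x16xf32> using
//   indexRemap = (d0, d1) -> (d0 floordiv 16, d1 floordiv 16,
//                             d0 mod 16, d1 mod 16)
// rewrites affine.load %old[%i, %j] into
//   affine.load %new[%i floordiv 16, %j floordiv 16, %i mod 16, %j mod 16].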
1109 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1110  Value oldMemRef, Value newMemRef, Operation *op,
1111  ArrayRef<Value> extraIndices, AffineMap indexRemap,
1112  ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1113  bool allowNonDereferencingOps) {
1114  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1115  (void)newMemRefRank; // unused in opt mode
1116  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1117  (void)oldMemRefRank; // unused in opt mode
1118  if (indexRemap) {
1119  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1120  "symbolic operand count mismatch");
1121  assert(indexRemap.getNumInputs() ==
1122  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1123  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1124  } else {
1125  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1126  }
1127 
1128  // Assert same elemental type.
1129  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1130  cast<MemRefType>(newMemRef.getType()).getElementType());
1131 
1132  SmallVector<unsigned, 2> usePositions;
1133  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1134  if (opEntry.value() == oldMemRef)
1135  usePositions.push_back(opEntry.index());
1136  }
1137 
1138  // If memref doesn't appear, nothing to do.
1139  if (usePositions.empty())
1140  return success();
1141 
1142  unsigned memRefOperandPos = usePositions.front();
1143 
1144  OpBuilder builder(op);
1145  // The following checks if op is dereferencing memref and performs the access
1146  // index rewrites.
1147  if (!isDereferencingOp(op)) {
1148  if (!allowNonDereferencingOps) {
1149  // Failure: memref used in a non-dereferencing context (potentially
1150  // escapes); no replacement in these cases unless allowNonDereferencingOps
1151  // is set.
1152  return failure();
1153  }
1154  for (unsigned pos : usePositions)
1155  op->setOperand(pos, newMemRef);
1156  return success();
1157  }
1158 
1159  if (usePositions.size() > 1) {
1160  // TODO: extend it for this case when needed (rare).
1161  LLVM_DEBUG(llvm::dbgs()
1162  << "multiple dereferencing uses in a single op not supported");
1163  return failure();
1164  }
1165 
1166  // Perform index rewrites for the dereferencing op and then replace the op.
1167  SmallVector<Value, 4> oldMapOperands;
1168  AffineMap oldMap;
1169  unsigned oldMemRefNumIndices = oldMemRefRank;
1170  auto startIdx = op->operand_begin() + memRefOperandPos + 1;
1171  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1172  if (affMapAccInterface) {
1173  // If `op` implements AffineMapAccessInterface, we can get the indices by
1174  // querying the number of map operands from the operand list starting at a certain
1175  // offset (`memRefOperandPos` in this case).
1176  NamedAttribute oldMapAttrPair =
1177  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1178  oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1179  oldMemRefNumIndices = oldMap.getNumInputs();
1180  }
1181  oldMapOperands.assign(startIdx, startIdx + oldMemRefNumIndices);
1182 
1183  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1184  SmallVector<Value, 4> oldMemRefOperands;
1185  SmallVector<Value, 4> affineApplyOps;
1186  oldMemRefOperands.reserve(oldMemRefRank);
1187  if (affMapAccInterface &&
1188  oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1189  for (auto resultExpr : oldMap.getResults()) {
1190  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1191  oldMap.getNumSymbols(), resultExpr);
1192  auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
1193  oldMapOperands);
1194  oldMemRefOperands.push_back(afOp);
1195  affineApplyOps.push_back(afOp);
1196  }
1197  } else {
1198  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1199  }
1200 
1201  // Construct new indices as a remap of the old ones if a remapping has been
1202  // provided. The indices of a memref come right after it, i.e.,
1203  // at position memRefOperandPos + 1.
1204  SmallVector<Value, 4> remapOperands;
1205  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1206  symbolOperands.size());
1207  remapOperands.append(extraOperands.begin(), extraOperands.end());
1208  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1209  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1210 
1211  SmallVector<Value, 4> remapOutputs;
1212  remapOutputs.reserve(oldMemRefRank);
1213  if (indexRemap &&
1214  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1215  // Remapped indices.
1216  for (auto resultExpr : indexRemap.getResults()) {
1217  auto singleResMap = AffineMap::get(
1218  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1219  auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
1220  remapOperands);
1221  remapOutputs.push_back(afOp);
1222  affineApplyOps.push_back(afOp);
1223  }
1224  } else {
1225  // No remapping specified.
1226  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1227  }
1228  SmallVector<Value, 4> newMapOperands;
1229  newMapOperands.reserve(newMemRefRank);
1230 
1231  // Prepend 'extraIndices' in 'newMapOperands'.
1232  for (Value extraIndex : extraIndices) {
1233  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1234  "invalid memory op index");
1235  newMapOperands.push_back(extraIndex);
1236  }
1237 
1238  // Append 'remapOutputs' to 'newMapOperands'.
1239  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1240 
1241  // Create new fully composed AffineMap for new op to be created.
1242  assert(newMapOperands.size() == newMemRefRank);
1243  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1244  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1245  newMap = simplifyAffineMap(newMap);
1246  canonicalizeMapAndOperands(&newMap, &newMapOperands);
1247  // Remove any affine.apply's that became dead as a result of composition.
1248  for (Value value : affineApplyOps)
1249  if (value.use_empty())
1250  value.getDefiningOp()->erase();
1251 
1252  OperationState state(op->getLoc(), op->getName());
1253  // Construct the new operation using this memref.
1254  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1255  // Insert the non-memref operands.
1256  state.operands.append(op->operand_begin(),
1257  op->operand_begin() + memRefOperandPos);
1258  // Insert the new memref value.
1259  state.operands.push_back(newMemRef);
1260 
1261  // Insert the new memref map operands.
1262  if (affMapAccInterface) {
1263  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1264  } else {
1265  // In the case of dereferencing ops not implementing
1266  // AffineMapAccessInterface, we need to apply the values of `newMapOperands`
1267  // to the `newMap` to get the correct indices.
1268  for (unsigned i = 0; i < newMemRefRank; i++) {
1269  state.operands.push_back(AffineApplyOp::create(
1270  builder, op->getLoc(),
1271  AffineMap::get(newMap.getNumDims(), newMap.getNumSymbols(),
1272  newMap.getResult(i)),
1273  newMapOperands));
1274  }
1275  }
1276 
1277  // Insert the remaining operands unmodified.
1278  unsigned oldMapNumInputs = oldMapOperands.size();
1279  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1280  oldMapNumInputs,
1281  op->operand_end());
1282  // Result types don't change. Both memref's are of the same elemental type.
1283  state.types.reserve(op->getNumResults());
1284  for (auto result : op->getResults())
1285  state.types.push_back(result.getType());
1286 
1287  // Add attribute for 'newMap', other Attributes do not change.
1288  auto newMapAttr = AffineMapAttr::get(newMap);
1289  for (auto namedAttr : op->getAttrs()) {
1290  if (affMapAccInterface &&
1291  namedAttr.getName() ==
1292  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef).getName())
1293  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1294  else
1295  state.attributes.push_back(namedAttr);
1296  }
1297 
1298  // Create the new operation.
1299  auto *repOp = builder.create(state);
1300  op->replaceAllUsesWith(repOp);
1301  op->erase();
1302 
1303  return success();
1304 }
1305 
1306 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1307  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1308  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1309  ArrayRef<Value> symbolOperands,
1310  llvm::function_ref<bool(Operation *)> userFilterFn,
1311  bool allowNonDereferencingOps, bool replaceInDeallocOp) {
1312  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1313  (void)newMemRefRank; // unused in opt mode
1314  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1315  (void)oldMemRefRank;
1316  if (indexRemap) {
1317  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1318  "symbol operand count mismatch");
1319  assert(indexRemap.getNumInputs() ==
1320  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1321  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1322  } else {
1323  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1324  }
1325 
1326  // Assert same elemental type.
1327  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1328  cast<MemRefType>(newMemRef.getType()).getElementType());
1329 
1330  std::unique_ptr<DominanceInfo> domInfo;
1331  std::unique_ptr<PostDominanceInfo> postDomInfo;
1332 
1333  // Walk all uses of old memref; collect ops to perform replacement. We use a
1334  // DenseSet since an operation could potentially have multiple uses of a
1335  // memref (although rare), and the replacement later is going to erase ops.
1336  DenseSet<Operation *> opsToReplace;
1337  for (auto *user : oldMemRef.getUsers()) {
1338  // Check if this user doesn't pass the filter.
1339  if (userFilterFn && !userFilterFn(user))
1340  continue;
1341 
1342  // Skip dealloc's - no replacement is necessary, and a memref replacement
1343  // at other uses doesn't hurt these dealloc's.
1344  if (hasSingleEffect<MemoryEffects::Free>(user, oldMemRef) &&
1345  !replaceInDeallocOp)
1346  continue;
1347 
1348  // Check if the memref was used in a non-dereferencing context. It is fine
1349  // for the memref to be used in a non-dereferencing way outside of the
1350  // region where this replacement is happening.
1351  if (!isa<AffineMapAccessInterface>(*user)) {
1352  if (!allowNonDereferencingOps) {
1353  LLVM_DEBUG(
1354  llvm::dbgs()
1355  << "Memref replacement failed: non-deferencing memref user: \n"
1356  << *user << '\n');
1357  return failure();
1358  }
1359  // Non-dereferencing ops with the MemRefsNormalizable trait are
1360  // supported for replacement.
1361  if (!user->hasTrait<OpTrait::MemRefsNormalizable>()) {
1362  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1363  "memrefs normalizable trait: \n"
1364  << *user << '\n');
1365  return failure();
1366  }
1367  }
1368 
1369  // We'll first collect and then replace --- since replacement erases the
1370  // user that has the use, and that user could be postDomFilter or domFilter
1371  // itself!
1372  opsToReplace.insert(user);
1373  }
1374 
1375  for (auto *user : opsToReplace) {
1376  if (failed(replaceAllMemRefUsesWith(
1377  oldMemRef, newMemRef, user, extraIndices, indexRemap, extraOperands,
1378  symbolOperands, allowNonDereferencingOps)))
1379  llvm_unreachable("memref replacement guaranteed to succeed here");
1380  }
1381 
1382  return success();
1383 }
1384 
1385 /// Given an operation, inserts one or more single result affine
1386 /// apply operations, results of which are exclusively used by this operation
1387 /// operation. The operands of these newly created affine apply ops are
1388 /// guaranteed to be loop iterators or terminal symbols of a function.
1389 ///
1390 /// Before
1391 ///
1392 /// affine.for %i = 0 to #map(%N)
1393 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1394 /// "send"(%idx, %A, ...)
1395 /// "compute"(%idx)
1396 ///
1397 /// After
1398 ///
1399 /// affine.for %i = 0 to #map(%N)
1400 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1401 /// "send"(%idx, %A, ...)
1402 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1403 /// "compute"(%idx_)
1404 ///
1405 /// This allows applying different transformations on send and compute (for eg.
1406 /// different shifts/delays).
1407 ///
 1408 /// Returns with `sliceOps` left empty either if none of opInst's operands were
 1409 /// the result of an affine.apply (and thus there was no affine computation
 1410 /// slice to create), or if all the affine.apply ops supplying operands to this
 1411 /// opInst had no uses besides this opInst; otherwise returns the list of
 1412 /// affine.apply operations created in the output argument `sliceOps`.
 1413 void mlir::affine::createAffineComputationSlice(
 1414  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1415  // Collect all operands that are results of affine apply ops.
1416  SmallVector<Value, 4> subOperands;
1417  subOperands.reserve(opInst->getNumOperands());
1418  for (auto operand : opInst->getOperands())
1419  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1420  subOperands.push_back(operand);
1421 
1422  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1423  SmallVector<Operation *, 4> affineApplyOps;
1424  getReachableAffineApplyOps(subOperands, affineApplyOps);
1425  // Skip transforming if there are no affine maps to compose.
1426  if (affineApplyOps.empty())
1427  return;
1428 
 1429  // Check if all uses of the affine apply ops lie only in this op, in
1430  // which case there would be nothing to do.
1431  bool localized = true;
1432  for (auto *op : affineApplyOps) {
1433  for (auto result : op->getResults()) {
1434  for (auto *user : result.getUsers()) {
1435  if (user != opInst) {
1436  localized = false;
1437  break;
1438  }
1439  }
1440  }
1441  }
1442  if (localized)
1443  return;
1444 
1445  OpBuilder builder(opInst);
1446  SmallVector<Value, 4> composedOpOperands(subOperands);
1447  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1448  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1449 
1450  // Create an affine.apply for each of the map results.
1451  sliceOps->reserve(composedMap.getNumResults());
1452  for (auto resultExpr : composedMap.getResults()) {
1453  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1454  composedMap.getNumSymbols(), resultExpr);
1455  sliceOps->push_back(AffineApplyOp::create(
1456  builder, opInst->getLoc(), singleResMap, composedOpOperands));
1457  }
1458 
1459  // Construct the new operands that include the results from the composed
1460  // affine apply op above instead of existing ones (subOperands). So, they
1461  // differ from opInst's operands only for those operands in 'subOperands', for
1462  // which they will be replaced by the corresponding one from 'sliceOps'.
1463  SmallVector<Value, 4> newOperands(opInst->getOperands());
1464  for (Value &operand : newOperands) {
1465  // Replace the subOperands from among the new operands.
1466  unsigned j, f;
1467  for (j = 0, f = subOperands.size(); j < f; j++) {
1468  if (operand == subOperands[j])
1469  break;
1470  }
1471  if (j < subOperands.size())
1472  operand = (*sliceOps)[j];
1473  }
1474  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1475  opInst->setOperand(idx, newOperands[idx]);
1476 }
1477 
1478 /// Enum to set patterns of affine expr in tiled-layout map.
1479 /// TileFloorDiv: <dim expr> div <tile size>
1480 /// TileMod: <dim expr> mod <tile size>
1481 /// TileNone: None of the above
1482 /// Example:
1483 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1484 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1485 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
 1486 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv; "d0 mod 128" and "d1 mod 256" ==> TileMod
 1487 enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
 1488 
 1489 /// Check if `map` is a tiled layout. In a tiled layout, the k dimensions that
 1490 /// are floordiv'ed by their respective tile sizes also appear in a mod with the
 1491 /// same tile sizes, and no other expression involves those k dimensions. This
1492 /// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
1493 /// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
1494 /// tiled layout, an empty vector is returned.
1495 static LogicalResult getTileSizePos(
1496  AffineMap map,
1497  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1498  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1499  // `floordiv` and its position in `map` output.
1500  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1501  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
 1502  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
 1503  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
 1504  unsigned pos = 0;
1505  for (AffineExpr expr : map.getResults()) {
1506  if (expr.getKind() == AffineExprKind::FloorDiv) {
1507  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1508  if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1509  floordivExprs.emplace_back(
1510  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1511  }
1512  pos++;
1513  }
 1514  // Not a tiled layout if `floordivExprs` is empty.
 1515  if (floordivExprs.empty()) {
 1516  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
 1517  return success();
 1518  }
1519 
1520  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
 1521  // not a tiled layout.
1522  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1523  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1524  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1525  unsigned floordivPos = std::get<2>(fexpr);
1526 
 1527  // Walk each affine expr of the `map` output except `fexpr`, and check if the
 1528  // LHS and RHS of `fexpr` appear in the LHS and RHS of a `mod`. If the LHS of
 1529  // `fexpr` is used in any other expr, it is not a tiled layout. Examples of non-tiled layouts:
1530  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1531  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1532  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1533  // 256)>
1534  bool found = false;
1535  pos = 0;
1536  for (AffineExpr expr : map.getResults()) {
1537  bool notTiled = false;
1538  if (pos != floordivPos) {
1539  expr.walk([&](AffineExpr e) {
1540  if (e == floordivExprLHS) {
1541  if (expr.getKind() == AffineExprKind::Mod) {
1542  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
 1543  // If LHS and RHS of `mod` are the same as those of the floordiv.
1544  if (floordivExprLHS == binaryExpr.getLHS() &&
1545  floordivExprRHS == binaryExpr.getRHS()) {
1546  // Save tile size (RHS of `mod`), and position of `floordiv` and
1547  // `mod` if same expr with `mod` is not found yet.
1548  if (!found) {
1549  tileSizePos.emplace_back(
1550  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1551  found = true;
1552  } else {
 1553  // Non-tiled layout: has multiple `mod`s with the same LHS.
1554  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1555  // mod 256, d2 mod 256)>
1556  notTiled = true;
1557  }
1558  } else {
1559  // Non tiled layout: RHS of `mod` is different from `floordiv`.
1560  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1561  // mod 128)>
1562  notTiled = true;
1563  }
1564  } else {
1565  // Non tiled layout: LHS is the same, but not `mod`.
1566  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1567  // floordiv 256)>
1568  notTiled = true;
1569  }
1570  }
1571  });
1572  }
 1573  if (notTiled) {
 1574  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
 1575  return success();
 1576  }
1577  pos++;
1578  }
1579  }
1580  return success();
1581 }
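// A worked sketch of the output for the example map above,
// #tiled_2d_128x256 = affine_map<(d0, d1) -> (d0 floordiv 128, d1 floordiv 256,
// d0 mod 128, d1 mod 256)>: `tileSizePos` ends up holding the tuples
// (128, /*floordiv pos=*/0, /*mod pos=*/2) and (256, /*floordiv pos=*/1, /*mod pos=*/3).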
1582 
1583 /// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1584 /// after normalization. Dimensions that include dynamic dimensions in the map
 1585 /// output will become dynamic dimensions. Returns true if `dim` is a dynamic
 1586 /// dimension.
1587 ///
1588 /// Example:
1589 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1590 ///
 1591 /// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output are dynamic.
1592 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1593 static bool
1594 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1595  SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1596  AffineExpr expr = layoutMap.getResults()[dim];
1597  // Check if affine expr of the dimension includes dynamic dimension of input
1598  // memrefType.
1599  MLIRContext *context = layoutMap.getContext();
1600  return expr
1601  .walk([&](AffineExpr e) {
1602  if (isa<AffineDimExpr>(e) &&
1603  llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1604  return e == getAffineDimExpr(dim, context);
1605  }))
1606  return WalkResult::interrupt();
1607  return WalkResult::advance();
1608  })
1609  .wasInterrupted();
1610 }
1611 
 1612 /// Create affine expr to calculate dimension size for a tiled-layout map.
 1613 static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
 1614  TileExprPattern pat) {
1615  // Create map output for the patterns.
1616  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1617  // "mod <tile size>" ==> "<tile size>"
1618  AffineExpr newMapOutput;
1619  AffineBinaryOpExpr binaryExpr = nullptr;
 1620  switch (pat) {
 1621  case TileExprPattern::TileMod:
 1622  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
 1623  newMapOutput = binaryExpr.getRHS();
 1624  break;
 1625  case TileExprPattern::TileFloorDiv:
 1626  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1627  newMapOutput = getAffineBinaryOpExpr(
1628  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1629  break;
1630  default:
1631  newMapOutput = oldMapOutput;
1632  }
1633  return newMapOutput;
1634 }
1635 
1636 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1637 /// create `newDynamicSizes` from them by using AffineApplyOp.
1638 ///
1639 /// Steps for normalizing dynamic memrefs for a tiled layout map
1640 /// Example:
1641 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1642 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1643 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1644 ///
1645 /// (Before this function)
1646 /// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
1647 /// single layout map is supported.
1648 ///
1649 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1650 /// is memref<4x?x?xf32> in the above example.
1651 ///
1652 /// (In this function)
1653 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1654 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1655 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1656 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1657 /// - New map in the above example
1658 /// #map0 = affine_map<(d0, d1) -> (d0)>
1659 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1660 /// #map2 = affine_map<(d0, d1) -> (32)>
1661 ///
1662 /// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1663 /// is used in dynamicSizes of new AllocOp.
1664 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1665 /// %c4 = arith.constant 4 : index
1666 /// %1 = affine.apply #map1(%c4, %0)
1667 /// %2 = affine.apply #map2(%c4, %0)
1668 template <typename AllocLikeOp>
1669 static void createNewDynamicSizes(MemRefType oldMemRefType,
1670  MemRefType newMemRefType, AffineMap map,
1671  AllocLikeOp allocOp, OpBuilder b,
1672  SmallVectorImpl<Value> &newDynamicSizes) {
1673  // Create new input for AffineApplyOp.
1674  SmallVector<Value, 4> inAffineApply;
1675  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1676  unsigned dynIdx = 0;
1677  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1678  if (oldMemRefShape[d] < 0) {
1679  // Use dynamicSizes of allocOp for dynamic dimension.
1680  inAffineApply.emplace_back(allocOp.getDynamicSizes()[dynIdx]);
1681  dynIdx++;
1682  } else {
1683  // Create ConstantOp for static dimension.
1684  auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1685  inAffineApply.emplace_back(
1686  arith::ConstantOp::create(b, allocOp.getLoc(), constantAttr));
1687  }
1688  }
1689 
1690  // Create new map to calculate each dimension size of new memref for each
 1691  // original map output. Only for dynamic dimensions of `newMemRefType`.
1692  unsigned newDimIdx = 0;
 1693  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
 1694  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
 1695  (void)getTileSizePos(map, tileSizePos);
1696  for (AffineExpr expr : map.getResults()) {
1697  if (newMemRefShape[newDimIdx] < 0) {
 1698  // Create new maps to calculate each dimension size of new memref.
 1699  TileExprPattern pat = TileExprPattern::TileNone;
 1700  for (auto pos : tileSizePos) {
 1701  if (newDimIdx == std::get<1>(pos))
 1702  pat = TileExprPattern::TileFloorDiv;
 1703  else if (newDimIdx == std::get<2>(pos))
 1704  pat = TileExprPattern::TileMod;
 1705  }
1706  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1707  AffineMap newMap =
1708  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1709  Value affineApp =
1710  AffineApplyOp::create(b, allocOp.getLoc(), newMap, inAffineApply);
1711  newDynamicSizes.emplace_back(affineApp);
1712  }
1713  newDimIdx++;
1714  }
1715 }
1716 
1717 template <typename AllocLikeOp>
1718 LogicalResult mlir::affine::normalizeMemRef(AllocLikeOp allocOp) {
1719  MemRefType memrefType = allocOp.getType();
1720  OpBuilder b(allocOp);
1721 
1722  // Fetch a new memref type after normalizing the old memref to have an
1723  // identity map layout.
1724  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1725  if (newMemRefType == memrefType)
1726  // Either memrefType already had an identity map or the map couldn't be
1727  // transformed to an identity map.
1728  return failure();
1729 
1730  Value oldMemRef = allocOp.getResult();
1731 
1732  SmallVector<Value, 4> symbolOperands(allocOp.getSymbolOperands());
1733  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1734  AllocLikeOp newAlloc;
1735  // Check if `layoutMap` is a tiled layout. Only single layout map is
 1736  // supported for normalizing dynamic memrefs.
 1737  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
 1738  (void)getTileSizePos(layoutMap, tileSizePos);
1739  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1740  auto oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1741  SmallVector<Value, 4> newDynamicSizes;
1742  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1743  newDynamicSizes);
1744  // Add the new dynamic sizes in new AllocOp.
1745  newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1746  newDynamicSizes, allocOp.getAlignmentAttr());
1747  } else {
1748  newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1749  allocOp.getAlignmentAttr());
1750  }
1751  // Replace all uses of the old memref.
1752  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1753  /*extraIndices=*/{},
1754  /*indexRemap=*/layoutMap,
1755  /*extraOperands=*/{},
1756  /*symbolOperands=*/symbolOperands,
1757  /*userFilterFn=*/nullptr,
1758  /*allowNonDereferencingOps=*/true))) {
1759  // If it failed (due to escapes for example), bail out.
1760  newAlloc.erase();
1761  return failure();
1762  }
1763  // Replace any uses of the original alloc op and erase it. All remaining uses
1764  // have to be dealloc's; RAMUW above would've failed otherwise.
1765  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1766  return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1767  }));
1768  oldMemRef.replaceAllUsesWith(newAlloc);
1769  allocOp.erase();
1770  return success();
1771 }
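// A minimal before/after sketch of the rewrite (the tiled map and types are
// illustrative, not taken from this file):
//
//   #tile = affine_map<(d0) -> (d0 floordiv 4, d0 mod 4)>
//   %0 = memref.alloc() : memref<16xf32, #tile>
//
// becomes
//
//   %0 = memref.alloc() : memref<4x4xf32>
//
// with every affine access index %i into the old memref remapped through
// #tile, i.e. to (%i floordiv 4, %i mod 4).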
1772 
1773 LogicalResult
1774 mlir::affine::normalizeMemRef(memref::ReinterpretCastOp reinterpretCastOp) {
1775  MemRefType memrefType = reinterpretCastOp.getType();
1776  AffineMap oldLayoutMap = memrefType.getLayout().getAffineMap();
1777  Value oldMemRef = reinterpretCastOp.getResult();
1778 
1779  // If `oldLayoutMap` is identity, `memrefType` is already normalized.
1780  if (oldLayoutMap.isIdentity())
1781  return success();
1782 
1783  // Fetch a new memref type after normalizing the old memref to have an
1784  // identity map layout.
1785  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1786  if (newMemRefType == memrefType)
1787  // `oldLayoutMap` couldn't be transformed to an identity map.
1788  return failure();
1789 
1790  uint64_t newRank = newMemRefType.getRank();
1791  SmallVector<Value> mapOperands(oldLayoutMap.getNumDims() +
1792  oldLayoutMap.getNumSymbols());
1793  SmallVector<Value> oldStrides = reinterpretCastOp.getStrides();
1794  Location loc = reinterpretCastOp.getLoc();
1795  // As `newMemRefType` is normalized, it is unit strided.
1796  SmallVector<int64_t> newStaticStrides(newRank, 1);
1797  SmallVector<int64_t> newStaticOffsets(newRank, 0);
1798  ArrayRef<int64_t> oldShape = memrefType.getShape();
1799  ValueRange oldSizes = reinterpretCastOp.getSizes();
1800  unsigned idx = 0;
1801  OpBuilder b(reinterpretCastOp);
1802  // Collect the map operands which will be used to compute the new normalized
1803  // memref shape.
1804  for (unsigned i = 0, e = memrefType.getRank(); i < e; i++) {
1805  if (memrefType.isDynamicDim(i))
1806  mapOperands[i] =
1807  arith::SubIOp::create(b, loc, oldSizes[0].getType(), oldSizes[idx++],
1808  arith::ConstantIndexOp::create(b, loc, 1));
1809  else
1810  mapOperands[i] = arith::ConstantIndexOp::create(b, loc, oldShape[i] - 1);
1811  }
1812  for (unsigned i = 0, e = oldStrides.size(); i < e; i++)
1813  mapOperands[memrefType.getRank() + i] = oldStrides[i];
1814  SmallVector<Value> newSizes;
1815  ArrayRef<int64_t> newShape = newMemRefType.getShape();
1816  // Compute size along all the dimensions of the new normalized memref.
1817  for (unsigned i = 0; i < newRank; i++) {
1818  if (!newMemRefType.isDynamicDim(i))
1819  continue;
1820  newSizes.push_back(AffineApplyOp::create(
1821  b, loc,
1822  AffineMap::get(oldLayoutMap.getNumDims(), oldLayoutMap.getNumSymbols(),
1823  oldLayoutMap.getResult(i)),
1824  mapOperands));
1825  }
1826  for (unsigned i = 0, e = newSizes.size(); i < e; i++) {
1827  newSizes[i] =
1828  arith::AddIOp::create(b, loc, newSizes[i].getType(), newSizes[i],
1829  arith::ConstantIndexOp::create(b, loc, 1));
1830  }
1831  // Create the new reinterpret_cast op.
1832  auto newReinterpretCast = memref::ReinterpretCastOp::create(
1833  b, loc, newMemRefType, reinterpretCastOp.getSource(),
1834  /*offsets=*/ValueRange(), newSizes,
1835  /*strides=*/ValueRange(),
1836  /*static_offsets=*/newStaticOffsets,
1837  /*static_sizes=*/newShape,
1838  /*static_strides=*/newStaticStrides);
1839 
1840  // Replace all uses of the old memref.
1841  if (failed(replaceAllMemRefUsesWith(oldMemRef,
1842  /*newMemRef=*/newReinterpretCast,
1843  /*extraIndices=*/{},
1844  /*indexRemap=*/oldLayoutMap,
1845  /*extraOperands=*/{},
1846  /*symbolOperands=*/oldStrides,
1847  /*userFilterFn=*/nullptr,
1848  /*allowNonDereferencingOps=*/true))) {
1849  // If it failed (due to escapes for example), bail out.
1850  newReinterpretCast.erase();
1851  return failure();
1852  }
1853 
1854  oldMemRef.replaceAllUsesWith(newReinterpretCast);
1855  reinterpretCastOp.erase();
1856  return success();
1857 }
1858 
1859 template LogicalResult
1860 mlir::affine::normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
1861 template LogicalResult
1862 mlir::affine::normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
1863 
1864 MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1865  unsigned rank = memrefType.getRank();
1866  if (rank == 0)
1867  return memrefType;
1868 
1869  if (memrefType.getLayout().isIdentity()) {
 1870  // Either no map is associated with this memref or this memref has
1871  // a trivial (identity) map.
1872  return memrefType;
1873  }
1874  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1875  unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1876 
1877  // We don't do any checks for one-to-one'ness; we assume that it is
1878  // one-to-one.
1879 
1880  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1881  // for now.
 1882  // TODO: Normalize the other types of dynamic memrefs.
 1883  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
 1884  (void)getTileSizePos(layoutMap, tileSizePos);
1885  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1886  return memrefType;
1887 
1888  // We have a single map that is not an identity map. Create a new memref
1889  // with the right shape and an identity layout map.
1890  ArrayRef<int64_t> shape = memrefType.getShape();
1891  // FlatAffineValueConstraint may later on use symbolicOperands.
1892  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1893  SmallVector<unsigned, 4> memrefTypeDynDims;
1894  for (unsigned d = 0; d < rank; ++d) {
1895  // Use constraint system only in static dimensions.
1896  if (shape[d] > 0) {
1897  fac.addBound(BoundType::LB, d, 0);
1898  fac.addBound(BoundType::UB, d, shape[d] - 1);
1899  } else {
1900  memrefTypeDynDims.emplace_back(d);
1901  }
1902  }
1903  // We compose this map with the original index (logical) space to derive
1904  // the upper bounds for the new index space.
1905  unsigned newRank = layoutMap.getNumResults();
1906  if (failed(fac.composeMatchingMap(layoutMap)))
1907  return memrefType;
1908  // TODO: Handle semi-affine maps.
1909  // Project out the old data dimensions.
1910  fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1911  SmallVector<int64_t, 4> newShape(newRank);
1912  MLIRContext *context = memrefType.getContext();
1913  for (unsigned d = 0; d < newRank; ++d) {
1914  // Check if this dimension is dynamic.
1915  if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1916  newShape[d] = ShapedType::kDynamic;
1917  continue;
1918  }
1919  // The lower bound for the shape is always zero.
1920  std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1921  // For a static memref and an affine map with no symbols, this is
1922  // always bounded. However, when we have symbols, we may not be able to
1923  // obtain a constant upper bound. Also, mapping to a negative space is
1924  // invalid for normalization.
1925  if (!ubConst.has_value() || *ubConst < 0) {
1926  LLVM_DEBUG(llvm::dbgs()
1927  << "can't normalize map due to unknown/invalid upper bound");
1928  return memrefType;
1929  }
1930  // If dimension of new memrefType is dynamic, the value is -1.
1931  newShape[d] = *ubConst + 1;
1932  }
1933 
1934  // Create the new memref type after trivializing the old layout map.
1935  auto newMemRefType =
1936  MemRefType::Builder(memrefType)
 1937  .setShape(newShape)
 1938  .setLayout(AffineMapAttr::get(
 1939  AffineMap::getMultiDimIdentityMap(newRank, context)));
1940  return newMemRefType;
1941 }
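// A worked sketch of the shape computation, assuming the layout
// affine_map<(d0) -> (d0 floordiv 4, d0 mod 4)> on memref<64xf32>: d0 ranges
// over [0, 63], so the composed constraints give an upper bound of 15 for
// d0 floordiv 4 and 3 for d0 mod 4, yielding the normalized type
// memref<16x4xf32> with an identity layout.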
 1942 
 1943 DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
 1944  Value rhs) {
1945  DivModValue result;
1946  AffineExpr d0, d1;
1947  bindDims(b.getContext(), d0, d1);
1948  result.quotient =
1949  affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1950  result.remainder =
1951  affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1952  return result;
1953 }
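// A minimal usage sketch (values are illustrative): for index-typed Values
// %lhs and %rhs,
//
//   DivModValue dm = getDivMod(b, loc, lhs, rhs);
//
// materializes, before any canonicalization of the operands, roughly
//
//   %q = affine.apply affine_map<(d0, d1) -> (d0 floordiv d1)>(%lhs, %rhs)
//   %r = affine.apply affine_map<(d0, d1) -> (d0 mod d1)>(%lhs, %rhs)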
1954 
1955 /// Create an affine map that computes `lhs` * `rhs`, composing in any other
1956 /// affine maps.
1957 static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1958  Location loc,
1959  OpFoldResult lhs,
1960  OpFoldResult rhs) {
1961  AffineExpr s0, s1;
1962  bindSymbols(b.getContext(), s0, s1);
1963  return makeComposedFoldedAffineApply(b, loc, s0 * s1, {lhs, rhs});
1964 }
1965 
 1966 FailureOr<SmallVector<Value>>
 1967 mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
 1968  ArrayRef<Value> basis, bool hasOuterBound) {
1969  if (hasOuterBound)
1970  basis = basis.drop_front();
1971 
1972  // Note: the divisors are backwards due to the scan.
1973  SmallVector<Value> divisors;
1974  OpFoldResult basisProd = b.getIndexAttr(1);
1975  for (OpFoldResult basisElem : llvm::reverse(basis)) {
1976  FailureOr<OpFoldResult> nextProd =
1977  composedAffineMultiply(b, loc, basisElem, basisProd);
1978  if (failed(nextProd))
1979  return failure();
1980  basisProd = *nextProd;
1981  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1982  }
1983 
1984  SmallVector<Value> results;
1985  results.reserve(divisors.size() + 1);
1986  Value residual = linearIndex;
1987  for (Value divisor : llvm::reverse(divisors)) {
1988  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1989  results.push_back(divMod.quotient);
1990  residual = divMod.remainder;
1991  }
1992  results.push_back(residual);
1993  return results;
1994 }
1995 
 1996 FailureOr<SmallVector<Value>>
 1997 mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
 1998  ArrayRef<OpFoldResult> basis,
 1999  bool hasOuterBound) {
2000  if (hasOuterBound)
2001  basis = basis.drop_front();
2002 
2003  // Note: the divisors are backwards due to the scan.
2004  SmallVector<Value> divisors;
2005  OpFoldResult basisProd = b.getIndexAttr(1);
2006  for (OpFoldResult basisElem : llvm::reverse(basis)) {
2007  FailureOr<OpFoldResult> nextProd =
2008  composedAffineMultiply(b, loc, basisElem, basisProd);
2009  if (failed(nextProd))
2010  return failure();
2011  basisProd = *nextProd;
2012  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
2013  }
2014 
2015  SmallVector<Value> results;
2016  results.reserve(divisors.size() + 1);
2017  Value residual = linearIndex;
2018  for (Value divisor : llvm::reverse(divisors)) {
2019  DivModValue divMod = getDivMod(b, loc, residual, divisor);
2020  results.push_back(divMod.quotient);
2021  residual = divMod.remainder;
2022  }
2023  results.push_back(residual);
2024  return results;
2025 }
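// A worked sketch for either overload: with basis = (2, 3, 5) (outer bound
// present) and linearIndex = 23, the scan above produces divisors 5 and 15,
// so the returned multi-index is (23 floordiv 15, (23 mod 15) floordiv 5,
// 23 mod 5) = (1, 1, 3).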
 2026 
 2027 OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
 2028  ArrayRef<OpFoldResult> basis,
2029  ImplicitLocOpBuilder &builder) {
2030  return linearizeIndex(builder, builder.getLoc(), multiIndex, basis);
2031 }
 2032 
 2033 OpFoldResult mlir::affine::linearizeIndex(OpBuilder &builder, Location loc,
 2034  ArrayRef<OpFoldResult> multiIndex,
 2035  ArrayRef<OpFoldResult> basis) {
2036  assert(multiIndex.size() == basis.size() ||
2037  multiIndex.size() == basis.size() + 1);
2038  SmallVector<AffineExpr> basisAffine;
2039 
2040  // Add a fake initial size in order to make the later index linearization
2041  // computations line up if an outer bound is not provided.
2042  if (multiIndex.size() == basis.size() + 1)
2043  basisAffine.push_back(getAffineConstantExpr(1, builder.getContext()));
2044 
2045  for (size_t i = 0; i < basis.size(); ++i) {
2046  basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
2047  }
2048 
2049  SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
2050  SmallVector<OpFoldResult> strides;
2051  strides.reserve(stridesAffine.size());
2052  llvm::transform(stridesAffine, std::back_inserter(strides),
 2053  [&builder, &basis, loc](AffineExpr strideExpr) {
 2054  return affine::makeComposedFoldedAffineApply(
 2055  builder, loc, strideExpr, basis);
2056  });
2057 
2058  auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2059  OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
2060  return affine::makeComposedFoldedAffineApply(builder, loc, linearIndexExpr,
2061  multiIndexAndStrides);
2062 }
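// A worked sketch: linearizing multiIndex = (1, 1, 3) with basis = (2, 3, 5)
// uses strides (15, 5, 1), so the folded result is 1 * 15 + 1 * 5 + 3 = 23;
// with all-constant inputs this comes back as an attribute-backed OpFoldResult
// rather than a new affine.apply op.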