MLIR  21.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
25 #include "mlir/IR/Dominance.h"
26 #include "mlir/IR/IRMapping.h"
28 #include "mlir/IR/IntegerSet.h"
30 #include "llvm/Support/LogicalResult.h"
31 #include <optional>
32 
33 #define DEBUG_TYPE "affine-utils"
34 
35 using namespace mlir;
36 using namespace affine;
37 using namespace presburger;
38 
39 namespace {
40 /// Visit affine expressions recursively and build the sequence of operations
41 /// that correspond to it. Visitation functions return an Value of the
42 /// expression subtree they visited or `nullptr` on error.
43 class AffineApplyExpander
44  : public AffineExprVisitor<AffineApplyExpander, Value> {
45 public:
46  /// This internal class expects arguments to be non-null, checks must be
47  /// performed at the call site.
48  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
49  ValueRange symbolValues, Location loc)
50  : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
51  loc(loc) {}
52 
53  template <typename OpTy>
54  Value buildBinaryExpr(AffineBinaryOpExpr expr,
55  arith::IntegerOverflowFlags overflowFlags =
56  arith::IntegerOverflowFlags::none) {
57  auto lhs = visit(expr.getLHS());
58  auto rhs = visit(expr.getRHS());
59  if (!lhs || !rhs)
60  return nullptr;
61  auto op = builder.create<OpTy>(loc, lhs, rhs, overflowFlags);
62  return op.getResult();
63  }
64 
65  Value visitAddExpr(AffineBinaryOpExpr expr) {
66  return buildBinaryExpr<arith::AddIOp>(expr);
67  }
68 
69  Value visitMulExpr(AffineBinaryOpExpr expr) {
70  return buildBinaryExpr<arith::MulIOp>(expr,
71  arith::IntegerOverflowFlags::nsw);
72  }
73 
74  /// Euclidean modulo operation: negative RHS is not allowed.
75  /// Remainder of the euclidean integer division is always non-negative.
76  ///
77  /// Implemented as
78  ///
79  /// a mod b =
80  /// let remainder = srem a, b;
81  /// negative = a < 0 in
82  /// select negative, remainder + b, remainder.
83  Value visitModExpr(AffineBinaryOpExpr expr) {
84  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
85  if (rhsConst.getValue() <= 0) {
86  emitError(loc, "modulo by non-positive value is not supported");
87  return nullptr;
88  }
89  }
90 
91  auto lhs = visit(expr.getLHS());
92  auto rhs = visit(expr.getRHS());
93  assert(lhs && rhs && "unexpected affine expr lowering failure");
94 
95  Value remainder = builder.create<arith::RemSIOp>(loc, lhs, rhs);
96  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
97  Value isRemainderNegative = builder.create<arith::CmpIOp>(
98  loc, arith::CmpIPredicate::slt, remainder, zeroCst);
99  Value correctedRemainder =
100  builder.create<arith::AddIOp>(loc, remainder, rhs);
101  Value result = builder.create<arith::SelectOp>(
102  loc, isRemainderNegative, correctedRemainder, remainder);
103  return result;
104  }
105 
106  /// Floor division operation (rounds towards negative infinity).
107  ///
108  /// For positive divisors, it can be implemented without branching and with a
109  /// single division operation as
110  ///
111  /// a floordiv b =
112  /// let negative = a < 0 in
113  /// let absolute = negative ? -a - 1 : a in
114  /// let quotient = absolute / b in
115  /// negative ? -quotient - 1 : quotient
116  ///
117  /// Note: this lowering does not use arith.floordivsi because the lowering of
118  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
119  /// not one but two arith.divsi. That could be changed to one divsi, but one
120  /// way or another, going through arith.floordivsi will result in more complex
121  /// IR because arith.floordivsi is more general than affine floordiv in that
122  /// it supports negative RHS.
123  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
124  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
125  if (rhsConst.getValue() <= 0) {
126  emitError(loc, "division by non-positive value is not supported");
127  return nullptr;
128  }
129  }
130  auto lhs = visit(expr.getLHS());
131  auto rhs = visit(expr.getRHS());
132  assert(lhs && rhs && "unexpected affine expr lowering failure");
133 
134  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
135  Value noneCst = builder.create<arith::ConstantIndexOp>(loc, -1);
136  Value negative = builder.create<arith::CmpIOp>(
137  loc, arith::CmpIPredicate::slt, lhs, zeroCst);
138  Value negatedDecremented = builder.create<arith::SubIOp>(loc, noneCst, lhs);
139  Value dividend =
140  builder.create<arith::SelectOp>(loc, negative, negatedDecremented, lhs);
141  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
142  Value correctedQuotient =
143  builder.create<arith::SubIOp>(loc, noneCst, quotient);
144  Value result = builder.create<arith::SelectOp>(loc, negative,
145  correctedQuotient, quotient);
146  return result;
147  }
148 
149  /// Ceiling division operation (rounds towards positive infinity).
150  ///
151  /// For positive divisors, it can be implemented without branching and with a
152  /// single division operation as
153  ///
154  /// a ceildiv b =
155  /// let negative = a <= 0 in
156  /// let absolute = negative ? -a : a - 1 in
157  /// let quotient = absolute / b in
158  /// negative ? -quotient : quotient + 1
159  ///
160  /// Note: not using arith.ceildivsi for the same reason as explained in the
161  /// visitFloorDivExpr comment.
162  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
163  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
164  if (rhsConst.getValue() <= 0) {
165  emitError(loc, "division by non-positive value is not supported");
166  return nullptr;
167  }
168  }
169  auto lhs = visit(expr.getLHS());
170  auto rhs = visit(expr.getRHS());
171  assert(lhs && rhs && "unexpected affine expr lowering failure");
172 
173  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
174  Value oneCst = builder.create<arith::ConstantIndexOp>(loc, 1);
175  Value nonPositive = builder.create<arith::CmpIOp>(
176  loc, arith::CmpIPredicate::sle, lhs, zeroCst);
177  Value negated = builder.create<arith::SubIOp>(loc, zeroCst, lhs);
178  Value decremented = builder.create<arith::SubIOp>(loc, lhs, oneCst);
179  Value dividend =
180  builder.create<arith::SelectOp>(loc, nonPositive, negated, decremented);
181  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
182  Value negatedQuotient =
183  builder.create<arith::SubIOp>(loc, zeroCst, quotient);
184  Value incrementedQuotient =
185  builder.create<arith::AddIOp>(loc, quotient, oneCst);
186  Value result = builder.create<arith::SelectOp>(
187  loc, nonPositive, negatedQuotient, incrementedQuotient);
188  return result;
189  }
190 
191  Value visitConstantExpr(AffineConstantExpr expr) {
192  auto op = builder.create<arith::ConstantIndexOp>(loc, expr.getValue());
193  return op.getResult();
194  }
195 
196  Value visitDimExpr(AffineDimExpr expr) {
197  assert(expr.getPosition() < dimValues.size() &&
198  "affine dim position out of range");
199  return dimValues[expr.getPosition()];
200  }
201 
202  Value visitSymbolExpr(AffineSymbolExpr expr) {
203  assert(expr.getPosition() < symbolValues.size() &&
204  "symbol dim position out of range");
205  return symbolValues[expr.getPosition()];
206  }
207 
208 private:
209  OpBuilder &builder;
210  ValueRange dimValues;
211  ValueRange symbolValues;
212 
213  Location loc;
214 };
215 } // namespace
216 
217 /// Create a sequence of operations that implement the `expr` applied to the
218 /// given dimension and symbol values.
220  AffineExpr expr,
221  ValueRange dimValues,
222  ValueRange symbolValues) {
223  return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
224 }
225 
226 /// Create a sequence of operations that implement the `affineMap` applied to
227 /// the given `operands` (as it it were an AffineApplyOp).
228 std::optional<SmallVector<Value, 8>>
230  AffineMap affineMap, ValueRange operands) {
231  auto numDims = affineMap.getNumDims();
232  auto expanded = llvm::to_vector<8>(
233  llvm::map_range(affineMap.getResults(),
234  [numDims, &builder, loc, operands](AffineExpr expr) {
235  return expandAffineExpr(builder, loc, expr,
236  operands.take_front(numDims),
237  operands.drop_front(numDims));
238  }));
239  if (llvm::all_of(expanded, [](Value v) { return v; }))
240  return expanded;
241  return std::nullopt;
242 }
243 
244 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
245 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
246 /// the rest of the op.
247 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
248  if (elseBlock)
249  assert(ifOp.hasElse() && "else block expected");
250 
251  Block *destBlock = ifOp->getBlock();
252  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
253  destBlock->getOperations().splice(
254  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
255  std::prev(srcBlock->end()));
256  ifOp.erase();
257 }
258 
259 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
260 /// on. The `ifOp` could be hoisted and placed right before such an operation.
261 /// This method assumes that the ifOp has been canonicalized (to be correct and
262 /// effective).
263 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
264  // Walk up the parents past all for op that this conditional is invariant on.
265  auto ifOperands = ifOp.getOperands();
266  Operation *res = ifOp;
268  auto *parentOp = res->getParentOp();
269  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
270  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
271  break;
272  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
273  if (llvm::any_of(parallelOp.getIVs(), [&](Value iv) {
274  return llvm::is_contained(ifOperands, iv);
275  }))
276  break;
277  } else if (!isa<AffineIfOp>(parentOp)) {
278  // Won't walk up past anything other than affine.for/if ops.
279  break;
280  }
281  // You can always hoist up past any affine.if ops.
282  res = parentOp;
283  }
284  return res;
285 }
286 
287 /// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
288 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
289 /// otherwise the same `ifOp`.
290 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
291  // No hoisting to do.
292  if (hoistOverOp == ifOp)
293  return ifOp;
294 
295  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
296  // the else block. Then drop the else block of the original 'if' in the 'then'
297  // branch while promoting its then block, and analogously drop the 'then'
298  // block of the original 'if' from the 'else' branch while promoting its else
299  // block.
300  IRMapping operandMap;
301  OpBuilder b(hoistOverOp);
302  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
303  ifOp.getOperands(),
304  /*elseBlock=*/true);
305 
306  // Create a clone of hoistOverOp to use for the else branch of the hoisted
307  // conditional. The else block may get optimized away if empty.
308  Operation *hoistOverOpClone = nullptr;
309  // We use this unique name to identify/find `ifOp`'s clone in the else
310  // version.
311  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
312  operandMap.clear();
313  b.setInsertionPointAfter(hoistOverOp);
314  // We'll set an attribute to identify this op in a clone of this sub-tree.
315  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
316  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
317 
318  // Promote the 'then' block of the original affine.if in the then version.
319  promoteIfBlock(ifOp, /*elseBlock=*/false);
320 
321  // Move the then version to the hoisted if op's 'then' block.
322  auto *thenBlock = hoistedIfOp.getThenBlock();
323  thenBlock->getOperations().splice(thenBlock->begin(),
324  hoistOverOp->getBlock()->getOperations(),
325  Block::iterator(hoistOverOp));
326 
327  // Find the clone of the original affine.if op in the else version.
328  AffineIfOp ifCloneInElse;
329  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
330  if (!ifClone->getAttr(idForIfOp))
331  return WalkResult::advance();
332  ifCloneInElse = ifClone;
333  return WalkResult::interrupt();
334  });
335  assert(ifCloneInElse && "if op clone should exist");
336  // For the else block, promote the else block of the original 'if' if it had
337  // one; otherwise, the op itself is to be erased.
338  if (!ifCloneInElse.hasElse())
339  ifCloneInElse.erase();
340  else
341  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
342 
343  // Move the else version into the else block of the hoisted if op.
344  auto *elseBlock = hoistedIfOp.getElseBlock();
345  elseBlock->getOperations().splice(
346  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
347  Block::iterator(hoistOverOpClone));
348 
349  return hoistedIfOp;
350 }
351 
352 LogicalResult
354  ArrayRef<LoopReduction> parallelReductions,
355  AffineParallelOp *resOp) {
356  // Fail early if there are iter arguments that are not reductions.
357  unsigned numReductions = parallelReductions.size();
358  if (numReductions != forOp.getNumIterOperands())
359  return failure();
360 
361  Location loc = forOp.getLoc();
362  OpBuilder outsideBuilder(forOp);
363  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
364  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
365  AffineMap upperBoundMap = forOp.getUpperBoundMap();
366  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
367 
368  // Creating empty 1-D affine.parallel op.
369  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
370  parallelReductions, [](const LoopReduction &red) { return red.value; }));
371  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
372  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
373  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
374  loc, ValueRange(reducedValues).getTypes(), reductionKinds,
375  llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
376  llvm::ArrayRef(upperBoundMap), upperBoundOperands,
377  llvm::ArrayRef(forOp.getStepAsInt()));
378  // Steal the body of the old affine for op.
379  newPloop.getRegion().takeBody(forOp.getRegion());
380  Operation *yieldOp = &newPloop.getBody()->back();
381 
382  // Handle the initial values of reductions because the parallel loop always
383  // starts from the neutral value.
384  SmallVector<Value> newResults;
385  newResults.reserve(numReductions);
386  for (unsigned i = 0; i < numReductions; ++i) {
387  Value init = forOp.getInits()[i];
388  // This works because we are only handling single-op reductions at the
389  // moment. A switch on reduction kind or a mechanism to collect operations
390  // participating in the reduction will be necessary for multi-op reductions.
391  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
392  assert(reductionOp && "yielded value is expected to be produced by an op");
393  outsideBuilder.getInsertionBlock()->getOperations().splice(
394  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
395  reductionOp);
396  reductionOp->setOperands({init, newPloop->getResult(i)});
397  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
398  }
399 
400  // Update the loop terminator to yield reduced values bypassing the reduction
401  // operation itself (now moved outside of the loop) and erase the block
402  // arguments that correspond to reductions. Note that the loop always has one
403  // "main" induction variable whenc coming from a non-parallel for.
404  unsigned numIVs = 1;
405  yieldOp->setOperands(reducedValues);
406  newPloop.getBody()->eraseArguments(numIVs, numReductions);
407 
408  forOp.erase();
409  if (resOp)
410  *resOp = newPloop;
411  return success();
412 }
413 
414 // Returns success if any hoisting happened.
415 LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
416  // Bail out early if the ifOp returns a result. TODO: Consider how to
417  // properly support this case.
418  if (ifOp.getNumResults() != 0)
419  return failure();
420 
421  // Apply canonicalization patterns and folding - this is necessary for the
422  // hoisting check to be correct (operands should be composed), and to be more
423  // effective (no unused operands). Since the pattern rewriter's folding is
424  // entangled with application of patterns, we may fold/end up erasing the op,
425  // in which case we return with `folded` being set.
426  RewritePatternSet patterns(ifOp.getContext());
427  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
428  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
429  bool erased;
431  ifOp.getOperation(), frozenPatterns,
433  /*changed=*/nullptr, &erased);
434  if (erased) {
435  if (folded)
436  *folded = true;
437  return failure();
438  }
439  if (folded)
440  *folded = false;
441 
442  // The folding above should have ensured this.
443  assert(llvm::all_of(ifOp.getOperands(),
444  [](Value v) {
445  return isTopLevelValue(v) || isAffineInductionVar(v);
446  }) &&
447  "operands not composed");
448 
449  // We are going hoist as high as possible.
450  // TODO: this could be customized in the future.
451  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
452 
453  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
454  // Nothing to hoist over.
455  if (hoistedIfOp == ifOp)
456  return failure();
457 
458  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
459  // a sequence of affine.fors that are all perfectly nested).
460  (void)applyPatternsGreedily(
461  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
462  frozenPatterns);
463 
464  return success();
465 }
466 
467 // Return the min expr after replacing the given dim.
470  bool positivePath) {
471  if (e == dim)
472  return positivePath ? min : max;
473  if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
474  AffineExpr lhs = bin.getLHS();
475  AffineExpr rhs = bin.getRHS();
476  if (bin.getKind() == mlir::AffineExprKind::Add)
477  return substWithMin(lhs, dim, min, max, positivePath) +
478  substWithMin(rhs, dim, min, max, positivePath);
479 
480  auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
481  auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
482  if (c1 && c1.getValue() < 0)
483  return getAffineBinaryOpExpr(
484  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
485  if (c2 && c2.getValue() < 0)
486  return getAffineBinaryOpExpr(
487  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
488  return getAffineBinaryOpExpr(
489  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
490  substWithMin(rhs, dim, min, max, positivePath));
491  }
492  return e;
493 }
494 
495 void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
496  // Loops with min/max in bounds are not normalized at the moment.
497  if (op.hasMinMaxBounds())
498  return;
499 
500  AffineMap lbMap = op.getLowerBoundsMap();
501  SmallVector<int64_t, 8> steps = op.getSteps();
502  // No need to do any work if the parallel op is already normalized.
503  bool isAlreadyNormalized =
504  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
505  int64_t step = std::get<0>(tuple);
506  auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
507  return lbExpr && lbExpr.getValue() == 0 && step == 1;
508  });
509  if (isAlreadyNormalized)
510  return;
511 
512  AffineValueMap ranges;
513  AffineValueMap::difference(op.getUpperBoundsValueMap(),
514  op.getLowerBoundsValueMap(), &ranges);
515  auto builder = OpBuilder::atBlockBegin(op.getBody());
516  auto zeroExpr = builder.getAffineConstantExpr(0);
519  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
520  int64_t step = steps[i];
521 
522  // Adjust the lower bound to be 0.
523  lbExprs.push_back(zeroExpr);
524 
525  // Adjust the upper bound expression: 'range / step'.
526  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
527  ubExprs.push_back(ubExpr);
528 
529  // Adjust the corresponding IV: 'lb + i * step'.
530  BlockArgument iv = op.getBody()->getArgument(i);
531  AffineExpr lbExpr = lbMap.getResult(i);
532  unsigned nDims = lbMap.getNumDims();
533  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
534  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
535  /*symbolCount=*/lbMap.getNumSymbols(), expr);
536 
537  // Use an 'affine.apply' op that will be simplified later in subsequent
538  // canonicalizations.
539  OperandRange lbOperands = op.getLowerBoundsOperands();
540  OperandRange dimOperands = lbOperands.take_front(nDims);
541  OperandRange symbolOperands = lbOperands.drop_front(nDims);
542  SmallVector<Value, 8> applyOperands{dimOperands};
543  applyOperands.push_back(iv);
544  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
545  auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
546  iv.replaceAllUsesExcept(apply, apply);
547  }
548 
549  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
550  op.setSteps(newSteps);
551  auto newLowerMap = AffineMap::get(
552  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
553  op.setLowerBounds({}, newLowerMap);
554  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
555  ubExprs, op.getContext());
556  op.setUpperBounds(ranges.getOperands(), newUpperMap);
557 }
558 
559 LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
560  bool promoteSingleIter) {
561  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
562  return success();
563 
564  // Check if the forop is already normalized.
565  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
566  (op.getStep() == 1))
567  return success();
568 
569  // Check if the lower bound has a single result only. Loops with a max lower
570  // bound can't be normalized without additional support like
571  // affine.execute_region's. If the lower bound does not have a single result
572  // then skip this op.
573  if (op.getLowerBoundMap().getNumResults() != 1)
574  return failure();
575 
576  Location loc = op.getLoc();
577  OpBuilder opBuilder(op);
578  int64_t origLoopStep = op.getStepAsInt();
579 
580  // Construct the new upper bound value map.
581  AffineMap oldLbMap = op.getLowerBoundMap();
582  // The upper bound can have multiple results. To use
583  // AffineValueMap::difference, we need to have the same number of results in
584  // both lower and upper bound maps. So, we just create a value map for the
585  // lower bound with the only available lower bound result repeated to pad up
586  // to the number of upper bound results.
587  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
588  op.getLowerBoundMap().getResult(0));
589  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
590  AffineMap paddedLbMap =
591  AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
592  op.getContext());
593  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
594  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
595  AffineValueMap newUbValueMap;
596  // Compute the `upper bound - lower bound`.
597  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
598  (void)newUbValueMap.canonicalize();
599 
600  // Scale down the upper bound value map by the loop step.
601  unsigned numResult = newUbValueMap.getNumResults();
602  SmallVector<AffineExpr> scaleDownExprs(numResult);
603  for (unsigned i = 0; i < numResult; ++i)
604  scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
605  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
606  // step). Where `n` is the number of results in the upper bound map.
607  AffineMap scaleDownMap =
608  AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
609  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
610 
611  // Set the newly create upper bound map and operands.
612  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
613  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
614  op.setStep(1);
615 
616  // Calculate the Value of new loopIV. Create affine.apply for the value of
617  // the loopIV in normalized loop.
618  opBuilder.setInsertionPointToStart(op.getBody());
619  // Construct an affine.apply op mapping the new IV to the old IV.
620  AffineMap scaleIvMap =
621  AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
622  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
623  AffineValueMap newIvToOldIvMap;
624  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
625  (void)newIvToOldIvMap.canonicalize();
626  auto newIV = opBuilder.create<AffineApplyOp>(
627  loc, newIvToOldIvMap.getAffineMap(), newIvToOldIvMap.getOperands());
628  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
629  return success();
630 }
631 
632 /// Returns true if the memory operation of `destAccess` depends on `srcAccess`
633 /// inside of the innermost common surrounding affine loop between the two
634 /// accesses.
635 static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
636  const MemRefAccess &destAccess) {
637  // Affine dependence analysis is possible only if both ops in the same
638  // AffineScope.
639  if (getAffineAnalysisScope(srcAccess.opInst) !=
640  getAffineAnalysisScope(destAccess.opInst))
641  return false;
642 
643  unsigned nsLoops =
644  getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
645  DependenceResult result =
646  checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
647  return hasDependence(result);
648 }
649 
650 /// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
651 /// scope of the outermost `minSurroundingLoops` loops that surround them.
652 /// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
653 static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
654  unsigned minSurroundingLoops) {
655  MemRefAccess srcAccess(srcMemOp);
656  MemRefAccess destAccess(destMemOp);
657 
658  // Affine dependence analysis here is applicable only if both ops operate on
659  // the same memref and if `srcMemOp` and `destMemOp` are in the same
660  // AffineScope. Also, we can only check if our affine scope is isolated from
661  // above; otherwise, values can from outside of the affine scope that the
662  // check below cannot analyze.
663  Region *srcScope = getAffineAnalysisScope(srcMemOp);
664  if (srcAccess.memref == destAccess.memref &&
665  srcScope == getAffineAnalysisScope(destMemOp)) {
666  unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
667  FlatAffineValueConstraints dependenceConstraints;
668  for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
670  srcAccess, destAccess, d, &dependenceConstraints,
671  /*dependenceComponents=*/nullptr);
672  // A dependence failure or the presence of a dependence implies a
673  // side effect.
674  if (!noDependence(result))
675  return true;
676  }
677  // No side effect was seen.
678  return false;
679  }
680  // TODO: Check here if the memrefs alias: there is no side effect if
681  // `srcAccess.memref` and `destAccess.memref` don't alias.
682  return true;
683 }
684 
685 template <typename EffectType, typename T>
687  Operation *start, T memOp,
689  // A boolean representing whether an intervening operation could have impacted
690  // memOp.
691  bool hasSideEffect = false;
692 
693  // Check whether the effect on memOp can be caused by a given operation op.
694  Value memref = memOp.getMemRef();
695  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
696  // If the effect has alreay been found, early exit,
697  if (hasSideEffect)
698  return;
699 
700  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
702  memEffect.getEffects(effects);
703 
704  bool opMayHaveEffect = false;
705  for (auto effect : effects) {
706  // If op causes EffectType on a potentially aliasing location for
707  // memOp, mark as having the effect.
708  if (isa<EffectType>(effect.getEffect())) {
709  if (effect.getValue() && effect.getValue() != memref &&
710  !mayAlias(effect.getValue(), memref))
711  continue;
712  opMayHaveEffect = true;
713  break;
714  }
715  }
716 
717  if (!opMayHaveEffect)
718  return;
719 
720  // If the side effect comes from an affine read or write, try to
721  // prove the side effecting `op` cannot reach `memOp`.
722  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
723  // For ease, let's consider the case that `op` is a store and
724  // we're looking for other potential stores that overwrite memory after
725  // `start`, and before being read in `memOp`. In this case, we only
726  // need to consider other potential stores with depth >
727  // minSurroundingLoops since `start` would overwrite any store with a
728  // smaller number of surrounding loops before.
729  unsigned minSurroundingLoops =
730  getNumCommonSurroundingLoops(*start, *memOp);
731  if (mayHaveEffect(op, memOp, minSurroundingLoops))
732  hasSideEffect = true;
733  return;
734  }
735 
736  // We have an op with a memory effect and we cannot prove if it
737  // intervenes.
738  hasSideEffect = true;
739  return;
740  }
741 
742  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
743  // Recurse into the regions for this op and check whether the internal
744  // operations may have the side effect `EffectType` on memOp.
745  for (Region &region : op->getRegions())
746  for (Block &block : region)
747  for (Operation &op : block)
748  checkOperation(&op);
749  return;
750  }
751 
752  // Otherwise, conservatively assume generic operations have the effect
753  // on the operation
754  hasSideEffect = true;
755  };
756 
757  // Check all paths from ancestor op `parent` to the operation `to` for the
758  // effect. It is known that `to` must be contained within `parent`.
759  auto until = [&](Operation *parent, Operation *to) {
760  // TODO check only the paths from `parent` to `to`.
761  // Currently we fallback and check the entire parent op, rather than
762  // just the paths from the parent path, stopping after reaching `to`.
763  // This is conservatively correct, but could be made more aggressive.
764  assert(parent->isAncestor(to));
765  checkOperation(parent);
766  };
767 
768  // Check for all paths from operation `from` to operation `untilOp` for the
769  // given memory effect.
770  std::function<void(Operation *, Operation *)> recur =
771  [&](Operation *from, Operation *untilOp) {
772  assert(
773  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
774  "Checking for side effect between two operations without a common "
775  "ancestor");
776 
777  // If the operations are in different regions, recursively consider all
778  // path from `from` to the parent of `to` and all paths from the parent
779  // of `to` to `to`.
780  if (from->getParentRegion() != untilOp->getParentRegion()) {
781  recur(from, untilOp->getParentOp());
782  until(untilOp->getParentOp(), untilOp);
783  return;
784  }
785 
786  // Now, assuming that `from` and `to` exist in the same region, perform
787  // a CFG traversal to check all the relevant operations.
788 
789  // Additional blocks to consider.
790  SmallVector<Block *, 2> todoBlocks;
791  {
792  // First consider the parent block of `from` an check all operations
793  // after `from`.
794  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
795  iter != end && &*iter != untilOp; ++iter) {
796  checkOperation(&*iter);
797  }
798 
799  // If the parent of `from` doesn't contain `to`, add the successors
800  // to the list of blocks to check.
801  if (untilOp->getBlock() != from->getBlock())
802  for (Block *succ : from->getBlock()->getSuccessors())
803  todoBlocks.push_back(succ);
804  }
805 
807  // Traverse the CFG until hitting `to`.
808  while (!todoBlocks.empty()) {
809  Block *blk = todoBlocks.pop_back_val();
810  if (done.count(blk))
811  continue;
812  done.insert(blk);
813  for (auto &op : *blk) {
814  if (&op == untilOp)
815  break;
816  checkOperation(&op);
817  if (&op == blk->getTerminator())
818  for (Block *succ : blk->getSuccessors())
819  todoBlocks.push_back(succ);
820  }
821  }
822  };
823  recur(start, memOp);
824  return !hasSideEffect;
825 }
826 
827 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
828 /// which the load is guaranteed to retrieve. This check involves three
829 /// components: 1) The store and load must be on the same location 2) The store
830 /// must dominate (and therefore must always occur prior to) the load 3) No
831 /// other operations will overwrite the memory loaded between the given load
832 /// and store. If such a value exists, the replaced `loadOp` will be added to
833 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
834 static void forwardStoreToLoad(
835  AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
836  SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
838 
839  // The store op candidate for forwarding that satisfies all conditions
840  // to replace the load, if any.
841  Operation *lastWriteStoreOp = nullptr;
842 
843  for (auto *user : loadOp.getMemRef().getUsers()) {
844  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
845  if (!storeOp)
846  continue;
847  MemRefAccess srcAccess(storeOp);
848  MemRefAccess destAccess(loadOp);
849 
850  // 1. Check if the store and the load have mathematically equivalent
851  // affine access functions; this implies that they statically refer to the
852  // same single memref element. As an example this filters out cases like:
853  // store %A[%i0 + 1]
854  // load %A[%i0]
855  // store %A[%M]
856  // load %A[%N]
857  // Use the AffineValueMap difference based memref access equality checking.
858  if (srcAccess != destAccess)
859  continue;
860 
861  // 2. The store has to dominate the load op to be candidate.
862  if (!domInfo.dominates(storeOp, loadOp))
863  continue;
864 
865  // 3. The store must reach the load. Access function equivalence only
866  // guarantees this for accesses in the same block. The load could be in a
867  // nested block that is unreachable.
868  if (!mustReachAtInnermost(srcAccess, destAccess))
869  continue;
870 
871  // 4. Ensure there is no intermediate operation which could replace the
872  // value in memory.
873  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
874  mayAlias))
875  continue;
876 
877  // We now have a candidate for forwarding.
878  assert(lastWriteStoreOp == nullptr &&
879  "multiple simultaneous replacement stores");
880  lastWriteStoreOp = storeOp;
881  }
882 
883  if (!lastWriteStoreOp)
884  return;
885 
886  // Perform the actual store to load forwarding.
887  Value storeVal =
888  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
889  // Check if 2 values have the same shape. This is needed for affine vector
890  // loads and stores.
891  if (storeVal.getType() != loadOp.getValue().getType())
892  return;
893  loadOp.getValue().replaceAllUsesWith(storeVal);
894  // Record the memref for a later sweep to optimize away.
895  memrefsToErase.insert(loadOp.getMemRef());
896  // Record this to erase later.
897  loadOpsToErase.push_back(loadOp);
898 }
899 
900 template bool
902  affine::AffineReadOpInterface>(
903  mlir::Operation *, affine::AffineReadOpInterface,
904  llvm::function_ref<bool(Value, Value)>);
905 
906 // This attempts to find stores which have no impact on the final result.
907 // A writing op writeA will be eliminated if there exists an op writeB if
908 // 1) writeA and writeB have mathematically equivalent affine access functions.
909 // 2) writeB postdominates writeA.
910 // 3) There is no potential read between writeA and writeB.
911 static void findUnusedStore(AffineWriteOpInterface writeA,
912  SmallVectorImpl<Operation *> &opsToErase,
913  PostDominanceInfo &postDominanceInfo,
915 
916  for (Operation *user : writeA.getMemRef().getUsers()) {
917  // Only consider writing operations.
918  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
919  if (!writeB)
920  continue;
921 
922  // The operations must be distinct.
923  if (writeB == writeA)
924  continue;
925 
926  // Both operations must lie in the same region.
927  if (writeB->getParentRegion() != writeA->getParentRegion())
928  continue;
929 
930  // Both operations must write to the same memory.
931  MemRefAccess srcAccess(writeB);
932  MemRefAccess destAccess(writeA);
933 
934  if (srcAccess != destAccess)
935  continue;
936 
937  // writeB must postdominate writeA.
938  if (!postDominanceInfo.postDominates(writeB, writeA))
939  continue;
940 
941  // There cannot be an operation which reads from memory between
942  // the two writes.
943  if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
944  mayAlias))
945  continue;
946 
947  opsToErase.push_back(writeA);
948  break;
949  }
950 }
951 
952 // The load to load forwarding / redundant load elimination is similar to the
953 // store to load forwarding.
954 // loadA will be be replaced with loadB if:
955 // 1) loadA and loadB have mathematically equivalent affine access functions.
956 // 2) loadB dominates loadA.
957 // 3) There is no write between loadA and loadB.
958 static void loadCSE(AffineReadOpInterface loadA,
959  SmallVectorImpl<Operation *> &loadOpsToErase,
960  DominanceInfo &domInfo,
963  for (auto *user : loadA.getMemRef().getUsers()) {
964  auto loadB = dyn_cast<AffineReadOpInterface>(user);
965  if (!loadB || loadB == loadA)
966  continue;
967 
968  MemRefAccess srcAccess(loadB);
969  MemRefAccess destAccess(loadA);
970 
971  // 1. The accesses should be to be to the same location.
972  if (srcAccess != destAccess) {
973  continue;
974  }
975 
976  // 2. loadB should dominate loadA.
977  if (!domInfo.dominates(loadB, loadA))
978  continue;
979 
980  // 3. There should not be a write between loadA and loadB.
981  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
982  loadB.getOperation(), loadA, mayAlias))
983  continue;
984 
985  // Check if two values have the same shape. This is needed for affine vector
986  // loads.
987  if (loadB.getValue().getType() != loadA.getValue().getType())
988  continue;
989 
990  loadCandidates.push_back(loadB);
991  }
992 
993  // Of the legal load candidates, use the one that dominates all others
994  // to minimize the subsequent need to loadCSE
995  Value loadB;
996  for (AffineReadOpInterface option : loadCandidates) {
997  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
998  return depStore == option ||
999  domInfo.dominates(option.getOperation(),
1000  depStore.getOperation());
1001  })) {
1002  loadB = option.getValue();
1003  break;
1004  }
1005  }
1006 
1007  if (loadB) {
1008  loadA.getValue().replaceAllUsesWith(loadB);
1009  // Record this to erase later.
1010  loadOpsToErase.push_back(loadA);
1011  }
1012 }
1013 
1014 // The store to load forwarding and load CSE rely on three conditions:
1015 //
1016 // 1) store/load providing a replacement value and load being replaced need to
1017 // have mathematically equivalent affine access functions (checked after full
1018 // composition of load/store operands); this implies that they access the same
1019 // single memref element for all iterations of the common surrounding loop,
1020 //
1021 // 2) the store/load op should dominate the load op,
1022 //
1023 // 3) no operation that may write to memory read by the load being replaced can
1024 // occur after executing the instruction (load or store) providing the
1025 // replacement value and before the load being replaced (thus potentially
1026 // allowing overwriting the memory read by the load).
1027 //
1028 // The above conditions are simple to check, sufficient, and powerful for most
1029 // cases in practice - they are sufficient, but not necessary --- since they
1030 // don't reason about loops that are guaranteed to execute at least once or
1031 // multiple sources to forward from.
1032 //
1033 // TODO: more forwarding can be done when support for
1034 // loop/conditional live-out SSA values is available.
1035 // TODO: do general dead store elimination for memref's. This pass
1036 // currently only eliminates the stores only if no other loads/uses (other
1037 // than dealloc) remain.
1038 //
1039 void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1040  PostDominanceInfo &postDomInfo,
1041  AliasAnalysis &aliasAnalysis) {
1042  // Load op's whose results were replaced by those forwarded from stores.
1043  SmallVector<Operation *, 8> opsToErase;
1044 
1045  // A list of memref's that are potentially dead / could be eliminated.
1046  SmallPtrSet<Value, 4> memrefsToErase;
1047 
1048  auto mayAlias = [&](Value val1, Value val2) -> bool {
1049  return !aliasAnalysis.alias(val1, val2).isNo();
1050  };
1051 
1052  // Walk all load's and perform store to load forwarding.
1053  f.walk([&](AffineReadOpInterface loadOp) {
1054  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1055  });
1056  for (auto *op : opsToErase)
1057  op->erase();
1058  opsToErase.clear();
1059 
1060  // Walk all store's and perform unused store elimination
1061  f.walk([&](AffineWriteOpInterface storeOp) {
1062  findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1063  });
1064  for (auto *op : opsToErase)
1065  op->erase();
1066  opsToErase.clear();
1067 
1068  // Check if the store fwd'ed memrefs are now left with only stores and
1069  // deallocs and can thus be completely deleted. Note: the canonicalize pass
1070  // should be able to do this as well, but we'll do it here since we collected
1071  // these anyway.
1072  for (auto memref : memrefsToErase) {
1073  // If the memref hasn't been locally alloc'ed, skip.
1074  Operation *defOp = memref.getDefiningOp();
1075  if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1076  // TODO: if the memref was returned by a 'call' operation, we
1077  // could still erase it if the call had no side-effects.
1078  continue;
1079  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1080  return !isa<AffineWriteOpInterface>(ownerOp) &&
1081  !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1082  }))
1083  continue;
1084 
1085  // Erase all stores, the dealloc, and the alloc on the memref.
1086  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1087  user->erase();
1088  defOp->erase();
1089  }
1090 
1091  // To eliminate as many loads as possible, run load CSE after eliminating
1092  // stores. Otherwise, some stores are wrongly seen as having an intervening
1093  // effect.
1094  f.walk([&](AffineReadOpInterface loadOp) {
1095  loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1096  });
1097  for (auto *op : opsToErase)
1098  op->erase();
1099 }
1100 
1101 // Private helper function to transform memref.load with reduced rank.
1102 // This function will modify the indices of the memref.load to match the
1103 // newMemRef.
1105  Operation *op, Value oldMemRef, Value newMemRef, unsigned memRefOperandPos,
1106  ArrayRef<Value> extraIndices, ArrayRef<Value> extraOperands,
1107  ArrayRef<Value> symbolOperands, AffineMap indexRemap) {
1108  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1109  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1110  unsigned oldMapNumInputs = oldMemRefRank;
1111  SmallVector<Value, 4> oldMapOperands(
1112  op->operand_begin() + memRefOperandPos + 1,
1113  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1114  SmallVector<Value, 4> oldMemRefOperands;
1115  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1116  SmallVector<Value, 4> remapOperands;
1117  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1118  symbolOperands.size());
1119  remapOperands.append(extraOperands.begin(), extraOperands.end());
1120  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1121  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1122 
1123  SmallVector<Value, 4> remapOutputs;
1124  remapOutputs.reserve(oldMemRefRank);
1125  SmallVector<Value, 4> affineApplyOps;
1126 
1127  OpBuilder builder(op);
1128 
1129  if (indexRemap &&
1130  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1131  // Remapped indices.
1132  for (auto resultExpr : indexRemap.getResults()) {
1133  auto singleResMap = AffineMap::get(
1134  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1135  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1136  remapOperands);
1137  remapOutputs.push_back(afOp);
1138  affineApplyOps.push_back(afOp);
1139  }
1140  } else {
1141  // No remapping specified.
1142  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1143  }
1144 
1145  SmallVector<Value, 4> newMapOperands;
1146  newMapOperands.reserve(newMemRefRank);
1147 
1148  // Prepend 'extraIndices' in 'newMapOperands'.
1149  for (Value extraIndex : extraIndices) {
1150  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1151  "invalid memory op index");
1152  newMapOperands.push_back(extraIndex);
1153  }
1154 
1155  // Append 'remapOutputs' to 'newMapOperands'.
1156  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1157 
1158  // Create new fully composed AffineMap for new op to be created.
1159  assert(newMapOperands.size() == newMemRefRank);
1160 
1161  OperationState state(op->getLoc(), op->getName());
1162  // Construct the new operation using this memref.
1163  state.operands.reserve(newMapOperands.size() + extraIndices.size());
1164  state.operands.push_back(newMemRef);
1165 
1166  // Insert the new memref map operands.
1167  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1168 
1169  state.types.reserve(op->getNumResults());
1170  for (auto result : op->getResults())
1171  state.types.push_back(result.getType());
1172 
1173  // Copy over the attributes from the old operation to the new operation.
1174  for (auto namedAttr : op->getAttrs()) {
1175  state.attributes.push_back(namedAttr);
1176  }
1177 
1178  // Create the new operation.
1179  auto *repOp = builder.create(state);
1180  op->replaceAllUsesWith(repOp);
1181  op->erase();
1182 
1183  return success();
1184 }
1185 // Perform the replacement in `op`.
1187  Value oldMemRef, Value newMemRef, Operation *op,
1188  ArrayRef<Value> extraIndices, AffineMap indexRemap,
1189  ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1190  bool allowNonDereferencingOps) {
1191  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1192  (void)newMemRefRank; // unused in opt mode
1193  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1194  (void)oldMemRefRank; // unused in opt mode
1195  if (indexRemap) {
1196  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1197  "symbolic operand count mismatch");
1198  assert(indexRemap.getNumInputs() ==
1199  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1200  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1201  } else {
1202  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1203  }
1204 
1205  // Assert same elemental type.
1206  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1207  cast<MemRefType>(newMemRef.getType()).getElementType());
1208 
1209  SmallVector<unsigned, 2> usePositions;
1210  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1211  if (opEntry.value() == oldMemRef)
1212  usePositions.push_back(opEntry.index());
1213  }
1214 
1215  // If memref doesn't appear, nothing to do.
1216  if (usePositions.empty())
1217  return success();
1218 
1219  if (usePositions.size() > 1) {
1220  // TODO: extend it for this case when needed (rare).
1221  assert(false && "multiple dereferencing uses in a single op not supported");
1222  return failure();
1223  }
1224 
1225  unsigned memRefOperandPos = usePositions.front();
1226 
1227  OpBuilder builder(op);
1228  // The following checks if op is dereferencing memref and performs the access
1229  // index rewrites.
1230  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1231  if (!affMapAccInterface) {
1232  if (!allowNonDereferencingOps) {
1233  // Failure: memref used in a non-dereferencing context (potentially
1234  // escapes); no replacement in these cases unless allowNonDereferencingOps
1235  // is set.
1236  return failure();
1237  }
1238 
1239  // Check if it is a memref.load
1240  auto memrefLoad = dyn_cast<memref::LoadOp>(op);
1241  bool isReductionLike =
1242  indexRemap.getNumResults() < indexRemap.getNumInputs();
1243  if (!memrefLoad || !isReductionLike) {
1244  op->setOperand(memRefOperandPos, newMemRef);
1245  return success();
1246  }
1247 
1249  op, oldMemRef, newMemRef, memRefOperandPos, extraIndices, extraOperands,
1250  symbolOperands, indexRemap);
1251  }
1252  // Perform index rewrites for the dereferencing op and then replace the op
1253  NamedAttribute oldMapAttrPair =
1254  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1255  AffineMap oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1256  unsigned oldMapNumInputs = oldMap.getNumInputs();
1257  SmallVector<Value, 4> oldMapOperands(
1258  op->operand_begin() + memRefOperandPos + 1,
1259  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1260 
1261  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1262  SmallVector<Value, 4> oldMemRefOperands;
1263  SmallVector<Value, 4> affineApplyOps;
1264  oldMemRefOperands.reserve(oldMemRefRank);
1265  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1266  for (auto resultExpr : oldMap.getResults()) {
1267  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1268  oldMap.getNumSymbols(), resultExpr);
1269  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1270  oldMapOperands);
1271  oldMemRefOperands.push_back(afOp);
1272  affineApplyOps.push_back(afOp);
1273  }
1274  } else {
1275  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1276  }
1277 
1278  // Construct new indices as a remap of the old ones if a remapping has been
1279  // provided. The indices of a memref come right after it, i.e.,
1280  // at position memRefOperandPos + 1.
1281  SmallVector<Value, 4> remapOperands;
1282  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1283  symbolOperands.size());
1284  remapOperands.append(extraOperands.begin(), extraOperands.end());
1285  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1286  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1287 
1288  SmallVector<Value, 4> remapOutputs;
1289  remapOutputs.reserve(oldMemRefRank);
1290 
1291  if (indexRemap &&
1292  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1293  // Remapped indices.
1294  for (auto resultExpr : indexRemap.getResults()) {
1295  auto singleResMap = AffineMap::get(
1296  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1297  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1298  remapOperands);
1299  remapOutputs.push_back(afOp);
1300  affineApplyOps.push_back(afOp);
1301  }
1302  } else {
1303  // No remapping specified.
1304  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1305  }
1306 
1307  SmallVector<Value, 4> newMapOperands;
1308  newMapOperands.reserve(newMemRefRank);
1309 
1310  // Prepend 'extraIndices' in 'newMapOperands'.
1311  for (Value extraIndex : extraIndices) {
1312  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1313  "invalid memory op index");
1314  newMapOperands.push_back(extraIndex);
1315  }
1316 
1317  // Append 'remapOutputs' to 'newMapOperands'.
1318  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1319 
1320  // Create new fully composed AffineMap for new op to be created.
1321  assert(newMapOperands.size() == newMemRefRank);
1322  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1323  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1324  newMap = simplifyAffineMap(newMap);
1325  canonicalizeMapAndOperands(&newMap, &newMapOperands);
1326  // Remove any affine.apply's that became dead as a result of composition.
1327  for (Value value : affineApplyOps)
1328  if (value.use_empty())
1329  value.getDefiningOp()->erase();
1330 
1331  OperationState state(op->getLoc(), op->getName());
1332  // Construct the new operation using this memref.
1333  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1334  // Insert the non-memref operands.
1335  state.operands.append(op->operand_begin(),
1336  op->operand_begin() + memRefOperandPos);
1337  // Insert the new memref value.
1338  state.operands.push_back(newMemRef);
1339 
1340  // Insert the new memref map operands.
1341  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1342 
1343  // Insert the remaining operands unmodified.
1344  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1345  oldMapNumInputs,
1346  op->operand_end());
1347 
1348  // Result types don't change. Both memref's are of the same elemental type.
1349  state.types.reserve(op->getNumResults());
1350  for (auto result : op->getResults())
1351  state.types.push_back(result.getType());
1352 
1353  // Add attribute for 'newMap', other Attributes do not change.
1354  auto newMapAttr = AffineMapAttr::get(newMap);
1355  for (auto namedAttr : op->getAttrs()) {
1356  if (namedAttr.getName() == oldMapAttrPair.getName())
1357  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1358  else
1359  state.attributes.push_back(namedAttr);
1360  }
1361 
1362  // Create the new operation.
1363  auto *repOp = builder.create(state);
1364  op->replaceAllUsesWith(repOp);
1365  op->erase();
1366 
1367  return success();
1368 }
1369 
1371  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1372  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1373  ArrayRef<Value> symbolOperands, Operation *domOpFilter,
1374  Operation *postDomOpFilter, bool allowNonDereferencingOps,
1375  bool replaceInDeallocOp) {
1376  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1377  (void)newMemRefRank; // unused in opt mode
1378  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1379  (void)oldMemRefRank;
1380  if (indexRemap) {
1381  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1382  "symbol operand count mismatch");
1383  assert(indexRemap.getNumInputs() ==
1384  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1385  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1386  } else {
1387  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1388  }
1389 
1390  // Assert same elemental type.
1391  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1392  cast<MemRefType>(newMemRef.getType()).getElementType());
1393 
1394  std::unique_ptr<DominanceInfo> domInfo;
1395  std::unique_ptr<PostDominanceInfo> postDomInfo;
1396  if (domOpFilter)
1397  domInfo = std::make_unique<DominanceInfo>(
1398  domOpFilter->getParentOfType<FunctionOpInterface>());
1399 
1400  if (postDomOpFilter)
1401  postDomInfo = std::make_unique<PostDominanceInfo>(
1402  postDomOpFilter->getParentOfType<FunctionOpInterface>());
1403 
1404  // Walk all uses of old memref; collect ops to perform replacement. We use a
1405  // DenseSet since an operation could potentially have multiple uses of a
1406  // memref (although rare), and the replacement later is going to erase ops.
1407  DenseSet<Operation *> opsToReplace;
1408  for (auto *op : oldMemRef.getUsers()) {
1409  // Skip this use if it's not dominated by domOpFilter.
1410  if (domOpFilter && !domInfo->dominates(domOpFilter, op))
1411  continue;
1412 
1413  // Skip this use if it's not post-dominated by postDomOpFilter.
1414  if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
1415  continue;
1416 
1417  // Skip dealloc's - no replacement is necessary, and a memref replacement
1418  // at other uses doesn't hurt these dealloc's.
1419  if (hasSingleEffect<MemoryEffects::Free>(op, oldMemRef) &&
1420  !replaceInDeallocOp)
1421  continue;
1422 
1423  // Check if the memref was used in a non-dereferencing context. It is fine
1424  // for the memref to be used in a non-dereferencing way outside of the
1425  // region where this replacement is happening.
1426  if (!isa<AffineMapAccessInterface>(*op)) {
1427  if (!allowNonDereferencingOps) {
1428  LLVM_DEBUG(llvm::dbgs()
1429  << "Memref replacement failed: non-deferencing memref op: \n"
1430  << *op << '\n');
1431  return failure();
1432  }
1433  // Non-dereferencing ops with the MemRefsNormalizable trait are
1434  // supported for replacement.
1435  if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
1436  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1437  "memrefs normalizable trait: \n"
1438  << *op << '\n');
1439  return failure();
1440  }
1441  }
1442 
1443  // We'll first collect and then replace --- since replacement erases the op
1444  // that has the use, and that op could be postDomFilter or domFilter itself!
1445  opsToReplace.insert(op);
1446  }
1447 
1448  for (auto *op : opsToReplace) {
1449  if (failed(replaceAllMemRefUsesWith(
1450  oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
1451  symbolOperands, allowNonDereferencingOps)))
1452  llvm_unreachable("memref replacement guaranteed to succeed here");
1453  }
1454 
1455  return success();
1456 }
1457 
1458 /// Given an operation, inserts one or more single result affine
1459 /// apply operations, results of which are exclusively used by this operation
1460 /// operation. The operands of these newly created affine apply ops are
1461 /// guaranteed to be loop iterators or terminal symbols of a function.
1462 ///
1463 /// Before
1464 ///
1465 /// affine.for %i = 0 to #map(%N)
1466 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1467 /// "send"(%idx, %A, ...)
1468 /// "compute"(%idx)
1469 ///
1470 /// After
1471 ///
1472 /// affine.for %i = 0 to #map(%N)
1473 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1474 /// "send"(%idx, %A, ...)
1475 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1476 /// "compute"(%idx_)
1477 ///
1478 /// This allows applying different transformations on send and compute (for eg.
1479 /// different shifts/delays).
1480 ///
1481 /// Returns nullptr either if none of opInst's operands were the result of an
1482 /// affine.apply and thus there was no affine computation slice to create, or if
1483 /// all the affine.apply op's supplying operands to this opInst did not have any
1484 /// uses besides this opInst; otherwise returns the list of affine.apply
1485 /// operations created in output argument `sliceOps`.
1487  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1488  // Collect all operands that are results of affine apply ops.
1489  SmallVector<Value, 4> subOperands;
1490  subOperands.reserve(opInst->getNumOperands());
1491  for (auto operand : opInst->getOperands())
1492  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1493  subOperands.push_back(operand);
1494 
1495  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1496  SmallVector<Operation *, 4> affineApplyOps;
1497  getReachableAffineApplyOps(subOperands, affineApplyOps);
1498  // Skip transforming if there are no affine maps to compose.
1499  if (affineApplyOps.empty())
1500  return;
1501 
1502  // Check if all uses of the affine apply op's lie only in this op op, in
1503  // which case there would be nothing to do.
1504  bool localized = true;
1505  for (auto *op : affineApplyOps) {
1506  for (auto result : op->getResults()) {
1507  for (auto *user : result.getUsers()) {
1508  if (user != opInst) {
1509  localized = false;
1510  break;
1511  }
1512  }
1513  }
1514  }
1515  if (localized)
1516  return;
1517 
1518  OpBuilder builder(opInst);
1519  SmallVector<Value, 4> composedOpOperands(subOperands);
1520  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1521  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1522 
1523  // Create an affine.apply for each of the map results.
1524  sliceOps->reserve(composedMap.getNumResults());
1525  for (auto resultExpr : composedMap.getResults()) {
1526  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1527  composedMap.getNumSymbols(), resultExpr);
1528  sliceOps->push_back(builder.create<AffineApplyOp>(
1529  opInst->getLoc(), singleResMap, composedOpOperands));
1530  }
1531 
1532  // Construct the new operands that include the results from the composed
1533  // affine apply op above instead of existing ones (subOperands). So, they
1534  // differ from opInst's operands only for those operands in 'subOperands', for
1535  // which they will be replaced by the corresponding one from 'sliceOps'.
1536  SmallVector<Value, 4> newOperands(opInst->getOperands());
1537  for (Value &operand : newOperands) {
1538  // Replace the subOperands from among the new operands.
1539  unsigned j, f;
1540  for (j = 0, f = subOperands.size(); j < f; j++) {
1541  if (operand == subOperands[j])
1542  break;
1543  }
1544  if (j < subOperands.size())
1545  operand = (*sliceOps)[j];
1546  }
1547  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1548  opInst->setOperand(idx, newOperands[idx]);
1549 }
1550 
1551 /// Enum to set patterns of affine expr in tiled-layout map.
1552 /// TileFloorDiv: <dim expr> div <tile size>
1553 /// TileMod: <dim expr> mod <tile size>
1554 /// TileNone: None of the above
1555 /// Example:
1556 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1557 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1558 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1559 /// "d0 mod 128" and "d1 mod 256" ==> TileMod
1561 
1562 /// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
1563 /// being floordiv'ed by respective tile sizes appeare in a mod with the same
1564 /// tile sizes, and no other expression involves those k dimensions. This
1565 /// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
1566 /// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
1567 /// tiled layout, an empty vector is returned.
1568 static LogicalResult getTileSizePos(
1569  AffineMap map,
1570  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1571  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1572  // `floordiv` and its position in `map` output.
1573  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1574  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1575  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1577  unsigned pos = 0;
1578  for (AffineExpr expr : map.getResults()) {
1579  if (expr.getKind() == AffineExprKind::FloorDiv) {
1580  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1581  if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1582  floordivExprs.emplace_back(
1583  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1584  }
1585  pos++;
1586  }
1587  // Not tiled layout if `floordivExprs` is empty.
1588  if (floordivExprs.empty()) {
1590  return success();
1591  }
1592 
1593  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1594  // not tiled layout.
1595  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1596  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1597  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1598  unsigned floordivPos = std::get<2>(fexpr);
1599 
1600  // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
1601  // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
1602  // other expr, the map is not tiled layout. Example of non tiled layout:
1603  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1604  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1605  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1606  // 256)>
1607  bool found = false;
1608  pos = 0;
1609  for (AffineExpr expr : map.getResults()) {
1610  bool notTiled = false;
1611  if (pos != floordivPos) {
1612  expr.walk([&](AffineExpr e) {
1613  if (e == floordivExprLHS) {
1614  if (expr.getKind() == AffineExprKind::Mod) {
1615  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1616  // If LHS and RHS of `mod` are the same with those of floordiv.
1617  if (floordivExprLHS == binaryExpr.getLHS() &&
1618  floordivExprRHS == binaryExpr.getRHS()) {
1619  // Save tile size (RHS of `mod`), and position of `floordiv` and
1620  // `mod` if same expr with `mod` is not found yet.
1621  if (!found) {
1622  tileSizePos.emplace_back(
1623  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1624  found = true;
1625  } else {
1626  // Non tiled layout: Have multilpe `mod` with the same LHS.
1627  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1628  // mod 256, d2 mod 256)>
1629  notTiled = true;
1630  }
1631  } else {
1632  // Non tiled layout: RHS of `mod` is different from `floordiv`.
1633  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1634  // mod 128)>
1635  notTiled = true;
1636  }
1637  } else {
1638  // Non tiled layout: LHS is the same, but not `mod`.
1639  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1640  // floordiv 256)>
1641  notTiled = true;
1642  }
1643  }
1644  });
1645  }
1646  if (notTiled) {
1648  return success();
1649  }
1650  pos++;
1651  }
1652  }
1653  return success();
1654 }
1655 
1656 /// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1657 /// after normalization. Dimensions that include dynamic dimensions in the map
1658 /// output will become dynamic dimensions. Return true if `dim` is dynamic
1659 /// dimension.
1660 ///
1661 /// Example:
1662 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1663 ///
1664 /// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
1665 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1666 static bool
1667 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1668  SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1669  AffineExpr expr = layoutMap.getResults()[dim];
1670  // Check if affine expr of the dimension includes dynamic dimension of input
1671  // memrefType.
1672  MLIRContext *context = layoutMap.getContext();
1673  return expr
1674  .walk([&](AffineExpr e) {
1675  if (isa<AffineDimExpr>(e) &&
1676  llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1677  return e == getAffineDimExpr(dim, context);
1678  }))
1679  return WalkResult::interrupt();
1680  return WalkResult::advance();
1681  })
1682  .wasInterrupted();
1683 }
1684 
1685 /// Create affine expr to calculate dimension size for a tiled-layout map.
1687  TileExprPattern pat) {
1688  // Create map output for the patterns.
1689  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1690  // "mod <tile size>" ==> "<tile size>"
1691  AffineExpr newMapOutput;
1692  AffineBinaryOpExpr binaryExpr = nullptr;
1693  switch (pat) {
1695  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1696  newMapOutput = binaryExpr.getRHS();
1697  break;
1699  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1700  newMapOutput = getAffineBinaryOpExpr(
1701  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1702  break;
1703  default:
1704  newMapOutput = oldMapOutput;
1705  }
1706  return newMapOutput;
1707 }
1708 
1709 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1710 /// create `newDynamicSizes` from them by using AffineApplyOp.
1711 ///
1712 /// Steps for normalizing dynamic memrefs for a tiled layout map
1713 /// Example:
1714 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1715 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1716 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1717 ///
1718 /// (Before this function)
1719 /// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
1720 /// single layout map is supported.
1721 ///
1722 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1723 /// is memref<4x?x?xf32> in the above example.
1724 ///
1725 /// (In this function)
1726 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1727 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1728 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1729 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1730 /// - New map in the above example
1731 /// #map0 = affine_map<(d0, d1) -> (d0)>
1732 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1733 /// #map2 = affine_map<(d0, d1) -> (32)>
1734 ///
1735 /// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1736 /// is used in dynamicSizes of new AllocOp.
1737 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1738 /// %c4 = arith.constant 4 : index
1739 /// %1 = affine.apply #map1(%c4, %0)
1740 /// %2 = affine.apply #map2(%c4, %0)
1741 template <typename AllocLikeOp>
1742 static void createNewDynamicSizes(MemRefType oldMemRefType,
1743  MemRefType newMemRefType, AffineMap map,
1744  AllocLikeOp allocOp, OpBuilder b,
1745  SmallVectorImpl<Value> &newDynamicSizes) {
1746  // Create new input for AffineApplyOp.
1747  SmallVector<Value, 4> inAffineApply;
1748  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1749  unsigned dynIdx = 0;
1750  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1751  if (oldMemRefShape[d] < 0) {
1752  // Use dynamicSizes of allocOp for dynamic dimension.
1753  inAffineApply.emplace_back(allocOp.getDynamicSizes()[dynIdx]);
1754  dynIdx++;
1755  } else {
1756  // Create ConstantOp for static dimension.
1757  auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1758  inAffineApply.emplace_back(
1759  b.create<arith::ConstantOp>(allocOp.getLoc(), constantAttr));
1760  }
1761  }
1762 
1763  // Create new map to calculate each dimension size of new memref for each
1764  // original map output. Only for dynamic dimesion of `newMemRefType`.
1765  unsigned newDimIdx = 0;
1766  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1768  (void)getTileSizePos(map, tileSizePos);
1769  for (AffineExpr expr : map.getResults()) {
1770  if (newMemRefShape[newDimIdx] < 0) {
1771  // Create new maps to calculate each dimension size of new memref.
1773  for (auto pos : tileSizePos) {
1774  if (newDimIdx == std::get<1>(pos))
1776  else if (newDimIdx == std::get<2>(pos))
1778  }
1779  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1780  AffineMap newMap =
1781  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1782  Value affineApp =
1783  b.create<AffineApplyOp>(allocOp.getLoc(), newMap, inAffineApply);
1784  newDynamicSizes.emplace_back(affineApp);
1785  }
1786  newDimIdx++;
1787  }
1788 }
1789 
1790 template <typename AllocLikeOp>
1791 LogicalResult mlir::affine::normalizeMemRef(AllocLikeOp allocOp) {
1792  MemRefType memrefType = allocOp.getType();
1793  OpBuilder b(allocOp);
1794 
1795  // Fetch a new memref type after normalizing the old memref to have an
1796  // identity map layout.
1797  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1798  if (newMemRefType == memrefType)
1799  // Either memrefType already had an identity map or the map couldn't be
1800  // transformed to an identity map.
1801  return failure();
1802 
1803  Value oldMemRef = allocOp.getResult();
1804 
1805  SmallVector<Value, 4> symbolOperands(allocOp.getSymbolOperands());
1806  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1807  AllocLikeOp newAlloc;
1808  // Check if `layoutMap` is a tiled layout. Only single layout map is
1809  // supported for normalizing dynamic memrefs.
1811  (void)getTileSizePos(layoutMap, tileSizePos);
1812  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1813  auto oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1814  SmallVector<Value, 4> newDynamicSizes;
1815  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1816  newDynamicSizes);
1817  // Add the new dynamic sizes in new AllocOp.
1818  newAlloc =
1819  b.create<AllocLikeOp>(allocOp.getLoc(), newMemRefType, newDynamicSizes,
1820  allocOp.getAlignmentAttr());
1821  } else {
1822  newAlloc = b.create<AllocLikeOp>(allocOp.getLoc(), newMemRefType,
1823  allocOp.getAlignmentAttr());
1824  }
1825  // Replace all uses of the old memref.
1826  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1827  /*extraIndices=*/{},
1828  /*indexRemap=*/layoutMap,
1829  /*extraOperands=*/{},
1830  /*symbolOperands=*/symbolOperands,
1831  /*domOpFilter=*/nullptr,
1832  /*postDomOpFilter=*/nullptr,
1833  /*allowNonDereferencingOps=*/true))) {
1834  // If it failed (due to escapes for example), bail out.
1835  newAlloc.erase();
1836  return failure();
1837  }
1838  // Replace any uses of the original alloc op and erase it. All remaining uses
1839  // have to be dealloc's; RAMUW above would've failed otherwise.
1840  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1841  return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1842  }));
1843  oldMemRef.replaceAllUsesWith(newAlloc);
1844  allocOp.erase();
1845  return success();
1846 }
1847 
1848 template LogicalResult
1849 mlir::affine::normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
1850 template LogicalResult
1851 mlir::affine::normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
1852 
1853 MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1854  unsigned rank = memrefType.getRank();
1855  if (rank == 0)
1856  return memrefType;
1857 
1858  if (memrefType.getLayout().isIdentity()) {
1859  // Either no maps is associated with this memref or this memref has
1860  // a trivial (identity) map.
1861  return memrefType;
1862  }
1863  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1864  unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1865 
1866  // We don't do any checks for one-to-one'ness; we assume that it is
1867  // one-to-one.
1868 
1869  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1870  // for now.
1871  // TODO: Normalize the other types of dynamic memrefs.
1873  (void)getTileSizePos(layoutMap, tileSizePos);
1874  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1875  return memrefType;
1876 
1877  // We have a single map that is not an identity map. Create a new memref
1878  // with the right shape and an identity layout map.
1879  ArrayRef<int64_t> shape = memrefType.getShape();
1880  // FlatAffineValueConstraint may later on use symbolicOperands.
1881  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1882  SmallVector<unsigned, 4> memrefTypeDynDims;
1883  for (unsigned d = 0; d < rank; ++d) {
1884  // Use constraint system only in static dimensions.
1885  if (shape[d] > 0) {
1886  fac.addBound(BoundType::LB, d, 0);
1887  fac.addBound(BoundType::UB, d, shape[d] - 1);
1888  } else {
1889  memrefTypeDynDims.emplace_back(d);
1890  }
1891  }
1892  // We compose this map with the original index (logical) space to derive
1893  // the upper bounds for the new index space.
1894  unsigned newRank = layoutMap.getNumResults();
1895  if (failed(fac.composeMatchingMap(layoutMap)))
1896  return memrefType;
1897  // TODO: Handle semi-affine maps.
1898  // Project out the old data dimensions.
1899  fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1900  SmallVector<int64_t, 4> newShape(newRank);
1901  MLIRContext *context = memrefType.getContext();
1902  for (unsigned d = 0; d < newRank; ++d) {
1903  // Check if this dimension is dynamic.
1904  if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1905  newShape[d] = ShapedType::kDynamic;
1906  continue;
1907  }
1908  // The lower bound for the shape is always zero.
1909  std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1910  // For a static memref and an affine map with no symbols, this is
1911  // always bounded. However, when we have symbols, we may not be able to
1912  // obtain a constant upper bound. Also, mapping to a negative space is
1913  // invalid for normalization.
1914  if (!ubConst.has_value() || *ubConst < 0) {
1915  LLVM_DEBUG(llvm::dbgs()
1916  << "can't normalize map due to unknown/invalid upper bound");
1917  return memrefType;
1918  }
1919  // If dimension of new memrefType is dynamic, the value is -1.
1920  newShape[d] = *ubConst + 1;
1921  }
1922 
1923  // Create the new memref type after trivializing the old layout map.
1924  auto newMemRefType =
1925  MemRefType::Builder(memrefType)
1926  .setShape(newShape)
1928  AffineMap::getMultiDimIdentityMap(newRank, context)));
1929  return newMemRefType;
1930 }
1931 
1933  Value rhs) {
1934  DivModValue result;
1935  AffineExpr d0, d1;
1936  bindDims(b.getContext(), d0, d1);
1937  result.quotient =
1938  affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1939  result.remainder =
1940  affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1941  return result;
1942 }
1943 
1944 /// Create an affine map that computes `lhs` * `rhs`, composing in any other
1945 /// affine maps.
1946 static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1947  Location loc,
1948  OpFoldResult lhs,
1949  OpFoldResult rhs) {
1950  AffineExpr s0, s1;
1951  bindSymbols(b.getContext(), s0, s1);
1952  return makeComposedFoldedAffineApply(b, loc, s0 * s1, {lhs, rhs});
1953 }
1954 
1955 FailureOr<SmallVector<Value>>
1957  ArrayRef<Value> basis, bool hasOuterBound) {
1958  if (hasOuterBound)
1959  basis = basis.drop_front();
1960 
1961  // Note: the divisors are backwards due to the scan.
1962  SmallVector<Value> divisors;
1963  OpFoldResult basisProd = b.getIndexAttr(1);
1964  for (OpFoldResult basisElem : llvm::reverse(basis)) {
1965  FailureOr<OpFoldResult> nextProd =
1966  composedAffineMultiply(b, loc, basisElem, basisProd);
1967  if (failed(nextProd))
1968  return failure();
1969  basisProd = *nextProd;
1970  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1971  }
1972 
1973  SmallVector<Value> results;
1974  results.reserve(divisors.size() + 1);
1975  Value residual = linearIndex;
1976  for (Value divisor : llvm::reverse(divisors)) {
1977  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1978  results.push_back(divMod.quotient);
1979  residual = divMod.remainder;
1980  }
1981  results.push_back(residual);
1982  return results;
1983 }
1984 
1985 FailureOr<SmallVector<Value>>
1987  ArrayRef<OpFoldResult> basis,
1988  bool hasOuterBound) {
1989  if (hasOuterBound)
1990  basis = basis.drop_front();
1991 
1992  // Note: the divisors are backwards due to the scan.
1993  SmallVector<Value> divisors;
1994  OpFoldResult basisProd = b.getIndexAttr(1);
1995  for (OpFoldResult basisElem : llvm::reverse(basis)) {
1996  FailureOr<OpFoldResult> nextProd =
1997  composedAffineMultiply(b, loc, basisElem, basisProd);
1998  if (failed(nextProd))
1999  return failure();
2000  basisProd = *nextProd;
2001  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
2002  }
2003 
2004  SmallVector<Value> results;
2005  results.reserve(divisors.size() + 1);
2006  Value residual = linearIndex;
2007  for (Value divisor : llvm::reverse(divisors)) {
2008  DivModValue divMod = getDivMod(b, loc, residual, divisor);
2009  results.push_back(divMod.quotient);
2010  residual = divMod.remainder;
2011  }
2012  results.push_back(residual);
2013  return results;
2014 }
2015 
2017  ArrayRef<OpFoldResult> basis,
2018  ImplicitLocOpBuilder &builder) {
2019  return linearizeIndex(builder, builder.getLoc(), multiIndex, basis);
2020 }
2021 
2023  ArrayRef<OpFoldResult> multiIndex,
2024  ArrayRef<OpFoldResult> basis) {
2025  assert(multiIndex.size() == basis.size() ||
2026  multiIndex.size() == basis.size() + 1);
2027  SmallVector<AffineExpr> basisAffine;
2028 
2029  // Add a fake initial size in order to make the later index linearization
2030  // computations line up if an outer bound is not provided.
2031  if (multiIndex.size() == basis.size() + 1)
2032  basisAffine.push_back(getAffineConstantExpr(1, builder.getContext()));
2033 
2034  for (size_t i = 0; i < basis.size(); ++i) {
2035  basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
2036  }
2037 
2038  SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
2039  SmallVector<OpFoldResult> strides;
2040  strides.reserve(stridesAffine.size());
2041  llvm::transform(stridesAffine, std::back_inserter(strides),
2042  [&builder, &basis, loc](AffineExpr strideExpr) {
2044  builder, loc, strideExpr, basis);
2045  });
2046 
2047  auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2048  OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
2049  return affine::makeComposedFoldedAffineApply(builder, loc, linearIndexExpr,
2050  multiIndexAndStrides);
2051 }
static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp, unsigned minSurroundingLoops)
Returns true if srcMemOp may have an effect on destMemOp within the scope of the outermost minSurroun...
Definition: Utils.cpp:653
static void createNewDynamicSizes(MemRefType oldMemRefType, MemRefType newMemRefType, AffineMap map, AllocLikeOp allocOp, OpBuilder b, SmallVectorImpl< Value > &newDynamicSizes)
Create new maps to calculate each dimension size of newMemRefType, and create newDynamicSizes from th...
Definition: Utils.cpp:1742
static LogicalResult getTileSizePos(AffineMap map, SmallVectorImpl< std::tuple< AffineExpr, unsigned, unsigned >> &tileSizePos)
Check if map is a tiled layout.
Definition: Utils.cpp:1568
TileExprPattern
Enum to set patterns of affine expr in tiled-layout map.
Definition: Utils.cpp:1560
@ TileFloorDiv
Definition: Utils.cpp:1560
@ TileNone
Definition: Utils.cpp:1560
@ TileMod
Definition: Utils.cpp:1560
static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock)
Promotes the then or the else block of ifOp (depending on whether elseBlock is false or true) into if...
Definition: Utils.cpp:247
static bool isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap, SmallVectorImpl< unsigned > &inMemrefTypeDynDims)
Check if dim dimension of memrefType with layoutMap becomes dynamic after normalization.
Definition: Utils.cpp:1667
LogicalResult transformMemRefLoadWithReducedRank(Operation *op, Value oldMemRef, Value newMemRef, unsigned memRefOperandPos, ArrayRef< Value > extraIndices, ArrayRef< Value > extraOperands, ArrayRef< Value > symbolOperands, AffineMap indexRemap)
Definition: Utils.cpp:1104
static FailureOr< OpFoldResult > composedAffineMultiply(OpBuilder &b, Location loc, OpFoldResult lhs, OpFoldResult rhs)
Create an affine map that computes lhs * rhs, composing in any other affine maps.
Definition: Utils.cpp:1946
static void loadCSE(AffineReadOpInterface loadA, SmallVectorImpl< Operation * > &loadOpsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition: Utils.cpp:958
static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput, TileExprPattern pat)
Create affine expr to calculate dimension size for a tiled-layout map.
Definition: Utils.cpp:1686
static Operation * getOutermostInvariantForOp(AffineIfOp ifOp)
Returns the outermost affine.for/parallel op that the ifOp is invariant on.
Definition: Utils.cpp:263
static void findUnusedStore(AffineWriteOpInterface writeA, SmallVectorImpl< Operation * > &opsToErase, PostDominanceInfo &postDominanceInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition: Utils.cpp:911
static bool mustReachAtInnermost(const MemRefAccess &srcAccess, const MemRefAccess &destAccess)
Returns true if the memory operation of destAccess depends on srcAccess inside of the innermost commo...
Definition: Utils.cpp:635
static void forwardStoreToLoad(AffineReadOpInterface loadOp, SmallVectorImpl< Operation * > &loadOpsToErase, SmallPtrSetImpl< Value > &memrefsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Attempt to eliminate loadOp by replacing it with a value stored into memory which the load is guarant...
Definition: Utils.cpp:834
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Definition: PDL.cpp:63
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Affine binary operation expression.
Definition: AffineExpr.h:214
AffineExpr getLHS() const
Definition: AffineExpr.cpp:340
AffineExpr getRHS() const
Definition: AffineExpr.cpp:343
An integer constant appearing in affine expression.
Definition: AffineExpr.h:239
int64_t getValue() const
Definition: AffineExpr.cpp:637
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:223
unsigned getPosition() const
Definition: AffineExpr.cpp:348
See documentation for AffineExprVisitorBase.
Base type for affine expression.
Definition: AffineExpr.h:68
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:921
RetT walk(FnT &&callback) const
Walk all of the AffineExpr's in this expression in postorder.
Definition: AffineExpr.h:117
AffineExpr ceilDiv(uint64_t v) const
Definition: AffineExpr.cpp:968
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
MLIRContext * getContext() const
Definition: AffineMap.cpp:343
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
Definition: AffineMap.cpp:334
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
Definition: AffineMap.cpp:398
unsigned getNumDims() const
Definition: AffineMap.cpp:394
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:407
unsigned getNumResults() const
Definition: AffineMap.cpp:402
unsigned getNumInputs() const
Definition: AffineMap.cpp:403
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:411
AffineMap compose(AffineMap map) const
Returns the AffineMap resulting from composing this with map.
Definition: AffineMap.cpp:556
A symbolic identifier appearing in an affine expression.
Definition: AffineExpr.h:231
unsigned getPosition() const
Definition: AffineExpr.cpp:627
This class represents the main alias analysis interface in MLIR.
AliasResult alias(Value lhs, Value rhs)
Given two values, return their aliasing behavior.
bool isNo() const
Returns if this result indicates no possibility of aliasing.
Definition: AliasAnalysis.h:59
This class represents an argument of a Block.
Definition: Value.h:295
Block represents an ordered list of Operations.
Definition: Block.h:33
OpListType::iterator iterator
Definition: Block.h:140
SuccessorRange getSuccessors()
Definition: Block.h:267
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:246
OpListType & getOperations()
Definition: Block.h:137
iterator end()
Definition: Block.h:144
iterator begin()
Definition: Block.h:143
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:104
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition: Builders.cpp:224
AffineMap getMultiDimIdentityMap(unsigned rank)
Definition: Builders.cpp:383
BoolAttr getBoolAttr(bool value)
Definition: Builders.cpp:96
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:258
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:360
AffineMap getConstantAffineMap(int64_t val)
Returns a single constant result affine map with 0 dimensions and 0 symbols.
Definition: Builders.cpp:374
MLIRContext * getContext() const
Definition: Builders.h:56
IndexType getIndexType()
Definition: Builders.cpp:51
A class for computing basic dominance information.
Definition: Dominance.h:140
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
Definition: Dominance.h:158
LogicalResult composeMatchingMap(AffineMap other)
Composes an affine map whose dimensions and symbols match one to one with the dimensions and symbols ...
void projectOut(Value val)
Projects out the variable that is associate with Value.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
GreedyRewriteConfig & setStrictness(GreedyRewriteStrictness mode)
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
void clear()
Clears all mappings held by the mapper.
Definition: IRMapping.h:79
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Location getLoc() const
Accessors for the implied location.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This is a builder type that keeps local references to arguments.
Definition: BuiltinTypes.h:166
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
Definition: BuiltinTypes.h:187
Builder & setShape(ArrayRef< int64_t > newShape)
Definition: BuiltinTypes.h:177
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:55
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:179
This class helps build Operations.
Definition: Builders.h:205
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition: Builders.h:238
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition: Builders.h:443
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:549
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:453
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Definition: Builders.h:410
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition: Builders.h:440
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271
This trait indicates that the memory effects of an operation includes the effects of operations neste...
This class provides the API for ops that are known to be isolated from above.
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:43
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:750
void setOperand(unsigned idx, Value value)
Definition: Operation.h:351
operand_iterator operand_begin()
Definition: Operation.h:374
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
operand_iterator operand_end()
Definition: Operation.h:375
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:687
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
bool isAncestor(Operation *other)
Return true if this operation is an ancestor of the other operation.
Definition: Operation.h:263
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Definition: Operation.cpp:237
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:874
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:415
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
A class for computing basic postdominance information.
Definition: Dominance.h:204
bool postDominates(Operation *a, Operation *b) const
Return true if operation A postdominates operation B.
Definition: Dominance.h:213
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
bool isAncestor(Region *other)
Return true if this region is ancestor of the other region.
Definition: Region.h:222
void takeBody(Region &other)
Takes body of another region (that region will have no body after this operation completes).
Definition: Region.h:241
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition: Value.cpp:61
void replaceAllUsesWith(Value newValue)
Replace all uses of 'this' value with the new value, updating anything in the IR that uses 'this' to ...
Definition: Value.h:149
user_range getUsers() const
Definition: Value.h:204
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
static WalkResult advance()
Definition: Visitors.h:51
static WalkResult interrupt()
Definition: Visitors.h:50
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
LogicalResult canonicalize()
Attempts to canonicalize the map and operands.
Definition: AffineOps.cpp:4044
unsigned getNumSymbols() const
ArrayRef< Value > getOperands() const
AffineExpr getResult(unsigned i)
unsigned getNumResults() const
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
std::optional< SmallVector< Value, 8 > > expandAffineMap(OpBuilder &builder, Location loc, AffineMap affineMap, ValueRange operands)
Create a sequence of operations that implement the affineMap applied to the given operands (as it it ...
Definition: Utils.cpp:229
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
Definition: AffineOps.cpp:1157
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo, PostDominanceInfo &postDomInfo, AliasAnalysis &analysis)
Replace affine store and load accesses by scalars by forwarding stores to loads and eliminate invaria...
Definition: Utils.cpp:1039
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
Definition: LoopUtils.cpp:118
bool isValidDim(Value value)
Returns true if the given Value can be used as a dimension id in the region of the closest surroundin...
Definition: AffineOps.cpp:288
Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr, ValueRange dimValues, ValueRange symbolValues)
Emit code that computes the given affine expression using standard arithmetic operations applied to t...
Definition: Utils.cpp:219
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:2030
void normalizeAffineParallel(AffineParallelOp op)
Normalize a affine.parallel op so that lower bounds are 0 and steps are 1.
Definition: Utils.cpp:495
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
LogicalResult affineParallelize(AffineForOp forOp, ArrayRef< LoopReduction > parallelReductions={}, AffineParallelOp *resOp=nullptr)
Replaces a parallel affine.for op with a 1-d affine.parallel op.
Definition: Utils.cpp:353
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1167
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1508
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
LogicalResult normalizeAffineFor(AffineForOp op, bool promoteSingleIter=false)
Normalize an affine.for op.
Definition: Utils.cpp:559
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
Definition: AffineOps.cpp:273
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:402
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1217
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
MemRefType normalizeMemRefType(MemRefType memrefType)
Normalizes memrefType so that the affine layout map of the memref is transformed to an identity map w...
Definition: Utils.cpp:1853
LogicalResult normalizeMemRef(AllocLikeOp op)
Rewrites the memref defined by this alloc op to have an identity layout map and updates all its index...
Definition: Utils.cpp:1791
FailureOr< SmallVector< Value > > delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex, ArrayRef< Value > basis, bool hasOuterBound=true)
Generate the IR to delinearize linearIndex given the basis and return the multi-index.
Definition: Utils.cpp:1956
OpFoldResult linearizeIndex(ArrayRef< OpFoldResult > multiIndex, ArrayRef< OpFoldResult > basis, ImplicitLocOpBuilder &builder)
Definition: Utils.cpp:2016
DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs)
Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
Definition: Utils.cpp:1932
bool hasNoInterveningEffect(Operation *start, T memOp, llvm::function_ref< bool(Value, Value)> mayAlias)
Ensure that all operations that could be executed after start (noninclusive) and prior to memOp (e....
Definition: Utils.cpp:686
void createAffineComputationSlice(Operation *opInst, SmallVectorImpl< AffineApplyOp > *sliceOps)
Given an operation, inserts one or more single result affine apply operations, results of which are e...
Definition: Utils.cpp:1486
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded=nullptr)
Hoists out affine.if/else to as high as possible, i.e., past all invariant affine....
Definition: Utils.cpp:415
bool noDependence(DependenceResult result)
Returns true if the provided DependenceResult corresponds to the absence of a dependence.
AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min, AffineExpr max, bool positivePath=true)
Traverse e and return an AffineExpr where all occurrences of dim have been replaced by either:
Definition: Utils.cpp:468
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...
Definition: Utils.cpp:1370
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Definition: AffineMap.cpp:773
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:311
std::pair< AffineExpr, SmallVector< OpFoldResult > > computeLinearIndex(OpFoldResult sourceOffset, ArrayRef< OpFoldResult > strides, ArrayRef< OpFoldResult > indices)
Compute linear index from provided strides and indices, assuming strided layout.
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
Definition: IndexingUtils.h:47
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
@ CeilDiv
RHS of ceildiv is always a constant or a symbolic expression.
@ Mod
RHS of mod is always a constant or a symbolic expression with a positive value.
@ FloorDiv
RHS of floordiv is always a constant or a symbolic expression.
AffineExpr getAffineBinaryOpExpr(AffineExprKind kind, AffineExpr lhs, AffineExpr rhs)
Definition: AffineExpr.cpp:70
const FrozenRewritePatternSet & patterns
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:325
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:112
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
Definition: AffineExpr.cpp:645
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
@ ExistingOps
Only pre-existing ops are processed.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
Definition: AffineExpr.cpp:621
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Definition: AffineExpr.cpp:631
The following effect indicates that the operation reads from some resource.
This represents an operation in an abstracted form, suitable for use with the builder APIs.
Checks whether two accesses to the same memref access the same element.
Holds the result of (div a, b) and (mod a, b).
Definition: Utils.h:307
A description of a (parallelizable) reduction in an affine loop.
arith::AtomicRMWKind kind
Reduction kind.
Value value
The value being reduced.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.