1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
25 #include "mlir/IR/Dominance.h"
26 #include "mlir/IR/IRMapping.h"
28 #include "mlir/IR/IntegerSet.h"
30 #include <optional>
31 
32 #define DEBUG_TYPE "affine-utils"
33 
34 using namespace mlir;
35 using namespace affine;
36 using namespace presburger;
37 
38 namespace {
39 /// Visit affine expressions recursively and build the sequence of operations
 40 /// that correspond to them. Visitation functions return a Value of the
41 /// expression subtree they visited or `nullptr` on error.
42 class AffineApplyExpander
43  : public AffineExprVisitor<AffineApplyExpander, Value> {
44 public:
 45  /// This internal class expects arguments to be non-null; checks must be
46  /// performed at the call site.
47  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
48  ValueRange symbolValues, Location loc)
49  : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
50  loc(loc) {}
51 
52  template <typename OpTy>
53  Value buildBinaryExpr(AffineBinaryOpExpr expr) {
54  auto lhs = visit(expr.getLHS());
55  auto rhs = visit(expr.getRHS());
56  if (!lhs || !rhs)
57  return nullptr;
58  auto op = builder.create<OpTy>(loc, lhs, rhs);
59  return op.getResult();
60  }
61 
62  Value visitAddExpr(AffineBinaryOpExpr expr) {
63  return buildBinaryExpr<arith::AddIOp>(expr);
64  }
65 
66  Value visitMulExpr(AffineBinaryOpExpr expr) {
67  return buildBinaryExpr<arith::MulIOp>(expr);
68  }
69 
70  /// Euclidean modulo operation: negative RHS is not allowed.
71  /// Remainder of the euclidean integer division is always non-negative.
72  ///
73  /// Implemented as
74  ///
75  /// a mod b =
76  /// let remainder = srem a, b;
77  /// negative = a < 0 in
78  /// select negative, remainder + b, remainder.
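 ///
 /// As a quick sanity check of the formula above (an illustrative worked case,
 /// not part of the original comment): for a = -7, b = 3, `srem` yields -1;
 /// the remainder -1 is negative, so the result is -1 + 3 = 2, which matches
 /// the Euclidean (-7) mod 3 = 2. For a = 7, b = 3, the remainder 1 is
 /// non-negative and is returned unchanged.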
79  Value visitModExpr(AffineBinaryOpExpr expr) {
80  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
81  if (rhsConst.getValue() <= 0) {
82  emitError(loc, "modulo by non-positive value is not supported");
83  return nullptr;
84  }
85  }
86 
87  auto lhs = visit(expr.getLHS());
88  auto rhs = visit(expr.getRHS());
89  assert(lhs && rhs && "unexpected affine expr lowering failure");
90 
91  Value remainder = builder.create<arith::RemSIOp>(loc, lhs, rhs);
92  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
93  Value isRemainderNegative = builder.create<arith::CmpIOp>(
94  loc, arith::CmpIPredicate::slt, remainder, zeroCst);
95  Value correctedRemainder =
96  builder.create<arith::AddIOp>(loc, remainder, rhs);
97  Value result = builder.create<arith::SelectOp>(
98  loc, isRemainderNegative, correctedRemainder, remainder);
99  return result;
100  }
101 
102  /// Floor division operation (rounds towards negative infinity).
103  ///
104  /// For positive divisors, it can be implemented without branching and with a
105  /// single division operation as
106  ///
107  /// a floordiv b =
108  /// let negative = a < 0 in
109  /// let absolute = negative ? -a - 1 : a in
110  /// let quotient = absolute / b in
111  /// negative ? -quotient - 1 : quotient
112  ///
113  /// Note: this lowering does not use arith.floordivsi because the lowering of
114  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
115  /// not one but two arith.divsi. That could be changed to one divsi, but one
116  /// way or another, going through arith.floordivsi will result in more complex
117  /// IR because arith.floordivsi is more general than affine floordiv in that
118  /// it supports negative RHS.
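 ///
 /// An illustrative worked case (not part of the original comment): for
 /// a = -7, b = 3, `negative` is true, the dividend becomes -(-7) - 1 = 6,
 /// the quotient is 6 / 3 = 2, and the result is -2 - 1 = -3, which matches
 /// floor(-7 / 3) = -3. For a = 7 the positive path simply yields 7 / 3 = 2.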
119  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
120  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
121  if (rhsConst.getValue() <= 0) {
122  emitError(loc, "division by non-positive value is not supported");
123  return nullptr;
124  }
125  }
126  auto lhs = visit(expr.getLHS());
127  auto rhs = visit(expr.getRHS());
128  assert(lhs && rhs && "unexpected affine expr lowering failure");
129 
130  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
131  Value noneCst = builder.create<arith::ConstantIndexOp>(loc, -1);
132  Value negative = builder.create<arith::CmpIOp>(
133  loc, arith::CmpIPredicate::slt, lhs, zeroCst);
134  Value negatedDecremented = builder.create<arith::SubIOp>(loc, noneCst, lhs);
135  Value dividend =
136  builder.create<arith::SelectOp>(loc, negative, negatedDecremented, lhs);
137  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
138  Value correctedQuotient =
139  builder.create<arith::SubIOp>(loc, noneCst, quotient);
140  Value result = builder.create<arith::SelectOp>(loc, negative,
141  correctedQuotient, quotient);
142  return result;
143  }
144 
145  /// Ceiling division operation (rounds towards positive infinity).
146  ///
147  /// For positive divisors, it can be implemented without branching and with a
148  /// single division operation as
149  ///
150  /// a ceildiv b =
151  /// let negative = a <= 0 in
152  /// let absolute = negative ? -a : a - 1 in
153  /// let quotient = absolute / b in
154  /// negative ? -quotient : quotient + 1
155  ///
156  /// Note: not using arith.ceildivsi for the same reason as explained in the
157  /// visitFloorDivExpr comment.
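 ///
 /// An illustrative worked case (not part of the original comment): for
 /// a = 7, b = 3, `negative` is false, the dividend becomes 7 - 1 = 6, the
 /// quotient is 6 / 3 = 2, and the result is 2 + 1 = 3 = ceil(7 / 3). For
 /// a = -7, the dividend is 7, the quotient is 2, and the result is
 /// -2 = ceil(-7 / 3).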
158  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
159  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
160  if (rhsConst.getValue() <= 0) {
161  emitError(loc, "division by non-positive value is not supported");
162  return nullptr;
163  }
164  }
165  auto lhs = visit(expr.getLHS());
166  auto rhs = visit(expr.getRHS());
167  assert(lhs && rhs && "unexpected affine expr lowering failure");
168 
169  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
170  Value oneCst = builder.create<arith::ConstantIndexOp>(loc, 1);
171  Value nonPositive = builder.create<arith::CmpIOp>(
172  loc, arith::CmpIPredicate::sle, lhs, zeroCst);
173  Value negated = builder.create<arith::SubIOp>(loc, zeroCst, lhs);
174  Value decremented = builder.create<arith::SubIOp>(loc, lhs, oneCst);
175  Value dividend =
176  builder.create<arith::SelectOp>(loc, nonPositive, negated, decremented);
177  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
178  Value negatedQuotient =
179  builder.create<arith::SubIOp>(loc, zeroCst, quotient);
180  Value incrementedQuotient =
181  builder.create<arith::AddIOp>(loc, quotient, oneCst);
182  Value result = builder.create<arith::SelectOp>(
183  loc, nonPositive, negatedQuotient, incrementedQuotient);
184  return result;
185  }
186 
187  Value visitConstantExpr(AffineConstantExpr expr) {
188  auto op = builder.create<arith::ConstantIndexOp>(loc, expr.getValue());
189  return op.getResult();
190  }
191 
192  Value visitDimExpr(AffineDimExpr expr) {
193  assert(expr.getPosition() < dimValues.size() &&
194  "affine dim position out of range");
195  return dimValues[expr.getPosition()];
196  }
197 
198  Value visitSymbolExpr(AffineSymbolExpr expr) {
199  assert(expr.getPosition() < symbolValues.size() &&
200  "symbol dim position out of range");
201  return symbolValues[expr.getPosition()];
202  }
203 
204 private:
205  OpBuilder &builder;
206  ValueRange dimValues;
207  ValueRange symbolValues;
208 
209  Location loc;
210 };
211 } // namespace
212 
213 /// Create a sequence of operations that implement the `expr` applied to the
214 /// given dimension and symbol values.
215 mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
216  AffineExpr expr,
217  ValueRange dimValues,
218  ValueRange symbolValues) {
219  return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
220 }
221 
222 /// Create a sequence of operations that implement the `affineMap` applied to
 223 /// the given `operands` (as if it were an AffineApplyOp).
224 std::optional<SmallVector<Value, 8>>
225 mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
226  AffineMap affineMap, ValueRange operands) {
227  auto numDims = affineMap.getNumDims();
228  auto expanded = llvm::to_vector<8>(
229  llvm::map_range(affineMap.getResults(),
230  [numDims, &builder, loc, operands](AffineExpr expr) {
231  return expandAffineExpr(builder, loc, expr,
232  operands.take_front(numDims),
233  operands.drop_front(numDims));
234  }));
235  if (llvm::all_of(expanded, [](Value v) { return v; }))
236  return expanded;
237  return std::nullopt;
238 }
239 
240 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
241 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
242 /// the rest of the op.
243 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
244  if (elseBlock)
245  assert(ifOp.hasElse() && "else block expected");
246 
247  Block *destBlock = ifOp->getBlock();
248  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
249  destBlock->getOperations().splice(
250  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
251  std::prev(srcBlock->end()));
252  ifOp.erase();
253 }
254 
255 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
256 /// on. The `ifOp` could be hoisted and placed right before such an operation.
257 /// This method assumes that the ifOp has been canonicalized (to be correct and
258 /// effective).
259 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
 260  // Walk up the parents past all for ops that this conditional is invariant on.
261  auto ifOperands = ifOp.getOperands();
262  auto *res = ifOp.getOperation();
263  while (!isa<func::FuncOp>(res->getParentOp())) {
264  auto *parentOp = res->getParentOp();
265  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
266  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
267  break;
268  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
269  for (auto iv : parallelOp.getIVs())
270  if (llvm::is_contained(ifOperands, iv))
271  break;
272  } else if (!isa<AffineIfOp>(parentOp)) {
 273  // Won't walk up past anything other than affine.for/parallel/if ops.
274  break;
275  }
276  // You can always hoist up past any affine.if ops.
277  res = parentOp;
278  }
279  return res;
280 }
281 
282 /// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
283 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
284 /// otherwise the same `ifOp`.
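///
/// A rough sketch of the intended effect, on hypothetical IR (the set #s and
/// the ops "S1"/"S2" are illustrative only), when hoisting over the enclosing
/// affine.for:
///
///   affine.for %i = 0 to %N {
///     affine.if #s()[%M] { "S1"(%i) } else { "S2"(%i) }
///   }
///
/// becomes
///
///   affine.if #s()[%M] {
///     affine.for %i = 0 to %N { "S1"(%i) }
///   } else {
///     affine.for %i = 0 to %N { "S2"(%i) }
///   }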
285 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
286  // No hoisting to do.
287  if (hoistOverOp == ifOp)
288  return ifOp;
289 
290  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
291  // the else block. Then drop the else block of the original 'if' in the 'then'
292  // branch while promoting its then block, and analogously drop the 'then'
293  // block of the original 'if' from the 'else' branch while promoting its else
294  // block.
295  IRMapping operandMap;
296  OpBuilder b(hoistOverOp);
297  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
298  ifOp.getOperands(),
299  /*elseBlock=*/true);
300 
301  // Create a clone of hoistOverOp to use for the else branch of the hoisted
302  // conditional. The else block may get optimized away if empty.
303  Operation *hoistOverOpClone = nullptr;
304  // We use this unique name to identify/find `ifOp`'s clone in the else
305  // version.
306  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
307  operandMap.clear();
308  b.setInsertionPointAfter(hoistOverOp);
309  // We'll set an attribute to identify this op in a clone of this sub-tree.
310  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
311  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
312 
313  // Promote the 'then' block of the original affine.if in the then version.
314  promoteIfBlock(ifOp, /*elseBlock=*/false);
315 
316  // Move the then version to the hoisted if op's 'then' block.
317  auto *thenBlock = hoistedIfOp.getThenBlock();
318  thenBlock->getOperations().splice(thenBlock->begin(),
319  hoistOverOp->getBlock()->getOperations(),
320  Block::iterator(hoistOverOp));
321 
322  // Find the clone of the original affine.if op in the else version.
323  AffineIfOp ifCloneInElse;
324  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
325  if (!ifClone->getAttr(idForIfOp))
326  return WalkResult::advance();
327  ifCloneInElse = ifClone;
328  return WalkResult::interrupt();
329  });
330  assert(ifCloneInElse && "if op clone should exist");
331  // For the else block, promote the else block of the original 'if' if it had
332  // one; otherwise, the op itself is to be erased.
333  if (!ifCloneInElse.hasElse())
334  ifCloneInElse.erase();
335  else
336  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
337 
338  // Move the else version into the else block of the hoisted if op.
339  auto *elseBlock = hoistedIfOp.getElseBlock();
340  elseBlock->getOperations().splice(
341  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
342  Block::iterator(hoistOverOpClone));
343 
344  return hoistedIfOp;
345 }
346 
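/// A sketch of what the reduction handling below does, on hypothetical IR
/// (the names and the f32 add reduction are illustrative only): a loop such as
///
///   %r = affine.for %i = 0 to 128 iter_args(%acc = %init) -> (f32) {
///     %v = affine.load %A[%i] : memref<128xf32>
///     %s = arith.addf %acc, %v : f32
///     affine.yield %s : f32
///   }
///
/// becomes an affine.parallel with an "addf" reduction over %v, and the
/// arith.addf that combined the accumulator is moved after the loop so that
/// the original %init is folded into the parallel op's result.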
347 LogicalResult
348 mlir::affine::affineParallelize(AffineForOp forOp,
349  ArrayRef<LoopReduction> parallelReductions,
350  AffineParallelOp *resOp) {
351  // Fail early if there are iter arguments that are not reductions.
352  unsigned numReductions = parallelReductions.size();
353  if (numReductions != forOp.getNumIterOperands())
354  return failure();
355 
356  Location loc = forOp.getLoc();
357  OpBuilder outsideBuilder(forOp);
358  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
359  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
360  AffineMap upperBoundMap = forOp.getUpperBoundMap();
361  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
362 
363  // Creating empty 1-D affine.parallel op.
364  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
365  parallelReductions, [](const LoopReduction &red) { return red.value; }));
366  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
367  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
368  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
369  loc, ValueRange(reducedValues).getTypes(), reductionKinds,
370  llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
371  llvm::ArrayRef(upperBoundMap), upperBoundOperands,
372  llvm::ArrayRef(forOp.getStepAsInt()));
373  // Steal the body of the old affine for op.
374  newPloop.getRegion().takeBody(forOp.getRegion());
375  Operation *yieldOp = &newPloop.getBody()->back();
376 
377  // Handle the initial values of reductions because the parallel loop always
378  // starts from the neutral value.
379  SmallVector<Value> newResults;
380  newResults.reserve(numReductions);
381  for (unsigned i = 0; i < numReductions; ++i) {
382  Value init = forOp.getInits()[i];
383  // This works because we are only handling single-op reductions at the
384  // moment. A switch on reduction kind or a mechanism to collect operations
385  // participating in the reduction will be necessary for multi-op reductions.
386  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
387  assert(reductionOp && "yielded value is expected to be produced by an op");
388  outsideBuilder.getInsertionBlock()->getOperations().splice(
389  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
390  reductionOp);
391  reductionOp->setOperands({init, newPloop->getResult(i)});
392  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
393  }
394 
395  // Update the loop terminator to yield reduced values bypassing the reduction
396  // operation itself (now moved outside of the loop) and erase the block
397  // arguments that correspond to reductions. Note that the loop always has one
398  // "main" induction variable whenc coming from a non-parallel for.
399  unsigned numIVs = 1;
400  yieldOp->setOperands(reducedValues);
401  newPloop.getBody()->eraseArguments(numIVs, numReductions);
402 
403  forOp.erase();
404  if (resOp)
405  *resOp = newPloop;
406  return success();
407 }
408 
409 // Returns success if any hoisting happened.
410 LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
411  // Bail out early if the ifOp returns a result. TODO: Consider how to
412  // properly support this case.
413  if (ifOp.getNumResults() != 0)
414  return failure();
415 
416  // Apply canonicalization patterns and folding - this is necessary for the
417  // hoisting check to be correct (operands should be composed), and to be more
418  // effective (no unused operands). Since the pattern rewriter's folding is
419  // entangled with application of patterns, we may fold/end up erasing the op,
420  // in which case we return with `folded` being set.
421  RewritePatternSet patterns(ifOp.getContext());
422  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
423  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
424  GreedyRewriteConfig config;
426  bool erased;
427  (void)applyOpPatternsAndFold(ifOp.getOperation(), frozenPatterns, config,
428  /*changed=*/nullptr, &erased);
429  if (erased) {
430  if (folded)
431  *folded = true;
432  return failure();
433  }
434  if (folded)
435  *folded = false;
436 
437  // The folding above should have ensured this, but the affine.if's
438  // canonicalization is missing composition of affine.applys into it.
439  assert(llvm::all_of(ifOp.getOperands(),
440  [](Value v) {
441  return isTopLevelValue(v) || isAffineForInductionVar(v);
442  }) &&
443  "operands not composed");
444 
 445  // We are going to hoist as high as possible.
446  // TODO: this could be customized in the future.
447  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
448 
449  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
450  // Nothing to hoist over.
451  if (hoistedIfOp == ifOp)
452  return failure();
453 
454  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
455  // a sequence of affine.fors that are all perfectly nested).
457  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
458  frozenPatterns);
459 
460  return success();
461 }
462 
463 // Return the min expr after replacing the given dim.
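// For instance (an illustrative case, not from the original comment): with
// min = 0 and max = 255, the expression d0 + 4 becomes 0 + 4 on the positive
// path, while 1024 - d0 becomes 1024 - 255, because the negative coefficient
// on d0 flips the path and substitutes the max instead.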
464 AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
465  AffineExpr min, AffineExpr max,
466  bool positivePath) {
467  if (e == dim)
468  return positivePath ? min : max;
469  if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
470  AffineExpr lhs = bin.getLHS();
471  AffineExpr rhs = bin.getRHS();
472  if (bin.getKind() == mlir::AffineExprKind::Add)
473  return substWithMin(lhs, dim, min, max, positivePath) +
474  substWithMin(rhs, dim, min, max, positivePath);
475 
476  auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
477  auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
478  if (c1 && c1.getValue() < 0)
479  return getAffineBinaryOpExpr(
480  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
481  if (c2 && c2.getValue() < 0)
482  return getAffineBinaryOpExpr(
483  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
484  return getAffineBinaryOpExpr(
485  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
486  substWithMin(rhs, dim, min, max, positivePath));
487  }
488  return e;
489 }
490 
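/// Illustrative example of the normalization below (hypothetical IR): a band
///
///   affine.parallel (%i) = (4) to (20) step (2) { ... use of %i ... }
///
/// is rewritten to run from 0 with unit step,
///
///   affine.parallel (%ii) = (0) to (8) {
///     %i = affine.apply affine_map<(d0) -> (d0 * 2 + 4)>(%ii)
///     ... use of %i ...
///   }
///
/// with all uses of the original IV routed through the inserted affine.apply.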
491 void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
492  // Loops with min/max in bounds are not normalized at the moment.
493  if (op.hasMinMaxBounds())
494  return;
495 
496  AffineMap lbMap = op.getLowerBoundsMap();
497  SmallVector<int64_t, 8> steps = op.getSteps();
498  // No need to do any work if the parallel op is already normalized.
499  bool isAlreadyNormalized =
500  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
501  int64_t step = std::get<0>(tuple);
502  auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
503  return lbExpr && lbExpr.getValue() == 0 && step == 1;
504  });
505  if (isAlreadyNormalized)
506  return;
507 
508  AffineValueMap ranges;
509  AffineValueMap::difference(op.getUpperBoundsValueMap(),
510  op.getLowerBoundsValueMap(), &ranges);
511  auto builder = OpBuilder::atBlockBegin(op.getBody());
512  auto zeroExpr = builder.getAffineConstantExpr(0);
513  SmallVector<AffineExpr, 8> lbExprs;
514  SmallVector<AffineExpr, 8> ubExprs;
515  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
516  int64_t step = steps[i];
517 
518  // Adjust the lower bound to be 0.
519  lbExprs.push_back(zeroExpr);
520 
521  // Adjust the upper bound expression: 'range / step'.
522  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
523  ubExprs.push_back(ubExpr);
524 
525  // Adjust the corresponding IV: 'lb + i * step'.
526  BlockArgument iv = op.getBody()->getArgument(i);
527  AffineExpr lbExpr = lbMap.getResult(i);
528  unsigned nDims = lbMap.getNumDims();
529  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
530  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
531  /*symbolCount=*/lbMap.getNumSymbols(), expr);
532 
533  // Use an 'affine.apply' op that will be simplified later in subsequent
534  // canonicalizations.
535  OperandRange lbOperands = op.getLowerBoundsOperands();
536  OperandRange dimOperands = lbOperands.take_front(nDims);
537  OperandRange symbolOperands = lbOperands.drop_front(nDims);
538  SmallVector<Value, 8> applyOperands{dimOperands};
539  applyOperands.push_back(iv);
540  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
541  auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
542  iv.replaceAllUsesExcept(apply, apply);
543  }
544 
545  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
546  op.setSteps(newSteps);
547  auto newLowerMap = AffineMap::get(
548  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
549  op.setLowerBounds({}, newLowerMap);
550  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
551  ubExprs, op.getContext());
552  op.setUpperBounds(ranges.getOperands(), newUpperMap);
553 }
554 
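/// Illustrative example of the rewrite below (hypothetical IR): normalizing
///
///   affine.for %i = 5 to 20 step 3 { ... use of %i ... }
///
/// gives
///
///   affine.for %ii = 0 to 5 {   // (20 - 5) ceildiv 3 = 5
///     %i = affine.apply affine_map<(d0) -> (d0 * 3 + 5)>(%ii)
///     ... use of %i ...
///   }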
555 LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
556  bool promoteSingleIter) {
557  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
558  return success();
559 
 560  // Check if the for op is already normalized.
561  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
562  (op.getStep() == 1))
563  return success();
564 
565  // Check if the lower bound has a single result only. Loops with a max lower
566  // bound can't be normalized without additional support like
567  // affine.execute_region's. If the lower bound does not have a single result
568  // then skip this op.
569  if (op.getLowerBoundMap().getNumResults() != 1)
570  return failure();
571 
572  Location loc = op.getLoc();
573  OpBuilder opBuilder(op);
574  int64_t origLoopStep = op.getStepAsInt();
575 
576  // Construct the new upper bound value map.
577  AffineMap oldLbMap = op.getLowerBoundMap();
578  // The upper bound can have multiple results. To use
579  // AffineValueMap::difference, we need to have the same number of results in
580  // both lower and upper bound maps. So, we just create a value map for the
581  // lower bound with the only available lower bound result repeated to pad up
582  // to the number of upper bound results.
583  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
584  op.getLowerBoundMap().getResult(0));
585  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
586  AffineMap paddedLbMap =
587  AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
588  op.getContext());
589  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
590  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
591  AffineValueMap newUbValueMap;
592  // Compute the `upper bound - lower bound`.
593  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
594  (void)newUbValueMap.canonicalize();
595 
596  // Scale down the upper bound value map by the loop step.
597  unsigned numResult = newUbValueMap.getNumResults();
598  SmallVector<AffineExpr> scaleDownExprs(numResult);
599  for (unsigned i = 0; i < numResult; ++i)
600  scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
601  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
 602  // step), where `n` is the number of results in the upper bound map.
603  AffineMap scaleDownMap =
604  AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
605  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
606 
 607  // Set the newly created upper bound map and operands.
608  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
609  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
610  op.setStep(1);
611 
 612  // Calculate the value of the original loop IV in terms of the new,
 613  // normalized loop IV, via an affine.apply at the top of the loop body.
614  opBuilder.setInsertionPointToStart(op.getBody());
615  // Construct an affine.apply op mapping the new IV to the old IV.
616  AffineMap scaleIvMap =
617  AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
618  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
619  AffineValueMap newIvToOldIvMap;
620  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
621  (void)newIvToOldIvMap.canonicalize();
622  auto newIV = opBuilder.create<AffineApplyOp>(
623  loc, newIvToOldIvMap.getAffineMap(), newIvToOldIvMap.getOperands());
624  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
625  return success();
626 }
627 
628 /// Returns true if the memory operation of `destAccess` depends on `srcAccess`
629 /// inside of the innermost common surrounding affine loop between the two
630 /// accesses.
631 static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
632  const MemRefAccess &destAccess) {
 633  // Affine dependence analysis is possible only if both ops are in the same
634  // AffineScope.
635  if (getAffineScope(srcAccess.opInst) != getAffineScope(destAccess.opInst))
636  return false;
637 
638  unsigned nsLoops =
639  getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
640  DependenceResult result =
641  checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
642  return hasDependence(result);
643 }
644 
645 /// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
646 /// scope of the outermost `minSurroundingLoops` loops that surround them.
647 /// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
648 static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
649  unsigned minSurroundingLoops) {
650  MemRefAccess srcAccess(srcMemOp);
651  MemRefAccess destAccess(destMemOp);
652 
653  // Affine dependence analysis here is applicable only if both ops operate on
654  // the same memref and if `srcMemOp` and `destMemOp` are in the same
655  // AffineScope. Also, we can only check if our affine scope is isolated from
 656  // above; otherwise, values can come from outside of the affine scope that the
657  // check below cannot analyze.
658  Region *srcScope = getAffineScope(srcMemOp);
659  if (srcAccess.memref == destAccess.memref &&
660  srcScope == getAffineScope(destMemOp)) {
661  unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
662  FlatAffineValueConstraints dependenceConstraints;
663  for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
664  DependenceResult result = checkMemrefAccessDependence(
665  srcAccess, destAccess, d, &dependenceConstraints,
666  /*dependenceComponents=*/nullptr);
667  // A dependence failure or the presence of a dependence implies a
668  // side effect.
669  if (!noDependence(result))
670  return true;
671  }
672  // No side effect was seen.
673  return false;
674  }
675  // TODO: Check here if the memrefs alias: there is no side effect if
676  // `srcAccess.memref` and `destAccess.memref` don't alias.
677  return true;
678 }
679 
680 template <typename EffectType, typename T>
681 bool mlir::affine::hasNoInterveningEffect(
682  Operation *start, T memOp,
683  llvm::function_ref<bool(Value, Value)> mayAlias) {
684  // A boolean representing whether an intervening operation could have impacted
685  // memOp.
686  bool hasSideEffect = false;
687 
688  // Check whether the effect on memOp can be caused by a given operation op.
689  Value memref = memOp.getMemRef();
690  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
 691  // If the effect has already been found, exit early.
692  if (hasSideEffect)
693  return;
694 
695  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
697  memEffect.getEffects(effects);
698 
699  bool opMayHaveEffect = false;
700  for (auto effect : effects) {
701  // If op causes EffectType on a potentially aliasing location for
702  // memOp, mark as having the effect.
703  if (isa<EffectType>(effect.getEffect())) {
704  if (effect.getValue() && effect.getValue() != memref &&
705  !mayAlias(effect.getValue(), memref))
706  continue;
707  opMayHaveEffect = true;
708  break;
709  }
710  }
711 
712  if (!opMayHaveEffect)
713  return;
714 
715  // If the side effect comes from an affine read or write, try to
716  // prove the side effecting `op` cannot reach `memOp`.
717  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
718  // For ease, let's consider the case that `op` is a store and
719  // we're looking for other potential stores that overwrite memory after
720  // `start`, and before being read in `memOp`. In this case, we only
721  // need to consider other potential stores with depth >
722  // minSurroundingLoops since `start` would overwrite any store with a
723  // smaller number of surrounding loops before.
724  unsigned minSurroundingLoops =
725  getNumCommonSurroundingLoops(*start, *memOp);
726  if (mayHaveEffect(op, memOp, minSurroundingLoops))
727  hasSideEffect = true;
728  return;
729  }
730 
731  // We have an op with a memory effect and we cannot prove if it
732  // intervenes.
733  hasSideEffect = true;
734  return;
735  }
736 
738  // Recurse into the regions for this op and check whether the internal
739  // operations may have the side effect `EffectType` on memOp.
740  for (Region &region : op->getRegions())
741  for (Block &block : region)
742  for (Operation &op : block)
743  checkOperation(&op);
744  return;
745  }
746 
 747  // Otherwise, conservatively assume generic operations have the effect
 748  // on `memOp`.
749  hasSideEffect = true;
750  };
751 
752  // Check all paths from ancestor op `parent` to the operation `to` for the
753  // effect. It is known that `to` must be contained within `parent`.
754  auto until = [&](Operation *parent, Operation *to) {
755  // TODO check only the paths from `parent` to `to`.
 756  // Currently we fall back and check the entire parent op, rather than
 757  // just the paths from `parent` to `to`, stopping after reaching `to`.
758  // This is conservatively correct, but could be made more aggressive.
759  assert(parent->isAncestor(to));
760  checkOperation(parent);
761  };
762 
763  // Check for all paths from operation `from` to operation `untilOp` for the
764  // given memory effect.
765  std::function<void(Operation *, Operation *)> recur =
766  [&](Operation *from, Operation *untilOp) {
767  assert(
768  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
769  "Checking for side effect between two operations without a common "
770  "ancestor");
771 
772  // If the operations are in different regions, recursively consider all
 773  // paths from `from` to the parent of `to` and all paths from the parent
774  // of `to` to `to`.
775  if (from->getParentRegion() != untilOp->getParentRegion()) {
776  recur(from, untilOp->getParentOp());
777  until(untilOp->getParentOp(), untilOp);
778  return;
779  }
780 
781  // Now, assuming that `from` and `to` exist in the same region, perform
782  // a CFG traversal to check all the relevant operations.
783 
784  // Additional blocks to consider.
785  SmallVector<Block *, 2> todoBlocks;
786  {
 787  // First consider the parent block of `from` and check all operations
788  // after `from`.
789  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
790  iter != end && &*iter != untilOp; ++iter) {
791  checkOperation(&*iter);
792  }
793 
794  // If the parent of `from` doesn't contain `to`, add the successors
795  // to the list of blocks to check.
796  if (untilOp->getBlock() != from->getBlock())
797  for (Block *succ : from->getBlock()->getSuccessors())
798  todoBlocks.push_back(succ);
799  }
800 
 801  SmallPtrSet<Block *, 4> done;
 802  // Traverse the CFG until hitting `to`.
803  while (!todoBlocks.empty()) {
804  Block *blk = todoBlocks.pop_back_val();
805  if (done.count(blk))
806  continue;
807  done.insert(blk);
808  for (auto &op : *blk) {
809  if (&op == untilOp)
810  break;
811  checkOperation(&op);
812  if (&op == blk->getTerminator())
813  for (Block *succ : blk->getSuccessors())
814  todoBlocks.push_back(succ);
815  }
816  }
817  };
818  recur(start, memOp);
819  return !hasSideEffect;
820 }
821 
822 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
823 /// which the load is guaranteed to retrieve. This check involves three
 824 /// components: 1) the store and load must be to the same location, 2) the store
 825 /// must dominate (and therefore must always occur prior to) the load, and 3) no
 826 /// other operation may overwrite the memory loaded between the given load
827 /// and store. If such a value exists, the replaced `loadOp` will be added to
828 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
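///
/// A minimal illustrative case (hypothetical IR): given
///
///   affine.store %v, %A[%i] : memref<100xf32>
///   %x = affine.load %A[%i] : memref<100xf32>
///
/// with no other write to %A[%i] in between, %x is replaced by %v and the
/// load is queued in `loadOpsToErase`.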
829 static void forwardStoreToLoad(
830  AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
831  SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
 832  llvm::function_ref<bool(Value, Value)> mayAlias) {
 833 
834  // The store op candidate for forwarding that satisfies all conditions
835  // to replace the load, if any.
836  Operation *lastWriteStoreOp = nullptr;
837 
838  for (auto *user : loadOp.getMemRef().getUsers()) {
839  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
840  if (!storeOp)
841  continue;
842  MemRefAccess srcAccess(storeOp);
843  MemRefAccess destAccess(loadOp);
844 
845  // 1. Check if the store and the load have mathematically equivalent
846  // affine access functions; this implies that they statically refer to the
847  // same single memref element. As an example this filters out cases like:
848  // store %A[%i0 + 1]
849  // load %A[%i0]
850  // store %A[%M]
851  // load %A[%N]
852  // Use the AffineValueMap difference based memref access equality checking.
853  if (srcAccess != destAccess)
854  continue;
855 
856  // 2. The store has to dominate the load op to be candidate.
857  if (!domInfo.dominates(storeOp, loadOp))
858  continue;
859 
860  // 3. The store must reach the load. Access function equivalence only
861  // guarantees this for accesses in the same block. The load could be in a
862  // nested block that is unreachable.
863  if (!mustReachAtInnermost(srcAccess, destAccess))
864  continue;
865 
866  // 4. Ensure there is no intermediate operation which could replace the
867  // value in memory.
868  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
869  mayAlias))
870  continue;
871 
872  // We now have a candidate for forwarding.
873  assert(lastWriteStoreOp == nullptr &&
874  "multiple simultaneous replacement stores");
875  lastWriteStoreOp = storeOp;
876  }
877 
878  if (!lastWriteStoreOp)
879  return;
880 
881  // Perform the actual store to load forwarding.
882  Value storeVal =
883  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
884  // Check if 2 values have the same shape. This is needed for affine vector
885  // loads and stores.
886  if (storeVal.getType() != loadOp.getValue().getType())
887  return;
888  loadOp.getValue().replaceAllUsesWith(storeVal);
889  // Record the memref for a later sweep to optimize away.
890  memrefsToErase.insert(loadOp.getMemRef());
891  // Record this to erase later.
892  loadOpsToErase.push_back(loadOp);
893 }
894 
895 template bool
896 mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Write,
897  affine::AffineReadOpInterface>(
898  mlir::Operation *, affine::AffineReadOpInterface,
899  llvm::function_ref<bool(Value, Value)>);
900 
901 // This attempts to find stores which have no impact on the final result.
 902 // A writing op writeA will be eliminated if there exists an op writeB such that:
903 // 1) writeA and writeB have mathematically equivalent affine access functions.
904 // 2) writeB postdominates writeA.
905 // 3) There is no potential read between writeA and writeB.
906 static void findUnusedStore(AffineWriteOpInterface writeA,
907  SmallVectorImpl<Operation *> &opsToErase,
908  PostDominanceInfo &postDominanceInfo,
 909  llvm::function_ref<bool(Value, Value)> mayAlias) {
 910 
911  for (Operation *user : writeA.getMemRef().getUsers()) {
912  // Only consider writing operations.
913  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
914  if (!writeB)
915  continue;
916 
917  // The operations must be distinct.
918  if (writeB == writeA)
919  continue;
920 
921  // Both operations must lie in the same region.
922  if (writeB->getParentRegion() != writeA->getParentRegion())
923  continue;
924 
925  // Both operations must write to the same memory.
926  MemRefAccess srcAccess(writeB);
927  MemRefAccess destAccess(writeA);
928 
929  if (srcAccess != destAccess)
930  continue;
931 
932  // writeB must postdominate writeA.
933  if (!postDominanceInfo.postDominates(writeB, writeA))
934  continue;
935 
936  // There cannot be an operation which reads from memory between
937  // the two writes.
938  if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
939  mayAlias))
940  continue;
941 
942  opsToErase.push_back(writeA);
943  break;
944  }
945 }
946 
947 // The load to load forwarding / redundant load elimination is similar to the
948 // store to load forwarding.
 949 // loadA will be replaced with loadB if:
950 // 1) loadA and loadB have mathematically equivalent affine access functions.
951 // 2) loadB dominates loadA.
952 // 3) There is no write between loadA and loadB.
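//
// For instance (hypothetical IR):
//   %a = affine.load %M[%i] : memref<10xf32>
//   ... (no intervening write to %M[%i]) ...
//   %b = affine.load %M[%i] : memref<10xf32>
// allows %b to be replaced by %a, provided the first load dominates the
// second.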
953 static void loadCSE(AffineReadOpInterface loadA,
954  SmallVectorImpl<Operation *> &loadOpsToErase,
 955  DominanceInfo &domInfo,
 956  llvm::function_ref<bool(Value, Value)> mayAlias) {
 957  SmallVector<AffineReadOpInterface, 4> loadCandidates;
958  for (auto *user : loadA.getMemRef().getUsers()) {
959  auto loadB = dyn_cast<AffineReadOpInterface>(user);
960  if (!loadB || loadB == loadA)
961  continue;
962 
963  MemRefAccess srcAccess(loadB);
964  MemRefAccess destAccess(loadA);
965 
 966  // 1. The accesses should be to the same location.
967  if (srcAccess != destAccess) {
968  continue;
969  }
970 
971  // 2. loadB should dominate loadA.
972  if (!domInfo.dominates(loadB, loadA))
973  continue;
974 
975  // 3. There should not be a write between loadA and loadB.
976  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
977  loadB.getOperation(), loadA, mayAlias))
978  continue;
979 
980  // Check if two values have the same shape. This is needed for affine vector
981  // loads.
982  if (loadB.getValue().getType() != loadA.getValue().getType())
983  continue;
984 
985  loadCandidates.push_back(loadB);
986  }
987 
988  // Of the legal load candidates, use the one that dominates all others
 989  // to minimize the subsequent need for loadCSE.
990  Value loadB;
991  for (AffineReadOpInterface option : loadCandidates) {
992  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
993  return depStore == option ||
994  domInfo.dominates(option.getOperation(),
995  depStore.getOperation());
996  })) {
997  loadB = option.getValue();
998  break;
999  }
1000  }
1001 
1002  if (loadB) {
1003  loadA.getValue().replaceAllUsesWith(loadB);
1004  // Record this to erase later.
1005  loadOpsToErase.push_back(loadA);
1006  }
1007 }
1008 
1009 // The store to load forwarding and load CSE rely on three conditions:
1010 //
1011 // 1) store/load providing a replacement value and load being replaced need to
1012 // have mathematically equivalent affine access functions (checked after full
1013 // composition of load/store operands); this implies that they access the same
1014 // single memref element for all iterations of the common surrounding loop,
1015 //
1016 // 2) the store/load op should dominate the load op,
1017 //
1018 // 3) no operation that may write to memory read by the load being replaced can
1019 // occur after executing the instruction (load or store) providing the
1020 // replacement value and before the load being replaced (thus potentially
1021 // allowing overwriting the memory read by the load).
1022 //
 1023 // The above conditions are simple to check and are powerful for most cases in
 1024 // practice. They are sufficient but not necessary, since they don't reason
 1025 // about loops that are guaranteed to execute at least once or about multiple
 1026 // sources to forward from.
1027 //
1028 // TODO: more forwarding can be done when support for
1029 // loop/conditional live-out SSA values is available.
1030 // TODO: do general dead store elimination for memref's. This pass
 1031 // currently eliminates the stores only if no other loads/uses (other
1032 // than dealloc) remain.
1033 //
1034 void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1035  PostDominanceInfo &postDomInfo,
1036  AliasAnalysis &aliasAnalysis) {
1037  // Load op's whose results were replaced by those forwarded from stores.
1038  SmallVector<Operation *, 8> opsToErase;
1039 
1040  // A list of memref's that are potentially dead / could be eliminated.
1041  SmallPtrSet<Value, 4> memrefsToErase;
1042 
1043  auto mayAlias = [&](Value val1, Value val2) -> bool {
1044  return !aliasAnalysis.alias(val1, val2).isNo();
1045  };
1046 
1047  // Walk all load's and perform store to load forwarding.
1048  f.walk([&](AffineReadOpInterface loadOp) {
1049  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1050  });
1051  for (auto *op : opsToErase)
1052  op->erase();
1053  opsToErase.clear();
1054 
1055  // Walk all store's and perform unused store elimination
1056  f.walk([&](AffineWriteOpInterface storeOp) {
1057  findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1058  });
1059  for (auto *op : opsToErase)
1060  op->erase();
1061  opsToErase.clear();
1062 
1063  // Check if the store fwd'ed memrefs are now left with only stores and
1064  // deallocs and can thus be completely deleted. Note: the canonicalize pass
1065  // should be able to do this as well, but we'll do it here since we collected
1066  // these anyway.
1067  for (auto memref : memrefsToErase) {
1068  // If the memref hasn't been locally alloc'ed, skip.
1069  Operation *defOp = memref.getDefiningOp();
1070  if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1071  // TODO: if the memref was returned by a 'call' operation, we
1072  // could still erase it if the call had no side-effects.
1073  continue;
1074  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1075  return !isa<AffineWriteOpInterface>(ownerOp) &&
1076  !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1077  }))
1078  continue;
1079 
1080  // Erase all stores, the dealloc, and the alloc on the memref.
1081  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1082  user->erase();
1083  defOp->erase();
1084  }
1085 
1086  // To eliminate as many loads as possible, run load CSE after eliminating
1087  // stores. Otherwise, some stores are wrongly seen as having an intervening
1088  // effect.
1089  f.walk([&](AffineReadOpInterface loadOp) {
1090  loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1091  });
1092  for (auto *op : opsToErase)
1093  op->erase();
1094 }
1095 
1096 // Perform the replacement in `op`.
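// For example (an illustrative use, not from the original source): replacing a
// memref<64xf32> %old by a memref<4x16xf32> %new with
// indexRemap = (d0) -> (d0 floordiv 16, d0 mod 16) turns
//   affine.load %old[%i]
// into
//   affine.load %new[%i floordiv 16, %i mod 16]
// with the remapped indices materialized through affine.apply ops that are
// then composed into the new access map.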
1097 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1098  Value oldMemRef, Value newMemRef, Operation *op,
1099  ArrayRef<Value> extraIndices, AffineMap indexRemap,
1100  ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1101  bool allowNonDereferencingOps) {
1102  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1103  (void)newMemRefRank; // unused in opt mode
1104  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1105  (void)oldMemRefRank; // unused in opt mode
1106  if (indexRemap) {
1107  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1108  "symbolic operand count mismatch");
1109  assert(indexRemap.getNumInputs() ==
1110  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1111  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1112  } else {
1113  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1114  }
1115 
1116  // Assert same elemental type.
1117  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1118  cast<MemRefType>(newMemRef.getType()).getElementType());
1119 
1120  SmallVector<unsigned, 2> usePositions;
1121  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1122  if (opEntry.value() == oldMemRef)
1123  usePositions.push_back(opEntry.index());
1124  }
1125 
1126  // If memref doesn't appear, nothing to do.
1127  if (usePositions.empty())
1128  return success();
1129 
1130  if (usePositions.size() > 1) {
1131  // TODO: extend it for this case when needed (rare).
1132  assert(false && "multiple dereferencing uses in a single op not supported");
1133  return failure();
1134  }
1135 
1136  unsigned memRefOperandPos = usePositions.front();
1137 
1138  OpBuilder builder(op);
1139  // The following checks if op is dereferencing memref and performs the access
1140  // index rewrites.
1141  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1142  if (!affMapAccInterface) {
1143  if (!allowNonDereferencingOps) {
1144  // Failure: memref used in a non-dereferencing context (potentially
1145  // escapes); no replacement in these cases unless allowNonDereferencingOps
1146  // is set.
1147  return failure();
1148  }
1149  op->setOperand(memRefOperandPos, newMemRef);
1150  return success();
1151  }
1152  // Perform index rewrites for the dereferencing op and then replace the op
1153  NamedAttribute oldMapAttrPair =
1154  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1155  AffineMap oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1156  unsigned oldMapNumInputs = oldMap.getNumInputs();
1157  SmallVector<Value, 4> oldMapOperands(
1158  op->operand_begin() + memRefOperandPos + 1,
1159  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1160 
1161  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1162  SmallVector<Value, 4> oldMemRefOperands;
1163  SmallVector<Value, 4> affineApplyOps;
1164  oldMemRefOperands.reserve(oldMemRefRank);
1165  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1166  for (auto resultExpr : oldMap.getResults()) {
1167  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1168  oldMap.getNumSymbols(), resultExpr);
1169  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1170  oldMapOperands);
1171  oldMemRefOperands.push_back(afOp);
1172  affineApplyOps.push_back(afOp);
1173  }
1174  } else {
1175  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1176  }
1177 
1178  // Construct new indices as a remap of the old ones if a remapping has been
1179  // provided. The indices of a memref come right after it, i.e.,
1180  // at position memRefOperandPos + 1.
1181  SmallVector<Value, 4> remapOperands;
1182  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1183  symbolOperands.size());
1184  remapOperands.append(extraOperands.begin(), extraOperands.end());
1185  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1186  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1187 
1188  SmallVector<Value, 4> remapOutputs;
1189  remapOutputs.reserve(oldMemRefRank);
1190 
1191  if (indexRemap &&
1192  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1193  // Remapped indices.
1194  for (auto resultExpr : indexRemap.getResults()) {
1195  auto singleResMap = AffineMap::get(
1196  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1197  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1198  remapOperands);
1199  remapOutputs.push_back(afOp);
1200  affineApplyOps.push_back(afOp);
1201  }
1202  } else {
1203  // No remapping specified.
1204  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1205  }
1206 
1207  SmallVector<Value, 4> newMapOperands;
1208  newMapOperands.reserve(newMemRefRank);
1209 
1210  // Prepend 'extraIndices' in 'newMapOperands'.
1211  for (Value extraIndex : extraIndices) {
1212  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1213  "invalid memory op index");
1214  newMapOperands.push_back(extraIndex);
1215  }
1216 
1217  // Append 'remapOutputs' to 'newMapOperands'.
1218  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1219 
1220  // Create new fully composed AffineMap for new op to be created.
1221  assert(newMapOperands.size() == newMemRefRank);
1222  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1223  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1224  newMap = simplifyAffineMap(newMap);
1225  canonicalizeMapAndOperands(&newMap, &newMapOperands);
1226  // Remove any affine.apply's that became dead as a result of composition.
1227  for (Value value : affineApplyOps)
1228  if (value.use_empty())
1229  value.getDefiningOp()->erase();
1230 
1231  OperationState state(op->getLoc(), op->getName());
1232  // Construct the new operation using this memref.
1233  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1234  // Insert the non-memref operands.
1235  state.operands.append(op->operand_begin(),
1236  op->operand_begin() + memRefOperandPos);
1237  // Insert the new memref value.
1238  state.operands.push_back(newMemRef);
1239 
1240  // Insert the new memref map operands.
1241  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1242 
1243  // Insert the remaining operands unmodified.
1244  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1245  oldMapNumInputs,
1246  op->operand_end());
1247 
1248  // Result types don't change. Both memref's are of the same elemental type.
1249  state.types.reserve(op->getNumResults());
1250  for (auto result : op->getResults())
1251  state.types.push_back(result.getType());
1252 
1253  // Add attribute for 'newMap', other Attributes do not change.
1254  auto newMapAttr = AffineMapAttr::get(newMap);
1255  for (auto namedAttr : op->getAttrs()) {
1256  if (namedAttr.getName() == oldMapAttrPair.getName())
1257  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1258  else
1259  state.attributes.push_back(namedAttr);
1260  }
1261 
1262  // Create the new operation.
1263  auto *repOp = builder.create(state);
1264  op->replaceAllUsesWith(repOp);
1265  op->erase();
1266 
1267  return success();
1268 }
1269 
1270 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1271  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1272  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1273  ArrayRef<Value> symbolOperands, Operation *domOpFilter,
1274  Operation *postDomOpFilter, bool allowNonDereferencingOps,
1275  bool replaceInDeallocOp) {
1276  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1277  (void)newMemRefRank; // unused in opt mode
1278  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1279  (void)oldMemRefRank;
1280  if (indexRemap) {
1281  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1282  "symbol operand count mismatch");
1283  assert(indexRemap.getNumInputs() ==
1284  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1285  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1286  } else {
1287  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1288  }
1289 
1290  // Assert same elemental type.
1291  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1292  cast<MemRefType>(newMemRef.getType()).getElementType());
1293 
1294  std::unique_ptr<DominanceInfo> domInfo;
1295  std::unique_ptr<PostDominanceInfo> postDomInfo;
1296  if (domOpFilter)
1297  domInfo = std::make_unique<DominanceInfo>(
1298  domOpFilter->getParentOfType<func::FuncOp>());
1299 
1300  if (postDomOpFilter)
1301  postDomInfo = std::make_unique<PostDominanceInfo>(
1302  postDomOpFilter->getParentOfType<func::FuncOp>());
1303 
1304  // Walk all uses of old memref; collect ops to perform replacement. We use a
1305  // DenseSet since an operation could potentially have multiple uses of a
1306  // memref (although rare), and the replacement later is going to erase ops.
1307  DenseSet<Operation *> opsToReplace;
1308  for (auto *op : oldMemRef.getUsers()) {
1309  // Skip this use if it's not dominated by domOpFilter.
1310  if (domOpFilter && !domInfo->dominates(domOpFilter, op))
1311  continue;
1312 
1313  // Skip this use if it's not post-dominated by postDomOpFilter.
1314  if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
1315  continue;
1316 
1317  // Skip dealloc's - no replacement is necessary, and a memref replacement
1318  // at other uses doesn't hurt these dealloc's.
1319  if (hasSingleEffect<MemoryEffects::Free>(op, oldMemRef) &&
1320  !replaceInDeallocOp)
1321  continue;
1322 
1323  // Check if the memref was used in a non-dereferencing context. It is fine
1324  // for the memref to be used in a non-dereferencing way outside of the
1325  // region where this replacement is happening.
1326  if (!isa<AffineMapAccessInterface>(*op)) {
1327  if (!allowNonDereferencingOps) {
1328  LLVM_DEBUG(llvm::dbgs()
 1329  << "Memref replacement failed: non-dereferencing memref op: \n"
1330  << *op << '\n');
1331  return failure();
1332  }
1333  // Non-dereferencing ops with the MemRefsNormalizable trait are
1334  // supported for replacement.
1335  if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
1336  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1337  "memrefs normalizable trait: \n"
1338  << *op << '\n');
1339  return failure();
1340  }
1341  }
1342 
1343  // We'll first collect and then replace --- since replacement erases the op
1344  // that has the use, and that op could be postDomFilter or domFilter itself!
1345  opsToReplace.insert(op);
1346  }
1347 
1348  for (auto *op : opsToReplace) {
1349  if (failed(replaceAllMemRefUsesWith(
1350  oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
1351  symbolOperands, allowNonDereferencingOps)))
1352  llvm_unreachable("memref replacement guaranteed to succeed here");
1353  }
1354 
1355  return success();
1356 }
1357 
1358 /// Given an operation, inserts one or more single result affine
 1359 /// apply operations, results of which are exclusively used by this operation.
 1360 /// The operands of these newly created affine apply ops are
1361 /// guaranteed to be loop iterators or terminal symbols of a function.
1362 ///
1363 /// Before
1364 ///
1365 /// affine.for %i = 0 to #map(%N)
1366 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1367 /// "send"(%idx, %A, ...)
1368 /// "compute"(%idx)
1369 ///
1370 /// After
1371 ///
1372 /// affine.for %i = 0 to #map(%N)
1373 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1374 /// "send"(%idx, %A, ...)
1375 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1376 /// "compute"(%idx_)
1377 ///
 1378 /// This allows applying different transformations on send and compute (e.g.,
 1379 /// different shifts/delays).
1380 ///
 1381 /// Leaves `sliceOps` empty either if none of opInst's operands were the result
 1382 /// of an affine.apply (and thus there was no affine computation slice to
 1383 /// create), or if all the affine.apply ops supplying operands to this opInst
 1384 /// had no uses besides this opInst; otherwise returns the list of affine.apply
 1385 /// operations created in the output argument `sliceOps`.
1386 void mlir::affine::createAffineComputationSlice(
1387  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1388  // Collect all operands that are results of affine apply ops.
1389  SmallVector<Value, 4> subOperands;
1390  subOperands.reserve(opInst->getNumOperands());
1391  for (auto operand : opInst->getOperands())
1392  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1393  subOperands.push_back(operand);
1394 
1395  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1396  SmallVector<Operation *, 4> affineApplyOps;
1397  getReachableAffineApplyOps(subOperands, affineApplyOps);
1398  // Skip transforming if there are no affine maps to compose.
1399  if (affineApplyOps.empty())
1400  return;
1401 
 1402  // Check if all uses of the affine apply op's lie only in this op, in
1403  // which case there would be nothing to do.
1404  bool localized = true;
1405  for (auto *op : affineApplyOps) {
1406  for (auto result : op->getResults()) {
1407  for (auto *user : result.getUsers()) {
1408  if (user != opInst) {
1409  localized = false;
1410  break;
1411  }
1412  }
1413  }
1414  }
1415  if (localized)
1416  return;
1417 
1418  OpBuilder builder(opInst);
1419  SmallVector<Value, 4> composedOpOperands(subOperands);
1420  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1421  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1422 
1423  // Create an affine.apply for each of the map results.
1424  sliceOps->reserve(composedMap.getNumResults());
1425  for (auto resultExpr : composedMap.getResults()) {
1426  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1427  composedMap.getNumSymbols(), resultExpr);
1428  sliceOps->push_back(builder.create<AffineApplyOp>(
1429  opInst->getLoc(), singleResMap, composedOpOperands));
1430  }
1431 
1432  // Construct the new operands that include the results from the composed
1433  // affine apply op above instead of existing ones (subOperands). So, they
1434  // differ from opInst's operands only for the operands in 'subOperands',
1435  // which are replaced by the corresponding results from 'sliceOps'.
1436  SmallVector<Value, 4> newOperands(opInst->getOperands());
1437  for (Value &operand : newOperands) {
1438  // Replace the subOperands from among the new operands.
1439  unsigned j, f;
1440  for (j = 0, f = subOperands.size(); j < f; j++) {
1441  if (operand == subOperands[j])
1442  break;
1443  }
1444  if (j < subOperands.size())
1445  operand = (*sliceOps)[j];
1446  }
1447  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1448  opInst->setOperand(idx, newOperands[idx]);
1449 }
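// An illustrative usage sketch, assuming `dmaOp` is some operation whose index
// operands come from affine.apply ops shared with other operations (the name
// is hypothetical):
//
//   SmallVector<AffineApplyOp, 4> sliceOps;
//   createAffineComputationSlice(dmaOp, &sliceOps);
//   if (!sliceOps.empty()) {
//     // `dmaOp` now consumes its own private affine.apply chain, so it can be
//     // shifted/delayed independently of the other users.
//   }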
1450 
1451 /// Enum to set patterns of affine expr in tiled-layout map.
1452 /// TileFloorDiv: <dim expr> div <tile size>
1453 /// TileMod: <dim expr> mod <tile size>
1454 /// TileNone: None of the above
1455 /// Example:
1456 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1457 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1458 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1459 /// "d0 mod 128" and "d1 mod 256" ==> TileMod
1460 enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1461 
1462 /// Check if `map` is a tiled layout. In a tiled layout, specific k dimensions
1463 /// that are floordiv'ed by their respective tile sizes also appear in a mod
1464 /// with the same tile sizes, and no other expression involves those k
1465 /// dimensions. This function stores, in `tileSizePos`, tuples of the tile-size
1466 /// AffineExpr and the positions of the corresponding `floordiv` and `mod`
1467 /// results. If `map` is not a tiled layout, `tileSizePos` is left empty.
1468 static LogicalResult getTileSizePos(
1469  AffineMap map,
1470  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1471  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1472  // `floordiv` and its position in `map` output.
1473  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1474  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1475  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1476  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
1477  unsigned pos = 0;
1478  for (AffineExpr expr : map.getResults()) {
1479  if (expr.getKind() == AffineExprKind::FloorDiv) {
1480  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1481  if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1482  floordivExprs.emplace_back(
1483  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1484  }
1485  pos++;
1486  }
1487  // Not tiled layout if `floordivExprs` is empty.
1488  if (floordivExprs.empty()) {
1489  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1490  return success();
1491  }
1492 
1493  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1494  // not tiled layout.
1495  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1496  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1497  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1498  unsigned floordivPos = std::get<2>(fexpr);
1499 
1500  // Walk each affine expr of the `map` output except `fexpr`, and check if the
1501  // LHS and RHS of `fexpr` are used as the LHS and RHS of a `mod`. If the LHS of
1502  // `fexpr` is used in any other expr, the map is not tiled. Non-tiled examples:
1503  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1504  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1505  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1506  // 256)>
1507  bool found = false;
1508  pos = 0;
1509  for (AffineExpr expr : map.getResults()) {
1510  bool notTiled = false;
1511  if (pos != floordivPos) {
1512  expr.walk([&](AffineExpr e) {
1513  if (e == floordivExprLHS) {
1514  if (expr.getKind() == AffineExprKind::Mod) {
1515  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1516  // If LHS and RHS of `mod` are the same with those of floordiv.
1517  if (floordivExprLHS == binaryExpr.getLHS() &&
1518  floordivExprRHS == binaryExpr.getRHS()) {
1519  // Save tile size (RHS of `mod`), and position of `floordiv` and
1520  // `mod` if same expr with `mod` is not found yet.
1521  if (!found) {
1522  tileSizePos.emplace_back(
1523  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1524  found = true;
1525  } else {
1526  // Non tiled layout: there are multiple `mod`s with the same LHS.
1527  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1528  // mod 256, d2 mod 256)>
1529  notTiled = true;
1530  }
1531  } else {
1532  // Non tiled layout: RHS of `mod` is different from `floordiv`.
1533  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1534  // mod 128)>
1535  notTiled = true;
1536  }
1537  } else {
1538  // Non tiled layout: LHS is the same, but not `mod`.
1539  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1540  // floordiv 256)>
1541  notTiled = true;
1542  }
1543  }
1544  });
1545  }
1546  if (notTiled) {
1547  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1548  return success();
1549  }
1550  pos++;
1551  }
1552  }
1553  return success();
1554 }
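// A worked example of the check above (illustrative). For
//   #tiled = affine_map<(d0, d1) -> (d0 floordiv 128, d1 floordiv 256,
//                                    d0 mod 128, d1 mod 256)>
// `tileSizePos` ends up holding {(128, 0, 2), (256, 1, 3)}: the tile-size expr,
// the result position of the `floordiv`, and the result position of the
// matching `mod`. For the non-tiled maps listed in the comments above,
// `tileSizePos` is left empty.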
1555 
1556 /// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1557 /// after normalization. Dimensions that include dynamic dimensions in the map
1558 /// output will become dynamic dimensions. Return true if `dim` is a dynamic
1559 /// dimension.
1560 ///
1561 /// Example:
1562 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1563 ///
1564 /// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output are dynamic.
1565 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1566 static bool
1567 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1568  SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1569  AffineExpr expr = layoutMap.getResults()[dim];
1570  // Check if affine expr of the dimension includes dynamic dimension of input
1571  // memrefType.
1572  MLIRContext *context = layoutMap.getContext();
1573  return expr
1574  .walk([&](AffineExpr e) {
1575  if (isa<AffineDimExpr>(e) &&
1576  llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1577  return e == getAffineDimExpr(dim, context);
1578  }))
1579  return WalkResult::interrupt();
1580  return WalkResult::advance();
1581  })
1582  .wasInterrupted();
1583 }
1584 
1585 /// Create affine expr to calculate dimension size for a tiled-layout map.
1586 static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1587                                                   TileExprPattern pat) {
1588  // Create map output for the patterns.
1589  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1590  // "mod <tile size>" ==> "<tile size>"
1591  AffineExpr newMapOutput;
1592  AffineBinaryOpExpr binaryExpr = nullptr;
1593  switch (pat) {
1594  case TileExprPattern::TileMod:
1595  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1596  newMapOutput = binaryExpr.getRHS();
1597  break;
1598  case TileExprPattern::TileFloorDiv:
1599  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1600  newMapOutput = getAffineBinaryOpExpr(
1601  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1602  break;
1603  default:
1604  newMapOutput = oldMapOutput;
1605  }
1606  return newMapOutput;
1607 }
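// Illustrative note on the rewrite above: for an input extent N tiled by 32,
// the outer dimension needs ceil(N / 32) tiles, so "d1 floordiv 32" is
// rewritten to "d1 ceildiv 32", while the inner dimension always spans a full
// tile, so "d1 mod 32" is rewritten to the tile size 32 itself.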
1608 
1609 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1610 /// create `newDynamicSizes` from them by using AffineApplyOp.
1611 ///
1612 /// Steps for normalizing dynamic memrefs for a tiled layout map
1613 /// Example:
1614 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1615 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1616 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1617 ///
1618 /// (Before this function)
1619 /// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
1620 /// single layout map is supported.
1621 ///
1622 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1623 /// is memref<4x?x?xf32> in the above example.
1624 ///
1625 /// (In this function)
1626 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1627 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1628 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1629 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1630 /// - New map in the above example
1631 /// #map0 = affine_map<(d0, d1) -> (d0)>
1632 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1633 /// #map2 = affine_map<(d0, d1) -> (32)>
1634 ///
1635 /// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1636 /// is used in dynamicSizes of new AllocOp.
1637 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1638 /// %c4 = arith.constant 4 : index
1639 /// %1 = affine.apply #map1(%c4, %0)
1640 /// %2 = affine.apply #map2(%c4, %0)
1641 static void createNewDynamicSizes(MemRefType oldMemRefType,
1642  MemRefType newMemRefType, AffineMap map,
1643  memref::AllocOp *allocOp, OpBuilder b,
1644  SmallVectorImpl<Value> &newDynamicSizes) {
1645  // Create new input for AffineApplyOp.
1646  SmallVector<Value, 4> inAffineApply;
1647  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1648  unsigned dynIdx = 0;
1649  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1650  if (oldMemRefShape[d] < 0) {
1651  // Use dynamicSizes of allocOp for dynamic dimension.
1652  inAffineApply.emplace_back(allocOp->getDynamicSizes()[dynIdx]);
1653  dynIdx++;
1654  } else {
1655  // Create ConstantOp for static dimension.
1656  auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1657  inAffineApply.emplace_back(
1658  b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
1659  }
1660  }
1661 
1662  // Create new map to calculate each dimension size of new memref for each
1663  // original map output. Only for dynamic dimensions of `newMemRefType`.
1664  unsigned newDimIdx = 0;
1665  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1666  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1667  (void)getTileSizePos(map, tileSizePos);
1668  for (AffineExpr expr : map.getResults()) {
1669  if (newMemRefShape[newDimIdx] < 0) {
1670  // Create new maps to calculate each dimension size of new memref.
1671  TileExprPattern pat = TileExprPattern::TileNone;
1672  for (auto pos : tileSizePos) {
1673  if (newDimIdx == std::get<1>(pos))
1674  pat = TileExprPattern::TileFloorDiv;
1675  else if (newDimIdx == std::get<2>(pos))
1676  pat = TileExprPattern::TileMod;
1677  }
1678  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1679  AffineMap newMap =
1680  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1681  Value affineApp =
1682  b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
1683  newDynamicSizes.emplace_back(affineApp);
1684  }
1685  newDimIdx++;
1686  }
1687 }
1688 
1689 // TODO: Currently works for static memrefs with a single layout map.
1690 LogicalResult mlir::affine::normalizeMemRef(memref::AllocOp *allocOp) {
1691  MemRefType memrefType = allocOp->getType();
1692  OpBuilder b(*allocOp);
1693 
1694  // Fetch a new memref type after normalizing the old memref to have an
1695  // identity map layout.
1696  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1697  if (newMemRefType == memrefType)
1698  // Either memrefType already had an identity map or the map couldn't be
1699  // transformed to an identity map.
1700  return failure();
1701 
1702  Value oldMemRef = allocOp->getResult();
1703 
1704  SmallVector<Value, 4> symbolOperands(allocOp->getSymbolOperands());
1705  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1706  memref::AllocOp newAlloc;
1707  // Check if `layoutMap` is a tiled layout. Only a single layout map is
1708  // supported for normalizing dynamic memrefs.
1709  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1710  (void)getTileSizePos(layoutMap, tileSizePos);
1711  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1712  MemRefType oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1713  SmallVector<Value, 4> newDynamicSizes;
1714  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1715  newDynamicSizes);
1716  // Add the new dynamic sizes in new AllocOp.
1717  newAlloc =
1718  b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1719  newDynamicSizes, allocOp->getAlignmentAttr());
1720  } else {
1721  newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1722  allocOp->getAlignmentAttr());
1723  }
1724  // Replace all uses of the old memref.
1725  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1726  /*extraIndices=*/{},
1727  /*indexRemap=*/layoutMap,
1728  /*extraOperands=*/{},
1729  /*symbolOperands=*/symbolOperands,
1730  /*domOpFilter=*/nullptr,
1731  /*postDomOpFilter=*/nullptr,
1732  /*allowNonDereferencingOps=*/true))) {
1733  // If it failed (due to escapes for example), bail out.
1734  newAlloc.erase();
1735  return failure();
1736  }
1737  // Replace any uses of the original alloc op and erase it. All remaining uses
1738  // have to be dealloc's; replaceAllMemRefUsesWith above would've failed otherwise.
1739  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1740  return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1741  }));
1742  oldMemRef.replaceAllUsesWith(newAlloc);
1743  allocOp->erase();
1744  return success();
1745 }
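// A minimal usage sketch, assuming a surrounding `funcOp` (hypothetical name)
// whose allocs should be normalized; allocs whose layout cannot be turned into
// an identity map are left untouched because normalizeMemRef returns failure
// for them.
//
//   funcOp.walk([&](memref::AllocOp allocOp) {
//     (void)normalizeMemRef(&allocOp);
//   });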
1746 
1747 MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1748  unsigned rank = memrefType.getRank();
1749  if (rank == 0)
1750  return memrefType;
1751 
1752  if (memrefType.getLayout().isIdentity()) {
1753  // Either no map is associated with this memref or this memref has
1754  // a trivial (identity) map.
1755  return memrefType;
1756  }
1757  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1758  unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1759 
1760  // We don't do any checks for one-to-one'ness; we assume that it is
1761  // one-to-one.
1762 
1763  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1764  // for now.
1765  // TODO: Normalize the other types of dynamic memrefs.
1766  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1767  (void)getTileSizePos(layoutMap, tileSizePos);
1768  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1769  return memrefType;
1770 
1771  // We have a single map that is not an identity map. Create a new memref
1772  // with the right shape and an identity layout map.
1773  ArrayRef<int64_t> shape = memrefType.getShape();
1774  // FlatAffineValueConstraints may later on use symbolicOperands.
1775  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1776  SmallVector<unsigned, 4> memrefTypeDynDims;
1777  for (unsigned d = 0; d < rank; ++d) {
1778  // Use constraint system only in static dimensions.
1779  if (shape[d] > 0) {
1780  fac.addBound(BoundType::LB, d, 0);
1781  fac.addBound(BoundType::UB, d, shape[d] - 1);
1782  } else {
1783  memrefTypeDynDims.emplace_back(d);
1784  }
1785  }
1786  // We compose this map with the original index (logical) space to derive
1787  // the upper bounds for the new index space.
1788  unsigned newRank = layoutMap.getNumResults();
1789  if (failed(fac.composeMatchingMap(layoutMap)))
1790  return memrefType;
1791  // TODO: Handle semi-affine maps.
1792  // Project out the old data dimensions.
1793  fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1794  SmallVector<int64_t, 4> newShape(newRank);
1795  MLIRContext *context = memrefType.getContext();
1796  for (unsigned d = 0; d < newRank; ++d) {
1797  // Check if this dimension is dynamic.
1798  if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1799  newShape[d] = ShapedType::kDynamic;
1800  continue;
1801  }
1802  // The lower bound for the shape is always zero.
1803  std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1804  // For a static memref and an affine map with no symbols, this is
1805  // always bounded. However, when we have symbols, we may not be able to
1806  // obtain a constant upper bound. Also, mapping to a negative space is
1807  // invalid for normalization.
1808  if (!ubConst.has_value() || *ubConst < 0) {
1809  LLVM_DEBUG(llvm::dbgs()
1810  << "can't normalize map due to unknown/invalid upper bound");
1811  return memrefType;
1812  }
1813  // The extent of a static dimension of the new memrefType is its constant
1813  // upper bound plus one; dynamic dimensions were handled above.
1814  newShape[d] = *ubConst + 1;
1815  }
1816 
1817  // Create the new memref type after trivializing the old layout map.
1818  auto newMemRefType =
1819  MemRefType::Builder(memrefType)
1820  .setShape(newShape)
1821  .setLayout(AffineMapAttr::get(
1822  AffineMap::getMultiDimIdentityMap(newRank, context)));
1823  return newMemRefType;
1824 }
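// A worked example of the normalization above (illustrative). With
//   #tile = affine_map<(d0, d1) -> (d0 floordiv 32, d1 floordiv 64,
//                                   d0 mod 32, d1 mod 64)>
// the type memref<128x256xf32, #tile> normalizes to memref<4x4x32x64xf32> with
// an identity layout: each new extent is the constant upper bound of the
// corresponding map result plus one.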
1825 
1826 DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1827                                     Value rhs) {
1828  DivModValue result;
1829  AffineExpr d0, d1;
1830  bindDims(b.getContext(), d0, d1);
1831  result.quotient =
1832  affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1833  result.remainder =
1834  affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1835  return result;
1836 }
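// An illustrative sketch of what the helper above materializes (names are
// assumptions; the composed applies may fold further):
//
//   DivModValue dm = getDivMod(b, loc, lhs, rhs);
//   // dm.quotient  : roughly affine.apply (d0, d1) -> (d0 floordiv d1) on (lhs, rhs)
//   // dm.remainder : roughly affine.apply (d0, d1) -> (d0 mod d1) on (lhs, rhs)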
1837 
1838 /// Create IR that computes the product of all elements in the set.
1839 static FailureOr<OpFoldResult> getIndexProduct(OpBuilder &b, Location loc,
1840  ArrayRef<Value> set) {
1841  if (set.empty())
1842  return failure();
1843  OpFoldResult result = set[0];
1844  AffineExpr s0, s1;
1845  bindSymbols(b.getContext(), s0, s1);
1846  for (unsigned i = 1, e = set.size(); i < e; i++)
1847  result = makeComposedFoldedAffineApply(b, loc, s0 * s1, {result, set[i]});
1848  return result;
1849 }
1850 
1851 FailureOr<SmallVector<Value>>
1852 mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1853                                ArrayRef<Value> basis) {
1854  unsigned numDims = basis.size();
1855 
1856  SmallVector<Value> divisors;
1857  for (unsigned i = 1; i < numDims; i++) {
1858  ArrayRef<Value> slice = basis.drop_front(i);
1859  FailureOr<OpFoldResult> prod = getIndexProduct(b, loc, slice);
1860  if (failed(prod))
1861  return failure();
1862  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, *prod));
1863  }
1864 
1865  SmallVector<Value> results;
1866  results.reserve(divisors.size() + 1);
1867  Value residual = linearIndex;
1868  for (Value divisor : divisors) {
1869  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1870  results.push_back(divMod.quotient);
1871  residual = divMod.remainder;
1872  }
1873  results.push_back(residual);
1874  return results;
1875 }
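// A worked example (illustrative). With basis = [4, 8, 16] the divisors are
// [8 * 16, 16] = [128, 16], so for a linear index l the emitted IR computes
//   i0 = l floordiv 128
//   i1 = (l mod 128) floordiv 16
//   i2 = (l mod 128) mod 16
// which inverts l = (i0 * 8 + i1) * 16 + i2 for l < 4 * 8 * 16.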
1876 
1877 OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
1878                                           ArrayRef<OpFoldResult> basis,
1879  ImplicitLocOpBuilder &builder) {
1880  assert(multiIndex.size() == basis.size());
1881  SmallVector<AffineExpr> basisAffine;
1882  for (size_t i = 0; i < basis.size(); ++i) {
1883  basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
1884  }
1885 
1886  SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
1887  SmallVector<OpFoldResult> strides;
1888  strides.reserve(stridesAffine.size());
1889  llvm::transform(stridesAffine, std::back_inserter(strides),
1890  [&builder, &basis](AffineExpr strideExpr) {
1891  return affine::makeComposedFoldedAffineApply(
1892  builder, builder.getLoc(), strideExpr, basis);
1893  });
1894 
1895  auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
1896  OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
1897  return affine::makeComposedFoldedAffineApply(
1898  builder, builder.getLoc(), linearIndexExpr, multiIndexAndStrides);
1899 }
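// A worked example (illustrative). With multiIndex = [i0, i1, i2] and
// basis = [4, 8, 16], computeStrides yields the symbolic strides
// [s1 * s2, s2, 1] = [128, 16, 1], so the folded affine.apply computes
// i0 * 128 + i1 * 16 + i2.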