MLIR  19.0.0git
Utils.cpp
Go to the documentation of this file.
1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
25 #include "mlir/IR/Dominance.h"
26 #include "mlir/IR/IRMapping.h"
28 #include "mlir/IR/IntegerSet.h"
30 #include <optional>
31 
32 #define DEBUG_TYPE "affine-utils"
33 
34 using namespace mlir;
35 using namespace affine;
36 using namespace presburger;
37 
38 namespace {
39 /// Visit affine expressions recursively and build the sequence of operations
40 /// that correspond to it. Visitation functions return an Value of the
41 /// expression subtree they visited or `nullptr` on error.
42 class AffineApplyExpander
43  : public AffineExprVisitor<AffineApplyExpander, Value> {
44 public:
45  /// This internal class expects arguments to be non-null, checks must be
46  /// performed at the call site.
47  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
48  ValueRange symbolValues, Location loc)
49  : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
50  loc(loc) {}
51 
52  template <typename OpTy>
53  Value buildBinaryExpr(AffineBinaryOpExpr expr) {
54  auto lhs = visit(expr.getLHS());
55  auto rhs = visit(expr.getRHS());
56  if (!lhs || !rhs)
57  return nullptr;
58  auto op = builder.create<OpTy>(loc, lhs, rhs);
59  return op.getResult();
60  }
61 
62  Value visitAddExpr(AffineBinaryOpExpr expr) {
63  return buildBinaryExpr<arith::AddIOp>(expr);
64  }
65 
66  Value visitMulExpr(AffineBinaryOpExpr expr) {
67  return buildBinaryExpr<arith::MulIOp>(expr);
68  }
69 
70  /// Euclidean modulo operation: negative RHS is not allowed.
71  /// Remainder of the euclidean integer division is always non-negative.
72  ///
73  /// Implemented as
74  ///
75  /// a mod b =
76  /// let remainder = srem a, b;
77  /// negative = a < 0 in
78  /// select negative, remainder + b, remainder.
79  Value visitModExpr(AffineBinaryOpExpr expr) {
80  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
81  if (rhsConst.getValue() <= 0) {
82  emitError(loc, "modulo by non-positive value is not supported");
83  return nullptr;
84  }
85  }
86 
87  auto lhs = visit(expr.getLHS());
88  auto rhs = visit(expr.getRHS());
89  assert(lhs && rhs && "unexpected affine expr lowering failure");
90 
91  Value remainder = builder.create<arith::RemSIOp>(loc, lhs, rhs);
92  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
93  Value isRemainderNegative = builder.create<arith::CmpIOp>(
94  loc, arith::CmpIPredicate::slt, remainder, zeroCst);
95  Value correctedRemainder =
96  builder.create<arith::AddIOp>(loc, remainder, rhs);
97  Value result = builder.create<arith::SelectOp>(
98  loc, isRemainderNegative, correctedRemainder, remainder);
99  return result;
100  }
101 
102  /// Floor division operation (rounds towards negative infinity).
103  ///
104  /// For positive divisors, it can be implemented without branching and with a
105  /// single division operation as
106  ///
107  /// a floordiv b =
108  /// let negative = a < 0 in
109  /// let absolute = negative ? -a - 1 : a in
110  /// let quotient = absolute / b in
111  /// negative ? -quotient - 1 : quotient
112  ///
113  /// Note: this lowering does not use arith.floordivsi because the lowering of
114  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
115  /// not one but two arith.divsi. That could be changed to one divsi, but one
116  /// way or another, going through arith.floordivsi will result in more complex
117  /// IR because arith.floordivsi is more general than affine floordiv in that
118  /// it supports negative RHS.
119  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
120  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
121  if (rhsConst.getValue() <= 0) {
122  emitError(loc, "division by non-positive value is not supported");
123  return nullptr;
124  }
125  }
126  auto lhs = visit(expr.getLHS());
127  auto rhs = visit(expr.getRHS());
128  assert(lhs && rhs && "unexpected affine expr lowering failure");
129 
130  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
131  Value noneCst = builder.create<arith::ConstantIndexOp>(loc, -1);
132  Value negative = builder.create<arith::CmpIOp>(
133  loc, arith::CmpIPredicate::slt, lhs, zeroCst);
134  Value negatedDecremented = builder.create<arith::SubIOp>(loc, noneCst, lhs);
135  Value dividend =
136  builder.create<arith::SelectOp>(loc, negative, negatedDecremented, lhs);
137  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
138  Value correctedQuotient =
139  builder.create<arith::SubIOp>(loc, noneCst, quotient);
140  Value result = builder.create<arith::SelectOp>(loc, negative,
141  correctedQuotient, quotient);
142  return result;
143  }
144 
145  /// Ceiling division operation (rounds towards positive infinity).
146  ///
147  /// For positive divisors, it can be implemented without branching and with a
148  /// single division operation as
149  ///
150  /// a ceildiv b =
151  /// let negative = a <= 0 in
152  /// let absolute = negative ? -a : a - 1 in
153  /// let quotient = absolute / b in
154  /// negative ? -quotient : quotient + 1
155  ///
156  /// Note: not using arith.ceildivsi for the same reason as explained in the
157  /// visitFloorDivExpr comment.
158  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
159  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
160  if (rhsConst.getValue() <= 0) {
161  emitError(loc, "division by non-positive value is not supported");
162  return nullptr;
163  }
164  }
165  auto lhs = visit(expr.getLHS());
166  auto rhs = visit(expr.getRHS());
167  assert(lhs && rhs && "unexpected affine expr lowering failure");
168 
169  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
170  Value oneCst = builder.create<arith::ConstantIndexOp>(loc, 1);
171  Value nonPositive = builder.create<arith::CmpIOp>(
172  loc, arith::CmpIPredicate::sle, lhs, zeroCst);
173  Value negated = builder.create<arith::SubIOp>(loc, zeroCst, lhs);
174  Value decremented = builder.create<arith::SubIOp>(loc, lhs, oneCst);
175  Value dividend =
176  builder.create<arith::SelectOp>(loc, nonPositive, negated, decremented);
177  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
178  Value negatedQuotient =
179  builder.create<arith::SubIOp>(loc, zeroCst, quotient);
180  Value incrementedQuotient =
181  builder.create<arith::AddIOp>(loc, quotient, oneCst);
182  Value result = builder.create<arith::SelectOp>(
183  loc, nonPositive, negatedQuotient, incrementedQuotient);
184  return result;
185  }
186 
187  Value visitConstantExpr(AffineConstantExpr expr) {
188  auto op = builder.create<arith::ConstantIndexOp>(loc, expr.getValue());
189  return op.getResult();
190  }
191 
192  Value visitDimExpr(AffineDimExpr expr) {
193  assert(expr.getPosition() < dimValues.size() &&
194  "affine dim position out of range");
195  return dimValues[expr.getPosition()];
196  }
197 
198  Value visitSymbolExpr(AffineSymbolExpr expr) {
199  assert(expr.getPosition() < symbolValues.size() &&
200  "symbol dim position out of range");
201  return symbolValues[expr.getPosition()];
202  }
203 
204 private:
205  OpBuilder &builder;
206  ValueRange dimValues;
207  ValueRange symbolValues;
208 
209  Location loc;
210 };
211 } // namespace
212 
213 /// Create a sequence of operations that implement the `expr` applied to the
214 /// given dimension and symbol values.
216  AffineExpr expr,
217  ValueRange dimValues,
218  ValueRange symbolValues) {
219  return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
220 }
221 
222 /// Create a sequence of operations that implement the `affineMap` applied to
223 /// the given `operands` (as it it were an AffineApplyOp).
224 std::optional<SmallVector<Value, 8>>
226  AffineMap affineMap, ValueRange operands) {
227  auto numDims = affineMap.getNumDims();
228  auto expanded = llvm::to_vector<8>(
229  llvm::map_range(affineMap.getResults(),
230  [numDims, &builder, loc, operands](AffineExpr expr) {
231  return expandAffineExpr(builder, loc, expr,
232  operands.take_front(numDims),
233  operands.drop_front(numDims));
234  }));
235  if (llvm::all_of(expanded, [](Value v) { return v; }))
236  return expanded;
237  return std::nullopt;
238 }
239 
240 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
241 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
242 /// the rest of the op.
243 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
244  if (elseBlock)
245  assert(ifOp.hasElse() && "else block expected");
246 
247  Block *destBlock = ifOp->getBlock();
248  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
249  destBlock->getOperations().splice(
250  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
251  std::prev(srcBlock->end()));
252  ifOp.erase();
253 }
254 
255 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
256 /// on. The `ifOp` could be hoisted and placed right before such an operation.
257 /// This method assumes that the ifOp has been canonicalized (to be correct and
258 /// effective).
259 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
260  // Walk up the parents past all for op that this conditional is invariant on.
261  auto ifOperands = ifOp.getOperands();
262  auto *res = ifOp.getOperation();
263  while (!isa<func::FuncOp>(res->getParentOp())) {
264  auto *parentOp = res->getParentOp();
265  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
266  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
267  break;
268  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
269  for (auto iv : parallelOp.getIVs())
270  if (llvm::is_contained(ifOperands, iv))
271  break;
272  } else if (!isa<AffineIfOp>(parentOp)) {
273  // Won't walk up past anything other than affine.for/if ops.
274  break;
275  }
276  // You can always hoist up past any affine.if ops.
277  res = parentOp;
278  }
279  return res;
280 }
281 
282 /// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
283 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
284 /// otherwise the same `ifOp`.
285 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
286  // No hoisting to do.
287  if (hoistOverOp == ifOp)
288  return ifOp;
289 
290  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
291  // the else block. Then drop the else block of the original 'if' in the 'then'
292  // branch while promoting its then block, and analogously drop the 'then'
293  // block of the original 'if' from the 'else' branch while promoting its else
294  // block.
295  IRMapping operandMap;
296  OpBuilder b(hoistOverOp);
297  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
298  ifOp.getOperands(),
299  /*elseBlock=*/true);
300 
301  // Create a clone of hoistOverOp to use for the else branch of the hoisted
302  // conditional. The else block may get optimized away if empty.
303  Operation *hoistOverOpClone = nullptr;
304  // We use this unique name to identify/find `ifOp`'s clone in the else
305  // version.
306  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
307  operandMap.clear();
308  b.setInsertionPointAfter(hoistOverOp);
309  // We'll set an attribute to identify this op in a clone of this sub-tree.
310  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
311  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
312 
313  // Promote the 'then' block of the original affine.if in the then version.
314  promoteIfBlock(ifOp, /*elseBlock=*/false);
315 
316  // Move the then version to the hoisted if op's 'then' block.
317  auto *thenBlock = hoistedIfOp.getThenBlock();
318  thenBlock->getOperations().splice(thenBlock->begin(),
319  hoistOverOp->getBlock()->getOperations(),
320  Block::iterator(hoistOverOp));
321 
322  // Find the clone of the original affine.if op in the else version.
323  AffineIfOp ifCloneInElse;
324  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
325  if (!ifClone->getAttr(idForIfOp))
326  return WalkResult::advance();
327  ifCloneInElse = ifClone;
328  return WalkResult::interrupt();
329  });
330  assert(ifCloneInElse && "if op clone should exist");
331  // For the else block, promote the else block of the original 'if' if it had
332  // one; otherwise, the op itself is to be erased.
333  if (!ifCloneInElse.hasElse())
334  ifCloneInElse.erase();
335  else
336  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
337 
338  // Move the else version into the else block of the hoisted if op.
339  auto *elseBlock = hoistedIfOp.getElseBlock();
340  elseBlock->getOperations().splice(
341  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
342  Block::iterator(hoistOverOpClone));
343 
344  return hoistedIfOp;
345 }
346 
349  ArrayRef<LoopReduction> parallelReductions,
350  AffineParallelOp *resOp) {
351  // Fail early if there are iter arguments that are not reductions.
352  unsigned numReductions = parallelReductions.size();
353  if (numReductions != forOp.getNumIterOperands())
354  return failure();
355 
356  Location loc = forOp.getLoc();
357  OpBuilder outsideBuilder(forOp);
358  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
359  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
360  AffineMap upperBoundMap = forOp.getUpperBoundMap();
361  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
362 
363  // Creating empty 1-D affine.parallel op.
364  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
365  parallelReductions, [](const LoopReduction &red) { return red.value; }));
366  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
367  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
368  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
369  loc, ValueRange(reducedValues).getTypes(), reductionKinds,
370  llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
371  llvm::ArrayRef(upperBoundMap), upperBoundOperands,
372  llvm::ArrayRef(forOp.getStepAsInt()));
373  // Steal the body of the old affine for op.
374  newPloop.getRegion().takeBody(forOp.getRegion());
375  Operation *yieldOp = &newPloop.getBody()->back();
376 
377  // Handle the initial values of reductions because the parallel loop always
378  // starts from the neutral value.
379  SmallVector<Value> newResults;
380  newResults.reserve(numReductions);
381  for (unsigned i = 0; i < numReductions; ++i) {
382  Value init = forOp.getInits()[i];
383  // This works because we are only handling single-op reductions at the
384  // moment. A switch on reduction kind or a mechanism to collect operations
385  // participating in the reduction will be necessary for multi-op reductions.
386  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
387  assert(reductionOp && "yielded value is expected to be produced by an op");
388  outsideBuilder.getInsertionBlock()->getOperations().splice(
389  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
390  reductionOp);
391  reductionOp->setOperands({init, newPloop->getResult(i)});
392  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
393  }
394 
395  // Update the loop terminator to yield reduced values bypassing the reduction
396  // operation itself (now moved outside of the loop) and erase the block
397  // arguments that correspond to reductions. Note that the loop always has one
398  // "main" induction variable whenc coming from a non-parallel for.
399  unsigned numIVs = 1;
400  yieldOp->setOperands(reducedValues);
401  newPloop.getBody()->eraseArguments(numIVs, numReductions);
402 
403  forOp.erase();
404  if (resOp)
405  *resOp = newPloop;
406  return success();
407 }
408 
409 // Returns success if any hoisting happened.
410 LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
411  // Bail out early if the ifOp returns a result. TODO: Consider how to
412  // properly support this case.
413  if (ifOp.getNumResults() != 0)
414  return failure();
415 
416  // Apply canonicalization patterns and folding - this is necessary for the
417  // hoisting check to be correct (operands should be composed), and to be more
418  // effective (no unused operands). Since the pattern rewriter's folding is
419  // entangled with application of patterns, we may fold/end up erasing the op,
420  // in which case we return with `folded` being set.
421  RewritePatternSet patterns(ifOp.getContext());
422  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
423  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
424  GreedyRewriteConfig config;
426  bool erased;
427  (void)applyOpPatternsAndFold(ifOp.getOperation(), frozenPatterns, config,
428  /*changed=*/nullptr, &erased);
429  if (erased) {
430  if (folded)
431  *folded = true;
432  return failure();
433  }
434  if (folded)
435  *folded = false;
436 
437  // The folding above should have ensured this, but the affine.if's
438  // canonicalization is missing composition of affine.applys into it.
439  assert(llvm::all_of(ifOp.getOperands(),
440  [](Value v) {
441  return isTopLevelValue(v) || isAffineForInductionVar(v);
442  }) &&
443  "operands not composed");
444 
445  // We are going hoist as high as possible.
446  // TODO: this could be customized in the future.
447  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
448 
449  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
450  // Nothing to hoist over.
451  if (hoistedIfOp == ifOp)
452  return failure();
453 
454  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
455  // a sequence of affine.fors that are all perfectly nested).
457  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
458  frozenPatterns);
459 
460  return success();
461 }
462 
463 // Return the min expr after replacing the given dim.
466  bool positivePath) {
467  if (e == dim)
468  return positivePath ? min : max;
469  if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
470  AffineExpr lhs = bin.getLHS();
471  AffineExpr rhs = bin.getRHS();
472  if (bin.getKind() == mlir::AffineExprKind::Add)
473  return substWithMin(lhs, dim, min, max, positivePath) +
474  substWithMin(rhs, dim, min, max, positivePath);
475 
476  auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
477  auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
478  if (c1 && c1.getValue() < 0)
479  return getAffineBinaryOpExpr(
480  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
481  if (c2 && c2.getValue() < 0)
482  return getAffineBinaryOpExpr(
483  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
484  return getAffineBinaryOpExpr(
485  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
486  substWithMin(rhs, dim, min, max, positivePath));
487  }
488  return e;
489 }
490 
491 void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
492  // Loops with min/max in bounds are not normalized at the moment.
493  if (op.hasMinMaxBounds())
494  return;
495 
496  AffineMap lbMap = op.getLowerBoundsMap();
497  SmallVector<int64_t, 8> steps = op.getSteps();
498  // No need to do any work if the parallel op is already normalized.
499  bool isAlreadyNormalized =
500  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
501  int64_t step = std::get<0>(tuple);
502  auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
503  return lbExpr && lbExpr.getValue() == 0 && step == 1;
504  });
505  if (isAlreadyNormalized)
506  return;
507 
508  AffineValueMap ranges;
509  AffineValueMap::difference(op.getUpperBoundsValueMap(),
510  op.getLowerBoundsValueMap(), &ranges);
511  auto builder = OpBuilder::atBlockBegin(op.getBody());
512  auto zeroExpr = builder.getAffineConstantExpr(0);
515  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
516  int64_t step = steps[i];
517 
518  // Adjust the lower bound to be 0.
519  lbExprs.push_back(zeroExpr);
520 
521  // Adjust the upper bound expression: 'range / step'.
522  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
523  ubExprs.push_back(ubExpr);
524 
525  // Adjust the corresponding IV: 'lb + i * step'.
526  BlockArgument iv = op.getBody()->getArgument(i);
527  AffineExpr lbExpr = lbMap.getResult(i);
528  unsigned nDims = lbMap.getNumDims();
529  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
530  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
531  /*symbolCount=*/lbMap.getNumSymbols(), expr);
532 
533  // Use an 'affine.apply' op that will be simplified later in subsequent
534  // canonicalizations.
535  OperandRange lbOperands = op.getLowerBoundsOperands();
536  OperandRange dimOperands = lbOperands.take_front(nDims);
537  OperandRange symbolOperands = lbOperands.drop_front(nDims);
538  SmallVector<Value, 8> applyOperands{dimOperands};
539  applyOperands.push_back(iv);
540  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
541  auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
542  iv.replaceAllUsesExcept(apply, apply);
543  }
544 
545  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
546  op.setSteps(newSteps);
547  auto newLowerMap = AffineMap::get(
548  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
549  op.setLowerBounds({}, newLowerMap);
550  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
551  ubExprs, op.getContext());
552  op.setUpperBounds(ranges.getOperands(), newUpperMap);
553 }
554 
556  bool promoteSingleIter) {
557  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
558  return success();
559 
560  // Check if the forop is already normalized.
561  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
562  (op.getStep() == 1))
563  return success();
564 
565  // Check if the lower bound has a single result only. Loops with a max lower
566  // bound can't be normalized without additional support like
567  // affine.execute_region's. If the lower bound does not have a single result
568  // then skip this op.
569  if (op.getLowerBoundMap().getNumResults() != 1)
570  return failure();
571 
572  Location loc = op.getLoc();
573  OpBuilder opBuilder(op);
574  int64_t origLoopStep = op.getStepAsInt();
575 
576  // Construct the new upper bound value map.
577  AffineMap oldLbMap = op.getLowerBoundMap();
578  // The upper bound can have multiple results. To use
579  // AffineValueMap::difference, we need to have the same number of results in
580  // both lower and upper bound maps. So, we just create a value map for the
581  // lower bound with the only available lower bound result repeated to pad up
582  // to the number of upper bound results.
583  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
584  op.getLowerBoundMap().getResult(0));
585  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
586  AffineMap paddedLbMap =
587  AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
588  op.getContext());
589  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
590  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
591  AffineValueMap newUbValueMap;
592  // Compute the `upper bound - lower bound`.
593  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
594  (void)newUbValueMap.canonicalize();
595 
596  // Scale down the upper bound value map by the loop step.
597  unsigned numResult = newUbValueMap.getNumResults();
598  SmallVector<AffineExpr> scaleDownExprs(numResult);
599  for (unsigned i = 0; i < numResult; ++i)
600  scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
601  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
602  // step). Where `n` is the number of results in the upper bound map.
603  AffineMap scaleDownMap =
604  AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
605  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
606 
607  // Set the newly create upper bound map and operands.
608  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
609  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
610  op.setStep(1);
611 
612  // Calculate the Value of new loopIV. Create affine.apply for the value of
613  // the loopIV in normalized loop.
614  opBuilder.setInsertionPointToStart(op.getBody());
615  // Construct an affine.apply op mapping the new IV to the old IV.
616  AffineMap scaleIvMap =
617  AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
618  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
619  AffineValueMap newIvToOldIvMap;
620  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
621  (void)newIvToOldIvMap.canonicalize();
622  auto newIV = opBuilder.create<AffineApplyOp>(
623  loc, newIvToOldIvMap.getAffineMap(), newIvToOldIvMap.getOperands());
624  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
625  return success();
626 }
627 
628 /// Returns true if the memory operation of `destAccess` depends on `srcAccess`
629 /// inside of the innermost common surrounding affine loop between the two
630 /// accesses.
631 static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
632  const MemRefAccess &destAccess) {
633  // Affine dependence analysis is possible only if both ops in the same
634  // AffineScope.
635  if (getAffineScope(srcAccess.opInst) != getAffineScope(destAccess.opInst))
636  return false;
637 
638  unsigned nsLoops =
639  getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
640  DependenceResult result =
641  checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
642  return hasDependence(result);
643 }
644 
645 /// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
646 /// scope of the outermost `minSurroundingLoops` loops that surround them.
647 /// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
648 static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
649  unsigned minSurroundingLoops) {
650  MemRefAccess srcAccess(srcMemOp);
651  MemRefAccess destAccess(destMemOp);
652 
653  // Affine dependence analysis here is applicable only if both ops operate on
654  // the same memref and if `srcMemOp` and `destMemOp` are in the same
655  // AffineScope. Also, we can only check if our affine scope is isolated from
656  // above; otherwise, values can from outside of the affine scope that the
657  // check below cannot analyze.
658  Region *srcScope = getAffineScope(srcMemOp);
659  if (srcAccess.memref == destAccess.memref &&
660  srcScope == getAffineScope(destMemOp)) {
661  unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
662  FlatAffineValueConstraints dependenceConstraints;
663  for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
665  srcAccess, destAccess, d, &dependenceConstraints,
666  /*dependenceComponents=*/nullptr);
667  // A dependence failure or the presence of a dependence implies a
668  // side effect.
669  if (!noDependence(result))
670  return true;
671  }
672  // No side effect was seen.
673  return false;
674  }
675  // TODO: Check here if the memrefs alias: there is no side effect if
676  // `srcAccess.memref` and `destAccess.memref` don't alias.
677  return true;
678 }
679 
680 template <typename EffectType, typename T>
682  Operation *start, T memOp,
684  // A boolean representing whether an intervening operation could have impacted
685  // memOp.
686  bool hasSideEffect = false;
687 
688  // Check whether the effect on memOp can be caused by a given operation op.
689  Value memref = memOp.getMemRef();
690  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
691  // If the effect has alreay been found, early exit,
692  if (hasSideEffect)
693  return;
694 
695  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
697  memEffect.getEffects(effects);
698 
699  bool opMayHaveEffect = false;
700  for (auto effect : effects) {
701  // If op causes EffectType on a potentially aliasing location for
702  // memOp, mark as having the effect.
703  if (isa<EffectType>(effect.getEffect())) {
704  if (effect.getValue() && effect.getValue() != memref &&
705  !mayAlias(effect.getValue(), memref))
706  continue;
707  opMayHaveEffect = true;
708  break;
709  }
710  }
711 
712  if (!opMayHaveEffect)
713  return;
714 
715  // If the side effect comes from an affine read or write, try to
716  // prove the side effecting `op` cannot reach `memOp`.
717  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
718  // For ease, let's consider the case that `op` is a store and
719  // we're looking for other potential stores that overwrite memory after
720  // `start`, and before being read in `memOp`. In this case, we only
721  // need to consider other potential stores with depth >
722  // minSurroundingLoops since `start` would overwrite any store with a
723  // smaller number of surrounding loops before.
724  unsigned minSurroundingLoops =
725  getNumCommonSurroundingLoops(*start, *memOp);
726  if (mayHaveEffect(op, memOp, minSurroundingLoops))
727  hasSideEffect = true;
728  return;
729  }
730 
731  // We have an op with a memory effect and we cannot prove if it
732  // intervenes.
733  hasSideEffect = true;
734  return;
735  }
736 
738  // Recurse into the regions for this op and check whether the internal
739  // operations may have the side effect `EffectType` on memOp.
740  for (Region &region : op->getRegions())
741  for (Block &block : region)
742  for (Operation &op : block)
743  checkOperation(&op);
744  return;
745  }
746 
747  // Otherwise, conservatively assume generic operations have the effect
748  // on the operation
749  hasSideEffect = true;
750  };
751 
752  // Check all paths from ancestor op `parent` to the operation `to` for the
753  // effect. It is known that `to` must be contained within `parent`.
754  auto until = [&](Operation *parent, Operation *to) {
755  // TODO check only the paths from `parent` to `to`.
756  // Currently we fallback and check the entire parent op, rather than
757  // just the paths from the parent path, stopping after reaching `to`.
758  // This is conservatively correct, but could be made more aggressive.
759  assert(parent->isAncestor(to));
760  checkOperation(parent);
761  };
762 
763  // Check for all paths from operation `from` to operation `untilOp` for the
764  // given memory effect.
765  std::function<void(Operation *, Operation *)> recur =
766  [&](Operation *from, Operation *untilOp) {
767  assert(
768  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
769  "Checking for side effect between two operations without a common "
770  "ancestor");
771 
772  // If the operations are in different regions, recursively consider all
773  // path from `from` to the parent of `to` and all paths from the parent
774  // of `to` to `to`.
775  if (from->getParentRegion() != untilOp->getParentRegion()) {
776  recur(from, untilOp->getParentOp());
777  until(untilOp->getParentOp(), untilOp);
778  return;
779  }
780 
781  // Now, assuming that `from` and `to` exist in the same region, perform
782  // a CFG traversal to check all the relevant operations.
783 
784  // Additional blocks to consider.
785  SmallVector<Block *, 2> todoBlocks;
786  {
787  // First consider the parent block of `from` an check all operations
788  // after `from`.
789  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
790  iter != end && &*iter != untilOp; ++iter) {
791  checkOperation(&*iter);
792  }
793 
794  // If the parent of `from` doesn't contain `to`, add the successors
795  // to the list of blocks to check.
796  if (untilOp->getBlock() != from->getBlock())
797  for (Block *succ : from->getBlock()->getSuccessors())
798  todoBlocks.push_back(succ);
799  }
800 
802  // Traverse the CFG until hitting `to`.
803  while (!todoBlocks.empty()) {
804  Block *blk = todoBlocks.pop_back_val();
805  if (done.count(blk))
806  continue;
807  done.insert(blk);
808  for (auto &op : *blk) {
809  if (&op == untilOp)
810  break;
811  checkOperation(&op);
812  if (&op == blk->getTerminator())
813  for (Block *succ : blk->getSuccessors())
814  todoBlocks.push_back(succ);
815  }
816  }
817  };
818  recur(start, memOp);
819  return !hasSideEffect;
820 }
821 
822 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
823 /// which the load is guaranteed to retrieve. This check involves three
824 /// components: 1) The store and load must be on the same location 2) The store
825 /// must dominate (and therefore must always occur prior to) the load 3) No
826 /// other operations will overwrite the memory loaded between the given load
827 /// and store. If such a value exists, the replaced `loadOp` will be added to
828 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
829 static void forwardStoreToLoad(
830  AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
831  SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
833 
834  // The store op candidate for forwarding that satisfies all conditions
835  // to replace the load, if any.
836  Operation *lastWriteStoreOp = nullptr;
837 
838  for (auto *user : loadOp.getMemRef().getUsers()) {
839  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
840  if (!storeOp)
841  continue;
842  MemRefAccess srcAccess(storeOp);
843  MemRefAccess destAccess(loadOp);
844 
845  // 1. Check if the store and the load have mathematically equivalent
846  // affine access functions; this implies that they statically refer to the
847  // same single memref element. As an example this filters out cases like:
848  // store %A[%i0 + 1]
849  // load %A[%i0]
850  // store %A[%M]
851  // load %A[%N]
852  // Use the AffineValueMap difference based memref access equality checking.
853  if (srcAccess != destAccess)
854  continue;
855 
856  // 2. The store has to dominate the load op to be candidate.
857  if (!domInfo.dominates(storeOp, loadOp))
858  continue;
859 
860  // 3. The store must reach the load. Access function equivalence only
861  // guarantees this for accesses in the same block. The load could be in a
862  // nested block that is unreachable.
863  if (storeOp->getBlock() != loadOp->getBlock() &&
864  !mustReachAtInnermost(srcAccess, destAccess))
865  continue;
866 
867  // 4. Ensure there is no intermediate operation which could replace the
868  // value in memory.
869  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
870  mayAlias))
871  continue;
872 
873  // We now have a candidate for forwarding.
874  assert(lastWriteStoreOp == nullptr &&
875  "multiple simultaneous replacement stores");
876  lastWriteStoreOp = storeOp;
877  }
878 
879  if (!lastWriteStoreOp)
880  return;
881 
882  // Perform the actual store to load forwarding.
883  Value storeVal =
884  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
885  // Check if 2 values have the same shape. This is needed for affine vector
886  // loads and stores.
887  if (storeVal.getType() != loadOp.getValue().getType())
888  return;
889  loadOp.getValue().replaceAllUsesWith(storeVal);
890  // Record the memref for a later sweep to optimize away.
891  memrefsToErase.insert(loadOp.getMemRef());
892  // Record this to erase later.
893  loadOpsToErase.push_back(loadOp);
894 }
895 
896 template bool
898  affine::AffineReadOpInterface>(
899  mlir::Operation *, affine::AffineReadOpInterface,
900  llvm::function_ref<bool(Value, Value)>);
901 
902 // This attempts to find stores which have no impact on the final result.
903 // A writing op writeA will be eliminated if there exists an op writeB if
904 // 1) writeA and writeB have mathematically equivalent affine access functions.
905 // 2) writeB postdominates writeA.
906 // 3) There is no potential read between writeA and writeB.
907 static void findUnusedStore(AffineWriteOpInterface writeA,
908  SmallVectorImpl<Operation *> &opsToErase,
909  PostDominanceInfo &postDominanceInfo,
911 
912  for (Operation *user : writeA.getMemRef().getUsers()) {
913  // Only consider writing operations.
914  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
915  if (!writeB)
916  continue;
917 
918  // The operations must be distinct.
919  if (writeB == writeA)
920  continue;
921 
922  // Both operations must lie in the same region.
923  if (writeB->getParentRegion() != writeA->getParentRegion())
924  continue;
925 
926  // Both operations must write to the same memory.
927  MemRefAccess srcAccess(writeB);
928  MemRefAccess destAccess(writeA);
929 
930  if (srcAccess != destAccess)
931  continue;
932 
933  // writeB must postdominate writeA.
934  if (!postDominanceInfo.postDominates(writeB, writeA))
935  continue;
936 
937  // There cannot be an operation which reads from memory between
938  // the two writes.
939  if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
940  mayAlias))
941  continue;
942 
943  opsToErase.push_back(writeA);
944  break;
945  }
946 }
947 
948 // The load to load forwarding / redundant load elimination is similar to the
949 // store to load forwarding.
950 // loadA will be be replaced with loadB if:
951 // 1) loadA and loadB have mathematically equivalent affine access functions.
952 // 2) loadB dominates loadA.
953 // 3) There is no write between loadA and loadB.
954 static void loadCSE(AffineReadOpInterface loadA,
955  SmallVectorImpl<Operation *> &loadOpsToErase,
956  DominanceInfo &domInfo,
959  for (auto *user : loadA.getMemRef().getUsers()) {
960  auto loadB = dyn_cast<AffineReadOpInterface>(user);
961  if (!loadB || loadB == loadA)
962  continue;
963 
964  MemRefAccess srcAccess(loadB);
965  MemRefAccess destAccess(loadA);
966 
967  // 1. The accesses should be to be to the same location.
968  if (srcAccess != destAccess) {
969  continue;
970  }
971 
972  // 2. loadB should dominate loadA.
973  if (!domInfo.dominates(loadB, loadA))
974  continue;
975 
976  // 3. There should not be a write between loadA and loadB.
977  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
978  loadB.getOperation(), loadA, mayAlias))
979  continue;
980 
981  // Check if two values have the same shape. This is needed for affine vector
982  // loads.
983  if (loadB.getValue().getType() != loadA.getValue().getType())
984  continue;
985 
986  loadCandidates.push_back(loadB);
987  }
988 
989  // Of the legal load candidates, use the one that dominates all others
990  // to minimize the subsequent need to loadCSE
991  Value loadB;
992  for (AffineReadOpInterface option : loadCandidates) {
993  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
994  return depStore == option ||
995  domInfo.dominates(option.getOperation(),
996  depStore.getOperation());
997  })) {
998  loadB = option.getValue();
999  break;
1000  }
1001  }
1002 
1003  if (loadB) {
1004  loadA.getValue().replaceAllUsesWith(loadB);
1005  // Record this to erase later.
1006  loadOpsToErase.push_back(loadA);
1007  }
1008 }
1009 
1010 // The store to load forwarding and load CSE rely on three conditions:
1011 //
1012 // 1) store/load providing a replacement value and load being replaced need to
1013 // have mathematically equivalent affine access functions (checked after full
1014 // composition of load/store operands); this implies that they access the same
1015 // single memref element for all iterations of the common surrounding loop,
1016 //
1017 // 2) the store/load op should dominate the load op,
1018 //
1019 // 3) no operation that may write to memory read by the load being replaced can
1020 // occur after executing the instruction (load or store) providing the
1021 // replacement value and before the load being replaced (thus potentially
1022 // allowing overwriting the memory read by the load).
1023 //
1024 // The above conditions are simple to check, sufficient, and powerful for most
1025 // cases in practice - they are sufficient, but not necessary --- since they
1026 // don't reason about loops that are guaranteed to execute at least once or
1027 // multiple sources to forward from.
1028 //
1029 // TODO: more forwarding can be done when support for
1030 // loop/conditional live-out SSA values is available.
1031 // TODO: do general dead store elimination for memref's. This pass
1032 // currently only eliminates the stores only if no other loads/uses (other
1033 // than dealloc) remain.
1034 //
1035 void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1036  PostDominanceInfo &postDomInfo,
1037  AliasAnalysis &aliasAnalysis) {
1038  // Load op's whose results were replaced by those forwarded from stores.
1039  SmallVector<Operation *, 8> opsToErase;
1040 
1041  // A list of memref's that are potentially dead / could be eliminated.
1042  SmallPtrSet<Value, 4> memrefsToErase;
1043 
1044  auto mayAlias = [&](Value val1, Value val2) -> bool {
1045  return !aliasAnalysis.alias(val1, val2).isNo();
1046  };
1047 
1048  // Walk all load's and perform store to load forwarding.
1049  f.walk([&](AffineReadOpInterface loadOp) {
1050  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1051  });
1052  for (auto *op : opsToErase)
1053  op->erase();
1054  opsToErase.clear();
1055 
1056  // Walk all store's and perform unused store elimination
1057  f.walk([&](AffineWriteOpInterface storeOp) {
1058  findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1059  });
1060  for (auto *op : opsToErase)
1061  op->erase();
1062  opsToErase.clear();
1063 
1064  // Check if the store fwd'ed memrefs are now left with only stores and
1065  // deallocs and can thus be completely deleted. Note: the canonicalize pass
1066  // should be able to do this as well, but we'll do it here since we collected
1067  // these anyway.
1068  for (auto memref : memrefsToErase) {
1069  // If the memref hasn't been locally alloc'ed, skip.
1070  Operation *defOp = memref.getDefiningOp();
1071  if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1072  // TODO: if the memref was returned by a 'call' operation, we
1073  // could still erase it if the call had no side-effects.
1074  continue;
1075  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1076  return !isa<AffineWriteOpInterface>(ownerOp) &&
1077  !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1078  }))
1079  continue;
1080 
1081  // Erase all stores, the dealloc, and the alloc on the memref.
1082  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1083  user->erase();
1084  defOp->erase();
1085  }
1086 
1087  // To eliminate as many loads as possible, run load CSE after eliminating
1088  // stores. Otherwise, some stores are wrongly seen as having an intervening
1089  // effect.
1090  f.walk([&](AffineReadOpInterface loadOp) {
1091  loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1092  });
1093  for (auto *op : opsToErase)
1094  op->erase();
1095 }
1096 
1097 // Perform the replacement in `op`.
1099  Value oldMemRef, Value newMemRef, Operation *op,
1100  ArrayRef<Value> extraIndices, AffineMap indexRemap,
1101  ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1102  bool allowNonDereferencingOps) {
1103  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1104  (void)newMemRefRank; // unused in opt mode
1105  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1106  (void)oldMemRefRank; // unused in opt mode
1107  if (indexRemap) {
1108  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1109  "symbolic operand count mismatch");
1110  assert(indexRemap.getNumInputs() ==
1111  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1112  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1113  } else {
1114  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1115  }
1116 
1117  // Assert same elemental type.
1118  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1119  cast<MemRefType>(newMemRef.getType()).getElementType());
1120 
1121  SmallVector<unsigned, 2> usePositions;
1122  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1123  if (opEntry.value() == oldMemRef)
1124  usePositions.push_back(opEntry.index());
1125  }
1126 
1127  // If memref doesn't appear, nothing to do.
1128  if (usePositions.empty())
1129  return success();
1130 
1131  if (usePositions.size() > 1) {
1132  // TODO: extend it for this case when needed (rare).
1133  assert(false && "multiple dereferencing uses in a single op not supported");
1134  return failure();
1135  }
1136 
1137  unsigned memRefOperandPos = usePositions.front();
1138 
1139  OpBuilder builder(op);
1140  // The following checks if op is dereferencing memref and performs the access
1141  // index rewrites.
1142  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1143  if (!affMapAccInterface) {
1144  if (!allowNonDereferencingOps) {
1145  // Failure: memref used in a non-dereferencing context (potentially
1146  // escapes); no replacement in these cases unless allowNonDereferencingOps
1147  // is set.
1148  return failure();
1149  }
1150  op->setOperand(memRefOperandPos, newMemRef);
1151  return success();
1152  }
1153  // Perform index rewrites for the dereferencing op and then replace the op
1154  NamedAttribute oldMapAttrPair =
1155  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1156  AffineMap oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1157  unsigned oldMapNumInputs = oldMap.getNumInputs();
1158  SmallVector<Value, 4> oldMapOperands(
1159  op->operand_begin() + memRefOperandPos + 1,
1160  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1161 
1162  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1163  SmallVector<Value, 4> oldMemRefOperands;
1164  SmallVector<Value, 4> affineApplyOps;
1165  oldMemRefOperands.reserve(oldMemRefRank);
1166  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1167  for (auto resultExpr : oldMap.getResults()) {
1168  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1169  oldMap.getNumSymbols(), resultExpr);
1170  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1171  oldMapOperands);
1172  oldMemRefOperands.push_back(afOp);
1173  affineApplyOps.push_back(afOp);
1174  }
1175  } else {
1176  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1177  }
1178 
1179  // Construct new indices as a remap of the old ones if a remapping has been
1180  // provided. The indices of a memref come right after it, i.e.,
1181  // at position memRefOperandPos + 1.
1182  SmallVector<Value, 4> remapOperands;
1183  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1184  symbolOperands.size());
1185  remapOperands.append(extraOperands.begin(), extraOperands.end());
1186  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1187  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1188 
1189  SmallVector<Value, 4> remapOutputs;
1190  remapOutputs.reserve(oldMemRefRank);
1191 
1192  if (indexRemap &&
1193  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1194  // Remapped indices.
1195  for (auto resultExpr : indexRemap.getResults()) {
1196  auto singleResMap = AffineMap::get(
1197  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1198  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1199  remapOperands);
1200  remapOutputs.push_back(afOp);
1201  affineApplyOps.push_back(afOp);
1202  }
1203  } else {
1204  // No remapping specified.
1205  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1206  }
1207 
1208  SmallVector<Value, 4> newMapOperands;
1209  newMapOperands.reserve(newMemRefRank);
1210 
1211  // Prepend 'extraIndices' in 'newMapOperands'.
1212  for (Value extraIndex : extraIndices) {
1213  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1214  "invalid memory op index");
1215  newMapOperands.push_back(extraIndex);
1216  }
1217 
1218  // Append 'remapOutputs' to 'newMapOperands'.
1219  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1220 
1221  // Create new fully composed AffineMap for new op to be created.
1222  assert(newMapOperands.size() == newMemRefRank);
1223  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1224  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1225  newMap = simplifyAffineMap(newMap);
1226  canonicalizeMapAndOperands(&newMap, &newMapOperands);
1227  // Remove any affine.apply's that became dead as a result of composition.
1228  for (Value value : affineApplyOps)
1229  if (value.use_empty())
1230  value.getDefiningOp()->erase();
1231 
1232  OperationState state(op->getLoc(), op->getName());
1233  // Construct the new operation using this memref.
1234  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1235  // Insert the non-memref operands.
1236  state.operands.append(op->operand_begin(),
1237  op->operand_begin() + memRefOperandPos);
1238  // Insert the new memref value.
1239  state.operands.push_back(newMemRef);
1240 
1241  // Insert the new memref map operands.
1242  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1243 
1244  // Insert the remaining operands unmodified.
1245  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1246  oldMapNumInputs,
1247  op->operand_end());
1248 
1249  // Result types don't change. Both memref's are of the same elemental type.
1250  state.types.reserve(op->getNumResults());
1251  for (auto result : op->getResults())
1252  state.types.push_back(result.getType());
1253 
1254  // Add attribute for 'newMap', other Attributes do not change.
1255  auto newMapAttr = AffineMapAttr::get(newMap);
1256  for (auto namedAttr : op->getAttrs()) {
1257  if (namedAttr.getName() == oldMapAttrPair.getName())
1258  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1259  else
1260  state.attributes.push_back(namedAttr);
1261  }
1262 
1263  // Create the new operation.
1264  auto *repOp = builder.create(state);
1265  op->replaceAllUsesWith(repOp);
1266  op->erase();
1267 
1268  return success();
1269 }
1270 
1272  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1273  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1274  ArrayRef<Value> symbolOperands, Operation *domOpFilter,
1275  Operation *postDomOpFilter, bool allowNonDereferencingOps,
1276  bool replaceInDeallocOp) {
1277  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1278  (void)newMemRefRank; // unused in opt mode
1279  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1280  (void)oldMemRefRank;
1281  if (indexRemap) {
1282  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1283  "symbol operand count mismatch");
1284  assert(indexRemap.getNumInputs() ==
1285  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1286  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1287  } else {
1288  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1289  }
1290 
1291  // Assert same elemental type.
1292  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1293  cast<MemRefType>(newMemRef.getType()).getElementType());
1294 
1295  std::unique_ptr<DominanceInfo> domInfo;
1296  std::unique_ptr<PostDominanceInfo> postDomInfo;
1297  if (domOpFilter)
1298  domInfo = std::make_unique<DominanceInfo>(
1299  domOpFilter->getParentOfType<func::FuncOp>());
1300 
1301  if (postDomOpFilter)
1302  postDomInfo = std::make_unique<PostDominanceInfo>(
1303  postDomOpFilter->getParentOfType<func::FuncOp>());
1304 
1305  // Walk all uses of old memref; collect ops to perform replacement. We use a
1306  // DenseSet since an operation could potentially have multiple uses of a
1307  // memref (although rare), and the replacement later is going to erase ops.
1308  DenseSet<Operation *> opsToReplace;
1309  for (auto *op : oldMemRef.getUsers()) {
1310  // Skip this use if it's not dominated by domOpFilter.
1311  if (domOpFilter && !domInfo->dominates(domOpFilter, op))
1312  continue;
1313 
1314  // Skip this use if it's not post-dominated by postDomOpFilter.
1315  if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
1316  continue;
1317 
1318  // Skip dealloc's - no replacement is necessary, and a memref replacement
1319  // at other uses doesn't hurt these dealloc's.
1320  if (hasSingleEffect<MemoryEffects::Free>(op, oldMemRef) &&
1321  !replaceInDeallocOp)
1322  continue;
1323 
1324  // Check if the memref was used in a non-dereferencing context. It is fine
1325  // for the memref to be used in a non-dereferencing way outside of the
1326  // region where this replacement is happening.
1327  if (!isa<AffineMapAccessInterface>(*op)) {
1328  if (!allowNonDereferencingOps) {
1329  LLVM_DEBUG(llvm::dbgs()
1330  << "Memref replacement failed: non-deferencing memref op: \n"
1331  << *op << '\n');
1332  return failure();
1333  }
1334  // Non-dereferencing ops with the MemRefsNormalizable trait are
1335  // supported for replacement.
1336  if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
1337  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1338  "memrefs normalizable trait: \n"
1339  << *op << '\n');
1340  return failure();
1341  }
1342  }
1343 
1344  // We'll first collect and then replace --- since replacement erases the op
1345  // that has the use, and that op could be postDomFilter or domFilter itself!
1346  opsToReplace.insert(op);
1347  }
1348 
1349  for (auto *op : opsToReplace) {
1351  oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
1352  symbolOperands, allowNonDereferencingOps)))
1353  llvm_unreachable("memref replacement guaranteed to succeed here");
1354  }
1355 
1356  return success();
1357 }
1358 
1359 /// Given an operation, inserts one or more single result affine
1360 /// apply operations, results of which are exclusively used by this operation
1361 /// operation. The operands of these newly created affine apply ops are
1362 /// guaranteed to be loop iterators or terminal symbols of a function.
1363 ///
1364 /// Before
1365 ///
1366 /// affine.for %i = 0 to #map(%N)
1367 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1368 /// "send"(%idx, %A, ...)
1369 /// "compute"(%idx)
1370 ///
1371 /// After
1372 ///
1373 /// affine.for %i = 0 to #map(%N)
1374 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1375 /// "send"(%idx, %A, ...)
1376 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1377 /// "compute"(%idx_)
1378 ///
1379 /// This allows applying different transformations on send and compute (for eg.
1380 /// different shifts/delays).
1381 ///
1382 /// Returns nullptr either if none of opInst's operands were the result of an
1383 /// affine.apply and thus there was no affine computation slice to create, or if
1384 /// all the affine.apply op's supplying operands to this opInst did not have any
1385 /// uses besides this opInst; otherwise returns the list of affine.apply
1386 /// operations created in output argument `sliceOps`.
1388  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1389  // Collect all operands that are results of affine apply ops.
1390  SmallVector<Value, 4> subOperands;
1391  subOperands.reserve(opInst->getNumOperands());
1392  for (auto operand : opInst->getOperands())
1393  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1394  subOperands.push_back(operand);
1395 
1396  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1397  SmallVector<Operation *, 4> affineApplyOps;
1398  getReachableAffineApplyOps(subOperands, affineApplyOps);
1399  // Skip transforming if there are no affine maps to compose.
1400  if (affineApplyOps.empty())
1401  return;
1402 
1403  // Check if all uses of the affine apply op's lie only in this op op, in
1404  // which case there would be nothing to do.
1405  bool localized = true;
1406  for (auto *op : affineApplyOps) {
1407  for (auto result : op->getResults()) {
1408  for (auto *user : result.getUsers()) {
1409  if (user != opInst) {
1410  localized = false;
1411  break;
1412  }
1413  }
1414  }
1415  }
1416  if (localized)
1417  return;
1418 
1419  OpBuilder builder(opInst);
1420  SmallVector<Value, 4> composedOpOperands(subOperands);
1421  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1422  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1423 
1424  // Create an affine.apply for each of the map results.
1425  sliceOps->reserve(composedMap.getNumResults());
1426  for (auto resultExpr : composedMap.getResults()) {
1427  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1428  composedMap.getNumSymbols(), resultExpr);
1429  sliceOps->push_back(builder.create<AffineApplyOp>(
1430  opInst->getLoc(), singleResMap, composedOpOperands));
1431  }
1432 
1433  // Construct the new operands that include the results from the composed
1434  // affine apply op above instead of existing ones (subOperands). So, they
1435  // differ from opInst's operands only for those operands in 'subOperands', for
1436  // which they will be replaced by the corresponding one from 'sliceOps'.
1437  SmallVector<Value, 4> newOperands(opInst->getOperands());
1438  for (Value &operand : newOperands) {
1439  // Replace the subOperands from among the new operands.
1440  unsigned j, f;
1441  for (j = 0, f = subOperands.size(); j < f; j++) {
1442  if (operand == subOperands[j])
1443  break;
1444  }
1445  if (j < subOperands.size())
1446  operand = (*sliceOps)[j];
1447  }
1448  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1449  opInst->setOperand(idx, newOperands[idx]);
1450 }
1451 
1452 /// Enum to set patterns of affine expr in tiled-layout map.
1453 /// TileFloorDiv: <dim expr> div <tile size>
1454 /// TileMod: <dim expr> mod <tile size>
1455 /// TileNone: None of the above
1456 /// Example:
1457 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1458 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1459 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1460 /// "d0 mod 128" and "d1 mod 256" ==> TileMod
1462 
1463 /// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
1464 /// being floordiv'ed by respective tile sizes appeare in a mod with the same
1465 /// tile sizes, and no other expression involves those k dimensions. This
1466 /// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
1467 /// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
1468 /// tiled layout, an empty vector is returned.
1470  AffineMap map,
1471  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1472  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1473  // `floordiv` and its position in `map` output.
1474  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1475  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1476  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1478  unsigned pos = 0;
1479  for (AffineExpr expr : map.getResults()) {
1480  if (expr.getKind() == AffineExprKind::FloorDiv) {
1481  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1482  if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1483  floordivExprs.emplace_back(
1484  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1485  }
1486  pos++;
1487  }
1488  // Not tiled layout if `floordivExprs` is empty.
1489  if (floordivExprs.empty()) {
1491  return success();
1492  }
1493 
1494  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1495  // not tiled layout.
1496  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1497  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1498  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1499  unsigned floordivPos = std::get<2>(fexpr);
1500 
1501  // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
1502  // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
1503  // other expr, the map is not tiled layout. Example of non tiled layout:
1504  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1505  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1506  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1507  // 256)>
1508  bool found = false;
1509  pos = 0;
1510  for (AffineExpr expr : map.getResults()) {
1511  bool notTiled = false;
1512  if (pos != floordivPos) {
1513  expr.walk([&](AffineExpr e) {
1514  if (e == floordivExprLHS) {
1515  if (expr.getKind() == AffineExprKind::Mod) {
1516  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1517  // If LHS and RHS of `mod` are the same with those of floordiv.
1518  if (floordivExprLHS == binaryExpr.getLHS() &&
1519  floordivExprRHS == binaryExpr.getRHS()) {
1520  // Save tile size (RHS of `mod`), and position of `floordiv` and
1521  // `mod` if same expr with `mod` is not found yet.
1522  if (!found) {
1523  tileSizePos.emplace_back(
1524  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1525  found = true;
1526  } else {
1527  // Non tiled layout: Have multilpe `mod` with the same LHS.
1528  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1529  // mod 256, d2 mod 256)>
1530  notTiled = true;
1531  }
1532  } else {
1533  // Non tiled layout: RHS of `mod` is different from `floordiv`.
1534  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1535  // mod 128)>
1536  notTiled = true;
1537  }
1538  } else {
1539  // Non tiled layout: LHS is the same, but not `mod`.
1540  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1541  // floordiv 256)>
1542  notTiled = true;
1543  }
1544  }
1545  });
1546  }
1547  if (notTiled) {
1549  return success();
1550  }
1551  pos++;
1552  }
1553  }
1554  return success();
1555 }
1556 
1557 /// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1558 /// after normalization. Dimensions that include dynamic dimensions in the map
1559 /// output will become dynamic dimensions. Return true if `dim` is dynamic
1560 /// dimension.
1561 ///
1562 /// Example:
1563 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1564 ///
1565 /// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
1566 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1567 static bool
1568 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1569  SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1570  AffineExpr expr = layoutMap.getResults()[dim];
1571  // Check if affine expr of the dimension includes dynamic dimension of input
1572  // memrefType.
1573  MLIRContext *context = layoutMap.getContext();
1574  return expr
1575  .walk([&](AffineExpr e) {
1576  if (isa<AffineDimExpr>(e) &&
1577  llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1578  return e == getAffineDimExpr(dim, context);
1579  }))
1580  return WalkResult::interrupt();
1581  return WalkResult::advance();
1582  })
1583  .wasInterrupted();
1584 }
1585 
1586 /// Create affine expr to calculate dimension size for a tiled-layout map.
1588  TileExprPattern pat) {
1589  // Create map output for the patterns.
1590  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1591  // "mod <tile size>" ==> "<tile size>"
1592  AffineExpr newMapOutput;
1593  AffineBinaryOpExpr binaryExpr = nullptr;
1594  switch (pat) {
1596  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1597  newMapOutput = binaryExpr.getRHS();
1598  break;
1600  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1601  newMapOutput = getAffineBinaryOpExpr(
1602  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1603  break;
1604  default:
1605  newMapOutput = oldMapOutput;
1606  }
1607  return newMapOutput;
1608 }
1609 
1610 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1611 /// create `newDynamicSizes` from them by using AffineApplyOp.
1612 ///
1613 /// Steps for normalizing dynamic memrefs for a tiled layout map
1614 /// Example:
1615 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1616 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1617 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1618 ///
1619 /// (Before this function)
1620 /// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
1621 /// single layout map is supported.
1622 ///
1623 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1624 /// is memref<4x?x?xf32> in the above example.
1625 ///
1626 /// (In this function)
1627 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1628 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1629 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1630 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1631 /// - New map in the above example
1632 /// #map0 = affine_map<(d0, d1) -> (d0)>
1633 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1634 /// #map2 = affine_map<(d0, d1) -> (32)>
1635 ///
1636 /// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1637 /// is used in dynamicSizes of new AllocOp.
1638 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1639 /// %c4 = arith.constant 4 : index
1640 /// %1 = affine.apply #map1(%c4, %0)
1641 /// %2 = affine.apply #map2(%c4, %0)
1642 static void createNewDynamicSizes(MemRefType oldMemRefType,
1643  MemRefType newMemRefType, AffineMap map,
1644  memref::AllocOp *allocOp, OpBuilder b,
1645  SmallVectorImpl<Value> &newDynamicSizes) {
1646  // Create new input for AffineApplyOp.
1647  SmallVector<Value, 4> inAffineApply;
1648  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1649  unsigned dynIdx = 0;
1650  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1651  if (oldMemRefShape[d] < 0) {
1652  // Use dynamicSizes of allocOp for dynamic dimension.
1653  inAffineApply.emplace_back(allocOp->getDynamicSizes()[dynIdx]);
1654  dynIdx++;
1655  } else {
1656  // Create ConstantOp for static dimension.
1657  auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1658  inAffineApply.emplace_back(
1659  b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
1660  }
1661  }
1662 
1663  // Create new map to calculate each dimension size of new memref for each
1664  // original map output. Only for dynamic dimesion of `newMemRefType`.
1665  unsigned newDimIdx = 0;
1666  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1668  (void)getTileSizePos(map, tileSizePos);
1669  for (AffineExpr expr : map.getResults()) {
1670  if (newMemRefShape[newDimIdx] < 0) {
1671  // Create new maps to calculate each dimension size of new memref.
1673  for (auto pos : tileSizePos) {
1674  if (newDimIdx == std::get<1>(pos))
1676  else if (newDimIdx == std::get<2>(pos))
1678  }
1679  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1680  AffineMap newMap =
1681  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1682  Value affineApp =
1683  b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
1684  newDynamicSizes.emplace_back(affineApp);
1685  }
1686  newDimIdx++;
1687  }
1688 }
1689 
1690 // TODO: Currently works for static memrefs with a single layout map.
1692  MemRefType memrefType = allocOp->getType();
1693  OpBuilder b(*allocOp);
1694 
1695  // Fetch a new memref type after normalizing the old memref to have an
1696  // identity map layout.
1697  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1698  if (newMemRefType == memrefType)
1699  // Either memrefType already had an identity map or the map couldn't be
1700  // transformed to an identity map.
1701  return failure();
1702 
1703  Value oldMemRef = allocOp->getResult();
1704 
1705  SmallVector<Value, 4> symbolOperands(allocOp->getSymbolOperands());
1706  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1707  memref::AllocOp newAlloc;
1708  // Check if `layoutMap` is a tiled layout. Only single layout map is
1709  // supported for normalizing dynamic memrefs.
1711  (void)getTileSizePos(layoutMap, tileSizePos);
1712  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1713  MemRefType oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1714  SmallVector<Value, 4> newDynamicSizes;
1715  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1716  newDynamicSizes);
1717  // Add the new dynamic sizes in new AllocOp.
1718  newAlloc =
1719  b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1720  newDynamicSizes, allocOp->getAlignmentAttr());
1721  } else {
1722  newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1723  allocOp->getAlignmentAttr());
1724  }
1725  // Replace all uses of the old memref.
1726  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1727  /*extraIndices=*/{},
1728  /*indexRemap=*/layoutMap,
1729  /*extraOperands=*/{},
1730  /*symbolOperands=*/symbolOperands,
1731  /*domOpFilter=*/nullptr,
1732  /*postDomOpFilter=*/nullptr,
1733  /*allowNonDereferencingOps=*/true))) {
1734  // If it failed (due to escapes for example), bail out.
1735  newAlloc.erase();
1736  return failure();
1737  }
1738  // Replace any uses of the original alloc op and erase it. All remaining uses
1739  // have to be dealloc's; RAMUW above would've failed otherwise.
1740  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1741  return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1742  }));
1743  oldMemRef.replaceAllUsesWith(newAlloc);
1744  allocOp->erase();
1745  return success();
1746 }
1747 
1748 MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1749  unsigned rank = memrefType.getRank();
1750  if (rank == 0)
1751  return memrefType;
1752 
1753  if (memrefType.getLayout().isIdentity()) {
1754  // Either no maps is associated with this memref or this memref has
1755  // a trivial (identity) map.
1756  return memrefType;
1757  }
1758  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1759  unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1760 
1761  // We don't do any checks for one-to-one'ness; we assume that it is
1762  // one-to-one.
1763 
1764  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1765  // for now.
1766  // TODO: Normalize the other types of dynamic memrefs.
1768  (void)getTileSizePos(layoutMap, tileSizePos);
1769  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1770  return memrefType;
1771 
1772  // We have a single map that is not an identity map. Create a new memref
1773  // with the right shape and an identity layout map.
1774  ArrayRef<int64_t> shape = memrefType.getShape();
1775  // FlatAffineValueConstraint may later on use symbolicOperands.
1776  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1777  SmallVector<unsigned, 4> memrefTypeDynDims;
1778  for (unsigned d = 0; d < rank; ++d) {
1779  // Use constraint system only in static dimensions.
1780  if (shape[d] > 0) {
1781  fac.addBound(BoundType::LB, d, 0);
1782  fac.addBound(BoundType::UB, d, shape[d] - 1);
1783  } else {
1784  memrefTypeDynDims.emplace_back(d);
1785  }
1786  }
1787  // We compose this map with the original index (logical) space to derive
1788  // the upper bounds for the new index space.
1789  unsigned newRank = layoutMap.getNumResults();
1790  if (failed(fac.composeMatchingMap(layoutMap)))
1791  return memrefType;
1792  // TODO: Handle semi-affine maps.
1793  // Project out the old data dimensions.
1794  fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1795  SmallVector<int64_t, 4> newShape(newRank);
1796  MLIRContext *context = memrefType.getContext();
1797  for (unsigned d = 0; d < newRank; ++d) {
1798  // Check if this dimension is dynamic.
1799  if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1800  newShape[d] = ShapedType::kDynamic;
1801  continue;
1802  }
1803  // The lower bound for the shape is always zero.
1804  std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1805  // For a static memref and an affine map with no symbols, this is
1806  // always bounded. However, when we have symbols, we may not be able to
1807  // obtain a constant upper bound. Also, mapping to a negative space is
1808  // invalid for normalization.
1809  if (!ubConst.has_value() || *ubConst < 0) {
1810  LLVM_DEBUG(llvm::dbgs()
1811  << "can't normalize map due to unknown/invalid upper bound");
1812  return memrefType;
1813  }
1814  // If dimension of new memrefType is dynamic, the value is -1.
1815  newShape[d] = *ubConst + 1;
1816  }
1817 
1818  // Create the new memref type after trivializing the old layout map.
1819  auto newMemRefType =
1820  MemRefType::Builder(memrefType)
1821  .setShape(newShape)
1823  AffineMap::getMultiDimIdentityMap(newRank, context)));
1824  return newMemRefType;
1825 }
1826 
1828  Value rhs) {
1829  DivModValue result;
1830  AffineExpr d0, d1;
1831  bindDims(b.getContext(), d0, d1);
1832  result.quotient =
1833  affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1834  result.remainder =
1835  affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1836  return result;
1837 }
1838 
1839 /// Create IR that computes the product of all elements in the set.
1841  ArrayRef<Value> set) {
1842  if (set.empty())
1843  return failure();
1844  OpFoldResult result = set[0];
1845  AffineExpr s0, s1;
1846  bindSymbols(b.getContext(), s0, s1);
1847  for (unsigned i = 1, e = set.size(); i < e; i++)
1848  result = makeComposedFoldedAffineApply(b, loc, s0 * s1, {result, set[i]});
1849  return result;
1850 }
1851 
1854  ArrayRef<Value> basis) {
1855  unsigned numDims = basis.size();
1856 
1857  SmallVector<Value> divisors;
1858  for (unsigned i = 1; i < numDims; i++) {
1859  ArrayRef<Value> slice = basis.drop_front(i);
1860  FailureOr<OpFoldResult> prod = getIndexProduct(b, loc, slice);
1861  if (failed(prod))
1862  return failure();
1863  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, *prod));
1864  }
1865 
1866  SmallVector<Value> results;
1867  results.reserve(divisors.size() + 1);
1868  Value residual = linearIndex;
1869  for (Value divisor : divisors) {
1870  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1871  results.push_back(divMod.quotient);
1872  residual = divMod.remainder;
1873  }
1874  results.push_back(residual);
1875  return results;
1876 }
1877 
1879  ArrayRef<OpFoldResult> basis,
1880  ImplicitLocOpBuilder &builder) {
1881  assert(multiIndex.size() == basis.size());
1882  SmallVector<AffineExpr> basisAffine;
1883  for (size_t i = 0; i < basis.size(); ++i) {
1884  basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
1885  }
1886 
1887  SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
1888  SmallVector<OpFoldResult> strides;
1889  strides.reserve(stridesAffine.size());
1890  llvm::transform(stridesAffine, std::back_inserter(strides),
1891  [&builder, &basis](AffineExpr strideExpr) {
1893  builder, builder.getLoc(), strideExpr, basis);
1894  });
1895 
1896  auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
1897  OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
1899  builder, builder.getLoc(), linearIndexExpr, multiIndexAndStrides);
1900 }
static void createNewDynamicSizes(MemRefType oldMemRefType, MemRefType newMemRefType, AffineMap map, memref::AllocOp *allocOp, OpBuilder b, SmallVectorImpl< Value > &newDynamicSizes)
Create new maps to calculate each dimension size of newMemRefType, and create newDynamicSizes from th...
Definition: Utils.cpp:1642
static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp, unsigned minSurroundingLoops)
Returns true if srcMemOp may have an effect on destMemOp within the scope of the outermost minSurroun...
Definition: Utils.cpp:648
static LogicalResult getTileSizePos(AffineMap map, SmallVectorImpl< std::tuple< AffineExpr, unsigned, unsigned >> &tileSizePos)
Check if map is a tiled layout.
Definition: Utils.cpp:1469
static FailureOr< OpFoldResult > getIndexProduct(OpBuilder &b, Location loc, ArrayRef< Value > set)
Create IR that computes the product of all elements in the set.
Definition: Utils.cpp:1840
TileExprPattern
Enum to set patterns of affine expr in tiled-layout map.
Definition: Utils.cpp:1461
@ TileFloorDiv
Definition: Utils.cpp:1461
@ TileNone
Definition: Utils.cpp:1461
@ TileMod
Definition: Utils.cpp:1461
static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock)
Promotes the then or the else block of ifOp (depending on whether elseBlock is false or true) into if...
Definition: Utils.cpp:243
static bool isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap, SmallVectorImpl< unsigned > &inMemrefTypeDynDims)
Check if dim dimension of memrefType with layoutMap becomes dynamic after normalization.
Definition: Utils.cpp:1568
static void loadCSE(AffineReadOpInterface loadA, SmallVectorImpl< Operation * > &loadOpsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition: Utils.cpp:954
static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput, TileExprPattern pat)
Create affine expr to calculate dimension size for a tiled-layout map.
Definition: Utils.cpp:1587
static Operation * getOutermostInvariantForOp(AffineIfOp ifOp)
Returns the outermost affine.for/parallel op that the ifOp is invariant on.
Definition: Utils.cpp:259
static void findUnusedStore(AffineWriteOpInterface writeA, SmallVectorImpl< Operation * > &opsToErase, PostDominanceInfo &postDominanceInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition: Utils.cpp:907
static bool mustReachAtInnermost(const MemRefAccess &srcAccess, const MemRefAccess &destAccess)
Returns true if the memory operation of destAccess depends on srcAccess inside of the innermost commo...
Definition: Utils.cpp:631
static void forwardStoreToLoad(AffineReadOpInterface loadOp, SmallVectorImpl< Operation * > &loadOpsToErase, SmallPtrSetImpl< Value > &memrefsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Attempt to eliminate loadOp by replacing it with a value stored into memory which the load is guarant...
Definition: Utils.cpp:829
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Definition: PDL.cpp:63
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Affine binary operation expression.
Definition: AffineExpr.h:228
AffineExpr getLHS() const
Definition: AffineExpr.cpp:332
AffineExpr getRHS() const
Definition: AffineExpr.cpp:335
An integer constant appearing in affine expression.
Definition: AffineExpr.h:253
int64_t getValue() const
Definition: AffineExpr.cpp:615
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:237
unsigned getPosition() const
Definition: AffineExpr.cpp:340
See documentation for AffineExprVisitorBase.
Base type for affine expression.
Definition: AffineExpr.h:69
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:883
RetT walk(FnT &&callback) const
Walk all of the AffineExpr's in this expression in postorder.
Definition: AffineExpr.h:131
AffineExpr ceilDiv(uint64_t v) const
Definition: AffineExpr.cpp:926
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:47
MLIRContext * getContext() const
Definition: AffineMap.cpp:327
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
Definition: AffineMap.cpp:318
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
Definition: AffineMap.cpp:382
unsigned getNumDims() const
Definition: AffineMap.cpp:378
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:391
unsigned getNumResults() const
Definition: AffineMap.cpp:386
unsigned getNumInputs() const
Definition: AffineMap.cpp:387
AffineExpr getResult(unsigned idx) const
Definition: AffineMap.cpp:395
AffineMap compose(AffineMap map) const
Returns the AffineMap resulting from composing this with map.
Definition: AffineMap.cpp:540
A symbolic identifier appearing in an affine expression.
Definition: AffineExpr.h:245
unsigned getPosition() const
Definition: AffineExpr.cpp:605
This class represents the main alias analysis interface in MLIR.
AliasResult alias(Value lhs, Value rhs)
Given two values, return their aliasing behavior.
bool isNo() const
Returns if this result indicates no possibility of aliasing.
Definition: AliasAnalysis.h:59
This class represents an argument of a Block.
Definition: Value.h:319
Block represents an ordered list of Operations.
Definition: Block.h:31
OpListType::iterator iterator
Definition: Block.h:138
SuccessorRange getSuccessors()
Definition: Block.h:265
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:243
OpListType & getOperations()
Definition: Block.h:135
iterator end()
Definition: Block.h:142
iterator begin()
Definition: Block.h:141
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:124
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition: Builders.cpp:238
AffineMap getMultiDimIdentityMap(unsigned rank)
Definition: Builders.cpp:394
BoolAttr getBoolAttr(bool value)
Definition: Builders.cpp:116
StringAttr getStringAttr(const Twine &bytes)
Definition: Builders.cpp:269
AffineExpr getAffineDimExpr(unsigned position)
Definition: Builders.cpp:371
AffineMap getConstantAffineMap(int64_t val)
Returns a single constant result affine map with 0 dimensions and 0 symbols.
Definition: Builders.cpp:385
MLIRContext * getContext() const
Definition: Builders.h:55
IndexType getIndexType()
Definition: Builders.cpp:71
A class for computing basic dominance information.
Definition: Dominance.h:136
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
Definition: Dominance.h:156
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
LogicalResult composeMatchingMap(AffineMap other)
Composes an affine map whose dimensions and symbols match one to one with the dimensions and symbols ...
void projectOut(Value val)
Projects out the variable that is associate with Value.
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
GreedyRewriteStrictness strictMode
Strict mode can restrict the ops that are added to the worklist during the rewrite.
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
void clear()
Clears all mappings held by the mapper.
Definition: IRMapping.h:79
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Location getLoc() const
Accessors for the implied location.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This is a builder type that keeps local references to arguments.
Definition: BuiltinTypes.h:201
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
Definition: BuiltinTypes.h:222
Builder & setShape(ArrayRef< int64_t > newShape)
Definition: BuiltinTypes.h:212
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
StringAttr getName() const
Return the name of the attribute.
Definition: Attributes.cpp:49
Attribute getValue() const
Return the value of the attribute.
Definition: Attributes.h:221
This class helps build Operations.
Definition: Builders.h:209
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition: Builders.h:242
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition: Builders.h:447
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:555
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:433
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:464
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Definition: Builders.h:414
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition: Builders.h:444
This class represents a single result from folding an operation.
Definition: OpDefinition.h:268
This trait indicates that the memory effects of an operation includes the effects of operations neste...
This class provides the API for ops that are known to be isolated from above.
This class implements the operand iterators for the Operation class.
Definition: ValueRange.h:42
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:345
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:745
void setOperand(unsigned idx, Value value)
Definition: Operation.h:346
operand_iterator operand_begin()
Definition: Operation.h:369
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:507
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
operand_iterator operand_end()
Definition: Operation.h:370
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:682
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:672
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
bool isAncestor(Operation *other)
Return true if this operation is an ancestor of the other operation.
Definition: Operation.h:263
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
Definition: Operation.cpp:237
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:869
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
result_range getResults()
Definition: Operation.h:410
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:399
A class for computing basic postdominance information.
Definition: Dominance.h:195
bool postDominates(Operation *a, Operation *b)
Return true if operation A postdominates operation B.
Definition: Dominance.h:203
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
bool isAncestor(Region *other)
Return true if this region is ancestor of the other region.
Definition: Region.h:222
void takeBody(Region &other)
Takes body of another region (that region will have no body after this operation completes).
Definition: Region.h:241
MLIRContext * getContext() const
Definition: PatternMatch.h:822
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition: Value.cpp:61
void replaceAllUsesWith(Value newValue)
Replace all uses of 'this' value with the new value, updating anything in the IR that uses 'this' to ...
Definition: Value.h:173
user_range getUsers() const
Definition: Value.h:228
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
static WalkResult advance()
Definition: Visitors.h:52
static WalkResult interrupt()
Definition: Visitors.h:51
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
LogicalResult canonicalize()
Attempts to canonicalize the map and operands.
Definition: AffineOps.cpp:3944
unsigned getNumSymbols() const
ArrayRef< Value > getOperands() const
AffineExpr getResult(unsigned i)
unsigned getNumResults() const
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
LogicalResult addBound(presburger::BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands)
Adds a bound for the variable at the specified position with constraints being drawn from the specifi...
std::optional< int64_t > getConstantBound64(BoundType type, unsigned pos) const
The same, but casts to int64_t.
std::optional< SmallVector< Value, 8 > > expandAffineMap(OpBuilder &builder, Location loc, AffineMap affineMap, ValueRange operands)
Create a sequence of operations that implement the affineMap applied to the given operands (as it it ...
Definition: Utils.cpp:225
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
Definition: AffineOps.cpp:1128
FailureOr< SmallVector< Value > > delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex, ArrayRef< Value > basis)
Generate the IR to delinearize linearIndex given the basis and return the multi-index.
Definition: Utils.cpp:1853
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo, PostDominanceInfo &postDomInfo, AliasAnalysis &analysis)
Replace affine store and load accesses by scalars by forwarding stores to loads and eliminate invaria...
Definition: Utils.cpp:1035
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
Definition: LoopUtils.cpp:131
bool isValidDim(Value value)
Returns true if the given Value can be used as a dimension id in the region of the closest surroundin...
Definition: AffineOps.cpp:272
Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr, ValueRange dimValues, ValueRange symbolValues)
Emit code that computes the given affine expression using standard arithmetic operations applied to t...
Definition: Utils.cpp:215
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition: Utils.cpp:1893
void normalizeAffineParallel(AffineParallelOp op)
Normalize a affine.parallel op so that lower bounds are 0 and steps are 1.
Definition: Utils.cpp:491
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
LogicalResult affineParallelize(AffineForOp forOp, ArrayRef< LoopReduction > parallelReductions={}, AffineParallelOp *resOp=nullptr)
Replaces a parallel affine.for op with a 1-d affine.parallel op.
Definition: Utils.cpp:348
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1138
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
Definition: AffineOps.cpp:1429
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
LogicalResult normalizeAffineFor(AffineForOp op, bool promoteSingleIter=false)
Normalize an affine.for op.
Definition: Utils.cpp:555
LogicalResult normalizeMemRef(memref::AllocOp *op)
Rewrites the memref defined by this alloc op to have an identity layout map and updates all its index...
Definition: Utils.cpp:1691
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
Definition: AffineOps.cpp:386
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1188
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
MemRefType normalizeMemRefType(MemRefType memrefType)
Normalizes memrefType so that the affine layout map of the memref is transformed to an identity map w...
Definition: Utils.cpp:1748
OpFoldResult linearizeIndex(ArrayRef< OpFoldResult > multiIndex, ArrayRef< OpFoldResult > basis, ImplicitLocOpBuilder &builder)
Definition: Utils.cpp:1878
Region * getAffineScope(Operation *op)
Returns the closest region enclosing op that is held by an operation with trait AffineScope; nullptr ...
Definition: AffineOps.cpp:257
DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs)
Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
Definition: Utils.cpp:1827
bool hasNoInterveningEffect(Operation *start, T memOp, llvm::function_ref< bool(Value, Value)> mayAlias)
Ensure that all operations that could be executed after start (noninclusive) and prior to memOp (e....
Definition: Utils.cpp:681
void createAffineComputationSlice(Operation *opInst, SmallVectorImpl< AffineApplyOp > *sliceOps)
Given an operation, inserts one or more single result affine apply operations, results of which are e...
Definition: Utils.cpp:1387
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded=nullptr)
Hoists out affine.if/else to as high as possible, i.e., past all invariant affine....
Definition: Utils.cpp:410
bool noDependence(DependenceResult result)
Returns true if the provided DependenceResult corresponds to the absence of a dependence.
AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min, AffineExpr max, bool positivePath=true)
Traverse e and return an AffineExpr where all occurrences of dim have been replaced by either:
Definition: Utils.cpp:464
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)
Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...
Definition: Utils.cpp:1271
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
Definition: AffineMap.cpp:734
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
LogicalResult applyOpPatternsAndFold(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:349
std::pair< AffineExpr, SmallVector< OpFoldResult > > computeLinearIndex(OpFoldResult sourceOffset, ArrayRef< OpFoldResult > strides, ArrayRef< OpFoldResult > indices)
Compute linear index from provided strides and indices, assuming strided layout.
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
Definition: IndexingUtils.h:47
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
@ CeilDiv
RHS of ceildiv is always a constant or a symbolic expression.
@ Mod
RHS of mod is always a constant or a symbolic expression with a positive value.
@ FloorDiv
RHS of floordiv is always a constant or a symbolic expression.
AffineExpr getAffineBinaryOpExpr(AffineExprKind kind, AffineExpr lhs, AffineExpr rhs)
Definition: AffineExpr.cpp:62
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:363
LogicalResult applyPatternsAndFoldGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:103
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
@ ExistingOps
Only pre-existing ops are processed.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
Definition: AffineExpr.cpp:599
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Definition: AffineExpr.cpp:609
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
The following effect indicates that the operation reads from some resource.
This represents an operation in an abstracted form, suitable for use with the builder APIs.
Checks whether two accesses to the same memref access the same element.
Holds the result of (div a, b) and (mod a, b).
Definition: Utils.h:303
A description of a (parallelizable) reduction in an affine loop.
arith::AtomicRMWKind kind
Reduction kind.
Value value
The value being reduced.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.