Utils.cpp
1 //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous transformation utilities for the Affine
10 // dialect.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Dialect/Affine/Utils.h"
15 
24 #include "mlir/IR/Dominance.h"
25 #include "mlir/IR/IRMapping.h"
26 #include "mlir/IR/IntegerSet.h"
28 #include <optional>
29 
30 #define DEBUG_TYPE "affine-utils"
31 
32 using namespace mlir;
33 using namespace affine;
34 using namespace presburger;
35 
36 namespace {
37 /// Visit affine expressions recursively and build the sequence of operations
38 /// that correspond to them. Visitation functions return a Value of the
39 /// expression subtree they visited or `nullptr` on error.
40 class AffineApplyExpander
41  : public AffineExprVisitor<AffineApplyExpander, Value> {
42 public:
43  /// This internal class expects arguments to be non-null; checks must be
44  /// performed at the call site.
45  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
46  ValueRange symbolValues, Location loc)
47  : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
48  loc(loc) {}
49 
50  template <typename OpTy>
51  Value buildBinaryExpr(AffineBinaryOpExpr expr) {
52  auto lhs = visit(expr.getLHS());
53  auto rhs = visit(expr.getRHS());
54  if (!lhs || !rhs)
55  return nullptr;
56  auto op = builder.create<OpTy>(loc, lhs, rhs);
57  return op.getResult();
58  }
59 
60  Value visitAddExpr(AffineBinaryOpExpr expr) {
61  return buildBinaryExpr<arith::AddIOp>(expr);
62  }
63 
64  Value visitMulExpr(AffineBinaryOpExpr expr) {
65  return buildBinaryExpr<arith::MulIOp>(expr);
66  }
67 
68  /// Euclidean modulo operation: negative RHS is not allowed.
69  /// The remainder of Euclidean integer division is always non-negative.
70  ///
71  /// Implemented as
72  ///
73  /// a mod b =
74  /// let remainder = srem a, b;
75  /// negative = a < 0 in
76  /// select negative, remainder + b, remainder.
77  Value visitModExpr(AffineBinaryOpExpr expr) {
78  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
79  if (rhsConst.getValue() <= 0) {
80  emitError(loc, "modulo by non-positive value is not supported");
81  return nullptr;
82  }
83  }
84 
85  auto lhs = visit(expr.getLHS());
86  auto rhs = visit(expr.getRHS());
87  assert(lhs && rhs && "unexpected affine expr lowering failure");
88 
89  Value remainder = builder.create<arith::RemSIOp>(loc, lhs, rhs);
90  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
91  Value isRemainderNegative = builder.create<arith::CmpIOp>(
92  loc, arith::CmpIPredicate::slt, remainder, zeroCst);
93  Value correctedRemainder =
94  builder.create<arith::AddIOp>(loc, remainder, rhs);
95  Value result = builder.create<arith::SelectOp>(
96  loc, isRemainderNegative, correctedRemainder, remainder);
97  return result;
98  }
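 // Illustrative sketch (not from the original source): with a constant
 // positive divisor, e.g. `d0 mod 4` applied to %i, this visitor emits
 // roughly the following ops (value names invented for readability):
 //   %c4  = arith.constant 4 : index
 //   %rem = arith.remsi %i, %c4 : index
 //   %c0  = arith.constant 0 : index
 //   %neg = arith.cmpi slt, %rem, %c0 : index
 //   %fix = arith.addi %rem, %c4 : index
 //   %mod = arith.select %neg, %fix, %rem : index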
99 
100  /// Floor division operation (rounds towards negative infinity).
101  ///
102  /// For positive divisors, it can be implemented without branching and with a
103  /// single division operation as
104  ///
105  /// a floordiv b =
106  /// let negative = a < 0 in
107  /// let absolute = negative ? -a - 1 : a in
108  /// let quotient = absolute / b in
109  /// negative ? -quotient - 1 : quotient
110  ///
111  /// Note: this lowering does not use arith.floordivsi because the lowering of
112  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
113  /// not one but two arith.divsi. That could be changed to one divsi, but one
114  /// way or another, going through arith.floordivsi will result in more complex
115  /// IR because arith.floordivsi is more general than affine floordiv in that
116  /// it supports negative RHS.
117  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
118  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
119  if (rhsConst.getValue() <= 0) {
120  emitError(loc, "division by non-positive value is not supported");
121  return nullptr;
122  }
123  }
124  auto lhs = visit(expr.getLHS());
125  auto rhs = visit(expr.getRHS());
126  assert(lhs && rhs && "unexpected affine expr lowering failure");
127 
128  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
129  Value noneCst = builder.create<arith::ConstantIndexOp>(loc, -1);
130  Value negative = builder.create<arith::CmpIOp>(
131  loc, arith::CmpIPredicate::slt, lhs, zeroCst);
132  Value negatedDecremented = builder.create<arith::SubIOp>(loc, noneCst, lhs);
133  Value dividend =
134  builder.create<arith::SelectOp>(loc, negative, negatedDecremented, lhs);
135  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
136  Value correctedQuotient =
137  builder.create<arith::SubIOp>(loc, noneCst, quotient);
138  Value result = builder.create<arith::SelectOp>(loc, negative,
139  correctedQuotient, quotient);
140  return result;
141  }
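 // Illustrative sketch (not from the original source): `d0 floordiv 4`
 // applied to %i lowers roughly to (value names invented for readability):
 //   %c4  = arith.constant 4 : index
 //   %c0  = arith.constant 0 : index
 //   %cm1 = arith.constant -1 : index
 //   %neg = arith.cmpi slt, %i, %c0 : index
 //   %nd  = arith.subi %cm1, %i : index
 //   %dvd = arith.select %neg, %nd, %i : index
 //   %q   = arith.divsi %dvd, %c4 : index
 //   %cq  = arith.subi %cm1, %q : index
 //   %res = arith.select %neg, %cq, %q : index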
142 
143  /// Ceiling division operation (rounds towards positive infinity).
144  ///
145  /// For positive divisors, it can be implemented without branching and with a
146  /// single division operation as
147  ///
148  /// a ceildiv b =
149  /// let negative = a <= 0 in
150  /// let absolute = negative ? -a : a - 1 in
151  /// let quotient = absolute / b in
152  /// negative ? -quotient : quotient + 1
153  ///
154  /// Note: not using arith.ceildivsi for the same reason as explained in the
155  /// visitFloorDivExpr comment.
156  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
157  if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
158  if (rhsConst.getValue() <= 0) {
159  emitError(loc, "division by non-positive value is not supported");
160  return nullptr;
161  }
162  }
163  auto lhs = visit(expr.getLHS());
164  auto rhs = visit(expr.getRHS());
165  assert(lhs && rhs && "unexpected affine expr lowering failure");
166 
167  Value zeroCst = builder.create<arith::ConstantIndexOp>(loc, 0);
168  Value oneCst = builder.create<arith::ConstantIndexOp>(loc, 1);
169  Value nonPositive = builder.create<arith::CmpIOp>(
170  loc, arith::CmpIPredicate::sle, lhs, zeroCst);
171  Value negated = builder.create<arith::SubIOp>(loc, zeroCst, lhs);
172  Value decremented = builder.create<arith::SubIOp>(loc, lhs, oneCst);
173  Value dividend =
174  builder.create<arith::SelectOp>(loc, nonPositive, negated, decremented);
175  Value quotient = builder.create<arith::DivSIOp>(loc, dividend, rhs);
176  Value negatedQuotient =
177  builder.create<arith::SubIOp>(loc, zeroCst, quotient);
178  Value incrementedQuotient =
179  builder.create<arith::AddIOp>(loc, quotient, oneCst);
180  Value result = builder.create<arith::SelectOp>(
181  loc, nonPositive, negatedQuotient, incrementedQuotient);
182  return result;
183  }
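 // Illustrative sketch (not from the original source): `d0 ceildiv 4` applied
 // to %i follows the same shape, selecting the dividend based on `%i <= 0`:
 //   %np  = arith.cmpi sle, %i, %c0 : index
 //   %dvd = arith.select %np, %negated, %decremented : index
 //   %q   = arith.divsi %dvd, %c4 : index
 //   %res = arith.select %np, %negq, %qplus1 : index
 // where %negated = 0 - %i, %decremented = %i - 1, %negq = 0 - %q, and
 // %qplus1 = %q + 1 are built with arith.subi/arith.addi.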
184 
185  Value visitConstantExpr(AffineConstantExpr expr) {
186  auto op = builder.create<arith::ConstantIndexOp>(loc, expr.getValue());
187  return op.getResult();
188  }
189 
190  Value visitDimExpr(AffineDimExpr expr) {
191  assert(expr.getPosition() < dimValues.size() &&
192  "affine dim position out of range");
193  return dimValues[expr.getPosition()];
194  }
195 
196  Value visitSymbolExpr(AffineSymbolExpr expr) {
197  assert(expr.getPosition() < symbolValues.size() &&
198  "symbol dim position out of range");
199  return symbolValues[expr.getPosition()];
200  }
201 
202 private:
203  OpBuilder &builder;
204  ValueRange dimValues;
205  ValueRange symbolValues;
206 
207  Location loc;
208 };
209 } // namespace
210 
211 /// Create a sequence of operations that implement the `expr` applied to the
212 /// given dimension and symbol values.
213 mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
214  AffineExpr expr,
215  ValueRange dimValues,
216  ValueRange symbolValues) {
217  return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
218 }
219 
220 /// Create a sequence of operations that implement the `affineMap` applied to
221 /// the given `operands` (as if it were an AffineApplyOp).
222 std::optional<SmallVector<Value, 8>>
223 mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
224  AffineMap affineMap, ValueRange operands) {
225  auto numDims = affineMap.getNumDims();
226  auto expanded = llvm::to_vector<8>(
227  llvm::map_range(affineMap.getResults(),
228  [numDims, &builder, loc, operands](AffineExpr expr) {
229  return expandAffineExpr(builder, loc, expr,
230  operands.take_front(numDims),
231  operands.drop_front(numDims));
232  }));
233  if (llvm::all_of(expanded, [](Value v) { return v; }))
234  return expanded;
235  return std::nullopt;
236 }
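// Minimal usage sketch (hypothetical caller, not part of this file): expand
// the map (d0)[s0] -> (d0 + s0) at `loc`, passing dim operands first and then
// symbol operands; a std::nullopt result signals a failed expansion.
//
//   AffineMap map = AffineMap::get(
//       /*dimCount=*/1, /*symbolCount=*/1,
//       {builder.getAffineDimExpr(0) + builder.getAffineSymbolExpr(0)},
//       builder.getContext());
//   if (std::optional<SmallVector<Value, 8>> vals =
//           expandAffineMap(builder, loc, map, ValueRange{iv, bound}))
//     Value sum = (*vals)[0]; // %iv + %bound, materialized as arith.addi.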
237 
238 /// Promotes the `then` or the `else` block of `ifOp` (depending on whether
239 /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
240 /// the rest of the op.
241 static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
242  if (elseBlock)
243  assert(ifOp.hasElse() && "else block expected");
244 
245  Block *destBlock = ifOp->getBlock();
246  Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
247  destBlock->getOperations().splice(
248  Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
249  std::prev(srcBlock->end()));
250  ifOp.erase();
251 }
252 
253 /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
254 /// on. The `ifOp` could be hoisted and placed right before such an operation.
255 /// This method assumes that the ifOp has been canonicalized (to be correct and
256 /// effective).
257 static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
258  // Walk up the parents past all for ops that this conditional is invariant on.
259  auto ifOperands = ifOp.getOperands();
260  auto *res = ifOp.getOperation();
261  while (!isa<func::FuncOp>(res->getParentOp())) {
262  auto *parentOp = res->getParentOp();
263  if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
264  if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
265  break;
266  } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
267  for (auto iv : parallelOp.getIVs())
268  if (llvm::is_contained(ifOperands, iv))
269  break;
270  } else if (!isa<AffineIfOp>(parentOp)) {
271  // Won't walk up past anything other than affine.for/parallel/if ops.
272  break;
273  }
274  // You can always hoist up past any affine.if ops.
275  res = parentOp;
276  }
277  return res;
278 }
279 
280 /// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
281 /// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
282 /// otherwise the same `ifOp`.
283 static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
284  // No hoisting to do.
285  if (hoistOverOp == ifOp)
286  return ifOp;
287 
288  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
289  // the else block. Then drop the else block of the original 'if' in the 'then'
290  // branch while promoting its then block, and analogously drop the 'then'
291  // block of the original 'if' from the 'else' branch while promoting its else
292  // block.
293  IRMapping operandMap;
294  OpBuilder b(hoistOverOp);
295  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
296  ifOp.getOperands(),
297  /*elseBlock=*/true);
298 
299  // Create a clone of hoistOverOp to use for the else branch of the hoisted
300  // conditional. The else block may get optimized away if empty.
301  Operation *hoistOverOpClone = nullptr;
302  // We use this unique name to identify/find `ifOp`'s clone in the else
303  // version.
304  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
305  operandMap.clear();
306  b.setInsertionPointAfter(hoistOverOp);
307  // We'll set an attribute to identify this op in a clone of this sub-tree.
308  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
309  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
310 
311  // Promote the 'then' block of the original affine.if in the then version.
312  promoteIfBlock(ifOp, /*elseBlock=*/false);
313 
314  // Move the then version to the hoisted if op's 'then' block.
315  auto *thenBlock = hoistedIfOp.getThenBlock();
316  thenBlock->getOperations().splice(thenBlock->begin(),
317  hoistOverOp->getBlock()->getOperations(),
318  Block::iterator(hoistOverOp));
319 
320  // Find the clone of the original affine.if op in the else version.
321  AffineIfOp ifCloneInElse;
322  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
323  if (!ifClone->getAttr(idForIfOp))
324  return WalkResult::advance();
325  ifCloneInElse = ifClone;
326  return WalkResult::interrupt();
327  });
328  assert(ifCloneInElse && "if op clone should exist");
329  // For the else block, promote the else block of the original 'if' if it had
330  // one; otherwise, the op itself is to be erased.
331  if (!ifCloneInElse.hasElse())
332  ifCloneInElse.erase();
333  else
334  promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
335 
336  // Move the else version into the else block of the hoisted if op.
337  auto *elseBlock = hoistedIfOp.getElseBlock();
338  elseBlock->getOperations().splice(
339  elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
340  Block::iterator(hoistOverOpClone));
341 
342  return hoistedIfOp;
343 }
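// Illustrative effect (not from the original source): hoisting an affine.if
// whose condition is invariant in the surrounding loop, e.g.
//
//   affine.for %i = 0 to 100 {
//     affine.if #set(%n) { "work"() : () -> () }
//   }
//
// produces a conditional that is evaluated once, with a loop clone in each
// branch (the branch that lost its body is cleaned up by the later
// canonicalization run):
//
//   affine.if #set(%n) {
//     affine.for %i = 0 to 100 { "work"() : () -> () }
//   } else {
//     affine.for %i = 0 to 100 { }
//   }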
344 
345 LogicalResult
346 mlir::affine::affineParallelize(AffineForOp forOp,
347  ArrayRef<LoopReduction> parallelReductions,
348  AffineParallelOp *resOp) {
349  // Fail early if there are iter arguments that are not reductions.
350  unsigned numReductions = parallelReductions.size();
351  if (numReductions != forOp.getNumIterOperands())
352  return failure();
353 
354  Location loc = forOp.getLoc();
355  OpBuilder outsideBuilder(forOp);
356  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
357  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
358  AffineMap upperBoundMap = forOp.getUpperBoundMap();
359  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
360 
361  // Creating empty 1-D affine.parallel op.
362  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
363  parallelReductions, [](const LoopReduction &red) { return red.value; }));
364  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
365  parallelReductions, [](const LoopReduction &red) { return red.kind; }));
366  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
367  loc, ValueRange(reducedValues).getTypes(), reductionKinds,
368  llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
369  llvm::ArrayRef(upperBoundMap), upperBoundOperands,
370  llvm::ArrayRef(forOp.getStepAsInt()));
371  // Steal the body of the old affine for op.
372  newPloop.getRegion().takeBody(forOp.getRegion());
373  Operation *yieldOp = &newPloop.getBody()->back();
374 
375  // Handle the initial values of reductions because the parallel loop always
376  // starts from the neutral value.
377  SmallVector<Value> newResults;
378  newResults.reserve(numReductions);
379  for (unsigned i = 0; i < numReductions; ++i) {
380  Value init = forOp.getInits()[i];
381  // This works because we are only handling single-op reductions at the
382  // moment. A switch on reduction kind or a mechanism to collect operations
383  // participating in the reduction will be necessary for multi-op reductions.
384  Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
385  assert(reductionOp && "yielded value is expected to be produced by an op");
386  outsideBuilder.getInsertionBlock()->getOperations().splice(
387  outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
388  reductionOp);
389  reductionOp->setOperands({init, newPloop->getResult(i)});
390  forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
391  }
392 
393  // Update the loop terminator to yield reduced values bypassing the reduction
394  // operation itself (now moved outside of the loop) and erase the block
395  // arguments that correspond to reductions. Note that the loop always has one
396  // "main" induction variable when coming from a non-parallel for.
397  unsigned numIVs = 1;
398  yieldOp->setOperands(reducedValues);
399  newPloop.getBody()->eraseArguments(numIVs, numReductions);
400 
401  forOp.erase();
402  if (resOp)
403  *resOp = newPloop;
404  return success();
405 }
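// Illustrative effect (not from the original source): a single-op reduction
// loop such as
//
//   %sum = affine.for %i = 0 to 128 iter_args(%acc = %init) -> (f32) {
//     %v = affine.load %A[%i] : memref<128xf32>
//     %add = arith.addf %acc, %v : f32
//     affine.yield %add : f32
//   }
//
// becomes, assuming the arith.addf was recognized as an "addf" LoopReduction,
//
//   %red = affine.parallel (%i) = (0) to (128) reduce ("addf") -> (f32) {
//     %v = affine.load %A[%i] : memref<128xf32>
//     affine.yield %v : f32
//   }
//   %sum = arith.addf %init, %red : f32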
406 
407 // Returns success if any hoisting happened.
408 LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
409  // Bail out early if the ifOp returns a result. TODO: Consider how to
410  // properly support this case.
411  if (ifOp.getNumResults() != 0)
412  return failure();
413 
414  // Apply canonicalization patterns and folding - this is necessary for the
415  // hoisting check to be correct (operands should be composed), and to be more
416  // effective (no unused operands). Since the pattern rewriter's folding is
417  // entangled with application of patterns, we may fold/end up erasing the op,
418  // in which case we return with `folded` being set.
419  RewritePatternSet patterns(ifOp.getContext());
420  AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
421  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
422  GreedyRewriteConfig config;
423  config.strictMode = GreedyRewriteStrictness::ExistingAndNewOps;
424  bool erased;
425  (void)applyOpPatternsAndFold(ifOp.getOperation(), frozenPatterns, config,
426  /*changed=*/nullptr, &erased);
427  if (erased) {
428  if (folded)
429  *folded = true;
430  return failure();
431  }
432  if (folded)
433  *folded = false;
434 
435  // The folding above should have ensured this, but the affine.if's
436  // canonicalization is missing composition of affine.applys into it.
437  assert(llvm::all_of(ifOp.getOperands(),
438  [](Value v) {
439  return isTopLevelValue(v) || isAffineForInductionVar(v);
440  }) &&
441  "operands not composed");
442 
443  // We are going to hoist as high as possible.
444  // TODO: this could be customized in the future.
445  auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
446 
447  AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
448  // Nothing to hoist over.
449  if (hoistedIfOp == ifOp)
450  return failure();
451 
452  // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
453  // a sequence of affine.fors that are all perfectly nested).
454  (void)applyPatternsAndFoldGreedily(
455  hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
456  frozenPatterns);
457 
458  return success();
459 }
460 
461 // Return the expr after replacing `dim` with `min` (positive path) or `max` (negative path).
462 AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
463  AffineExpr min, AffineExpr max,
464  bool positivePath) {
465  if (e == dim)
466  return positivePath ? min : max;
467  if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
468  AffineExpr lhs = bin.getLHS();
469  AffineExpr rhs = bin.getRHS();
470  if (bin.getKind() == mlir::AffineExprKind::Add)
471  return substWithMin(lhs, dim, min, max, positivePath) +
472  substWithMin(rhs, dim, min, max, positivePath);
473 
474  auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
475  auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
476  if (c1 && c1.getValue() < 0)
477  return getAffineBinaryOpExpr(
478  bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
479  if (c2 && c2.getValue() < 0)
480  return getAffineBinaryOpExpr(
481  bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
482  return getAffineBinaryOpExpr(
483  bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
484  substWithMin(rhs, dim, min, max, positivePath));
485  }
486  return e;
487 }
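// Illustrative example (not from the original source): substituting into
// `d0 * -2 + 8` with positivePath = true replaces d0 by `max` rather than
// `min`, because multiplication by the negative constant flips the path;
// the result is `max * -2 + 8`.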
488 
489 void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
490  // Loops with min/max in bounds are not normalized at the moment.
491  if (op.hasMinMaxBounds())
492  return;
493 
494  AffineMap lbMap = op.getLowerBoundsMap();
495  SmallVector<int64_t, 8> steps = op.getSteps();
496  // No need to do any work if the parallel op is already normalized.
497  bool isAlreadyNormalized =
498  llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
499  int64_t step = std::get<0>(tuple);
500  auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
501  return lbExpr && lbExpr.getValue() == 0 && step == 1;
502  });
503  if (isAlreadyNormalized)
504  return;
505 
506  AffineValueMap ranges;
507  AffineValueMap::difference(op.getUpperBoundsValueMap(),
508  op.getLowerBoundsValueMap(), &ranges);
509  auto builder = OpBuilder::atBlockBegin(op.getBody());
510  auto zeroExpr = builder.getAffineConstantExpr(0);
511  SmallVector<AffineExpr, 8> lbExprs;
512  SmallVector<AffineExpr, 8> ubExprs;
513  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
514  int64_t step = steps[i];
515 
516  // Adjust the lower bound to be 0.
517  lbExprs.push_back(zeroExpr);
518 
519  // Adjust the upper bound expression: 'range / step'.
520  AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
521  ubExprs.push_back(ubExpr);
522 
523  // Adjust the corresponding IV: 'lb + i * step'.
524  BlockArgument iv = op.getBody()->getArgument(i);
525  AffineExpr lbExpr = lbMap.getResult(i);
526  unsigned nDims = lbMap.getNumDims();
527  auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
528  auto map = AffineMap::get(/*dimCount=*/nDims + 1,
529  /*symbolCount=*/lbMap.getNumSymbols(), expr);
530 
531  // Use an 'affine.apply' op that will be simplified later in subsequent
532  // canonicalizations.
533  OperandRange lbOperands = op.getLowerBoundsOperands();
534  OperandRange dimOperands = lbOperands.take_front(nDims);
535  OperandRange symbolOperands = lbOperands.drop_front(nDims);
536  SmallVector<Value, 8> applyOperands{dimOperands};
537  applyOperands.push_back(iv);
538  applyOperands.append(symbolOperands.begin(), symbolOperands.end());
539  auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
540  iv.replaceAllUsesExcept(apply, apply);
541  }
542 
543  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
544  op.setSteps(newSteps);
545  auto newLowerMap = AffineMap::get(
546  /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
547  op.setLowerBounds({}, newLowerMap);
548  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
549  ubExprs, op.getContext());
550  op.setUpperBounds(ranges.getOperands(), newUpperMap);
551 }
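// Illustrative effect (not from the original source):
//
//   affine.parallel (%i) = (4) to (100) step (2) { ... uses of %i ... }
//
// becomes a zero-based, unit-step loop whose body reconstructs the original
// IV with an affine.apply inserted at the top:
//
//   affine.parallel (%ii) = (0) to (48) {
//     %i = affine.apply affine_map<(d0) -> (d0 * 2 + 4)>(%ii)
//     ... uses of %i ...
//   }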
552 
553 LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
554  bool promoteSingleIter) {
555  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
556  return success();
557 
558  // Check if the for op is already normalized.
559  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
560  (op.getStep() == 1))
561  return success();
562 
563  // Check if the lower bound has a single result only. Loops with a max lower
564  // bound can't be normalized without additional support like
565  // affine.execute_region's. If the lower bound does not have a single result
566  // then skip this op.
567  if (op.getLowerBoundMap().getNumResults() != 1)
568  return failure();
569 
570  Location loc = op.getLoc();
571  OpBuilder opBuilder(op);
572  int64_t origLoopStep = op.getStepAsInt();
573 
574  // Construct the new upper bound value map.
575  AffineMap oldLbMap = op.getLowerBoundMap();
576  // The upper bound can have multiple results. To use
577  // AffineValueMap::difference, we need to have the same number of results in
578  // both lower and upper bound maps. So, we just create a value map for the
579  // lower bound with the only available lower bound result repeated to pad up
580  // to the number of upper bound results.
581  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
582  op.getLowerBoundMap().getResult(0));
583  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
584  AffineMap paddedLbMap =
585  AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
586  op.getContext());
587  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
588  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
589  AffineValueMap newUbValueMap;
590  // Compute the `upper bound - lower bound`.
591  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
592  (void)newUbValueMap.canonicalize();
593 
594  // Scale down the upper bound value map by the loop step.
595  unsigned numResult = newUbValueMap.getNumResults();
596  SmallVector<AffineExpr> scaleDownExprs(numResult);
597  for (unsigned i = 0; i < numResult; ++i)
598  scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
599  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
600  // step). Where `n` is the number of results in the upper bound map.
601  AffineMap scaleDownMap =
602  AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
603  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
604 
605  // Set the newly created upper bound map and operands.
606  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
607  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
608  op.setStep(1);
609 
610  // Create an affine.apply that recovers the original loop IV value from the
611  // new (normalized) loop IV.
612  opBuilder.setInsertionPointToStart(op.getBody());
613  // Construct an affine.apply op mapping the new IV to the old IV.
614  AffineMap scaleIvMap =
615  AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
616  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
617  AffineValueMap newIvToOldIvMap;
618  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
619  (void)newIvToOldIvMap.canonicalize();
620  auto newIV = opBuilder.create<AffineApplyOp>(
621  loc, newIvToOldIvMap.getAffineMap(), newIvToOldIvMap.getOperands());
622  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
623  return success();
624 }
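// Illustrative effect (not from the original source):
//
//   affine.for %i = 2 to %N step 32 { ... uses of %i ... }
//
// becomes
//
//   affine.for %ii = 0 to affine_map<()[s0] -> ((s0 - 2) ceildiv 32)>()[%N] {
//     %i = affine.apply affine_map<(d0) -> (d0 * 32 + 2)>(%ii)
//     ... uses of %i ...
//   }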
625 
626 /// Returns true if the memory operation of `destAccess` depends on `srcAccess`
627 /// inside of the innermost common surrounding affine loop between the two
628 /// accesses.
629 static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
630  const MemRefAccess &destAccess) {
631  // Affine dependence analysis is possible only if both ops are in the same
632  // AffineScope.
633  if (getAffineScope(srcAccess.opInst) != getAffineScope(destAccess.opInst))
634  return false;
635 
636  unsigned nsLoops =
637  getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
638  DependenceResult result =
639  checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
640  return hasDependence(result);
641 }
642 
643 /// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
644 /// scope of the outermost `minSurroundingLoops` loops that surround them.
645 /// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
646 static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
647  unsigned minSurroundingLoops) {
648  MemRefAccess srcAccess(srcMemOp);
649  MemRefAccess destAccess(destMemOp);
650 
651  // Affine dependence analysis here is applicable only if both ops operate on
652  // the same memref and if `srcMemOp` and `destMemOp` are in the same
653  // AffineScope. Also, we can only check if our affine scope is isolated from
654  // above; otherwise, values can come from outside of the affine scope that the
655  // check below cannot analyze.
656  Region *srcScope = getAffineScope(srcMemOp);
657  if (srcAccess.memref == destAccess.memref &&
658  srcScope == getAffineScope(destMemOp)) {
659  unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
660  FlatAffineValueConstraints dependenceConstraints;
661  for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
662  DependenceResult result = checkMemrefAccessDependence(
663  srcAccess, destAccess, d, &dependenceConstraints,
664  /*dependenceComponents=*/nullptr);
665  // A dependence failure or the presence of a dependence implies a
666  // side effect.
667  if (!noDependence(result))
668  return true;
669  }
670  // No side effect was seen.
671  return false;
672  }
673  // TODO: Check here if the memrefs alias: there is no side effect if
674  // `srcAccess.memref` and `destAccess.memref` don't alias.
675  return true;
676 }
677 
678 template <typename EffectType, typename T>
679 bool mlir::affine::hasNoInterveningEffect(Operation *start, T memOp) {
680  auto isLocallyAllocated = [](Value memref) {
681  auto *defOp = memref.getDefiningOp();
682  return defOp && hasSingleEffect<MemoryEffects::Allocate>(defOp, memref);
683  };
684 
685  // A boolean representing whether an intervening operation could have impacted
686  // memOp.
687  bool hasSideEffect = false;
688 
689  // Check whether the effect on memOp can be caused by a given operation op.
690  Value memref = memOp.getMemRef();
691  std::function<void(Operation *)> checkOperation = [&](Operation *op) {
692  // If the effect has already been found, exit early.
693  if (hasSideEffect)
694  return;
695 
696  if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
697  SmallVector<MemoryEffects::EffectInstance, 1> effects;
698  memEffect.getEffects(effects);
699 
700  bool opMayHaveEffect = false;
701  for (auto effect : effects) {
702  // If op causes EffectType on a potentially aliasing location for
703  // memOp, mark as having the effect.
704  if (isa<EffectType>(effect.getEffect())) {
705  // TODO: This should be replaced with a check for no aliasing.
706  // Aliasing information should be passed to this method.
707  if (effect.getValue() && effect.getValue() != memref &&
708  isLocallyAllocated(memref) &&
709  isLocallyAllocated(effect.getValue()))
710  continue;
711  opMayHaveEffect = true;
712  break;
713  }
714  }
715 
716  if (!opMayHaveEffect)
717  return;
718 
719  // If the side effect comes from an affine read or write, try to
720  // prove the side effecting `op` cannot reach `memOp`.
721  if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
722  // For ease, let's consider the case that `op` is a store and
723  // we're looking for other potential stores that overwrite memory after
724  // `start`, and before being read in `memOp`. In this case, we only
725  // need to consider other potential stores with depth >
726  // minSurroundingLoops since `start` would overwrite any store with a
727  // smaller number of surrounding loops before.
728  unsigned minSurroundingLoops =
729  getNumCommonSurroundingLoops(*start, *memOp);
730  if (mayHaveEffect(op, memOp, minSurroundingLoops))
731  hasSideEffect = true;
732  return;
733  }
734 
735  // We have an op with a memory effect and we cannot prove if it
736  // intervenes.
737  hasSideEffect = true;
738  return;
739  }
740 
741  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
742  // Recurse into the regions for this op and check whether the internal
743  // operations may have the side effect `EffectType` on memOp.
744  for (Region &region : op->getRegions())
745  for (Block &block : region)
746  for (Operation &op : block)
747  checkOperation(&op);
748  return;
749  }
750 
751  // Otherwise, conservatively assume generic operations have the effect
752  // on the operation.
753  hasSideEffect = true;
754  };
755 
756  // Check all paths from ancestor op `parent` to the operation `to` for the
757  // effect. It is known that `to` must be contained within `parent`.
758  auto until = [&](Operation *parent, Operation *to) {
759  // TODO check only the paths from `parent` to `to`.
760  // Currently we fall back and check the entire parent op, rather than
761  // just the paths from `parent` to `to`, stopping after reaching `to`.
762  // This is conservatively correct, but could be made more aggressive.
763  assert(parent->isAncestor(to));
764  checkOperation(parent);
765  };
766 
767  // Check for all paths from operation `from` to operation `untilOp` for the
768  // given memory effect.
769  std::function<void(Operation *, Operation *)> recur =
770  [&](Operation *from, Operation *untilOp) {
771  assert(
772  from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
773  "Checking for side effect between two operations without a common "
774  "ancestor");
775 
776  // If the operations are in different regions, recursively consider all
777  // paths from `from` to the parent of `to` and all paths from the parent
778  // of `to` to `to`.
779  if (from->getParentRegion() != untilOp->getParentRegion()) {
780  recur(from, untilOp->getParentOp());
781  until(untilOp->getParentOp(), untilOp);
782  return;
783  }
784 
785  // Now, assuming that `from` and `to` exist in the same region, perform
786  // a CFG traversal to check all the relevant operations.
787 
788  // Additional blocks to consider.
789  SmallVector<Block *, 2> todoBlocks;
790  {
791  // First consider the parent block of `from` and check all operations
792  // after `from`.
793  for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
794  iter != end && &*iter != untilOp; ++iter) {
795  checkOperation(&*iter);
796  }
797 
798  // If the parent of `from` doesn't contain `to`, add the successors
799  // to the list of blocks to check.
800  if (untilOp->getBlock() != from->getBlock())
801  for (Block *succ : from->getBlock()->getSuccessors())
802  todoBlocks.push_back(succ);
803  }
804 
805  SmallPtrSet<Block *, 4> done;
806  // Traverse the CFG until hitting `to`.
807  while (!todoBlocks.empty()) {
808  Block *blk = todoBlocks.pop_back_val();
809  if (done.count(blk))
810  continue;
811  done.insert(blk);
812  for (auto &op : *blk) {
813  if (&op == untilOp)
814  break;
815  checkOperation(&op);
816  if (&op == blk->getTerminator())
817  for (Block *succ : blk->getSuccessors())
818  todoBlocks.push_back(succ);
819  }
820  }
821  };
822  recur(start, memOp);
823  return !hasSideEffect;
824 }
825 
826 /// Attempt to eliminate loadOp by replacing it with a value stored into memory
827 /// which the load is guaranteed to retrieve. This check involves three
828 /// components: 1) The store and load must be on the same location 2) The store
829 /// must dominate (and therefore must always occur prior to) the load 3) No
830 /// other operations will overwrite the memory loaded between the given load
831 /// and store. If such a value exists, the replaced `loadOp` will be added to
832 /// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
833 static void forwardStoreToLoad(AffineReadOpInterface loadOp,
834  SmallVectorImpl<Operation *> &loadOpsToErase,
835  SmallPtrSetImpl<Value> &memrefsToErase,
836  DominanceInfo &domInfo) {
837 
838  // The store op candidate for forwarding that satisfies all conditions
839  // to replace the load, if any.
840  Operation *lastWriteStoreOp = nullptr;
841 
842  for (auto *user : loadOp.getMemRef().getUsers()) {
843  auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
844  if (!storeOp)
845  continue;
846  MemRefAccess srcAccess(storeOp);
847  MemRefAccess destAccess(loadOp);
848 
849  // 1. Check if the store and the load have mathematically equivalent
850  // affine access functions; this implies that they statically refer to the
851  // same single memref element. As an example this filters out cases like:
852  // store %A[%i0 + 1]
853  // load %A[%i0]
854  // store %A[%M]
855  // load %A[%N]
856  // Use the AffineValueMap difference based memref access equality checking.
857  if (srcAccess != destAccess)
858  continue;
859 
860  // 2. The store has to dominate the load op to be a candidate.
861  if (!domInfo.dominates(storeOp, loadOp))
862  continue;
863 
864  // 3. The store must reach the load. Access function equivalence only
865  // guarantees this for accesses in the same block. The load could be in a
866  // nested block that is unreachable.
867  if (storeOp->getBlock() != loadOp->getBlock() &&
868  !mustReachAtInnermost(srcAccess, destAccess))
869  continue;
870 
871  // 4. Ensure there is no intermediate operation which could replace the
872  // value in memory.
873  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp))
874  continue;
875 
876  // We now have a candidate for forwarding.
877  assert(lastWriteStoreOp == nullptr &&
878  "multiple simultaneous replacement stores");
879  lastWriteStoreOp = storeOp;
880  }
881 
882  if (!lastWriteStoreOp)
883  return;
884 
885  // Perform the actual store to load forwarding.
886  Value storeVal =
887  cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
888  // Check if 2 values have the same shape. This is needed for affine vector
889  // loads and stores.
890  if (storeVal.getType() != loadOp.getValue().getType())
891  return;
892  loadOp.getValue().replaceAllUsesWith(storeVal);
893  // Record the memref for a later sweep to optimize away.
894  memrefsToErase.insert(loadOp.getMemRef());
895  // Record this to erase later.
896  loadOpsToErase.push_back(loadOp);
897 }
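// Illustrative example (not from the original source): in
//
//   affine.for %i = 0 to 16 {
//     affine.store %v, %A[%i] : memref<16xf32>
//     %x = affine.load %A[%i] : memref<16xf32>
//     "use"(%x) : (f32) -> ()
//   }
//
// the load is replaced by %v (and later erased): the store has an identical
// access function, dominates the load, and no intervening operation may
// write to %A[%i].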
898 
899 template bool
900 mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Read,
901  affine::AffineReadOpInterface>(
902  mlir::Operation *, affine::AffineReadOpInterface);
903 
904 // This attempts to find stores which have no impact on the final result.
905 // A writing op writeA will be eliminated if there exists an op writeB such that:
906 // 1) writeA and writeB have mathematically equivalent affine access functions.
907 // 2) writeB postdominates writeA.
908 // 3) There is no potential read between writeA and writeB.
909 static void findUnusedStore(AffineWriteOpInterface writeA,
910  SmallVectorImpl<Operation *> &opsToErase,
911  PostDominanceInfo &postDominanceInfo) {
912 
913  for (Operation *user : writeA.getMemRef().getUsers()) {
914  // Only consider writing operations.
915  auto writeB = dyn_cast<AffineWriteOpInterface>(user);
916  if (!writeB)
917  continue;
918 
919  // The operations must be distinct.
920  if (writeB == writeA)
921  continue;
922 
923  // Both operations must lie in the same region.
924  if (writeB->getParentRegion() != writeA->getParentRegion())
925  continue;
926 
927  // Both operations must write to the same memory.
928  MemRefAccess srcAccess(writeB);
929  MemRefAccess destAccess(writeA);
930 
931  if (srcAccess != destAccess)
932  continue;
933 
934  // writeB must postdominate writeA.
935  if (!postDominanceInfo.postDominates(writeB, writeA))
936  continue;
937 
938  // There cannot be an operation which reads from memory between
939  // the two writes.
940  if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB))
941  continue;
942 
943  opsToErase.push_back(writeA);
944  break;
945  }
946 }
947 
948 // The load to load forwarding / redundant load elimination is similar to the
949 // store to load forwarding.
950 // loadA will be replaced with loadB if:
951 // 1) loadA and loadB have mathematically equivalent affine access functions.
952 // 2) loadB dominates loadA.
953 // 3) There is no write between loadA and loadB.
954 static void loadCSE(AffineReadOpInterface loadA,
955  SmallVectorImpl<Operation *> &loadOpsToErase,
956  DominanceInfo &domInfo) {
957  SmallVector<AffineReadOpInterface, 4> loadCandidates;
958  for (auto *user : loadA.getMemRef().getUsers()) {
959  auto loadB = dyn_cast<AffineReadOpInterface>(user);
960  if (!loadB || loadB == loadA)
961  continue;
962 
963  MemRefAccess srcAccess(loadB);
964  MemRefAccess destAccess(loadA);
965 
966  // 1. The accesses should be to the same location.
967  if (srcAccess != destAccess) {
968  continue;
969  }
970 
971  // 2. loadB should dominate loadA.
972  if (!domInfo.dominates(loadB, loadA))
973  continue;
974 
975  // 3. There should not be a write between loadA and loadB.
976  if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
977  loadB.getOperation(), loadA))
978  continue;
979 
980  // Check if two values have the same shape. This is needed for affine vector
981  // loads.
982  if (loadB.getValue().getType() != loadA.getValue().getType())
983  continue;
984 
985  loadCandidates.push_back(loadB);
986  }
987 
988  // Of the legal load candidates, use the one that dominates all others
989  // to minimize the subsequent need for loadCSE.
990  Value loadB;
991  for (AffineReadOpInterface option : loadCandidates) {
992  if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
993  return depStore == option ||
994  domInfo.dominates(option.getOperation(),
995  depStore.getOperation());
996  })) {
997  loadB = option.getValue();
998  break;
999  }
1000  }
1001 
1002  if (loadB) {
1003  loadA.getValue().replaceAllUsesWith(loadB);
1004  // Record this to erase later.
1005  loadOpsToErase.push_back(loadA);
1006  }
1007 }
1008 
1009 // The store to load forwarding and load CSE rely on three conditions:
1010 //
1011 // 1) store/load providing a replacement value and load being replaced need to
1012 // have mathematically equivalent affine access functions (checked after full
1013 // composition of load/store operands); this implies that they access the same
1014 // single memref element for all iterations of the common surrounding loop,
1015 //
1016 // 2) the store/load op should dominate the load op,
1017 //
1018 // 3) no operation that may write to memory read by the load being replaced can
1019 // occur after executing the instruction (load or store) providing the
1020 // replacement value and before the load being replaced (thus potentially
1021 // allowing overwriting the memory read by the load).
1022 //
1023 // The above conditions are simple to check, sufficient, and powerful for most
1024 // cases in practice. They are sufficient, but not necessary, since they
1025 // don't reason about loops that are guaranteed to execute at least once or
1026 // multiple sources to forward from.
1027 //
1028 // TODO: more forwarding can be done when support for
1029 // loop/conditional live-out SSA values is available.
1030 // TODO: do general dead store elimination for memref's. This pass
1031 // currently eliminates the stores only if no other loads/uses (other
1032 // than dealloc) remain.
1033 //
1034 void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1035  PostDominanceInfo &postDomInfo) {
1036  // Load op's whose results were replaced by those forwarded from stores.
1037  SmallVector<Operation *, 8> opsToErase;
1038 
1039  // A list of memref's that are potentially dead / could be eliminated.
1040  SmallPtrSet<Value, 4> memrefsToErase;
1041 
1042  // Walk all load's and perform store to load forwarding.
1043  f.walk([&](AffineReadOpInterface loadOp) {
1044  forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo);
1045  });
1046  for (auto *op : opsToErase)
1047  op->erase();
1048  opsToErase.clear();
1049 
1050  // Walk all store's and perform unused store elimination
1051  f.walk([&](AffineWriteOpInterface storeOp) {
1052  findUnusedStore(storeOp, opsToErase, postDomInfo);
1053  });
1054  for (auto *op : opsToErase)
1055  op->erase();
1056  opsToErase.clear();
1057 
1058  // Check if the store fwd'ed memrefs are now left with only stores and
1059  // deallocs and can thus be completely deleted. Note: the canonicalize pass
1060  // should be able to do this as well, but we'll do it here since we collected
1061  // these anyway.
1062  for (auto memref : memrefsToErase) {
1063  // If the memref hasn't been locally alloc'ed, skip.
1064  Operation *defOp = memref.getDefiningOp();
1065  if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1066  // TODO: if the memref was returned by a 'call' operation, we
1067  // could still erase it if the call had no side-effects.
1068  continue;
1069  if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1070  return !isa<AffineWriteOpInterface>(ownerOp) &&
1071  !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1072  }))
1073  continue;
1074 
1075  // Erase all stores, the dealloc, and the alloc on the memref.
1076  for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1077  user->erase();
1078  defOp->erase();
1079  }
1080 
1081  // To eliminate as many loads as possible, run load CSE after eliminating
1082  // stores. Otherwise, some stores are wrongly seen as having an intervening
1083  // effect.
1084  f.walk([&](AffineReadOpInterface loadOp) {
1085  loadCSE(loadOp, opsToErase, domInfo);
1086  });
1087  for (auto *op : opsToErase)
1088  op->erase();
1089 }
1090 
1091 // Perform the replacement in `op`.
1092 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1093  Value oldMemRef, Value newMemRef, Operation *op,
1094  ArrayRef<Value> extraIndices, AffineMap indexRemap,
1095  ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1096  bool allowNonDereferencingOps) {
1097  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1098  (void)newMemRefRank; // unused in opt mode
1099  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1100  (void)oldMemRefRank; // unused in opt mode
1101  if (indexRemap) {
1102  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1103  "symbolic operand count mismatch");
1104  assert(indexRemap.getNumInputs() ==
1105  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1106  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1107  } else {
1108  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1109  }
1110 
1111  // Assert same elemental type.
1112  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1113  cast<MemRefType>(newMemRef.getType()).getElementType());
1114 
1115  SmallVector<unsigned, 2> usePositions;
1116  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1117  if (opEntry.value() == oldMemRef)
1118  usePositions.push_back(opEntry.index());
1119  }
1120 
1121  // If memref doesn't appear, nothing to do.
1122  if (usePositions.empty())
1123  return success();
1124 
1125  if (usePositions.size() > 1) {
1126  // TODO: extend it for this case when needed (rare).
1127  assert(false && "multiple dereferencing uses in a single op not supported");
1128  return failure();
1129  }
1130 
1131  unsigned memRefOperandPos = usePositions.front();
1132 
1133  OpBuilder builder(op);
1134  // The following checks if op is dereferencing memref and performs the access
1135  // index rewrites.
1136  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1137  if (!affMapAccInterface) {
1138  if (!allowNonDereferencingOps) {
1139  // Failure: memref used in a non-dereferencing context (potentially
1140  // escapes); no replacement in these cases unless allowNonDereferencingOps
1141  // is set.
1142  return failure();
1143  }
1144  op->setOperand(memRefOperandPos, newMemRef);
1145  return success();
1146  }
1147  // Perform index rewrites for the dereferencing op and then replace the op
1148  NamedAttribute oldMapAttrPair =
1149  affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1150  AffineMap oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1151  unsigned oldMapNumInputs = oldMap.getNumInputs();
1152  SmallVector<Value, 4> oldMapOperands(
1153  op->operand_begin() + memRefOperandPos + 1,
1154  op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
1155 
1156  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1157  SmallVector<Value, 4> oldMemRefOperands;
1158  SmallVector<Value, 4> affineApplyOps;
1159  oldMemRefOperands.reserve(oldMemRefRank);
1160  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1161  for (auto resultExpr : oldMap.getResults()) {
1162  auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1163  oldMap.getNumSymbols(), resultExpr);
1164  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1165  oldMapOperands);
1166  oldMemRefOperands.push_back(afOp);
1167  affineApplyOps.push_back(afOp);
1168  }
1169  } else {
1170  oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1171  }
1172 
1173  // Construct new indices as a remap of the old ones if a remapping has been
1174  // provided. The indices of a memref come right after it, i.e.,
1175  // at position memRefOperandPos + 1.
1176  SmallVector<Value, 4> remapOperands;
1177  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1178  symbolOperands.size());
1179  remapOperands.append(extraOperands.begin(), extraOperands.end());
1180  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1181  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1182 
1183  SmallVector<Value, 4> remapOutputs;
1184  remapOutputs.reserve(oldMemRefRank);
1185 
1186  if (indexRemap &&
1187  indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1188  // Remapped indices.
1189  for (auto resultExpr : indexRemap.getResults()) {
1190  auto singleResMap = AffineMap::get(
1191  indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1192  auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
1193  remapOperands);
1194  remapOutputs.push_back(afOp);
1195  affineApplyOps.push_back(afOp);
1196  }
1197  } else {
1198  // No remapping specified.
1199  remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1200  }
1201 
1202  SmallVector<Value, 4> newMapOperands;
1203  newMapOperands.reserve(newMemRefRank);
1204 
1205  // Prepend 'extraIndices' in 'newMapOperands'.
1206  for (Value extraIndex : extraIndices) {
1207  assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1208  "invalid memory op index");
1209  newMapOperands.push_back(extraIndex);
1210  }
1211 
1212  // Append 'remapOutputs' to 'newMapOperands'.
1213  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1214 
1215  // Create new fully composed AffineMap for new op to be created.
1216  assert(newMapOperands.size() == newMemRefRank);
1217  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1218  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
1219  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1220  newMap = simplifyAffineMap(newMap);
1221  canonicalizeMapAndOperands(&newMap, &newMapOperands);
1222  // Remove any affine.apply's that became dead as a result of composition.
1223  for (Value value : affineApplyOps)
1224  if (value.use_empty())
1225  value.getDefiningOp()->erase();
1226 
1227  OperationState state(op->getLoc(), op->getName());
1228  // Construct the new operation using this memref.
1229  state.operands.reserve(op->getNumOperands() + extraIndices.size());
1230  // Insert the non-memref operands.
1231  state.operands.append(op->operand_begin(),
1232  op->operand_begin() + memRefOperandPos);
1233  // Insert the new memref value.
1234  state.operands.push_back(newMemRef);
1235 
1236  // Insert the new memref map operands.
1237  state.operands.append(newMapOperands.begin(), newMapOperands.end());
1238 
1239  // Insert the remaining operands unmodified.
1240  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1241  oldMapNumInputs,
1242  op->operand_end());
1243 
1244  // Result types don't change. Both memref's are of the same elemental type.
1245  state.types.reserve(op->getNumResults());
1246  for (auto result : op->getResults())
1247  state.types.push_back(result.getType());
1248 
1249  // Add attribute for 'newMap', other Attributes do not change.
1250  auto newMapAttr = AffineMapAttr::get(newMap);
1251  for (auto namedAttr : op->getAttrs()) {
1252  if (namedAttr.getName() == oldMapAttrPair.getName())
1253  state.attributes.push_back({namedAttr.getName(), newMapAttr});
1254  else
1255  state.attributes.push_back(namedAttr);
1256  }
1257 
1258  // Create the new operation.
1259  auto *repOp = builder.create(state);
1260  op->replaceAllUsesWith(repOp);
1261  op->erase();
1262 
1263  return success();
1264 }
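// Illustrative example (not from the original source): replacing uses of
// %old : memref<64xf32> with %new : memref<4x16xf32> and
// indexRemap = affine_map<(d0) -> (d0 floordiv 16, d0 mod 16)> rewrites
//
//   %x = affine.load %old[%i] : memref<64xf32>
//
// into (after composing and simplifying the remap applies)
//
//   %x = affine.load %new[%i floordiv 16, %i mod 16] : memref<4x16xf32>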
1265 
1266 LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1267  Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1268  AffineMap indexRemap, ArrayRef<Value> extraOperands,
1269  ArrayRef<Value> symbolOperands, Operation *domOpFilter,
1270  Operation *postDomOpFilter, bool allowNonDereferencingOps,
1271  bool replaceInDeallocOp) {
1272  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1273  (void)newMemRefRank; // unused in opt mode
1274  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1275  (void)oldMemRefRank;
1276  if (indexRemap) {
1277  assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1278  "symbol operand count mismatch");
1279  assert(indexRemap.getNumInputs() ==
1280  extraOperands.size() + oldMemRefRank + symbolOperands.size());
1281  assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1282  } else {
1283  assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1284  }
1285 
1286  // Assert same elemental type.
1287  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1288  cast<MemRefType>(newMemRef.getType()).getElementType());
1289 
1290  std::unique_ptr<DominanceInfo> domInfo;
1291  std::unique_ptr<PostDominanceInfo> postDomInfo;
1292  if (domOpFilter)
1293  domInfo = std::make_unique<DominanceInfo>(
1294  domOpFilter->getParentOfType<func::FuncOp>());
1295 
1296  if (postDomOpFilter)
1297  postDomInfo = std::make_unique<PostDominanceInfo>(
1298  postDomOpFilter->getParentOfType<func::FuncOp>());
1299 
1300  // Walk all uses of old memref; collect ops to perform replacement. We use a
1301  // DenseSet since an operation could potentially have multiple uses of a
1302  // memref (although rare), and the replacement later is going to erase ops.
1303  DenseSet<Operation *> opsToReplace;
1304  for (auto *op : oldMemRef.getUsers()) {
1305  // Skip this use if it's not dominated by domOpFilter.
1306  if (domOpFilter && !domInfo->dominates(domOpFilter, op))
1307  continue;
1308 
1309  // Skip this use if it's not post-dominated by postDomOpFilter.
1310  if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
1311  continue;
1312 
1313  // Skip dealloc's - no replacement is necessary, and a memref replacement
1314  // at other uses doesn't hurt these dealloc's.
1315  if (hasSingleEffect<MemoryEffects::Free>(op, oldMemRef) &&
1316  !replaceInDeallocOp)
1317  continue;
1318 
1319  // Check if the memref was used in a non-dereferencing context. It is fine
1320  // for the memref to be used in a non-dereferencing way outside of the
1321  // region where this replacement is happening.
1322  if (!isa<AffineMapAccessInterface>(*op)) {
1323  if (!allowNonDereferencingOps) {
1324  LLVM_DEBUG(llvm::dbgs()
1325  << "Memref replacement failed: non-dereferencing memref op: \n"
1326  << *op << '\n');
1327  return failure();
1328  }
1329  // Non-dereferencing ops with the MemRefsNormalizable trait are
1330  // supported for replacement.
1331  if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
1332  LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1333  "memrefs normalizable trait: \n"
1334  << *op << '\n');
1335  return failure();
1336  }
1337  }
1338 
1339  // We'll first collect and then replace --- since replacement erases the op
1340  // that has the use, and that op could be postDomFilter or domFilter itself!
1341  opsToReplace.insert(op);
1342  }
1343 
1344  for (auto *op : opsToReplace) {
1345  if (failed(replaceAllMemRefUsesWith(
1346  oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
1347  symbolOperands, allowNonDereferencingOps)))
1348  llvm_unreachable("memref replacement guaranteed to succeed here");
1349  }
1350 
1351  return success();
1352 }
1353 
1354 /// Given an operation, inserts one or more single result affine
1355 /// apply operations, results of which are exclusively used by this
1356 /// operation. The operands of these newly created affine apply ops are
1357 /// guaranteed to be loop iterators or terminal symbols of a function.
1358 ///
1359 /// Before
1360 ///
1361 /// affine.for %i = 0 to #map(%N)
1362 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1363 /// "send"(%idx, %A, ...)
1364 /// "compute"(%idx)
1365 ///
1366 /// After
1367 ///
1368 /// affine.for %i = 0 to #map(%N)
1369 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1370 /// "send"(%idx, %A, ...)
1371 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1372 /// "compute"(%idx_)
1373 ///
1374 /// This allows applying different transformations on send and compute (e.g.,
1375 /// different shifts/delays).
1376 ///
1377 /// Leaves `sliceOps` empty either if none of opInst's operands were the result
1378 /// of an affine.apply and thus there was no affine computation slice to create,
1379 /// or if all the affine.apply ops supplying operands to this opInst did not have
1380 /// any uses besides this opInst; otherwise returns the list of affine.apply
1381 /// operations created in the output argument `sliceOps`.
1382 void mlir::affine::createAffineComputationSlice(
1383  Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1384  // Collect all operands that are results of affine apply ops.
1385  SmallVector<Value, 4> subOperands;
1386  subOperands.reserve(opInst->getNumOperands());
1387  for (auto operand : opInst->getOperands())
1388  if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1389  subOperands.push_back(operand);
1390 
1391  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1392  SmallVector<Operation *, 4> affineApplyOps;
1393  getReachableAffineApplyOps(subOperands, affineApplyOps);
1394  // Skip transforming if there are no affine maps to compose.
1395  if (affineApplyOps.empty())
1396  return;
1397 
1398  // Check if all uses of the affine apply ops lie only in this op, in
1399  // which case there would be nothing to do.
1400  bool localized = true;
1401  for (auto *op : affineApplyOps) {
1402  for (auto result : op->getResults()) {
1403  for (auto *user : result.getUsers()) {
1404  if (user != opInst) {
1405  localized = false;
1406  break;
1407  }
1408  }
1409  }
1410  }
1411  if (localized)
1412  return;
1413 
1414  OpBuilder builder(opInst);
1415  SmallVector<Value, 4> composedOpOperands(subOperands);
1416  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1417  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1418 
1419  // Create an affine.apply for each of the map results.
1420  sliceOps->reserve(composedMap.getNumResults());
1421  for (auto resultExpr : composedMap.getResults()) {
1422  auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1423  composedMap.getNumSymbols(), resultExpr);
1424  sliceOps->push_back(builder.create<AffineApplyOp>(
1425  opInst->getLoc(), singleResMap, composedOpOperands));
1426  }
1427 
1428  // Construct the new operands that include the results from the composed
1429  // affine apply ops above instead of the existing ones (subOperands). They
1430  // differ from opInst's operands only for those in 'subOperands', which are
1431  // replaced by the corresponding results from 'sliceOps'.
1432  SmallVector<Value, 4> newOperands(opInst->getOperands());
1433  for (Value &operand : newOperands) {
1434  // Replace the subOperands from among the new operands.
1435  unsigned j, f;
1436  for (j = 0, f = subOperands.size(); j < f; j++) {
1437  if (operand == subOperands[j])
1438  break;
1439  }
1440  if (j < subOperands.size())
1441  operand = (*sliceOps)[j];
1442  }
1443  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1444  opInst->setOperand(idx, newOperands[idx]);
1445 }
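// A minimal usage sketch of the utility above (illustrative only; `computeOp`
// and the helper name are assumptions): privatize the affine.apply ops feeding
// a single operation so it can be shifted or delayed independently of the
// other users of those affine.apply results.
static void computationSliceSketch(Operation *computeOp) {
  SmallVector<AffineApplyOp, 4> sliceOps;
  createAffineComputationSlice(computeOp, &sliceOps);
  // `sliceOps` stays empty when no operand of `computeOp` came from an
  // affine.apply, or when all supplying affine.apply ops already had no other
  // users.
}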
1446 
1447 /// Enum to set patterns of affine expr in tiled-layout map.
1448 /// TileFloorDiv: <dim expr> div <tile size>
1449 /// TileMod: <dim expr> mod <tile size>
1450 /// TileNone: None of the above
1451 /// Example:
1452 /// #tiled_2d_128x256 = affine_map<(d0, d1)
1453 /// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1454 /// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1455 /// "d0 mod 128" and "d1 mod 256" ==> TileMod
1456 enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1457 
1458 /// Check if `map` is a tiled layout. In a tiled layout, specific k dimensions
1459 /// that are floordiv'ed by their respective tile sizes also appear in a mod
1460 /// with the same tile sizes, and no other expression involves those k
1461 /// dimensions. This function fills `tileSizePos` with tuples of (tile-size
1462 /// AffineExpr, position of the `floordiv`, position of the `mod`). If `map` is
1463 /// not a tiled layout, an empty vector is returned.
1464 static LogicalResult getTileSizePos(
1465  AffineMap map,
1466  SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1467  // Create `floordivExprs`, a vector of tuples holding the LHS, RHS, and result
1468  // position of each `floordiv` in the `map` output.
1469  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1470  // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1471  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1472  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
1473  unsigned pos = 0;
1474  for (AffineExpr expr : map.getResults()) {
1475  if (expr.getKind() == AffineExprKind::FloorDiv) {
1476  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1477  if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1478  floordivExprs.emplace_back(
1479  std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1480  }
1481  pos++;
1482  }
1483  // Not tiled layout if `floordivExprs` is empty.
1484  if (floordivExprs.empty()) {
1485  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1486  return success();
1487  }
1488 
1489  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1490  // not a tiled layout.
1491  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1492  AffineExpr floordivExprLHS = std::get<0>(fexpr);
1493  AffineExpr floordivExprRHS = std::get<1>(fexpr);
1494  unsigned floordivPos = std::get<2>(fexpr);
1495 
1496  // Walk each affine expr of the `map` output except `fexpr`, checking whether
1497  // the LHS and RHS of `fexpr` appear as the LHS and RHS of a `mod`. If the LHS
1498  // of `fexpr` is used in any other expr, the map is not a tiled layout. Examples:
1499  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1500  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1501  // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1502  // 256)>
1503  bool found = false;
1504  pos = 0;
1505  for (AffineExpr expr : map.getResults()) {
1506  bool notTiled = false;
1507  if (pos != floordivPos) {
1508  expr.walk([&](AffineExpr e) {
1509  if (e == floordivExprLHS) {
1510  if (expr.getKind() == AffineExprKind::Mod) {
1511  AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1512  // If the LHS and RHS of `mod` are the same as those of the floordiv.
1513  if (floordivExprLHS == binaryExpr.getLHS() &&
1514  floordivExprRHS == binaryExpr.getRHS()) {
1515  // Save tile size (RHS of `mod`), and position of `floordiv` and
1516  // `mod` if same expr with `mod` is not found yet.
1517  if (!found) {
1518  tileSizePos.emplace_back(
1519  std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1520  found = true;
1521  } else {
1522  // Non-tiled layout: has multiple `mod`s with the same LHS.
1523  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1524  // mod 256, d2 mod 256)>
1525  notTiled = true;
1526  }
1527  } else {
1528  // Non-tiled layout: the RHS of the `mod` differs from that of the `floordiv`.
1529  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1530  // mod 128)>
1531  notTiled = true;
1532  }
1533  } else {
1534  // Non-tiled layout: the LHS is the same, but the expr is not a `mod`.
1535  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1536  // floordiv 256)>
1537  notTiled = true;
1538  }
1539  }
1540  });
1541  }
1542  if (notTiled) {
1543  tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1544  return success();
1545  }
1546  pos++;
1547  }
1548  }
1549  return success();
1550 }
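// A small sketch of how the helper above can be queried (illustrative only;
// the helper name is an assumption). For the 128x256 tiled map from the
// comment, `tileSizePos` would contain the tuples (128, /*floordiv pos=*/0,
// /*mod pos=*/2) and (256, 1, 3).
static bool isTiledLayoutSketch(AffineMap map) {
  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
  if (failed(getTileSizePos(map, tileSizePos)))
    return false;
  return !tileSizePos.empty();
}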
1551 
1552 /// Check if the `dim` dimension of a memrefType with `layoutMap` becomes
1553 /// dynamic after normalization. Output dimensions whose expressions involve a
1554 /// dynamic input dimension become dynamic. Returns true if `dim` is a dynamic
1555 /// dimension.
1556 ///
1557 /// Example:
1558 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1559 ///
1560 /// If d1 is a dynamic dimension, the 2nd and 3rd output dimensions are dynamic.
1561 /// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1562 static bool
1563 isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1564  SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
1565  MLIRContext *context) {
1566  bool isDynamicDim = false;
1567  AffineExpr expr = layoutMap.getResults()[dim];
1568  // Check if affine expr of the dimension includes dynamic dimension of input
1569  // memrefType.
1570  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
1571  if (isa<AffineDimExpr>(e)) {
1572  for (unsigned dm : inMemrefTypeDynDims) {
1573  if (e == getAffineDimExpr(dm, context)) {
1574  isDynamicDim = true;
1575  }
1576  }
1577  }
1578  });
1579  return isDynamicDim;
1580 }
1581 
1582 /// Create affine expr to calculate dimension size for a tiled-layout map.
1583 static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1584  TileExprPattern pat) {
1585  // Create map output for the patterns.
1586  // "floordiv <tile size>" ==> "ceildiv <tile size>"
1587  // "mod <tile size>" ==> "<tile size>"
1588  AffineExpr newMapOutput;
1589  AffineBinaryOpExpr binaryExpr = nullptr;
1590  switch (pat) {
1591  case TileExprPattern::TileMod:
1592  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1593  newMapOutput = binaryExpr.getRHS();
1594  break;
1595  case TileExprPattern::TileFloorDiv:
1596  binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1597  newMapOutput = getAffineBinaryOpExpr(
1598  AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1599  break;
1600  default:
1601  newMapOutput = oldMapOutput;
1602  }
1603  return newMapOutput;
1604 }
1605 
1606 /// Create new maps to calculate each dimension size of `newMemRefType`, and
1607 /// create `newDynamicSizes` from them by using AffineApplyOp.
1608 ///
1609 /// Steps for normalizing dynamic memrefs for a tiled layout map
1610 /// Example:
1611 /// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1612 /// %0 = dim %arg0, %c1 :memref<4x?xf32>
1613 /// %1 = alloc(%0) : memref<4x?xf32, #map0>
1614 ///
1615 /// (Before this function)
1616 /// 1. Check if `map` (#map0) is a tiled layout using `getTileSizePos()`. Only a
1617 /// single layout map is supported.
1618 ///
1619 /// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1620 /// is memref<4x?x?xf32> in the above example.
1621 ///
1622 /// (In this function)
1623 /// 3. Create new maps to calculate each dimension of the normalized memrefType
1624 /// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1625 /// dimension size can be calculated by replacing "floordiv <tile size>" with
1626 /// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1627 /// - New map in the above example
1628 /// #map0 = affine_map<(d0, d1) -> (d0)>
1629 /// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1630 /// #map2 = affine_map<(d0, d1) -> (32)>
1631 ///
1632 /// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1633 /// is used in dynamicSizes of new AllocOp.
1634 /// %0 = dim %arg0, %c1 : memref<4x?xf32>
1635 /// %c4 = arith.constant 4 : index
1636 /// %1 = affine.apply #map1(%c4, %0)
1637 /// %2 = affine.apply #map2(%c4, %0)
1638 static void createNewDynamicSizes(MemRefType oldMemRefType,
1639  MemRefType newMemRefType, AffineMap map,
1640  memref::AllocOp *allocOp, OpBuilder b,
1641  SmallVectorImpl<Value> &newDynamicSizes) {
1642  // Create new input for AffineApplyOp.
1643  SmallVector<Value, 4> inAffineApply;
1644  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1645  unsigned dynIdx = 0;
1646  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1647  if (oldMemRefShape[d] < 0) {
1648  // Use dynamicSizes of allocOp for dynamic dimension.
1649  inAffineApply.emplace_back(allocOp->getDynamicSizes()[dynIdx]);
1650  dynIdx++;
1651  } else {
1652  // Create ConstantOp for static dimension.
1653  auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1654  inAffineApply.emplace_back(
1655  b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
1656  }
1657  }
1658 
1659  // Create a new map to calculate each dimension size of the new memref for
1660  // each original map output, but only for dynamic dimensions of `newMemRefType`.
1661  unsigned newDimIdx = 0;
1662  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1663  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1664  (void)getTileSizePos(map, tileSizePos);
1665  for (AffineExpr expr : map.getResults()) {
1666  if (newMemRefShape[newDimIdx] < 0) {
1667  // Create new maps to calculate each dimension size of new memref.
1668  TileExprPattern pat = TileExprPattern::TileNone;
1669  for (auto pos : tileSizePos) {
1670  if (newDimIdx == std::get<1>(pos))
1671  pat = TileExprPattern::TileFloorDiv;
1672  else if (newDimIdx == std::get<2>(pos))
1673  pat = TileExprPattern::TileMod;
1674  }
1675  AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1676  AffineMap newMap =
1677  AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1678  Value affineApp =
1679  b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
1680  newDynamicSizes.emplace_back(affineApp);
1681  }
1682  newDimIdx++;
1683  }
1684 }
1685 
1686 // TODO: Currently works for static memrefs with a single layout map.
1687 LogicalResult mlir::affine::normalizeMemRef(memref::AllocOp *allocOp) {
1688  MemRefType memrefType = allocOp->getType();
1689  OpBuilder b(*allocOp);
1690 
1691  // Fetch a new memref type after normalizing the old memref to have an
1692  // identity map layout.
1693  MemRefType newMemRefType = normalizeMemRefType(memrefType);
1694  if (newMemRefType == memrefType)
1695  // Either memrefType already had an identity map or the map couldn't be
1696  // transformed to an identity map.
1697  return failure();
1698 
1699  Value oldMemRef = allocOp->getResult();
1700 
1701  SmallVector<Value, 4> symbolOperands(allocOp->getSymbolOperands());
1702  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1703  memref::AllocOp newAlloc;
1704  // Check if `layoutMap` is a tiled layout. Only a single layout map is
1705  // supported for normalizing dynamic memrefs.
1706  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1707  (void)getTileSizePos(layoutMap, tileSizePos);
1708  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1709  MemRefType oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1710  SmallVector<Value, 4> newDynamicSizes;
1711  createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1712  newDynamicSizes);
1713  // Add the new dynamic sizes in new AllocOp.
1714  newAlloc =
1715  b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1716  newDynamicSizes, allocOp->getAlignmentAttr());
1717  } else {
1718  newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
1719  allocOp->getAlignmentAttr());
1720  }
1721  // Replace all uses of the old memref.
1722  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1723  /*extraIndices=*/{},
1724  /*indexRemap=*/layoutMap,
1725  /*extraOperands=*/{},
1726  /*symbolOperands=*/symbolOperands,
1727  /*domOpFilter=*/nullptr,
1728  /*postDomOpFilter=*/nullptr,
1729  /*allowNonDereferencingOps=*/true))) {
1730  // If it failed (due to escapes for example), bail out.
1731  newAlloc.erase();
1732  return failure();
1733  }
1734  // Replace any uses of the original alloc op and erase it. All remaining uses
1735  // have to be dealloc's; RAMUW above would've failed otherwise.
1736  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1737  return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1738  }));
1739  oldMemRef.replaceAllUsesWith(newAlloc);
1740  allocOp->erase();
1741  return success();
1742 }
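// A minimal usage sketch (illustrative only; `funcOp` and the helper name are
// assumptions): normalize the layout of every alloc in a function, collecting
// the allocs first since a successful normalization erases the original op.
static void normalizeAllAllocsSketch(func::FuncOp funcOp) {
  SmallVector<memref::AllocOp, 4> allocOps;
  funcOp.walk([&](memref::AllocOp allocOp) { allocOps.push_back(allocOp); });
  for (memref::AllocOp allocOp : allocOps)
    // Failure means the layout could not be normalized (e.g. the memref
    // escapes through a use that cannot be rewritten); the IR is left
    // untouched in that case.
    (void)normalizeMemRef(&allocOp);
}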
1743 
1744 MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1745  unsigned rank = memrefType.getRank();
1746  if (rank == 0)
1747  return memrefType;
1748 
1749  if (memrefType.getLayout().isIdentity()) {
1750  // Either no map is associated with this memref or this memref has
1751  // a trivial (identity) map.
1752  return memrefType;
1753  }
1754  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1755  unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1756 
1757  // We don't do any checks for one-to-one'ness; we assume that it is
1758  // one-to-one.
1759 
1760  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1761  // for now.
1762  // TODO: Normalize the other types of dynamic memrefs.
1763  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1764  (void)getTileSizePos(layoutMap, tileSizePos);
1765  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1766  return memrefType;
1767 
1768  // We have a single map that is not an identity map. Create a new memref
1769  // with the right shape and an identity layout map.
1770  ArrayRef<int64_t> shape = memrefType.getShape();
1771  // FlatAffineValueConstraints may later on use symbolicOperands.
1772  FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1773  SmallVector<unsigned, 4> memrefTypeDynDims;
1774  for (unsigned d = 0; d < rank; ++d) {
1775  // Use the constraint system only for static dimensions.
1776  if (shape[d] > 0) {
1777  fac.addBound(BoundType::LB, d, 0);
1778  fac.addBound(BoundType::UB, d, shape[d] - 1);
1779  } else {
1780  memrefTypeDynDims.emplace_back(d);
1781  }
1782  }
1783  // We compose this map with the original index (logical) space to derive
1784  // the upper bounds for the new index space.
1785  unsigned newRank = layoutMap.getNumResults();
1786  if (failed(fac.composeMatchingMap(layoutMap)))
1787  return memrefType;
1788  // TODO: Handle semi-affine maps.
1789  // Project out the old data dimensions.
1790  fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1791  SmallVector<int64_t, 4> newShape(newRank);
1792  MLIRContext *context = memrefType.getContext();
1793  for (unsigned d = 0; d < newRank; ++d) {
1794  // Check if this dimension is dynamic.
1795  bool isDynDim =
1796  isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims, context);
1797  if (isDynDim) {
1798  newShape[d] = ShapedType::kDynamic;
1799  } else {
1800  // The lower bound for the shape is always zero.
1801  std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1802  // For a static memref and an affine map with no symbols, this is
1803  // always bounded. However, when we have symbols, we may not be able to
1804  // obtain a constant upper bound. Also, mapping to a negative space is
1805  // invalid for normalization.
1806  if (!ubConst.has_value() || *ubConst < 0) {
1807  LLVM_DEBUG(llvm::dbgs()
1808  << "can't normalize map due to unknown/invalid upper bound");
1809  return memrefType;
1810  }
1811  // The static size of this dimension is its constant upper bound plus one.
1812  newShape[d] = *ubConst + 1;
1813  }
1814  }
1815 
1816  // Create the new memref type after trivializing the old layout map.
1817  MemRefType newMemRefType =
1818  MemRefType::Builder(memrefType)
1819  .setShape(newShape)
1820  .setLayout(AffineMapAttr::get(
1821  AffineMap::getMultiDimIdentityMap(newRank, context)));
1822  return newMemRefType;
1823 }
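// A small sketch (illustrative only; the helper name is an assumption):
// checking whether an alloc's type would actually change under normalization,
// without rewriting any IR.
static bool hasNormalizableLayoutSketch(memref::AllocOp allocOp) {
  MemRefType ty = allocOp.getType();
  // normalizeMemRefType returns its input unchanged when the layout is already
  // the identity map or when it cannot be normalized.
  return normalizeMemRefType(ty) != ty;
}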
1824 
1825 DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1826  Value rhs) {
1827  DivModValue result;
1828  AffineExpr d0, d1;
1829  bindDims(b.getContext(), d0, d1);
1830  result.quotient =
1831  affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1832  result.remainder =
1833  affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1834  return result;
1835 }
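// A minimal usage sketch (illustrative only; the helper name is an
// assumption): materialize only the floordiv part of the pair, given an
// OpBuilder, a Location, and two index-typed values.
static Value floorDivSketch(OpBuilder &b, Location loc, Value lhs, Value rhs) {
  // getDivMod emits two composed affine.apply ops; only the quotient
  // (lhs floordiv rhs) is returned here, the remainder is left unused.
  return getDivMod(b, loc, lhs, rhs).quotient;
}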
1836 
1837 /// Create IR that computes the product of all elements in the set.
1838 static FailureOr<OpFoldResult> getIndexProduct(OpBuilder &b, Location loc,
1839  ArrayRef<Value> set) {
1840  if (set.empty())
1841  return failure();
1842  OpFoldResult result = set[0];
1843  AffineExpr s0, s1;
1844  bindSymbols(b.getContext(), s0, s1);
1845  for (unsigned i = 1, e = set.size(); i < e; i++)
1846  result = makeComposedFoldedAffineApply(b, loc, s0 * s1, {result, set[i]});
1847  return result;
1848 }
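// A minimal sketch of the same folding technique for just two values
// (illustrative only; the helper name is an assumption). Because
// makeComposedFoldedAffineApply returns an OpFoldResult, a product of
// constants folds away without creating any affine.apply op.
static OpFoldResult productOfTwoSketch(OpBuilder &builder, Location loc,
                                       Value a, Value b) {
  AffineExpr s0, s1;
  bindSymbols(builder.getContext(), s0, s1);
  return makeComposedFoldedAffineApply(builder, loc, s0 * s1, {a, b});
}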
1849 
1850 FailureOr<SmallVector<Value>>
1851 mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1852  ArrayRef<Value> basis) {
1853  unsigned numDims = basis.size();
1854 
1855  SmallVector<Value> divisors;
1856  for (unsigned i = 1; i < numDims; i++) {
1857  ArrayRef<Value> slice = basis.drop_front(i);
1858  FailureOr<OpFoldResult> prod = getIndexProduct(b, loc, slice);
1859  if (failed(prod))
1860  return failure();
1861  divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, *prod));
1862  }
1863 
1864  SmallVector<Value> results;
1865  results.reserve(divisors.size() + 1);
1866  Value residual = linearIndex;
1867  for (Value divisor : divisors) {
1868  DivModValue divMod = getDivMod(b, loc, residual, divisor);
1869  results.push_back(divMod.quotient);
1870  residual = divMod.remainder;
1871  }
1872  results.push_back(residual);
1873  return results;
1874 }
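// A minimal usage sketch (illustrative only; the helper name and extents are
// assumptions): recover (i, j, k) indices from a flat index into an MxNxK
// iteration space.
static FailureOr<SmallVector<Value>>
delinearize3DSketch(OpBuilder &b, Location loc, Value linearIndex, Value dimM,
                    Value dimN, Value dimK) {
  // With basis = {M, N, K}, the result is {linear floordiv (N*K),
  // (linear mod (N*K)) floordiv K, linear mod K}, each produced by composed
  // affine.apply ops.
  Value basis[] = {dimM, dimN, dimK};
  return delinearizeIndex(b, loc, linearIndex, basis);
}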