Utils.cpp
1//===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements miscellaneous transformation utilities for the Affine
10// dialect.
11//
12//===----------------------------------------------------------------------===//
13
15
25#include "mlir/IR/Dominance.h"
26#include "mlir/IR/IRMapping.h"
27#include "mlir/IR/IntegerSet.h"
29#include "llvm/ADT/SmallVectorExtras.h"
30#include <optional>
31
32#define DEBUG_TYPE "affine-utils"
33
34using namespace mlir;
35using namespace affine;
36using namespace presburger;
37
38namespace {
39/// Visit affine expressions recursively and build the sequence of operations
40/// that correspond to it. Visitation functions return a Value for the
41/// expression subtree they visited or `nullptr` on error.
42class AffineApplyExpander
43 : public AffineExprVisitor<AffineApplyExpander, Value> {
44public:
45 /// This internal class expects arguments to be non-null; checks must be
46 /// performed at the call site.
47 AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
48 ValueRange symbolValues, Location loc)
49 : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
50 loc(loc) {}
51
52 template <typename OpTy>
53 Value buildBinaryExpr(AffineBinaryOpExpr expr,
54 arith::IntegerOverflowFlags overflowFlags =
55 arith::IntegerOverflowFlags::none) {
56 auto lhs = visit(expr.getLHS());
57 auto rhs = visit(expr.getRHS());
58 if (!lhs || !rhs)
59 return nullptr;
60 auto op = OpTy::create(builder, loc, lhs, rhs, overflowFlags);
61 return op.getResult();
62 }
63
64 Value visitAddExpr(AffineBinaryOpExpr expr) {
65 return buildBinaryExpr<arith::AddIOp>(expr);
66 }
67
68 Value visitMulExpr(AffineBinaryOpExpr expr) {
69 return buildBinaryExpr<arith::MulIOp>(expr,
70 arith::IntegerOverflowFlags::nsw);
71 }
72
73 /// Euclidean modulo operation: negative RHS is not allowed.
74 /// The remainder of the Euclidean integer division is always non-negative.
75 ///
76 /// Implemented as
77 ///
78 /// a mod b =
79 /// let remainder = srem a, b;
80 /// negative = a < 0 in
81 /// select negative, remainder + b, remainder.
82 Value visitModExpr(AffineBinaryOpExpr expr) {
83 if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
84 if (rhsConst.getValue() <= 0) {
85 emitError(loc, "modulo by non-positive value is not supported");
86 return nullptr;
87 }
88 }
89
90 auto lhs = visit(expr.getLHS());
91 auto rhs = visit(expr.getRHS());
92 assert(lhs && rhs && "unexpected affine expr lowering failure");
93
94 Value remainder = arith::RemSIOp::create(builder, loc, lhs, rhs);
95 Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
96 Value isRemainderNegative = arith::CmpIOp::create(
97 builder, loc, arith::CmpIPredicate::slt, remainder, zeroCst);
98 Value correctedRemainder =
99 arith::AddIOp::create(builder, loc, remainder, rhs);
100 Value result = arith::SelectOp::create(builder, loc, isRemainderNegative,
101 correctedRemainder, remainder);
102 return result;
103 }
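The Euclidean mod lowering above can be sanity-checked with plain integer arithmetic. The snippet below is a minimal illustrative sketch, not part of this file, that mirrors the emitted srem/cmpi/addi/select sequence for the positive divisors the expander supports.

#include <cassert>
#include <cstdint>

// Mirrors the lowering: take the signed remainder, then add b back when the
// remainder is negative so the result always lands in [0, b).
static int64_t euclideanMod(int64_t a, int64_t b) {
  assert(b > 0 && "the lowering only supports a positive RHS");
  int64_t remainder = a % b;                          // arith.remsi
  bool isRemainderNegative = remainder < 0;           // arith.cmpi slt
  int64_t corrected = remainder + b;                  // arith.addi
  return isRemainderNegative ? corrected : remainder; // arith.select
}

int main() {
  assert(euclideanMod(7, 3) == 1);
  assert(euclideanMod(-7, 3) == 2); // never negative, unlike C's -7 % 3 == -1
  return 0;
}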
104
105 /// Floor division operation (rounds towards negative infinity).
106 ///
107 /// For positive divisors, it can be implemented without branching and with a
108 /// single division operation as
109 ///
110 /// a floordiv b =
111 /// let negative = a < 0 in
112 /// let absolute = negative ? -a - 1 : a in
113 /// let quotient = absolute / b in
114 /// negative ? -quotient - 1 : quotient
115 ///
116 /// Note: this lowering does not use arith.floordivsi because the lowering of
117 /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
118 /// not one but two arith.divsi. That could be changed to one divsi, but one
119 /// way or another, going through arith.floordivsi will result in more complex
120 /// IR because arith.floordivsi is more general than affine floordiv in that
121 /// it supports negative RHS.
122 Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
123 if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
124 if (rhsConst.getValue() <= 0) {
125 emitError(loc, "division by non-positive value is not supported");
126 return nullptr;
127 }
128 }
129 auto lhs = visit(expr.getLHS());
130 auto rhs = visit(expr.getRHS());
131 assert(lhs && rhs && "unexpected affine expr lowering failure");
132
133 Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
134 Value noneCst = arith::ConstantIndexOp::create(builder, loc, -1);
135 Value negative = arith::CmpIOp::create(
136 builder, loc, arith::CmpIPredicate::slt, lhs, zeroCst);
137 Value negatedDecremented =
138 arith::SubIOp::create(builder, loc, noneCst, lhs);
139 Value dividend = arith::SelectOp::create(builder, loc, negative,
140 negatedDecremented, lhs);
141 Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
142 Value correctedQuotient =
143 arith::SubIOp::create(builder, loc, noneCst, quotient);
144 Value result = arith::SelectOp::create(builder, loc, negative,
145 correctedQuotient, quotient);
146 return result;
147 }
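As a check of the branch-free formula in the comment above, here is a small scalar sketch (an illustration, not upstream code) of the same select/divsi sequence; given a positive divisor it rounds towards negative infinity for either sign of the dividend.

#include <cassert>
#include <cstdint>

static int64_t affineFloorDiv(int64_t a, int64_t b) {
  assert(b > 0 && "the lowering only supports a positive RHS");
  bool negative = a < 0;                      // arith.cmpi slt
  int64_t dividend = negative ? -a - 1 : a;   // subi(-1, a) then select
  int64_t quotient = dividend / b;            // arith.divsi
  return negative ? -quotient - 1 : quotient; // subi(-1, q) then select
}

int main() {
  assert(affineFloorDiv(3, 2) == 1);   // floor(1.5)
  assert(affineFloorDiv(-1, 2) == -1); // floor(-0.5), whereas C's -1 / 2 == 0
  assert(affineFloorDiv(-4, 2) == -2); // exact division of a negative value
  return 0;
}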
148
149 /// Ceiling division operation (rounds towards positive infinity).
150 ///
151 /// For positive divisors, it can be implemented without branching and with a
152 /// single division operation as
153 ///
154 /// a ceildiv b =
155 /// let negative = a <= 0 in
156 /// let absolute = negative ? -a : a - 1 in
157 /// let quotient = absolute / b in
158 /// negative ? -quotient : quotient + 1
159 ///
160 /// Note: not using arith.ceildivsi for the same reason as explained in the
161 /// visitFloorDivExpr comment.
162 Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
163 if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
164 if (rhsConst.getValue() <= 0) {
165 emitError(loc, "division by non-positive value is not supported");
166 return nullptr;
167 }
168 }
169 auto lhs = visit(expr.getLHS());
170 auto rhs = visit(expr.getRHS());
171 assert(lhs && rhs && "unexpected affine expr lowering failure");
172
173 Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
174 Value oneCst = arith::ConstantIndexOp::create(builder, loc, 1);
175 Value nonPositive = arith::CmpIOp::create(
176 builder, loc, arith::CmpIPredicate::sle, lhs, zeroCst);
177 Value negated = arith::SubIOp::create(builder, loc, zeroCst, lhs);
178 Value decremented = arith::SubIOp::create(builder, loc, lhs, oneCst);
179 Value dividend = arith::SelectOp::create(builder, loc, nonPositive, negated,
180 decremented);
181 Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
182 Value negatedQuotient =
183 arith::SubIOp::create(builder, loc, zeroCst, quotient);
184 Value incrementedQuotient =
185 arith::AddIOp::create(builder, loc, quotient, oneCst);
186 Value result = arith::SelectOp::create(
187 builder, loc, nonPositive, negatedQuotient, incrementedQuotient);
188 return result;
189 }
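The ceiling-division identity gets the same treatment: a scalar sketch (illustrative only) of the select/divsi sequence used above, valid for positive divisors.

#include <cassert>
#include <cstdint>

static int64_t affineCeilDiv(int64_t a, int64_t b) {
  assert(b > 0 && "the lowering only supports a positive RHS");
  bool nonPositive = a <= 0;                     // arith.cmpi sle
  int64_t dividend = nonPositive ? -a : a - 1;   // select(0 - a, a - 1)
  int64_t quotient = dividend / b;               // arith.divsi
  return nonPositive ? -quotient : quotient + 1; // select(-q, q + 1)
}

int main() {
  assert(affineCeilDiv(3, 2) == 2);   // ceil(1.5)
  assert(affineCeilDiv(4, 2) == 2);   // exact division
  assert(affineCeilDiv(-3, 2) == -1); // ceil(-1.5)
  assert(affineCeilDiv(0, 2) == 0);
  return 0;
}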
190
191 Value visitConstantExpr(AffineConstantExpr expr) {
192 auto op = arith::ConstantIndexOp::create(builder, loc, expr.getValue());
193 return op.getResult();
194 }
195
196 Value visitDimExpr(AffineDimExpr expr) {
197 assert(expr.getPosition() < dimValues.size() &&
198 "affine dim position out of range");
199 return dimValues[expr.getPosition()];
200 }
201
202 Value visitSymbolExpr(AffineSymbolExpr expr) {
203 assert(expr.getPosition() < symbolValues.size() &&
204 "symbol dim position out of range");
205 return symbolValues[expr.getPosition()];
206 }
207
208private:
209 OpBuilder &builder;
210 ValueRange dimValues;
211 ValueRange symbolValues;
212
213 Location loc;
214};
215} // namespace
216
217/// Create a sequence of operations that implement the `expr` applied to the
218/// given dimension and symbol values.
219mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
220 AffineExpr expr,
221 ValueRange dimValues,
222 ValueRange symbolValues) {
223 return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
224}
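A hedged usage sketch of expandAffineExpr. The helper below and its names are illustrative rather than upstream code; it assumes the arith dialect is loaded in the context and that the builder is already positioned where the lowered ops should be created.

#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/IR/Builders.h"

// Lowers `d0 floordiv 4 + d0 mod 4` at the builder's current insertion point,
// using `index` as d0. Returns nullptr if the expression could not be expanded
// (e.g. a modulo by a non-positive constant).
static mlir::Value lowerExampleExpr(mlir::OpBuilder &builder,
                                    mlir::Location loc, mlir::Value index) {
  mlir::AffineExpr d0 = builder.getAffineDimExpr(0);
  mlir::AffineExpr expr = d0.floorDiv(4) + d0 % 4;
  return mlir::affine::expandAffineExpr(builder, loc, expr,
                                        /*dimValues=*/index,
                                        /*symbolValues=*/{});
}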
225
226/// Create a sequence of operations that implement the `affineMap` applied to
227/// the given `operands` (as if it were an AffineApplyOp).
228std::optional<SmallVector<Value, 8>>
229mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
230 AffineMap affineMap, ValueRange operands) {
231 auto numDims = affineMap.getNumDims();
232 auto expanded = llvm::map_to_vector<8>(
233 affineMap.getResults(),
234 [numDims, &builder, loc, operands](AffineExpr expr) {
235 return expandAffineExpr(builder, loc, expr,
236 operands.take_front(numDims),
237 operands.drop_front(numDims));
238 });
239 if (llvm::all_of(expanded, [](Value v) { return v; }))
240 return expanded;
241 return std::nullopt;
242}
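And a similarly hedged sketch for the multi-result variant; the wrapper function is hypothetical. A std::nullopt result signals that at least one result expression failed to expand.

#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/IR/Builders.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>

static mlir::LogicalResult
lowerMapToArith(mlir::OpBuilder &builder, mlir::Location loc,
                mlir::AffineMap map, mlir::ValueRange operands,
                llvm::SmallVectorImpl<mlir::Value> &results) {
  std::optional<llvm::SmallVector<mlir::Value, 8>> expanded =
      mlir::affine::expandAffineMap(builder, loc, map, operands);
  if (!expanded)
    return mlir::failure();
  results.assign(expanded->begin(), expanded->end());
  return mlir::success();
}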
243
244/// Promotes the `then` or the `else` block of `ifOp` (depending on whether
245/// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
246/// the rest of the op.
247static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
248 if (elseBlock)
249 assert(ifOp.hasElse() && "else block expected");
250
251 Block *destBlock = ifOp->getBlock();
252 Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
253 destBlock->getOperations().splice(
254 Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
255 std::prev(srcBlock->end()));
256 ifOp.erase();
257}
258
259/// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
260/// on. The `ifOp` could be hoisted and placed right before such an operation.
261/// This method assumes that the ifOp has been canonicalized (to be correct and
262/// effective).
263static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
264 // Walk up the parents past all for ops that this conditional is invariant on.
265 auto ifOperands = ifOp.getOperands();
266 Operation *res = ifOp;
267 while (!res->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
268 auto *parentOp = res->getParentOp();
269 if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
270 if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
271 break;
272 } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
273 if (llvm::any_of(parallelOp.getIVs(), [&](Value iv) {
274 return llvm::is_contained(ifOperands, iv);
275 }))
276 break;
277 } else if (!isa<AffineIfOp>(parentOp)) {
278 // Won't walk up past anything other than affine.for/parallel/if ops.
279 break;
280 }
281 // You can always hoist up past any affine.if ops.
282 res = parentOp;
283 }
284 return res;
285}
286
287/// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
288/// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
289/// otherwise the same `ifOp`.
290static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
291 // No hoisting to do.
292 if (hoistOverOp == ifOp)
293 return ifOp;
294
295 // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
296 // the else block. Then drop the else block of the original 'if' in the 'then'
297 // branch while promoting its then block, and analogously drop the 'then'
298 // block of the original 'if' from the 'else' branch while promoting its else
299 // block.
300 IRMapping operandMap;
301 OpBuilder b(hoistOverOp);
302 auto hoistedIfOp = AffineIfOp::create(b, ifOp.getLoc(), ifOp.getIntegerSet(),
303 ifOp.getOperands(),
304 /*elseBlock=*/true);
305
306 // Create a clone of hoistOverOp to use for the else branch of the hoisted
307 // conditional. The else block may get optimized away if empty.
308 Operation *hoistOverOpClone = nullptr;
309 // We use this unique name to identify/find `ifOp`'s clone in the else
310 // version.
311 StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
312 operandMap.clear();
313 b.setInsertionPointAfter(hoistOverOp);
314 // We'll set an attribute to identify this op in a clone of this sub-tree.
315 ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
316 hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
317
318 // Promote the 'then' block of the original affine.if in the then version.
319 promoteIfBlock(ifOp, /*elseBlock=*/false);
320
321 // Move the then version to the hoisted if op's 'then' block.
322 auto *thenBlock = hoistedIfOp.getThenBlock();
323 thenBlock->getOperations().splice(thenBlock->begin(),
324 hoistOverOp->getBlock()->getOperations(),
325 Block::iterator(hoistOverOp));
326
327 // Find the clone of the original affine.if op in the else version.
328 AffineIfOp ifCloneInElse;
329 hoistOverOpClone->walk([&](AffineIfOp ifClone) {
330 if (!ifClone->getAttr(idForIfOp))
331 return WalkResult::advance();
332 ifCloneInElse = ifClone;
333 return WalkResult::interrupt();
334 });
335 assert(ifCloneInElse && "if op clone should exist");
336 // For the else block, promote the else block of the original 'if' if it had
337 // one; otherwise, the op itself is to be erased.
338 if (!ifCloneInElse.hasElse())
339 ifCloneInElse.erase();
340 else
341 promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
342
343 // Move the else version into the else block of the hoisted if op.
344 auto *elseBlock = hoistedIfOp.getElseBlock();
345 elseBlock->getOperations().splice(
346 elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
347 Block::iterator(hoistOverOpClone));
348
349 return hoistedIfOp;
350}
351
352LogicalResult
353mlir::affine::affineParallelize(AffineForOp forOp,
354 ArrayRef<LoopReduction> parallelReductions,
355 AffineParallelOp *resOp) {
356 // Fail early if there are iter arguments that are not reductions.
357 unsigned numReductions = parallelReductions.size();
358 if (numReductions != forOp.getNumIterOperands())
359 return failure();
360
361 Location loc = forOp.getLoc();
362 OpBuilder outsideBuilder(forOp);
363 AffineMap lowerBoundMap = forOp.getLowerBoundMap();
364 ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
365 AffineMap upperBoundMap = forOp.getUpperBoundMap();
366 ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
367
368 // Creating empty 1-D affine.parallel op.
369 auto reducedValues = llvm::map_to_vector<4>(
370 parallelReductions, [](const LoopReduction &red) { return red.value; });
371 auto reductionKinds = llvm::map_to_vector<4>(
372 parallelReductions, [](const LoopReduction &red) { return red.kind; });
373 AffineParallelOp newPloop = AffineParallelOp::create(
374 outsideBuilder, loc, ValueRange(reducedValues).getTypes(), reductionKinds,
375 llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
376 llvm::ArrayRef(upperBoundMap), upperBoundOperands,
377 llvm::ArrayRef(forOp.getStepAsInt()));
378 // Steal the body of the old affine for op.
379 newPloop.getRegion().takeBody(forOp.getRegion());
380 Operation *yieldOp = &newPloop.getBody()->back();
381
382 // Handle the initial values of reductions because the parallel loop always
383 // starts from the neutral value.
384 SmallVector<Value> newResults;
385 newResults.reserve(numReductions);
386 for (unsigned i = 0; i < numReductions; ++i) {
387 Value init = forOp.getInits()[i];
388 // This works because we are only handling single-op reductions at the
389 // moment. A switch on reduction kind or a mechanism to collect operations
390 // participating in the reduction will be necessary for multi-op reductions.
391 Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
392 assert(reductionOp && "yielded value is expected to be produced by an op");
393 outsideBuilder.getInsertionBlock()->getOperations().splice(
394 outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
395 reductionOp);
396 reductionOp->setOperands({init, newPloop->getResult(i)});
397 forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
398 }
399
400 // Update the loop terminator to yield reduced values bypassing the reduction
401 // operation itself (now moved outside of the loop) and erase the block
402 // arguments that correspond to reductions. Note that the loop always has one
403 // "main" induction variable whenc coming from a non-parallel for.
404 unsigned numIVs = 1;
405 yieldOp->setOperands(reducedValues);
406 newPloop.getBody()->eraseArguments(numIVs, numReductions);
407
408 forOp.erase();
409 if (resOp)
410 *resOp = newPloop;
411 return success();
412}
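A hedged sketch of driving affineParallelize over a function; the wrapper below is illustrative and, for brevity, simply skips loops that carry iter_args instead of building LoopReduction descriptors for them.

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "llvm/ADT/SmallVector.h"

// Parallelizes every affine.for in `func` that has no loop-carried values.
static void parallelizeLoopsWithoutReductions(mlir::func::FuncOp func) {
  llvm::SmallVector<mlir::affine::AffineForOp> loops;
  func.walk([&](mlir::affine::AffineForOp forOp) {
    if (forOp.getNumIterOperands() == 0)
      loops.push_back(forOp);
  });
  // The utility erases the affine.for it converts, so collect first (the walk
  // is post-order, hence inner loops are converted before their parents).
  for (mlir::affine::AffineForOp forOp : loops)
    (void)mlir::affine::affineParallelize(forOp, /*parallelReductions=*/{});
}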
413
414// Returns success if any hoisting happened.
415LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
416 // Bail out early if the ifOp returns a result. TODO: Consider how to
417 // properly support this case.
418 if (ifOp.getNumResults() != 0)
419 return failure();
420
421 // Apply canonicalization patterns and folding - this is necessary for the
422 // hoisting check to be correct (operands should be composed), and to be more
423 // effective (no unused operands). Since the pattern rewriter's folding is
424 // entangled with application of patterns, we may fold/end up erasing the op,
425 // in which case we return with `folded` being set.
426 RewritePatternSet patterns(ifOp.getContext());
427 AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
428 FrozenRewritePatternSet frozenPatterns(std::move(patterns));
429 bool erased;
430 (void)applyOpPatternsGreedily(
431 ifOp.getOperation(), frozenPatterns,
432 GreedyRewriteConfig().setStrictness(GreedyRewriteStrictness::ExistingOps),
433 /*changed=*/nullptr, &erased);
434 if (erased) {
435 if (folded)
436 *folded = true;
437 return failure();
438 }
439 if (folded)
440 *folded = false;
441
442 // The folding above should have ensured this.
443 assert(llvm::all_of(ifOp.getOperands(),
444 [](Value v) {
445 return isTopLevelValue(v) || isAffineInductionVar(v);
446 }) &&
447 "operands not composed");
448
449 // We are going to hoist as high as possible.
450 // TODO: this could be customized in the future.
451 auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
452
453 AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
454 // Nothing to hoist over.
455 if (hoistedIfOp == ifOp)
456 return failure();
457
458 // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
459 // a sequence of affine.fors that are all perfectly nested).
460 (void)applyPatternsGreedily(
461 hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
462 frozenPatterns);
463
464 return success();
465}
466
467// Substitute `dim` in `e` with `min` or `max`, depending on the sign context.
468AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
469 AffineExpr min, AffineExpr max,
470 bool positivePath) {
471 if (e == dim)
472 return positivePath ? min : max;
473 if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
474 AffineExpr lhs = bin.getLHS();
475 AffineExpr rhs = bin.getRHS();
476 if (bin.getKind() == mlir::AffineExprKind::Add)
477 return substWithMin(lhs, dim, min, max, positivePath) +
478 substWithMin(rhs, dim, min, max, positivePath);
479
480 auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
481 auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
482 if (c1 && c1.getValue() < 0)
483 return getAffineBinaryOpExpr(
484 bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
485 if (c2 && c2.getValue() < 0)
486 return getAffineBinaryOpExpr(
487 bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
488 return getAffineBinaryOpExpr(
489 bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
490 substWithMin(rhs, dim, min, max, positivePath));
491 }
492 return e;
493}
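A small, hedged example of what substWithMin computes, assuming the declaration from the affine utils header; the concrete expressions are chosen for illustration only.

#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/MLIRContext.h"
#include <cassert>

int main() {
  mlir::MLIRContext ctx;
  mlir::AffineExpr d0 = mlir::getAffineDimExpr(0, &ctx);
  mlir::AffineExpr lb = mlir::getAffineSymbolExpr(0, &ctx); // plays the `min`
  mlir::AffineExpr ub = mlir::getAffineSymbolExpr(1, &ctx); // plays the `max`
  // The dim itself is replaced by `min` on the positive path and by `max`
  // when the path has been negated (e.g. under a negative constant factor).
  assert(mlir::affine::substWithMin(d0, d0, lb, ub, /*positivePath=*/true) == lb);
  assert(mlir::affine::substWithMin(d0, d0, lb, ub, /*positivePath=*/false) == ub);
  // Sums distribute, so d0 + 5 becomes min + 5.
  assert(mlir::affine::substWithMin(d0 + 5, d0, lb, ub, true) == lb + 5);
  return 0;
}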
494
495void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
496 // Loops with min/max in bounds are not normalized at the moment.
497 if (op.hasMinMaxBounds())
498 return;
499
500 AffineMap lbMap = op.getLowerBoundsMap();
501 SmallVector<int64_t, 8> steps = op.getSteps();
502 // No need to do any work if the parallel op is already normalized.
503 bool isAlreadyNormalized =
504 llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
505 int64_t step = std::get<0>(tuple);
506 auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
507 return lbExpr && lbExpr.getValue() == 0 && step == 1;
508 });
509 if (isAlreadyNormalized)
510 return;
511
512 AffineValueMap ranges;
513 AffineValueMap::difference(op.getUpperBoundsValueMap(),
514 op.getLowerBoundsValueMap(), &ranges);
515 auto builder = OpBuilder::atBlockBegin(op.getBody());
516 auto zeroExpr = builder.getAffineConstantExpr(0);
517 SmallVector<AffineExpr, 8> lbExprs;
518 SmallVector<AffineExpr, 8> ubExprs;
519 for (unsigned i = 0, e = steps.size(); i < e; ++i) {
520 int64_t step = steps[i];
521
522 // Adjust the lower bound to be 0.
523 lbExprs.push_back(zeroExpr);
524
525 // Adjust the upper bound expression: 'range / step'.
526 AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
527 ubExprs.push_back(ubExpr);
528
529 // Adjust the corresponding IV: 'lb + i * step'.
530 BlockArgument iv = op.getBody()->getArgument(i);
531 AffineExpr lbExpr = lbMap.getResult(i);
532 unsigned nDims = lbMap.getNumDims();
533 auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
534 auto map = AffineMap::get(/*dimCount=*/nDims + 1,
535 /*symbolCount=*/lbMap.getNumSymbols(), expr);
536
537 // Use an 'affine.apply' op that will be simplified later in subsequent
538 // canonicalizations.
539 OperandRange lbOperands = op.getLowerBoundsOperands();
540 OperandRange dimOperands = lbOperands.take_front(nDims);
541 OperandRange symbolOperands = lbOperands.drop_front(nDims);
542 SmallVector<Value, 8> applyOperands{dimOperands};
543 applyOperands.push_back(iv);
544 applyOperands.append(symbolOperands.begin(), symbolOperands.end());
545 auto apply =
546 AffineApplyOp::create(builder, op.getLoc(), map, applyOperands);
547 iv.replaceAllUsesExcept(apply, apply);
548 }
549
550 SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
551 op.setSteps(newSteps);
552 auto newLowerMap = AffineMap::get(
553 /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
554 op.setLowerBounds({}, newLowerMap);
555 auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
556 ubExprs, op.getContext());
557 op.setUpperBounds(ranges.getOperands(), newUpperMap);
558}
559
560LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
561 bool promoteSingleIter) {
562 if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
563 return success();
564
565 // Check if the for op is already normalized.
566 if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
567 (op.getStep() == 1))
568 return success();
569
570 // Check if the lower bound has a single result only. Loops with a max lower
571 // bound can't be normalized without additional support like
572 // affine.execute_region's. If the lower bound does not have a single result
573 // then skip this op.
574 if (op.getLowerBoundMap().getNumResults() != 1)
575 return failure();
576
577 Location loc = op.getLoc();
578 OpBuilder opBuilder(op);
579 int64_t origLoopStep = op.getStepAsInt();
580
581 // Construct the new upper bound value map.
582 AffineMap oldLbMap = op.getLowerBoundMap();
583 // The upper bound can have multiple results. To use
584 // AffineValueMap::difference, we need to have the same number of results in
585 // both lower and upper bound maps. So, we just create a value map for the
586 // lower bound with the only available lower bound result repeated to pad up
587 // to the number of upper bound results.
588 SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
589 op.getLowerBoundMap().getResult(0));
590 AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
591 AffineMap paddedLbMap =
592 AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
593 op.getContext());
594 AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
595 AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
596 AffineValueMap newUbValueMap;
597 // Compute the `upper bound - lower bound`.
598 AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
599 (void)newUbValueMap.canonicalize();
600
601 // Scale down the upper bound value map by the loop step.
602 unsigned numResult = newUbValueMap.getNumResults();
603 SmallVector<AffineExpr> scaleDownExprs(numResult);
604 for (unsigned i = 0; i < numResult; ++i)
605 scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
606 // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
607 // step), where `n` is the number of results in the upper bound map.
608 AffineMap scaleDownMap =
609 AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
610 AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
611
612 // Set the newly created upper bound map and operands.
613 op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
614 op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
615 op.setStep(1);
616
617 // Calculate the Value of new loopIV. Create affine.apply for the value of
618 // the loopIV in normalized loop.
619 opBuilder.setInsertionPointToStart(op.getBody());
620 // Construct an affine.apply op mapping the new IV to the old IV.
621 AffineMap scaleIvMap =
622 AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
623 AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
624 AffineValueMap newIvToOldIvMap;
625 AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
626 (void)newIvToOldIvMap.canonicalize();
627 auto newIV =
628 AffineApplyOp::create(opBuilder, loc, newIvToOldIvMap.getAffineMap(),
629 newIvToOldIvMap.getOperands());
630 op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
631 return success();
632}
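The bound arithmetic above is easy to verify with constants. This added sketch assumes a loop `for i = lb to ub step s` with positive step: the normalized loop runs from 0 to (ub - lb) ceildiv s with unit step, and the original IV is recovered as lb + s * iv.

#include <cassert>
#include <cstdint>

// Ceiling division for a >= 0, b > 0.
static int64_t ceilDiv(int64_t a, int64_t b) { return (a + b - 1) / b; }

int main() {
  const int64_t lb = 5, ub = 21, step = 4;
  const int64_t newUb = ceilDiv(ub - lb, step); // (21 - 5) ceildiv 4 == 4
  assert(newUb == 4);
  // The normalized IV j runs over 0..3; lb + j * step yields 5, 9, 13, 17,
  // exactly the values the original induction variable would have taken.
  for (int64_t j = 0; j < newUb; ++j) {
    int64_t oldIv = lb + j * step;
    assert(oldIv >= lb && oldIv < ub);
  }
  return 0;
}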
633
634/// Returns true if the memory operation of `destAccess` depends on `srcAccess`
635/// inside of the innermost common surrounding affine loop between the two
636/// accesses.
637static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
638 const MemRefAccess &destAccess) {
639 // Affine dependence analysis is possible only if both ops are in the same
640 // AffineScope.
641 if (getAffineAnalysisScope(srcAccess.opInst) !=
642 getAffineAnalysisScope(destAccess.opInst))
643 return false;
644
645 unsigned nsLoops =
646 getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
647 DependenceResult result =
648 checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
649 return hasDependence(result);
650}
651
652/// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
653/// scope of the outermost `minSurroundingLoops` loops that surround them.
654/// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
655static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
656 unsigned minSurroundingLoops) {
657 MemRefAccess srcAccess(srcMemOp);
658 MemRefAccess destAccess(destMemOp);
659
660 // Affine dependence analysis here is applicable only if both ops operate on
661 // the same memref and if `srcMemOp` and `destMemOp` are in the same
662 // AffineScope. Also, we can only check if our affine scope is isolated from
663 // above; otherwise, values can come from outside of the affine scope, which the
664 // check below cannot analyze.
665 Region *srcScope = getAffineAnalysisScope(srcMemOp);
666 if (srcAccess.memref == destAccess.memref &&
667 srcScope == getAffineAnalysisScope(destMemOp)) {
668 unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
669 FlatAffineValueConstraints dependenceConstraints;
670 for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
671 DependenceResult result = checkMemrefAccessDependence(
672 srcAccess, destAccess, d, &dependenceConstraints,
673 /*dependenceComponents=*/nullptr);
674 // A dependence failure or the presence of a dependence implies a
675 // side effect.
676 if (!noDependence(result))
677 return true;
678 }
679 // No side effect was seen.
680 return false;
681 }
682 // TODO: Check here if the memrefs alias: there is no side effect if
683 // `srcAccess.memref` and `destAccess.memref` don't alias.
684 return true;
685}
686
687template <typename EffectType, typename T>
688bool mlir::affine::hasNoInterveningEffect(
689 Operation *start, T memOp,
690 llvm::function_ref<bool(Value, Value)> mayAlias) {
691 // A boolean representing whether an intervening operation could have impacted
692 // memOp.
693 bool hasSideEffect = false;
694
695 // Check whether the effect on memOp can be caused by a given operation op.
696 Value memref = memOp.getMemRef();
697 std::function<void(Operation *)> checkOperation = [&](Operation *op) {
698 // If the effect has already been found, exit early.
699 if (hasSideEffect)
700 return;
701
702 if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
703 SmallVector<MemoryEffects::EffectInstance, 1> effects;
704 memEffect.getEffects(effects);
705
706 bool opMayHaveEffect = false;
707 for (auto effect : effects) {
708 // If op causes EffectType on a potentially aliasing location for
709 // memOp, mark as having the effect.
710 if (isa<EffectType>(effect.getEffect())) {
711 if (effect.getValue() && effect.getValue() != memref &&
712 !mayAlias(effect.getValue(), memref))
713 continue;
714 opMayHaveEffect = true;
715 break;
716 }
717 }
718
719 if (!opMayHaveEffect)
720 return;
721
722 // If the side effect comes from an affine read or write, try to
723 // prove the side effecting `op` cannot reach `memOp`.
724 if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
725 // For ease, let's consider the case that `op` is a store and
726 // we're looking for other potential stores that overwrite memory after
727 // `start`, and before being read in `memOp`. In this case, we only
728 // need to consider other potential stores with depth >
729 // minSurroundingLoops since `start` would overwrite any store with a
730 // smaller number of surrounding loops before.
731 unsigned minSurroundingLoops =
732 getNumCommonSurroundingLoops(*start, *memOp);
733 if (mayHaveEffect(op, memOp, minSurroundingLoops))
734 hasSideEffect = true;
735 return;
736 }
737
738 // We have an op with a memory effect and we cannot prove if it
739 // intervenes.
740 hasSideEffect = true;
741 return;
742 }
743
744 if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
745 // Recurse into the regions for this op and check whether the internal
746 // operations may have the side effect `EffectType` on memOp.
747 for (Region &region : op->getRegions())
748 for (Block &block : region)
749 for (Operation &op : block)
750 checkOperation(&op);
751 return;
752 }
753
754 // Otherwise, conservatively assume a generic operation may have the
755 // effect on memOp.
756 hasSideEffect = true;
757 };
758
759 // Check all paths from ancestor op `parent` to the operation `to` for the
760 // effect. It is known that `to` must be contained within `parent`.
761 auto until = [&](Operation *parent, Operation *to) {
762 // TODO check only the paths from `parent` to `to`.
763 // Currently we fall back and check the entire parent op, rather than
764 // just the paths from `parent` to `to`, stopping after reaching `to`.
765 // This is conservatively correct, but could be made more aggressive.
766 assert(parent->isAncestor(to));
767 checkOperation(parent);
768 };
769
770 // Check for all paths from operation `from` to operation `untilOp` for the
771 // given memory effect.
772 std::function<void(Operation *, Operation *)> recur =
773 [&](Operation *from, Operation *untilOp) {
774 assert(
775 from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
776 "Checking for side effect between two operations without a common "
777 "ancestor");
778
779 // If the operations are in different regions, recursively consider all
780 // paths from `from` to the parent of `to` and all paths from the parent
781 // of `to` to `to`.
782 if (from->getParentRegion() != untilOp->getParentRegion()) {
783 recur(from, untilOp->getParentOp());
784 until(untilOp->getParentOp(), untilOp);
785 return;
786 }
787
788 // Now, assuming that `from` and `to` exist in the same region, perform
789 // a CFG traversal to check all the relevant operations.
790
791 // Additional blocks to consider.
792 SmallVector<Block *, 2> todoBlocks;
793 {
794 // First consider the parent block of `from` and check all operations
795 // after `from`.
796 for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
797 iter != end && &*iter != untilOp; ++iter) {
798 checkOperation(&*iter);
799 }
800
801 // If the parent of `from` doesn't contain `to`, add the successors
802 // to the list of blocks to check.
803 if (untilOp->getBlock() != from->getBlock())
804 for (Block *succ : from->getBlock()->getSuccessors())
805 todoBlocks.push_back(succ);
806 }
807
808 SmallPtrSet<Block *, 4> done;
809 // Traverse the CFG until hitting `to`.
810 while (!todoBlocks.empty()) {
811 Block *blk = todoBlocks.pop_back_val();
812 if (done.count(blk))
813 continue;
814 done.insert(blk);
815 for (auto &op : *blk) {
816 if (&op == untilOp)
817 break;
818 checkOperation(&op);
819 if (&op == blk->getTerminator())
820 for (Block *succ : blk->getSuccessors())
821 todoBlocks.push_back(succ);
822 }
823 }
824 };
825 recur(start, memOp);
826 return !hasSideEffect;
827}
828
829/// Attempt to eliminate loadOp by replacing it with a value stored into memory
830/// which the load is guaranteed to retrieve. This check involves three
831/// components: 1) The store and load must be on the same location 2) The store
832/// must dominate (and therefore must always occur prior to) the load 3) No
833/// other operations will overwrite the memory loaded between the given load
834/// and store. If such a value exists, the replaced `loadOp` will be added to
835/// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
836static void forwardStoreToLoad(
837 AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
838 SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
839 llvm::function_ref<bool(Value, Value)> mayAlias) {
840
841 // The store op candidate for forwarding that satisfies all conditions
842 // to replace the load, if any.
843 Operation *lastWriteStoreOp = nullptr;
844
845 for (auto *user : loadOp.getMemRef().getUsers()) {
846 auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
847 if (!storeOp)
848 continue;
849 MemRefAccess srcAccess(storeOp);
850 MemRefAccess destAccess(loadOp);
851
852 // 1. Check if the store and the load have mathematically equivalent
853 // affine access functions; this implies that they statically refer to the
854 // same single memref element. As an example this filters out cases like:
855 // store %A[%i0 + 1]
856 // load %A[%i0]
857 // store %A[%M]
858 // load %A[%N]
859 // Use the AffineValueMap difference based memref access equality checking.
860 if (srcAccess != destAccess)
861 continue;
862
863 // 2. The store has to dominate the load op to be a candidate.
864 if (!domInfo.dominates(storeOp, loadOp))
865 continue;
866
867 // 3. The store must reach the load. Access function equivalence only
868 // guarantees this for accesses in the same block. The load could be in a
869 // nested block that is unreachable.
870 if (!mustReachAtInnermost(srcAccess, destAccess))
871 continue;
872
873 // 4. Ensure there is no intermediate operation which could replace the
874 // value in memory.
875 if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
876 mayAlias))
877 continue;
878
879 // We now have a candidate for forwarding.
880 assert(lastWriteStoreOp == nullptr &&
881 "multiple simultaneous replacement stores");
882 lastWriteStoreOp = storeOp;
883 }
884
885 if (!lastWriteStoreOp)
886 return;
887
888 // Perform the actual store to load forwarding.
889 Value storeVal =
890 cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
891 // Check if 2 values have the same shape. This is needed for affine vector
892 // loads and stores.
893 if (storeVal.getType() != loadOp.getValue().getType())
894 return;
895 loadOp.getValue().replaceAllUsesWith(storeVal);
896 // Record the memref for a later sweep to optimize away.
897 memrefsToErase.insert(loadOp.getMemRef());
898 // Record this to erase later.
899 loadOpsToErase.push_back(loadOp);
900}
901
902template bool
903mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Read,
904 affine::AffineReadOpInterface>(
905 mlir::Operation *, affine::AffineReadOpInterface,
906 llvm::function_ref<bool(mlir::Value, mlir::Value)>);
907
908// This attempts to find stores which have no impact on the final result.
909// A writing op writeA will be eliminated if there exists an op writeB such that
910// 1) writeA and writeB have mathematically equivalent affine access functions.
911// 2) writeB postdominates writeA.
912// 3) There is no potential read between writeA and writeB.
913static void findUnusedStore(AffineWriteOpInterface writeA,
914 SmallVectorImpl<Operation *> &opsToErase,
915 PostDominanceInfo &postDominanceInfo,
916 llvm::function_ref<bool(Value, Value)> mayAlias) {
917
918 for (Operation *user : writeA.getMemRef().getUsers()) {
919 // Only consider writing operations.
920 auto writeB = dyn_cast<AffineWriteOpInterface>(user);
921 if (!writeB)
922 continue;
923
924 // The operations must be distinct.
925 if (writeB == writeA)
926 continue;
927
928 // Both operations must lie in the same region.
929 if (writeB->getParentRegion() != writeA->getParentRegion())
930 continue;
931
932 // Both operations must write to the same memory.
933 MemRefAccess srcAccess(writeB);
934 MemRefAccess destAccess(writeA);
935
936 if (srcAccess != destAccess)
937 continue;
938
939 // writeB must postdominate writeA.
940 if (!postDominanceInfo.postDominates(writeB, writeA))
941 continue;
942
943 // There cannot be an operation which reads from memory between
944 // the two writes.
945 if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
946 mayAlias))
947 continue;
948
949 opsToErase.push_back(writeA);
950 break;
951 }
952}
953
954// The load to load forwarding / redundant load elimination is similar to the
955// store to load forwarding.
956// loadA will be replaced with loadB if:
957// 1) loadA and loadB have mathematically equivalent affine access functions.
958// 2) loadB dominates loadA.
959// 3) There is no write between loadA and loadB.
960static void loadCSE(AffineReadOpInterface loadA,
961 SmallVectorImpl<Operation *> &loadOpsToErase,
962 DominanceInfo &domInfo,
963 llvm::function_ref<bool(Value, Value)> mayAlias) {
964 SmallVector<AffineReadOpInterface, 4> loadCandidates;
965 for (auto *user : loadA.getMemRef().getUsers()) {
966 auto loadB = dyn_cast<AffineReadOpInterface>(user);
967 if (!loadB || loadB == loadA)
968 continue;
969
970 MemRefAccess srcAccess(loadB);
971 MemRefAccess destAccess(loadA);
972
973 // 1. The accesses should be to the same location.
974 if (srcAccess != destAccess) {
975 continue;
976 }
977
978 // 2. loadB should dominate loadA.
979 if (!domInfo.dominates(loadB, loadA))
980 continue;
981
982 // 3. There should not be a write between loadA and loadB.
983 if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
984 loadB.getOperation(), loadA, mayAlias))
985 continue;
986
987 // Check if two values have the same shape. This is needed for affine vector
988 // loads.
989 if (loadB.getValue().getType() != loadA.getValue().getType())
990 continue;
991
992 loadCandidates.push_back(loadB);
993 }
994
995 // Of the legal load candidates, use the one that dominates all others
996 // to minimize the subsequent need for loadCSE.
997 Value loadB;
998 for (AffineReadOpInterface option : loadCandidates) {
999 if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
1000 return depStore == option ||
1001 domInfo.dominates(option.getOperation(),
1002 depStore.getOperation());
1003 })) {
1004 loadB = option.getValue();
1005 break;
1006 }
1007 }
1008
1009 if (loadB) {
1010 loadA.getValue().replaceAllUsesWith(loadB);
1011 // Record this to erase later.
1012 loadOpsToErase.push_back(loadA);
1013 }
1014}
1015
1016// The store to load forwarding and load CSE rely on three conditions:
1017//
1018// 1) store/load providing a replacement value and load being replaced need to
1019// have mathematically equivalent affine access functions (checked after full
1020// composition of load/store operands); this implies that they access the same
1021// single memref element for all iterations of the common surrounding loop,
1022//
1023// 2) the store/load op should dominate the load op,
1024//
1025// 3) no operation that may write to memory read by the load being replaced can
1026// occur after executing the instruction (load or store) providing the
1027// replacement value and before the load being replaced (thus potentially
1028// allowing overwriting the memory read by the load).
1029//
1030// The above conditions are simple to check, sufficient, and powerful for most
1031// cases in practice; they are sufficient but not necessary, since they
1032// don't reason about loops that are guaranteed to execute at least once or
1033// about multiple sources to forward from.
1034//
1035// TODO: more forwarding can be done when support for
1036// loop/conditional live-out SSA values is available.
1037// TODO: do general dead store elimination for memref's. This pass
1038// currently eliminates the stores only if no other loads/uses (other
1039// than dealloc) remain.
1040//
1041void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1042 PostDominanceInfo &postDomInfo,
1043 AliasAnalysis &aliasAnalysis) {
1044 // Load op's whose results were replaced by those forwarded from stores.
1045 SmallVector<Operation *, 8> opsToErase;
1046
1047 // A list of memref's that are potentially dead / could be eliminated.
1048 SmallPtrSet<Value, 4> memrefsToErase;
1049
1050 auto mayAlias = [&](Value val1, Value val2) -> bool {
1051 return !aliasAnalysis.alias(val1, val2).isNo();
1052 };
1053
1054 // Walk all loads and perform store to load forwarding.
1055 f.walk([&](AffineReadOpInterface loadOp) {
1056 forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1057 });
1058 for (auto *op : opsToErase)
1059 op->erase();
1060 opsToErase.clear();
1061
1062 // Walk all stores and perform unused store elimination.
1063 f.walk([&](AffineWriteOpInterface storeOp) {
1064 findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1065 });
1066 for (auto *op : opsToErase)
1067 op->erase();
1068 opsToErase.clear();
1069
1070 // Check if the store fwd'ed memrefs are now left with only stores and
1071 // deallocs and can thus be completely deleted. Note: the canonicalize pass
1072 // should be able to do this as well, but we'll do it here since we collected
1073 // these anyway.
1074 for (auto memref : memrefsToErase) {
1075 // If the memref hasn't been locally alloc'ed, skip.
1076 Operation *defOp = memref.getDefiningOp();
1077 if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1078 // TODO: if the memref was returned by a 'call' operation, we
1079 // could still erase it if the call had no side-effects.
1080 continue;
1081 if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1082 return !isa<AffineWriteOpInterface>(ownerOp) &&
1083 !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1084 }))
1085 continue;
1086
1087 // Erase all stores, the dealloc, and the alloc on the memref.
1088 for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1089 user->erase();
1090 defOp->erase();
1091 }
1092
1093 // To eliminate as many loads as possible, run load CSE after eliminating
1094 // stores. Otherwise, some stores are wrongly seen as having an intervening
1095 // effect.
1096 f.walk([&](AffineReadOpInterface loadOp) {
1097 loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1098 });
1099 for (auto *op : opsToErase)
1100 op->erase();
1101}
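For context, a hedged sketch of how a pass would typically drive this utility. The pass class below is illustrative only (the upstream affine scalar-replacement pass is structured along these lines) and relies on the standard pass analysis infrastructure.

#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Pass/Pass.h"

namespace {
struct ScalarReplacementSketch
    : public mlir::PassWrapper<ScalarReplacementSketch,
                               mlir::OperationPass<mlir::func::FuncOp>> {
  void runOnOperation() override {
    mlir::affine::affineScalarReplace(getOperation(),
                                      getAnalysis<mlir::DominanceInfo>(),
                                      getAnalysis<mlir::PostDominanceInfo>(),
                                      getAnalysis<mlir::AliasAnalysis>());
  }
};
} // namespace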
1102
1103// Checks if `op` is a dereferencing memory operation.
1104// TODO: This hardcoded check will be removed once the right interface is added.
1105static bool isDereferencingOp(Operation *op) {
1106 return isa<AffineMapAccessInterface, memref::LoadOp, memref::StoreOp>(op);
1107}
1108
1109// Perform the replacement in `op`.
1110LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1111 Value oldMemRef, Value newMemRef, Operation *op,
1112 ArrayRef<Value> extraIndices, AffineMap indexRemap,
1113 ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1114 bool allowNonDereferencingOps) {
1115 unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1116 (void)newMemRefRank; // unused in opt mode
1117 unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1118 (void)oldMemRefRank; // unused in opt mode
1119 if (indexRemap) {
1120 assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1121 "symbolic operand count mismatch");
1122 assert(indexRemap.getNumInputs() ==
1123 extraOperands.size() + oldMemRefRank + symbolOperands.size());
1124 assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1125 } else {
1126 assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1127 }
1128
1129 // Assert same elemental type.
1130 assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1131 cast<MemRefType>(newMemRef.getType()).getElementType());
1132
1133 SmallVector<unsigned, 2> usePositions;
1134 for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1135 if (opEntry.value() == oldMemRef)
1136 usePositions.push_back(opEntry.index());
1137 }
1138
1139 // If memref doesn't appear, nothing to do.
1140 if (usePositions.empty())
1141 return success();
1142
1143 unsigned memRefOperandPos = usePositions.front();
1144
1145 OpBuilder builder(op);
1146 // The following checks if op is dereferencing memref and performs the access
1147 // index rewrites.
1148 if (!isDereferencingOp(op)) {
1149 if (!allowNonDereferencingOps) {
1150 // Failure: memref used in a non-dereferencing context (potentially
1151 // escapes); no replacement in these cases unless allowNonDereferencingOps
1152 // is set.
1153 return failure();
1154 }
1155 for (unsigned pos : usePositions)
1156 op->setOperand(pos, newMemRef);
1157 return success();
1158 }
1159
1160 if (usePositions.size() > 1) {
1161 // TODO: extend it for this case when needed (rare).
1162 LLVM_DEBUG(llvm::dbgs()
1163 << "multiple dereferencing uses in a single op not supported");
1164 return failure();
1165 }
1166
1167 // Perform index rewrites for the dereferencing op and then replace the op.
1168 SmallVector<Value, 4> oldMapOperands;
1169 AffineMap oldMap;
1170 unsigned oldMemRefNumIndices = oldMemRefRank;
1171 auto startIdx = op->operand_begin() + memRefOperandPos + 1;
1172 auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1173 if (affMapAccInterface) {
1174 // If `op` implements AffineMapAccessInterface, we can get the indices by
1175 // querying the number of map operands from the operand list at a certain
1176 // offset (`memRefOperandPos` in this case).
1177 NamedAttribute oldMapAttrPair =
1178 affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1179 oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1180 oldMemRefNumIndices = oldMap.getNumInputs();
1181 }
1182 oldMapOperands.assign(startIdx, startIdx + oldMemRefNumIndices);
1183
1184 // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1185 SmallVector<Value, 4> oldMemRefOperands;
1186 SmallVector<Value, 4> affineApplyOps;
1187 oldMemRefOperands.reserve(oldMemRefRank);
1188 if (affMapAccInterface &&
1189 oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1190 for (auto resultExpr : oldMap.getResults()) {
1191 auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1192 oldMap.getNumSymbols(), resultExpr);
1193 auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
1194 oldMapOperands);
1195 oldMemRefOperands.push_back(afOp);
1196 affineApplyOps.push_back(afOp);
1197 }
1198 } else {
1199 oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1200 }
1201
1202 // Construct new indices as a remap of the old ones if a remapping has been
1203 // provided. The indices of a memref come right after it, i.e.,
1204 // at position memRefOperandPos + 1.
1205 SmallVector<Value, 4> remapOperands;
1206 remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1207 symbolOperands.size());
1208 remapOperands.append(extraOperands.begin(), extraOperands.end());
1209 remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1210 remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1211
1212 SmallVector<Value, 4> remapOutputs;
1213 remapOutputs.reserve(oldMemRefRank);
1214 if (indexRemap &&
1215 indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1216 // Remapped indices.
1217 for (auto resultExpr : indexRemap.getResults()) {
1218 auto singleResMap = AffineMap::get(
1219 indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1220 auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
1221 remapOperands);
1222 remapOutputs.push_back(afOp);
1223 affineApplyOps.push_back(afOp);
1224 }
1225 } else {
1226 // No remapping specified.
1227 remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1228 }
1229 SmallVector<Value, 4> newMapOperands;
1230 newMapOperands.reserve(newMemRefRank);
1231
1232 // Prepend 'extraIndices' in 'newMapOperands'.
1233 for (Value extraIndex : extraIndices) {
1234 assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1235 "invalid memory op index");
1236 newMapOperands.push_back(extraIndex);
1237 }
1238
1239 // Append 'remapOutputs' to 'newMapOperands'.
1240 newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1241
1242 // Create new fully composed AffineMap for new op to be created.
1243 assert(newMapOperands.size() == newMemRefRank);
1244 auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1245 fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1246 newMap = simplifyAffineMap(newMap);
1247 canonicalizeMapAndOperands(&newMap, &newMapOperands);
1248 // Remove any affine.apply's that became dead as a result of composition.
1249 for (Value value : affineApplyOps)
1250 if (value.use_empty())
1251 value.getDefiningOp()->erase();
1252
1253 OperationState state(op->getLoc(), op->getName());
1254 // Construct the new operation using this memref.
1255 state.operands.reserve(op->getNumOperands() + extraIndices.size());
1256 // Insert the non-memref operands.
1257 state.operands.append(op->operand_begin(),
1258 op->operand_begin() + memRefOperandPos);
1259 // Insert the new memref value.
1260 state.operands.push_back(newMemRef);
1261
1262 // Insert the new memref map operands.
1263 if (affMapAccInterface) {
1264 state.operands.append(newMapOperands.begin(), newMapOperands.end());
1265 } else {
1266 // In the case of dereferencing ops not implementing
1267 // AffineMapAccessInterface, we need to apply the values of `newMapOperands`
1268 // to the `newMap` to get the correct indices.
1269 for (unsigned i = 0; i < newMemRefRank; i++) {
1270 state.operands.push_back(AffineApplyOp::create(
1271 builder, op->getLoc(),
1272 AffineMap::get(newMap.getNumDims(), newMap.getNumSymbols(),
1273 newMap.getResult(i)),
1274 newMapOperands));
1275 }
1276 }
1277
1278 // Insert the remaining operands unmodified.
1279 unsigned oldMapNumInputs = oldMapOperands.size();
1280 state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1281 oldMapNumInputs,
1282 op->operand_end());
1283 // Result types don't change. Both memref's are of the same elemental type.
1284 state.types.reserve(op->getNumResults());
1285 for (auto result : op->getResults())
1286 state.types.push_back(result.getType());
1287
1288 // Add attribute for 'newMap', other Attributes do not change.
1289 auto newMapAttr = AffineMapAttr::get(newMap);
1290 for (auto namedAttr : op->getAttrs()) {
1291 if (affMapAccInterface &&
1292 namedAttr.getName() ==
1293 affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef).getName())
1294 state.attributes.push_back({namedAttr.getName(), newMapAttr});
1295 else
1296 state.attributes.push_back(namedAttr);
1297 }
1298
1299 // Create the new operation.
1300 auto *repOp = builder.create(state);
1301 op->replaceAllUsesWith(repOp);
1302 op->erase();
1303
1304 return success();
1305}
1306
1307LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1308 Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1309 AffineMap indexRemap, ArrayRef<Value> extraOperands,
1310 ArrayRef<Value> symbolOperands,
1311 llvm::function_ref<bool(Operation *)> userFilterFn,
1312 bool allowNonDereferencingOps, bool replaceInDeallocOp) {
1313 unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1314 (void)newMemRefRank; // unused in opt mode
1315 unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1316 (void)oldMemRefRank;
1317 if (indexRemap) {
1318 assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1319 "symbol operand count mismatch");
1320 assert(indexRemap.getNumInputs() ==
1321 extraOperands.size() + oldMemRefRank + symbolOperands.size());
1322 assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1323 } else {
1324 assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1325 }
1326
1327 // Assert same elemental type.
1328 assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1329 cast<MemRefType>(newMemRef.getType()).getElementType());
1330
1331 // Walk all uses of old memref; collect ops to perform replacement. We use a
1332 // DenseSet since an operation could potentially have multiple uses of a
1333 // memref (although rare), and the replacement later is going to erase ops.
1334 DenseSet<Operation *> opsToReplace;
1335 for (auto *user : oldMemRef.getUsers()) {
1336 // Check if this user doesn't pass the filter.
1337 if (userFilterFn && !userFilterFn(user))
1338 continue;
1339
1340 // Skip dealloc's - no replacement is necessary, and a memref replacement
1341 // at other uses doesn't hurt these dealloc's.
1342 if (hasSingleEffect<MemoryEffects::Free>(user, oldMemRef) &&
1343 !replaceInDeallocOp)
1344 continue;
1345
1346 // Check if the memref was used in a non-dereferencing context. It is fine
1347 // for the memref to be used in a non-dereferencing way outside of the
1348 // region where this replacement is happening.
1349 if (!isa<AffineMapAccessInterface>(*user)) {
1350 if (!allowNonDereferencingOps) {
1351 LLVM_DEBUG(
1352 llvm::dbgs()
1353 << "Memref replacement failed: non-deferencing memref user: \n"
1354 << *user << '\n');
1355 return failure();
1356 }
1357 // Non-dereferencing ops with the MemRefsNormalizable trait are
1358 // supported for replacement.
1359 if (!user->hasTrait<OpTrait::MemRefsNormalizable>()) {
1360 LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1361 "memrefs normalizable trait: \n"
1362 << *user << '\n');
1363 return failure();
1364 }
1365 }
1366
1367 // We'll first collect and then replace --- since replacement erases the
1368 // user that has the use, and that user could be postDomFilter or domFilter
1369 // itself!
1370 opsToReplace.insert(user);
1371 }
1372
1373 for (auto *user : opsToReplace) {
1374 if (failed(replaceAllMemRefUsesWith(
1375 oldMemRef, newMemRef, user, extraIndices, indexRemap, extraOperands,
1376 symbolOperands, allowNonDereferencingOps)))
1377 llvm_unreachable("memref replacement guaranteed to succeed here");
1378 }
1379
1380 return success();
1381}
1382
1383/// Given an operation, inserts one or more single result affine
1384/// apply operations, results of which are exclusively used by this
1385/// operation. The operands of these newly created affine apply ops are
1386/// guaranteed to be loop iterators or terminal symbols of a function.
1387///
1388/// Before
1389///
1390/// affine.for %i = 0 to #map(%N)
1391/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1392/// "send"(%idx, %A, ...)
1393/// "compute"(%idx)
1394///
1395/// After
1396///
1397/// affine.for %i = 0 to #map(%N)
1398/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1399/// "send"(%idx, %A, ...)
1400/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1401/// "compute"(%idx_)
1402///
1403/// This allows applying different transformations on send and compute (for eg.
1404/// different shifts/delays).
1405///
1406/// Leaves `sliceOps` empty either if none of opInst's operands were the result
1407/// of an affine.apply (and thus there was no affine computation slice to
1408/// create), or if all the affine.apply ops supplying operands to this opInst
1409/// did not have any uses besides this opInst; otherwise returns the list of
1410/// affine.apply operations created in the output argument `sliceOps`.
1411void mlir::affine::createAffineComputationSlice(
1412 Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1413 // Collect all operands that are results of affine apply ops.
1414 SmallVector<Value, 4> subOperands;
1415 subOperands.reserve(opInst->getNumOperands());
1416 for (auto operand : opInst->getOperands())
1417 if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1418 subOperands.push_back(operand);
1419
1420 // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1421 SmallVector<Operation *, 4> affineApplyOps;
1422 getReachableAffineApplyOps(subOperands, affineApplyOps);
1423 // Skip transforming if there are no affine maps to compose.
1424 if (affineApplyOps.empty())
1425 return;
1426
1427 // Check if all uses of the affine apply ops lie only in this op, in
1428 // which case there would be nothing to do.
1429 bool localized = true;
1430 for (auto *op : affineApplyOps) {
1431 for (auto result : op->getResults()) {
1432 for (auto *user : result.getUsers()) {
1433 if (user != opInst) {
1434 localized = false;
1435 break;
1436 }
1437 }
1438 }
1439 }
1440 if (localized)
1441 return;
1442
1443 OpBuilder builder(opInst);
1444 SmallVector<Value, 4> composedOpOperands(subOperands);
1445 auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1446 fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1447
1448 // Create an affine.apply for each of the map results.
1449 sliceOps->reserve(composedMap.getNumResults());
1450 for (auto resultExpr : composedMap.getResults()) {
1451 auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1452 composedMap.getNumSymbols(), resultExpr);
1453 sliceOps->push_back(AffineApplyOp::create(
1454 builder, opInst->getLoc(), singleResMap, composedOpOperands));
1455 }
1456
1457 // Construct the new operands that include the results from the composed
1458 // affine apply op above instead of existing ones (subOperands). So, they
1459 // differ from opInst's operands only for those operands in 'subOperands', for
1460 // which they will be replaced by the corresponding one from 'sliceOps'.
1461 SmallVector<Value, 4> newOperands(opInst->getOperands());
1462 for (Value &operand : newOperands) {
1463 // Replace the subOperands from among the new operands.
1464 unsigned j, f;
1465 for (j = 0, f = subOperands.size(); j < f; j++) {
1466 if (operand == subOperands[j])
1467 break;
1468 }
1469 if (j < subOperands.size())
1470 operand = (*sliceOps)[j];
1471 }
1472 for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1473 opInst->setOperand(idx, newOperands[idx]);
1474}
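
// Editorial usage sketch, not part of the original source: a hypothetical
// caller of createAffineComputationSlice. The helper name and the debug output
// are illustrative only.
static void sliceAffineOperandsExample(Operation *opInst) {
  SmallVector<AffineApplyOp, 4> sliceOps;
  createAffineComputationSlice(opInst, &sliceOps);
  // An empty `sliceOps` means opInst either had no affine.apply operands to
  // slice, or those affine.apply ops were already used exclusively by opInst.
  LLVM_DEBUG(llvm::dbgs() << "created " << sliceOps.size()
                          << " affine.apply ops for the slice\n");
}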
1475
1476/// Enum to set patterns of affine expr in tiled-layout map.
1477/// TileFloorDiv: <dim expr> div <tile size>
1478/// TileMod: <dim expr> mod <tile size>
1479/// TileNone: None of the above
1480/// Example:
1481/// #tiled_2d_128x256 = affine_map<(d0, d1)
1482/// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1483/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1484/// "d0 mod 128" and "d1 mod 256" ==> TileMod
1485 enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1486
1487/// Check if `map` is a tiled layout. In a tiled layout, k specific dimensions
1488/// that are floordiv'ed by their respective tile sizes also appear in a mod
1489/// with the same tile sizes, and no other expression involves those k
1490/// dimensions. This function stores a vector of tuples (`tileSizePos`), each
1491/// holding the tile-size AffineExpr and the positions of the corresponding
1492/// `floordiv` and `mod`. If it is not a tiled layout, an empty vector is returned.
1493static LogicalResult getTileSizePos(
1494 AffineMap map,
1495 SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1496 // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1497 // `floordiv` and its position in `map` output.
1498 // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1499 // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1500 // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1501  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
1502  unsigned pos = 0;
1503 for (AffineExpr expr : map.getResults()) {
1504 if (expr.getKind() == AffineExprKind::FloorDiv) {
1505 AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1506 if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1507 floordivExprs.emplace_back(
1508 std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1509 }
1510 pos++;
1511 }
1512 // Not tiled layout if `floordivExprs` is empty.
1513 if (floordivExprs.empty()) {
1514    tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1515    return success();
1516 }
1517
1518 // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1519 // not tiled layout.
1520 for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1521 AffineExpr floordivExprLHS = std::get<0>(fexpr);
1522 AffineExpr floordivExprRHS = std::get<1>(fexpr);
1523 unsigned floordivPos = std::get<2>(fexpr);
1524
1525    // Walk the affine exprs of the `map` output except `fexpr`, and check if the
1526    // LHS and RHS of `fexpr` are used in the LHS and RHS of a `mod`. If the LHS
1527    // of `fexpr` is used in any other expr, the map is not a tiled layout, e.g.:
1528 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1529 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1530 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1531 // 256)>
1532 bool found = false;
1533 pos = 0;
1534 for (AffineExpr expr : map.getResults()) {
1535 bool notTiled = false;
1536 if (pos != floordivPos) {
1537 expr.walk([&](AffineExpr e) {
1538 if (e == floordivExprLHS) {
1539 if (expr.getKind() == AffineExprKind::Mod) {
1540 AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1541            // If the LHS and RHS of `mod` are the same as those of the `floordiv`.
1542 if (floordivExprLHS == binaryExpr.getLHS() &&
1543 floordivExprRHS == binaryExpr.getRHS()) {
1544              // Save the tile size (RHS of `mod`) and the positions of `floordiv`
1545              // and `mod` if a matching `mod` has not been found yet.
1546 if (!found) {
1547 tileSizePos.emplace_back(
1548 std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1549 found = true;
1550 } else {
1551                // Non-tiled layout: multiple `mod` exprs with the same LHS.
1552 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1553 // mod 256, d2 mod 256)>
1554 notTiled = true;
1555 }
1556 } else {
1557              // Non-tiled layout: RHS of `mod` differs from that of the `floordiv`.
1558 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1559 // mod 128)>
1560 notTiled = true;
1561 }
1562 } else {
1563            // Non-tiled layout: LHS is the same, but the expr is not a `mod`.
1564 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1565 // floordiv 256)>
1566 notTiled = true;
1567 }
1568 }
1569 });
1570 }
1571 if (notTiled) {
1572      tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1573      return success();
1574 }
1575 pos++;
1576 }
1577 }
1578 return success();
1579}
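
// Editorial sketch, not part of the original source: for the tiled map
//   (d0, d1) -> (d0 floordiv 128, d1 floordiv 256, d0 mod 128, d1 mod 256)
// getTileSizePos is expected to record two (tileSize, floordivPos, modPos)
// tuples, (128, 0, 2) and (256, 1, 3). The helper name below is hypothetical.
static void dumpTileSizePosExample(AffineMap tiledMap) {
  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
  (void)getTileSizePos(tiledMap, tileSizePos);
  for (const auto &[tileSize, floordivPos, modPos] : tileSizePos)
    LLVM_DEBUG(llvm::dbgs() << "tile size " << tileSize << ": floordiv at #"
                            << floordivPos << ", mod at #" << modPos << '\n');
}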
1580
1581/// Check if the `dim` dimension of a memrefType with `layoutMap` becomes
1582/// dynamic after normalization. Map output dimensions whose expressions involve
1583/// dynamic input dimensions become dynamic. Returns true if `dim` is such a
1584/// dynamic dimension.
1585///
1586/// Example:
1587/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1588///
1589/// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output are dynamic.
1590/// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1591static bool
1592isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1593                             SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1594 AffineExpr expr = layoutMap.getResults()[dim];
1595 // Check if affine expr of the dimension includes dynamic dimension of input
1596 // memrefType.
1597 MLIRContext *context = layoutMap.getContext();
1598 return expr
1599 .walk([&](AffineExpr e) {
1600 if (isa<AffineDimExpr>(e) &&
1601 llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1602 return e == getAffineDimExpr(dim, context);
1603 }))
1604 return WalkResult::interrupt();
1605 return WalkResult::advance();
1606 })
1607 .wasInterrupted();
1608}
1609
1610/// Create affine expr to calculate dimension size for a tiled-layout map.
1611static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1612                                                  TileExprPattern pat) {
1613 // Create map output for the patterns.
1614 // "floordiv <tile size>" ==> "ceildiv <tile size>"
1615 // "mod <tile size>" ==> "<tile size>"
1616 AffineExpr newMapOutput;
1617 AffineBinaryOpExpr binaryExpr = nullptr;
1618 switch (pat) {
1619  case TileExprPattern::TileMod:
1620    binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1621 newMapOutput = binaryExpr.getRHS();
1622 break;
1623  case TileExprPattern::TileFloorDiv:
1624    binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1625 newMapOutput = getAffineBinaryOpExpr(
1626 AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1627 break;
1628 default:
1629 newMapOutput = oldMapOutput;
1630 }
1631 return newMapOutput;
1632}
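
// Editorial sketch, not part of the original source: a worked example of the
// rewrite above. The helper name `dimSizeExprExample` is hypothetical.
static void dimSizeExprExample(MLIRContext *ctx) {
  AffineExpr d1 = getAffineDimExpr(1, ctx);
  // "d1 floordiv 32" under TileFloorDiv becomes "d1 ceildiv 32".
  AffineExpr divSize = createDimSizeExprForTiledLayout(
      d1.floorDiv(32), TileExprPattern::TileFloorDiv);
  // "d1 mod 32" under TileMod becomes the tile size itself, i.e. "32".
  AffineExpr modSize =
      createDimSizeExprForTiledLayout(d1 % 32, TileExprPattern::TileMod);
  assert(divSize == d1.ceilDiv(32) && modSize == getAffineConstantExpr(32, ctx));
  (void)divSize;
  (void)modSize;
}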
1633
1634/// Create new maps to calculate each dimension size of `newMemRefType`, and
1635/// create `newDynamicSizes` from them by using AffineApplyOp.
1636///
1637/// Steps for normalizing dynamic memrefs for a tiled layout map
1638/// Example:
1639/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1640/// %0 = dim %arg0, %c1 :memref<4x?xf32>
1641/// %1 = alloc(%0) : memref<4x?xf32, #map0>
1642///
1643/// (Before this function)
1644/// 1. Check if `map` (#map0) is a tiled layout using `getTileSizePos()`. Only
1645/// a single layout map is supported.
1646///
1647/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1648/// is memref<4x?x?xf32> in the above example.
1649///
1650/// (In this function)
1651/// 3. Create new maps to calculate each dimension of the normalized memrefType
1652/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1653/// dimension size can be calculated by replacing "floordiv <tile size>" with
1654/// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1655/// - New map in the above example
1656/// #map0 = affine_map<(d0, d1) -> (d0)>
1657/// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1658/// #map2 = affine_map<(d0, d1) -> (32)>
1659///
1660/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1661/// is used in dynamicSizes of new AllocOp.
1662/// %0 = dim %arg0, %c1 : memref<4x?xf32>
1663/// %c4 = arith.constant 4 : index
1664/// %1 = affine.apply #map1(%c4, %0)
1665/// %2 = affine.apply #map2(%c4, %0)
1666template <typename AllocLikeOp>
1667static void createNewDynamicSizes(MemRefType oldMemRefType,
1668 MemRefType newMemRefType, AffineMap map,
1669 AllocLikeOp allocOp, OpBuilder b,
1670 SmallVectorImpl<Value> &newDynamicSizes) {
1671 // Create new input for AffineApplyOp.
1672 SmallVector<Value, 4> inAffineApply;
1673 ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1674 unsigned dynIdx = 0;
1675 for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1676 if (oldMemRefShape[d] < 0) {
1677 // Use dynamicSizes of allocOp for dynamic dimension.
1678 inAffineApply.emplace_back(allocOp.getDynamicSizes()[dynIdx]);
1679 dynIdx++;
1680 } else {
1681 // Create ConstantOp for static dimension.
1682 auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1683 inAffineApply.emplace_back(
1684 arith::ConstantOp::create(b, allocOp.getLoc(), constantAttr));
1685 }
1686 }
1687
1688  // Create a new map to compute each dimension size of the new memref for each
1689  // original map output, but only for the dynamic dimensions of `newMemRefType`.
1690 unsigned newDimIdx = 0;
1691 ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1692  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1693  (void)getTileSizePos(map, tileSizePos);
1694 for (AffineExpr expr : map.getResults()) {
1695 if (newMemRefShape[newDimIdx] < 0) {
1696 // Create new maps to calculate each dimension size of new memref.
1697      TileExprPattern pat = TileExprPattern::TileNone;
1698      for (auto pos : tileSizePos) {
1699        if (newDimIdx == std::get<1>(pos))
1700          pat = TileExprPattern::TileFloorDiv;
1701        else if (newDimIdx == std::get<2>(pos))
1702          pat = TileExprPattern::TileMod;
1703      }
1704 AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1705 AffineMap newMap =
1706 AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1707 Value affineApp =
1708 AffineApplyOp::create(b, allocOp.getLoc(), newMap, inAffineApply);
1709 newDynamicSizes.emplace_back(affineApp);
1710 }
1711 newDimIdx++;
1712 }
1713}
1714
1715template <typename AllocLikeOp>
1716LogicalResult mlir::affine::normalizeMemRef(AllocLikeOp allocOp) {
1717 MemRefType memrefType = allocOp.getType();
1718 OpBuilder b(allocOp);
1719
1720 // Fetch a new memref type after normalizing the old memref to have an
1721 // identity map layout.
1722 MemRefType newMemRefType = normalizeMemRefType(memrefType);
1723 if (newMemRefType == memrefType)
1724 // Either memrefType already had an identity map or the map couldn't be
1725 // transformed to an identity map.
1726 return failure();
1727
1728 Value oldMemRef = allocOp.getResult();
1729
1730 SmallVector<Value, 4> symbolOperands(allocOp.getSymbolOperands());
1731 AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1732 AllocLikeOp newAlloc;
1733  // Check if `layoutMap` is a tiled layout. Only a single layout map is
1734  // supported for normalizing dynamic memrefs.
1735  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1736  (void)getTileSizePos(layoutMap, tileSizePos);
1737 if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1738 auto oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1739 SmallVector<Value, 4> newDynamicSizes;
1740 createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1741 newDynamicSizes);
1742 // Add the new dynamic sizes in new AllocOp.
1743 newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1744 newDynamicSizes, allocOp.getAlignmentAttr());
1745 } else {
1746 newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1747 allocOp.getAlignmentAttr());
1748 }
1749 // Replace all uses of the old memref.
1750 if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1751 /*extraIndices=*/{},
1752 /*indexRemap=*/layoutMap,
1753 /*extraOperands=*/{},
1754 /*symbolOperands=*/symbolOperands,
1755 /*userFilterFn=*/nullptr,
1756 /*allowNonDereferencingOps=*/true))) {
1757 // If it failed (due to escapes for example), bail out.
1758 newAlloc.erase();
1759 return failure();
1760 }
1761  // Replace any uses of the original alloc op and erase it. All remaining uses
1762  // have to be deallocs; replaceAllMemRefUsesWith above would've failed otherwise.
1763 assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1764 return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1765 }));
1766 oldMemRef.replaceAllUsesWith(newAlloc);
1767 allocOp.erase();
1768 return success();
1769}
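
// Editorial usage sketch, not part of the original source: normalizing every
// memref.alloc nested under an operation. The helper name is hypothetical;
// allocs whose layout cannot be normalized are simply left unchanged.
static void normalizeAllAllocsExample(Operation *root) {
  root->walk([](memref::AllocOp allocOp) { (void)normalizeMemRef(allocOp); });
}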
1770
1771LogicalResult
1772mlir::affine::normalizeMemRef(memref::ReinterpretCastOp reinterpretCastOp) {
1773 MemRefType memrefType = reinterpretCastOp.getType();
1774 AffineMap oldLayoutMap = memrefType.getLayout().getAffineMap();
1775 Value oldMemRef = reinterpretCastOp.getResult();
1776
1777 // If `oldLayoutMap` is identity, `memrefType` is already normalized.
1778 if (oldLayoutMap.isIdentity())
1779 return success();
1780
1781 // Fetch a new memref type after normalizing the old memref to have an
1782 // identity map layout.
1783 MemRefType newMemRefType = normalizeMemRefType(memrefType);
1784 if (newMemRefType == memrefType)
1785 // `oldLayoutMap` couldn't be transformed to an identity map.
1786 return failure();
1787
1788 uint64_t newRank = newMemRefType.getRank();
1789 SmallVector<Value> mapOperands(oldLayoutMap.getNumDims() +
1790 oldLayoutMap.getNumSymbols());
1791 SmallVector<Value> oldStrides = reinterpretCastOp.getStrides();
1792 Location loc = reinterpretCastOp.getLoc();
1793 // As `newMemRefType` is normalized, it is unit strided.
1794 SmallVector<int64_t> newStaticStrides(newRank, 1);
1795 SmallVector<int64_t> newStaticOffsets(newRank, 0);
1796 ArrayRef<int64_t> oldShape = memrefType.getShape();
1797 ValueRange oldSizes = reinterpretCastOp.getSizes();
1798 unsigned idx = 0;
1799 OpBuilder b(reinterpretCastOp);
1800 // Collect the map operands which will be used to compute the new normalized
1801 // memref shape.
1802 for (unsigned i = 0, e = memrefType.getRank(); i < e; i++) {
1803 if (memrefType.isDynamicDim(i))
1804 mapOperands[i] =
1805 arith::SubIOp::create(b, loc, oldSizes[0].getType(), oldSizes[idx++],
1806                                arith::ConstantIndexOp::create(b, loc, 1));
1807    else
1808 mapOperands[i] = arith::ConstantIndexOp::create(b, loc, oldShape[i] - 1);
1809 }
1810 for (unsigned i = 0, e = oldStrides.size(); i < e; i++)
1811 mapOperands[memrefType.getRank() + i] = oldStrides[i];
1812 SmallVector<Value> newSizes;
1813 ArrayRef<int64_t> newShape = newMemRefType.getShape();
1814 // Compute size along all the dimensions of the new normalized memref.
1815 for (unsigned i = 0; i < newRank; i++) {
1816 if (!newMemRefType.isDynamicDim(i))
1817 continue;
1818 newSizes.push_back(AffineApplyOp::create(
1819 b, loc,
1820 AffineMap::get(oldLayoutMap.getNumDims(), oldLayoutMap.getNumSymbols(),
1821 oldLayoutMap.getResult(i)),
1822 mapOperands));
1823 }
1824 for (unsigned i = 0, e = newSizes.size(); i < e; i++) {
1825 newSizes[i] =
1826 arith::AddIOp::create(b, loc, newSizes[i].getType(), newSizes[i],
1827                             arith::ConstantIndexOp::create(b, loc, 1));
1828  }
1829 // Create the new reinterpret_cast op.
1830 auto newReinterpretCast = memref::ReinterpretCastOp::create(
1831 b, loc, newMemRefType, reinterpretCastOp.getSource(),
1832 /*offsets=*/ValueRange(), newSizes,
1833 /*strides=*/ValueRange(),
1834 /*static_offsets=*/newStaticOffsets,
1835 /*static_sizes=*/newShape,
1836 /*static_strides=*/newStaticStrides);
1837
1838 // Replace all uses of the old memref.
1839 if (failed(replaceAllMemRefUsesWith(oldMemRef,
1840 /*newMemRef=*/newReinterpretCast,
1841 /*extraIndices=*/{},
1842 /*indexRemap=*/oldLayoutMap,
1843 /*extraOperands=*/{},
1844 /*symbolOperands=*/oldStrides,
1845 /*userFilterFn=*/nullptr,
1846 /*allowNonDereferencingOps=*/true))) {
1847 // If it failed (due to escapes for example), bail out.
1848 newReinterpretCast.erase();
1849 return failure();
1850 }
1851
1852 oldMemRef.replaceAllUsesWith(newReinterpretCast);
1853 reinterpretCastOp.erase();
1854 return success();
1855}
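
// Editorial note, not part of the original source: each dynamic size of the
// normalized type above is obtained by evaluating the corresponding layout-map
// result at the maximal source indices (oldSize - 1 along every dimension,
// with the old strides bound as symbols) and then adding one, i.e. the largest
// mapped index along that result dimension plus one.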
1856
1857template LogicalResult
1858mlir::affine::normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
1859template LogicalResult
1860mlir::affine::normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
1861
1862MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1863 unsigned rank = memrefType.getRank();
1864 if (rank == 0)
1865 return memrefType;
1866
1867 if (memrefType.getLayout().isIdentity()) {
1868    // Either no map is associated with this memref or this memref has
1869    // a trivial (identity) map.
1870 return memrefType;
1871 }
1872 AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1873 unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1874
1875 // We don't do any checks for one-to-one'ness; we assume that it is
1876 // one-to-one.
1877
1878 // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1879 // for now.
1880 // TODO: Normalize the other types of dynamic memrefs.
1881  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1882  (void)getTileSizePos(layoutMap, tileSizePos);
1883 if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1884 return memrefType;
1885
1886 // We have a single map that is not an identity map. Create a new memref
1887 // with the right shape and an identity layout map.
1888 ArrayRef<int64_t> shape = memrefType.getShape();
1889  // FlatAffineValueConstraints may use the symbolic operands later on.
1890 FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1891 SmallVector<unsigned, 4> memrefTypeDynDims;
1892 for (unsigned d = 0; d < rank; ++d) {
1893    // Use the constraint system only for static dimensions.
1894 if (shape[d] > 0) {
1895 fac.addBound(BoundType::LB, d, 0);
1896 fac.addBound(BoundType::UB, d, shape[d] - 1);
1897 } else {
1898 memrefTypeDynDims.emplace_back(d);
1899 }
1900 }
1901 // We compose this map with the original index (logical) space to derive
1902 // the upper bounds for the new index space.
1903 unsigned newRank = layoutMap.getNumResults();
1904 if (failed(fac.composeMatchingMap(layoutMap)))
1905 return memrefType;
1906 // TODO: Handle semi-affine maps.
1907 // Project out the old data dimensions.
1908 fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1909 SmallVector<int64_t, 4> newShape(newRank);
1910 MLIRContext *context = memrefType.getContext();
1911 for (unsigned d = 0; d < newRank; ++d) {
1912 // Check if this dimension is dynamic.
1913 if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1914 newShape[d] = ShapedType::kDynamic;
1915 continue;
1916 }
1917 // The lower bound for the shape is always zero.
1918 std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1919 // For a static memref and an affine map with no symbols, this is
1920 // always bounded. However, when we have symbols, we may not be able to
1921 // obtain a constant upper bound. Also, mapping to a negative space is
1922 // invalid for normalization.
1923 if (!ubConst.has_value() || *ubConst < 0) {
1924 LLVM_DEBUG(llvm::dbgs()
1925 << "can't normalize map due to unknown/invalid upper bound");
1926 return memrefType;
1927 }
1928    // This dimension is static; its size is the constant upper bound plus one.
1929 newShape[d] = *ubConst + 1;
1930 }
1931
1932 // Create the new memref type after trivializing the old layout map.
1933 auto newMemRefType =
1934 MemRefType::Builder(memrefType)
1935 .setShape(newShape)
1936 .setLayout(AffineMapAttr::get(
1937 AffineMap::getMultiDimIdentityMap(newRank, context)));
1938 return newMemRefType;
1939}
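
// Editorial worked example, not part of the original source: for
//   memref<64x128xf32, affine_map<(d0, d1) -> (d0 floordiv 32, d1 floordiv 32,
//                                               d0 mod 32, d1 mod 32)>>
// the constraint system bounds d0 in [0, 63] and d1 in [0, 127], so the four
// map results are bounded by 1, 3, 31 and 31 respectively, and the normalized
// type is memref<2x4x32x32xf32> with an identity layout.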
1940
1941DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1942 Value rhs) {
1943 DivModValue result;
1944 AffineExpr d0, d1;
1945 bindDims(b.getContext(), d0, d1);
1946 result.quotient =
1947 affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1948 result.remainder =
1949 affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1950 return result;
1951}
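
// Editorial usage sketch, not part of the original source: the hypothetical
// helper below materializes both the quotient and the remainder of two index
// values with a single call, as composed affine.apply ops.
static std::pair<Value, Value> divModExample(OpBuilder &b, Location loc,
                                             Value x, Value y) {
  DivModValue dm = getDivMod(b, loc, x, y);
  return {dm.quotient, dm.remainder}; // x floordiv y, x mod y
}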
1952
1953/// Create an affine map that computes `lhs` * `rhs`, composing in any other
1954/// affine maps.
1955static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1956 Location loc,
1957                                                       OpFoldResult lhs,
1958                                                       OpFoldResult rhs) {
1959 AffineExpr s0, s1;
1960 bindSymbols(b.getContext(), s0, s1);
1961 return makeComposedFoldedAffineApply(b, loc, s0 * s1, {lhs, rhs});
1962}
1963
1964FailureOr<SmallVector<Value>>
1965mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1966 ArrayRef<Value> basis, bool hasOuterBound) {
1967 if (hasOuterBound)
1968 basis = basis.drop_front();
1969
1970 // Note: the divisors are backwards due to the scan.
1971 SmallVector<Value> divisors;
1972 OpFoldResult basisProd = b.getIndexAttr(1);
1973 for (OpFoldResult basisElem : llvm::reverse(basis)) {
1974 FailureOr<OpFoldResult> nextProd =
1975 composedAffineMultiply(b, loc, basisElem, basisProd);
1976 if (failed(nextProd))
1977 return failure();
1978 basisProd = *nextProd;
1979 divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1980 }
1981
1982 SmallVector<Value> results;
1983 results.reserve(divisors.size() + 1);
1984 Value residual = linearIndex;
1985 for (Value divisor : llvm::reverse(divisors)) {
1986 DivModValue divMod = getDivMod(b, loc, residual, divisor);
1987 results.push_back(divMod.quotient);
1988 residual = divMod.remainder;
1989 }
1990 results.push_back(residual);
1991 return results;
1992}
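
// Editorial worked example, not part of the original source: with
// basis = [4, 8] (hasOuterBound = true) and linearIndex = 27, the scan above
// builds divisors = [8], and the loop yields 27 floordiv 8 = 3 followed by the
// residual 27 mod 8 = 3, i.e. the multi-index (3, 3); re-linearizing gives
// 3 * 8 + 3 = 27.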
1993
1994FailureOr<SmallVector<Value>>
1995mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1996                               ArrayRef<OpFoldResult> basis,
1997                               bool hasOuterBound) {
1998 if (hasOuterBound)
1999 basis = basis.drop_front();
2000
2001 // Note: the divisors are backwards due to the scan.
2002 SmallVector<Value> divisors;
2003 OpFoldResult basisProd = b.getIndexAttr(1);
2004 for (OpFoldResult basisElem : llvm::reverse(basis)) {
2005 FailureOr<OpFoldResult> nextProd =
2006 composedAffineMultiply(b, loc, basisElem, basisProd);
2007 if (failed(nextProd))
2008 return failure();
2009 basisProd = *nextProd;
2010 divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
2011 }
2012
2013 SmallVector<Value> results;
2014 results.reserve(divisors.size() + 1);
2015 Value residual = linearIndex;
2016 for (Value divisor : llvm::reverse(divisors)) {
2017 DivModValue divMod = getDivMod(b, loc, residual, divisor);
2018 results.push_back(divMod.quotient);
2019 residual = divMod.remainder;
2020 }
2021 results.push_back(residual);
2022 return results;
2023}
2024
2025OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
2026                                          ArrayRef<OpFoldResult> basis,
2027                                          ImplicitLocOpBuilder &builder) {
2028 return linearizeIndex(builder, builder.getLoc(), multiIndex, basis);
2029}
2030
2031OpFoldResult mlir::affine::linearizeIndex(OpBuilder &builder, Location loc,
2032 ArrayRef<OpFoldResult> multiIndex,
2033 ArrayRef<OpFoldResult> basis) {
2034 assert(multiIndex.size() == basis.size() ||
2035 multiIndex.size() == basis.size() + 1);
2036 SmallVector<AffineExpr> basisAffine;
2037
2038 // Add a fake initial size in order to make the later index linearization
2039 // computations line up if an outer bound is not provided.
2040 if (multiIndex.size() == basis.size() + 1)
2041 basisAffine.push_back(getAffineConstantExpr(1, builder.getContext()));
2042
2043 for (size_t i = 0; i < basis.size(); ++i) {
2044 basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
2045 }
2046
2047 SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
2048  SmallVector<OpFoldResult> strides;
2049  strides.reserve(stridesAffine.size());
2050 llvm::transform(stridesAffine, std::back_inserter(strides),
2051 [&builder, &basis, loc](AffineExpr strideExpr) {
2052                    return affine::makeComposedFoldedAffineApply(
2053                        builder, loc, strideExpr, basis);
2054 });
2055
2056 auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2057 OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
2058 return affine::makeComposedFoldedAffineApply(builder, loc, linearIndexExpr,
2059 multiIndexAndStrides);
2060}
2060}
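
// Editorial worked example, not part of the original source: with
// basis = (B0, B1, B2) and multiIndex = (i, j, k), computeStrides yields the
// strides (B1 * B2, B2, 1), so the folded affine.apply computes
// i * B1 * B2 + j * B2 + k; the outermost basis entry B0 only serves as a
// bound and does not affect the result.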