MLIR 23.0.0git
Utils.cpp
Go to the documentation of this file.
1//===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements miscellaneous transformation utilities for the Affine
10// dialect.
11//
12//===----------------------------------------------------------------------===//
13
15
25#include "mlir/IR/Dominance.h"
26#include "mlir/IR/IRMapping.h"
27#include "mlir/IR/IntegerSet.h"
29#include "llvm/ADT/SmallVectorExtras.h"
30#include <optional>
31
32#define DEBUG_TYPE "affine-utils"
33
34using namespace mlir;
35using namespace affine;
36using namespace presburger;
37
namespace {
/// Visit affine expressions recursively and build the sequence of operations
/// that correspond to it. Visitation functions return a Value of the
/// expression subtree they visited or `nullptr` on error.
class AffineApplyExpander
    : public AffineExprVisitor<AffineApplyExpander, Value> {
public:
  /// This internal class expects arguments to be non-null, checks must be
  /// performed at the call site.
  AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
                      ValueRange symbolValues, Location loc)
      : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
        loc(loc) {}

  /// Visits both operands of `expr` and combines the results with a single
  /// `OpTy` operation carrying `overflowFlags`. Returns null if lowering of
  /// either operand failed.
  template <typename OpTy>
  Value buildBinaryExpr(AffineBinaryOpExpr expr,
                        arith::IntegerOverflowFlags overflowFlags =
                            arith::IntegerOverflowFlags::none) {
    auto lhs = visit(expr.getLHS());
    auto rhs = visit(expr.getRHS());
    if (!lhs || !rhs)
      return nullptr;
    auto op = OpTy::create(builder, loc, lhs, rhs, overflowFlags);
    return op.getResult();
  }

  Value visitAddExpr(AffineBinaryOpExpr expr) {
    return buildBinaryExpr<arith::AddIOp>(expr);
  }

  // Multiplication is emitted with the "no signed wrap" flag.
  Value visitMulExpr(AffineBinaryOpExpr expr) {
    return buildBinaryExpr<arith::MulIOp>(expr,
                                          arith::IntegerOverflowFlags::nsw);
  }

  /// Euclidean modulo operation: negative RHS is not allowed.
  /// Remainder of the euclidean integer division is always non-negative.
  ///
  /// Implemented as
  ///
  ///     a mod b =
  ///         let remainder = srem a, b;
  ///             negative = a < 0 in
  ///         select negative, remainder + b, remainder.
  Value visitModExpr(AffineBinaryOpExpr expr) {
    // A constant non-positive RHS is rejected up front; a non-constant RHS is
    // trusted to be positive per affine semantics.
    if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
      if (rhsConst.getValue() <= 0) {
        emitError(loc, "modulo by non-positive value is not supported");
        return nullptr;
      }
    }

    auto lhs = visit(expr.getLHS());
    auto rhs = visit(expr.getRHS());
    assert(lhs && rhs && "unexpected affine expr lowering failure");

    Value remainder = arith::RemSIOp::create(builder, loc, lhs, rhs);
    Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
    Value isRemainderNegative = arith::CmpIOp::create(
        builder, loc, arith::CmpIPredicate::slt, remainder, zeroCst);
    Value correctedRemainder =
        arith::AddIOp::create(builder, loc, remainder, rhs);
    Value result = arith::SelectOp::create(builder, loc, isRemainderNegative,
                                           correctedRemainder, remainder);
    return result;
  }

  /// Floor division operation (rounds towards negative infinity).
  ///
  /// For positive divisors, it can be implemented without branching and with a
  /// single division operation as
  ///
  ///       a floordiv b =
  ///         let negative = a < 0 in
  ///         let absolute = negative ? -a - 1 : a in
  ///         let quotient = absolute / b in
  ///             negative ? -quotient - 1 : quotient
  ///
  /// Note: this lowering does not use arith.floordivsi because the lowering of
  /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
  /// not one but two arith.divsi. That could be changed to one divsi, but one
  /// way or another, going through arith.floordivsi will result in more complex
  /// IR because arith.floordivsi is more general than affine floordiv in that
  /// it supports negative RHS.
  Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
    if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
      if (rhsConst.getValue() <= 0) {
        emitError(loc, "division by non-positive value is not supported");
        return nullptr;
      }
    }
    auto lhs = visit(expr.getLHS());
    auto rhs = visit(expr.getRHS());
    assert(lhs && rhs && "unexpected affine expr lowering failure");

    Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
    Value noneCst = arith::ConstantIndexOp::create(builder, loc, -1);
    Value negative = arith::CmpIOp::create(
        builder, loc, arith::CmpIPredicate::slt, lhs, zeroCst);
    // -a - 1 computed as (-1) - a.
    Value negatedDecremented =
        arith::SubIOp::create(builder, loc, noneCst, lhs);
    Value dividend = arith::SelectOp::create(builder, loc, negative,
                                             negatedDecremented, lhs);
    Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
    // -quotient - 1 computed as (-1) - quotient.
    Value correctedQuotient =
        arith::SubIOp::create(builder, loc, noneCst, quotient);
    Value result = arith::SelectOp::create(builder, loc, negative,
                                           correctedQuotient, quotient);
    return result;
  }

  /// Ceiling division operation (rounds towards positive infinity).
  ///
  /// For positive divisors, it can be implemented without branching and with a
  /// single division operation as
  ///
  ///     a ceildiv b =
  ///         let negative = a <= 0 in
  ///         let absolute = negative ? -a : a - 1 in
  ///         let quotient = absolute / b in
  ///             negative ? -quotient : quotient + 1
  ///
  /// Note: not using arith.ceildivsi for the same reason as explained in the
  /// visitFloorDivExpr comment.
  Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
    if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
      if (rhsConst.getValue() <= 0) {
        emitError(loc, "division by non-positive value is not supported");
        return nullptr;
      }
    }
    auto lhs = visit(expr.getLHS());
    auto rhs = visit(expr.getRHS());
    assert(lhs && rhs && "unexpected affine expr lowering failure");

    Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
    Value oneCst = arith::ConstantIndexOp::create(builder, loc, 1);
    Value nonPositive = arith::CmpIOp::create(
        builder, loc, arith::CmpIPredicate::sle, lhs, zeroCst);
    Value negated = arith::SubIOp::create(builder, loc, zeroCst, lhs);
    Value decremented = arith::SubIOp::create(builder, loc, lhs, oneCst);
    Value dividend = arith::SelectOp::create(builder, loc, nonPositive, negated,
                                             decremented);
    Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
    Value negatedQuotient =
        arith::SubIOp::create(builder, loc, zeroCst, quotient);
    Value incrementedQuotient =
        arith::AddIOp::create(builder, loc, quotient, oneCst);
    Value result = arith::SelectOp::create(
        builder, loc, nonPositive, negatedQuotient, incrementedQuotient);
    return result;
  }

  Value visitConstantExpr(AffineConstantExpr expr) {
    auto op = arith::ConstantIndexOp::create(builder, loc, expr.getValue());
    return op.getResult();
  }

  Value visitDimExpr(AffineDimExpr expr) {
    assert(expr.getPosition() < dimValues.size() &&
           "affine dim position out of range");
    return dimValues[expr.getPosition()];
  }

  Value visitSymbolExpr(AffineSymbolExpr expr) {
    assert(expr.getPosition() < symbolValues.size() &&
           "symbol dim position out of range");
    return symbolValues[expr.getPosition()];
  }

private:
  OpBuilder &builder;
  // Values bound to the dim/symbol positions of the expression being expanded.
  ValueRange dimValues;
  ValueRange symbolValues;

  Location loc;
};
} // namespace
216
217/// Create a sequence of operations that implement the `expr` applied to the
218/// given dimension and symbol values.
219mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
220 AffineExpr expr,
221 ValueRange dimValues,
222 ValueRange symbolValues) {
223 return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
224}
225
226/// Create a sequence of operations that implement the `affineMap` applied to
227/// the given `operands` (as it it were an AffineApplyOp).
228std::optional<SmallVector<Value, 8>>
229mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
230 AffineMap affineMap, ValueRange operands) {
231 auto numDims = affineMap.getNumDims();
232 auto expanded = llvm::map_to_vector<8>(
233 affineMap.getResults(),
234 [numDims, &builder, loc, operands](AffineExpr expr) {
235 return expandAffineExpr(builder, loc, expr,
236 operands.take_front(numDims),
237 operands.drop_front(numDims));
238 });
239 if (llvm::all_of(expanded, [](Value v) { return v; }))
240 return expanded;
241 return std::nullopt;
242}
243
244/// Promotes the `then` or the `else` block of `ifOp` (depending on whether
245/// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
246/// the rest of the op.
247static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
248 if (elseBlock)
249 assert(ifOp.hasElse() && "else block expected");
250
251 Block *destBlock = ifOp->getBlock();
252 Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
253 destBlock->getOperations().splice(
254 Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
255 std::prev(srcBlock->end()));
256 ifOp.erase();
257}
258
259/// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
260/// on. The `ifOp` could be hoisted and placed right before such an operation.
261/// This method assumes that the ifOp has been canonicalized (to be correct and
262/// effective).
263static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
264 // Walk up the parents past all for op that this conditional is invariant on.
265 auto ifOperands = ifOp.getOperands();
266 Operation *res = ifOp;
268 auto *parentOp = res->getParentOp();
269 if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
270 if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
271 break;
272 } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
273 if (llvm::any_of(parallelOp.getIVs(), [&](Value iv) {
274 return llvm::is_contained(ifOperands, iv);
275 }))
276 break;
277 } else if (!isa<AffineIfOp>(parentOp)) {
278 // Won't walk up past anything other than affine.for/if ops.
279 break;
280 }
281 // You can always hoist up past any affine.if ops.
282 res = parentOp;
283 }
284 return res;
285}
286
/// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
/// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
/// otherwise the same `ifOp`.
static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
  // No hoisting to do.
  if (hoistOverOp == ifOp)
    return ifOp;

  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
  // the else block. Then drop the else block of the original 'if' in the 'then'
  // branch while promoting its then block, and analogously drop the 'then'
  // block of the original 'if' from the 'else' branch while promoting its else
  // block.
  IRMapping operandMap;
  OpBuilder b(hoistOverOp);
  // The hoisted 'if' reuses the original integer set and (already composed)
  // operands; it always gets an else region.
  auto hoistedIfOp = AffineIfOp::create(b, ifOp.getLoc(), ifOp.getIntegerSet(),
                                        ifOp.getOperands(),
                                        /*elseBlock=*/true);

  // Create a clone of hoistOverOp to use for the else branch of the hoisted
  // conditional. The else block may get optimized away if empty.
  Operation *hoistOverOpClone = nullptr;
  // We use this unique name to identify/find `ifOp`'s clone in the else
  // version.
  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
  operandMap.clear();
  b.setInsertionPointAfter(hoistOverOp);
  // We'll set an attribute to identify this op in a clone of this sub-tree.
  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);

  // Promote the 'then' block of the original affine.if in the then version.
  promoteIfBlock(ifOp, /*elseBlock=*/false);

  // Move the then version to the hoisted if op's 'then' block.
  auto *thenBlock = hoistedIfOp.getThenBlock();
  thenBlock->getOperations().splice(thenBlock->begin(),
                                    hoistOverOp->getBlock()->getOperations(),
                                    Block::iterator(hoistOverOp));

  // Find the clone of the original affine.if op in the else version.
  AffineIfOp ifCloneInElse;
  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
    if (!ifClone->getAttr(idForIfOp))
      return WalkResult::advance();
    ifCloneInElse = ifClone;
    return WalkResult::interrupt();
  });
  assert(ifCloneInElse && "if op clone should exist");
  // For the else block, promote the else block of the original 'if' if it had
  // one; otherwise, the op itself is to be erased.
  if (!ifCloneInElse.hasElse())
    ifCloneInElse.erase();
  else
    promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);

  // Move the else version into the else block of the hoisted if op.
  auto *elseBlock = hoistedIfOp.getElseBlock();
  elseBlock->getOperations().splice(
      elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
      Block::iterator(hoistOverOpClone));

  return hoistedIfOp;
}
351
352LogicalResult
354 ArrayRef<LoopReduction> parallelReductions,
355 AffineParallelOp *resOp) {
356 // Fail early if there are iter arguments that are not reductions.
357 unsigned numReductions = parallelReductions.size();
358 if (numReductions != forOp.getNumIterOperands())
359 return failure();
360
361 Location loc = forOp.getLoc();
362 OpBuilder outsideBuilder(forOp);
363 AffineMap lowerBoundMap = forOp.getLowerBoundMap();
364 ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
365 AffineMap upperBoundMap = forOp.getUpperBoundMap();
366 ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
367
368 // Creating empty 1-D affine.parallel op.
369 auto reducedValues = llvm::map_to_vector<4>(
370 parallelReductions, [](const LoopReduction &red) { return red.value; });
371 auto reductionKinds = llvm::map_to_vector<4>(
372 parallelReductions, [](const LoopReduction &red) { return red.kind; });
373 AffineParallelOp newPloop = AffineParallelOp::create(
374 outsideBuilder, loc, ValueRange(reducedValues).getTypes(), reductionKinds,
375 llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
376 llvm::ArrayRef(upperBoundMap), upperBoundOperands,
377 llvm::ArrayRef(forOp.getStepAsInt()));
378 // Steal the body of the old affine for op.
379 newPloop.getRegion().takeBody(forOp.getRegion());
380 Operation *yieldOp = &newPloop.getBody()->back();
381
382 // Handle the initial values of reductions because the parallel loop always
383 // starts from the neutral value.
384 SmallVector<Value> newResults;
385 newResults.reserve(numReductions);
386 for (unsigned i = 0; i < numReductions; ++i) {
387 Value init = forOp.getInits()[i];
388 // This works because we are only handling single-op reductions at the
389 // moment. A switch on reduction kind or a mechanism to collect operations
390 // participating in the reduction will be necessary for multi-op reductions.
391 Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
392 assert(reductionOp && "yielded value is expected to be produced by an op");
393 outsideBuilder.getInsertionBlock()->getOperations().splice(
394 outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
395 reductionOp);
396 reductionOp->setOperands({init, newPloop->getResult(i)});
397 forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
398 }
399
400 // Update the loop terminator to yield reduced values bypassing the reduction
401 // operation itself (now moved outside of the loop) and erase the block
402 // arguments that correspond to reductions. Note that the loop always has one
403 // "main" induction variable whenc coming from a non-parallel for.
404 unsigned numIVs = 1;
405 yieldOp->setOperands(reducedValues);
406 newPloop.getBody()->eraseArguments(numIVs, numReductions);
407
408 forOp.erase();
409 if (resOp)
410 *resOp = newPloop;
411 return success();
412}
413
414// Returns success if any hoisting happened.
415LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
416 // Bail out early if the ifOp returns a result. TODO: Consider how to
417 // properly support this case.
418 if (ifOp.getNumResults() != 0)
419 return failure();
420
421 // Apply canonicalization patterns and folding - this is necessary for the
422 // hoisting check to be correct (operands should be composed), and to be more
423 // effective (no unused operands). Since the pattern rewriter's folding is
424 // entangled with application of patterns, we may fold/end up erasing the op,
425 // in which case we return with `folded` being set.
426 RewritePatternSet patterns(ifOp.getContext());
427 AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
428 FrozenRewritePatternSet frozenPatterns(std::move(patterns));
429 bool erased;
431 ifOp.getOperation(), frozenPatterns,
433 /*changed=*/nullptr, &erased);
434 if (erased) {
435 if (folded)
436 *folded = true;
437 return failure();
438 }
439 if (folded)
440 *folded = false;
441
442 // The folding above should have ensured this.
443 assert(llvm::all_of(ifOp.getOperands(),
444 [](Value v) {
445 return isTopLevelValue(v) || isAffineInductionVar(v);
446 }) &&
447 "operands not composed");
448
449 // We are going hoist as high as possible.
450 // TODO: this could be customized in the future.
451 auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
452
453 AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
454 // Nothing to hoist over.
455 if (hoistedIfOp == ifOp)
456 return failure();
457
458 // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
459 // a sequence of affine.fors that are all perfectly nested).
461 hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
462 frozenPatterns);
463
464 return success();
465}
466
467// Return the min expr after replacing the given dim.
468AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
470 bool positivePath) {
471 if (e == dim)
472 return positivePath ? min : max;
473 if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
474 AffineExpr lhs = bin.getLHS();
475 AffineExpr rhs = bin.getRHS();
476 if (bin.getKind() == mlir::AffineExprKind::Add)
477 return substWithMin(lhs, dim, min, max, positivePath) +
478 substWithMin(rhs, dim, min, max, positivePath);
479
480 auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
481 auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
482 if (c1 && c1.getValue() < 0)
484 bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
485 if (c2 && c2.getValue() < 0)
487 bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
489 bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
490 substWithMin(rhs, dim, min, max, positivePath));
491 }
492 return e;
493}
494
495void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
496 // Loops with min/max in bounds are not normalized at the moment.
497 if (op.hasMinMaxBounds())
498 return;
499
500 AffineMap lbMap = op.getLowerBoundsMap();
501 SmallVector<int64_t, 8> steps = op.getSteps();
502 // No need to do any work if the parallel op is already normalized.
503 bool isAlreadyNormalized =
504 llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
505 int64_t step = std::get<0>(tuple);
506 auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
507 return lbExpr && lbExpr.getValue() == 0 && step == 1;
508 });
509 if (isAlreadyNormalized)
510 return;
511
512 AffineValueMap ranges;
513 AffineValueMap::difference(op.getUpperBoundsValueMap(),
514 op.getLowerBoundsValueMap(), &ranges);
515 auto builder = OpBuilder::atBlockBegin(op.getBody());
516 auto zeroExpr = builder.getAffineConstantExpr(0);
519 for (unsigned i = 0, e = steps.size(); i < e; ++i) {
520 int64_t step = steps[i];
521
522 // Adjust the lower bound to be 0.
523 lbExprs.push_back(zeroExpr);
524
525 // Adjust the upper bound expression: 'range / step'.
526 AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
527 ubExprs.push_back(ubExpr);
528
529 // Adjust the corresponding IV: 'lb + i * step'.
530 BlockArgument iv = op.getBody()->getArgument(i);
531 AffineExpr lbExpr = lbMap.getResult(i);
532 unsigned nDims = lbMap.getNumDims();
533 auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
534 auto map = AffineMap::get(/*dimCount=*/nDims + 1,
535 /*symbolCount=*/lbMap.getNumSymbols(), expr);
536
537 // Use an 'affine.apply' op that will be simplified later in subsequent
538 // canonicalizations.
539 OperandRange lbOperands = op.getLowerBoundsOperands();
540 OperandRange dimOperands = lbOperands.take_front(nDims);
541 OperandRange symbolOperands = lbOperands.drop_front(nDims);
542 SmallVector<Value, 8> applyOperands{dimOperands};
543 applyOperands.push_back(iv);
544 applyOperands.append(symbolOperands.begin(), symbolOperands.end());
545 auto apply =
546 AffineApplyOp::create(builder, op.getLoc(), map, applyOperands);
547 iv.replaceAllUsesExcept(apply, apply);
548 }
549
550 SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
551 op.setSteps(newSteps);
552 auto newLowerMap = AffineMap::get(
553 /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
554 op.setLowerBounds({}, newLowerMap);
555 auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
556 ubExprs, op.getContext());
557 op.setUpperBounds(ranges.getOperands(), newUpperMap);
558}
559
/// Normalizes `op` to have lower bound 0 and step 1. Optionally promotes
/// single-iteration loops instead. Returns failure when the loop cannot be
/// normalized (e.g. a max lower bound with multiple results).
LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
                                               bool promoteSingleIter) {
  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
    return success();

  // Check if the for op is already normalized.
  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
      (op.getStep() == 1))
    return success();

  // Check if the lower bound has a single result only. Loops with a max lower
  // bound can't be normalized without additional support like
  // affine.execute_region's. If the lower bound does not have a single result
  // then skip this op.
  if (op.getLowerBoundMap().getNumResults() != 1)
    return failure();

  Location loc = op.getLoc();
  OpBuilder opBuilder(op);
  int64_t origLoopStep = op.getStepAsInt();

  // Construct the new upper bound value map.
  AffineMap oldLbMap = op.getLowerBoundMap();
  // The upper bound can have multiple results. To use
  // AffineValueMap::difference, we need to have the same number of results in
  // both lower and upper bound maps. So, we just create a value map for the
  // lower bound with the only available lower bound result repeated to pad up
  // to the number of upper bound results.
  SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
                                  op.getLowerBoundMap().getResult(0));
  AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
  AffineMap paddedLbMap =
      AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
                     op.getContext());
  AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
  AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
  AffineValueMap newUbValueMap;
  // Compute the `upper bound - lower bound`.
  AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
  (void)newUbValueMap.canonicalize();

  // Scale down the upper bound value map by the loop step.
  unsigned numResult = newUbValueMap.getNumResults();
  SmallVector<AffineExpr> scaleDownExprs(numResult);
  for (unsigned i = 0; i < numResult; ++i)
    scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
  // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
  // step). Where `n` is the number of results in the upper bound map.
  AffineMap scaleDownMap =
      AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
  AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());

  // Set the newly create upper bound map and operands.
  op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
  op.setStep(1);

  // Calculate the Value of new loopIV. Create affine.apply for the value of
  // the loopIV in normalized loop.
  opBuilder.setInsertionPointToStart(op.getBody());
  // Construct an affine.apply op mapping the new IV to the old IV:
  // old_iv = lb + new_iv * step, computed as lb - (-new_iv * step).
  AffineMap scaleIvMap =
      AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
  AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
  AffineValueMap newIvToOldIvMap;
  AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
  (void)newIvToOldIvMap.canonicalize();
  auto newIV =
      AffineApplyOp::create(opBuilder, loc, newIvToOldIvMap.getAffineMap(),
                            newIvToOldIvMap.getOperands());
  // Rewire all users of the old IV to the remapped value, except the
  // affine.apply that performs the remapping itself.
  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
  return success();
}
633
634/// Returns true if the memory operation of `destAccess` depends on `srcAccess`
635/// inside of the innermost common surrounding affine loop between the two
636/// accesses.
637static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
638 const MemRefAccess &destAccess) {
639 // Affine dependence analysis is possible only if both ops in the same
640 // AffineScope.
641 if (getAffineAnalysisScope(srcAccess.opInst) !=
642 getAffineAnalysisScope(destAccess.opInst))
643 return false;
644
645 unsigned nsLoops =
646 getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
648 checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
649 return hasDependence(result);
650}
651
652/// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
653/// scope of the outermost `minSurroundingLoops` loops that surround them.
654/// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
655static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
656 unsigned minSurroundingLoops) {
657 MemRefAccess srcAccess(srcMemOp);
658 MemRefAccess destAccess(destMemOp);
659
660 // Affine dependence analysis here is applicable only if both ops operate on
661 // the same memref and if `srcMemOp` and `destMemOp` are in the same
662 // AffineScope. Also, we can only check if our affine scope is isolated from
663 // above; otherwise, values can from outside of the affine scope that the
664 // check below cannot analyze.
665 Region *srcScope = getAffineAnalysisScope(srcMemOp);
666 if (srcAccess.memref == destAccess.memref &&
667 srcScope == getAffineAnalysisScope(destMemOp)) {
668 unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
669 FlatAffineValueConstraints dependenceConstraints;
670 for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
672 srcAccess, destAccess, d, &dependenceConstraints,
673 /*dependenceComponents=*/nullptr);
674 // A dependence failure or the presence of a dependence implies a
675 // side effect.
676 if (!noDependence(result))
677 return true;
678 }
679 // No side effect was seen.
680 return false;
681 }
682 // TODO: Check here if the memrefs alias: there is no side effect if
683 // `srcAccess.memref` and `destAccess.memref` don't alias.
684 return true;
685}
686
687template <typename EffectType, typename T>
689 Operation *start, T memOp,
691 // A boolean representing whether an intervening operation could have impacted
692 // memOp.
693 bool hasSideEffect = false;
694
695 // Check whether the effect on memOp can be caused by a given operation op.
696 Value memref = memOp.getMemRef();
697 std::function<void(Operation *)> checkOperation = [&](Operation *op) {
698 // If the effect has alreay been found, early exit,
699 if (hasSideEffect)
700 return;
701
702 if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
704 memEffect.getEffects(effects);
705
706 bool opMayHaveEffect = false;
707 for (auto effect : effects) {
708 // If op causes EffectType on a potentially aliasing location for
709 // memOp, mark as having the effect.
710 if (isa<EffectType>(effect.getEffect())) {
711 if (effect.getValue() && effect.getValue() != memref &&
712 !mayAlias(effect.getValue(), memref))
713 continue;
714 opMayHaveEffect = true;
715 break;
716 }
717 }
718
719 if (!opMayHaveEffect)
720 return;
721
722 // If the side effect comes from an affine read or write, try to
723 // prove the side effecting `op` cannot reach `memOp`.
724 if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
725 // For ease, let's consider the case that `op` is a store and
726 // we're looking for other potential stores that overwrite memory after
727 // `start`, and before being read in `memOp`. In this case, we only
728 // need to consider other potential stores with depth >
729 // minSurroundingLoops since `start` would overwrite any store with a
730 // smaller number of surrounding loops before.
731 unsigned minSurroundingLoops =
732 getNumCommonSurroundingLoops(*start, *memOp);
733 if (mayHaveEffect(op, memOp, minSurroundingLoops))
734 hasSideEffect = true;
735 return;
736 }
737
738 // We have an op with a memory effect and we cannot prove if it
739 // intervenes.
740 hasSideEffect = true;
741 return;
742 }
743
744 if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
745 // Recurse into the regions for this op and check whether the internal
746 // operations may have the side effect `EffectType` on memOp.
747 for (Region &region : op->getRegions())
748 for (Block &block : region)
749 for (Operation &op : block)
750 checkOperation(&op);
751 return;
752 }
753
754 // Otherwise, conservatively assume generic operations have the effect
755 // on the operation
756 hasSideEffect = true;
757 };
758
759 // Check all paths from ancestor op `parent` to the operation `to` for the
760 // effect. It is known that `to` must be contained within `parent`.
761 auto until = [&](Operation *parent, Operation *to) {
762 // TODO check only the paths from `parent` to `to`.
763 // Currently we fallback and check the entire parent op, rather than
764 // just the paths from the parent path, stopping after reaching `to`.
765 // This is conservatively correct, but could be made more aggressive.
766 assert(parent->isAncestor(to));
767 checkOperation(parent);
768 };
769
770 // Check for all paths from operation `from` to operation `untilOp` for the
771 // given memory effect.
772 std::function<void(Operation *, Operation *)> recur =
773 [&](Operation *from, Operation *untilOp) {
774 assert(
775 from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
776 "Checking for side effect between two operations without a common "
777 "ancestor");
778
779 // If the operations are in different regions, recursively consider all
780 // path from `from` to the parent of `to` and all paths from the parent
781 // of `to` to `to`.
782 if (from->getParentRegion() != untilOp->getParentRegion()) {
783 recur(from, untilOp->getParentOp());
784 until(untilOp->getParentOp(), untilOp);
785 return;
786 }
787
788 // Now, assuming that `from` and `to` exist in the same region, perform
789 // a CFG traversal to check all the relevant operations.
790
791 // Additional blocks to consider.
792 SmallVector<Block *, 2> todoBlocks;
793 {
794 // First consider the parent block of `from` an check all operations
795 // after `from`.
796 for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
797 iter != end && &*iter != untilOp; ++iter) {
798 checkOperation(&*iter);
799 }
800
801 // If the parent of `from` doesn't contain `to`, add the successors
802 // to the list of blocks to check.
803 if (untilOp->getBlock() != from->getBlock())
804 for (Block *succ : from->getBlock()->getSuccessors())
805 todoBlocks.push_back(succ);
806 }
807
809 // Traverse the CFG until hitting `to`.
810 while (!todoBlocks.empty()) {
811 Block *blk = todoBlocks.pop_back_val();
812 if (done.count(blk))
813 continue;
814 done.insert(blk);
815 for (auto &op : *blk) {
816 if (&op == untilOp)
817 break;
818 checkOperation(&op);
819 if (&op == blk->getTerminator())
820 for (Block *succ : blk->getSuccessors())
821 todoBlocks.push_back(succ);
822 }
823 }
824 };
825 recur(start, memOp);
826 return !hasSideEffect;
827}
828
829/// Attempt to eliminate loadOp by replacing it with a value stored into memory
830/// which the load is guaranteed to retrieve. This check involves three
831/// components: 1) The store and load must be on the same location 2) The store
832/// must dominate (and therefore must always occur prior to) the load 3) No
833/// other operations will overwrite the memory loaded between the given load
834/// and store. If such a value exists, the replaced `loadOp` will be added to
835/// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
837 AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
838 SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
840
841 // The store op candidate for forwarding that satisfies all conditions
842 // to replace the load, if any.
843 Operation *lastWriteStoreOp = nullptr;
844
845 for (auto *user : loadOp.getMemRef().getUsers()) {
846 auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
847 if (!storeOp)
848 continue;
849 MemRefAccess srcAccess(storeOp);
850 MemRefAccess destAccess(loadOp);
851
852 // 1. Check if the store and the load have mathematically equivalent
853 // affine access functions; this implies that they statically refer to the
854 // same single memref element. As an example this filters out cases like:
855 // store %A[%i0 + 1]
856 // load %A[%i0]
857 // store %A[%M]
858 // load %A[%N]
859 // Use the AffineValueMap difference based memref access equality checking.
860 if (srcAccess != destAccess)
861 continue;
862
863 // 2. The store has to dominate the load op to be candidate.
864 if (!domInfo.dominates(storeOp, loadOp))
865 continue;
866
867 // 3. The store must reach the load. Access function equivalence only
868 // guarantees this for accesses in the same block. The load could be in a
869 // nested block that is unreachable.
870 if (!mustReachAtInnermost(srcAccess, destAccess))
871 continue;
872
873 // 4. Ensure there is no intermediate operation which could replace the
874 // value in memory.
876 mayAlias))
877 continue;
878
879 // We now have a candidate for forwarding.
880 assert(lastWriteStoreOp == nullptr &&
881 "multiple simultaneous replacement stores");
882 lastWriteStoreOp = storeOp;
883 }
884
885 if (!lastWriteStoreOp)
886 return;
887
888 // Perform the actual store to load forwarding.
889 Value storeVal =
890 cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
891 // Check if 2 values have the same shape. This is needed for affine vector
892 // loads and stores.
893 if (storeVal.getType() != loadOp.getValue().getType())
894 return;
895 loadOp.getValue().replaceAllUsesWith(storeVal);
896 // Record the memref for a later sweep to optimize away.
897 memrefsToErase.insert(loadOp.getMemRef());
898 // Record this to erase later.
899 loadOpsToErase.push_back(loadOp);
900}
901
902template bool
904 affine::AffineReadOpInterface>(
905 mlir::Operation *, affine::AffineReadOpInterface,
907
908// This attempts to find stores which have no impact on the final result.
 909 // A writing op writeA will be eliminated if there exists an op writeB such that:
910// 1) writeA and writeB have mathematically equivalent affine access functions.
911// 2) writeB writes the same type as writeA (so it fully covers writeA's bytes).
912// 3) writeB postdominates writeA.
913// 4) There is no potential read between writeA and writeB.
914static void findUnusedStore(AffineWriteOpInterface writeA,
916 PostDominanceInfo &postDominanceInfo,
918
919 for (Operation *user : writeA.getMemRef().getUsers()) {
920 // Only consider writing operations.
921 auto writeB = dyn_cast<AffineWriteOpInterface>(user);
922 if (!writeB)
923 continue;
924
925 // The operations must be distinct.
926 if (writeB == writeA)
927 continue;
928
929 // Both operations must lie in the same region.
930 if (writeB->getParentRegion() != writeA->getParentRegion())
931 continue;
932
933 // Both operations must write to the same memory.
934 MemRefAccess srcAccess(writeB);
935 MemRefAccess destAccess(writeA);
936
937 if (srcAccess != destAccess)
938 continue;
939
940 // Check that the store types match. If types differ, writeB may not cover
941 // all bytes written by writeA (e.g. a narrower vector type), so
942 // conservatively assume writeA is not dead.
943 // One could be tempted whether writeA type is smaller than writeB, however
944 // it can become tricky with cases like vector<4xi6> vs vector<3xi8> due to
945 // padding that can be datalayout dependent.
946 if (writeA.getValueToStore().getType() !=
947 writeB.getValueToStore().getType())
948 continue;
949
950 // writeB must postdominate writeA.
951 if (!postDominanceInfo.postDominates(writeB, writeA))
952 continue;
953
954 // There cannot be an operation which reads from memory between
955 // the two writes.
957 mayAlias))
958 continue;
959
960 opsToErase.push_back(writeA);
961 break;
962 }
963}
964
965// The load to load forwarding / redundant load elimination is similar to the
966// store to load forwarding.
 967 // loadA will be replaced with loadB if:
968// 1) loadA and loadB have mathematically equivalent affine access functions.
969// 2) loadB dominates loadA.
970// 3) There is no write between loadA and loadB.
971static void loadCSE(AffineReadOpInterface loadA,
972 SmallVectorImpl<Operation *> &loadOpsToErase,
973 DominanceInfo &domInfo,
976 for (auto *user : loadA.getMemRef().getUsers()) {
977 auto loadB = dyn_cast<AffineReadOpInterface>(user);
978 if (!loadB || loadB == loadA)
979 continue;
980
981 MemRefAccess srcAccess(loadB);
982 MemRefAccess destAccess(loadA);
983
984 // 1. The accesses should be to be to the same location.
985 if (srcAccess != destAccess) {
986 continue;
987 }
988
989 // 2. loadB should dominate loadA.
990 if (!domInfo.dominates(loadB, loadA))
991 continue;
992
993 // 3. There should not be a write between loadA and loadB.
995 loadB.getOperation(), loadA, mayAlias))
996 continue;
997
998 // Check if two values have the same shape. This is needed for affine vector
999 // loads.
1000 if (loadB.getValue().getType() != loadA.getValue().getType())
1001 continue;
1002
1003 loadCandidates.push_back(loadB);
1004 }
1005
1006 // Of the legal load candidates, use the one that dominates all others
1007 // to minimize the subsequent need to loadCSE
1008 Value loadB;
1009 for (AffineReadOpInterface option : loadCandidates) {
1010 if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
1011 return depStore == option ||
1012 domInfo.dominates(option.getOperation(),
1013 depStore.getOperation());
1014 })) {
1015 loadB = option.getValue();
1016 break;
1017 }
1018 }
1019
1020 if (loadB) {
1021 loadA.getValue().replaceAllUsesWith(loadB);
1022 // Record this to erase later.
1023 loadOpsToErase.push_back(loadA);
1024 }
1025}
1026
1027// The store to load forwarding and load CSE rely on three conditions:
1028//
1029// 1) store/load providing a replacement value and load being replaced need to
1030// have mathematically equivalent affine access functions (checked after full
1031// composition of load/store operands); this implies that they access the same
1032// single memref element for all iterations of the common surrounding loop,
1033//
1034// 2) the store/load op should dominate the load op,
1035//
1036// 3) no operation that may write to memory read by the load being replaced can
1037// occur after executing the instruction (load or store) providing the
1038// replacement value and before the load being replaced (thus potentially
1039// allowing overwriting the memory read by the load).
1040//
1041// The above conditions are simple to check, sufficient, and powerful for most
1042// cases in practice - they are sufficient, but not necessary --- since they
1043// don't reason about loops that are guaranteed to execute at least once or
1044// multiple sources to forward from.
1045//
1046// TODO: more forwarding can be done when support for
1047// loop/conditional live-out SSA values is available.
1048// TODO: do general dead store elimination for memref's. This pass
1049// currently only eliminates the stores only if no other loads/uses (other
1050// than dealloc) remain.
1051//
void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
                                       PostDominanceInfo &postDomInfo,
                                       AliasAnalysis &aliasAnalysis) {
  // Load op's whose results were replaced by those forwarded from stores.
  SmallVector<Operation *, 8> opsToErase;

  // A list of memref's that are potentially dead / could be eliminated.
  SmallPtrSet<Value, 4> memrefsToErase;

  // Conservative aliasing oracle: treat two values as aliasing unless the
  // analysis definitively proves they do not.
  auto mayAlias = [&](Value val1, Value val2) -> bool {
    return !aliasAnalysis.alias(val1, val2).isNo();
  };

  // Walk all load's and perform store to load forwarding.
  f.walk([&](AffineReadOpInterface loadOp) {
    forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
  });
  // Erase only after the walk completes so the traversal isn't invalidated.
  for (auto *op : opsToErase)
    op->erase();
  opsToErase.clear();

  // Walk all store's and perform unused store elimination.
  f.walk([&](AffineWriteOpInterface storeOp) {
    findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
  });
  for (auto *op : opsToErase)
    op->erase();
  opsToErase.clear();

  // Check if the store fwd'ed memrefs are now left with only stores and
  // deallocs and can thus be completely deleted. Note: the canonicalize pass
  // should be able to do this as well, but we'll do it here since we collected
  // these anyway.
  for (auto memref : memrefsToErase) {
    // If the memref hasn't been locally alloc'ed, skip.
    Operation *defOp = memref.getDefiningOp();
    if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
      // TODO: if the memref was returned by a 'call' operation, we
      // could still erase it if the call had no side-effects.
      continue;
    // Skip if any user is something other than a store or a free of the
    // memref itself.
    if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
          return !isa<AffineWriteOpInterface>(ownerOp) &&
                 !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
        }))
      continue;

    // Erase all stores, the dealloc, and the alloc on the memref.
    for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
      user->erase();
    defOp->erase();
  }

  // To eliminate as many loads as possible, run load CSE after eliminating
  // stores. Otherwise, some stores are wrongly seen as having an intervening
  // effect.
  f.walk([&](AffineReadOpInterface loadOp) {
    loadCSE(loadOp, opsToErase, domInfo, mayAlias);
  });
  for (auto *op : opsToErase)
    op->erase();
}
1113
1114// Checks if `op` is non dereferencing.
1115// TODO: This hardcoded check will be removed once the right interface is added.
1117 return isa<AffineMapAccessInterface, memref::LoadOp, memref::StoreOp>(op);
1118}
1119
// Perform the replacement in `op`: if `op` dereferences `oldMemRef`, its
// access map and indices are rewritten in terms of `newMemRef` (with
// `extraIndices` prepended and `indexRemap` applied), and `op` is recreated
// in place; non-dereferencing uses are a plain operand swap when
// `allowNonDereferencingOps` is set.
LogicalResult mlir::affine::replaceAllMemRefUsesWith(
    Value oldMemRef, Value newMemRef, Operation *op,
    ArrayRef<Value> extraIndices, AffineMap indexRemap,
    ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
    bool allowNonDereferencingOps) {
  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
  (void)newMemRefRank; // unused in opt mode
  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
  (void)oldMemRefRank; // unused in opt mode
  if (indexRemap) {
    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
           "symbolic operand count mismatch");
    assert(indexRemap.getNumInputs() ==
           extraOperands.size() + oldMemRefRank + symbolOperands.size());
    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
  } else {
    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
  }

  // Assert same elemental type.
  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
         cast<MemRefType>(newMemRef.getType()).getElementType());

  // Collect all operand positions at which `op` uses `oldMemRef`.
  SmallVector<unsigned, 2> usePositions;
  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
    if (opEntry.value() == oldMemRef)
      usePositions.push_back(opEntry.index());
  }

  // If memref doesn't appear, nothing to do.
  if (usePositions.empty())
    return success();

  unsigned memRefOperandPos = usePositions.front();

  OpBuilder builder(op);
  // The following checks if op is dereferencing memref and performs the access
  // index rewrites.
  if (!isDereferencingOp(op)) {
    if (!allowNonDereferencingOps) {
      // Failure: memref used in a non-dereferencing context (potentially
      // escapes); no replacement in these cases unless allowNonDereferencingOps
      // is set.
      return failure();
    }
    // Non-dereferencing use: a plain operand substitution suffices.
    for (unsigned pos : usePositions)
      op->setOperand(pos, newMemRef);
    return success();
  }

  if (usePositions.size() > 1) {
    // TODO: extend it for this case when needed (rare).
    LLVM_DEBUG(llvm::dbgs()
               << "multiple dereferencing uses in a single op not supported");
    return failure();
  }

  // Perform index rewrites for the dereferencing op and then replace the op.
  SmallVector<Value, 4> oldMapOperands;
  AffineMap oldMap;
  unsigned oldMemRefNumIndices = oldMemRefRank;
  auto startIdx = op->operand_begin() + memRefOperandPos + 1;
  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
  if (affMapAccInterface) {
    // If `op` implements AffineMapAccessInterface, we can get the indices by
    // querying the number of map operands from the operand list from a certain
    // offset (`memRefOperandPos` in this case).
    NamedAttribute oldMapAttrPair =
        affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
    oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
    oldMemRefNumIndices = oldMap.getNumInputs();
  }
  oldMapOperands.assign(startIdx, startIdx + oldMemRefNumIndices);

  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
  SmallVector<Value, 4> oldMemRefOperands;
  SmallVector<Value, 4> affineApplyOps;
  oldMemRefOperands.reserve(oldMemRefRank);
  if (affMapAccInterface &&
      oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
    // Materialize one single-result affine.apply per result of `oldMap`.
    for (auto resultExpr : oldMap.getResults()) {
      auto singleResMap = AffineMap::get(oldMap.getNumDims(),
                                         oldMap.getNumSymbols(), resultExpr);
      auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
                                        oldMapOperands);
      oldMemRefOperands.push_back(afOp);
      affineApplyOps.push_back(afOp);
    }
  } else {
    // Identity map (or no map interface): indices are used as-is.
    oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
  }

  // Construct new indices as a remap of the old ones if a remapping has been
  // provided. The indices of a memref come right after it, i.e.,
  // at position memRefOperandPos + 1.
  SmallVector<Value, 4> remapOperands;
  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
                        symbolOperands.size());
  remapOperands.append(extraOperands.begin(), extraOperands.end());
  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
  remapOperands.append(symbolOperands.begin(), symbolOperands.end());

  SmallVector<Value, 4> remapOutputs;
  remapOutputs.reserve(oldMemRefRank);
  if (indexRemap &&
      indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
    // Remapped indices.
    for (auto resultExpr : indexRemap.getResults()) {
      auto singleResMap = AffineMap::get(
          indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
      auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
                                        remapOperands);
      remapOutputs.push_back(afOp);
      affineApplyOps.push_back(afOp);
    }
  } else {
    // No remapping specified.
    remapOutputs.assign(remapOperands.begin(), remapOperands.end());
  }
  SmallVector<Value, 4> newMapOperands;
  newMapOperands.reserve(newMemRefRank);

  // Prepend 'extraIndices' in 'newMapOperands'.
  for (Value extraIndex : extraIndices) {
    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
           "invalid memory op index");
    newMapOperands.push_back(extraIndex);
  }

  // Append 'remapOutputs' to 'newMapOperands'.
  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());

  // Create new fully composed AffineMap for new op to be created.
  assert(newMapOperands.size() == newMemRefRank);
  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
  newMap = simplifyAffineMap(newMap);
  canonicalizeMapAndOperands(&newMap, &newMapOperands);
  // Remove any affine.apply's that became dead as a result of composition.
  for (Value value : affineApplyOps)
    if (value.use_empty())
      value.getDefiningOp()->erase();

  OperationState state(op->getLoc(), op->getName());
  // Construct the new operation using this memref.
  state.operands.reserve(op->getNumOperands() + extraIndices.size());
  // Insert the non-memref operands.
  state.operands.append(op->operand_begin(),
                        op->operand_begin() + memRefOperandPos);
  // Insert the new memref value.
  state.operands.push_back(newMemRef);

  // Insert the new memref map operands.
  if (affMapAccInterface) {
    state.operands.append(newMapOperands.begin(), newMapOperands.end());
  } else {
    // In the case of dereferencing ops not implementing
    // AffineMapAccessInterface, we need to apply the values of `newMapOperands`
    // to the `newMap` to get the correct indices.
    for (unsigned i = 0; i < newMemRefRank; i++) {
      state.operands.push_back(AffineApplyOp::create(
          builder, op->getLoc(),
          AffineMap::get(newMap.getNumDims(), newMap.getNumSymbols(),
                         newMap.getResult(i)),
          newMapOperands));
    }
  }

  // Insert the remaining operands unmodified.
  unsigned oldMapNumInputs = oldMapOperands.size();
  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
                            oldMapNumInputs,
                        op->operand_end());
  // Result types don't change. Both memref's are of the same elemental type.
  state.types.reserve(op->getNumResults());
  for (auto result : op->getResults())
    state.types.push_back(result.getType());

  // Add attribute for 'newMap', other Attributes do not change.
  auto newMapAttr = AffineMapAttr::get(newMap);
  for (auto namedAttr : op->getAttrs()) {
    if (affMapAccInterface &&
        namedAttr.getName() ==
            affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef).getName())
      state.attributes.push_back({namedAttr.getName(), newMapAttr});
    else
      state.attributes.push_back(namedAttr);
  }

  // Create the new operation.
  auto *repOp = builder.create(state);
  op->replaceAllUsesWith(repOp);
  op->erase();

  return success();
}
1317
/// Replaces all uses of `oldMemRef` that pass `userFilterFn` with `newMemRef`,
/// delegating the per-operation rewrite to the single-op overload above.
/// Dealloc-like users are skipped unless `replaceInDeallocOp` is set.
LogicalResult mlir::affine::replaceAllMemRefUsesWith(
    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
    AffineMap indexRemap, ArrayRef<Value> extraOperands,
    ArrayRef<Value> symbolOperands,
    llvm::function_ref<bool(Operation *)> userFilterFn,
    bool allowNonDereferencingOps, bool replaceInDeallocOp) {
  unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
  (void)newMemRefRank; // unused in opt mode
  unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
  (void)oldMemRefRank;
  if (indexRemap) {
    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
           "symbol operand count mismatch");
    assert(indexRemap.getNumInputs() ==
           extraOperands.size() + oldMemRefRank + symbolOperands.size());
    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
  } else {
    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
  }

  // Assert same elemental type.
  assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
         cast<MemRefType>(newMemRef.getType()).getElementType());

  // Walk all uses of old memref; collect ops to perform replacement. We use a
  // DenseSet since an operation could potentially have multiple uses of a
  // memref (although rare), and the replacement later is going to erase ops.
  DenseSet<Operation *> opsToReplace;
  for (auto *user : oldMemRef.getUsers()) {
    // Check if this user doesn't pass the filter.
    if (userFilterFn && !userFilterFn(user))
      continue;

    // Skip dealloc's - no replacement is necessary, and a memref replacement
    // at other uses doesn't hurt these dealloc's.
    if (hasSingleEffect<MemoryEffects::Free>(user, oldMemRef) &&
        !replaceInDeallocOp)
      continue;

    // Check if the memref was used in a non-dereferencing context. It is fine
    // for the memref to be used in a non-dereferencing way outside of the
    // region where this replacement is happening.
    if (!isa<AffineMapAccessInterface>(*user)) {
      if (!allowNonDereferencingOps) {
        LLVM_DEBUG(
            llvm::dbgs()
            << "Memref replacement failed: non-deferencing memref user: \n"
            << *user << '\n');
        return failure();
      }
      // Non-dereferencing ops with the MemRefsNormalizable trait are
      // supported for replacement.
      if (!user->hasTrait<OpTrait::MemRefsNormalizable>()) {
        LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
                                   "memrefs normalizable trait: \n"
                                << *user << '\n');
        return failure();
      }
    }

    // We'll first collect and then replace --- since replacement erases the
    // user that has the use, and that user could be postDomFilter or domFilter
    // itself!
    opsToReplace.insert(user);
  }

  for (auto *user : opsToReplace) {
    if (failed(replaceAllMemRefUsesWith(
            oldMemRef, newMemRef, user, extraIndices, indexRemap, extraOperands,
            symbolOperands, allowNonDereferencingOps)))
      return failure();
  }

  return success();
}
1393
1394/// Given an operation, inserts one or more single result affine
1395/// apply operations, results of which are exclusively used by this
1396/// operation. The operands of these newly created affine apply ops are
1397/// guaranteed to be loop iterators or terminal symbols of a function.
1398///
1399/// Before
1400///
1401/// affine.for %i = 0 to #map(%N)
1402/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1403/// "send"(%idx, %A, ...)
1404/// "compute"(%idx)
1405///
1406/// After
1407///
1408/// affine.for %i = 0 to #map(%N)
1409/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1410/// "send"(%idx, %A, ...)
1411/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1412/// "compute"(%idx_)
1413///
1414/// This allows applying different transformations on send and compute (for eg.
1415/// different shifts/delays).
1416///
1417/// Leaves `sliceOps` empty either if none of opInst's operands were the result
1418/// of an affine.apply (and thus there was no affine computation slice to
1419/// create), or if all the affine.apply op's supplying operands to this opInst
1420/// did not have any uses besides this opInst; otherwise returns the list of
1421/// affine.apply operations created in output argument `sliceOps`.
1422void mlir::affine::createAffineComputationSlice(
1423 Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1424 // Collect all operands that are results of affine apply ops.
1425 SmallVector<Value, 4> subOperands;
1426 subOperands.reserve(opInst->getNumOperands());
1427 for (auto operand : opInst->getOperands())
1428 if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1429 subOperands.push_back(operand);
1430
1431 // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1432 SmallVector<Operation *, 4> affineApplyOps;
1433 getReachableAffineApplyOps(subOperands, affineApplyOps);
1434 // Skip transforming if there are no affine maps to compose.
1435 if (affineApplyOps.empty())
1436 return;
1437
1438 // Check if all uses of the affine apply op's lie only in this op op, in
1439 // which case there would be nothing to do.
1440 bool localized = true;
1441 for (auto *op : affineApplyOps) {
1442 for (auto result : op->getResults()) {
1443 for (auto *user : result.getUsers()) {
1444 if (user != opInst) {
1445 localized = false;
1446 break;
1447 }
1448 }
1449 }
1450 }
1451 if (localized)
1452 return;
1453
1454 OpBuilder builder(opInst);
1455 SmallVector<Value, 4> composedOpOperands(subOperands);
1456 auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1457 fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1458
1459 // Create an affine.apply for each of the map results.
1460 sliceOps->reserve(composedMap.getNumResults());
1461 for (auto resultExpr : composedMap.getResults()) {
1462 auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1463 composedMap.getNumSymbols(), resultExpr);
1464 sliceOps->push_back(AffineApplyOp::create(
1465 builder, opInst->getLoc(), singleResMap, composedOpOperands));
1466 }
1467
1468 // Construct the new operands that include the results from the composed
1469 // affine apply op above instead of existing ones (subOperands). So, they
1470 // differ from opInst's operands only for those operands in 'subOperands', for
1471 // which they will be replaced by the corresponding one from 'sliceOps'.
1472 SmallVector<Value, 4> newOperands(opInst->getOperands());
1473 for (Value &operand : newOperands) {
1474 // Replace the subOperands from among the new operands.
1475 unsigned j, f;
1476 for (j = 0, f = subOperands.size(); j < f; j++) {
1477 if (operand == subOperands[j])
1478 break;
1479 }
1480 if (j < subOperands.size())
1481 operand = (*sliceOps)[j];
1482 }
1483 for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1484 opInst->setOperand(idx, newOperands[idx]);
1485}
1486
/// Enum to set patterns of affine expr in tiled-layout map.
/// TileFloorDiv: <dim expr> div <tile size>
/// TileMod: <dim expr> mod <tile size>
/// TileNone: None of the above
/// Example:
/// #tiled_2d_128x256 = affine_map<(d0, d1)
///                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
/// "d0 mod 128" and "d1 mod 256" ==> TileMod
enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
1498/// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
1499/// being floordiv'ed by respective tile sizes appeare in a mod with the same
1500/// tile sizes, and no other expression involves those k dimensions. This
1501/// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
1502/// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
1503/// tiled layout, an empty vector is returned.
1504static LogicalResult getTileSizePos(
1505 AffineMap map,
1506 SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1507 // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1508 // `floordiv` and its position in `map` output.
1509 // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1510 // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1511 // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1513 unsigned pos = 0;
1514 for (AffineExpr expr : map.getResults()) {
1515 if (expr.getKind() == AffineExprKind::FloorDiv) {
1516 AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1517 if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1518 floordivExprs.emplace_back(
1519 std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1520 }
1521 pos++;
1522 }
1523 // Not tiled layout if `floordivExprs` is empty.
1524 if (floordivExprs.empty()) {
1526 return success();
1527 }
1528
1529 // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1530 // not tiled layout.
1531 for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1532 AffineExpr floordivExprLHS = std::get<0>(fexpr);
1533 AffineExpr floordivExprRHS = std::get<1>(fexpr);
1534 unsigned floordivPos = std::get<2>(fexpr);
1535
1536 // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
1537 // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
1538 // other expr, the map is not tiled layout. Example of non tiled layout:
1539 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1540 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1541 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1542 // 256)>
1543 bool found = false;
1544 pos = 0;
1545 for (AffineExpr expr : map.getResults()) {
1546 bool notTiled = false;
1547 if (pos != floordivPos) {
1548 expr.walk([&](AffineExpr e) {
1549 if (e == floordivExprLHS) {
1550 if (expr.getKind() == AffineExprKind::Mod) {
1551 AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1552 // If LHS and RHS of `mod` are the same with those of floordiv.
1553 if (floordivExprLHS == binaryExpr.getLHS() &&
1554 floordivExprRHS == binaryExpr.getRHS()) {
1555 // Save tile size (RHS of `mod`), and position of `floordiv` and
1556 // `mod` if same expr with `mod` is not found yet.
1557 if (!found) {
1558 tileSizePos.emplace_back(
1559 std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1560 found = true;
1561 } else {
1562 // Non tiled layout: Have multilpe `mod` with the same LHS.
1563 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1564 // mod 256, d2 mod 256)>
1565 notTiled = true;
1566 }
1567 } else {
1568 // Non tiled layout: RHS of `mod` is different from `floordiv`.
1569 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1570 // mod 128)>
1571 notTiled = true;
1572 }
1573 } else {
1574 // Non tiled layout: LHS is the same, but not `mod`.
1575 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1576 // floordiv 256)>
1577 notTiled = true;
1578 }
1579 }
1580 });
1581 }
1582 if (notTiled) {
1584 return success();
1585 }
1586 pos++;
1587 }
1588 }
1589 return success();
1590}
1591
1592/// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1593/// after normalization. Dimensions that include dynamic dimensions in the map
1594/// output will become dynamic dimensions. Return true if `dim` is dynamic
1595/// dimension.
1596///
1597/// Example:
1598/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1599///
1600/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
1601/// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1602static bool
1604 SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1605 AffineExpr expr = layoutMap.getResults()[dim];
1606 // Check if affine expr of the dimension includes dynamic dimension of input
1607 // memrefType.
1608 MLIRContext *context = layoutMap.getContext();
1609 return expr
1610 .walk([&](AffineExpr e) {
1611 if (isa<AffineDimExpr>(e) &&
1612 llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1613 return e == getAffineDimExpr(dim, context);
1614 }))
1615 return WalkResult::interrupt();
1616 return WalkResult::advance();
1617 })
1618 .wasInterrupted();
1619}
1620
1621/// Create affine expr to calculate dimension size for a tiled-layout map.
1623 TileExprPattern pat) {
1624 // Create map output for the patterns.
1625 // "floordiv <tile size>" ==> "ceildiv <tile size>"
1626 // "mod <tile size>" ==> "<tile size>"
1627 AffineExpr newMapOutput;
1628 AffineBinaryOpExpr binaryExpr = nullptr;
1629 switch (pat) {
1631 binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1632 newMapOutput = binaryExpr.getRHS();
1633 break;
1635 binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1636 newMapOutput = getAffineBinaryOpExpr(
1637 AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1638 break;
1639 default:
1640 newMapOutput = oldMapOutput;
1641 }
1642 return newMapOutput;
1643}
1644
/// Create new maps to calculate each dimension size of `newMemRefType`, and
/// create `newDynamicSizes` from them by using AffineApplyOp.
///
/// Steps for normalizing dynamic memrefs for a tiled layout map
/// Example:
///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
///    %0 = dim %arg0, %c1 : memref<4x?xf32>
///    %1 = alloc(%0) : memref<4x?xf32, #map0>
///
/// (Before this function)
/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
///    a single layout map is supported.
///
/// 2. Create a normalized memrefType using `isNormalizedMemRefDynamicDim()`.
///    It is memref<4x?x?xf32> in the above example.
///
/// (In this function)
/// 3. Create new maps to calculate each dimension of the normalized memrefType
///    using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
///    dimension size can be calculated by replacing "floordiv <tile size>"
///    with "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
///    - New maps in the above example
///      #map0 = affine_map<(d0, d1) -> (d0)>
///      #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
///      #map2 = affine_map<(d0, d1) -> (32)>
///
/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
///    is used in dynamicSizes of the new AllocOp.
///    %0 = dim %arg0, %c1 : memref<4x?xf32>
///    %c4 = arith.constant 4 : index
///    %1 = affine.apply #map1(%c4, %0)
///    %2 = affine.apply #map2(%c4, %0)
1677template <typename AllocLikeOp>
1678static void createNewDynamicSizes(MemRefType oldMemRefType,
1679 MemRefType newMemRefType, AffineMap map,
1680 AllocLikeOp allocOp, OpBuilder b,
1681 SmallVectorImpl<Value> &newDynamicSizes) {
1682 // Create new input for AffineApplyOp.
1683 SmallVector<Value, 4> inAffineApply;
1684 ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1685 unsigned dynIdx = 0;
1686 for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1687 if (oldMemRefShape[d] < 0) {
1688 // Use dynamicSizes of allocOp for dynamic dimension.
1689 inAffineApply.emplace_back(allocOp.getDynamicSizes()[dynIdx]);
1690 dynIdx++;
1691 } else {
1692 // Create ConstantOp for static dimension.
1693 auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1694 inAffineApply.emplace_back(
1695 arith::ConstantOp::create(b, allocOp.getLoc(), constantAttr));
1696 }
1697 }
1698
1699 // Create new map to calculate each dimension size of new memref for each
1700 // original map output. Only for dynamic dimesion of `newMemRefType`.
1701 unsigned newDimIdx = 0;
1702 ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1704 (void)getTileSizePos(map, tileSizePos);
1705 for (AffineExpr expr : map.getResults()) {
1706 if (newMemRefShape[newDimIdx] < 0) {
1707 // Create new maps to calculate each dimension size of new memref.
1709 for (auto pos : tileSizePos) {
1710 if (newDimIdx == std::get<1>(pos))
1712 else if (newDimIdx == std::get<2>(pos))
1714 }
1715 AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1716 AffineMap newMap =
1717 AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1718 Value affineApp =
1719 AffineApplyOp::create(b, allocOp.getLoc(), newMap, inAffineApply);
1720 newDynamicSizes.emplace_back(affineApp);
1721 }
1722 newDimIdx++;
1723 }
1724}
1725
1726template <typename AllocLikeOp>
1727LogicalResult mlir::affine::normalizeMemRef(AllocLikeOp allocOp) {
1728 MemRefType memrefType = allocOp.getType();
1729 OpBuilder b(allocOp);
1730
1731 // Fetch a new memref type after normalizing the old memref to have an
1732 // identity map layout.
1733 MemRefType newMemRefType = normalizeMemRefType(memrefType);
1734 if (newMemRefType == memrefType)
1735 // Either memrefType already had an identity map or the map couldn't be
1736 // transformed to an identity map.
1737 return failure();
1738
1739 Value oldMemRef = allocOp.getResult();
1740
1741 SmallVector<Value, 4> symbolOperands(allocOp.getSymbolOperands());
1742 AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1743 AllocLikeOp newAlloc;
1744 // Check if `layoutMap` is a tiled layout. Only single layout map is
1745 // supported for normalizing dynamic memrefs.
1747 (void)getTileSizePos(layoutMap, tileSizePos);
1748 if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1749 auto oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1750 SmallVector<Value, 4> newDynamicSizes;
1751 createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1752 newDynamicSizes);
1753 // Add the new dynamic sizes in new AllocOp.
1754 newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1755 newDynamicSizes, allocOp.getAlignmentAttr());
1756 } else {
1757 newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1758 allocOp.getAlignmentAttr());
1759 }
1760 // Replace all uses of the old memref.
1761 if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1762 /*extraIndices=*/{},
1763 /*indexRemap=*/layoutMap,
1764 /*extraOperands=*/{},
1765 /*symbolOperands=*/symbolOperands,
1766 /*userFilterFn=*/nullptr,
1767 /*allowNonDereferencingOps=*/true))) {
1768 // If it failed (due to escapes for example), bail out.
1769 newAlloc.erase();
1770 return failure();
1771 }
1772 // Replace any uses of the original alloc op and erase it. All remaining uses
1773 // have to be dealloc's; RAMUW above would've failed otherwise.
1774 assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1775 return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1776 }));
1777 oldMemRef.replaceAllUsesWith(newAlloc);
1778 allocOp.erase();
1779 return success();
1780}
1781
1782LogicalResult
1783mlir::affine::normalizeMemRef(memref::ReinterpretCastOp reinterpretCastOp) {
1784 MemRefType memrefType = reinterpretCastOp.getType();
1785 AffineMap oldLayoutMap = memrefType.getLayout().getAffineMap();
1786 Value oldMemRef = reinterpretCastOp.getResult();
1787
1788 // If `oldLayoutMap` is identity, `memrefType` is already normalized.
1789 if (oldLayoutMap.isIdentity())
1790 return success();
1791
1792 // Fetch a new memref type after normalizing the old memref to have an
1793 // identity map layout.
1794 MemRefType newMemRefType = normalizeMemRefType(memrefType);
1795 if (newMemRefType == memrefType)
1796 // `oldLayoutMap` couldn't be transformed to an identity map.
1797 return failure();
1798
1799 uint64_t newRank = newMemRefType.getRank();
1800 SmallVector<Value> mapOperands(oldLayoutMap.getNumDims() +
1801 oldLayoutMap.getNumSymbols());
1802 SmallVector<Value> oldStrides = reinterpretCastOp.getStrides();
1803 Location loc = reinterpretCastOp.getLoc();
1804 // As `newMemRefType` is normalized, it is unit strided.
1805 SmallVector<int64_t> newStaticStrides(newRank, 1);
1806 SmallVector<int64_t> newStaticOffsets(newRank, 0);
1807 ArrayRef<int64_t> oldShape = memrefType.getShape();
1808 ValueRange oldSizes = reinterpretCastOp.getSizes();
1809 unsigned idx = 0;
1810 OpBuilder b(reinterpretCastOp);
1811 // Collect the map operands which will be used to compute the new normalized
1812 // memref shape.
1813 for (unsigned i = 0, e = memrefType.getRank(); i < e; i++) {
1814 if (memrefType.isDynamicDim(i))
1815 mapOperands[i] =
1816 arith::SubIOp::create(b, loc, oldSizes[0].getType(), oldSizes[idx++],
1818 else
1819 mapOperands[i] = arith::ConstantIndexOp::create(b, loc, oldShape[i] - 1);
1820 }
1821 for (unsigned i = 0, e = oldStrides.size(); i < e; i++)
1822 mapOperands[memrefType.getRank() + i] = oldStrides[i];
1823 SmallVector<Value> newSizes;
1824 ArrayRef<int64_t> newShape = newMemRefType.getShape();
1825 // Compute size along all the dimensions of the new normalized memref.
1826 for (unsigned i = 0; i < newRank; i++) {
1827 if (!newMemRefType.isDynamicDim(i))
1828 continue;
1829 newSizes.push_back(AffineApplyOp::create(
1830 b, loc,
1831 AffineMap::get(oldLayoutMap.getNumDims(), oldLayoutMap.getNumSymbols(),
1832 oldLayoutMap.getResult(i)),
1833 mapOperands));
1834 }
1835 for (auto &newSize : newSizes) {
1836 newSize = arith::AddIOp::create(b, loc, newSize.getType(), newSize,
1838 }
1839 // Create the new reinterpret_cast op.
1840 auto newReinterpretCast = memref::ReinterpretCastOp::create(
1841 b, loc, newMemRefType, reinterpretCastOp.getSource(),
1842 /*offsets=*/ValueRange(), newSizes,
1843 /*strides=*/ValueRange(),
1844 /*static_offsets=*/newStaticOffsets,
1845 /*static_sizes=*/newShape,
1846 /*static_strides=*/newStaticStrides);
1847
1848 // Replace all uses of the old memref.
1849 if (failed(replaceAllMemRefUsesWith(oldMemRef,
1850 /*newMemRef=*/newReinterpretCast,
1851 /*extraIndices=*/{},
1852 /*indexRemap=*/oldLayoutMap,
1853 /*extraOperands=*/{},
1854 /*symbolOperands=*/oldStrides,
1855 /*userFilterFn=*/nullptr,
1856 /*allowNonDereferencingOps=*/true))) {
1857 // If it failed (due to escapes for example), bail out.
1858 newReinterpretCast.erase();
1859 return failure();
1860 }
1861
1862 oldMemRef.replaceAllUsesWith(newReinterpretCast);
1863 reinterpretCastOp.erase();
1864 return success();
1865}
1866
1867template LogicalResult
1868mlir::affine::normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
1869template LogicalResult
1870mlir::affine::normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
1871
1872MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1873 unsigned rank = memrefType.getRank();
1874 if (rank == 0)
1875 return memrefType;
1876
1877 if (memrefType.getLayout().isIdentity()) {
1878 // Either no maps is associated with this memref or this memref has
1879 // a trivial (identity) map.
1880 return memrefType;
1881 }
1882 AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1883 unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1884
1885 // We don't do any checks for one-to-one'ness; we assume that it is
1886 // one-to-one.
1887
1888 // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1889 // for now.
1890 // TODO: Normalize the other types of dynamic memrefs.
1892 (void)getTileSizePos(layoutMap, tileSizePos);
1893 if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1894 return memrefType;
1895
1896 // We have a single map that is not an identity map. Create a new memref
1897 // with the right shape and an identity layout map.
1898 ArrayRef<int64_t> shape = memrefType.getShape();
1899 // FlatAffineValueConstraint may later on use symbolicOperands.
1900 FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1901 SmallVector<unsigned, 4> memrefTypeDynDims;
1902 for (unsigned d = 0; d < rank; ++d) {
1903 // Use constraint system only in static dimensions.
1904 if (shape[d] > 0) {
1905 fac.addBound(BoundType::LB, d, 0);
1906 fac.addBound(BoundType::UB, d, shape[d] - 1);
1907 } else {
1908 memrefTypeDynDims.emplace_back(d);
1909 }
1910 }
1911 // We compose this map with the original index (logical) space to derive
1912 // the upper bounds for the new index space.
1913 unsigned newRank = layoutMap.getNumResults();
1914 if (failed(fac.composeMatchingMap(layoutMap)))
1915 return memrefType;
1916 // TODO: Handle semi-affine maps.
1917 // Project out the old data dimensions.
1918 fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1919 SmallVector<int64_t, 4> newShape(newRank);
1920 MLIRContext *context = memrefType.getContext();
1921 for (unsigned d = 0; d < newRank; ++d) {
1922 // Check if this dimension is dynamic.
1923 if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1924 newShape[d] = ShapedType::kDynamic;
1925 continue;
1926 }
1927 // The lower bound for the shape is always zero.
1928 std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1929 // For a static memref and an affine map with no symbols, this is
1930 // always bounded. However, when we have symbols, we may not be able to
1931 // obtain a constant upper bound. Also, mapping to a negative space is
1932 // invalid for normalization.
1933 if (!ubConst.has_value() || *ubConst < 0) {
1934 LLVM_DEBUG(llvm::dbgs()
1935 << "can't normalize map due to unknown/invalid upper bound");
1936 return memrefType;
1937 }
1938 // If dimension of new memrefType is dynamic, the value is -1.
1939 newShape[d] = *ubConst + 1;
1940 }
1941
1942 // Create the new memref type after trivializing the old layout map.
1943 auto newMemRefType =
1944 MemRefType::Builder(memrefType)
1945 .setShape(newShape)
1946 .setLayout(AffineMapAttr::get(
1947 AffineMap::getMultiDimIdentityMap(newRank, context)));
1948 return newMemRefType;
1949}
1950
1951DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1952 Value rhs) {
1953 DivModValue result;
1954 AffineExpr d0, d1;
1955 bindDims(b.getContext(), d0, d1);
1956 result.quotient =
1957 affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1958 result.remainder =
1959 affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1960 return result;
1961}
1962
1963/// Create an affine map that computes `lhs` * `rhs`, composing in any other
1964/// affine maps.
1965static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1966 Location loc,
1968 OpFoldResult rhs) {
1969 AffineExpr s0, s1;
1970 bindSymbols(b.getContext(), s0, s1);
1971 return makeComposedFoldedAffineApply(b, loc, s0 * s1, {lhs, rhs});
1972}
1973
1974FailureOr<SmallVector<Value>>
1975mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1976 ArrayRef<Value> basis, bool hasOuterBound) {
1977 if (hasOuterBound)
1978 basis = basis.drop_front();
1979
1980 // Note: the divisors are backwards due to the scan.
1981 SmallVector<Value> divisors;
1982 OpFoldResult basisProd = b.getIndexAttr(1);
1983 for (OpFoldResult basisElem : llvm::reverse(basis)) {
1984 FailureOr<OpFoldResult> nextProd =
1985 composedAffineMultiply(b, loc, basisElem, basisProd);
1986 if (failed(nextProd))
1987 return failure();
1988 basisProd = *nextProd;
1989 divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1990 }
1991
1992 SmallVector<Value> results;
1993 results.reserve(divisors.size() + 1);
1994 Value residual = linearIndex;
1995 for (Value divisor : llvm::reverse(divisors)) {
1996 DivModValue divMod = getDivMod(b, loc, residual, divisor);
1997 results.push_back(divMod.quotient);
1998 residual = divMod.remainder;
1999 }
2000 results.push_back(residual);
2001 return results;
2002}
2003
2004FailureOr<SmallVector<Value>>
2005mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
2007 bool hasOuterBound) {
2008 if (hasOuterBound)
2009 basis = basis.drop_front();
2010
2011 // Note: the divisors are backwards due to the scan.
2012 SmallVector<Value> divisors;
2013 OpFoldResult basisProd = b.getIndexAttr(1);
2014 for (OpFoldResult basisElem : llvm::reverse(basis)) {
2015 FailureOr<OpFoldResult> nextProd =
2016 composedAffineMultiply(b, loc, basisElem, basisProd);
2017 if (failed(nextProd))
2018 return failure();
2019 basisProd = *nextProd;
2020 divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
2021 }
2022
2023 SmallVector<Value> results;
2024 results.reserve(divisors.size() + 1);
2025 Value residual = linearIndex;
2026 for (Value divisor : llvm::reverse(divisors)) {
2027 DivModValue divMod = getDivMod(b, loc, residual, divisor);
2028 results.push_back(divMod.quotient);
2029 residual = divMod.remainder;
2030 }
2031 results.push_back(residual);
2032 return results;
2033}
2034
2035OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
2037 ImplicitLocOpBuilder &builder) {
2038 return linearizeIndex(builder, builder.getLoc(), multiIndex, basis);
2039}
2040
2041OpFoldResult mlir::affine::linearizeIndex(OpBuilder &builder, Location loc,
2042 ArrayRef<OpFoldResult> multiIndex,
2043 ArrayRef<OpFoldResult> basis) {
2044 assert(multiIndex.size() == basis.size() ||
2045 multiIndex.size() == basis.size() + 1);
2046 SmallVector<AffineExpr> basisAffine;
2047
2048 // Add a fake initial size in order to make the later index linearization
2049 // computations line up if an outer bound is not provided.
2050 if (multiIndex.size() == basis.size() + 1)
2051 basisAffine.push_back(getAffineConstantExpr(1, builder.getContext()));
2052
2053 for (size_t i = 0; i < basis.size(); ++i) {
2054 basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
2055 }
2056
2057 SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
2059 strides.reserve(stridesAffine.size());
2060 llvm::transform(stridesAffine, std::back_inserter(strides),
2061 [&builder, &basis, loc](AffineExpr strideExpr) {
2063 builder, loc, strideExpr, basis);
2064 });
2065
2066 auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2067 OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
2068 return affine::makeComposedFoldedAffineApply(builder, loc, linearIndexExpr,
2069 multiIndexAndStrides);
2070}
return success()
static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp, unsigned minSurroundingLoops)
Returns true if srcMemOp may have an effect on destMemOp within the scope of the outermost minSurroun...
Definition Utils.cpp:655
static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp)
A helper for the mechanics of mlir::hoistAffineIfOp.
Definition Utils.cpp:290
static void createNewDynamicSizes(MemRefType oldMemRefType, MemRefType newMemRefType, AffineMap map, AllocLikeOp allocOp, OpBuilder b, SmallVectorImpl< Value > &newDynamicSizes)
Create new maps to calculate each dimension size of newMemRefType, and create newDynamicSizes from th...
Definition Utils.cpp:1678
static bool isDereferencingOp(Operation *op)
Definition Utils.cpp:1116
static LogicalResult getTileSizePos(AffineMap map, SmallVectorImpl< std::tuple< AffineExpr, unsigned, unsigned > > &tileSizePos)
Check if map is a tiled layout.
Definition Utils.cpp:1504
TileExprPattern
Enum to set patterns of affine expr in tiled-layout map.
Definition Utils.cpp:1496
@ TileFloorDiv
Definition Utils.cpp:1496
@ TileNone
Definition Utils.cpp:1496
@ TileMod
Definition Utils.cpp:1496
static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock)
Promotes the then or the else block of ifOp (depending on whether elseBlock is false or true) into if...
Definition Utils.cpp:247
static bool isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap, SmallVectorImpl< unsigned > &inMemrefTypeDynDims)
Check if dim dimension of memrefType with layoutMap becomes dynamic after normalization.
Definition Utils.cpp:1603
static FailureOr< OpFoldResult > composedAffineMultiply(OpBuilder &b, Location loc, OpFoldResult lhs, OpFoldResult rhs)
Create an affine map that computes lhs * rhs, composing in any other affine maps.
Definition Utils.cpp:1965
static void loadCSE(AffineReadOpInterface loadA, SmallVectorImpl< Operation * > &loadOpsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition Utils.cpp:971
static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput, TileExprPattern pat)
Create affine expr to calculate dimension size for a tiled-layout map.
Definition Utils.cpp:1622
static void findUnusedStore(AffineWriteOpInterface writeA, SmallVectorImpl< Operation * > &opsToErase, PostDominanceInfo &postDominanceInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Definition Utils.cpp:914
static bool mustReachAtInnermost(const MemRefAccess &srcAccess, const MemRefAccess &destAccess)
Returns true if the memory operation of destAccess depends on srcAccess inside of the innermost commo...
Definition Utils.cpp:637
static void forwardStoreToLoad(AffineReadOpInterface loadOp, SmallVectorImpl< Operation * > &loadOpsToErase, SmallPtrSetImpl< Value > &memrefsToErase, DominanceInfo &domInfo, llvm::function_ref< bool(Value, Value)> mayAlias)
Attempt to eliminate loadOp by replacing it with a value stored into memory which the load is guarant...
Definition Utils.cpp:836
static Operation * getOutermostInvariantForOp(AffineIfOp ifOp)
Returns the outermost affine.for/parallel op that the ifOp is invariant on.
Definition Utils.cpp:263
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Definition PDL.cpp:62
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
lhs
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Affine binary operation expression.
Definition AffineExpr.h:214
AffineExpr getLHS() const
AffineExpr getRHS() const
An integer constant appearing in affine expression.
Definition AffineExpr.h:239
int64_t getValue() const
A dimensional identifier appearing in an affine expression.
Definition AffineExpr.h:223
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
Definition AffineExpr.h:68
AffineExpr floorDiv(uint64_t v) const
RetT walk(FnT &&callback) const
Walk all of the AffineExpr's in this expression in postorder.
Definition AffineExpr.h:117
AffineExprKind getKind() const
Return the classification for this type.
AffineExpr ceilDiv(uint64_t v) const
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition AffineMap.h:46
MLIRContext * getContext() const
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
unsigned getNumInputs() const
AffineExpr getResult(unsigned idx) const
AffineMap compose(AffineMap map) const
Returns the AffineMap resulting from composing this with map.
bool isIdentity() const
Returns true if this affine map is an identity affine map.
A symbolic identifier appearing in an affine expression.
Definition AffineExpr.h:231
unsigned getPosition() const
This class represents the main alias analysis interface in MLIR.
AliasResult alias(Value lhs, Value rhs)
Given two values, return their aliasing behavior.
bool isNo() const
Returns if this result indicates no possibility of aliasing.
This class represents an argument of a Block.
Definition Value.h:306
Block represents an ordered list of Operations.
Definition Block.h:33
OpListType::iterator iterator
Definition Block.h:150
OpListType & getOperations()
Definition Block.h:147
SuccessorRange getSuccessors()
Definition Block.h:280
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:249
iterator end()
Definition Block.h:154
iterator begin()
Definition Block.h:153
IntegerAttr getIndexAttr(int64_t value)
Definition Builders.cpp:112
AffineMap getMultiDimIdentityMap(unsigned rank)
Definition Builders.cpp:391
AffineExpr getAffineConstantExpr(int64_t constant)
Definition Builders.cpp:376
AffineExpr getAffineDimExpr(unsigned position)
Definition Builders.cpp:368
MLIRContext * getContext() const
Definition Builders.h:56
A class for computing basic dominance information.
Definition Dominance.h:140
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
Definition Dominance.h:158
This class represents a frozen set of patterns that can be processed by a pattern applicator.
This class allows control over how the GreedyPatternRewriteDriver works.
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
void clear()
Clears all mappings held by the mapper.
Definition IRMapping.h:79
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Definition Builders.h:632
Location getLoc() const
Accessors for the implied location.
Definition Builders.h:665
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This is a builder type that keeps local references to arguments.
Builder & setShape(ArrayRef< int64_t > newShape)
Builder & setLayout(MemRefLayoutAttrInterface newLayout)
NamedAttribute represents a combination of a name and an Attribute value.
Definition Attributes.h:164
Attribute getValue() const
Return the value of the attribute.
Definition Attributes.h:179
This class helps build Operations.
Definition Builders.h:209
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
Definition Builders.h:242
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Definition Builders.h:447
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition Builders.h:444
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition Builders.cpp:461
This class represents a single result from folding an operation.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
This class provides the API for ops that are known to be isolated from above.
This class implements the operand iterators for the Operation class.
Definition ValueRange.h:44
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Value getOperand(unsigned idx)
Definition Operation.h:376
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition Operation.h:775
void setOperand(unsigned idx, Value value)
Definition Operation.h:377
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition Operation.h:538
operand_iterator operand_begin()
Definition Operation.h:400
Block * getBlock()
Returns the operation block that contains this operation.
Definition Operation.h:231
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:433
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:241
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition Operation.h:252
unsigned getNumOperands()
Definition Operation.h:372
operand_iterator operand_end()
Definition Operation.h:401
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:116
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:404
bool isAncestor(Operation *other)
Return true if this operation is an ancestor of the other operation.
Definition Operation.h:289
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition Operation.h:298
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:823
user_range getUsers()
Returns a range of all users.
Definition Operation.h:899
result_range getResults()
Definition Operation.h:441
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition Operation.h:248
void erase()
Remove this operation from its parent block and delete it.
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:430
A class for computing basic postdominance information.
Definition Dominance.h:204
bool postDominates(Operation *a, Operation *b) const
Return true if operation A postdominates operation B.
Definition Dominance.h:213
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition Region.cpp:45
bool isAncestor(Region *other)
Return true if this region is ancestor of the other region.
Definition Region.h:233
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
void replaceAllUsesExcept(Value newValue, const SmallPtrSetImpl< Operation * > &exceptions)
Replace all uses of 'this' value with 'newValue', updating anything in the IR that uses 'this' to use...
Definition Value.cpp:71
void replaceAllUsesWith(Value newValue)
Replace all uses of 'this' value with the new value, updating anything in the IR that uses 'this' to ...
Definition Value.h:149
user_range getUsers() const
Definition Value.h:218
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
static WalkResult advance()
Definition WalkResult.h:47
static WalkResult interrupt()
Definition WalkResult.h:46
An AffineValueMap is an affine map plus its ML value operands and results for analysis purposes.
LogicalResult canonicalize()
Attempts to canonicalize the map and operands.
ArrayRef< Value > getOperands() const
AffineExpr getResult(unsigned i)
static void difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res)
Return the value map that is the difference of value maps 'a' and 'b', represented as an affine map a...
FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:363
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
LogicalResult promoteIfSingleIteration(AffineForOp forOp)
Promotes the loop body of a AffineForOp to its containing block if the loop was known to have a singl...
bool isValidDim(Value value)
Returns true if the given Value can be used as a dimension id in the region of the closest surroundin...
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b)
Returns the number of surrounding loops common to both A and B.
Definition Utils.cpp:2105
DependenceResult checkMemrefAccessDependence(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints=nullptr, SmallVector< DependenceComponent, 2 > *dependenceComponents=nullptr, bool allowRAR=false)
LogicalResult affineParallelize(AffineForOp forOp, ArrayRef< LoopReduction > parallelReductions={}, AffineParallelOp *resOp=nullptr)
Replaces a parallel affine.for op with a 1-d affine.parallel op.
Definition Utils.cpp:353
void canonicalizeMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands)
Modifies both map and operands in-place so as to:
void getReachableAffineApplyOps(ArrayRef< Value > operands, SmallVectorImpl< Operation * > &affineApplyOps)
Returns in affineApplyOps, the sequence of those AffineApplyOp Operations that are reachable via a se...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Region * getAffineAnalysisScope(Operation *op)
Returns the closest region enclosing op that is held by a non-affine operation; nullptr if there is n...
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands, bool composeAffineMin=false)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
bool isValidSymbol(Value value)
Returns true if the given value can be used as a symbol in the region of the closest surrounding op t...
bool hasDependence(DependenceResult result)
Utility function that returns true if the provided DependenceResult corresponds to a dependence resul...
bool hasNoInterveningEffect(Operation *start, T memOp, llvm::function_ref< bool(Value, Value)> mayAlias)
Hoists out affine.if/else to as high as possible, i.e., past all invariant affine....
Definition Utils.cpp:688
bool noDependence(DependenceResult result)
Returns true if the provided DependenceResult corresponds to the absence of a dependence.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
Value linearizeIndex(ValueRange indices, ArrayRef< int64_t > strides, int64_t offset, Type integerType, Location loc, OpBuilder &builder)
Generates IR to perform index linearization with the given indices and their corresponding strides,...
Include the generated interface declarations.
AffineMap simplifyAffineMap(AffineMap map)
Simplifies an affine map by simplifying its underlying AffineExpr results.
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:307
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition AffineExpr.h:311
std::pair< AffineExpr, SmallVector< OpFoldResult > > computeLinearIndex(OpFoldResult sourceOffset, ArrayRef< OpFoldResult > strides, ArrayRef< OpFoldResult > indices)
Compute linear index from provided strides and indices, assuming strided layout.
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
Definition LLVM.h:122
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
bool hasSingleEffect(Operation *op)
Returns "true" if op has only an effect of type EffectTy.
LogicalResult applyOpPatternsGreedily(ArrayRef< Operation * > ops, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr, bool *allErased=nullptr)
Rewrite the specified ops by repeatedly applying the highest benefit patterns in a greedy worklist dr...
@ CeilDiv
RHS of ceildiv is always a constant or a symbolic expression.
Definition AffineExpr.h:50
@ Mod
RHS of mod is always a constant or a symbolic expression with a positive value.
Definition AffineExpr.h:46
@ FloorDiv
RHS of floordiv is always a constant or a symbolic expression.
Definition AffineExpr.h:48
AffineExpr getAffineBinaryOpExpr(AffineExprKind kind, AffineExpr lhs, AffineExpr rhs)
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition AffineExpr.h:325
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:114
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
@ ExistingOps
Only pre-existing ops are processed.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
The following effect indicates that the operation reads from some resource.
This represents an operation in an abstracted form, suitable for use with the builder APIs.
Checks whether two accesses to the same memref access the same element.
A description of a (parallelizable) reduction in an affine loop.
arith::AtomicRMWKind kind
Reduction kind.
Value value
The value being reduced.
Encapsulates a memref load or store access information.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.