1//===- Utils.cpp ---- Utilities for affine dialect transformation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements miscellaneous transformation utilities for the Affine
10// dialect.
11//
12//===----------------------------------------------------------------------===//
13
14#include "mlir/Dialect/Affine/Utils.h"
15
25#include "mlir/IR/Dominance.h"
26#include "mlir/IR/IRMapping.h"
27#include "mlir/IR/IntegerSet.h"
29#include <optional>
30
31#define DEBUG_TYPE "affine-utils"
32
33using namespace mlir;
34using namespace affine;
35using namespace presburger;
36
37namespace {
38/// Visit affine expressions recursively and build the sequence of operations
39/// that corresponds to them. Visitation functions return a Value for the
40/// expression subtree they visited or `nullptr` on error.
41class AffineApplyExpander
42 : public AffineExprVisitor<AffineApplyExpander, Value> {
43public:
44 /// This internal class expects arguments to be non-null; checks must be
45 /// performed at the call site.
46 AffineApplyExpander(OpBuilder &builder, ValueRange dimValues,
47 ValueRange symbolValues, Location loc)
48 : builder(builder), dimValues(dimValues), symbolValues(symbolValues),
49 loc(loc) {}
50
51 template <typename OpTy>
52 Value buildBinaryExpr(AffineBinaryOpExpr expr,
53 arith::IntegerOverflowFlags overflowFlags =
54 arith::IntegerOverflowFlags::none) {
55 auto lhs = visit(expr.getLHS());
56 auto rhs = visit(expr.getRHS());
57 if (!lhs || !rhs)
58 return nullptr;
59 auto op = OpTy::create(builder, loc, lhs, rhs, overflowFlags);
60 return op.getResult();
61 }
62
63 Value visitAddExpr(AffineBinaryOpExpr expr) {
64 return buildBinaryExpr<arith::AddIOp>(expr);
65 }
66
67 Value visitMulExpr(AffineBinaryOpExpr expr) {
68 return buildBinaryExpr<arith::MulIOp>(expr,
69 arith::IntegerOverflowFlags::nsw);
70 }
71
72 /// Euclidean modulo operation: negative RHS is not allowed.
73 /// Remainder of the euclidean integer division is always non-negative.
74 ///
75 /// Implemented as
76 ///
77 /// a mod b =
78 /// let remainder = srem a, b;
79 /// negative = a < 0 in
80 /// select negative, remainder + b, remainder.
81 Value visitModExpr(AffineBinaryOpExpr expr) {
82 if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
83 if (rhsConst.getValue() <= 0) {
84 emitError(loc, "modulo by non-positive value is not supported");
85 return nullptr;
86 }
87 }
88
89 auto lhs = visit(expr.getLHS());
90 auto rhs = visit(expr.getRHS());
91 assert(lhs && rhs && "unexpected affine expr lowering failure");
92
93 Value remainder = arith::RemSIOp::create(builder, loc, lhs, rhs);
94 Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
95 Value isRemainderNegative = arith::CmpIOp::create(
96 builder, loc, arith::CmpIPredicate::slt, remainder, zeroCst);
97 Value correctedRemainder =
98 arith::AddIOp::create(builder, loc, remainder, rhs);
99 Value result = arith::SelectOp::create(builder, loc, isRemainderNegative,
100 correctedRemainder, remainder);
101 return result;
102 }
103
104 /// Floor division operation (rounds towards negative infinity).
105 ///
106 /// For positive divisors, it can be implemented without branching and with a
107 /// single division operation as
108 ///
109 /// a floordiv b =
110 /// let negative = a < 0 in
111 /// let absolute = negative ? -a - 1 : a in
112 /// let quotient = absolute / b in
113 /// negative ? -quotient - 1 : quotient
114 ///
115 /// Note: this lowering does not use arith.floordivsi because the lowering of
116 /// that to arith.divsi (see populateCeilFloorDivExpandOpsPatterns) generates
117 /// not one but two arith.divsi. That could be changed to one divsi, but one
118 /// way or another, going through arith.floordivsi will result in more complex
119 /// IR because arith.floordivsi is more general than affine floordiv in that
120 /// it supports negative RHS.
121 Value visitFloorDivExpr(AffineBinaryOpExpr expr) {
122 if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
123 if (rhsConst.getValue() <= 0) {
124 emitError(loc, "division by non-positive value is not supported");
125 return nullptr;
126 }
127 }
128 auto lhs = visit(expr.getLHS());
129 auto rhs = visit(expr.getRHS());
130 assert(lhs && rhs && "unexpected affine expr lowering failure");
131
132 Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
133 Value noneCst = arith::ConstantIndexOp::create(builder, loc, -1);
134 Value negative = arith::CmpIOp::create(
135 builder, loc, arith::CmpIPredicate::slt, lhs, zeroCst);
136 Value negatedDecremented =
137 arith::SubIOp::create(builder, loc, noneCst, lhs);
138 Value dividend = arith::SelectOp::create(builder, loc, negative,
139 negatedDecremented, lhs);
140 Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
141 Value correctedQuotient =
142 arith::SubIOp::create(builder, loc, noneCst, quotient);
143 Value result = arith::SelectOp::create(builder, loc, negative,
144 correctedQuotient, quotient);
145 return result;
146 }
147
148 /// Ceiling division operation (rounds towards positive infinity).
149 ///
150 /// For positive divisors, it can be implemented without branching and with a
151 /// single division operation as
152 ///
153 /// a ceildiv b =
154 /// let negative = a <= 0 in
155 /// let absolute = negative ? -a : a - 1 in
156 /// let quotient = absolute / b in
157 /// negative ? -quotient : quotient + 1
158 ///
159 /// Note: not using arith.ceildivsi for the same reason as explained in the
160 /// visitFloorDivExpr comment.
161 Value visitCeilDivExpr(AffineBinaryOpExpr expr) {
162 if (auto rhsConst = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
163 if (rhsConst.getValue() <= 0) {
164 emitError(loc, "division by non-positive value is not supported");
165 return nullptr;
166 }
167 }
168 auto lhs = visit(expr.getLHS());
169 auto rhs = visit(expr.getRHS());
170 assert(lhs && rhs && "unexpected affine expr lowering failure");
171
172 Value zeroCst = arith::ConstantIndexOp::create(builder, loc, 0);
173 Value oneCst = arith::ConstantIndexOp::create(builder, loc, 1);
174 Value nonPositive = arith::CmpIOp::create(
175 builder, loc, arith::CmpIPredicate::sle, lhs, zeroCst);
176 Value negated = arith::SubIOp::create(builder, loc, zeroCst, lhs);
177 Value decremented = arith::SubIOp::create(builder, loc, lhs, oneCst);
178 Value dividend = arith::SelectOp::create(builder, loc, nonPositive, negated,
179 decremented);
180 Value quotient = arith::DivSIOp::create(builder, loc, dividend, rhs);
181 Value negatedQuotient =
182 arith::SubIOp::create(builder, loc, zeroCst, quotient);
183 Value incrementedQuotient =
184 arith::AddIOp::create(builder, loc, quotient, oneCst);
185 Value result = arith::SelectOp::create(
186 builder, loc, nonPositive, negatedQuotient, incrementedQuotient);
187 return result;
188 }
189
190 Value visitConstantExpr(AffineConstantExpr expr) {
191 auto op = arith::ConstantIndexOp::create(builder, loc, expr.getValue());
192 return op.getResult();
193 }
194
195 Value visitDimExpr(AffineDimExpr expr) {
196 assert(expr.getPosition() < dimValues.size() &&
197 "affine dim position out of range");
198 return dimValues[expr.getPosition()];
199 }
200
201 Value visitSymbolExpr(AffineSymbolExpr expr) {
202 assert(expr.getPosition() < symbolValues.size() &&
203 "symbol dim position out of range");
204 return symbolValues[expr.getPosition()];
205 }
206
207private:
208 OpBuilder &builder;
209 ValueRange dimValues;
210 ValueRange symbolValues;
211
212 Location loc;
213};
214} // namespace
215
216/// Create a sequence of operations that implement the `expr` applied to the
217/// given dimension and symbol values.
218mlir::Value mlir::affine::expandAffineExpr(OpBuilder &builder, Location loc,
219 AffineExpr expr,
220 ValueRange dimValues,
221 ValueRange symbolValues) {
222 return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
223}
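// Hedged usage sketch (not part of the original file; `b`, `loc`, and the
// index-typed values `dVal`/`sVal` are assumed to exist in the caller):
//
//   // Expand (d0 + s0) * 4 into arith ops at b's current insertion point.
//   AffineExpr e = (b.getAffineDimExpr(0) + b.getAffineSymbolExpr(0)) * 4;
//   Value expanded = expandAffineExpr(b, loc, e, /*dimValues=*/ValueRange{dVal},
//                                     /*symbolValues=*/ValueRange{sVal});
//   // `expanded` is the result of an arith.muli of an arith.addi with the
//   // constant 4, or null if the expression could not be lowered.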
224
225/// Create a sequence of operations that implement the `affineMap` applied to
226/// the given `operands` (as if it were an AffineApplyOp).
227std::optional<SmallVector<Value, 8>>
228mlir::affine::expandAffineMap(OpBuilder &builder, Location loc,
229 AffineMap affineMap, ValueRange operands) {
230 auto numDims = affineMap.getNumDims();
231 auto expanded = llvm::to_vector<8>(
232 llvm::map_range(affineMap.getResults(),
233 [numDims, &builder, loc, operands](AffineExpr expr) {
234 return expandAffineExpr(builder, loc, expr,
235 operands.take_front(numDims),
236 operands.drop_front(numDims));
237 }));
238 if (llvm::all_of(expanded, [](Value v) { return v; }))
239 return expanded;
240 return std::nullopt;
241}
242
243/// Promotes the `then` or the `else` block of `ifOp` (depending on whether
244/// `elseBlock` is false or true) into `ifOp`'s containing block, and discards
245/// the rest of the op.
246static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) {
247 if (elseBlock)
248 assert(ifOp.hasElse() && "else block expected");
249
250 Block *destBlock = ifOp->getBlock();
251 Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock();
252 destBlock->getOperations().splice(
253 Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(),
254 std::prev(srcBlock->end()));
255 ifOp.erase();
256}
257
258/// Returns the outermost affine.for/parallel op that the `ifOp` is invariant
259/// on. The `ifOp` could be hoisted and placed right before such an operation.
260/// This method assumes that the ifOp has been canonicalized (to be correct and
261/// effective).
262static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) {
263 // Walk up the parents past all for op that this conditional is invariant on.
264 auto ifOperands = ifOp.getOperands();
265 Operation *res = ifOp;
266 while (!res->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
267 auto *parentOp = res->getParentOp();
268 if (auto forOp = dyn_cast<AffineForOp>(parentOp)) {
269 if (llvm::is_contained(ifOperands, forOp.getInductionVar()))
270 break;
271 } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) {
272 if (llvm::any_of(parallelOp.getIVs(), [&](Value iv) {
273 return llvm::is_contained(ifOperands, iv);
274 }))
275 break;
276 } else if (!isa<AffineIfOp>(parentOp)) {
277 // Won't walk up past anything other than affine.for/if ops.
278 break;
279 }
280 // You can always hoist up past any affine.if ops.
281 res = parentOp;
282 }
283 return res;
284}
285
286/// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
287/// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
288/// otherwise the same `ifOp`.
289static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
290 // No hoisting to do.
291 if (hoistOverOp == ifOp)
292 return ifOp;
293
294 // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
295 // the else block. Then drop the else block of the original 'if' in the 'then'
296 // branch while promoting its then block, and analogously drop the 'then'
297 // block of the original 'if' from the 'else' branch while promoting its else
298 // block.
299 IRMapping operandMap;
300 OpBuilder b(hoistOverOp);
301 auto hoistedIfOp = AffineIfOp::create(b, ifOp.getLoc(), ifOp.getIntegerSet(),
302 ifOp.getOperands(),
303 /*elseBlock=*/true);
304
305 // Create a clone of hoistOverOp to use for the else branch of the hoisted
306 // conditional. The else block may get optimized away if empty.
307 Operation *hoistOverOpClone = nullptr;
308 // We use this unique name to identify/find `ifOp`'s clone in the else
309 // version.
310 StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
311 operandMap.clear();
312 b.setInsertionPointAfter(hoistOverOp);
313 // We'll set an attribute to identify this op in a clone of this sub-tree.
314 ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
315 hoistOverOpClone = b.clone(*hoistOverOp, operandMap);
316
317 // Promote the 'then' block of the original affine.if in the then version.
318 promoteIfBlock(ifOp, /*elseBlock=*/false);
319
320 // Move the then version to the hoisted if op's 'then' block.
321 auto *thenBlock = hoistedIfOp.getThenBlock();
322 thenBlock->getOperations().splice(thenBlock->begin(),
323 hoistOverOp->getBlock()->getOperations(),
324 Block::iterator(hoistOverOp));
325
326 // Find the clone of the original affine.if op in the else version.
327 AffineIfOp ifCloneInElse;
328 hoistOverOpClone->walk([&](AffineIfOp ifClone) {
329 if (!ifClone->getAttr(idForIfOp))
330 return WalkResult::advance();
331 ifCloneInElse = ifClone;
332 return WalkResult::interrupt();
333 });
334 assert(ifCloneInElse && "if op clone should exist");
335 // For the else block, promote the else block of the original 'if' if it had
336 // one; otherwise, the op itself is to be erased.
337 if (!ifCloneInElse.hasElse())
338 ifCloneInElse.erase();
339 else
340 promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);
341
342 // Move the else version into the else block of the hoisted if op.
343 auto *elseBlock = hoistedIfOp.getElseBlock();
344 elseBlock->getOperations().splice(
345 elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
346 Block::iterator(hoistOverOpClone));
347
348 return hoistedIfOp;
349}
350
351LogicalResult
352mlir::affine::affineParallelize(AffineForOp forOp,
353 ArrayRef<LoopReduction> parallelReductions,
354 AffineParallelOp *resOp) {
355 // Fail early if there are iter arguments that are not reductions.
356 unsigned numReductions = parallelReductions.size();
357 if (numReductions != forOp.getNumIterOperands())
358 return failure();
359
360 Location loc = forOp.getLoc();
361 OpBuilder outsideBuilder(forOp);
362 AffineMap lowerBoundMap = forOp.getLowerBoundMap();
363 ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
364 AffineMap upperBoundMap = forOp.getUpperBoundMap();
365 ValueRange upperBoundOperands = forOp.getUpperBoundOperands();
366
367 // Creating empty 1-D affine.parallel op.
368 auto reducedValues = llvm::to_vector<4>(llvm::map_range(
369 parallelReductions, [](const LoopReduction &red) { return red.value; }));
370 auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
371 parallelReductions, [](const LoopReduction &red) { return red.kind; }));
372 AffineParallelOp newPloop = AffineParallelOp::create(
373 outsideBuilder, loc, ValueRange(reducedValues).getTypes(), reductionKinds,
374 llvm::ArrayRef(lowerBoundMap), lowerBoundOperands,
375 llvm::ArrayRef(upperBoundMap), upperBoundOperands,
376 llvm::ArrayRef(forOp.getStepAsInt()));
377 // Steal the body of the old affine for op.
378 newPloop.getRegion().takeBody(forOp.getRegion());
379 Operation *yieldOp = &newPloop.getBody()->back();
380
381 // Handle the initial values of reductions because the parallel loop always
382 // starts from the neutral value.
383 SmallVector<Value> newResults;
384 newResults.reserve(numReductions);
385 for (unsigned i = 0; i < numReductions; ++i) {
386 Value init = forOp.getInits()[i];
387 // This works because we are only handling single-op reductions at the
388 // moment. A switch on reduction kind or a mechanism to collect operations
389 // participating in the reduction will be necessary for multi-op reductions.
390 Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
391 assert(reductionOp && "yielded value is expected to be produced by an op");
392 outsideBuilder.getInsertionBlock()->getOperations().splice(
393 outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
394 reductionOp);
395 reductionOp->setOperands({init, newPloop->getResult(i)});
396 forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
397 }
398
399 // Update the loop terminator to yield reduced values bypassing the reduction
400 // operation itself (now moved outside of the loop) and erase the block
401 // arguments that correspond to reductions. Note that the loop always has one
402 // "main" induction variable whenc coming from a non-parallel for.
403 unsigned numIVs = 1;
404 yieldOp->setOperands(reducedValues);
405 newPloop.getBody()->eraseArguments(numIVs, numReductions);
406
407 forOp.erase();
408 if (resOp)
409 *resOp = newPloop;
410 return success();
411}
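// Illustrative before/after sketch for affineParallelize with a single "addf"
// reduction (not part of the original file; names and shapes are made up):
//
//   // Before:
//   %sum = affine.for %i = 0 to 64 iter_args(%acc = %cst0) -> (f32) {
//     %v = affine.load %A[%i] : memref<64xf32>
//     %new = arith.addf %acc, %v : f32
//     affine.yield %new : f32
//   }
//
//   // After (roughly): the loop becomes an affine.parallel with an "addf"
//   // reduction, and the initial value is combined outside the loop.
//   %red = affine.parallel (%i) = (0) to (64) reduce ("addf") -> (f32) {
//     %v = affine.load %A[%i] : memref<64xf32>
//     affine.yield %v : f32
//   }
//   %sum = arith.addf %cst0, %red : f32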
412
413// Returns success if any hoisting happened.
414LogicalResult mlir::affine::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
415 // Bail out early if the ifOp returns a result. TODO: Consider how to
416 // properly support this case.
417 if (ifOp.getNumResults() != 0)
418 return failure();
419
420 // Apply canonicalization patterns and folding - this is necessary for the
421 // hoisting check to be correct (operands should be composed), and to be more
422 // effective (no unused operands). Since the pattern rewriter's folding is
423 // entangled with application of patterns, we may fold/end up erasing the op,
424 // in which case we return with `folded` being set.
425 RewritePatternSet patterns(ifOp.getContext());
426 AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext());
427 FrozenRewritePatternSet frozenPatterns(std::move(patterns));
428 bool erased;
429 (void)applyOpPatternsGreedily(
430 ifOp.getOperation(), frozenPatterns,
431 GreedyRewriteConfig().setStrictness(GreedyRewriteStrictness::ExistingAndNewOps),
432 /*changed=*/nullptr, &erased);
433 if (erased) {
434 if (folded)
435 *folded = true;
436 return failure();
437 }
438 if (folded)
439 *folded = false;
440
441 // The folding above should have ensured this.
442 assert(llvm::all_of(ifOp.getOperands(),
443 [](Value v) {
444 return isTopLevelValue(v) || isAffineInductionVar(v);
445 }) &&
446 "operands not composed");
447
448 // We are going to hoist as high as possible.
449 // TODO: this could be customized in the future.
450 auto *hoistOverOp = getOutermostInvariantForOp(ifOp);
451
452 AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp);
453 // Nothing to hoist over.
454 if (hoistedIfOp == ifOp)
455 return failure();
456
457 // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up
458 // a sequence of affine.fors that are all perfectly nested).
459 (void)applyPatternsGreedily(
460 hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(),
461 frozenPatterns);
462
463 return success();
464}
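// Illustrative before/after sketch of the effect of hoistAffineIfOp (not part
// of the original file; bounds and the condition set are made up):
//
//   // Before: the condition depends only on symbol %N, not on %i.
//   affine.for %i = 0 to 128 {
//     affine.if affine_set<()[s0] : (s0 - 10 >= 0)>()[%N] {
//       "work"(%i) : (index) -> ()
//     }
//   }
//
//   // After: the if is hoisted above the loop; the loop is duplicated into
//   // then/else versions, and the empty else version is canonicalized away.
//   affine.if affine_set<()[s0] : (s0 - 10 >= 0)>()[%N] {
//     affine.for %i = 0 to 128 {
//       "work"(%i) : (index) -> ()
//     }
//   }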
465
466// Return the min expr after replacing the given dim.
467AffineExpr mlir::affine::substWithMin(AffineExpr e, AffineExpr dim,
468 AffineExpr min, AffineExpr max,
469 bool positivePath) {
470 if (e == dim)
471 return positivePath ? min : max;
472 if (auto bin = dyn_cast<AffineBinaryOpExpr>(e)) {
473 AffineExpr lhs = bin.getLHS();
474 AffineExpr rhs = bin.getRHS();
475 if (bin.getKind() == mlir::AffineExprKind::Add)
476 return substWithMin(lhs, dim, min, max, positivePath) +
477 substWithMin(rhs, dim, min, max, positivePath);
478
479 auto c1 = dyn_cast<AffineConstantExpr>(bin.getLHS());
480 auto c2 = dyn_cast<AffineConstantExpr>(bin.getRHS());
481 if (c1 && c1.getValue() < 0)
482 return getAffineBinaryOpExpr(
483 bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath));
484 if (c2 && c2.getValue() < 0)
485 return getAffineBinaryOpExpr(
486 bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2);
487 return getAffineBinaryOpExpr(
488 bin.getKind(), substWithMin(lhs, dim, min, max, positivePath),
489 substWithMin(rhs, dim, min, max, positivePath));
490 }
491 return e;
492}
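// Small worked example (not part of the original file; `ctx` is an assumed
// MLIRContext*): with d0 known to lie in [0, 15], substituting on the positive
// path gives a lower bound of the expression.
//
//   AffineExpr d0 = getAffineDimExpr(0, ctx);
//   AffineExpr lb = getAffineConstantExpr(0, ctx);
//   AffineExpr ub = getAffineConstantExpr(15, ctx);
//   // e = d0 * -2 + 8; the negative constant factor flips the substitution
//   // from `min` to `max` for that operand.
//   AffineExpr e = d0 * -2 + 8;
//   AffineExpr lower = substWithMin(e, d0, lb, ub, /*positivePath=*/true);
//   // `lower` is 15 * -2 + 8, which folds to the constant expression -22.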
493
494void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
495 // Loops with min/max in bounds are not normalized at the moment.
496 if (op.hasMinMaxBounds())
497 return;
498
499 AffineMap lbMap = op.getLowerBoundsMap();
500 SmallVector<int64_t, 8> steps = op.getSteps();
501 // No need to do any work if the parallel op is already normalized.
502 bool isAlreadyNormalized =
503 llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
504 int64_t step = std::get<0>(tuple);
505 auto lbExpr = dyn_cast<AffineConstantExpr>(std::get<1>(tuple));
506 return lbExpr && lbExpr.getValue() == 0 && step == 1;
507 });
508 if (isAlreadyNormalized)
509 return;
510
511 AffineValueMap ranges;
512 AffineValueMap::difference(op.getUpperBoundsValueMap(),
513 op.getLowerBoundsValueMap(), &ranges);
514 auto builder = OpBuilder::atBlockBegin(op.getBody());
515 auto zeroExpr = builder.getAffineConstantExpr(0);
516 SmallVector<AffineExpr, 8> lbExprs;
517 SmallVector<AffineExpr, 8> ubExprs;
518 for (unsigned i = 0, e = steps.size(); i < e; ++i) {
519 int64_t step = steps[i];
520
521 // Adjust the lower bound to be 0.
522 lbExprs.push_back(zeroExpr);
523
524 // Adjust the upper bound expression: 'range / step'.
525 AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
526 ubExprs.push_back(ubExpr);
527
528 // Adjust the corresponding IV: 'lb + i * step'.
529 BlockArgument iv = op.getBody()->getArgument(i);
530 AffineExpr lbExpr = lbMap.getResult(i);
531 unsigned nDims = lbMap.getNumDims();
532 auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
533 auto map = AffineMap::get(/*dimCount=*/nDims + 1,
534 /*symbolCount=*/lbMap.getNumSymbols(), expr);
535
536 // Use an 'affine.apply' op that will be simplified later in subsequent
537 // canonicalizations.
538 OperandRange lbOperands = op.getLowerBoundsOperands();
539 OperandRange dimOperands = lbOperands.take_front(nDims);
540 OperandRange symbolOperands = lbOperands.drop_front(nDims);
541 SmallVector<Value, 8> applyOperands{dimOperands};
542 applyOperands.push_back(iv);
543 applyOperands.append(symbolOperands.begin(), symbolOperands.end());
544 auto apply =
545 AffineApplyOp::create(builder, op.getLoc(), map, applyOperands);
546 iv.replaceAllUsesExcept(apply, apply);
547 }
548
549 SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
550 op.setSteps(newSteps);
551 auto newLowerMap = AffineMap::get(
552 /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
553 op.setLowerBounds({}, newLowerMap);
554 auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
555 ubExprs, op.getContext());
556 op.setUpperBounds(ranges.getOperands(), newUpperMap);
557}
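// Illustrative before/after sketch for normalizeAffineParallel (not part of
// the original file; the bounds are made up):
//
//   // Before: lower bound 4, step 2.
//   affine.parallel (%i) = (4) to (20) step (2) {
//     "use"(%i) : (index) -> ()
//   }
//
//   // After: the band runs from 0 with unit step ((20 - 4) ceildiv 2 = 8),
//   // and the original IV is rebuilt with an affine.apply inside the body.
//   affine.parallel (%ii) = (0) to (8) {
//     %i = affine.apply affine_map<(d0) -> (d0 * 2 + 4)>(%ii)
//     "use"(%i) : (index) -> ()
//   }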
558
559LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
560 bool promoteSingleIter) {
561 if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
562 return success();
563
564 // Check if the for op is already normalized.
565 if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
566 (op.getStep() == 1))
567 return success();
568
569 // Check if the lower bound has a single result only. Loops with a max lower
570 // bound can't be normalized without additional support like
571 // affine.execute_region's. If the lower bound does not have a single result
572 // then skip this op.
573 if (op.getLowerBoundMap().getNumResults() != 1)
574 return failure();
575
576 Location loc = op.getLoc();
577 OpBuilder opBuilder(op);
578 int64_t origLoopStep = op.getStepAsInt();
579
580 // Construct the new upper bound value map.
581 AffineMap oldLbMap = op.getLowerBoundMap();
582 // The upper bound can have multiple results. To use
583 // AffineValueMap::difference, we need to have the same number of results in
584 // both lower and upper bound maps. So, we just create a value map for the
585 // lower bound with the only available lower bound result repeated to pad up
586 // to the number of upper bound results.
587 SmallVector<AffineExpr> lbExprs(op.getUpperBoundMap().getNumResults(),
588 op.getLowerBoundMap().getResult(0));
589 AffineValueMap lbMap(oldLbMap, op.getLowerBoundOperands());
590 AffineMap paddedLbMap =
591 AffineMap::get(oldLbMap.getNumDims(), oldLbMap.getNumSymbols(), lbExprs,
592 op.getContext());
593 AffineValueMap paddedLbValueMap(paddedLbMap, op.getLowerBoundOperands());
594 AffineValueMap ubValueMap(op.getUpperBoundMap(), op.getUpperBoundOperands());
595 AffineValueMap newUbValueMap;
596 // Compute the `upper bound - lower bound`.
597 AffineValueMap::difference(ubValueMap, paddedLbValueMap, &newUbValueMap);
598 (void)newUbValueMap.canonicalize();
599
600 // Scale down the upper bound value map by the loop step.
601 unsigned numResult = newUbValueMap.getNumResults();
602 SmallVector<AffineExpr> scaleDownExprs(numResult);
603 for (unsigned i = 0; i < numResult; ++i)
604 scaleDownExprs[i] = opBuilder.getAffineDimExpr(i).ceilDiv(origLoopStep);
605 // `scaleDownMap` is (d0, d1, ..., d_n) -> (d0 / step, d1 / step, ..., d_n /
606 // step). Where `n` is the number of results in the upper bound map.
607 AffineMap scaleDownMap =
608 AffineMap::get(numResult, 0, scaleDownExprs, op.getContext());
609 AffineMap newUbMap = scaleDownMap.compose(newUbValueMap.getAffineMap());
610
611 // Set the newly created upper bound map and operands.
612 op.setUpperBound(newUbValueMap.getOperands(), newUbMap);
613 op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
614 op.setStep(1);
615
616 // Calculate the Value of new loopIV. Create affine.apply for the value of
617 // the loopIV in normalized loop.
618 opBuilder.setInsertionPointToStart(op.getBody());
619 // Construct an affine.apply op mapping the new IV to the old IV.
620 AffineMap scaleIvMap =
621 AffineMap::get(1, 0, -opBuilder.getAffineDimExpr(0) * origLoopStep);
622 AffineValueMap scaleIvValueMap(scaleIvMap, ValueRange{op.getInductionVar()});
623 AffineValueMap newIvToOldIvMap;
624 AffineValueMap::difference(lbMap, scaleIvValueMap, &newIvToOldIvMap);
625 (void)newIvToOldIvMap.canonicalize();
626 auto newIV =
627 AffineApplyOp::create(opBuilder, loc, newIvToOldIvMap.getAffineMap(),
628 newIvToOldIvMap.getOperands());
629 op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
630 return success();
631}
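// Illustrative before/after sketch for normalizeAffineFor (not part of the
// original file; the bounds are made up):
//
//   // Before: lower bound 2, step 3.
//   affine.for %i = 2 to 17 step 3 {
//     "use"(%i) : (index) -> ()
//   }
//
//   // After: trip count preserved as (17 - 2) ceildiv 3 = 5; the old IV is
//   // recovered via an affine.apply at the top of the body.
//   affine.for %ii = 0 to 5 {
//     %i = affine.apply affine_map<(d0) -> (d0 * 3 + 2)>(%ii)
//     "use"(%i) : (index) -> ()
//   }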
632
633/// Returns true if the memory operation of `destAccess` depends on `srcAccess`
634/// inside of the innermost common surrounding affine loop between the two
635/// accesses.
636static bool mustReachAtInnermost(const MemRefAccess &srcAccess,
637 const MemRefAccess &destAccess) {
638 // Affine dependence analysis is possible only if both ops are in the same
639 // AffineScope.
640 if (getAffineAnalysisScope(srcAccess.opInst) !=
641 getAffineAnalysisScope(destAccess.opInst))
642 return false;
643
644 unsigned nsLoops =
645 getNumCommonSurroundingLoops(*srcAccess.opInst, *destAccess.opInst);
646 DependenceResult result =
647 checkMemrefAccessDependence(srcAccess, destAccess, nsLoops + 1);
648 return hasDependence(result);
649}
650
651/// Returns true if `srcMemOp` may have an effect on `destMemOp` within the
652/// scope of the outermost `minSurroundingLoops` loops that surround them.
653/// `srcMemOp` and `destMemOp` are expected to be affine read/write ops.
654static bool mayHaveEffect(Operation *srcMemOp, Operation *destMemOp,
655 unsigned minSurroundingLoops) {
656 MemRefAccess srcAccess(srcMemOp);
657 MemRefAccess destAccess(destMemOp);
658
659 // Affine dependence analysis here is applicable only if both ops operate on
660 // the same memref and if `srcMemOp` and `destMemOp` are in the same
661 // AffineScope. Also, we can only check if our affine scope is isolated from
662 // above; otherwise, values can come from outside of the affine scope that
663 // the check below cannot analyze.
664 Region *srcScope = getAffineAnalysisScope(srcMemOp);
665 if (srcAccess.memref == destAccess.memref &&
666 srcScope == getAffineAnalysisScope(destMemOp)) {
667 unsigned nsLoops = getNumCommonSurroundingLoops(*srcMemOp, *destMemOp);
668 FlatAffineValueConstraints dependenceConstraints;
669 for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
670 DependenceResult result = checkMemrefAccessDependence(
671 srcAccess, destAccess, d, &dependenceConstraints,
672 /*dependenceComponents=*/nullptr);
673 // A dependence failure or the presence of a dependence implies a
674 // side effect.
675 if (!noDependence(result))
676 return true;
677 }
678 // No side effect was seen.
679 return false;
680 }
681 // TODO: Check here if the memrefs alias: there is no side effect if
682 // `srcAccess.memref` and `destAccess.memref` don't alias.
683 return true;
684}
685
686template <typename EffectType, typename T>
687bool mlir::affine::hasNoInterveningEffect(
688 Operation *start, T memOp,
689 llvm::function_ref<bool(Value, Value)> mayAlias) {
690 // A boolean representing whether an intervening operation could have impacted
691 // memOp.
692 bool hasSideEffect = false;
693
694 // Check whether the effect on memOp can be caused by a given operation op.
695 Value memref = memOp.getMemRef();
696 std::function<void(Operation *)> checkOperation = [&](Operation *op) {
697 // If the effect has already been found, exit early.
698 if (hasSideEffect)
699 return;
700
701 if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(op)) {
702 SmallVector<MemoryEffects::EffectInstance, 1> effects;
703 memEffect.getEffects(effects);
704
705 bool opMayHaveEffect = false;
706 for (auto effect : effects) {
707 // If op causes EffectType on a potentially aliasing location for
708 // memOp, mark as having the effect.
709 if (isa<EffectType>(effect.getEffect())) {
710 if (effect.getValue() && effect.getValue() != memref &&
711 !mayAlias(effect.getValue(), memref))
712 continue;
713 opMayHaveEffect = true;
714 break;
715 }
716 }
717
718 if (!opMayHaveEffect)
719 return;
720
721 // If the side effect comes from an affine read or write, try to
722 // prove the side effecting `op` cannot reach `memOp`.
723 if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
724 // For ease, let's consider the case that `op` is a store and
725 // we're looking for other potential stores that overwrite memory after
726 // `start`, and before being read in `memOp`. In this case, we only
727 // need to consider other potential stores with depth >
728 // minSurroundingLoops since `start` would overwrite any store with a
729 // smaller number of surrounding loops before.
730 unsigned minSurroundingLoops =
731 getNumCommonSurroundingLoops(*start, *memOp);
732 if (mayHaveEffect(op, memOp, minSurroundingLoops))
733 hasSideEffect = true;
734 return;
735 }
736
737 // We have an op with a memory effect and we cannot prove if it
738 // intervenes.
739 hasSideEffect = true;
740 return;
741 }
742
743 if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
744 // Recurse into the regions for this op and check whether the internal
745 // operations may have the side effect `EffectType` on memOp.
746 for (Region &region : op->getRegions())
747 for (Block &block : region)
748 for (Operation &op : block)
749 checkOperation(&op);
750 return;
751 }
752
753 // Otherwise, conservatively assume generic operations have the effect
754 // on `memOp`.
755 hasSideEffect = true;
756 };
757
758 // Check all paths from ancestor op `parent` to the operation `to` for the
759 // effect. It is known that `to` must be contained within `parent`.
760 auto until = [&](Operation *parent, Operation *to) {
761 // TODO check only the paths from `parent` to `to`.
762 // Currently we fall back and check the entire parent op, rather than
763 // just the paths from `parent` to `to`, stopping after reaching `to`.
764 // This is conservatively correct, but could be made more aggressive.
765 assert(parent->isAncestor(to));
766 checkOperation(parent);
767 };
768
769 // Check for all paths from operation `from` to operation `untilOp` for the
770 // given memory effect.
771 std::function<void(Operation *, Operation *)> recur =
772 [&](Operation *from, Operation *untilOp) {
773 assert(
774 from->getParentRegion()->isAncestor(untilOp->getParentRegion()) &&
775 "Checking for side effect between two operations without a common "
776 "ancestor");
777
778 // If the operations are in different regions, recursively consider all
779 // paths from `from` to the parent of `to` and all paths from the parent
780 // of `to` to `to`.
781 if (from->getParentRegion() != untilOp->getParentRegion()) {
782 recur(from, untilOp->getParentOp());
783 until(untilOp->getParentOp(), untilOp);
784 return;
785 }
786
787 // Now, assuming that `from` and `to` exist in the same region, perform
788 // a CFG traversal to check all the relevant operations.
789
790 // Additional blocks to consider.
791 SmallVector<Block *, 2> todoBlocks;
792 {
793 // First consider the parent block of `from` and check all operations
794 // after `from`.
795 for (auto iter = ++from->getIterator(), end = from->getBlock()->end();
796 iter != end && &*iter != untilOp; ++iter) {
797 checkOperation(&*iter);
798 }
799
800 // If the parent of `from` doesn't contain `to`, add the successors
801 // to the list of blocks to check.
802 if (untilOp->getBlock() != from->getBlock())
803 for (Block *succ : from->getBlock()->getSuccessors())
804 todoBlocks.push_back(succ);
805 }
806
807 SmallPtrSet<Block *, 4> done;
808 // Traverse the CFG until hitting `to`.
809 while (!todoBlocks.empty()) {
810 Block *blk = todoBlocks.pop_back_val();
811 if (done.count(blk))
812 continue;
813 done.insert(blk);
814 for (auto &op : *blk) {
815 if (&op == untilOp)
816 break;
817 checkOperation(&op);
818 if (&op == blk->getTerminator())
819 for (Block *succ : blk->getSuccessors())
820 todoBlocks.push_back(succ);
821 }
822 }
823 };
824 recur(start, memOp);
825 return !hasSideEffect;
826}
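// Hedged usage sketch (not part of the original file; `storeOp`, `loadOp`, and
// `aliasAnalysis` are assumed to be available in the caller):
//
//   auto mayAlias = [&](Value a, Value b) {
//     return !aliasAnalysis.alias(a, b).isNo();
//   };
//   // True iff no operation between `storeOp` and `loadOp` may write to the
//   // memory read by `loadOp`.
//   bool safe = affine::hasNoInterveningEffect<MemoryEffects::Write>(
//       storeOp, loadOp, mayAlias);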
827
828/// Attempt to eliminate loadOp by replacing it with a value stored into memory
829/// which the load is guaranteed to retrieve. This check involves three
830/// components: 1) The store and load must be on the same location 2) The store
831/// must dominate (and therefore must always occur prior to) the load 3) No
832/// other operations will overwrite the memory loaded between the given load
833/// and store. If such a value exists, the replaced `loadOp` will be added to
834/// `loadOpsToErase` and its memref will be added to `memrefsToErase`.
835static void forwardStoreToLoad(
836 AffineReadOpInterface loadOp, SmallVectorImpl<Operation *> &loadOpsToErase,
837 SmallPtrSetImpl<Value> &memrefsToErase, DominanceInfo &domInfo,
838 llvm::function_ref<bool(Value, Value)> mayAlias) {
839
840 // The store op candidate for forwarding that satisfies all conditions
841 // to replace the load, if any.
842 Operation *lastWriteStoreOp = nullptr;
843
844 for (auto *user : loadOp.getMemRef().getUsers()) {
845 auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
846 if (!storeOp)
847 continue;
848 MemRefAccess srcAccess(storeOp);
849 MemRefAccess destAccess(loadOp);
850
851 // 1. Check if the store and the load have mathematically equivalent
852 // affine access functions; this implies that they statically refer to the
853 // same single memref element. As an example this filters out cases like:
854 // store %A[%i0 + 1]
855 // load %A[%i0]
856 // store %A[%M]
857 // load %A[%N]
858 // Use the AffineValueMap difference based memref access equality checking.
859 if (srcAccess != destAccess)
860 continue;
861
862 // 2. The store has to dominate the load op to be candidate.
863 if (!domInfo.dominates(storeOp, loadOp))
864 continue;
865
866 // 3. The store must reach the load. Access function equivalence only
867 // guarantees this for accesses in the same block. The load could be in a
868 // nested block that is unreachable.
869 if (!mustReachAtInnermost(srcAccess, destAccess))
870 continue;
871
872 // 4. Ensure there is no intermediate operation which could replace the
873 // value in memory.
874 if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(storeOp, loadOp,
875 mayAlias))
876 continue;
877
878 // We now have a candidate for forwarding.
879 assert(lastWriteStoreOp == nullptr &&
880 "multiple simultaneous replacement stores");
881 lastWriteStoreOp = storeOp;
882 }
883
884 if (!lastWriteStoreOp)
885 return;
886
887 // Perform the actual store to load forwarding.
888 Value storeVal =
889 cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
890 // Check if 2 values have the same shape. This is needed for affine vector
891 // loads and stores.
892 if (storeVal.getType() != loadOp.getValue().getType())
893 return;
894 loadOp.getValue().replaceAllUsesWith(storeVal);
895 // Record the memref for a later sweep to optimize away.
896 memrefsToErase.insert(loadOp.getMemRef());
897 // Record this to erase later.
898 loadOpsToErase.push_back(loadOp);
899}
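// Illustrative before/after sketch of store-to-load forwarding (not part of
// the original file):
//
//   // Before: the load reads exactly what the dominating store wrote.
//   affine.store %v, %A[%i] : memref<128xf32>
//   %x = affine.load %A[%i] : memref<128xf32>
//   "use"(%x) : (f32) -> ()
//
//   // After: uses of the load are replaced with the stored value; the load is
//   // queued in `loadOpsToErase` and %A is recorded in `memrefsToErase`.
//   affine.store %v, %A[%i] : memref<128xf32>
//   "use"(%v) : (f32) -> ()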
900
901template bool
902mlir::affine::hasNoInterveningEffect<mlir::MemoryEffects::Read,
903 affine::AffineReadOpInterface>(
904 mlir::Operation *, affine::AffineReadOpInterface,
905 ::llvm::function_ref<bool(Value, Value)>);
906
907// This attempts to find stores which have no impact on the final result.
908// A writing op writeA will be eliminated if there exists an op writeB if
909// 1) writeA and writeB have mathematically equivalent affine access functions.
910// 2) writeB postdominates writeA.
911// 3) There is no potential read between writeA and writeB.
912static void findUnusedStore(AffineWriteOpInterface writeA,
913 SmallVectorImpl<Operation *> &opsToErase,
914 PostDominanceInfo &postDominanceInfo,
915 llvm::function_ref<bool(Value, Value)> mayAlias) {
916
917 for (Operation *user : writeA.getMemRef().getUsers()) {
918 // Only consider writing operations.
919 auto writeB = dyn_cast<AffineWriteOpInterface>(user);
920 if (!writeB)
921 continue;
922
923 // The operations must be distinct.
924 if (writeB == writeA)
925 continue;
926
927 // Both operations must lie in the same region.
928 if (writeB->getParentRegion() != writeA->getParentRegion())
929 continue;
930
931 // Both operations must write to the same memory.
932 MemRefAccess srcAccess(writeB);
933 MemRefAccess destAccess(writeA);
934
935 if (srcAccess != destAccess)
936 continue;
937
938 // writeB must postdominate writeA.
939 if (!postDominanceInfo.postDominates(writeB, writeA))
940 continue;
941
942 // There cannot be an operation which reads from memory between
943 // the two writes.
944 if (!affine::hasNoInterveningEffect<MemoryEffects::Read>(writeA, writeB,
945 mayAlias))
946 continue;
947
948 opsToErase.push_back(writeA);
949 break;
950 }
951}
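// Illustrative sketch of a store removed by findUnusedStore (not part of the
// original file): the first store is postdominated by a second store to the
// same element with no intervening read, so it is queued in `opsToErase`.
//
//   affine.store %v0, %A[%i] : memref<128xf32>   // dead, will be erased
//   affine.store %v1, %A[%i] : memref<128xf32>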
952
953// The load to load forwarding / redundant load elimination is similar to the
954// store to load forwarding.
955// loadA will be be replaced with loadB if:
956// 1) loadA and loadB have mathematically equivalent affine access functions.
957// 2) loadB dominates loadA.
958// 3) There is no write between loadA and loadB.
959static void loadCSE(AffineReadOpInterface loadA,
960 SmallVectorImpl<Operation *> &loadOpsToErase,
961 DominanceInfo &domInfo,
962 llvm::function_ref<bool(Value, Value)> mayAlias) {
963 SmallVector<AffineReadOpInterface, 4> loadCandidates;
964 for (auto *user : loadA.getMemRef().getUsers()) {
965 auto loadB = dyn_cast<AffineReadOpInterface>(user);
966 if (!loadB || loadB == loadA)
967 continue;
968
969 MemRefAccess srcAccess(loadB);
970 MemRefAccess destAccess(loadA);
971
972 // 1. The accesses should be to the same location.
973 if (srcAccess != destAccess) {
974 continue;
975 }
976
977 // 2. loadB should dominate loadA.
978 if (!domInfo.dominates(loadB, loadA))
979 continue;
980
981 // 3. There should not be a write between loadA and loadB.
982 if (!affine::hasNoInterveningEffect<MemoryEffects::Write>(
983 loadB.getOperation(), loadA, mayAlias))
984 continue;
985
986 // Check if two values have the same shape. This is needed for affine vector
987 // loads.
988 if (loadB.getValue().getType() != loadA.getValue().getType())
989 continue;
990
991 loadCandidates.push_back(loadB);
992 }
993
994 // Of the legal load candidates, use the one that dominates all others
995 // to minimize the subsequent need for loadCSE.
996 Value loadB;
997 for (AffineReadOpInterface option : loadCandidates) {
998 if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) {
999 return depStore == option ||
1000 domInfo.dominates(option.getOperation(),
1001 depStore.getOperation());
1002 })) {
1003 loadB = option.getValue();
1004 break;
1005 }
1006 }
1007
1008 if (loadB) {
1009 loadA.getValue().replaceAllUsesWith(loadB);
1010 // Record this to erase later.
1011 loadOpsToErase.push_back(loadA);
1012 }
1013}
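// Illustrative before/after sketch for loadCSE (not part of the original
// file):
//
//   // Before: two identical loads with no intervening write.
//   %x = affine.load %A[%i] : memref<128xf32>
//   %y = affine.load %A[%i] : memref<128xf32>
//   "use"(%x, %y) : (f32, f32) -> ()
//
//   // After: the dominated load reuses the dominating one and is queued for
//   // erasure.
//   %x = affine.load %A[%i] : memref<128xf32>
//   "use"(%x, %x) : (f32, f32) -> ()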
1014
1015// The store to load forwarding and load CSE rely on three conditions:
1016//
1017// 1) store/load providing a replacement value and load being replaced need to
1018// have mathematically equivalent affine access functions (checked after full
1019// composition of load/store operands); this implies that they access the same
1020// single memref element for all iterations of the common surrounding loop,
1021//
1022// 2) the store/load op should dominate the load op,
1023//
1024// 3) no operation that may write to memory read by the load being replaced can
1025// occur after executing the instruction (load or store) providing the
1026// replacement value and before the load being replaced (thus potentially
1027// allowing overwriting the memory read by the load).
1028//
1029// The above conditions are simple to check, sufficient, and powerful for most
1030// cases in practice - they are sufficient, but not necessary --- since they
1031// don't reason about loops that are guaranteed to execute at least once or
1032// multiple sources to forward from.
1033//
1034// TODO: more forwarding can be done when support for
1035// loop/conditional live-out SSA values is available.
1036// TODO: do general dead store elimination for memref's. This pass
1037// currently only eliminates the stores only if no other loads/uses (other
1038// than dealloc) remain.
1039//
1040void mlir::affine::affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
1041 PostDominanceInfo &postDomInfo,
1042 AliasAnalysis &aliasAnalysis) {
1043 // Load op's whose results were replaced by those forwarded from stores.
1044 SmallVector<Operation *, 8> opsToErase;
1045
1046 // A list of memref's that are potentially dead / could be eliminated.
1047 SmallPtrSet<Value, 4> memrefsToErase;
1048
1049 auto mayAlias = [&](Value val1, Value val2) -> bool {
1050 return !aliasAnalysis.alias(val1, val2).isNo();
1051 };
1052
1053 // Walk all load's and perform store to load forwarding.
1054 f.walk([&](AffineReadOpInterface loadOp) {
1055 forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo, mayAlias);
1056 });
1057 for (auto *op : opsToErase)
1058 op->erase();
1059 opsToErase.clear();
1060
1061 // Walk all store's and perform unused store elimination
1062 f.walk([&](AffineWriteOpInterface storeOp) {
1063 findUnusedStore(storeOp, opsToErase, postDomInfo, mayAlias);
1064 });
1065 for (auto *op : opsToErase)
1066 op->erase();
1067 opsToErase.clear();
1068
1069 // Check if the store fwd'ed memrefs are now left with only stores and
1070 // deallocs and can thus be completely deleted. Note: the canonicalize pass
1071 // should be able to do this as well, but we'll do it here since we collected
1072 // these anyway.
1073 for (auto memref : memrefsToErase) {
1074 // If the memref hasn't been locally alloc'ed, skip.
1075 Operation *defOp = memref.getDefiningOp();
1076 if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
1077 // TODO: if the memref was returned by a 'call' operation, we
1078 // could still erase it if the call had no side-effects.
1079 continue;
1080 if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
1081 return !isa<AffineWriteOpInterface>(ownerOp) &&
1082 !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
1083 }))
1084 continue;
1085
1086 // Erase all stores, the dealloc, and the alloc on the memref.
1087 for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
1088 user->erase();
1089 defOp->erase();
1090 }
1091
1092 // To eliminate as many loads as possible, run load CSE after eliminating
1093 // stores. Otherwise, some stores are wrongly seen as having an intervening
1094 // effect.
1095 f.walk([&](AffineReadOpInterface loadOp) {
1096 loadCSE(loadOp, opsToErase, domInfo, mayAlias);
1097 });
1098 for (auto *op : opsToErase)
1099 op->erase();
1100}
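// Hedged sketch of how a pass might drive affineScalarReplace (an
// approximation, not part of the original file; the upstream scalar
// replacement pass does something very similar):
//
//   void runOnOperation() {
//     affineScalarReplace(getOperation(), getAnalysis<DominanceInfo>(),
//                         getAnalysis<PostDominanceInfo>(),
//                         getAnalysis<AliasAnalysis>());
//   }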
1101
1102// Checks if `op` is a memref dereferencing op (it accesses the memref through indices).
1103// TODO: This hardcoded check will be removed once the right interface is added.
1104static bool isDereferencingOp(Operation *op) {
1105 return isa<AffineMapAccessInterface, memref::LoadOp, memref::StoreOp>(op);
1106}
1107
1108// Perform the replacement in `op`.
1109LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1110 Value oldMemRef, Value newMemRef, Operation *op,
1111 ArrayRef<Value> extraIndices, AffineMap indexRemap,
1112 ArrayRef<Value> extraOperands, ArrayRef<Value> symbolOperands,
1113 bool allowNonDereferencingOps) {
1114 unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1115 (void)newMemRefRank; // unused in opt mode
1116 unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1117 (void)oldMemRefRank; // unused in opt mode
1118 if (indexRemap) {
1119 assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1120 "symbolic operand count mismatch");
1121 assert(indexRemap.getNumInputs() ==
1122 extraOperands.size() + oldMemRefRank + symbolOperands.size());
1123 assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1124 } else {
1125 assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1126 }
1127
1128 // Assert same elemental type.
1129 assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1130 cast<MemRefType>(newMemRef.getType()).getElementType());
1131
1132 SmallVector<unsigned, 2> usePositions;
1133 for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
1134 if (opEntry.value() == oldMemRef)
1135 usePositions.push_back(opEntry.index());
1136 }
1137
1138 // If memref doesn't appear, nothing to do.
1139 if (usePositions.empty())
1140 return success();
1141
1142 unsigned memRefOperandPos = usePositions.front();
1143
1144 OpBuilder builder(op);
1145 // The following checks if op is dereferencing memref and performs the access
1146 // index rewrites.
1147 if (!isDereferencingOp(op)) {
1148 if (!allowNonDereferencingOps) {
1149 // Failure: memref used in a non-dereferencing context (potentially
1150 // escapes); no replacement in these cases unless allowNonDereferencingOps
1151 // is set.
1152 return failure();
1153 }
1154 for (unsigned pos : usePositions)
1155 op->setOperand(pos, newMemRef);
1156 return success();
1157 }
1158
1159 if (usePositions.size() > 1) {
1160 // TODO: extend it for this case when needed (rare).
1161 LLVM_DEBUG(llvm::dbgs()
1162 << "multiple dereferencing uses in a single op not supported");
1163 return failure();
1164 }
1165
1166 // Perform index rewrites for the dereferencing op and then replace the op.
1167 SmallVector<Value, 4> oldMapOperands;
1168 AffineMap oldMap;
1169 unsigned oldMemRefNumIndices = oldMemRefRank;
1170 auto startIdx = op->operand_begin() + memRefOperandPos + 1;
1171 auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
1172 if (affMapAccInterface) {
1173 // If `op` implements AffineMapAccessInterface, we can get the indices by
1174 // querying the number of map operands from the operand list from a certain
1175 // offset (`memRefOperandPos` in this case).
1176 NamedAttribute oldMapAttrPair =
1177 affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
1178 oldMap = cast<AffineMapAttr>(oldMapAttrPair.getValue()).getValue();
1179 oldMemRefNumIndices = oldMap.getNumInputs();
1180 }
1181 oldMapOperands.assign(startIdx, startIdx + oldMemRefNumIndices);
1182
1183 // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
1184 SmallVector<Value, 4> oldMemRefOperands;
1185 SmallVector<Value, 4> affineApplyOps;
1186 oldMemRefOperands.reserve(oldMemRefRank);
1187 if (affMapAccInterface &&
1188 oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
1189 for (auto resultExpr : oldMap.getResults()) {
1190 auto singleResMap = AffineMap::get(oldMap.getNumDims(),
1191 oldMap.getNumSymbols(), resultExpr);
1192 auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
1193 oldMapOperands);
1194 oldMemRefOperands.push_back(afOp);
1195 affineApplyOps.push_back(afOp);
1196 }
1197 } else {
1198 oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
1199 }
1200
1201 // Construct new indices as a remap of the old ones if a remapping has been
1202 // provided. The indices of a memref come right after it, i.e.,
1203 // at position memRefOperandPos + 1.
1204 SmallVector<Value, 4> remapOperands;
1205 remapOperands.reserve(extraOperands.size() + oldMemRefRank +
1206 symbolOperands.size());
1207 remapOperands.append(extraOperands.begin(), extraOperands.end());
1208 remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
1209 remapOperands.append(symbolOperands.begin(), symbolOperands.end());
1210
1211 SmallVector<Value, 4> remapOutputs;
1212 remapOutputs.reserve(oldMemRefRank);
1213 if (indexRemap &&
1214 indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
1215 // Remapped indices.
1216 for (auto resultExpr : indexRemap.getResults()) {
1217 auto singleResMap = AffineMap::get(
1218 indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
1219 auto afOp = AffineApplyOp::create(builder, op->getLoc(), singleResMap,
1220 remapOperands);
1221 remapOutputs.push_back(afOp);
1222 affineApplyOps.push_back(afOp);
1223 }
1224 } else {
1225 // No remapping specified.
1226 remapOutputs.assign(remapOperands.begin(), remapOperands.end());
1227 }
1228 SmallVector<Value, 4> newMapOperands;
1229 newMapOperands.reserve(newMemRefRank);
1230
1231 // Prepend 'extraIndices' in 'newMapOperands'.
1232 for (Value extraIndex : extraIndices) {
1233 assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
1234 "invalid memory op index");
1235 newMapOperands.push_back(extraIndex);
1236 }
1237
1238 // Append 'remapOutputs' to 'newMapOperands'.
1239 newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
1240
1241 // Create new fully composed AffineMap for new op to be created.
1242 assert(newMapOperands.size() == newMemRefRank);
1243 auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
1244 fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
1245 newMap = simplifyAffineMap(newMap);
1246 canonicalizeMapAndOperands(&newMap, &newMapOperands);
1247 // Remove any affine.apply's that became dead as a result of composition.
1248 for (Value value : affineApplyOps)
1249 if (value.use_empty())
1250 value.getDefiningOp()->erase();
1251
1252 OperationState state(op->getLoc(), op->getName());
1253 // Construct the new operation using this memref.
1254 state.operands.reserve(op->getNumOperands() + extraIndices.size());
1255 // Insert the non-memref operands.
1256 state.operands.append(op->operand_begin(),
1257 op->operand_begin() + memRefOperandPos);
1258 // Insert the new memref value.
1259 state.operands.push_back(newMemRef);
1260
1261 // Insert the new memref map operands.
1262 if (affMapAccInterface) {
1263 state.operands.append(newMapOperands.begin(), newMapOperands.end());
1264 } else {
1265 // In the case of dereferencing ops not implementing
1266 // AffineMapAccessInterface, we need to apply the values of `newMapOperands`
1267 // to the `newMap` to get the correct indices.
1268 for (unsigned i = 0; i < newMemRefRank; i++) {
1269 state.operands.push_back(AffineApplyOp::create(
1270 builder, op->getLoc(),
1271 AffineMap::get(newMap.getNumDims(), newMap.getNumSymbols(),
1272 newMap.getResult(i)),
1273 newMapOperands));
1274 }
1275 }
1276
1277 // Insert the remaining operands unmodified.
1278 unsigned oldMapNumInputs = oldMapOperands.size();
1279 state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
1280 oldMapNumInputs,
1281 op->operand_end());
1282 // Result types don't change. Both memref's are of the same elemental type.
1283 state.types.reserve(op->getNumResults());
1284 for (auto result : op->getResults())
1285 state.types.push_back(result.getType());
1286
1287 // Add attribute for 'newMap', other Attributes do not change.
1288 auto newMapAttr = AffineMapAttr::get(newMap);
1289 for (auto namedAttr : op->getAttrs()) {
1290 if (affMapAccInterface &&
1291 namedAttr.getName() ==
1292 affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef).getName())
1293 state.attributes.push_back({namedAttr.getName(), newMapAttr});
1294 else
1295 state.attributes.push_back(namedAttr);
1296 }
1297
1298 // Create the new operation.
1299 auto *repOp = builder.create(state);
1300 op->replaceAllUsesWith(repOp);
1301 op->erase();
1302
1303 return success();
1304}
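// Hedged usage sketch (not part of the original file; `oldMemRef`, `newMemRef`,
// and `loadOp` are assumed, and `tag` is a hypothetical index value used as an
// extra leading dimension of the new memref):
//
//   // Rewrite a single dereferencing op to use the new memref, prepending
//   // `tag` as an extra index and keeping the remaining indices unchanged.
//   if (failed(replaceAllMemRefUsesWith(oldMemRef, newMemRef, loadOp,
//                                       /*extraIndices=*/{tag},
//                                       /*indexRemap=*/AffineMap(),
//                                       /*extraOperands=*/{},
//                                       /*symbolOperands=*/{},
//                                       /*allowNonDereferencingOps=*/false)))
//     return failure();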
1305
1306LogicalResult mlir::affine::replaceAllMemRefUsesWith(
1307 Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
1308 AffineMap indexRemap, ArrayRef<Value> extraOperands,
1309 ArrayRef<Value> symbolOperands,
1310 llvm::function_ref<bool(Operation *)> userFilterFn,
1311 bool allowNonDereferencingOps, bool replaceInDeallocOp) {
1312 unsigned newMemRefRank = cast<MemRefType>(newMemRef.getType()).getRank();
1313 (void)newMemRefRank; // unused in opt mode
1314 unsigned oldMemRefRank = cast<MemRefType>(oldMemRef.getType()).getRank();
1315 (void)oldMemRefRank;
1316 if (indexRemap) {
1317 assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
1318 "symbol operand count mismatch");
1319 assert(indexRemap.getNumInputs() ==
1320 extraOperands.size() + oldMemRefRank + symbolOperands.size());
1321 assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
1322 } else {
1323 assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
1324 }
1325
1326 // Assert same elemental type.
1327 assert(cast<MemRefType>(oldMemRef.getType()).getElementType() ==
1328 cast<MemRefType>(newMemRef.getType()).getElementType());
1329
1330 // Walk all uses of old memref; collect ops to perform replacement. We use a
1331 // DenseSet since an operation could potentially have multiple uses of a
1332 // memref (although rare), and the replacement later is going to erase ops.
1333 DenseSet<Operation *> opsToReplace;
1334 for (auto *user : oldMemRef.getUsers()) {
1335 // Check if this user doesn't pass the filter.
1336 if (userFilterFn && !userFilterFn(user))
1337 continue;
1338
1339 // Skip dealloc's - no replacement is necessary, and a memref replacement
1340 // at other uses doesn't hurt these dealloc's.
1341 if (hasSingleEffect<MemoryEffects::Free>(user, oldMemRef) &&
1342 !replaceInDeallocOp)
1343 continue;
1344
1345 // Check if the memref was used in a non-dereferencing context. It is fine
1346 // for the memref to be used in a non-dereferencing way outside of the
1347 // region where this replacement is happening.
1348 if (!isa<AffineMapAccessInterface>(*user)) {
1349 if (!allowNonDereferencingOps) {
1350 LLVM_DEBUG(
1351 llvm::dbgs()
1352 << "Memref replacement failed: non-dereferencing memref user: \n"
1353 << *user << '\n');
1354 return failure();
1355 }
1356 // Non-dereferencing ops with the MemRefsNormalizable trait are
1357 // supported for replacement.
1358 if (!user->hasTrait<OpTrait::MemRefsNormalizable>()) {
1359 LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
1360 "memrefs normalizable trait: \n"
1361 << *user << '\n');
1362 return failure();
1363 }
1364 }
1365
1366 // We'll first collect and then replace --- since replacement erases the
1367 // user that has the use, and that user could be postDomFilter or domFilter
1368 // itself!
1369 opsToReplace.insert(user);
1370 }
1371
1372 for (auto *user : opsToReplace) {
1373 if (failed(replaceAllMemRefUsesWith(
1374 oldMemRef, newMemRef, user, extraIndices, indexRemap, extraOperands,
1375 symbolOperands, allowNonDereferencingOps)))
1376 llvm_unreachable("memref replacement guaranteed to succeed here");
1377 }
1378
1379 return success();
1380}
1381
1382/// Given an operation, inserts one or more single result affine
1383/// apply operations, results of which are exclusively used by this
1384/// operation. The operands of these newly created affine apply ops are
1385/// guaranteed to be loop iterators or terminal symbols of a function.
1386///
1387/// Before
1388///
1389/// affine.for %i = 0 to #map(%N)
1390/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1391/// "send"(%idx, %A, ...)
1392/// "compute"(%idx)
1393///
1394/// After
1395///
1396/// affine.for %i = 0 to #map(%N)
1397/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
1398/// "send"(%idx, %A, ...)
1399/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
1400/// "compute"(%idx_)
1401///
1402/// This allows applying different transformations on send and compute (for eg.
1403/// different shifts/delays).
1404///
1405/// Returns nullptr either if none of opInst's operands were the result of an
1406/// affine.apply and thus there was no affine computation slice to create, or if
1407/// all the affine.apply op's supplying operands to this opInst did not have any
1408/// uses besides this opInst; otherwise returns the list of affine.apply
1409/// operations created in output argument `sliceOps`.
1410void mlir::affine::createAffineComputationSlice(
1411 Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
1412 // Collect all operands that are results of affine apply ops.
1413 SmallVector<Value, 4> subOperands;
1414 subOperands.reserve(opInst->getNumOperands());
1415 for (auto operand : opInst->getOperands())
1416 if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
1417 subOperands.push_back(operand);
1418
1419 // Gather sequence of AffineApplyOps reachable from 'subOperands'.
1420 SmallVector<Operation *, 4> affineApplyOps;
1421 getReachableAffineApplyOps(subOperands, affineApplyOps);
1422 // Skip transforming if there are no affine maps to compose.
1423 if (affineApplyOps.empty())
1424 return;
1425
1426 // Check if all uses of the affine apply op's lie only in this op, in
1427 // which case there would be nothing to do.
1428 bool localized = true;
1429 for (auto *op : affineApplyOps) {
1430 for (auto result : op->getResults()) {
1431 for (auto *user : result.getUsers()) {
1432 if (user != opInst) {
1433 localized = false;
1434 break;
1435 }
1436 }
1437 }
1438 }
1439 if (localized)
1440 return;
1441
1442 OpBuilder builder(opInst);
1443 SmallVector<Value, 4> composedOpOperands(subOperands);
1444 auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
1445 fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
1446
1447 // Create an affine.apply for each of the map results.
1448 sliceOps->reserve(composedMap.getNumResults());
1449 for (auto resultExpr : composedMap.getResults()) {
1450 auto singleResMap = AffineMap::get(composedMap.getNumDims(),
1451 composedMap.getNumSymbols(), resultExpr);
1452 sliceOps->push_back(AffineApplyOp::create(
1453 builder, opInst->getLoc(), singleResMap, composedOpOperands));
1454 }
1455
1456 // Construct the new operands that include the results from the composed
1457 // affine apply op above instead of existing ones (subOperands). So, they
1458 // differ from opInst's operands only for those operands in 'subOperands', for
1459 // which they will be replaced by the corresponding one from 'sliceOps'.
1460 SmallVector<Value, 4> newOperands(opInst->getOperands());
1461 for (Value &operand : newOperands) {
1462 // Replace the subOperands from among the new operands.
1463 unsigned j, f;
1464 for (j = 0, f = subOperands.size(); j < f; j++) {
1465 if (operand == subOperands[j])
1466 break;
1467 }
1468 if (j < subOperands.size())
1469 operand = (*sliceOps)[j];
1470 }
1471 for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++)
1472 opInst->setOperand(idx, newOperands[idx]);
1473}
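// Example (editorial sketch, not part of the original source): a caller that
// privatizes the affine.apply ops feeding an op so that op's index computation
// can be transformed independently of other users. Assumes the declarations
// from the affine Utils header are visible; `op` is a hypothetical operation
// inside an affine loop nest.
static void privatizeIndexComputation(Operation *op) {
  SmallVector<AffineApplyOp, 4> sliceOps;
  affine::createAffineComputationSlice(op, &sliceOps);
  if (sliceOps.empty())
    return; // Nothing to slice, or the affine.applys were already private.
  // `op` now consumes freshly created single-result affine.apply ops and can
  // be shifted/delayed without affecting other users of the original ones.
}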
1474
1475/// Enum to set patterns of affine expr in tiled-layout map.
1476/// TileFloorDiv: <dim expr> div <tile size>
1477/// TileMod: <dim expr> mod <tile size>
1478/// TileNone: None of the above
1479/// Example:
1480/// #tiled_2d_128x256 = affine_map<(d0, d1)
1481/// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1482/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
1483/// "d0 mod 128" and "d1 mod 256" ==> TileMod
1485
1486/// Check if `map` is a tiled layout. In a tiled layout, specific k dimensions
1487/// that are floordiv'ed by their respective tile sizes also appear in a mod
1488/// with the same tile sizes, and no other expression involves those k
1489/// dimensions. This function stores a vector of tuples (`tileSizePos`) holding
1490/// the AffineExpr for the tile size and the positions of the corresponding
1491/// `floordiv` and `mod`. If `map` is not a tiled layout, an empty vector is returned.
1492static LogicalResult getTileSizePos(
1493 AffineMap map,
1494 SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
1495 // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
1496 // `floordiv` and its position in `map` output.
1497 // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
1498 // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
1499 // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
1500 SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
1501 unsigned pos = 0;
1502 for (AffineExpr expr : map.getResults()) {
1503 if (expr.getKind() == AffineExprKind::FloorDiv) {
1504 AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1505 if (isa<AffineConstantExpr>(binaryExpr.getRHS()))
1506 floordivExprs.emplace_back(
1507 std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
1508 }
1509 pos++;
1510 }
1511 // Not tiled layout if `floordivExprs` is empty.
1512 if (floordivExprs.empty()) {
1513 tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1514 return success();
1515 }
1516
1517 // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
1518 // not tiled layout.
1519 for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
1520 AffineExpr floordivExprLHS = std::get<0>(fexpr);
1521 AffineExpr floordivExprRHS = std::get<1>(fexpr);
1522 unsigned floordivPos = std::get<2>(fexpr);
1523
1524 // Walk the affine exprs of the `map` output except `fexpr`, and check if
1525 // the LHS and RHS of `fexpr` appear as the LHS and RHS of a `mod`. If the LHS
1526 // of `fexpr` is used in any other expr, the map is not a tiled layout, e.g.:
1527 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
1528 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
1529 // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
1530 // 256)>
1531 bool found = false;
1532 pos = 0;
1533 for (AffineExpr expr : map.getResults()) {
1534 bool notTiled = false;
1535 if (pos != floordivPos) {
1536 expr.walk([&](AffineExpr e) {
1537 if (e == floordivExprLHS) {
1538 if (expr.getKind() == AffineExprKind::Mod) {
1539 AffineBinaryOpExpr binaryExpr = cast<AffineBinaryOpExpr>(expr);
1540 // If the LHS and RHS of the `mod` are the same as those of the floordiv.
1541 if (floordivExprLHS == binaryExpr.getLHS() &&
1542 floordivExprRHS == binaryExpr.getRHS()) {
1543 // Save tile size (RHS of `mod`), and position of `floordiv` and
1544 // `mod` if same expr with `mod` is not found yet.
1545 if (!found) {
1546 tileSizePos.emplace_back(
1547 std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
1548 found = true;
1549 } else {
1550 // Non tiled layout: there are multiple `mod`s with the same LHS.
1551 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1552 // mod 256, d2 mod 256)>
1553 notTiled = true;
1554 }
1555 } else {
1556 // Non tiled layout: RHS of `mod` is different from `floordiv`.
1557 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1558 // mod 128)>
1559 notTiled = true;
1560 }
1561 } else {
1562 // Non tiled layout: LHS is the same, but not `mod`.
1563 // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
1564 // floordiv 256)>
1565 notTiled = true;
1566 }
1567 }
1568 });
1569 }
1570 if (notTiled) {
1571 tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
1572 return success();
1573 }
1574 pos++;
1575 }
1576 }
1577 return success();
1578}
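// Example (editorial sketch, not part of the original source): for the tiled
// layout used in the comments above,
//   #tiled_2d_128x256 = affine_map<(d0, d1)
//       -> (d0 floordiv 128, d1 floordiv 256, d0 mod 128, d1 mod 256)>
// `tileSizePos` ends up as {(128, 0, 2), (256, 1, 3)}: tile size, position of
// the floordiv result, and position of the matching mod result. A hypothetical
// in-file helper (illustrative only, not referenced elsewhere) that merely asks
// whether a layout map is tiled could look like:
static bool isTiledLayoutMap(AffineMap map) {
  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
  return succeeded(getTileSizePos(map, tileSizePos)) && !tileSizePos.empty();
}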
1579
1580/// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
1581/// after normalization. Dimensions that include dynamic dimensions in the map
1582/// output will become dynamic dimensions. Return true if `dim` is dynamic
1583/// dimension.
1584///
1585/// Example:
1586/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1587///
1588/// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output are dynamic.
1589/// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
1590static bool
1591isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
1592 SmallVectorImpl<unsigned> &inMemrefTypeDynDims) {
1593 AffineExpr expr = layoutMap.getResults()[dim];
1594 // Check if affine expr of the dimension includes dynamic dimension of input
1595 // memrefType.
1596 MLIRContext *context = layoutMap.getContext();
1597 return expr
1598 .walk([&](AffineExpr e) {
1599 if (isa<AffineDimExpr>(e) &&
1600 llvm::any_of(inMemrefTypeDynDims, [&](unsigned dim) {
1601 return e == getAffineDimExpr(dim, context);
1602 }))
1603 return WalkResult::interrupt();
1604 return WalkResult::advance();
1605 })
1606 .wasInterrupted();
1607}
1608
1609/// Create affine expr to calculate dimension size for a tiled-layout map.
1610static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
1611 TileExprPattern pat) {
1612 // Create map output for the patterns.
1613 // "floordiv <tile size>" ==> "ceildiv <tile size>"
1614 // "mod <tile size>" ==> "<tile size>"
1615 AffineExpr newMapOutput;
1616 AffineBinaryOpExpr binaryExpr = nullptr;
1617 switch (pat) {
1618 case TileExprPattern::TileMod:
1619 binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1620 newMapOutput = binaryExpr.getRHS();
1621 break;
1622 case TileExprPattern::TileFloorDiv:
1623 binaryExpr = cast<AffineBinaryOpExpr>(oldMapOutput);
1624 newMapOutput = getAffineBinaryOpExpr(
1625 AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
1626 break;
1627 default:
1628 newMapOutput = oldMapOutput;
1629 }
1630 return newMapOutput;
1631}
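// Example (editorial sketch, not part of the original source): with the tiled
// map #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>, the
// three results are rewritten as
//   d0              (TileNone)     -> d0
//   d1 floordiv 32  (TileFloorDiv) -> d1 ceildiv 32
//   d1 mod 32       (TileMod)      -> 32
// which are exactly the per-dimension size expressions consumed by
// createNewDynamicSizes() below.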
1632
1633/// Create new maps to calculate each dimension size of `newMemRefType`, and
1634/// create `newDynamicSizes` from them by using AffineApplyOp.
1635///
1636/// Steps for normalizing dynamic memrefs for a tiled layout map
1637/// Example:
1638/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
1639/// %0 = dim %arg0, %c1 :memref<4x?xf32>
1640/// %1 = alloc(%0) : memref<4x?xf32, #map0>
1641///
1642/// (Before this function)
1643/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only a
1644/// single layout map is supported.
1645///
1646/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
1647/// is memref<4x?x?xf32> in the above example.
1648///
1649/// (In this function)
1650/// 3. Create new maps to calculate each dimension of the normalized memrefType
1651/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
1652/// dimension size can be calculated by replacing "floordiv <tile size>" with
1653/// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
1654/// - New map in the above example
1655/// #map0 = affine_map<(d0, d1) -> (d0)>
1656/// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
1657/// #map2 = affine_map<(d0, d1) -> (32)>
1658///
1659/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
1660/// is used in dynamicSizes of new AllocOp.
1661/// %0 = dim %arg0, %c1 : memref<4x?xf32>
1662/// %c4 = arith.constant 4 : index
1663/// %1 = affine.apply #map1(%c4, %0)
1664/// %2 = affine.apply #map2(%c4, %0)
1665template <typename AllocLikeOp>
1666static void createNewDynamicSizes(MemRefType oldMemRefType,
1667 MemRefType newMemRefType, AffineMap map,
1668 AllocLikeOp allocOp, OpBuilder b,
1669 SmallVectorImpl<Value> &newDynamicSizes) {
1670 // Create new input for AffineApplyOp.
1671 SmallVector<Value, 4> inAffineApply;
1672 ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
1673 unsigned dynIdx = 0;
1674 for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
1675 if (oldMemRefShape[d] < 0) {
1676 // Use dynamicSizes of allocOp for dynamic dimension.
1677 inAffineApply.emplace_back(allocOp.getDynamicSizes()[dynIdx]);
1678 dynIdx++;
1679 } else {
1680 // Create ConstantOp for static dimension.
1681 auto constantAttr = b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
1682 inAffineApply.emplace_back(
1683 arith::ConstantOp::create(b, allocOp.getLoc(), constantAttr));
1684 }
1685 }
1686
1687 // Create a new map to calculate each dimension size of the new memref for
1688 // each original map output, but only for dynamic dimensions of `newMemRefType`.
1689 unsigned newDimIdx = 0;
1690 ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
1691 SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1692 (void)getTileSizePos(map, tileSizePos);
1693 for (AffineExpr expr : map.getResults()) {
1694 if (newMemRefShape[newDimIdx] < 0) {
1695 // Create new maps to calculate each dimension size of new memref.
1696 TileExprPattern pat = TileExprPattern::TileNone;
1697 for (auto pos : tileSizePos) {
1698 if (newDimIdx == std::get<1>(pos))
1699 pat = TileExprPattern::TileFloorDiv;
1700 else if (newDimIdx == std::get<2>(pos))
1701 pat = TileExprPattern::TileMod;
1702 }
1703 AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
1704 AffineMap newMap =
1705 AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
1706 Value affineApp =
1707 AffineApplyOp::create(b, allocOp.getLoc(), newMap, inAffineApply);
1708 newDynamicSizes.emplace_back(affineApp);
1709 }
1710 newDimIdx++;
1711 }
1712}
1713
1714template <typename AllocLikeOp>
1715LogicalResult mlir::affine::normalizeMemRef(AllocLikeOp allocOp) {
1716 MemRefType memrefType = allocOp.getType();
1717 OpBuilder b(allocOp);
1718
1719 // Fetch a new memref type after normalizing the old memref to have an
1720 // identity map layout.
1721 MemRefType newMemRefType = normalizeMemRefType(memrefType);
1722 if (newMemRefType == memrefType)
1723 // Either memrefType already had an identity map or the map couldn't be
1724 // transformed to an identity map.
1725 return failure();
1726
1727 Value oldMemRef = allocOp.getResult();
1728
1729 SmallVector<Value, 4> symbolOperands(allocOp.getSymbolOperands());
1730 AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1731 AllocLikeOp newAlloc;
1732 // Check if `layoutMap` is a tiled layout. Only a single layout map is
1733 // supported for normalizing dynamic memrefs.
1734 SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1735 (void)getTileSizePos(layoutMap, tileSizePos);
1736 if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
1737 auto oldMemRefType = cast<MemRefType>(oldMemRef.getType());
1738 SmallVector<Value, 4> newDynamicSizes;
1739 createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
1740 newDynamicSizes);
1741 // Add the new dynamic sizes in new AllocOp.
1742 newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1743 newDynamicSizes, allocOp.getAlignmentAttr());
1744 } else {
1745 newAlloc = AllocLikeOp::create(b, allocOp.getLoc(), newMemRefType,
1746 allocOp.getAlignmentAttr());
1747 }
1748 // Replace all uses of the old memref.
1749 if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
1750 /*extraIndices=*/{},
1751 /*indexRemap=*/layoutMap,
1752 /*extraOperands=*/{},
1753 /*symbolOperands=*/symbolOperands,
1754 /*userFilterFn=*/nullptr,
1755 /*allowNonDereferencingOps=*/true))) {
1756 // If it failed (due to escapes for example), bail out.
1757 newAlloc.erase();
1758 return failure();
1759 }
1760 // Replace any uses of the original alloc op and erase it. All remaining uses
1761 // have to be deallocs; replaceAllMemRefUsesWith above would've failed otherwise.
1762 assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
1763 return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
1764 }));
1765 oldMemRef.replaceAllUsesWith(newAlloc);
1766 allocOp.erase();
1767 return success();
1768}
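// Example (editorial sketch, not part of the original source): a pass would
// typically walk allocation ops and try to normalize each one, ignoring the
// ones that cannot be normalized (e.g. when the memref escapes). `root` is a
// hypothetical enclosing op; the helper name is illustrative only.
static void normalizeAllAllocs(Operation *root) {
  root->walk([](memref::AllocOp allocOp) {
    // Failure is not an error here: it simply means this alloc keeps its
    // non-identity layout.
    (void)affine::normalizeMemRef(allocOp);
  });
}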
1769
1770LogicalResult
1771mlir::affine::normalizeMemRef(memref::ReinterpretCastOp reinterpretCastOp) {
1772 MemRefType memrefType = reinterpretCastOp.getType();
1773 AffineMap oldLayoutMap = memrefType.getLayout().getAffineMap();
1774 Value oldMemRef = reinterpretCastOp.getResult();
1775
1776 // If `oldLayoutMap` is identity, `memrefType` is already normalized.
1777 if (oldLayoutMap.isIdentity())
1778 return success();
1779
1780 // Fetch a new memref type after normalizing the old memref to have an
1781 // identity map layout.
1782 MemRefType newMemRefType = normalizeMemRefType(memrefType);
1783 if (newMemRefType == memrefType)
1784 // `oldLayoutMap` couldn't be transformed to an identity map.
1785 return failure();
1786
1787 uint64_t newRank = newMemRefType.getRank();
1788 SmallVector<Value> mapOperands(oldLayoutMap.getNumDims() +
1789 oldLayoutMap.getNumSymbols());
1790 SmallVector<Value> oldStrides = reinterpretCastOp.getStrides();
1791 Location loc = reinterpretCastOp.getLoc();
1792 // As `newMemRefType` is normalized, it is unit strided.
1793 SmallVector<int64_t> newStaticStrides(newRank, 1);
1794 SmallVector<int64_t> newStaticOffsets(newRank, 0);
1795 ArrayRef<int64_t> oldShape = memrefType.getShape();
1796 ValueRange oldSizes = reinterpretCastOp.getSizes();
1797 unsigned idx = 0;
1798 OpBuilder b(reinterpretCastOp);
1799 // Collect the map operands which will be used to compute the new normalized
1800 // memref shape.
1801 for (unsigned i = 0, e = memrefType.getRank(); i < e; i++) {
1802 if (memrefType.isDynamicDim(i))
1803 mapOperands[i] =
1804 arith::SubIOp::create(b, loc, oldSizes[0].getType(), oldSizes[idx++],
1805 arith::ConstantIndexOp::create(b, loc, 1));
1806 else
1807 mapOperands[i] = arith::ConstantIndexOp::create(b, loc, oldShape[i] - 1);
1808 }
1809 for (unsigned i = 0, e = oldStrides.size(); i < e; i++)
1810 mapOperands[memrefType.getRank() + i] = oldStrides[i];
1811 SmallVector<Value> newSizes;
1812 ArrayRef<int64_t> newShape = newMemRefType.getShape();
1813 // Compute size along all the dimensions of the new normalized memref.
1814 for (unsigned i = 0; i < newRank; i++) {
1815 if (!newMemRefType.isDynamicDim(i))
1816 continue;
1817 newSizes.push_back(AffineApplyOp::create(
1818 b, loc,
1819 AffineMap::get(oldLayoutMap.getNumDims(), oldLayoutMap.getNumSymbols(),
1820 oldLayoutMap.getResult(i)),
1821 mapOperands));
1822 }
1823 for (unsigned i = 0, e = newSizes.size(); i < e; i++) {
1824 newSizes[i] =
1825 arith::AddIOp::create(b, loc, newSizes[i].getType(), newSizes[i],
1826 arith::ConstantIndexOp::create(b, loc, 1));
1827 }
1828 // Create the new reinterpret_cast op.
1829 auto newReinterpretCast = memref::ReinterpretCastOp::create(
1830 b, loc, newMemRefType, reinterpretCastOp.getSource(),
1831 /*offsets=*/ValueRange(), newSizes,
1832 /*strides=*/ValueRange(),
1833 /*static_offsets=*/newStaticOffsets,
1834 /*static_sizes=*/newShape,
1835 /*static_strides=*/newStaticStrides);
1836
1837 // Replace all uses of the old memref.
1838 if (failed(replaceAllMemRefUsesWith(oldMemRef,
1839 /*newMemRef=*/newReinterpretCast,
1840 /*extraIndices=*/{},
1841 /*indexRemap=*/oldLayoutMap,
1842 /*extraOperands=*/{},
1843 /*symbolOperands=*/oldStrides,
1844 /*userFilterFn=*/nullptr,
1845 /*allowNonDereferencingOps=*/true))) {
1846 // If it failed (due to escapes for example), bail out.
1847 newReinterpretCast.erase();
1848 return failure();
1849 }
1850
1851 oldMemRef.replaceAllUsesWith(newReinterpretCast);
1852 reinterpretCastOp.erase();
1853 return success();
1854}
1855
1856template LogicalResult
1857mlir::affine::normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
1858template LogicalResult
1859mlir::affine::normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
1860
1861MemRefType mlir::affine::normalizeMemRefType(MemRefType memrefType) {
1862 unsigned rank = memrefType.getRank();
1863 if (rank == 0)
1864 return memrefType;
1865
1866 if (memrefType.getLayout().isIdentity()) {
1867 // Either no map is associated with this memref or this memref has
1868 // a trivial (identity) map.
1869 return memrefType;
1870 }
1871 AffineMap layoutMap = memrefType.getLayout().getAffineMap();
1872 unsigned numSymbolicOperands = layoutMap.getNumSymbols();
1873
1874 // We don't do any checks for one-to-one'ness; we assume that it is
1875 // one-to-one.
1876
1877 // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
1878 // for now.
1879 // TODO: Normalize the other types of dynamic memrefs.
1880 SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
1881 (void)getTileSizePos(layoutMap, tileSizePos);
1882 if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
1883 return memrefType;
1884
1885 // We have a single map that is not an identity map. Create a new memref
1886 // with the right shape and an identity layout map.
1887 ArrayRef<int64_t> shape = memrefType.getShape();
1888 // FlatAffineValueConstraint may later on use symbolicOperands.
1889 FlatAffineValueConstraints fac(rank, numSymbolicOperands);
1890 SmallVector<unsigned, 4> memrefTypeDynDims;
1891 for (unsigned d = 0; d < rank; ++d) {
1892 // Use constraint system only in static dimensions.
1893 if (shape[d] > 0) {
1894 fac.addBound(BoundType::LB, d, 0);
1895 fac.addBound(BoundType::UB, d, shape[d] - 1);
1896 } else {
1897 memrefTypeDynDims.emplace_back(d);
1898 }
1899 }
1900 // We compose this map with the original index (logical) space to derive
1901 // the upper bounds for the new index space.
1902 unsigned newRank = layoutMap.getNumResults();
1903 if (failed(fac.composeMatchingMap(layoutMap)))
1904 return memrefType;
1905 // TODO: Handle semi-affine maps.
1906 // Project out the old data dimensions.
1907 fac.projectOut(newRank, fac.getNumVars() - newRank - fac.getNumLocalVars());
1908 SmallVector<int64_t, 4> newShape(newRank);
1909 MLIRContext *context = memrefType.getContext();
1910 for (unsigned d = 0; d < newRank; ++d) {
1911 // Check if this dimension is dynamic.
1912 if (isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims)) {
1913 newShape[d] = ShapedType::kDynamic;
1914 continue;
1915 }
1916 // The lower bound for the shape is always zero.
1917 std::optional<int64_t> ubConst = fac.getConstantBound64(BoundType::UB, d);
1918 // For a static memref and an affine map with no symbols, this is
1919 // always bounded. However, when we have symbols, we may not be able to
1920 // obtain a constant upper bound. Also, mapping to a negative space is
1921 // invalid for normalization.
1922 if (!ubConst.has_value() || *ubConst < 0) {
1923 LLVM_DEBUG(llvm::dbgs()
1924 << "can't normalize map due to unknown/invalid upper bound");
1925 return memrefType;
1926 }
1927 // Static dimension: the extent is the constant (inclusive) upper bound plus one.
1928 newShape[d] = *ubConst + 1;
1929 }
1930
1931 // Create the new memref type after trivializing the old layout map.
1932 auto newMemRefType =
1933 MemRefType::Builder(memrefType)
1934 .setShape(newShape)
1935 .setLayout(AffineMapAttr::get(
1936 AffineMap::getMultiDimIdentityMap(newRank, context)));
1937 return newMemRefType;
1938}
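// Example (editorial sketch, not part of the original source): for a statically
// shaped memref with the tiled layout from the comments above,
//   memref<64x128xf32, affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>>
// the normalized identity-layout type is memref<64x4x32xf32>. A hypothetical
// helper (illustrative only) that merely reports whether normalization would
// change the type:
static bool isNormalizableLayout(MemRefType type) {
  // normalizeMemRefType returns the input type unchanged when the layout is
  // already identity or cannot be normalized.
  return affine::normalizeMemRefType(type) != type;
}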
1939
1940DivModValue mlir::affine::getDivMod(OpBuilder &b, Location loc, Value lhs,
1941 Value rhs) {
1942 DivModValue result;
1943 AffineExpr d0, d1;
1944 bindDims(b.getContext(), d0, d1);
1945 result.quotient =
1946 affine::makeComposedAffineApply(b, loc, d0.floorDiv(d1), {lhs, rhs});
1947 result.remainder =
1948 affine::makeComposedAffineApply(b, loc, d0 % d1, {lhs, rhs});
1949 return result;
1950}
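// Example (editorial sketch, not part of the original source): splitting a
// linear id into a 2-d id for a fixed inner extent of 32. `b`, `loc` and
// `linearId` are assumed to be provided by the caller; the helper name is
// illustrative only.
static std::pair<Value, Value> split2D(OpBuilder &b, Location loc,
                                       Value linearId) {
  Value c32 = arith::ConstantIndexOp::create(b, loc, 32);
  DivModValue dm = affine::getDivMod(b, loc, linearId, c32);
  // dm.quotient  == linearId floordiv 32
  // dm.remainder == linearId mod 32
  return {dm.quotient, dm.remainder};
}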
1951
1952/// Create an affine map that computes `lhs` * `rhs`, composing in any other
1953/// affine maps.
1954static FailureOr<OpFoldResult> composedAffineMultiply(OpBuilder &b,
1955 Location loc,
1956 OpFoldResult lhs,
1957 OpFoldResult rhs) {
1958 AffineExpr s0, s1;
1959 bindSymbols(b.getContext(), s0, s1);
1960 return makeComposedFoldedAffineApply(b, loc, s0 * s1, {lhs, rhs});
1961}
1962
1963FailureOr<SmallVector<Value>>
1964mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1965 ArrayRef<Value> basis, bool hasOuterBound) {
1966 if (hasOuterBound)
1967 basis = basis.drop_front();
1968
1969 // Note: the divisors are backwards due to the scan.
1970 SmallVector<Value> divisors;
1971 OpFoldResult basisProd = b.getIndexAttr(1);
1972 for (OpFoldResult basisElem : llvm::reverse(basis)) {
1973 FailureOr<OpFoldResult> nextProd =
1974 composedAffineMultiply(b, loc, basisElem, basisProd);
1975 if (failed(nextProd))
1976 return failure();
1977 basisProd = *nextProd;
1978 divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
1979 }
1980
1981 SmallVector<Value> results;
1982 results.reserve(divisors.size() + 1);
1983 Value residual = linearIndex;
1984 for (Value divisor : llvm::reverse(divisors)) {
1985 DivModValue divMod = getDivMod(b, loc, residual, divisor);
1986 results.push_back(divMod.quotient);
1987 residual = divMod.remainder;
1988 }
1989 results.push_back(residual);
1990 return results;
1991}
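// Example (editorial sketch, not part of the original source): recovering the
// (i, j, k) indices of an index flattened over a 4x8x16 iteration space. With
// `hasOuterBound` set to true, the leading basis element (4) only serves as a
// bound and does not generate a division. Conceptually the result is
// {linear floordiv 128, (linear mod 128) floordiv 16, linear mod 16}.
static FailureOr<SmallVector<Value>>
delinearize3D(OpBuilder &b, Location loc, Value linearIndex) {
  Value c4 = arith::ConstantIndexOp::create(b, loc, 4);
  Value c8 = arith::ConstantIndexOp::create(b, loc, 8);
  Value c16 = arith::ConstantIndexOp::create(b, loc, 16);
  SmallVector<Value> basis = {c4, c8, c16};
  return affine::delinearizeIndex(b, loc, linearIndex, basis,
                                  /*hasOuterBound=*/true);
}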
1992
1993FailureOr<SmallVector<Value>>
1994mlir::affine::delinearizeIndex(OpBuilder &b, Location loc, Value linearIndex,
1995 ArrayRef<OpFoldResult> basis,
1996 bool hasOuterBound) {
1997 if (hasOuterBound)
1998 basis = basis.drop_front();
1999
2000 // Note: the divisors are backwards due to the scan.
2001 SmallVector<Value> divisors;
2002 OpFoldResult basisProd = b.getIndexAttr(1);
2003 for (OpFoldResult basisElem : llvm::reverse(basis)) {
2004 FailureOr<OpFoldResult> nextProd =
2005 composedAffineMultiply(b, loc, basisElem, basisProd);
2006 if (failed(nextProd))
2007 return failure();
2008 basisProd = *nextProd;
2009 divisors.push_back(getValueOrCreateConstantIndexOp(b, loc, basisProd));
2010 }
2011
2012 SmallVector<Value> results;
2013 results.reserve(divisors.size() + 1);
2014 Value residual = linearIndex;
2015 for (Value divisor : llvm::reverse(divisors)) {
2016 DivModValue divMod = getDivMod(b, loc, residual, divisor);
2017 results.push_back(divMod.quotient);
2018 residual = divMod.remainder;
2019 }
2020 results.push_back(residual);
2021 return results;
2022}
2023
2024OpFoldResult mlir::affine::linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
2025 ArrayRef<OpFoldResult> basis,
2026 ImplicitLocOpBuilder &builder) {
2027 return linearizeIndex(builder, builder.getLoc(), multiIndex, basis);
2028}
2029
2030OpFoldResult mlir::affine::linearizeIndex(OpBuilder &builder, Location loc,
2031 ArrayRef<OpFoldResult> multiIndex,
2032 ArrayRef<OpFoldResult> basis) {
2033 assert(multiIndex.size() == basis.size() ||
2034 multiIndex.size() == basis.size() + 1);
2035 SmallVector<AffineExpr> basisAffine;
2036
2037 // Add a fake initial size in order to make the later index linearization
2038 // computations line up if an outer bound is not provided.
2039 if (multiIndex.size() == basis.size() + 1)
2040 basisAffine.push_back(getAffineConstantExpr(1, builder.getContext()));
2041
2042 for (size_t i = 0; i < basis.size(); ++i) {
2043 basisAffine.push_back(getAffineSymbolExpr(i, builder.getContext()));
2044 }
2045
2046 SmallVector<AffineExpr> stridesAffine = computeStrides(basisAffine);
2047 SmallVector<OpFoldResult> strides;
2048 strides.reserve(stridesAffine.size());
2049 llvm::transform(stridesAffine, std::back_inserter(strides),
2050 [&builder, &basis, loc](AffineExpr strideExpr) {
2051 return affine::makeComposedFoldedAffineApply(
2052 builder, loc, strideExpr, basis);
2053 });
2054
2055 auto &&[linearIndexExpr, multiIndexAndStrides] = computeLinearIndex(
2056 OpFoldResult(builder.getIndexAttr(0)), strides, multiIndex);
2057 return affine::makeComposedFoldedAffineApply(builder, loc, linearIndexExpr,
2058 multiIndexAndStrides);
2059}
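// Example (editorial sketch, not part of the original source): linearizing a
// 3-d index (i, j, k) over a (4, 8, 16) basis, i.e. i * 128 + j * 16 + k, as a
// folded affine.apply. Because all inputs are OpFoldResults, constant operands
// fold away. The helper name is illustrative only.
static OpFoldResult linearize3D(OpBuilder &b, Location loc, Value i, Value j,
                                Value k) {
  SmallVector<OpFoldResult> multiIndex = {i, j, k};
  SmallVector<OpFoldResult> basis = {b.getIndexAttr(4), b.getIndexAttr(8),
                                     b.getIndexAttr(16)};
  return affine::linearizeIndex(b, loc, multiIndex, basis);
}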