29#define GEN_PASS_DEF_SCFFORLOOPPEELING
30#define GEN_PASS_DEF_SCFFORLOOPSPECIALIZATION
31#define GEN_PASS_DEF_SCFPARALLELLOOPSPECIALIZATION
32#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
46 constantIndices.reserve(op.getUpperBound().size());
47 for (
auto bound : op.getUpperBound()) {
48 auto minOp = bound.getDefiningOp<AffineMinOp>();
51 int64_t minConstant = std::numeric_limits<int64_t>::max();
52 for (
AffineExpr expr : minOp.getMap().getResults()) {
53 if (
auto constantIndex = dyn_cast<AffineConstantExpr>(expr))
54 minConstant = std::min(minConstant, constantIndex.getValue());
56 if (minConstant == std::numeric_limits<int64_t>::max())
58 constantIndices.push_back(minConstant);
64 for (
auto bound : llvm::zip(op.getUpperBound(), constantIndices)) {
67 Value cmp = arith::CmpIOp::create(
b, op.getLoc(), arith::CmpIPredicate::eq,
68 std::get<0>(bound), constant);
69 cond = cond ? arith::AndIOp::create(
b, op.getLoc(), cond, cmp) : cmp;
70 map.
map(std::get<0>(bound), constant);
72 auto ifOp = scf::IfOp::create(
b, op.getLoc(), cond,
true);
73 ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);
74 ifOp.getElseBodyBuilder().clone(*op.getOperation());
83 auto bound = op.getUpperBound();
84 auto minOp = bound.getDefiningOp<AffineMinOp>();
87 int64_t minConstant = std::numeric_limits<int64_t>::max();
88 for (
AffineExpr expr : minOp.getMap().getResults()) {
89 if (
auto constantIndex = dyn_cast<AffineConstantExpr>(expr))
90 minConstant = std::min(minConstant, constantIndex.getValue());
92 if (minConstant == std::numeric_limits<int64_t>::max())
97 Value constant = arith::ConstantOp::create(
99 IntegerAttr::get(op.getUpperBound().getType(), minConstant));
100 Value cond = arith::CmpIOp::create(
b, op.getLoc(), arith::CmpIPredicate::eq,
102 map.
map(bound, constant);
103 auto ifOp = scf::IfOp::create(
b, op.getLoc(), cond,
true);
104 ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);
105 ifOp.getElseBodyBuilder().clone(*op.getOperation());
124 ForOp &partialIteration,
Value &splitBound) {
132 if (stepInt && *stepInt <= 1)
137 if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
146 if (
auto constExpr = dyn_cast<AffineConstantExpr>(map.
getResult(0)))
147 if (constExpr.getValue() == 0)
151 auto modMap =
AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
152 b.setInsertionPoint(forOp);
153 auto loc = forOp.getLoc();
154 splitBound =
b.createOrFold<AffineApplyOp>(loc, modMap,
156 forOp.getUpperBound(),
158 if (splitBound.
getType() != forOp.getLowerBound().getType())
159 splitBound =
b.createOrFold<arith::IndexCastOp>(
160 loc, forOp.getLowerBound().
getType(), splitBound);
163 b.setInsertionPointAfter(forOp);
164 partialIteration = cast<ForOp>(
b.clone(*forOp.getOperation()));
165 partialIteration.getLowerBoundMutable().assign(splitBound);
166 b.replaceAllUsesWith(forOp.getResults(), partialIteration->getResults());
167 partialIteration.getInitArgsMutable().assign(forOp->getResults());
170 b.modifyOpInPlace(forOp,
171 [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
177 ForOp partialIteration,
179 Value mainIv = forOp.getInductionVar();
180 Value partialIv = partialIteration.getInductionVar();
181 assert(forOp.getStep() == partialIteration.getStep() &&
182 "expected same step in main and partial loop");
183 Value step = forOp.getStep();
186 if (!isa<AffineMinOp, AffineMaxOp>(affineOp))
193 partialIteration.walk([&](
Operation *affineOp) {
194 if (!isa<AffineMinOp, AffineMaxOp>(affineOp))
204 ForOp &partialIteration) {
205 Value previousUb = forOp.getUpperBound();
207 if (failed(
peelForLoop(rewriter, forOp, partialIteration, splitBound)))
222 ForOp &firstIteration) {
229 if (lbInt && ubInt && stepInt &&
ceil(
float(*ubInt - *lbInt) / *stepInt) <= 1)
237 b.setInsertionPoint(forOp);
238 auto loc = forOp.getLoc();
239 Value splitBound =
b.createOrFold<AffineApplyOp>(
240 loc, ubMap,
ValueRange{forOp.getLowerBound(), forOp.getStep()});
241 if (splitBound.
getType() != forOp.getUpperBound().getType())
242 splitBound =
b.createOrFold<arith::IndexCastOp>(
243 loc, forOp.getUpperBound().
getType(), splitBound);
246 firstIteration = cast<ForOp>(
b.clone(*forOp.getOperation()));
247 b.modifyOpInPlace(firstIteration, [&]() {
248 firstIteration.getUpperBoundMutable().assign(splitBound);
251 b.modifyOpInPlace(forOp, [&]() {
252 forOp.getInitArgsMutable().assign(firstIteration->getResults());
253 forOp.getLowerBoundMutable().assign(splitBound);
264 ForLoopPeelingPattern(
MLIRContext *ctx,
bool peelFront,
bool skipPartial)
266 skipPartial(skipPartial) {}
268 LogicalResult matchAndRewrite(ForOp forOp,
270 if (forOp.getUnsignedCmp())
272 "unsigned loops are not supported");
278 scf::ForOp partialIteration;
282 peelForLoopFirstIteration(rewriter, forOp, partialIteration))) {
297 peelForLoopAndSimplifyBounds(rewriter, forOp, partialIteration)))
326struct ParallelLoopSpecialization
328 ParallelLoopSpecialization> {
329 void runOnOperation()
override {
330 getOperation()->walk(
335struct ForLoopSpecialization
337 void runOnOperation()
override {
343 using impl::SCFForLoopPeelingBase<ForLoopPeeling>::SCFForLoopPeelingBase;
345 void runOnOperation()
override {
346 auto *parentOp = getOperation();
347 MLIRContext *ctx = parentOp->getContext();
349 patterns.add<ForLoopPeelingPattern>(ctx, peelFront, skipPartial);
353 parentOp->walk([](Operation *op) {
362 return std::make_unique<ParallelLoopSpecialization>();
366 return std::make_unique<ForLoopSpecialization>();
static void specializeForLoopForUnrolling(ForOp op)
Rewrite a for loop with bounds defined by an affine.min with a constant into 2 loops after checking if the bounds are equal to that constant.
static void specializeParallelLoopForUnrolling(ParallelOp op)
Rewrite a parallel loop with bounds defined by an affine.min with a constant into 2 loops after checking if the bounds are equal to that constant.
static constexpr char kPeeledLoopLabel[]
static void rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp, ForOp partialIteration, Value previousUb)
static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, ForOp &partialIteration, Value &splitBound)
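Rewrite a for loop with bounds/step that potentially do not divide evenly into a for loop where the step divides the iteration space evenly, followed by another scf.for for the last (partial) iteration (if any).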
static constexpr char kPartialIterationLabel[]
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineExpr getResult(unsigned idx) const
This is a utility class for mapping one set of IR entities to another.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation is the basic unit of execution within MLIR.
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
static WalkResult advance()
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands, bool composeAffineMin=false)
Given an affine map `map` and its input `operands`, this method composes into `map`, maps of AffineApplyOps...
DynamicAPInt ceil(const Fraction &f)
LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
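Rewrite a for loop with bounds/step that potentially do not divide evenly into a for loop where the step divides the iteration space evenly, followed by another scf.for for the last (partial) iteration (if any).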
LogicalResult peelForLoopFirstIteration(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Peel the first iteration out of the scf.for loop.
LogicalResult rewritePeeledMinMaxOp(RewriterBase &rewriter, Operation *op, Value iv, Value ub, Value step, bool insideLoop)
Try to simplify the given affine.min/max operation op after loop peeling.
Include the generated interface declarations.
std::unique_ptr< Pass > createParallelLoopSpecializationPass()
Creates a pass that specializes parallel loop for unrolling and vectorization.
std::unique_ptr< Pass > createForLoopSpecializationPass()
Creates a pass that specializes for loop for unrolling and vectorization.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
const FrozenRewritePatternSet & patterns
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...