29 #define GEN_PASS_DEF_SCFFORLOOPPEELING
30 #define GEN_PASS_DEF_SCFFORLOOPSPECIALIZATION
31 #define GEN_PASS_DEF_SCFPARALLELLOOPSPECIALIZATION
32 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
38 using scf::ParallelOp;
46 constantIndices.reserve(op.getUpperBound().size());
47 for (
auto bound : op.getUpperBound()) {
48 auto minOp = bound.getDefiningOp<AffineMinOp>();
52 for (
AffineExpr expr : minOp.getMap().getResults()) {
58 constantIndices.push_back(minConstant);
64 for (
auto bound : llvm::zip(op.getUpperBound(), constantIndices)) {
67 Value cmp = arith::CmpIOp::create(b, op.getLoc(), arith::CmpIPredicate::eq,
68 std::get<0>(bound), constant);
69 cond = cond ? arith::AndIOp::create(b, op.getLoc(), cond, cmp) : cmp;
70 map.
map(std::get<0>(bound), constant);
72 auto ifOp = scf::IfOp::create(b, op.getLoc(), cond,
true);
73 ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);
74 ifOp.getElseBodyBuilder().clone(*op.getOperation());
83 auto bound = op.getUpperBound();
84 auto minOp = bound.getDefiningOp<AffineMinOp>();
88 for (
AffineExpr expr : minOp.getMap().getResults()) {
97 Value constant = arith::ConstantOp::create(
100 Value cond = arith::CmpIOp::create(b, op.getLoc(), arith::CmpIPredicate::eq,
102 map.
map(bound, constant);
103 auto ifOp = scf::IfOp::create(b, op.getLoc(), cond,
true);
104 ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);
105 ifOp.getElseBodyBuilder().clone(*op.getOperation());
121 ForOp &partialIteration,
Value &splitBound) {
122 RewriterBase::InsertionGuard guard(b);
129 if (stepInt && *stepInt <= 1)
134 if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
143 if (
auto constExpr = dyn_cast<AffineConstantExpr>(map.
getResult(0)))
144 if (constExpr.getValue() == 0)
148 auto modMap =
AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
150 auto loc = forOp.getLoc();
153 forOp.getUpperBound(),
155 if (splitBound.
getType() != forOp.getLowerBound().getType())
157 loc, forOp.getLowerBound().getType(), splitBound);
161 partialIteration = cast<ForOp>(b.
clone(*forOp.getOperation()));
162 partialIteration.getLowerBoundMutable().assign(splitBound);
164 partialIteration.getInitArgsMutable().assign(forOp->getResults());
168 [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
174 ForOp partialIteration,
176 Value mainIv = forOp.getInductionVar();
177 Value partialIv = partialIteration.getInductionVar();
178 assert(forOp.getStep() == partialIteration.getStep() &&
179 "expected same step in main and partial loop");
180 Value step = forOp.getStep();
183 if (!isa<AffineMinOp, AffineMaxOp>(affineOp))
190 partialIteration.walk([&](
Operation *affineOp) {
191 if (!isa<AffineMinOp, AffineMaxOp>(affineOp))
201 ForOp &partialIteration) {
202 Value previousUb = forOp.getUpperBound();
219 ForOp &firstIteration) {
220 RewriterBase::InsertionGuard guard(b);
226 if (lbInt && ubInt && stepInt &&
ceil(
float(*ubInt - *lbInt) / *stepInt) <= 1)
235 auto loc = forOp.getLoc();
237 loc, ubMap,
ValueRange{forOp.getLowerBound(), forOp.getStep()});
238 if (splitBound.
getType() != forOp.getUpperBound().getType())
240 loc, forOp.getUpperBound().getType(), splitBound);
244 map.
map(forOp.getUpperBound(), splitBound);
245 firstIteration = cast<ForOp>(b.
clone(*forOp.getOperation(), map));
249 forOp.getInitArgsMutable().assign(firstIteration->getResults());
250 forOp.getLowerBoundMutable().assign(splitBound);
261 ForLoopPeelingPattern(
MLIRContext *ctx,
bool peelFront,
bool skipPartial)
263 skipPartial(skipPartial) {}
265 LogicalResult matchAndRewrite(ForOp forOp,
267 if (forOp.getUnsignedCmp())
269 "unsigned loops are not supported");
275 scf::ForOp partialIteration;
323 struct ParallelLoopSpecialization
324 :
public impl::SCFParallelLoopSpecializationBase<
325 ParallelLoopSpecialization> {
326 void runOnOperation()
override {
327 getOperation()->walk(
332 struct ForLoopSpecialization
333 :
public impl::SCFForLoopSpecializationBase<ForLoopSpecialization> {
334 void runOnOperation()
override {
339 struct ForLoopPeeling :
public impl::SCFForLoopPeelingBase<ForLoopPeeling> {
340 void runOnOperation()
override {
341 auto *parentOp = getOperation();
344 patterns.add<ForLoopPeelingPattern>(ctx, peelFront, skipPartial);
357 return std::make_unique<ParallelLoopSpecialization>();
361 return std::make_unique<ForLoopSpecialization>();
365 return std::make_unique<ForLoopPeeling>();
static void specializeForLoopForUnrolling(ForOp op)
Rewrite a for loop with bounds defined by an affine.min with a constant into 2 loops after checking i...
static void specializeParallelLoopForUnrolling(ParallelOp op)
Rewrite a parallel loop with bounds defined by an affine.min with a constant into 2 loops after check...
static constexpr char kPeeledLoopLabel[]
static void rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp, ForOp partialIteration, Value previousUb)
static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, ForOp &partialIteration, Value &splitBound)
Rewrite a for loop with bounds/step that potentially do not divide evenly into a for loop where the s...
static constexpr char kPartialIterationLabel[]
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
AffineExpr getResult(unsigned idx) const
MLIRContext * getContext() const
This is a utility class for mapping one set of IR entities to another.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Operation is the basic unit of execution within MLIR.
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
virtual void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
static WalkResult advance()
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
void fullyComposeAffineMapAndOperands(AffineMap *map, SmallVectorImpl< Value > *operands, bool composeAffineMin=false)
Given an affine map map and its input operands, this method composes into map, maps of AffineApplyOps...
DynamicAPInt ceil(const Fraction &f)
LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Rewrite a for loop with bounds/step that potentially do not divide evenly into a for loop where the s...
LogicalResult peelForLoopFirstIteration(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Peel the first iteration out of the scf.for loop.
LogicalResult rewritePeeledMinMaxOp(RewriterBase &rewriter, Operation *op, Value iv, Value ub, Value step, bool insideLoop)
Try to simplify the given affine.min/max operation op after loop peeling.
Value constantIndex(OpBuilder &builder, Location loc, int64_t i)
Generates a constant of index type.
Include the generated interface declarations.
std::unique_ptr< Pass > createParallelLoopSpecializationPass()
Creates a pass that specializes parallel loop for unrolling and vectorization.
std::unique_ptr< Pass > createForLoopSpecializationPass()
Creates a pass that specializes for loop for unrolling and vectorization.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
std::unique_ptr< Pass > createForLoopPeelingPass()
Creates a pass that peels for loops at their upper bounds for better vectorization.
const FrozenRewritePatternSet & patterns
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...