MLIR 22.0.0git
Transforms.h
Go to the documentation of this file.
1//===- Transforms.h - SCF dialect transformation utilities ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header file defines transformations on SCF operations.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef MLIR_DIALECT_SCF_TRANSFORMS_TRANSFORMS_H_
14#define MLIR_DIALECT_SCF_TRANSFORMS_TRANSFORMS_H_
15
18#include "mlir/Support/LLVM.h"
19#include "llvm/ADT/ArrayRef.h"
20
21namespace mlir {
22class Region;
23class RewriterBase;
24class Operation;
25class Value;
26
27namespace scf {
28
29class IfOp;
30class ForallOp;
31class ForOp;
32class ParallelOp;
33class WhileOp;
34
35/// Try converting scf.forall into a set of nested scf.for loops.
36/// The newly created scf.for ops will be returned through the `results`
37/// vector if provided.
38LogicalResult forallToForLoop(RewriterBase &rewriter, ForallOp forallOp,
39 SmallVectorImpl<Operation *> *results = nullptr);
40
41/// Try converting scf.forall into an scf.parallel loop.
42/// The conversion is only supported for forall operations with no results.
43LogicalResult forallToParallelLoop(RewriterBase &rewriter, ForallOp forallOp,
44 ParallelOp *result = nullptr);
45
46/// Try converting scf.parallel into a nest of scf.for loops.
47/// The conversion is only supported for parallel operations with no results.
48FailureOr<scf::LoopNest> parallelForToNestedFors(RewriterBase &rewriter,
49 ParallelOp parallelOp);
50
51/// Fuses all adjacent scf.parallel operations with identical bounds and step
52/// into one scf.parallel operation. Uses a naive aliasing and dependency
53/// analysis.
54/// User can additionally customize alias checking with `mayAlias` hook.
55/// `mayAlias` must return false if 2 values are guaranteed to not alias.
58
59/// Rewrite a for loop with bounds/step that potentially do not divide evenly
60/// into a for loop where the step divides the iteration space evenly, followed
61/// by another scf.for for the last (partial) iteration (if any; returned via
62/// `partialIteration`). This transformation is called "loop peeling".
63///
64/// This transformation is beneficial for a wide range of transformations such
65/// as vectorization or loop tiling: It enables additional canonicalizations
66/// inside the peeled loop body such as rewriting masked loads into unmasked
67/// loads.
68///
69/// E.g., assuming a lower bound of 0 (for illustration purposes):
70/// ```
71/// scf.for %iv = %c0 to %ub step %c4 {
72/// (loop body)
73/// }
74/// ```
75/// is rewritten into the following pseudo IR:
76/// ```
77/// %newUb = %ub - (%ub mod %c4)
78/// scf.for %iv = %c0 to %newUb step %c4 {
79/// (loop body)
80/// }
81/// scf.for %iv2 = %newUb to %ub {
82/// (loop body)
83/// }
84/// ```
85///
86/// After loop peeling, this function tries to simplify affine.min and
87/// affine.max ops in the body of the peeled loop and in the body of the partial
88/// iteration loop, taking advantage of the fact that the peeled loop has only
89/// "full" iterations. This simplification is expected to enable further
90/// canonicalization opportunities through other patterns.
91///
92/// The return value indicates whether the loop was rewritten or not. Loops are
93/// not rewritten if:
94/// * Loop step size is 1 or
95/// * Loop bounds and step size are static, and step already divides the
96/// iteration space evenly.
97///
98/// Note: This function rewrites the given scf.for loop in-place and creates a
99/// new scf.for operation for the last iteration. It replaces all uses of the
100/// unpeeled loop with the results of the newly generated scf.for.
101LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp,
102 scf::ForOp &partialIteration);
103
104/// Peel the first iteration out of the scf.for loop. If there is only one
105/// iteration, return the original loop.
106LogicalResult peelForLoopFirstIteration(RewriterBase &rewriter, ForOp forOp,
107 scf::ForOp &partialIteration);
108
109/// Tile a parallel loop of the form
110/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
111/// step (%arg4, %arg5)
112///
113/// into
114/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
115/// step (%arg4*tileSize[0],
116/// %arg5*tileSize[1])
117/// scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)
118/// min(tileSize[1], %arg3-%j1))
119/// step (%arg4, %arg5)
120/// The old loop is replaced with the new one.
121///
122/// The function returns the resulting ParallelOps, i.e. {outer_loop_op,
123/// inner_loop_op}.
124std::pair<ParallelOp, ParallelOp>
126 bool noMinMaxBounds);
127
128/// Options to dictate how loops should be pipelined.
130 /// Lambda returning all the operations in the forOp, with their stage, in the
131 /// order picked for the pipelined loop.
132 using GetScheduleFnType = std::function<void(
133 scf::ForOp, std::vector<std::pair<Operation *, unsigned>> &)>;
140 /// Lambda called by the pipeliner to allow the user to annotate the IR while
141 /// it is generated.
142 /// The callback passes the operation created along with the part of the
143 /// pipeline and the iteration index. The iteration index is always 0 for the
144 /// kernel. For the prologue and epilogue, it corresponds to the iteration
145 /// peeled out of the loop in the range [0, maxStage].
147 std::function<void(Operation *, PipelinerPart, unsigned)>;
149
150 /// Control whether the epilogue should be peeled out of the loop or
151 /// operations should be predicated to skip the early stages in the last loop
152 /// iterations. If the epilogue is predicated, the user needs to provide a
153 /// lambda to generate the predicated version of operations.
154 bool peelEpilogue = true;
155
156 /// Control whether the transformation checks that the number of iterations is
157 /// greater or equal to the number of stages and skip the transformation if
158 /// this is not the case. If the loop is dynamic and this is set to true and
159 /// the loop bounds are not static the pipeliner will have to predicate
160 /// operations in the prologue/epilogue.
162
163 // Callback to predicate operations when the prologue or epilogue are not
164 // peeled. This takes the original operation, an i1 predicate value and the
165 // pattern rewriter. It is expected to replace the given operation with
166 // the predicated equivalent and return it, or return nullptr if the
167 // predication is impossible. In the latter case, pipelining will fail and
168 // may leave IR in a partially transformed state.
170 std::function<Operation *(RewriterBase &, Operation *, Value)>;
172
173 // TODO: add option to decide if the prologue should be peeled.
174};
175
176/// Generate a pipelined version of the scf.for loop based on the schedule given
177/// as option. This applies the mechanical transformation of changing the loop
178/// and generating the prologue/epilogue for the pipelining and doesn't make any
179/// decision regarding the schedule.
180/// Based on the options the loop is split into several stages.
181/// The transformation assumes that the scheduling given by user is valid.
182/// For example if we break a loop into 3 stages named S0, S1, S2 we would
183/// generate the following code with the number in parenthesis as the iteration
184/// index:
185///
186/// S0(0) // Prologue
187/// S0(1) S1(0) // Prologue
188/// scf.for %I = %C0 to %N - 2 {
189/// S0(I+2) S1(I+1) S2(I) // Pipelined kernel
190/// }
191/// S1(N) S2(N-1) // Epilogue
192/// S2(N) // Epilogue
193///
194/// If `modifiedIR` is provided, it will be set to a value that indicates
195/// whether pipelining modified the IR before failing, signaling to the caller
196/// whether they can proceed with different transformations.
197FailureOr<ForOp> pipelineForLoop(RewriterBase &rewriter, ForOp forOp,
199 bool *modifiedIR = nullptr);
200
201/// Create zero-trip-check around a `while` op and return the new loop op in the
202/// check. The while loop is rotated to avoid evaluating the condition twice.
203///
204/// By default the check won't be created for do-while loop as it is not
205/// required. `forceCreateCheck` can force the creation.
206///
207/// It turns:
208///
209/// scf.while (%arg0 = %init) : (i32) -> i64 {
210/// %val = .., %arg0 : i64
211/// %cond = arith.cmpi .., %arg0 : i32
212/// scf.condition(%cond) %val : i64
213/// } do {
214/// ^bb0(%arg1: i64):
215/// %next = .., %arg1 : i32
216/// scf.yield %next : i32
217/// }
218///
219/// into:
220///
221/// %pre_val = .., %init : i64
222/// %pre_cond = arith.cmpi .., %init : i32
223/// scf.if %pre_cond -> i64 {
224/// %res = scf.while (%arg1 = %pre_val) : (i64) -> i64 {
225/// %next = .., %arg1 : i32
226/// %val = .., %next : i64
227/// %cond = arith.cmpi .., %next : i32
228/// scf.condition(%cond) %val : i64
229/// } do {
230/// ^bb0(%arg2: i64):
231/// scf.yield %arg2 : i64
232/// }
233/// scf.yield %res : i64
234/// } else {
235/// scf.yield %pre_val : i64
236/// }
237///
238/// Failure mechanism is not implemented for this function, so it currently
239/// always returns a `WhileOp` operation: a new one if the transformation took
240/// place or the input `whileOp` if the loop was already in a `do-while` form
241/// and `forceCreateCheck` is `false`.
242FailureOr<WhileOp> wrapWhileLoopInZeroTripCheck(WhileOp whileOp,
243 RewriterBase &rewriter,
244 bool forceCreateCheck = false);
245
246/// Try to uplift `scf.while` op to `scf.for`.
247/// Uplifting expects a specific ops pattern:
248/// * `before` block consisting of single arith.cmp op
249/// * `after` block containing arith.addi
250FailureOr<ForOp> upliftWhileToForLoop(RewriterBase &rewriter, WhileOp loop);
251
252} // namespace scf
253} // namespace mlir
254
255#endif // MLIR_DIALECT_SCF_TRANSFORMS_TRANSFORMS_H_
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static llvm::ManagedStatic< PassManagerOptions > options
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
FailureOr< WhileOp > wrapWhileLoopInZeroTripCheck(WhileOp whileOp, RewriterBase &rewriter, bool forceCreateCheck=false)
Create zero-trip-check around a while op and return the new loop op in the check.
void naivelyFuseParallelOps(Region &region, llvm::function_ref< bool(Value, Value)> mayAlias)
Fuses all adjacent scf.parallel operations with identical bounds and step into one scf....
LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Rewrite a for loop with bounds/step that potentially do not divide evenly into a for loop where the s...
LogicalResult forallToForLoop(RewriterBase &rewriter, ForallOp forallOp, SmallVectorImpl< Operation * > *results=nullptr)
Try converting scf.forall into a set of nested scf.for loops.
LogicalResult peelForLoopFirstIteration(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Peel the first iteration out of the scf.for loop.
FailureOr< ForOp > upliftWhileToForLoop(RewriterBase &rewriter, WhileOp loop)
Try to uplift scf.while op to scf.for.
LogicalResult forallToParallelLoop(RewriterBase &rewriter, ForallOp forallOp, ParallelOp *result=nullptr)
Try converting scf.forall into an scf.parallel loop.
FailureOr< scf::LoopNest > parallelForToNestedFors(RewriterBase &rewriter, ParallelOp parallelOp)
Try converting scf.parallel into a nest of scf.for loops.
std::pair< ParallelOp, ParallelOp > tileParallelLoop(ParallelOp op, llvm::ArrayRef< int64_t > tileSizes, bool noMinMaxBounds)
Tile a parallel loop of the form scf.parallel (i0, i1) = (arg0, arg1) to (arg2, arg3) step (arg4,...
FailureOr< ForOp > pipelineForLoop(RewriterBase &rewriter, ForOp forOp, const PipeliningOption &options, bool *modifiedIR=nullptr)
Generate a pipelined version of the scf.for loop based on the schedule given as option.
Include the generated interface declarations.
Options to dictate how loops should be pipelined.
Definition Transforms.h:129
std::function< Operation *(RewriterBase &, Operation *, Value)> PredicateOpFn
Definition Transforms.h:169
GetScheduleFnType getScheduleFn
Definition Transforms.h:134
std::function< void( scf::ForOp, std::vector< std::pair< Operation *, unsigned > > &)> GetScheduleFnType
Lambda returning all the operation in the forOp, with their stage, in the order picked for the pipeli...
Definition Transforms.h:132
bool peelEpilogue
Control whether the epilogue should be peeled out of the loop or operations should be predicated to s...
Definition Transforms.h:154
std::function< void(Operation *, PipelinerPart, unsigned)> AnnotationlFnType
Lambda called by the pipeliner to allow the user to annotate the IR while it is generated.
Definition Transforms.h:146
AnnotationlFnType annotateFn
Definition Transforms.h:148
bool supportDynamicLoops
Control whether the transformation checks that the number of iterations is greater or equal to the nu...
Definition Transforms.h:161