MLIR  20.0.0git
Transforms.h
Go to the documentation of this file.
1 //===- Transforms.h - SCF dialect transformation utilities ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines transformations on SCF operations.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef MLIR_DIALECT_SCF_TRANSFORMS_TRANSFORMS_H_
14 #define MLIR_DIALECT_SCF_TRANSFORMS_TRANSFORMS_H_
15 
17 #include "mlir/Support/LLVM.h"
18 #include "llvm/ADT/ArrayRef.h"
19 
20 namespace mlir {
21 class Region;
22 class RewriterBase;
23 class Operation;
24 class Value;
25 
26 namespace scf {
27 
28 class IfOp;
29 class ForallOp;
30 class ForOp;
31 class ParallelOp;
32 class WhileOp;
33 
34 /// Try converting scf.forall into a set of nested scf.for loops.
35 /// The newly created scf.for ops will be returned through the `results`
36 /// vector if provided.
37 LogicalResult forallToForLoop(RewriterBase &rewriter, ForallOp forallOp,
38  SmallVectorImpl<Operation *> *results = nullptr);
39 
40 /// Try converting scf.forall into an scf.parallel loop.
41 /// The conversion is only supported for forall operations with no results.
42 LogicalResult forallToParallelLoop(RewriterBase &rewriter, ForallOp forallOp,
43  ParallelOp *result = nullptr);
44 
45 /// Fuses all adjacent scf.parallel operations with identical bounds and step
46 /// into one scf.parallel operations. Uses a naive aliasing and dependency
47 /// analysis.
48 /// User can additionally customize alias checking with `mayAlias` hook.
49 /// `mayAlias` must return false if 2 values are guaranteed to not alias.
50 void naivelyFuseParallelOps(Region &region,
52 
53 /// Rewrite a for loop with bounds/step that potentially do not divide evenly
54 /// into a for loop where the step divides the iteration space evenly, followed
55 /// by another scf.for for the last (partial) iteration (if any; returned via
56 /// `partialIteration`). This transformation is called "loop peeling".
57 ///
58 /// This transformation is beneficial for a wide range of transformations such
59 /// as vectorization or loop tiling: It enables additional canonicalizations
60 /// inside the peeled loop body such as rewriting masked loads into unmaked
61 /// loads.
62 ///
63 /// E.g., assuming a lower bound of 0 (for illustration purposes):
64 /// ```
65 /// scf.for %iv = %c0 to %ub step %c4 {
66 /// (loop body)
67 /// }
68 /// ```
69 /// is rewritten into the following pseudo IR:
70 /// ```
71 /// %newUb = %ub - (%ub mod %c4)
72 /// scf.for %iv = %c0 to %newUb step %c4 {
73 /// (loop body)
74 /// }
75 /// scf.for %iv2 = %newUb to %ub {
76 /// (loop body)
77 /// }
78 /// ```
79 ///
80 /// After loop peeling, this function tries to simplify affine.min and
81 /// affine.max ops in the body of the peeled loop and in the body of the partial
82 /// iteration loop, taking advantage of the fact that the peeled loop has only
83 /// "full" iterations. This simplification is expected to enable further
84 /// canonicalization opportunities through other patterns.
85 ///
86 /// The return value indicates whether the loop was rewritten or not. Loops are
87 /// not rewritten if:
88 /// * Loop step size is 1 or
89 /// * Loop bounds and step size are static, and step already divides the
90 /// iteration space evenly.
91 ///
92 /// Note: This function rewrites the given scf.for loop in-place and creates a
93 /// new scf.for operation for the last iteration. It replaces all uses of the
94 /// unpeeled loop with the results of the newly generated scf.for.
95 LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp,
96  scf::ForOp &partialIteration);
97 
98 /// Peel the first iteration out of the scf.for loop. If there is only one
99 /// iteration, return the original loop.
100 LogicalResult peelForLoopFirstIteration(RewriterBase &rewriter, ForOp forOp,
101  scf::ForOp &partialIteration);
102 
103 /// Tile a parallel loop of the form
104 /// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
105 /// step (%arg4, %arg5)
106 ///
107 /// into
108 /// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
109 /// step (%arg4*tileSize[0],
110 /// %arg5*tileSize[1])
111 /// scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)
112 /// min(tileSize[1], %arg3-%j1))
113 /// step (%arg4, %arg5)
114 /// The old loop is replaced with the new one.
115 ///
116 /// The function returns the resulting ParallelOps, i.e. {outer_loop_op,
117 /// inner_loop_op}.
118 std::pair<ParallelOp, ParallelOp>
119 tileParallelLoop(ParallelOp op, llvm::ArrayRef<int64_t> tileSizes,
120  bool noMinMaxBounds);
121 
122 /// Options to dictate how loops should be pipelined.
124  /// Lambda returning all the operation in the forOp, with their stage, in the
125  /// order picked for the pipelined loop.
126  using GetScheduleFnType = std::function<void(
127  scf::ForOp, std::vector<std::pair<Operation *, unsigned>> &)>;
129  enum class PipelinerPart {
130  Prologue,
131  Kernel,
132  Epilogue,
133  };
134  /// Lambda called by the pipeliner to allow the user to annotate the IR while
135  /// it is generated.
136  /// The callback passes the operation created along with the part of the
137  /// pipeline and the iteration index. The iteration index is always 0 for the
138  /// kernel. For the prologue and epilogue, it corresponds to the iteration
139  /// peeled out of the loop in the range [0, maxStage[.
141  std::function<void(Operation *, PipelinerPart, unsigned)>;
143 
144  /// Control whether the epilogue should be peeled out of the loop or
145  /// operations should be predicated to skip the early stages in the last loop
146  /// iterations. If the epilogue is predicated; the user needs to provide a
147  /// lambda to generate the predicated version of operations.
148  bool peelEpilogue = true;
149 
150  /// Control whether the transformation checks that the number of iterations is
151  /// greater or equal to the number of stages and skip the transformation if
152  /// this is not the case. If the loop is dynamic and this is set to true and
153  /// the loop bounds are not static the pipeliner will have to predicate
154  /// operations in the the prologue/epilogue.
155  bool supportDynamicLoops = false;
156 
157  // Callback to predicate operations when the prologue or epilogue are not
158  // peeled. This takes the original operation, an i1 predicate value and the
159  // pattern rewriter. It is expected to replace the given operation with
160  // the predicated equivalent and return it, or return nullptr if the
161  // predication is impossible. In the latter case, pipelining will fail and
162  // may leave IR in a partially transformed state.
164  std::function<Operation *(RewriterBase &, Operation *, Value)>;
166 
167  // TODO: add option to decide if the prologue should be peeled.
168 };
169 
170 /// Generate a pipelined version of the scf.for loop based on the schedule given
171 /// as option. This applies the mechanical transformation of changing the loop
172 /// and generating the prologue/epilogue for the pipelining and doesn't make any
173 /// decision regarding the schedule.
174 /// Based on the options the loop is split into several stages.
175 /// The transformation assumes that the scheduling given by user is valid.
176 /// For example if we break a loop into 3 stages named S0, S1, S2 we would
177 /// generate the following code with the number in parenthesis as the iteration
178 /// index:
179 ///
180 /// S0(0) // Prologue
181 /// S0(1) S1(0) // Prologue
182 /// scf.for %I = %C0 to %N - 2 {
183 /// S0(I+2) S1(I+1) S2(I) // Pipelined kernel
184 /// }
185 /// S1(N) S2(N-1) // Epilogue
186 /// S2(N) // Epilogue
187 ///
188 /// If `modifiedIR` is provided, it will be set to a value that indicates
189 /// whether pipelining modified the IR before failing, signaling to the caller
190 /// whether they can proceed with different transformations.
191 FailureOr<ForOp> pipelineForLoop(RewriterBase &rewriter, ForOp forOp,
192  const PipeliningOption &options,
193  bool *modifiedIR = nullptr);
194 
195 /// Create zero-trip-check around a `while` op and return the new loop op in the
196 /// check. The while loop is rotated to avoid evaluating the condition twice
197 ///
198 /// By default the check won't be created for do-while loop as it is not
199 /// required. `forceCreateCheck` can force the creation.
200 ///
201 /// It turns:
202 ///
203 /// scf.while (%arg0 = %init) : (i32) -> i64 {
204 /// %val = .., %arg0 : i64
205 /// %cond = arith.cmpi .., %arg0 : i32
206 /// scf.condition(%cond) %val : i64
207 /// } do {
208 /// ^bb0(%arg1: i64):
209 /// %next = .., %arg1 : i32
210 /// scf.yield %next : i32
211 /// }
212 ///
213 /// into:
214 ///
215 /// %pre_val = .., %init : i64
216 /// %pre_cond = arith.cmpi .., %init : i32
217 /// scf.if %pre_cond -> i64 {
218 /// %res = scf.while (%arg1 = %va0) : (i64) -> i64 {
219 /// %next = .., %arg1 : i32
220 /// %val = .., %next : i64
221 /// %cond = arith.cmpi .., %next : i32
222 /// scf.condition(%cond) %val : i64
223 /// } do {
224 /// ^bb0(%arg2: i64):
225 /// %scf.yield %arg2 : i32
226 /// }
227 /// scf.yield %res : i64
228 /// } else {
229 /// scf.yield %pre_val : i64
230 /// }
231 FailureOr<WhileOp> wrapWhileLoopInZeroTripCheck(WhileOp whileOp,
232  RewriterBase &rewriter,
233  bool forceCreateCheck = false);
234 
235 /// Try to uplift `scf.while` op to `scf.for`.
236 /// Uplifitng expects a specific ops pattern:
237 /// * `before` block consisting of single arith.cmp op
238 /// * `after` block containing arith.addi
239 FailureOr<ForOp> upliftWhileToForLoop(RewriterBase &rewriter, WhileOp loop);
240 
241 } // namespace scf
242 } // namespace mlir
243 
244 #endif // MLIR_DIALECT_SCF_TRANSFORMS_TRANSFORMS_H_
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static llvm::ManagedStatic< PassManagerOptions > options
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:400
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
FailureOr< ForOp > upliftWhileToForLoop(RewriterBase &rewriter, WhileOp loop)
Try to uplift scf.while op to scf.for.
void naivelyFuseParallelOps(Region &region, llvm::function_ref< bool(Value, Value)> mayAlias)
Fuses all adjacent scf.parallel operations with identical bounds and step into one scf....
std::pair< ParallelOp, ParallelOp > tileParallelLoop(ParallelOp op, llvm::ArrayRef< int64_t > tileSizes, bool noMinMaxBounds)
Tile a parallel loop of the form scf.parallel (i0, i1) = (arg0, arg1) to (arg2, arg3) step (arg4,...
LogicalResult peelForLoopAndSimplifyBounds(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Rewrite a for loop with bounds/step that potentially do not divide evenly into a for loop where the s...
LogicalResult forallToForLoop(RewriterBase &rewriter, ForallOp forallOp, SmallVectorImpl< Operation * > *results=nullptr)
Try converting scf.forall into a set of nested scf.for loops.
LogicalResult peelForLoopFirstIteration(RewriterBase &rewriter, ForOp forOp, scf::ForOp &partialIteration)
Peel the first iteration out of the scf.for loop.
LogicalResult forallToParallelLoop(RewriterBase &rewriter, ForallOp forallOp, ParallelOp *result=nullptr)
Try converting scf.forall into an scf.parallel loop.
FailureOr< WhileOp > wrapWhileLoopInZeroTripCheck(WhileOp whileOp, RewriterBase &rewriter, bool forceCreateCheck=false)
Create zero-trip-check around a while op and return the new loop op in the check.
FailureOr< ForOp > pipelineForLoop(RewriterBase &rewriter, ForOp forOp, const PipeliningOption &options, bool *modifiedIR=nullptr)
Generate a pipelined version of the scf.for loop based on the schedule given as option.
Include the generated interface declarations.
Options to dictate how loops should be pipelined.
Definition: Transforms.h:123
std::function< void(scf::ForOp, std::vector< std::pair< Operation *, unsigned > > &)> GetScheduleFnType
Lambda returning all the operation in the forOp, with their stage, in the order picked for the pipeli...
Definition: Transforms.h:127
GetScheduleFnType getScheduleFn
Definition: Transforms.h:128
std::function< void(Operation *, PipelinerPart, unsigned)> AnnotationlFnType
Lambda called by the pipeliner to allow the user to annotate the IR while it is generated.
Definition: Transforms.h:141
bool peelEpilogue
Control whether the epilogue should be peeled out of the loop or operations should be predicated to s...
Definition: Transforms.h:148
AnnotationlFnType annotateFn
Definition: Transforms.h:142
bool supportDynamicLoops
Control whether the transformation checks that the number of iterations is greater or equal to the nu...
Definition: Transforms.h:155
std::function< Operation *(RewriterBase &, Operation *, Value)> PredicateOpFn
Definition: Transforms.h:164