MLIR 23.0.0git
Utils.h
Go to the documentation of this file.
1//===- Utils.h - SCF dialect utilities --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header file defines prototypes for various SCF utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef MLIR_DIALECT_SCF_UTILS_UTILS_H_
14#define MLIR_DIALECT_SCF_UTILS_UTILS_H_
15
18#include "mlir/Support/LLVM.h"
19#include "llvm/ADT/STLExtras.h"
20#include <optional>
21#include <tuple>
22
23namespace mlir {
24class Location;
25class Operation;
26class OpBuilder;
27class Region;
28class RewriterBase;
29class ValueRange;
30class Value;
31
32namespace func {
33class CallOp;
34class FuncOp;
35} // namespace func
36
37/// Update a perfectly nested loop nest to yield new values from the innermost
38/// loop and propagating it up through the loop nest. This function
39/// - Expects `loopNest` to be a perfectly nested loop with outer most loop
40/// first and innermost loop last.
41/// - `newIterOperands` are the initialization values to be used for the
42/// outermost loop
43/// - `newYielValueFn` is the callback that generates the new values to be
44/// yielded from within the innermost loop.
45/// - The original loops are not erased, but are left in a "no-op" state where
46/// the body of the loop just yields the basic block arguments that correspond
47/// to the initialization values of a loop. The original loops are dead after
48/// this method.
49/// - If `replaceIterOperandsUsesInLoop` is true, all uses of the
50/// `newIterOperands` within the generated new loop are replaced with the
51/// corresponding `BlockArgument` in the loop body.
53 RewriterBase &rewriter, MutableArrayRef<scf::ForOp> loopNest,
54 ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn,
55 bool replaceIterOperandsUsesInLoop = true);
56
57/// Outline a region with a single block into a new FuncOp.
58/// Assumes the FuncOp result types is the type of the yielded operands of the
59/// single block. This constraint makes it easy to determine the result.
60/// This method also clones the `arith::ConstantIndexOp` at the start of
61/// `outlinedFuncBody` to alloc simple canonicalizations.
62/// Creates a new FuncOp and thus cannot be used in a FuncOp pass.
63/// The client is responsible for providing a unique `funcName` that will not
64/// collide with another FuncOp name. If `callOp` is provided, it will be set
65/// to point to the operation that calls the outlined function.
66// TODO: support more than single-block regions.
67// TODO: more flexible constant handling.
68FailureOr<func::FuncOp>
70 StringRef funcName, func::CallOp *callOp = nullptr);
71
72/// Outline the then and/or else regions of `ifOp` as follows:
73/// - if `thenFn` is not null, `thenFnName` must be specified and the `then`
74/// region is inlined into a new FuncOp that is captured by the pointer.
75/// - if `elseFn` is not null, `elseFnName` must be specified and the `else`
76/// region is inlined into a new FuncOp that is captured by the pointer.
77/// Creates new FuncOps and thus cannot be used in a FuncOp pass.
78/// The client is responsible for providing a unique `thenFnName`/`elseFnName`
79/// that will not collide with another FuncOp name.
80LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn,
81 StringRef thenFnName, func::FuncOp *elseFn,
82 StringRef elseFnName);
83
84/// Get a list of innermost parallel loops contained in `rootOp`. Innermost
85/// parallel loops are those that do not contain further parallel loops
86/// themselves.
89
90/// Return the min/max expressions for `value` if it is an induction variable
91/// from scf.for or scf.parallel loop.
92/// if `loopFilter` is passed, the filter determines which loop to consider.
93/// Other induction variables are ignored.
94std::optional<std::pair<AffineExpr, AffineExpr>>
97 llvm::function_ref<bool(Operation *)> loopFilter = nullptr);
98
99/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
100/// `loops` contains a list of perfectly nested loops with bounds and steps
101/// independent of any loop induction variable involved in the nest.
102LogicalResult coalesceLoops(MutableArrayRef<scf::ForOp> loops);
103LogicalResult coalesceLoops(RewriterBase &rewriter,
105
106/// Walk an affine.for to find a band to coalesce.
107LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op);
108
109/// Take the ParallelLoop and for each set of dimension indices, combine them
110/// into a single dimension. combinedDimensions must contain each index into
111/// loops exactly once.
112void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops,
113 ArrayRef<std::vector<unsigned>> combinedDimensions);
114
116 std::optional<scf::ForOp> mainLoopOp = std::nullopt;
117 std::optional<scf::ForOp> epilogueLoopOp = std::nullopt;
118};
119
120/// Unrolls this for operation by the specified unroll factor. Returns the
121/// unrolled main loop and the epilogue loop, if the loop is unrolled. Otherwise
122/// returns failure if the loop cannot be unrolled either due to restrictions or
123/// due to invalid unroll factors. Requires positive loop bounds and step. If
124/// specified, annotates the Ops in each unrolled iteration by applying
125/// `annotateFn`.
126FailureOr<UnrolledLoopInfo> loopUnrollByFactor(
127 scf::ForOp forOp, uint64_t unrollFactor,
128 function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
129
130/// Unrolls this loop completely.
131LogicalResult loopUnrollFull(scf::ForOp forOp);
132
133/// Unrolls and jams this `scf.for` operation by the specified unroll factor.
134/// Returns failure if the loop cannot be unrolled either due to restrictions or
135/// due to invalid unroll factors. In case of unroll factor of 1, the function
136/// bails out without doing anything (returns success). Currently, only constant
137/// trip count that are divided by the unroll factor is supported. Currently,
138/// for operations with results are not supported.
139LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor);
140
141/// Materialize bounds and step of a zero-based and unit-step loop derived by
142/// normalizing the specified bounds and step.
145 OpFoldResult step);
146
147/// Get back the original induction variable values after loop normalization.
149 Value normalizedIv, OpFoldResult origLb,
150 OpFoldResult origStep);
151
152/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
153/// parametric tile sizes that the outer loops have a fixed number of iterations
154/// as defined in `sizes`.
156using TileLoops = std::pair<Loops, Loops>;
157TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef<int64_t> sizes);
158
159/// Performs tiling fo imperfectly nested loops (with interchange) by
160/// strip-mining the `forOps` by `sizes` and sinking them, in their order of
161/// occurrence in `forOps`, under each of the `targets`.
162/// Returns the new AffineForOps, one per each of (`forOps`, `targets`) pair,
163/// nested immediately under each of `targets`.
165 ArrayRef<scf::ForOp> targets);
166
167/// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
168/// and sinking them, in their order of occurrence in `forOps`, under `target`.
169/// Returns the new AffineForOps, one per `forOps`, nested immediately under
170/// `target`.
172 scf::ForOp target);
173
174/// Tile a nest of scf::ForOp loops rooted at `rootForOp` with the given
175/// (parametric) sizes. Sizes are expected to be strictly positive values at
176/// runtime. If more sizes than loops are provided, discard the trailing values
177/// in sizes. Assumes the loop nest is permutable.
178/// Returns the newly created intra-tile loops.
179Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes);
180
181/// Get perfectly nested sequence of loops starting at root of loop nest
182/// (the first op being another AffineFor, and the second op - a terminator).
183/// A loop is perfectly nested iff: the first op in the loop's body is another
184/// AffineForOp, and the second op is a terminator).
186 scf::ForOp root);
187
188/// Given two scf.forall loops, `target` and `source`, fuses `target` into
189/// `source`. Assumes that the given loops are siblings and are independent of
190/// each other.
191///
192/// This function does not perform any legality checks and simply fuses the
193/// loops. The caller is responsible for ensuring that the loops are legal to
194/// fuse.
195scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target,
196 scf::ForallOp source,
197 RewriterBase &rewriter);
198
199/// Given two scf.for loops, `target` and `source`, fuses `target` into
200/// `source`. Assumes that the given loops are siblings and are independent of
201/// each other.
202///
203/// This function does not perform any legality checks and simply fuses the
204/// loops. The caller is responsible for ensuring that the loops are legal to
205/// fuse.
206scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source,
207 RewriterBase &rewriter);
208
209/// Normalize an `scf.forall` operation. Returns `failure()`if normalization
210/// fails.
211// On `success()` returns the
212/// newly created operation with all uses of the original operation replaced
213/// with results of the new operation.
214FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
215 scf::ForallOp forallOp);
216
217/// Check if the provided loops are perfectly nested for-loops. Perfect nesting
218/// means:
219/// 1. All loops are scf.for operations
220/// 2. Each outer loop's region iter args match the inner loop's init args
221/// 3. Each outer loop's yields match the inner loop's results
222/// 4. Each region iter arg and result has exactly one use
224
225/// Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs'
226/// and 'yieldedValues' as the block arguments and yielded values of the loop.
227/// The content of the loop body is replicated 'unrollFactor' times, calling
228/// 'ivRemapFn' to remap 'iv' for each unrolled body. If specified, annotates
229/// the Ops in each unrolled iteration using annotateFn. If provided,
230/// 'clonedToSrcOpsMap' is populated with the mappings from the cloned ops to
231/// the original op.
233 Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
234 function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
235 function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
236 ValueRange iterArgs, ValueRange yieldedValues,
237 IRMapping *clonedToSrcOpsMap = nullptr);
238
239/// Unroll this scf::Parallel loop by the specified unroll factors. Returns the
240/// unrolled loop if the unroll succeded; otherwise returns failure if the loop
241/// cannot be unrolled either due to restrictions or to invalid unroll factors.
242/// Requires positive loop bounds and step. If specified, annotates the Ops in
243/// each unrolled iteration by applying `annotateFn`.
244/// If provided, 'clonedToSrcOpsMap' is populated with the mappings from the
245/// cloned ops to the original op.
246FailureOr<scf::ParallelOp> parallelLoopUnrollByFactors(
247 scf::ParallelOp op, ArrayRef<uint64_t> unrollFactors,
248 RewriterBase &rewriter,
249 function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
250 IRMapping *clonedToSrcOpsMap = nullptr);
251
252/// Get constant loop bounds and steps for each of the induction variables of
253/// the given loop operation, if all the loop's ranges are constant. Each entry
254/// in the returned vector is a tuple (lowerBound, upperBound, step).
256getConstLoopBounds(mlir::LoopLikeOpInterface loopOp);
257
258/// Get constant trip counts for each of the induction variables of the given
259/// loop operation. If any of the loop's trip counts is not constant, return an
260/// empty vector.
262getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp);
263
264} // namespace mlir
265
266#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
Block represents an ordered list of Operations.
Definition Block.h:33
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
Definition Utils.cpp:1327
bool isPerfectlyNestedForLoops(MutableArrayRef< LoopLikeOpInterface > loops)
Check if the provided loops are perfectly nested for-loops.
Definition Utils.cpp:1525
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
Definition Utils.cpp:218
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
Definition Utils.cpp:36
std::function< SmallVector< Value >( OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an affine.for to find a band to coalesce.
Definition Utils.cpp:995
void generateUnrolledLoop(Block *loopBodyBlock, Value iv, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues, IRMapping *clonedToSrcOpsMap=nullptr)
Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs' and 'yieldedValues' as the...
Definition Utils.cpp:295
LogicalResult loopUnrollFull(scf::ForOp forOp)
Unrolls this loop completely.
Definition Utils.cpp:495
llvm::SmallVector< llvm::APInt > getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp)
Get constant trip counts for each of the induction variables of the given loop operation.
Definition Utils.cpp:1583
std::pair< Loops, Loops > TileLoops
Definition Utils.h:156
llvm::SmallVector< std::tuple< int64_t, int64_t, int64_t > > getConstLoopBounds(mlir::LoopLikeOpInterface loopOp)
Get constant loop bounds and steps for each of the induction variables of the given loop operation,...
Definition Utils.cpp:1564
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned > > combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
Definition Utils.cpp:1070
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
Definition Utils.cpp:1315
FailureOr< UnrolledLoopInfo > loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
Definition Utils.cpp:368
LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor)
Unrolls and jams this scf.for operation by the specified unroll factor.
Definition Utils.cpp:523
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
Definition Utils.cpp:241
FailureOr< scf::ParallelOp > parallelLoopUnrollByFactors(scf::ParallelOp op, ArrayRef< uint64_t > unrollFactors, RewriterBase &rewriter, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr, IRMapping *clonedToSrcOpsMap=nullptr)
Unroll this scf::Parallel loop by the specified unroll factors.
Definition Utils.cpp:1601
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition Utils.cpp:1294
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
Definition Utils.cpp:115
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Get back the original induction variable values after loop normalization.
Definition Utils.cpp:775
std::optional< std::pair< AffineExpr, AffineExpr > > getSCFMinMaxExpr(Value value, SmallVectorImpl< Value > &dims, SmallVectorImpl< Value > &symbols, llvm::function_ref< bool(Operation *)> loopFilter=nullptr)
Return the min/max expressions for value if it is an induction variable from scf.for or scf....
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
Definition Utils.cpp:1374
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
Definition Utils.cpp:987
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
Definition Utils.cpp:1427
llvm::function_ref< Fn > function_ref
Definition LLVM.h:144
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Definition Utils.cpp:1332
Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
Materialize bounds and step of a zero-based and unit-step loop derived by normalizing the specified b...
Definition Utils.cpp:704
SmallVector< scf::ForOp, 8 > Loops
Tile a nest of standard for loops rooted at rootForOp by finding such parametric tile sizes that the ...
Definition Utils.h:155
FailureOr< scf::ForallOp > normalizeForallOp(RewriterBase &rewriter, scf::ForallOp forallOp)
Normalize an scf.forall operation.
Definition Utils.cpp:1480
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
std::optional< scf::ForOp > epilogueLoopOp
Definition Utils.h:117
std::optional< scf::ForOp > mainLoopOp
Definition Utils.h:116