LoopUtils.h
//===- LoopUtils.h - Loop transformation utilities --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various loop transformation utility
// methods: these are not passes by themselves but are used either by passes,
// optimization sequences, or in turn by other transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_LOOPUTILS_H
#define MLIR_DIALECT_AFFINE_LOOPUTILS_H

#include "mlir/IR/Block.h"
#include "mlir/Support/LLVM.h"
#include <optional>

namespace mlir {
class AffineMap;
class LoopLikeOpInterface;
class OpBuilder;
class Value;
class ValueRange;

namespace func {
class FuncOp;
} // namespace func

namespace scf {
class ForOp;
class ParallelOp;
} // namespace scf

namespace affine {
class AffineForOp;
struct MemRefRegion;

/// Unrolls this for operation completely if the trip count is known to be
/// constant. Returns failure otherwise.
LogicalResult loopUnrollFull(AffineForOp forOp);

/// Unrolls this for operation by the specified unroll factor. Returns failure
/// if the loop cannot be unrolled either due to restrictions or due to invalid
/// unroll factors. Requires positive loop bounds and step. If specified,
/// annotates the ops in each unrolled iteration by applying `annotateFn`.
/// When `cleanUpUnroll` is true, the cleanup (remainder) loop is fully
/// unrolled regardless of the unroll factor.
LogicalResult loopUnrollByFactor(
    AffineForOp forOp, uint64_t unrollFactor,
    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
    bool cleanUpUnroll = false);

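As a usage illustration (not part of the header), here is a minimal sketch of driving `loopUnrollByFactor` from a pass; the `func::FuncOp` value `funcOp`, the factor 4, and the `"unrolled_iteration"` attribute name are assumptions made up for the example:

```cpp
// Sketch: unroll every innermost affine.for in `funcOp` by 4, tagging each
// unrolled copy with a hypothetical "unrolled_iteration" attribute.
static void unrollInnermostLoopsByFour(func::FuncOp funcOp) {
  funcOp.walk([](AffineForOp forOp) {
    // Restrict the example to innermost loops.
    if (!forOp.getBody()->getOps<AffineForOp>().empty())
      return;
    auto annotateFn = [](unsigned iter, Operation *op, OpBuilder b) {
      op->setAttr("unrolled_iteration", b.getI64IntegerAttr(iter));
    };
    // On failure the loop is simply left untouched.
    (void)loopUnrollByFactor(forOp, /*unrollFactor=*/4, annotateFn);
  });
}
```
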
/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.
LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor);

/// Returns true if `loops` is a perfectly nested loop nest, where loops appear
/// in it from outermost to innermost.
[[maybe_unused]] bool isPerfectlyNested(ArrayRef<AffineForOp> loops);

/// Gets the perfectly nested sequence of loops starting at the root of the
/// loop nest. A loop is perfectly nested iff the first op in its body is
/// another AffineForOp and the second op is a terminator.
void getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
                             AffineForOp root);

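A small sketch of how these two utilities are typically used together; the root loop is an assumption of the example:

```cpp
// Sketch: collect the maximal perfectly nested band rooted at `root`,
// outermost loop first, and sanity-check the result.
static void collectBand(AffineForOp root, SmallVectorImpl<AffineForOp> &band) {
  getPerfectlyNestedLoops(band, root);
  assert(isPerfectlyNested(band) && "expected a perfectly nested band");
}
```
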
/// Unrolls and jams this loop by the specified factor. `forOp` can be a loop
/// with iteration arguments performing supported reductions, and its inner
/// loops can have iteration arguments. Returns success if the loop is
/// successfully unroll-jammed.
LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
                                    uint64_t unrollJamFactor);

/// Unrolls and jams this loop by the specified factor or by the trip count (if
/// constant), whichever is lower.
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
                                      uint64_t unrollJamFactor);

/// Promotes the loop body of an AffineForOp to its containing block if the
/// loop is known to have a single iteration.
LogicalResult promoteIfSingleIteration(AffineForOp forOp);

/// Promotes all single-iteration AffineForOps in the function, i.e., moves
/// their bodies into the containing Block.
void promoteSingleIterationLoops(func::FuncOp f);

/// Skews the operations in an affine.for's body with the specified
/// operation-wise shifts. The shifts are with respect to the original execution
/// order, and are multiplied by the loop 'step' before being applied. If
/// `unrollPrologueEpilogue` is set, fully unrolls the prologue and epilogue
/// loops when possible.
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
                                  bool unrollPrologueEpilogue = false);

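For illustration, a sketch of a simple skew; the two-op body shape of `forOp` (and hence the size of the shift vector) is an assumption of the example, not something the utility guarantees:

```cpp
// Sketch: delay the second operation of a two-op loop body by one iteration
// relative to the first (shifts are in units of the loop step).
static LogicalResult skewSecondOpByOne(AffineForOp forOp) {
  SmallVector<uint64_t, 2> shifts = {0, 1};
  return affineForOpBodySkew(forOp, shifts, /*unrollPrologueEpilogue=*/false);
}
```
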
/// Tiles the specified band of perfectly nested loops creating tile-space loops
/// and intra-tile loops. A band is a contiguous set of loops. This utility
/// doesn't check for the validity of tiling itself, but just performs it.
LogicalResult
tilePerfectlyNested(MutableArrayRef<AffineForOp> input,
                    ArrayRef<unsigned> tileSizes,
                    SmallVectorImpl<AffineForOp> *tiledNest = nullptr);

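A usage sketch combining this with `getPerfectlyNestedLoops`; the root loop and the uniform tile size of 32 are illustrative assumptions:

```cpp
// Sketch: tile the perfect band rooted at `root` by 32 in every dimension.
// The utility does not verify that tiling is valid; that is assumed here.
static LogicalResult tileBandBy32(AffineForOp root) {
  SmallVector<AffineForOp, 4> band;
  getPerfectlyNestedLoops(band, root);
  SmallVector<unsigned, 4> tileSizes(band.size(), 32);
  SmallVector<AffineForOp, 8> tiledNest;
  return tilePerfectlyNested(band, tileSizes, &tiledNest);
}
```
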
/// Tiles the specified band of perfectly nested loops creating tile-space
/// loops and intra-tile loops, using SSA values as tiling parameters. A band
/// is a contiguous set of loops.
LogicalResult tilePerfectlyNestedParametric(
    MutableArrayRef<AffineForOp> input, ArrayRef<Value> tileSizes,
    SmallVectorImpl<AffineForOp> *tiledNest = nullptr);

/// Performs loop interchange on 'forOpA' and 'forOpB'. Requires that 'forOpA'
/// and 'forOpB' are part of a perfectly nested sequence of loops.
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB);

/// Checks if the loop interchange permutation 'loopPermMap', of the perfectly
/// nested sequence of loops in 'loops', would violate dependences (loop 'i' in
/// 'loops' is mapped to location 'j' = 'loopPermMap[i]' in the interchange).
bool isValidLoopInterchangePermutation(ArrayRef<AffineForOp> loops,
                                       ArrayRef<unsigned> loopPermMap);

/// Performs a loop permutation on a perfectly nested loop nest `inputNest`
/// (where the contained loops appear from outer to inner) as specified by the
/// permutation `permMap`: loop 'i' in `inputNest` is mapped to location
/// 'permMap[i]', where positions 0, 1, ... run from the outermost position
/// inwards. Returns the position in `inputNest` of the AffineForOp that
/// becomes the new outermost loop of this nest. This method always succeeds;
/// it asserts on invalid input / specifications.
unsigned permuteLoops(ArrayRef<AffineForOp> inputNest,
                      ArrayRef<unsigned> permMap);

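A sketch that checks a candidate permutation before applying it; the 3-d nest and the particular permutation are assumptions of the example:

```cpp
// Sketch: rotate a 3-d perfect nest so the innermost loop becomes outermost,
// but only if the permutation preserves dependences. Assumes nest.size() == 3.
static void rotateNestIfLegal(MutableArrayRef<AffineForOp> nest) {
  // nest[i] moves to position permMap[i] (position 0 is outermost).
  SmallVector<unsigned, 3> permMap = {1, 2, 0};
  if (!isValidLoopInterchangePermutation(nest, permMap))
    return;
  unsigned newOutermost = permuteLoops(nest, permMap);
  (void)newOutermost; // index into `nest` of the loop that is now outermost
}
```
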
/// Sinks all sequential loops to the innermost levels (while preserving
/// relative order among them) and moves all parallel loops to the
/// outermost levels (while again preserving relative order among them).
/// Returns the AffineForOp at the root of the new loop nest after the
/// interchanges.
AffineForOp sinkSequentialLoops(AffineForOp forOp);

/// Performs tiling of imperfectly nested loops (with interchange) by
/// strip-mining the `forOps` by `sizes` and sinking them, in their order of
/// occurrence in `forOps`, under each of the `targets`.
/// Returns the new AffineForOps, one per (`forOps`, `targets`) pair,
/// nested immediately under each of `targets`.
SmallVector<SmallVector<AffineForOp, 8>, 8> tile(ArrayRef<AffineForOp> forOps,
                                                 ArrayRef<uint64_t> sizes,
                                                 ArrayRef<AffineForOp> targets);

/// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
/// and sinking them, in their order of occurrence in `forOps`, under `target`.
/// Returns the new AffineForOps, one per `forOps`, nested immediately under
/// `target`.
SmallVector<AffineForOp, 8> tile(ArrayRef<AffineForOp> forOps,
                                 ArrayRef<uint64_t> sizes, AffineForOp target);

/// Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
struct AffineCopyOptions {
  // True if DMAs should be generated instead of point-wise copies.
  bool generateDma;
  // The slower memory space from which data is to be moved.
  unsigned slowMemorySpace;
  // Memory space of the faster one (typically a scratchpad).
  unsigned fastMemorySpace;
  // Memory space to place tags in: only meaningful for DMAs.
  unsigned tagMemorySpace;
  // Capacity of the fast memory space in bytes.
  uint64_t fastMemCapacityBytes;
};

/// Performs explicit copying for the contiguous sequence of operations in the
/// block iterator range [`begin`, `end`), where `end` can't be past the
/// terminator of the block (since additional operations are potentially
/// inserted right before `end`). `copyOptions` provides various parameters, and
/// the output argument `copyNests` is the set of all copy nests inserted, each
/// represented by its root affine.for. Since we generate allocs and deallocs
/// for all fast buffers (before and after the range of operations respectively,
/// or at a hoisted position), all of the fast memory capacity is assumed to be
/// available for processing this block range. When `filterMemRef` is specified,
/// copies are only generated for the provided MemRef. Returns success if the
/// explicit copying succeeded for all memrefs on which affine loads/stores were
/// encountered. For memrefs whose element type size in bytes can't be computed
/// (e.g., `index` type), their capacity is not accounted for and the
/// `fastMemCapacityBytes` copy option is non-functional in such cases.
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end,
                                     const AffineCopyOptions &copyOptions,
                                     std::optional<Value> filterMemRef,
                                     DenseSet<Operation *> &copyNests);

/// A convenience version of affineDataCopyGenerate for all ops in the body of
/// an AffineForOp.
LogicalResult affineDataCopyGenerate(AffineForOp forOp,
                                     const AffineCopyOptions &copyOptions,
                                     std::optional<Value> filterMemRef,
                                     DenseSet<Operation *> &copyNests);

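A sketch of driving the AffineForOp overload; the memory-space numbers and the 32 KiB capacity are illustrative assumptions, and the aggregate initializer follows the field order of the AffineCopyOptions struct above:

```cpp
// Sketch: generate point-wise copies into a hypothetical fast memory space 1
// for all memrefs accessed in `forOp`'s body, with a 32 KiB capacity budget.
static LogicalResult copyIntoFastMemory(AffineForOp forOp) {
  AffineCopyOptions options = {/*generateDma=*/false,
                               /*slowMemorySpace=*/0,
                               /*fastMemorySpace=*/1,
                               /*tagMemorySpace=*/0,
                               /*fastMemCapacityBytes=*/32 * 1024};
  DenseSet<Operation *> copyNests;
  return affineDataCopyGenerate(forOp, options, /*filterMemRef=*/std::nullopt,
                                copyNests);
}
```
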
/// Result for calling generateCopyForMemRegion.
struct CopyGenerateResult {
  // Number of bytes used by alloc.
  uint64_t sizeInBytes;

  // The newly created buffer allocation.
  Operation *alloc;

  // Generated loop nest for copying data between the allocated buffer and the
  // original memref.
  Operation *copyNest;
};

/// generateCopyForMemRegion is similar to affineDataCopyGenerate, but works
/// with a single memref region. `memrefRegion` is supposed to contain analysis
/// information within `analyzedOp`. The generated prologue and epilogue always
/// surround `analyzedOp`.
///
/// Note that `analyzedOp` is a single op for API convenience, and the
/// [begin, end) version can be added as needed.
///
/// Also note that certain options in `copyOptions` aren't looked at anymore,
/// like slowMemorySpace.
LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion,
                                       Operation *analyzedOp,
                                       const AffineCopyOptions &copyOptions,
                                       CopyGenerateResult &result);

/// Replaces a perfect nest of "for" loops with a single linearized loop.
/// Assumes `loops` contains a list of perfectly nested loops outermost to
/// innermost that are normalized (step one and lower bound of zero) and with
/// bounds and steps independent of any loop induction variable involved in the
/// nest. Coalescing affine.for loops is not always possible, i.e., the result
/// may not be representable using affine.for.
LogicalResult coalesceLoops(MutableArrayRef<AffineForOp> loops);

/// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of
/// size given by `numProcessors`. This is achieved by embedding the SSA values
/// corresponding to `processorIds` and `numProcessors` into the bounds and step
/// of the `forOp`. No check is performed on the legality of the rewrite; it is
/// the caller's responsibility to ensure legality.
///
/// Requires that `processorIds` and `numProcessors` have the same size and that
/// for each idx, `processorIds[idx]` takes, at runtime, all values between 0
/// and `numProcessors[idx]` - 1. This corresponds to traditional use cases for:
///   1. GPU (threadIdx, get_local_id(), ...)
///   2. MPI (MPI_Comm_rank)
///   3. OpenMP (omp_get_thread_num)
///
/// Example:
/// Assuming a 2-d grid with processorIds = [blockIdx.x, threadIdx.x] and
/// numProcessors = [gridDim.x, blockDim.x], the loop:
///
/// ```
///    scf.for %i = %lb to %ub step %step {
///      ...
///    }
/// ```
///
/// is rewritten into a version resembling the following pseudo-IR:
///
/// ```
///    scf.for %i = %lb + %step * (threadIdx.x + blockIdx.x * blockDim.x)
///       to %ub step %gridDim.x * blockDim.x * %step {
///      ...
///    }
/// ```
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef<Value> processorId,
                           ArrayRef<Value> numProcessors);

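A minimal sketch of the call itself; the four SSA values are assumed to have been created elsewhere (e.g., by gpu.block_id, gpu.thread_id, gpu.grid_dim and gpu.block_dim) and to dominate `forOp`; as noted above, no legality checking is performed:

```cpp
// Sketch: distribute `forOp` over a 1-d grid of blocks and threads, given SSA
// values for the processor ids and grid sizes.
static void mapToGpuGrid(scf::ForOp forOp, Value blockId, Value threadId,
                         Value gridDim, Value blockDim) {
  mapLoopToProcessorIds(forOp, /*processorId=*/{blockId, threadId},
                        /*numProcessors=*/{gridDim, blockDim});
}
```
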
/// Gathers all AffineForOps in 'func.func' grouped by loop depth.
void gatherLoops(func::FuncOp func,
                 std::vector<SmallVector<AffineForOp, 2>> &depthToLoops);

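A small sketch that reports how many loops sit at each depth; the diagnostic output is just for illustration (llvm/Support/raw_ostream.h assumed available):

```cpp
// Sketch: print a per-depth count of affine.for ops in `func`.
static void reportLoopDepths(func::FuncOp func) {
  std::vector<SmallVector<AffineForOp, 2>> depthToLoops;
  gatherLoops(func, depthToLoops);
  for (size_t d = 0, e = depthToLoops.size(); d != e; ++d)
    llvm::errs() << "depth " << d << ": " << depthToLoops[d].size()
                 << " affine.for ops\n";
}
```
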
/// Creates an AffineForOp while ensuring that the lower and upper bounds are
/// canonicalized, i.e., unused and duplicate operands are removed, any constant
/// operands propagated/folded in, and duplicate bound maps dropped.
AffineForOp createCanonicalizedAffineForOp(OpBuilder b, Location loc,
                                           ValueRange lbOperands,
                                           AffineMap lbMap,
                                           ValueRange ubOperands,
                                           AffineMap ubMap, int64_t step = 1);

/// Separates full tiles from partial tiles for a perfect nest `nest` by
/// generating a conditional guard that selects between the full tile version
/// and the partial tile version using an AffineIfOp. The original loop nest
/// is replaced by this guarded two-version form.
///
///    affine.if (cond)
///      // full_tile
///    else
///      // partial tile
///
LogicalResult
separateFullTiles(MutableArrayRef<AffineForOp> nest,
                  SmallVectorImpl<AffineForOp> *fullTileNest = nullptr);

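A sketch of separating full tiles right after tiling; the tile size and the assumption that the tiled band produced by tilePerfectlyNested remains perfectly nested are both illustrative:

```cpp
// Sketch: tile the band rooted at `root`, then guard it so a full-tile
// version is taken when the tile is complete.
static LogicalResult tileAndSeparateFullTiles(AffineForOp root) {
  SmallVector<AffineForOp, 4> band;
  getPerfectlyNestedLoops(band, root);
  SmallVector<unsigned, 4> tileSizes(band.size(), 32);
  SmallVector<AffineForOp, 8> tiledNest;
  if (failed(tilePerfectlyNested(band, tileSizes, &tiledNest)))
    return failure();
  SmallVector<AffineForOp, 4> fullTileNest;
  return separateFullTiles(tiledNest, &fullTileNest);
}
```
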
/// Walks an affine.for to find a band to coalesce.
LogicalResult coalescePerfectlyNestedAffineLoops(AffineForOp op);

/// Counts the number of loops surrounding `operand` such that `operand` could
/// be hoisted above.
/// Stops counting at the first loop over which the operand cannot be hoisted.
/// This counts any LoopLikeOpInterface, not just affine.for.
int64_t numEnclosingInvariantLoops(OpOperand &operand);

} // namespace affine
} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_LOOPUTILS_H