MLIR 23.0.0git
Utils.h
Go to the documentation of this file.
1//===- Utils.h - Affine dialect utilities -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header file declares a set of utilities for the affine dialect ops.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef MLIR_DIALECT_AFFINE_UTILS_H
14#define MLIR_DIALECT_AFFINE_UTILS_H
15
20#include <optional>
21
22namespace mlir {
23class DominanceInfo;
24class Operation;
27
28namespace func {
29class FuncOp;
30} // namespace func
31
32namespace memref {
33class AllocOp;
34class AllocaOp;
35class ReinterpretCastOp;
36} // namespace memref
37
38namespace affine {
39class AffineForOp;
40class AffineIfOp;
41class AffineParallelOp;
42
44
45/// Replaces a parallel affine.for op with a 1-d affine.parallel op. `forOp`'s
46/// body is taken by the affine.parallel op and the former is erased.
47/// (mlir::isLoopParallel can be used to detect a parallel affine.for op.) The
48/// reductions specified in `parallelReductions` are also parallelized.
49/// Parallelization will fail in the presence of loop iteration arguments that
50/// are not listed in `parallelReductions`. `resOp`, if non-null, is set to the
51/// newly created affine.parallel op.
52LogicalResult affineParallelize(AffineForOp forOp,
53 ArrayRef<LoopReduction> parallelReductions = {},
54 AffineParallelOp *resOp = nullptr);
55
56/// Hoists out affine.if/else to as high as possible, i.e., past all invariant
57/// affine.fors/parallel's. Returns success if any hoisting happened; `folded` is
58/// set to true if the op was folded or erased. This hoisting could lead to
59/// significant code expansion in some cases.
60LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr);
61
62/// Holds parameters to perform n-D vectorization on a single loop nest.
63/// For example, for the following loop nest:
64///
65/// func @vec2d(%in: memref<64x128x512xf32>, %out: memref<64x128x512xf32>) {
66/// affine.for %i0 = 0 to 64 {
67/// affine.for %i1 = 0 to 128 {
68/// affine.for %i2 = 0 to 512 {
69/// %ld = affine.load %in[%i0, %i1, %i2] : memref<64x128x512xf32>
70/// affine.store %ld, %out[%i0, %i1, %i2] : memref<64x128x512xf32>
71/// }
72/// }
73/// }
74/// return
75/// }
76///
77/// and VectorizationStrategy = 'vectorSizes = {8, 4}', 'loopToVectorDim =
78/// {{i1->0}, {i2->1}}', SuperVectorizer will generate:
79///
80/// func @vec2d(%arg0: memref<64x128x512xf32>, %arg1: memref<64x128x512xf32>) {
81/// affine.for %arg2 = 0 to 64 {
82/// affine.for %arg3 = 0 to 128 step 8 {
83/// affine.for %arg4 = 0 to 512 step 4 {
84/// %cst = arith.constant 0.000000e+00 : f32
85/// %0 = vector.transfer_read %arg0[%arg2, %arg3, %arg4], %cst : ...
86/// vector.transfer_write %0, %arg1[%arg2, %arg3, %arg4] : ...
87/// }
88/// }
89/// }
90/// return
91/// }
92// TODO: Hoist to a VectorizationStrategy.cpp when appropriate.
93struct VectorizationStrategy {
94 /// Vectorization factors to apply to each target vector dimension.
95 /// Each factor will be applied to a different loop.
96 SmallVector<int64_t, 8> vectorSizes;
97 /// Maps each AffineForOp vectorization candidate with its vector dimension.
98 /// The candidate will be vectorized using the vectorization factor in
99 /// 'vectorSizes' for that dimension.
100 DenseMap<Operation *, unsigned> loopToVectorDim;
101 /// Maps loops that implement vectorizable reductions to the corresponding
102 /// reduction descriptors.
103 ReductionLoopMap reductionLoops;
104};
105
106/// Vectorize affine loops that are children of `parentOp` (including itself).
/// NOTE(review): presumably `vectorSizes` holds the vectorization factors and
/// `fastestVaryingPattern` overrides the default loop vectorization order, as
/// for vectorizeAffineLoops below — confirm against the definition.
107void vectorizeChildAffineLoops(Operation *parentOp, bool vectorizeReductions,
108 ArrayRef<int64_t> vectorSizes,
109 ArrayRef<int64_t> fastestVaryingPattern);
110
111/// Replace affine store and load accesses by scalars by forwarding stores to
112/// loads and eliminate invariant affine loads; consequently, eliminate dead
113/// allocs.
/// `domInfo` and `postDomInfo` supply (post-)dominance information; `analysis`
/// is used for alias queries. NOTE(review): presumed from the parameter types —
/// confirm against the definition.
114void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
115 PostDominanceInfo &postDomInfo,
116 AliasAnalysis &analysis);
117
118/// Vectorizes affine loops in `loops` using the n-D vectorization factors in
119/// `vectorSizes`. By default, each vectorization factor is applied
120/// inner-to-outer to the loops of each loop nest. `fastestVaryingPattern` can
121/// be optionally used to provide a different loop vectorization order.
122/// If `reductionLoops` is not empty, the given reduction loops may be
123/// vectorized along the reduction dimension.
124/// TODO: Vectorizing reductions is supported only for 1-D vectorization.
125void vectorizeAffineLoops(
126 Operation *parentOp,
127 llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
128 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
129 const ReductionLoopMap &reductionLoops = ReductionLoopMap());
130
131/// External utility to vectorize affine loops from a single loop nest using an
132/// n-D vectorization strategy (see doc in VectorizationStrategy definition).
133/// Loops are provided in a 2D vector container. The first dimension represents
134/// the nesting level relative to the loops to be vectorized. The second
135/// dimension contains the loops. This means that:
136/// a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]',
137/// b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'.
138///
139/// For example, for the following loop nest:
140///
141/// func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>,
142/// %out0: memref<64x128x512xf32>,
143/// %out1: memref<64x128x128xf32>) {
144/// affine.for %i0 = 0 to 64 {
145/// affine.for %i1 = 0 to 128 {
146/// affine.for %i2 = 0 to 512 {
147/// %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32>
148/// affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32>
149/// }
150/// affine.for %i3 = 0 to 128 {
151/// %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32>
152/// affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32>
153/// }
154/// }
155/// }
156/// return
157/// }
158///
159/// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two
160/// innermost loops;
161/// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost
162/// loops;
163/// loops = {{%i2}}, to vectorize only the first innermost loop;
164/// loops = {{%i3}}, to vectorize only the second innermost loop;
165/// loops = {{%i1}}, to vectorize only the middle loop.
/// Returns failure if the vectorization was unsuccessful.
166LogicalResult
167vectorizeAffineLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
168 const VectorizationStrategy &strategy);
169
170/// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1.
171/// As currently implemented, this transformation cannot fail and will return
172/// early if the op is already in a normalized form.
173void normalizeAffineParallel(AffineParallelOp op);
174
175/// Normalize an affine.for op. An affine.for op is normalized by converting the
176/// lower bound to zero and loop step to one. The upper bound is set to the trip
177/// count of the loop. Original loops must have a lower bound with only a single
178/// result. There is no such restriction on upper bounds. Returns success if the
179/// loop has been normalized (or is already in the normal form). If
180/// `promoteSingleIter` is true, the loop is simply promoted if it has a single
181/// iteration.
182LogicalResult normalizeAffineFor(AffineForOp op,
183 bool promoteSingleIter = false);
184
185/// Traverse `e` and return an AffineExpr where all occurrences of `dim` have
186/// been replaced by either:
187/// - `min` if `positivePath` is true when we reach an occurrence of `dim`
188/// - `max` if `positivePath` is false when we reach an occurrence of `dim`
189/// `positivePath` is negated each time we hit a multiplicative or divisive
190/// binary op with a constant negative coefficient.
191AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
192 AffineExpr max, bool positivePath = true);
193
194/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
195/// optionally remapping the old memref's indices using the supplied affine map,
196/// `indexRemap`. The new memref could be of a different shape or rank.
197/// `extraIndices` provides any additional access indices to be added to the
198/// start.
199///
200/// `indexRemap` remaps indices of the old memref access to a new set of indices
201/// that are used to index the memref. Additional input operands to indexRemap
202/// can be optionally provided in `extraOperands`, and they occupy the start
203/// of its input list. `indexRemap`'s dimensional inputs are expected to
204/// correspond to memref's indices, and its symbolic inputs if any should be
205/// provided in `symbolOperands`.
206///
207/// If `userFilterFn` is specified, restrict replacement to only those users
208/// that pass the specified filter (i.e., the filter returns true).
209///
210/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
211/// uses of a memref without any requirement for access index rewrites as long
212/// as the user operation has the MemRefsNormalizable trait. The default value
213/// of this flag is false.
214///
215/// 'replaceInDeallocOp', if set, lets DeallocOp, a non-dereferencing user, to
216/// also be a candidate for replacement. The default value of this flag is
217/// false.
218///
219/// Returns success if the replacement was performed, and failure if it is not
220/// possible, i.e., whenever a memref is used as an operand in a
221/// non-dereferencing context and 'allowNonDereferencingOps' is false, except
222/// for dealloc's on the memref which are left untouched. See comments at
223/// function definition for an example.
224///
225// Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
226// The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
227// index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
228// d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
229// extra operands, note that 'indexRemap' would just be applied to existing
230// indices (%i, %j).
231//
232// TODO: allow extraIndices to be added at any position.
233LogicalResult replaceAllMemRefUsesWith(
234 Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
235 AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
236 ArrayRef<Value> symbolOperands = {},
237 llvm::function_ref<bool(Operation *)> userFilterFn = nullptr,
238 bool allowNonDereferencingOps = false, bool replaceInDeallocOp = false);
239
240/// Performs the same replacement as the other version above but only for the
241/// dereferencing uses of `oldMemRef` in `op`, except in cases where
242/// `allowNonDereferencingOps` is set to true where we replace the
243/// non-dereferencing uses as well.
244LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
245 Operation *op,
246 ArrayRef<Value> extraIndices = {},
247 AffineMap indexRemap = AffineMap(),
248 ArrayRef<Value> extraOperands = {},
249 ArrayRef<Value> symbolOperands = {},
250 bool allowNonDereferencingOps = false);
251
252/// Rewrites the memref defined by alloc or reinterpret_cast op to have an
253/// identity layout map and updates all its indexing uses. Returns failure if
254/// any of its uses escape (while leaving the IR in a valid state).
255template <typename AllocLikeOp>
256LogicalResult normalizeMemRef(AllocLikeOp op);
/// Explicit instantiations are declared here (and provided out-of-line) for
/// memref::AllocaOp and memref::AllocOp.
257extern template LogicalResult
258normalizeMemRef<memref::AllocaOp>(memref::AllocaOp op);
259extern template LogicalResult
260normalizeMemRef<memref::AllocOp>(memref::AllocOp op);
/// Non-template overload handling memref::ReinterpretCastOp.
261LogicalResult normalizeMemRef(memref::ReinterpretCastOp op);
262
263/// Normalizes `memrefType` so that the affine layout map of the memref is
264/// transformed to an identity map with a new shape being computed for the
265/// normalized memref type and returns it. The old memref type is simply
266/// returned if the normalization failed.
267MemRefType normalizeMemRefType(MemRefType memrefType);
268
269/// Given an operation, inserts one or more single result affine apply
270/// operations, results of which are exclusively used by this operation.
271/// The operands of these newly created affine apply ops are
272/// guaranteed to be loop iterators or terminal symbols of a function.
273///
274/// Before
275///
276/// affine.for %i = 0 to #map(%N)
277/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
278/// send %A[%idx], ...
279/// %v = "compute"(%idx, ...)
280///
281/// After
282///
283/// affine.for %i = 0 to #map(%N)
284/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
285/// send %A[%idx], ...
286/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
287/// %v = "compute"(%idx_, ...)
288///
289/// This allows the application of different transformations on send and
290/// compute (e.g., different shifts/delays)
291///
292/// Fills `sliceOps` with the list of affine.apply operations.
293/// In the following cases, `sliceOps` remains empty:
294/// 1. If none of opInst's operands were the result of an affine.apply
295/// (i.e., there was no affine computation slice to create).
296/// 2. If all the affine.apply op's supplying operands to this opInst did not
297/// have any uses other than those in this opInst.
298void createAffineComputationSlice(Operation *opInst,
299 SmallVectorImpl<AffineApplyOp> *sliceOps);
300
301/// Emit code that computes the given affine expression using standard
302/// arithmetic operations applied to the provided dimension and symbol values.
303Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr,
304 ValueRange dimValues, ValueRange symbolValues);
305
306/// Create a sequence of operations that implement the `affineMap` applied to
307/// the given `operands` (as if it were an AffineApplyOp).
308std::optional<SmallVector<Value, 8>> expandAffineMap(OpBuilder &builder,
309 Location loc,
310 AffineMap affineMap,
311 ValueRange operands);
312
313/// Holds the result of (div a, b) and (mod a, b).
314struct DivModValue {
  /// Result of the division (div a, b).
315 Value quotient;
  /// Result of the modulo (mod a, b).
316 Value remainder;
317};
318
319/// Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
320DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs);
321
322/// Generate the IR to delinearize `linearIndex` given the `basis` and return
323/// the multi-index. `hasOuterBound` indicates whether `basis` has an entry
324/// given the size of the first multi-index result - if it is true, the function
325/// will return `basis.size()` values, otherwise, it will return `basis.size() +
326/// 1` values.
327FailureOr<SmallVector<Value>> delinearizeIndex(OpBuilder &b, Location loc,
328 Value linearIndex,
329 ArrayRef<Value> basis,
330 bool hasOuterBound = true);
331
/// Overload of the above that takes the `basis` as a list of OpFoldResult.
332FailureOr<SmallVector<Value>> delinearizeIndex(OpBuilder &b, Location loc,
333 Value linearIndex,
334 ArrayRef<OpFoldResult> basis,
335 bool hasOuterBound = true);
336
337// Generate IR that extracts the linear index from a multi-index according to
338// a basis/shape. The basis may contain either `multiIndex.size()` or
339// `multiIndex.size() - 1` elements.
340OpFoldResult linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
341 ArrayRef<OpFoldResult> basis,
342 ImplicitLocOpBuilder &builder);
343
// Overload of the above taking an OpBuilder and a Location instead of an
// ImplicitLocOpBuilder.
344OpFoldResult linearizeIndex(OpBuilder &builder, Location loc,
345 ArrayRef<OpFoldResult> multiIndex,
346 ArrayRef<OpFoldResult> basis);
347
348/// Ensure that all operations that could be executed after `start`
349/// (noninclusive) and prior to `memOp` (e.g. on a control flow/op path
350/// between the operations) do not have the potential memory effect
351/// `EffectType` on `memOp`. `memOp` is an operation that reads or writes to
352/// a memref. For example, if `EffectType` is MemoryEffects::Write, this method
353/// will check if there is no write to the memory between `start` and `memOp`
354/// that would change the read within `memOp`.
/// `mayAlias` is a callback returning true if its two Value operands may
/// reference aliasing memory.
355template <typename EffectType, typename T>
356bool hasNoInterveningEffect(Operation *start, T memOp,
357 llvm::function_ref<bool(Value, Value)> mayAlias);
358
// NOTE(review): this rendering elides several lines of the AffineValueExpr
// struct — its declaration, members, the AffineExpr constructor (Utils.h:360)
// and the two `bind` signatures (Utils.h:361, 365) per the index below. Only
// the method bodies survived extraction; consult the original Utils.h before
// editing this fragment.
362 this->v = v;
363 return *this;
364 }
366 this->v = v;
367 return *this;
368 }
369 operator AffineExpr() const { return e; }
370 operator OpFoldResult() const { return v; }
373};
374
375/// Helper struct to build simple AffineValueExprs with minimal type inference
376/// support.
// NOTE(review): the struct declaration line and the signatures of the
// add/sub/mul/floor/ceil/min/max helpers (Utils.h:377-403 per the index
// below) were elided by this rendering; only bodies remain. Consult the
// original Utils.h before editing this fragment.
378 AffineBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}
389 return makeComposedFoldedAffineApply(b, loc, {lhs.e.floorDiv(rhs.e)},
390 {lhs, rhs});
391 }
393 return makeComposedFoldedAffineApply(b, loc, {lhs.e.ceilDiv(rhs.e)},
394 {lhs, rhs});
395 }
398 b, loc, AffineMap::getMultiDimIdentityMap(vals.size(), b.getContext()),
399 vals);
400 }
403 b, loc, AffineMap::getMultiDimIdentityMap(vals.size(), b.getContext()),
404 vals);
405 }
406
407private:
408 OpBuilder &b;
409 Location loc;
410};
411
412} // namespace affine
413} // namespace mlir
414
415#endif // MLIR_DIALECT_AFFINE_UTILS_H
static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp)
A helper for the mechanics of mlir::hoistAffineIfOp.
Definition Utils.cpp:289
template LogicalResult mlir::affine::normalizeMemRef< memref::AllocaOp >(memref::AllocaOp op)
template LogicalResult mlir::affine::normalizeMemRef< memref::AllocOp >(memref::AllocOp op)
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
lhs
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for affine expression.
Definition AffineExpr.h:68
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
A class for computing basic dominance information.
Definition Dominance.h:140
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Definition Builders.h:630
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:207
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
A class for computing basic postdominance information.
Definition Dominance.h:204
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
DenseMap< Operation *, SmallVector< LoopReduction, 2 > > ReductionLoopMap
Definition Utils.h:43
LogicalResult affineParallelize(AffineForOp forOp, ArrayRef< LoopReduction > parallelReductions={}, AffineParallelOp *resOp=nullptr)
Replaces a parallel affine.for op with a 1-d affine.parallel op.
Definition Utils.cpp:352
OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a maximum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
bool hasNoInterveningEffect(Operation *start, T memOp, llvm::function_ref< bool(Value, Value)> mayAlias)
Hoists out affine.if/else to as high as possible, i.e., past all invariant affine....
Definition Utils.cpp:687
Value linearizeIndex(ValueRange indices, ArrayRef< int64_t > strides, int64_t offset, Type integerType, Location loc, OpBuilder &builder)
Generates IR to perform index linearization with the given indices and their corresponding strides,...
Include the generated interface declarations.
llvm::DenseMapInfo< T, Enable > DenseMapInfo
Definition LLVM.h:114
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:118
OpFoldResult add(AffineValueExpr lhs, AffineValueExpr rhs)
Definition Utils.h:379
OpFoldResult min(ArrayRef< OpFoldResult > vals)
Definition Utils.h:396
OpFoldResult ceil(AffineValueExpr lhs, AffineValueExpr rhs)
Definition Utils.h:392
OpFoldResult max(ArrayRef< OpFoldResult > vals)
Definition Utils.h:401
AffineBuilder(OpBuilder &b, Location loc)
Definition Utils.h:378
OpFoldResult floor(AffineValueExpr lhs, AffineValueExpr rhs)
Definition Utils.h:388
OpFoldResult sub(AffineValueExpr lhs, AffineValueExpr rhs)
Definition Utils.h:382
OpFoldResult mul(AffineValueExpr lhs, AffineValueExpr rhs)
Definition Utils.h:385
AffineValueExpr(AffineExpr e)
Definition Utils.h:360
AffineValueExpr bind(Value v)
Definition Utils.h:361
AffineValueExpr bind(OpFoldResult v)
Definition Utils.h:365