MLIR 23.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1//===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements logic for three optimization passes. The first two
10// passes try to move alloc nodes out of blocks to reduce the number of
11// allocations and copies during buffer deallocation. The third pass tries to
12// convert heap-based allocations to stack-based allocations, if possible.
13
15
21#include "mlir/IR/Operation.h"
24#include "mlir/Pass/Pass.h"
25
26namespace mlir {
27namespace bufferization {
28#define GEN_PASS_DEF_BUFFERHOISTINGPASS
29#define GEN_PASS_DEF_BUFFERLOOPHOISTINGPASS
30#define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACKPASS
31#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
32} // namespace bufferization
33} // namespace mlir
34
35using namespace mlir;
36using namespace mlir::bufferization;
37
38/// Returns true if the given operation implements a known high-level region-
39/// based control-flow interface.
41 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
42}
43
44/// Returns true if the given operation represents a loop by testing whether it
45/// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
46/// the case of a `RegionBranchOpInterface`, it checks all region-based control-
47/// flow edges for cycles.
48static bool isLoop(Operation *op) {
49 // If the operation implements the `LoopLikeOpInterface` it can be considered
50 // a loop.
51 if (isa<LoopLikeOpInterface>(op))
52 return true;
53
54 // If the operation does not implement the `RegionBranchOpInterface`, it is
55 // (currently) not possible to detect a loop.
56 auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
57 if (!regionInterface)
58 return false;
59
60 return regionInterface.hasLoop();
61}
62
63/// Return whether the given operation is a loop with sequential execution
64/// semantics.
65static bool isSequentialLoop(Operation *op) {
66 return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op);
67}
68
69/// Returns true if the given operation implements the AllocationOpInterface
70/// and it supports the dominate block hoisting.
72 auto allocOp = dyn_cast<AllocationOpInterface>(op);
73 return allocOp &&
74 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
75}
76
77/// Returns true if the given operation implements the AllocationOpInterface
78/// and it supports the loop hoisting.
80 auto allocOp = dyn_cast<AllocationOpInterface>(op);
81 return allocOp &&
82 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
83}
84
85/// Check if the size of the allocation is less than the given size. The
86/// transformation is only applied to small buffers since large buffers could
87/// exceed the stack space.
88static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
89 unsigned maxRankOfAllocatedMemRef) {
90 auto type = dyn_cast<ShapedType>(alloc.getType());
91 if (!type || !alloc.getDefiningOp<memref::AllocOp>())
92 return false;
93 if (!type.hasStaticShape()) {
94 // Check if the dynamic shape dimension of the alloc is produced by
95 // `memref.rank`. If this is the case, it is likely to be small.
96 // Furthermore, the dimension is limited to the maximum rank of the
97 // allocated memref to avoid large values by multiplying several small
98 // values.
99 if (type.getRank() <= maxRankOfAllocatedMemRef) {
100 return llvm::all_of(alloc.getDefiningOp()->getOperands(),
101 [&](Value operand) {
102 return operand.getDefiningOp<memref::RankOp>();
103 });
104 }
105 return false;
106 }
107 // Skip if the element type has no data layout support (e.g., nested memrefs).
108 Type elemType = type.getElementType();
109 if (!elemType.isIntOrFloat() &&
110 !isa<ComplexType, IndexType, VectorType>(elemType) &&
111 !isa<DataLayoutTypeInterface>(elemType))
112 return false;
113 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
114 .getTypeSizeInBits(elemType);
115 // Use tryGetNumElements to avoid an assertion on integer overflow (e.g. for
116 // very large statically-shaped memrefs). If the element count overflows
117 // int64_t the allocation is certainly not "small", so return false.
118 std::optional<int64_t> numElements = type.tryGetNumElements();
119 if (!numElements)
120 return false;
121 // Guard against overflow in the size computation as well.
122 if (bitwidth != 0 &&
123 *numElements > static_cast<int64_t>(maximumSizeInBytes * 8ULL / bitwidth))
124 return false;
125 return *numElements * bitwidth <= maximumSizeInBytes * 8;
126}
127
128/// Checks whether the given aliases leave the allocation scope.
129static bool
131 const BufferViewFlowAnalysis::ValueSetT &aliases) {
132 for (Value alias : aliases) {
133 for (auto *use : alias.getUsers()) {
134 // If there is at least one alias that leaves the parent region, we know
135 // that this alias escapes the whole region and hence the associated
136 // allocation leaves allocation scope.
137 if (isa<RegionBranchTerminatorOpInterface>(use) &&
138 use->getParentRegion() == parentRegion)
139 return true;
140 }
141 }
142 return false;
143}
144
145/// Checks, if an automated allocation scope for a given alloc value exists.
146static bool hasAllocationScope(Value alloc,
147 const BufferViewFlowAnalysis &aliasAnalysis) {
148 Region *region = alloc.getParentRegion();
149 do {
150 if (Operation *parentOp = region->getParentOp()) {
151 // Check if the operation is an automatic allocation scope and whether an
152 // alias leaves the scope. This means, an allocation yields out of
153 // this scope and can not be transformed in a stack-based allocation.
154 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
155 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
156 return true;
157 // Check if the operation is a known control flow interface and break the
158 // loop to avoid transformation in loops. Furthermore skip transformation
159 // if the operation does not implement a RegionBeanchOpInterface.
160 if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
161 break;
162 }
163 } while ((region = region->getParentRegion()));
164 return false;
165}
166
167namespace {
168
169//===----------------------------------------------------------------------===//
170// BufferAllocationHoisting
171//===----------------------------------------------------------------------===//
172
173/// A base implementation compatible with the `BufferAllocationHoisting` class.
174struct BufferAllocationHoistingStateBase {
175 /// A pointer to the current dominance info.
176 DominanceInfo *dominators;
177
178 /// The current allocation value.
179 Value allocValue;
180
181 /// The current placement block (if any).
182 Block *placementBlock;
183
184 /// Initializes the state base.
185 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
186 Block *placementBlock)
187 : dominators(dominators), allocValue(allocValue),
188 placementBlock(placementBlock) {}
189};
190
191/// Implements the actual hoisting logic for allocation nodes.
192template <typename StateT>
193class BufferAllocationHoisting : public BufferPlacementTransformationBase {
194public:
195 BufferAllocationHoisting(Operation *op)
196 : BufferPlacementTransformationBase(op), dominators(op),
197 postDominators(op), scopeOp(op) {}
198
199 /// Moves allocations upwards.
200 void hoist() {
201 SmallVector<Value> allocsAndAllocas;
202 for (BufferPlacementAllocs::AllocEntry &entry : allocs)
203 allocsAndAllocas.push_back(std::get<0>(entry));
204 scopeOp->walk([&](memref::AllocaOp op) {
205 allocsAndAllocas.push_back(op.getMemref());
206 });
207
208 for (auto allocValue : allocsAndAllocas) {
209 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
210 continue;
211 Operation *definingOp = allocValue.getDefiningOp();
212 assert(definingOp && "No defining op");
213 // Skip allocations in blocks that are not reachable from the function
214 // entry. Such blocks are dead code and the dominator tree analysis may
215 // not have nodes for them, which would cause crashes below.
216 if (!dominators.isReachableFromEntry(allocValue.getParentBlock()))
217 continue;
218 auto operands = definingOp->getOperands();
219 auto resultAliases = aliases.resolve(allocValue);
220 // Determine the common dominator block of all aliases.
221 Block *dominatorBlock =
222 findCommonDominator(allocValue, resultAliases, dominators);
223 // Init the initial hoisting state.
224 StateT state(&dominators, allocValue, allocValue.getParentBlock());
225 // Check for additional allocation dependencies to compute an upper bound
226 // for hoisting.
227 Block *dependencyBlock = nullptr;
228 // If this node has dependencies, check all dependent nodes. This ensures
229 // that all dependency values have been computed before allocating the
230 // buffer.
231 for (Value depValue : operands) {
232 Block *depBlock = depValue.getParentBlock();
233 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
234 dependencyBlock = depBlock;
235 }
236
237 // Find the actual placement block and determine the start operation using
238 // an upper placement-block boundary. The idea is that placement block
239 // cannot be moved any further upwards than the given upper bound.
240 Block *placementBlock = findPlacementBlock(
241 state, state.computeUpperBound(dominatorBlock, dependencyBlock));
242 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
243 allocValue, placementBlock, liveness);
244
245 // Move the alloc in front of the start operation.
246 Operation *allocOperation = allocValue.getDefiningOp();
247 allocOperation->moveBefore(startOperation);
248 }
249 }
250
251private:
252 /// Finds a valid placement block by walking upwards in the CFG until we
253 /// either cannot continue our walk due to constraints (given by the StateT
254 /// implementation) or we have reached the upper-most dominator block.
255 Block *findPlacementBlock(StateT &state, Block *upperBound) {
256 Block *currentBlock = state.placementBlock;
257 // Walk from the innermost regions/loops to the outermost regions/loops and
258 // find an appropriate placement block that satisfies the constraint of the
259 // current StateT implementation. Walk until we reach the upperBound block
260 // (if any).
261
262 // If we are not able to find a valid parent operation or an associated
263 // parent block, break the walk loop.
264 Operation *parentOp;
265 Block *parentBlock;
266 while ((parentOp = currentBlock->getParentOp()) &&
267 (parentBlock = parentOp->getBlock()) &&
268 (!upperBound ||
269 dominators.properlyDominates(upperBound, currentBlock))) {
270 // Try to find an immediate dominator and check whether the parent block
271 // is above the immediate dominator (if any).
272 DominanceInfoNode *idom = nullptr;
273
274 // DominanceInfo doesn't support getNode queries for single-block regions.
275 if (!currentBlock->isEntryBlock())
276 idom = dominators.getNode(currentBlock)->getIDom();
277
278 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
279 // If the current immediate dominator is below the placement block, move
280 // to the immediate dominator block.
281 currentBlock = idom->getBlock();
282 state.recordMoveToDominator(currentBlock);
283 } else {
284 // We have to move to our parent block since an immediate dominator does
285 // either not exist or is above our parent block. If we cannot move to
286 // our parent operation due to constraints given by the StateT
287 // implementation, break the walk loop. Furthermore, we should not move
288 // allocations out of unknown region-based control-flow operations.
289 if (!isKnownControlFlowInterface(parentOp) ||
290 !state.isLegalPlacement(parentOp))
291 break;
292 // Move to our parent block by notifying the current StateT
293 // implementation.
294 currentBlock = parentBlock;
295 state.recordMoveToParent(currentBlock);
296 }
297 }
298 // Return the finally determined placement block.
299 return state.placementBlock;
300 }
301
302 /// The dominator info to find the appropriate start operation to move the
303 /// allocs.
304 DominanceInfo dominators;
305
306 /// The post dominator info to move the dependent allocs in the right
307 /// position.
308 PostDominanceInfo postDominators;
309
310 /// The map storing the final placement blocks of a given alloc value.
311 llvm::DenseMap<Value, Block *> placementBlocks;
312
313 /// The operation that this transformation is working on. It is used to also
314 /// gather allocas.
315 Operation *scopeOp;
316};
317
318/// A state implementation compatible with the `BufferAllocationHoisting` class
319/// that hoists allocations into dominator blocks while keeping them inside of
320/// loops.
321struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
322 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
323
324 /// Computes the upper bound for the placement block search.
325 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
326 // If we do not have a dependency block, the upper bound is given by the
327 // dominator block.
328 if (!dependencyBlock)
329 return dominatorBlock;
330
331 // Find the "lower" block of the dominator and the dependency block to
332 // ensure that we do not move allocations above this block.
333 return dominators->properlyDominates(dominatorBlock, dependencyBlock)
334 ? dependencyBlock
335 : dominatorBlock;
336 }
337
338 /// Returns true if the given operation does not represent a loop.
339 bool isLegalPlacement(Operation *op) { return !isLoop(op); }
340
341 /// Returns true if the given operation should be considered for hoisting.
342 static bool shouldHoistOpType(Operation *op) {
344 }
345
346 /// Sets the current placement block to the given block.
347 void recordMoveToDominator(Block *block) { placementBlock = block; }
348
349 /// Sets the current placement block to the given block.
350 void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
351};
352
353/// A state implementation compatible with the `BufferAllocationHoisting` class
354/// that hoists allocations out of loops.
355struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
356 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
357
358 /// Remembers the dominator block of all aliases.
359 Block *aliasDominatorBlock = nullptr;
360
361 /// Computes the upper bound for the placement block search.
362 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
363 aliasDominatorBlock = dominatorBlock;
364 // If there is a dependency block, we have to use this block as an upper
365 // bound to satisfy all allocation value dependencies.
366 return dependencyBlock ? dependencyBlock : nullptr;
367 }
368
369 /// Returns true if the given operation represents a loop with sequential
370 /// execution semantics and one of the aliases caused the
371 /// `aliasDominatorBlock` to be "above" the block of the given loop operation.
372 /// If this is the case, it indicates that the allocation is passed via a back
373 /// edge.
374 bool isLegalPlacement(Operation *op) {
375 return isSequentialLoop(op) &&
376 !dominators->dominates(aliasDominatorBlock, op->getBlock());
377 }
378
379 /// Returns true if the given operation should be considered for hoisting.
380 static bool shouldHoistOpType(Operation *op) {
381 return allowAllocLoopHoisting(op);
382 }
383
384 /// Does not change the internal placement block, as we want to move
385 /// operations out of loops only.
386 void recordMoveToDominator(Block *block) {}
387
388 /// Sets the current placement block to the given block.
389 void recordMoveToParent(Block *block) { placementBlock = block; }
390};
391
392//===----------------------------------------------------------------------===//
393// BufferPlacementPromotion
394//===----------------------------------------------------------------------===//
395
396/// Promotes heap-based allocations to stack-based allocations (if possible).
397class BufferPlacementPromotion : BufferPlacementTransformationBase {
398public:
399 BufferPlacementPromotion(Operation *op)
400 : BufferPlacementTransformationBase(op) {}
401
402 /// Promote buffers to stack-based allocations.
403 void promote(function_ref<bool(Value)> isSmallAlloc) {
404 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
405 Value alloc = std::get<0>(entry);
406 Operation *dealloc = std::get<1>(entry);
407 // Checking several requirements to transform an AllocOp into an AllocaOp.
408 // The transformation is done if the allocation is limited to a given
409 // size. Furthermore, a deallocation must not be defined for this
410 // allocation entry and a parent allocation scope must exist.
411 if (!isSmallAlloc(alloc) || dealloc ||
412 !hasAllocationScope(alloc, aliases))
413 continue;
414
415 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
416 alloc, alloc.getParentBlock(), liveness);
417 // Build a new alloca that is associated with its parent
418 // `AutomaticAllocationScope` determined during the initialization phase.
419 OpBuilder builder(startOperation);
420 Operation *allocOp = alloc.getDefiningOp();
421 if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
422 std::optional<Operation *> alloca =
423 allocInterface.buildPromotedAlloc(builder, alloc);
424 if (!alloca)
425 continue;
426 // Replace the original alloc by a newly created alloca.
427 allocOp->replaceAllUsesWith(alloca.value());
428 allocOp->erase();
429 }
430 }
431 }
432};
433
434//===----------------------------------------------------------------------===//
435// BufferOptimizationPasses
436//===----------------------------------------------------------------------===//
437
438/// The buffer hoisting pass that hoists allocation nodes into dominating
439/// blocks.
440struct BufferHoistingPass
441 : public bufferization::impl::BufferHoistingPassBase<BufferHoistingPass> {
442
443 void runOnOperation() override {
444 // Hoist all allocations into dominator blocks.
445 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
446 getOperation());
447 optimizer.hoist();
448 }
449};
450
451/// The buffer loop hoisting pass that hoists allocation nodes out of loops.
452struct BufferLoopHoistingPass
453 : public bufferization::impl::BufferLoopHoistingPassBase<
454 BufferLoopHoistingPass> {
455
456 void runOnOperation() override {
457 // Hoist all allocations out of loops.
458 hoistBuffersFromLoops(getOperation());
459 }
460};
461
462/// The promote buffer to stack pass that tries to convert alloc nodes into
463/// alloca nodes.
464class PromoteBuffersToStackPass
465 : public bufferization::impl::PromoteBuffersToStackPassBase<
466 PromoteBuffersToStackPass> {
467 using Base::Base;
468
469public:
470 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
471 : isSmallAlloc(std::move(isSmallAlloc)) {}
472
473 LogicalResult initialize(MLIRContext *context) override {
474 if (isSmallAlloc == nullptr) {
475 isSmallAlloc = [=](Value alloc) {
476 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
477 maxRankOfAllocatedMemRef);
478 };
479 }
480 return success();
481 }
482
483 void runOnOperation() override {
484 // Move all allocation nodes and convert candidates into allocas.
485 BufferPlacementPromotion optimizer(getOperation());
486 optimizer.promote(isSmallAlloc);
487 }
488
489private:
490 std::function<bool(Value)> isSmallAlloc;
491};
492
493} // namespace
494
496 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
497 optimizer.hoist();
498}
499
501 std::function<bool(Value)> isSmallAlloc) {
502 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
503}
return success()
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
static bool isSequentialLoop(Operation *op)
Return whether the given operation is a loop with sequential execution semantics.
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
static bool allowAllocDominateBlockHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the dominate...
static bool allowAllocLoopHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the loop hoi...
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
LogicalResult initialize(unsigned origNumLoops, ArrayRef< ReassociationIndices > foldedIterationDims)
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition Block.cpp:36
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
SmallPtrSet< Value, 16 > ValueSetT
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
A trait of region holding operations that define a new scope for automatic allocations,...
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition Operation.h:778
Block * getBlock()
Returns the operation block that contains this operation.
Definition Operation.h:234
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:407
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition Operation.h:301
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition Region.cpp:45
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition Types.cpp:118
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition Value.cpp:46
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition Value.cpp:39
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block.
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition BufferUtils.h:38
The base class for all BufferPlacement transformations.
void hoistBuffersFromLoops(Operation *op)
Within the given operation, hoist buffers from loops where possible.
std::unique_ptr< Pass > createPromoteBuffersToStackPass(std::function< bool(Value)> isSmallAlloc)
Creates a pass that promotes heap-based allocations to stack-based ones.
Block * findCommonDominator(Value value, const BufferViewFlowAnalysis::ValueSetT &values, const DominatorT &doms)
Finds a common dominator for the given value while taking the positions of the values in the value se...
Definition BufferUtils.h:82
void promote(RewriterBase &rewriter, scf::ForallOp forallOp)
Promotes the loop body of a scf::ForallOp to its containing block.
Definition SCF.cpp:748
Include the generated interface declarations.
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition Dominance.h:30
llvm::function_ref< Fn > function_ref
Definition LLVM.h:147