MLIR 23.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1//===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements logic for three optimization passes. The first two
10// passes try to move alloc nodes out of blocks to reduce the number of
11// allocations and copies during buffer deallocation. The third pass tries to
12// convert heap-based allocations to stack-based allocations, if possible.
13
15
21#include "mlir/IR/Operation.h"
23#include "mlir/Pass/Pass.h"
24
25namespace mlir {
26namespace bufferization {
27#define GEN_PASS_DEF_BUFFERHOISTINGPASS
28#define GEN_PASS_DEF_BUFFERLOOPHOISTINGPASS
29#define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACKPASS
30#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31} // namespace bufferization
32} // namespace mlir
33
34using namespace mlir;
35using namespace mlir::bufferization;
36
37/// Returns true if the given operation implements a known high-level region-
38/// based control-flow interface.
40 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41}
42
43/// Returns true if the given operation represents a loop by testing whether it
44/// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
45/// the case of a `RegionBranchOpInterface`, it checks all region-based control-
46/// flow edges for cycles.
47static bool isLoop(Operation *op) {
48 // If the operation implements the `LoopLikeOpInterface` it can be considered
49 // a loop.
50 if (isa<LoopLikeOpInterface>(op))
51 return true;
52
53 // If the operation does not implement the `RegionBranchOpInterface`, it is
54 // (currently) not possible to detect a loop.
55 auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
56 if (!regionInterface)
57 return false;
58
59 return regionInterface.hasLoop();
60}
61
62/// Return whether the given operation is a loop with sequential execution
63/// semantics.
64static bool isSequentialLoop(Operation *op) {
65 return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op);
66}
67
68/// Returns true if the given operation implements the AllocationOpInterface
69/// and it supports the dominate block hoisting.
71 auto allocOp = dyn_cast<AllocationOpInterface>(op);
72 return allocOp &&
73 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
74}
75
76/// Returns true if the given operation implements the AllocationOpInterface
77/// and it supports the loop hoisting.
79 auto allocOp = dyn_cast<AllocationOpInterface>(op);
80 return allocOp &&
81 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
82}
83
84/// Check if the size of the allocation is less than the given size. The
85/// transformation is only applied to small buffers since large buffers could
86/// exceed the stack space.
87static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
88 unsigned maxRankOfAllocatedMemRef) {
89 auto type = dyn_cast<ShapedType>(alloc.getType());
90 if (!type || !alloc.getDefiningOp<memref::AllocOp>())
91 return false;
92 if (!type.hasStaticShape()) {
93 // Check if the dynamic shape dimension of the alloc is produced by
94 // `memref.rank`. If this is the case, it is likely to be small.
95 // Furthermore, the dimension is limited to the maximum rank of the
96 // allocated memref to avoid large values by multiplying several small
97 // values.
98 if (type.getRank() <= maxRankOfAllocatedMemRef) {
99 return llvm::all_of(alloc.getDefiningOp()->getOperands(),
100 [&](Value operand) {
101 return operand.getDefiningOp<memref::RankOp>();
102 });
103 }
104 return false;
105 }
106 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
107 .getTypeSizeInBits(type.getElementType());
108 // Use tryGetNumElements to avoid an assertion on integer overflow (e.g. for
109 // very large statically-shaped memrefs). If the element count overflows
110 // int64_t the allocation is certainly not "small", so return false.
111 std::optional<int64_t> numElements = type.tryGetNumElements();
112 if (!numElements)
113 return false;
114 // Guard against overflow in the size computation as well.
115 if (bitwidth != 0 &&
116 *numElements > static_cast<int64_t>(maximumSizeInBytes * 8ULL / bitwidth))
117 return false;
118 return *numElements * bitwidth <= maximumSizeInBytes * 8;
121/// Checks whether the given aliases leave the allocation scope.
122static bool
124 const BufferViewFlowAnalysis::ValueSetT &aliases) {
125 for (Value alias : aliases) {
126 for (auto *use : alias.getUsers()) {
127 // If there is at least one alias that leaves the parent region, we know
128 // that this alias escapes the whole region and hence the associated
129 // allocation leaves allocation scope.
130 if (isa<RegionBranchTerminatorOpInterface>(use) &&
131 use->getParentRegion() == parentRegion)
132 return true;
134 }
135 return false;
137
138/// Checks, if an automated allocation scope for a given alloc value exists.
139static bool hasAllocationScope(Value alloc,
140 const BufferViewFlowAnalysis &aliasAnalysis) {
141 Region *region = alloc.getParentRegion();
142 do {
143 if (Operation *parentOp = region->getParentOp()) {
144 // Check if the operation is an automatic allocation scope and whether an
145 // alias leaves the scope. This means, an allocation yields out of
146 // this scope and can not be transformed in a stack-based allocation.
147 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
148 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
149 return true;
150 // Check if the operation is a known control flow interface and break the
151 // loop to avoid transformation in loops. Furthermore skip transformation
152 // if the operation does not implement a RegionBeanchOpInterface.
153 if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
154 break;
155 }
156 } while ((region = region->getParentRegion()));
157 return false;
158}
159
160namespace {
162//===----------------------------------------------------------------------===//
163// BufferAllocationHoisting
164//===----------------------------------------------------------------------===//
165
166/// A base implementation compatible with the `BufferAllocationHoisting` class.
167struct BufferAllocationHoistingStateBase {
168 /// A pointer to the current dominance info.
169 DominanceInfo *dominators;
170
171 /// The current allocation value.
172 Value allocValue;
173
174 /// The current placement block (if any).
175 Block *placementBlock;
176
177 /// Initializes the state base.
178 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
179 Block *placementBlock)
180 : dominators(dominators), allocValue(allocValue),
181 placementBlock(placementBlock) {}
182};
183
184/// Implements the actual hoisting logic for allocation nodes.
185template <typename StateT>
186class BufferAllocationHoisting : public BufferPlacementTransformationBase {
187public:
188 BufferAllocationHoisting(Operation *op)
189 : BufferPlacementTransformationBase(op), dominators(op),
190 postDominators(op), scopeOp(op) {}
191
192 /// Moves allocations upwards.
193 void hoist() {
194 SmallVector<Value> allocsAndAllocas;
196 allocsAndAllocas.push_back(std::get<0>(entry));
197 scopeOp->walk([&](memref::AllocaOp op) {
198 allocsAndAllocas.push_back(op.getMemref());
199 });
201 for (auto allocValue : allocsAndAllocas) {
202 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
203 continue;
204 Operation *definingOp = allocValue.getDefiningOp();
205 assert(definingOp && "No defining op");
206 // Skip allocations in blocks that are not reachable from the function
207 // entry. Such blocks are dead code and the dominator tree analysis may
208 // not have nodes for them, which would cause crashes below.
209 if (!dominators.isReachableFromEntry(allocValue.getParentBlock()))
210 continue;
211 auto operands = definingOp->getOperands();
212 auto resultAliases = aliases.resolve(allocValue);
213 // Determine the common dominator block of all aliases.
214 Block *dominatorBlock =
215 findCommonDominator(allocValue, resultAliases, dominators);
216 // Init the initial hoisting state.
217 StateT state(&dominators, allocValue, allocValue.getParentBlock());
218 // Check for additional allocation dependencies to compute an upper bound
219 // for hoisting.
220 Block *dependencyBlock = nullptr;
221 // If this node has dependencies, check all dependent nodes. This ensures
222 // that all dependency values have been computed before allocating the
223 // buffer.
224 for (Value depValue : operands) {
225 Block *depBlock = depValue.getParentBlock();
226 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
227 dependencyBlock = depBlock;
228 }
229
230 // Find the actual placement block and determine the start operation using
231 // an upper placement-block boundary. The idea is that placement block
232 // cannot be moved any further upwards than the given upper bound.
233 Block *placementBlock = findPlacementBlock(
234 state, state.computeUpperBound(dominatorBlock, dependencyBlock));
236 allocValue, placementBlock, liveness);
237
238 // Move the alloc in front of the start operation.
239 Operation *allocOperation = allocValue.getDefiningOp();
240 allocOperation->moveBefore(startOperation);
241 }
243
244private:
245 /// Finds a valid placement block by walking upwards in the CFG until we
246 /// either cannot continue our walk due to constraints (given by the StateT
247 /// implementation) or we have reached the upper-most dominator block.
248 Block *findPlacementBlock(StateT &state, Block *upperBound) {
249 Block *currentBlock = state.placementBlock;
250 // Walk from the innermost regions/loops to the outermost regions/loops and
251 // find an appropriate placement block that satisfies the constraint of the
252 // current StateT implementation. Walk until we reach the upperBound block
253 // (if any).
254
255 // If we are not able to find a valid parent operation or an associated
256 // parent block, break the walk loop.
257 Operation *parentOp;
258 Block *parentBlock;
259 while ((parentOp = currentBlock->getParentOp()) &&
260 (parentBlock = parentOp->getBlock()) &&
261 (!upperBound ||
262 dominators.properlyDominates(upperBound, currentBlock))) {
263 // Try to find an immediate dominator and check whether the parent block
264 // is above the immediate dominator (if any).
265 DominanceInfoNode *idom = nullptr;
266
267 // DominanceInfo doesn't support getNode queries for single-block regions.
268 if (!currentBlock->isEntryBlock())
269 idom = dominators.getNode(currentBlock)->getIDom();
270
271 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
272 // If the current immediate dominator is below the placement block, move
273 // to the immediate dominator block.
274 currentBlock = idom->getBlock();
275 state.recordMoveToDominator(currentBlock);
276 } else {
277 // We have to move to our parent block since an immediate dominator does
278 // either not exist or is above our parent block. If we cannot move to
279 // our parent operation due to constraints given by the StateT
280 // implementation, break the walk loop. Furthermore, we should not move
281 // allocations out of unknown region-based control-flow operations.
282 if (!isKnownControlFlowInterface(parentOp) ||
283 !state.isLegalPlacement(parentOp))
284 break;
285 // Move to our parent block by notifying the current StateT
286 // implementation.
287 currentBlock = parentBlock;
288 state.recordMoveToParent(currentBlock);
289 }
290 }
291 // Return the finally determined placement block.
292 return state.placementBlock;
293 }
294
295 /// The dominator info to find the appropriate start operation to move the
296 /// allocs.
297 DominanceInfo dominators;
298
299 /// The post dominator info to move the dependent allocs in the right
300 /// position.
301 PostDominanceInfo postDominators;
302
303 /// The map storing the final placement blocks of a given alloc value.
304 llvm::DenseMap<Value, Block *> placementBlocks;
305
306 /// The operation that this transformation is working on. It is used to also
307 /// gather allocas.
308 Operation *scopeOp;
309};
310
311/// A state implementation compatible with the `BufferAllocationHoisting` class
312/// that hoists allocations into dominator blocks while keeping them inside of
313/// loops.
314struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
315 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
316
317 /// Computes the upper bound for the placement block search.
318 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
319 // If we do not have a dependency block, the upper bound is given by the
320 // dominator block.
321 if (!dependencyBlock)
322 return dominatorBlock;
323
324 // Find the "lower" block of the dominator and the dependency block to
325 // ensure that we do not move allocations above this block.
326 return dominators->properlyDominates(dominatorBlock, dependencyBlock)
327 ? dependencyBlock
328 : dominatorBlock;
329 }
330
331 /// Returns true if the given operation does not represent a loop.
332 bool isLegalPlacement(Operation *op) { return !isLoop(op); }
333
334 /// Returns true if the given operation should be considered for hoisting.
335 static bool shouldHoistOpType(Operation *op) {
337 }
338
339 /// Sets the current placement block to the given block.
340 void recordMoveToDominator(Block *block) { placementBlock = block; }
341
342 /// Sets the current placement block to the given block.
343 void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
344};
345
346/// A state implementation compatible with the `BufferAllocationHoisting` class
347/// that hoists allocations out of loops.
348struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
349 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
350
351 /// Remembers the dominator block of all aliases.
352 Block *aliasDominatorBlock = nullptr;
353
354 /// Computes the upper bound for the placement block search.
355 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
356 aliasDominatorBlock = dominatorBlock;
357 // If there is a dependency block, we have to use this block as an upper
358 // bound to satisfy all allocation value dependencies.
359 return dependencyBlock ? dependencyBlock : nullptr;
360 }
361
362 /// Returns true if the given operation represents a loop with sequential
363 /// execution semantics and one of the aliases caused the
364 /// `aliasDominatorBlock` to be "above" the block of the given loop operation.
365 /// If this is the case, it indicates that the allocation is passed via a back
366 /// edge.
367 bool isLegalPlacement(Operation *op) {
368 return isSequentialLoop(op) &&
369 !dominators->dominates(aliasDominatorBlock, op->getBlock());
370 }
371
372 /// Returns true if the given operation should be considered for hoisting.
373 static bool shouldHoistOpType(Operation *op) {
374 return allowAllocLoopHoisting(op);
375 }
376
377 /// Does not change the internal placement block, as we want to move
378 /// operations out of loops only.
379 void recordMoveToDominator(Block *block) {}
380
381 /// Sets the current placement block to the given block.
382 void recordMoveToParent(Block *block) { placementBlock = block; }
383};
384
385//===----------------------------------------------------------------------===//
386// BufferPlacementPromotion
387//===----------------------------------------------------------------------===//
388
389/// Promotes heap-based allocations to stack-based allocations (if possible).
390class BufferPlacementPromotion : BufferPlacementTransformationBase {
391public:
392 BufferPlacementPromotion(Operation *op)
393 : BufferPlacementTransformationBase(op) {}
394
395 /// Promote buffers to stack-based allocations.
396 void promote(function_ref<bool(Value)> isSmallAlloc) {
397 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
398 Value alloc = std::get<0>(entry);
399 Operation *dealloc = std::get<1>(entry);
400 // Checking several requirements to transform an AllocOp into an AllocaOp.
401 // The transformation is done if the allocation is limited to a given
402 // size. Furthermore, a deallocation must not be defined for this
403 // allocation entry and a parent allocation scope must exist.
404 if (!isSmallAlloc(alloc) || dealloc ||
405 !hasAllocationScope(alloc, aliases))
406 continue;
407
408 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
409 alloc, alloc.getParentBlock(), liveness);
410 // Build a new alloca that is associated with its parent
411 // `AutomaticAllocationScope` determined during the initialization phase.
412 OpBuilder builder(startOperation);
413 Operation *allocOp = alloc.getDefiningOp();
414 if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
415 std::optional<Operation *> alloca =
416 allocInterface.buildPromotedAlloc(builder, alloc);
417 if (!alloca)
418 continue;
419 // Replace the original alloc by a newly created alloca.
420 allocOp->replaceAllUsesWith(alloca.value());
421 allocOp->erase();
422 }
423 }
424 }
425};
426
427//===----------------------------------------------------------------------===//
428// BufferOptimizationPasses
429//===----------------------------------------------------------------------===//
430
431/// The buffer hoisting pass that hoists allocation nodes into dominating
432/// blocks.
433struct BufferHoistingPass
434 : public bufferization::impl::BufferHoistingPassBase<BufferHoistingPass> {
435
436 void runOnOperation() override {
437 // Hoist all allocations into dominator blocks.
438 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
439 getOperation());
440 optimizer.hoist();
441 }
442};
443
444/// The buffer loop hoisting pass that hoists allocation nodes out of loops.
445struct BufferLoopHoistingPass
447 BufferLoopHoistingPass> {
448
449 void runOnOperation() override {
450 // Hoist all allocations out of loops.
451 hoistBuffersFromLoops(getOperation());
452 }
453};
454
455/// The promote buffer to stack pass that tries to convert alloc nodes into
456/// alloca nodes.
457class PromoteBuffersToStackPass
459 PromoteBuffersToStackPass> {
460 using Base::Base;
461
462public:
463 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
464 : isSmallAlloc(std::move(isSmallAlloc)) {}
465
466 LogicalResult initialize(MLIRContext *context) override {
467 if (isSmallAlloc == nullptr) {
468 isSmallAlloc = [=](Value alloc) {
469 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
470 maxRankOfAllocatedMemRef);
471 };
472 }
473 return success();
474 }
475
476 void runOnOperation() override {
477 // Move all allocation nodes and convert candidates into allocas.
478 BufferPlacementPromotion optimizer(getOperation());
479 optimizer.promote(isSmallAlloc);
480 }
481
482private:
483 std::function<bool(Value)> isSmallAlloc;
484};
485
486} // namespace
487
489 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
490 optimizer.hoist();
491}
492
494 std::function<bool(Value)> isSmallAlloc) {
495 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
496}
return success()
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
static bool isSequentialLoop(Operation *op)
Return whether the given operation is a loop with sequential execution semantics.
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
static bool allowAllocDominateBlockHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the dominate...
static bool allowAllocLoopHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the loop hoi...
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
LogicalResult initialize(unsigned origNumLoops, ArrayRef< ReassociationIndices > foldedIterationDims)
Block represents an ordered list of Operations.
Definition Block.h:33
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition Block.cpp:36
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
SmallPtrSet< Value, 16 > ValueSetT
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
A class for computing basic dominance information.
Definition Dominance.h:140
A trait of region holding operations that define a new scope for automatic allocations,...
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition Operation.h:778
Block * getBlock()
Returns the operation block that contains this operation.
Definition Operation.h:234
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:407
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition Operation.h:301
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition Region.cpp:45
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition Value.cpp:46
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition Value.cpp:39
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block.
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition BufferUtils.h:38
The base class for all BufferPlacement transformations.
void hoistBuffersFromLoops(Operation *op)
Within the given operation, hoist buffers from loops where possible.
std::unique_ptr<::mlir::Pass > createPromoteBuffersToStackPass()
Block * findCommonDominator(Value value, const BufferViewFlowAnalysis::ValueSetT &values, const DominatorT &doms)
Finds a common dominator for the given value while taking the positions of the values in the value se...
Definition BufferUtils.h:82
void promote(RewriterBase &rewriter, scf::ForallOp forallOp)
Promotes the loop body of a scf::ForallOp to its containing block.
Definition SCF.cpp:748
Include the generated interface declarations.
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition Dominance.h:30
llvm::function_ref< Fn > function_ref
Definition LLVM.h:144