MLIR 22.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1//===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements logic for three optimization passes. The first two
10// passes try to move alloc nodes out of blocks to reduce the number of
11// allocations and copies during buffer deallocation. The third pass tries to
12// convert heap-based allocations to stack-based allocations, if possible.
13
15
21#include "mlir/IR/Operation.h"
23#include "mlir/Pass/Pass.h"
24
25namespace mlir {
26namespace bufferization {
27#define GEN_PASS_DEF_BUFFERHOISTINGPASS
28#define GEN_PASS_DEF_BUFFERLOOPHOISTINGPASS
29#define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACKPASS
30#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31} // namespace bufferization
32} // namespace mlir
33
34using namespace mlir;
35using namespace mlir::bufferization;
36
37/// Returns true if the given operation implements a known high-level region-
38/// based control-flow interface.
40 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41}
42
43/// Returns true if the given operation represents a loop by testing whether it
44/// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
45/// the case of a `RegionBranchOpInterface`, it checks all region-based control-
46/// flow edges for cycles.
47static bool isLoop(Operation *op) {
48 // If the operation implements the `LoopLikeOpInterface` it can be considered
49 // a loop.
50 if (isa<LoopLikeOpInterface>(op))
51 return true;
52
53 // If the operation does not implement the `RegionBranchOpInterface`, it is
54 // (currently) not possible to detect a loop.
55 auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
56 if (!regionInterface)
57 return false;
58
59 return regionInterface.hasLoop();
60}
61
62/// Return whether the given operation is a loop with sequential execution
63/// semantics.
64static bool isSequentialLoop(Operation *op) {
65 return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op);
66}
67
68/// Returns true if the given operation implements the AllocationOpInterface
69/// and it supports the dominate block hoisting.
71 auto allocOp = dyn_cast<AllocationOpInterface>(op);
72 return allocOp &&
73 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
74}
75
76/// Returns true if the given operation implements the AllocationOpInterface
77/// and it supports the loop hoisting.
79 auto allocOp = dyn_cast<AllocationOpInterface>(op);
80 return allocOp &&
81 static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
82}
83
84/// Check if the size of the allocation is less than the given size. The
85/// transformation is only applied to small buffers since large buffers could
86/// exceed the stack space.
87static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
88 unsigned maxRankOfAllocatedMemRef) {
89 auto type = dyn_cast<ShapedType>(alloc.getType());
90 if (!type || !alloc.getDefiningOp<memref::AllocOp>())
91 return false;
92 if (!type.hasStaticShape()) {
93 // Check if the dynamic shape dimension of the alloc is produced by
94 // `memref.rank`. If this is the case, it is likely to be small.
95 // Furthermore, the dimension is limited to the maximum rank of the
96 // allocated memref to avoid large values by multiplying several small
97 // values.
98 if (type.getRank() <= maxRankOfAllocatedMemRef) {
99 return llvm::all_of(alloc.getDefiningOp()->getOperands(),
100 [&](Value operand) {
101 return operand.getDefiningOp<memref::RankOp>();
102 });
103 }
104 return false;
105 }
106 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
107 .getTypeSizeInBits(type.getElementType());
108 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
109}
110
111/// Checks whether the given aliases leave the allocation scope.
112static bool
114 const BufferViewFlowAnalysis::ValueSetT &aliases) {
115 for (Value alias : aliases) {
116 for (auto *use : alias.getUsers()) {
117 // If there is at least one alias that leaves the parent region, we know
118 // that this alias escapes the whole region and hence the associated
119 // allocation leaves allocation scope.
120 if (isa<RegionBranchTerminatorOpInterface>(use) &&
121 use->getParentRegion() == parentRegion)
122 return true;
123 }
124 }
125 return false;
126}
127
128/// Checks, if an automated allocation scope for a given alloc value exists.
129static bool hasAllocationScope(Value alloc,
130 const BufferViewFlowAnalysis &aliasAnalysis) {
131 Region *region = alloc.getParentRegion();
132 do {
133 if (Operation *parentOp = region->getParentOp()) {
134 // Check if the operation is an automatic allocation scope and whether an
135 // alias leaves the scope. This means, an allocation yields out of
136 // this scope and can not be transformed in a stack-based allocation.
137 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
138 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
139 return true;
140 // Check if the operation is a known control flow interface and break the
141 // loop to avoid transformation in loops. Furthermore skip transformation
142 // if the operation does not implement a RegionBeanchOpInterface.
143 if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
144 break;
145 }
146 } while ((region = region->getParentRegion()));
147 return false;
148}
150namespace {
151
152//===----------------------------------------------------------------------===//
153// BufferAllocationHoisting
154//===----------------------------------------------------------------------===//
155
156/// A base implementation compatible with the `BufferAllocationHoisting` class.
157struct BufferAllocationHoistingStateBase {
158 /// A pointer to the current dominance info.
159 DominanceInfo *dominators;
160
161 /// The current allocation value.
162 Value allocValue;
163
164 /// The current placement block (if any).
165 Block *placementBlock;
166
167 /// Initializes the state base.
168 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
169 Block *placementBlock)
170 : dominators(dominators), allocValue(allocValue),
171 placementBlock(placementBlock) {}
172};
173
174/// Implements the actual hoisting logic for allocation nodes.
175template <typename StateT>
176class BufferAllocationHoisting : public BufferPlacementTransformationBase {
177public:
178 BufferAllocationHoisting(Operation *op)
179 : BufferPlacementTransformationBase(op), dominators(op),
180 postDominators(op), scopeOp(op) {}
181
182 /// Moves allocations upwards.
183 void hoist() {
184 SmallVector<Value> allocsAndAllocas;
185 for (BufferPlacementAllocs::AllocEntry &entry : allocs)
186 allocsAndAllocas.push_back(std::get<0>(entry));
187 scopeOp->walk([&](memref::AllocaOp op) {
188 allocsAndAllocas.push_back(op.getMemref());
189 });
191 for (auto allocValue : allocsAndAllocas) {
192 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
193 continue;
194 Operation *definingOp = allocValue.getDefiningOp();
195 assert(definingOp && "No defining op");
196 auto operands = definingOp->getOperands();
197 auto resultAliases = aliases.resolve(allocValue);
198 // Determine the common dominator block of all aliases.
199 Block *dominatorBlock =
200 findCommonDominator(allocValue, resultAliases, dominators);
201 // Init the initial hoisting state.
202 StateT state(&dominators, allocValue, allocValue.getParentBlock());
203 // Check for additional allocation dependencies to compute an upper bound
204 // for hoisting.
205 Block *dependencyBlock = nullptr;
206 // If this node has dependencies, check all dependent nodes. This ensures
207 // that all dependency values have been computed before allocating the
208 // buffer.
209 for (Value depValue : operands) {
210 Block *depBlock = depValue.getParentBlock();
211 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
212 dependencyBlock = depBlock;
213 }
215 // Find the actual placement block and determine the start operation using
216 // an upper placement-block boundary. The idea is that placement block
217 // cannot be moved any further upwards than the given upper bound.
218 Block *placementBlock = findPlacementBlock(
219 state, state.computeUpperBound(dominatorBlock, dependencyBlock));
221 allocValue, placementBlock, liveness);
222
223 // Move the alloc in front of the start operation.
224 Operation *allocOperation = allocValue.getDefiningOp();
225 allocOperation->moveBefore(startOperation);
226 }
227 }
228
229private:
230 /// Finds a valid placement block by walking upwards in the CFG until we
231 /// either cannot continue our walk due to constraints (given by the StateT
232 /// implementation) or we have reached the upper-most dominator block.
233 Block *findPlacementBlock(StateT &state, Block *upperBound) {
234 Block *currentBlock = state.placementBlock;
235 // Walk from the innermost regions/loops to the outermost regions/loops and
236 // find an appropriate placement block that satisfies the constraint of the
237 // current StateT implementation. Walk until we reach the upperBound block
238 // (if any).
239
240 // If we are not able to find a valid parent operation or an associated
241 // parent block, break the walk loop.
242 Operation *parentOp;
243 Block *parentBlock;
244 while ((parentOp = currentBlock->getParentOp()) &&
245 (parentBlock = parentOp->getBlock()) &&
246 (!upperBound ||
247 dominators.properlyDominates(upperBound, currentBlock))) {
248 // Try to find an immediate dominator and check whether the parent block
249 // is above the immediate dominator (if any).
250 DominanceInfoNode *idom = nullptr;
251
252 // DominanceInfo doesn't support getNode queries for single-block regions.
253 if (!currentBlock->isEntryBlock())
254 idom = dominators.getNode(currentBlock)->getIDom();
255
256 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
257 // If the current immediate dominator is below the placement block, move
258 // to the immediate dominator block.
259 currentBlock = idom->getBlock();
260 state.recordMoveToDominator(currentBlock);
261 } else {
262 // We have to move to our parent block since an immediate dominator does
263 // either not exist or is above our parent block. If we cannot move to
264 // our parent operation due to constraints given by the StateT
265 // implementation, break the walk loop. Furthermore, we should not move
266 // allocations out of unknown region-based control-flow operations.
267 if (!isKnownControlFlowInterface(parentOp) ||
268 !state.isLegalPlacement(parentOp))
269 break;
270 // Move to our parent block by notifying the current StateT
271 // implementation.
272 currentBlock = parentBlock;
273 state.recordMoveToParent(currentBlock);
274 }
275 }
276 // Return the finally determined placement block.
277 return state.placementBlock;
278 }
279
280 /// The dominator info to find the appropriate start operation to move the
281 /// allocs.
282 DominanceInfo dominators;
283
284 /// The post dominator info to move the dependent allocs in the right
285 /// position.
286 PostDominanceInfo postDominators;
287
288 /// The map storing the final placement blocks of a given alloc value.
289 llvm::DenseMap<Value, Block *> placementBlocks;
290
291 /// The operation that this transformation is working on. It is used to also
292 /// gather allocas.
293 Operation *scopeOp;
294};
295
296/// A state implementation compatible with the `BufferAllocationHoisting` class
297/// that hoists allocations into dominator blocks while keeping them inside of
298/// loops.
299struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
300 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
301
302 /// Computes the upper bound for the placement block search.
303 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
304 // If we do not have a dependency block, the upper bound is given by the
305 // dominator block.
306 if (!dependencyBlock)
307 return dominatorBlock;
308
309 // Find the "lower" block of the dominator and the dependency block to
310 // ensure that we do not move allocations above this block.
311 return dominators->properlyDominates(dominatorBlock, dependencyBlock)
312 ? dependencyBlock
313 : dominatorBlock;
314 }
315
316 /// Returns true if the given operation does not represent a loop.
317 bool isLegalPlacement(Operation *op) { return !isLoop(op); }
318
319 /// Returns true if the given operation should be considered for hoisting.
320 static bool shouldHoistOpType(Operation *op) {
322 }
323
324 /// Sets the current placement block to the given block.
325 void recordMoveToDominator(Block *block) { placementBlock = block; }
326
327 /// Sets the current placement block to the given block.
328 void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
329};
330
331/// A state implementation compatible with the `BufferAllocationHoisting` class
332/// that hoists allocations out of loops.
333struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
334 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
335
336 /// Remembers the dominator block of all aliases.
337 Block *aliasDominatorBlock = nullptr;
338
339 /// Computes the upper bound for the placement block search.
340 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
341 aliasDominatorBlock = dominatorBlock;
342 // If there is a dependency block, we have to use this block as an upper
343 // bound to satisfy all allocation value dependencies.
344 return dependencyBlock ? dependencyBlock : nullptr;
345 }
346
347 /// Returns true if the given operation represents a loop with sequential
348 /// execution semantics and one of the aliases caused the
349 /// `aliasDominatorBlock` to be "above" the block of the given loop operation.
350 /// If this is the case, it indicates that the allocation is passed via a back
351 /// edge.
352 bool isLegalPlacement(Operation *op) {
353 return isSequentialLoop(op) &&
354 !dominators->dominates(aliasDominatorBlock, op->getBlock());
355 }
356
357 /// Returns true if the given operation should be considered for hoisting.
358 static bool shouldHoistOpType(Operation *op) {
359 return allowAllocLoopHoisting(op);
360 }
361
362 /// Does not change the internal placement block, as we want to move
363 /// operations out of loops only.
364 void recordMoveToDominator(Block *block) {}
365
366 /// Sets the current placement block to the given block.
367 void recordMoveToParent(Block *block) { placementBlock = block; }
368};
369
370//===----------------------------------------------------------------------===//
371// BufferPlacementPromotion
372//===----------------------------------------------------------------------===//
373
374/// Promotes heap-based allocations to stack-based allocations (if possible).
375class BufferPlacementPromotion : BufferPlacementTransformationBase {
376public:
377 BufferPlacementPromotion(Operation *op)
378 : BufferPlacementTransformationBase(op) {}
379
380 /// Promote buffers to stack-based allocations.
381 void promote(function_ref<bool(Value)> isSmallAlloc) {
382 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
383 Value alloc = std::get<0>(entry);
384 Operation *dealloc = std::get<1>(entry);
385 // Checking several requirements to transform an AllocOp into an AllocaOp.
386 // The transformation is done if the allocation is limited to a given
387 // size. Furthermore, a deallocation must not be defined for this
388 // allocation entry and a parent allocation scope must exist.
389 if (!isSmallAlloc(alloc) || dealloc ||
390 !hasAllocationScope(alloc, aliases))
391 continue;
392
393 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
394 alloc, alloc.getParentBlock(), liveness);
395 // Build a new alloca that is associated with its parent
396 // `AutomaticAllocationScope` determined during the initialization phase.
397 OpBuilder builder(startOperation);
398 Operation *allocOp = alloc.getDefiningOp();
399 if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
400 std::optional<Operation *> alloca =
401 allocInterface.buildPromotedAlloc(builder, alloc);
402 if (!alloca)
403 continue;
404 // Replace the original alloc by a newly created alloca.
405 allocOp->replaceAllUsesWith(alloca.value());
406 allocOp->erase();
407 }
408 }
409 }
410};
411
412//===----------------------------------------------------------------------===//
413// BufferOptimizationPasses
414//===----------------------------------------------------------------------===//
415
416/// The buffer hoisting pass that hoists allocation nodes into dominating
417/// blocks.
418struct BufferHoistingPass
419 : public bufferization::impl::BufferHoistingPassBase<BufferHoistingPass> {
420
421 void runOnOperation() override {
422 // Hoist all allocations into dominator blocks.
423 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
424 getOperation());
425 optimizer.hoist();
426 }
427};
428
429/// The buffer loop hoisting pass that hoists allocation nodes out of loops.
430struct BufferLoopHoistingPass
432 BufferLoopHoistingPass> {
433
434 void runOnOperation() override {
435 // Hoist all allocations out of loops.
436 hoistBuffersFromLoops(getOperation());
437 }
438};
439
440/// The promote buffer to stack pass that tries to convert alloc nodes into
441/// alloca nodes.
442class PromoteBuffersToStackPass
444 PromoteBuffersToStackPass> {
445 using Base::Base;
446
447public:
448 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
449 : isSmallAlloc(std::move(isSmallAlloc)) {}
450
451 LogicalResult initialize(MLIRContext *context) override {
452 if (isSmallAlloc == nullptr) {
453 isSmallAlloc = [=](Value alloc) {
454 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
455 maxRankOfAllocatedMemRef);
456 };
457 }
458 return success();
459 }
460
461 void runOnOperation() override {
462 // Move all allocation nodes and convert candidates into allocas.
463 BufferPlacementPromotion optimizer(getOperation());
464 optimizer.promote(isSmallAlloc);
465 }
466
467private:
468 std::function<bool(Value)> isSmallAlloc;
469};
470
471} // namespace
472
474 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
475 optimizer.hoist();
476}
477
479 std::function<bool(Value)> isSmallAlloc) {
480 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
481}
return success()
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
static bool isSequentialLoop(Operation *op)
Return whether the given operation is a loop with sequential execution semantics.
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
static bool allowAllocDominateBlockHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the dominate...
static bool allowAllocLoopHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the loop hoi...
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
LogicalResult initialize(unsigned origNumLoops, ArrayRef< ReassociationIndices > foldedIterationDims)
Block represents an ordered list of Operations.
Definition Block.h:33
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition Block.cpp:36
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
SmallPtrSet< Value, 16 > ValueSetT
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
A class for computing basic dominance information.
Definition Dominance.h:140
A trait of region holding operations that define a new scope for automatic allocations,...
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition Operation.h:749
Block * getBlock()
Returns the operation block that contains this operation.
Definition Operation.h:213
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:378
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition Operation.h:272
void erase()
Remove this operation from its parent block and delete it.
A class for computing basic postdominance information.
Definition Dominance.h:204
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition Region.cpp:45
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition Region.h:200
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition Value.cpp:46
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition Value.cpp:39
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block.
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition BufferUtils.h:38
The base class for all BufferPlacement transformations.
void hoistBuffersFromLoops(Operation *op)
Within the given operation, hoist buffers from loops where possible.
std::unique_ptr<::mlir::Pass > createPromoteBuffersToStackPass()
Block * findCommonDominator(Value value, const BufferViewFlowAnalysis::ValueSetT &values, const DominatorT &doms)
Finds a common dominator for the given value while taking the positions of the values in the value se...
Definition BufferUtils.h:82
void promote(RewriterBase &rewriter, scf::ForallOp forallOp)
Promotes the loop body of a scf::ForallOp to its containing block.
Definition SCF.cpp:792
Include the generated interface declarations.
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition Dominance.h:30
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152