MLIR  14.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "PassDetail.h"
18 #include "mlir/IR/Operation.h"
20 #include "mlir/Pass/Pass.h"
21 
22 using namespace mlir;
23 using namespace mlir::bufferization;
24 
25 /// Returns true if the given operation implements a known high-level region-
26 /// based control-flow interface.
28  return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
29 }
30 
31 /// Check if the size of the allocation is less than the given size. The
32 /// transformation is only applied to small buffers since large buffers could
33 /// exceed the stack space.
34 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
35  unsigned bitwidthOfIndexType,
36  unsigned maxRankOfAllocatedMemRef) {
37  auto type = alloc.getType().dyn_cast<ShapedType>();
38  if (!type || !alloc.getDefiningOp<memref::AllocOp>())
39  return false;
40  if (!type.hasStaticShape()) {
41  // Check if the dynamic shape dimension of the alloc is produced by
42  // `memref.rank`. If this is the case, it is likely to be small.
43  // Furthermore, the dimension is limited to the maximum rank of the
44  // allocated memref to avoid large values by multiplying several small
45  // values.
46  if (type.getRank() <= maxRankOfAllocatedMemRef) {
47  return llvm::all_of(alloc.getDefiningOp()->getOperands(),
48  [&](Value operand) {
49  return operand.getDefiningOp<memref::RankOp>();
50  });
51  }
52  return false;
53  }
54  // For index types, use the provided size, as the type does not know.
55  unsigned int bitwidth = type.getElementType().isIndex()
56  ? bitwidthOfIndexType
57  : type.getElementTypeBitWidth();
58  return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
59 }
60 
61 /// Checks whether the given aliases leave the allocation scope.
62 static bool
64  const BufferViewFlowAnalysis::ValueSetT &aliases) {
65  for (Value alias : aliases) {
66  for (auto *use : alias.getUsers()) {
67  // If there is at least one alias that leaves the parent region, we know
68  // that this alias escapes the whole region and hence the associated
69  // allocation leaves allocation scope.
70  if (isRegionReturnLike(use) && use->getParentRegion() == parentRegion)
71  return true;
72  }
73  }
74  return false;
75 }
76 
77 /// Checks, if an automated allocation scope for a given alloc value exists.
78 static bool hasAllocationScope(Value alloc,
79  const BufferViewFlowAnalysis &aliasAnalysis) {
80  Region *region = alloc.getParentRegion();
81  do {
82  if (Operation *parentOp = region->getParentOp()) {
83  // Check if the operation is an automatic allocation scope and whether an
84  // alias leaves the scope. This means, an allocation yields out of
85  // this scope and can not be transformed in a stack-based allocation.
86  if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
87  !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
88  return true;
89  // Check if the operation is a known control flow interface and break the
90  // loop to avoid transformation in loops. Furthermore skip transformation
91  // if the operation does not implement a RegionBeanchOpInterface.
93  !isKnownControlFlowInterface(parentOp))
94  break;
95  }
96  } while ((region = region->getParentRegion()));
97  return false;
98 }
99 
100 namespace {
101 
102 //===----------------------------------------------------------------------===//
103 // BufferAllocationHoisting
104 //===----------------------------------------------------------------------===//
105 
106 /// A base implementation compatible with the `BufferAllocationHoisting` class.
107 struct BufferAllocationHoistingStateBase {
108  /// A pointer to the current dominance info.
109  DominanceInfo *dominators;
110 
111  /// The current allocation value.
112  Value allocValue;
113 
114  /// The current placement block (if any).
115  Block *placementBlock;
116 
117  /// Initializes the state base.
118  BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
119  Block *placementBlock)
120  : dominators(dominators), allocValue(allocValue),
121  placementBlock(placementBlock) {}
122 };
123 
124 /// Implements the actual hoisting logic for allocation nodes.
125 template <typename StateT>
126 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
127 public:
128  BufferAllocationHoisting(Operation *op)
129  : BufferPlacementTransformationBase(op), dominators(op),
130  postDominators(op), scopeOp(op) {}
131 
132  /// Moves allocations upwards.
133  void hoist() {
134  SmallVector<Value> allocsAndAllocas;
135  for (BufferPlacementAllocs::AllocEntry &entry : allocs)
136  allocsAndAllocas.push_back(std::get<0>(entry));
137  scopeOp->walk(
138  [&](memref::AllocaOp op) { allocsAndAllocas.push_back(op.memref()); });
139 
140  for (auto allocValue : allocsAndAllocas) {
141  if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
142  continue;
143  Operation *definingOp = allocValue.getDefiningOp();
144  assert(definingOp && "No defining op");
145  auto operands = definingOp->getOperands();
146  auto resultAliases = aliases.resolve(allocValue);
147  // Determine the common dominator block of all aliases.
148  Block *dominatorBlock =
149  findCommonDominator(allocValue, resultAliases, dominators);
150  // Init the initial hoisting state.
151  StateT state(&dominators, allocValue, allocValue.getParentBlock());
152  // Check for additional allocation dependencies to compute an upper bound
153  // for hoisting.
154  Block *dependencyBlock = nullptr;
155  // If this node has dependencies, check all dependent nodes. This ensures
156  // that all dependency values have been computed before allocating the
157  // buffer.
158  for (Value depValue : operands) {
159  Block *depBlock = depValue.getParentBlock();
160  if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
161  dependencyBlock = depBlock;
162  }
163 
164  // Find the actual placement block and determine the start operation using
165  // an upper placement-block boundary. The idea is that placement block
166  // cannot be moved any further upwards than the given upper bound.
167  Block *placementBlock = findPlacementBlock(
168  state, state.computeUpperBound(dominatorBlock, dependencyBlock));
170  allocValue, placementBlock, liveness);
171 
172  // Move the alloc in front of the start operation.
173  Operation *allocOperation = allocValue.getDefiningOp();
174  allocOperation->moveBefore(startOperation);
175  }
176  }
177 
178 private:
179  /// Finds a valid placement block by walking upwards in the CFG until we
180  /// either cannot continue our walk due to constraints (given by the StateT
181  /// implementation) or we have reached the upper-most dominator block.
182  Block *findPlacementBlock(StateT &state, Block *upperBound) {
183  Block *currentBlock = state.placementBlock;
184  // Walk from the innermost regions/loops to the outermost regions/loops and
185  // find an appropriate placement block that satisfies the constraint of the
186  // current StateT implementation. Walk until we reach the upperBound block
187  // (if any).
188 
189  // If we are not able to find a valid parent operation or an associated
190  // parent block, break the walk loop.
191  Operation *parentOp;
192  Block *parentBlock;
193  while ((parentOp = currentBlock->getParentOp()) &&
194  (parentBlock = parentOp->getBlock()) &&
195  (!upperBound ||
196  dominators.properlyDominates(upperBound, currentBlock))) {
197  // Try to find an immediate dominator and check whether the parent block
198  // is above the immediate dominator (if any).
199  DominanceInfoNode *idom = nullptr;
200 
201  // DominanceInfo doesn't support getNode queries for single-block regions.
202  if (!currentBlock->isEntryBlock())
203  idom = dominators.getNode(currentBlock)->getIDom();
204 
205  if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
206  // If the current immediate dominator is below the placement block, move
207  // to the immediate dominator block.
208  currentBlock = idom->getBlock();
209  state.recordMoveToDominator(currentBlock);
210  } else {
211  // We have to move to our parent block since an immediate dominator does
212  // either not exist or is above our parent block. If we cannot move to
213  // our parent operation due to constraints given by the StateT
214  // implementation, break the walk loop. Furthermore, we should not move
215  // allocations out of unknown region-based control-flow operations.
216  if (!isKnownControlFlowInterface(parentOp) ||
217  !state.isLegalPlacement(parentOp))
218  break;
219  // Move to our parent block by notifying the current StateT
220  // implementation.
221  currentBlock = parentBlock;
222  state.recordMoveToParent(currentBlock);
223  }
224  }
225  // Return the finally determined placement block.
226  return state.placementBlock;
227  }
228 
229  /// The dominator info to find the appropriate start operation to move the
230  /// allocs.
231  DominanceInfo dominators;
232 
233  /// The post dominator info to move the dependent allocs in the right
234  /// position.
235  PostDominanceInfo postDominators;
236 
237  /// The map storing the final placement blocks of a given alloc value.
238  llvm::DenseMap<Value, Block *> placementBlocks;
239 
240  /// The operation that this transformation is working on. It is used to also
241  /// gather allocas.
242  Operation *scopeOp;
243 };
244 
245 /// A state implementation compatible with the `BufferAllocationHoisting` class
246 /// that hoists allocations into dominator blocks while keeping them inside of
247 /// loops.
248 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
249  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
250 
251  /// Computes the upper bound for the placement block search.
252  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
253  // If we do not have a dependency block, the upper bound is given by the
254  // dominator block.
255  if (!dependencyBlock)
256  return dominatorBlock;
257 
258  // Find the "lower" block of the dominator and the dependency block to
259  // ensure that we do not move allocations above this block.
260  return dominators->properlyDominates(dominatorBlock, dependencyBlock)
261  ? dependencyBlock
262  : dominatorBlock;
263  }
264 
265  /// Returns true if the given operation does not represent a loop.
266  bool isLegalPlacement(Operation *op) {
268  }
269 
270  /// Returns true if the given operation should be considered for hoisting.
271  static bool shouldHoistOpType(Operation *op) {
272  return llvm::isa<memref::AllocOp>(op);
273  }
274 
275  /// Sets the current placement block to the given block.
276  void recordMoveToDominator(Block *block) { placementBlock = block; }
277 
278  /// Sets the current placement block to the given block.
279  void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
280 };
281 
282 /// A state implementation compatible with the `BufferAllocationHoisting` class
283 /// that hoists allocations out of loops.
284 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
285  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
286 
287  /// Remembers the dominator block of all aliases.
288  Block *aliasDominatorBlock = nullptr;
289 
290  /// Computes the upper bound for the placement block search.
291  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
292  aliasDominatorBlock = dominatorBlock;
293  // If there is a dependency block, we have to use this block as an upper
294  // bound to satisfy all allocation value dependencies.
295  return dependencyBlock ? dependencyBlock : nullptr;
296  }
297 
298  /// Returns true if the given operation represents a loop and one of the
299  /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
300  /// given loop operation. If this is the case, it indicates that the
301  /// allocation is passed via a back edge.
302  bool isLegalPlacement(Operation *op) {
304  !dominators->dominates(aliasDominatorBlock, op->getBlock());
305  }
306 
307  /// Returns true if the given operation should be considered for hoisting.
308  static bool shouldHoistOpType(Operation *op) {
309  return llvm::isa<memref::AllocOp, memref::AllocaOp>(op);
310  }
311 
312  /// Does not change the internal placement block, as we want to move
313  /// operations out of loops only.
314  void recordMoveToDominator(Block *block) {}
315 
316  /// Sets the current placement block to the given block.
317  void recordMoveToParent(Block *block) { placementBlock = block; }
318 };
319 
320 //===----------------------------------------------------------------------===//
321 // BufferPlacementPromotion
322 //===----------------------------------------------------------------------===//
323 
324 /// Promotes heap-based allocations to stack-based allocations (if possible).
325 class BufferPlacementPromotion : BufferPlacementTransformationBase {
326 public:
327  BufferPlacementPromotion(Operation *op)
329 
330  /// Promote buffers to stack-based allocations.
331  void promote(function_ref<bool(Value)> isSmallAlloc) {
332  for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
333  Value alloc = std::get<0>(entry);
334  Operation *dealloc = std::get<1>(entry);
335  // Checking several requirements to transform an AllocOp into an AllocaOp.
336  // The transformation is done if the allocation is limited to a given
337  // size. Furthermore, a deallocation must not be defined for this
338  // allocation entry and a parent allocation scope must exist.
339  if (!isSmallAlloc(alloc) || dealloc ||
340  !hasAllocationScope(alloc, aliases))
341  continue;
342 
344  alloc, alloc.getParentBlock(), liveness);
345  // Build a new alloca that is associated with its parent
346  // `AutomaticAllocationScope` determined during the initialization phase.
347  OpBuilder builder(startOperation);
348  Operation *allocOp = alloc.getDefiningOp();
349  Operation *alloca = builder.create<memref::AllocaOp>(
350  alloc.getLoc(), alloc.getType().cast<MemRefType>(),
351  allocOp->getOperands());
352 
353  // Replace the original alloc by a newly created alloca.
354  allocOp->replaceAllUsesWith(alloca);
355  allocOp->erase();
356  }
357  }
358 };
359 
360 //===----------------------------------------------------------------------===//
361 // BufferOptimizationPasses
362 //===----------------------------------------------------------------------===//
363 
364 /// The buffer hoisting pass that hoists allocation nodes into dominating
365 /// blocks.
366 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
367 
368  void runOnOperation() override {
369  // Hoist all allocations into dominator blocks.
370  BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
371  getOperation());
372  optimizer.hoist();
373  }
374 };
375 
376 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
377 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
378 
379  void runOnOperation() override {
380  // Hoist all allocations out of loops.
381  BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
382  getOperation());
383  optimizer.hoist();
384  }
385 };
386 
387 /// The promote buffer to stack pass that tries to convert alloc nodes into
388 /// alloca nodes.
389 class PromoteBuffersToStackPass
390  : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
391 public:
392  PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
393  unsigned bitwidthOfIndexType,
394  unsigned maxRankOfAllocatedMemRef) {
395  this->maxAllocSizeInBytes = maxAllocSizeInBytes;
396  this->bitwidthOfIndexType = bitwidthOfIndexType;
397  this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
398  }
399 
400  explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
401  : isSmallAlloc(std::move(isSmallAlloc)) {}
402 
403  LogicalResult initialize(MLIRContext *context) override {
404  if (isSmallAlloc == nullptr) {
405  isSmallAlloc = [=](Value alloc) {
406  return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
407  bitwidthOfIndexType,
408  maxRankOfAllocatedMemRef);
409  };
410  }
411  return success();
412  }
413 
414  void runOnOperation() override {
415  // Move all allocation nodes and convert candidates into allocas.
416  BufferPlacementPromotion optimizer(getOperation());
417  optimizer.promote(isSmallAlloc);
418  }
419 
420 private:
421  std::function<bool(Value)> isSmallAlloc;
422 };
423 
424 } // namespace
425 
427  return std::make_unique<BufferHoistingPass>();
428 }
429 
431  return std::make_unique<BufferLoopHoistingPass>();
432 }
433 
435  unsigned maxAllocSizeInBytes, unsigned bitwidthOfIndexType,
436  unsigned maxRankOfAllocatedMemRef) {
437  return std::make_unique<PromoteBuffersToStackPass>(
438  maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
439 }
440 
442  std::function<bool(Value)> isSmallAlloc) {
443  return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
444 }
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:440
Include the generated interface declarations.
static Operation * create(Location location, OperationName name, TypeRange resultTypes, ValueRange operands, ArrayRef< NamedAttribute > attributes, BlockRange successors, unsigned numRegions)
Create a new Operation with the specific fields.
Definition: Operation.cpp:27
This class contains a list of basic blocks and a link to the parent operation it is attached to...
Definition: Region.h:26
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:28
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
operand_range getOperands()
Returns an iterator on the underlying Value&#39;s.
Definition: Operation.h:247
Block represents an ordered list of Operations.
Definition: Block.h:29
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition: BufferUtils.h:37
A trait of region holding operations that define a new scope for automatic allocations, i.e., allocations that are freed when control is transferred back from the operation&#39;s region.
The base class for all BufferPlacement transformations.
Definition: BufferUtils.h:77
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
A class for computing basic dominance information.
Definition: Dominance.h:117
bool isRegionReturnLike(Operation *operation)
Returns true if the given operation is either annotated with the ReturnLike trait or implements the R...
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:96
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block...
Definition: BufferUtils.cpp:32
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:424
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
U dyn_cast() const
Definition: Types.h:244
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided &#39;values&#39;.
Definition: Operation.h:154
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition: Block.cpp:35
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
std::unique_ptr< Pass > createBufferHoistingPass()
Creates a pass that moves allocations upwards to reduce the number of required copies that are insert...
Location getLoc() const
Return the location of this value.
Definition: Value.cpp:26
std::unique_ptr< Pass > createBufferLoopHoistingPass()
Creates a pass that moves allocations upwards out of loops.
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:84
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned bitwidthOfIndexType, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
Type getType() const
Return the type of this value.
Definition: Value.h:117
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:55
std::unique_ptr< Pass > createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes=1024, unsigned bitwidthOfIndexType=64, unsigned maxRankOfAllocatedMemRef=1)
Creates a pass that promotes heap-based allocations to stack-based ones.
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
This class helps build Operations.
Definition: Builders.h:177
A class for computing basic postdominance information.
Definition: Dominance.h:176
U cast() const
Definition: Types.h:250
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition: Dominance.h:29