MLIR  19.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
15 
21 #include "mlir/IR/Operation.h"
23 #include "mlir/Pass/Pass.h"
24 
25 namespace mlir {
26 namespace bufferization {
27 #define GEN_PASS_DEF_BUFFERHOISTING
28 #define GEN_PASS_DEF_BUFFERLOOPHOISTING
29 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK
30 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31 } // namespace bufferization
32 } // namespace mlir
33 
34 using namespace mlir;
35 using namespace mlir::bufferization;
36 
37 /// Returns true if the given operation implements a known high-level region-
38 /// based control-flow interface.
40  return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41 }
42 
43 /// Returns true if the given operation represents a loop by testing whether it
44 /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
45 /// the case of a `RegionBranchOpInterface`, it checks all region-based control-
46 /// flow edges for cycles.
47 static bool isLoop(Operation *op) {
48  // If the operation implements the `LoopLikeOpInterface` it can be considered
49  // a loop.
50  if (isa<LoopLikeOpInterface>(op))
51  return true;
52 
53  // If the operation does not implement the `RegionBranchOpInterface`, it is
54  // (currently) not possible to detect a loop.
55  auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
56  if (!regionInterface)
57  return false;
58 
59  return regionInterface.hasLoop();
60 }
61 
62 /// Returns true if the given operation implements the AllocationOpInterface
63 /// and it supports the dominate block hoisting.
65  auto allocOp = dyn_cast<AllocationOpInterface>(op);
66  return allocOp &&
67  static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
68 }
69 
70 /// Returns true if the given operation implements the AllocationOpInterface
71 /// and it supports the loop hoisting.
73  auto allocOp = dyn_cast<AllocationOpInterface>(op);
74  return allocOp &&
75  static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
76 }
77 
78 /// Check if the size of the allocation is less than the given size. The
79 /// transformation is only applied to small buffers since large buffers could
80 /// exceed the stack space.
81 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
82  unsigned maxRankOfAllocatedMemRef) {
83  auto type = dyn_cast<ShapedType>(alloc.getType());
84  if (!type || !alloc.getDefiningOp<memref::AllocOp>())
85  return false;
86  if (!type.hasStaticShape()) {
87  // Check if the dynamic shape dimension of the alloc is produced by
88  // `memref.rank`. If this is the case, it is likely to be small.
89  // Furthermore, the dimension is limited to the maximum rank of the
90  // allocated memref to avoid large values by multiplying several small
91  // values.
92  if (type.getRank() <= maxRankOfAllocatedMemRef) {
93  return llvm::all_of(alloc.getDefiningOp()->getOperands(),
94  [&](Value operand) {
95  return operand.getDefiningOp<memref::RankOp>();
96  });
97  }
98  return false;
99  }
100  unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
101  .getTypeSizeInBits(type.getElementType());
102  return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
103 }
104 
105 /// Checks whether the given aliases leave the allocation scope.
106 static bool
108  const BufferViewFlowAnalysis::ValueSetT &aliases) {
109  for (Value alias : aliases) {
110  for (auto *use : alias.getUsers()) {
111  // If there is at least one alias that leaves the parent region, we know
112  // that this alias escapes the whole region and hence the associated
113  // allocation leaves allocation scope.
114  if (isa<RegionBranchTerminatorOpInterface>(use) &&
115  use->getParentRegion() == parentRegion)
116  return true;
117  }
118  }
119  return false;
120 }
121 
122 /// Checks, if an automated allocation scope for a given alloc value exists.
123 static bool hasAllocationScope(Value alloc,
124  const BufferViewFlowAnalysis &aliasAnalysis) {
125  Region *region = alloc.getParentRegion();
126  do {
127  if (Operation *parentOp = region->getParentOp()) {
128  // Check if the operation is an automatic allocation scope and whether an
129  // alias leaves the scope. This means, an allocation yields out of
130  // this scope and can not be transformed in a stack-based allocation.
131  if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
132  !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
133  return true;
134  // Check if the operation is a known control flow interface and break the
135  // loop to avoid transformation in loops. Furthermore skip transformation
136  // if the operation does not implement a RegionBeanchOpInterface.
137  if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
138  break;
139  }
140  } while ((region = region->getParentRegion()));
141  return false;
142 }
143 
144 namespace {
145 
146 //===----------------------------------------------------------------------===//
147 // BufferAllocationHoisting
148 //===----------------------------------------------------------------------===//
149 
150 /// A base implementation compatible with the `BufferAllocationHoisting` class.
151 struct BufferAllocationHoistingStateBase {
152  /// A pointer to the current dominance info.
153  DominanceInfo *dominators;
154 
155  /// The current allocation value.
156  Value allocValue;
157 
158  /// The current placement block (if any).
159  Block *placementBlock;
160 
161  /// Initializes the state base.
162  BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
163  Block *placementBlock)
164  : dominators(dominators), allocValue(allocValue),
165  placementBlock(placementBlock) {}
166 };
167 
168 /// Implements the actual hoisting logic for allocation nodes.
169 template <typename StateT>
170 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
171 public:
172  BufferAllocationHoisting(Operation *op)
173  : BufferPlacementTransformationBase(op), dominators(op),
174  postDominators(op), scopeOp(op) {}
175 
176  /// Moves allocations upwards.
177  void hoist() {
178  SmallVector<Value> allocsAndAllocas;
179  for (BufferPlacementAllocs::AllocEntry &entry : allocs)
180  allocsAndAllocas.push_back(std::get<0>(entry));
181  scopeOp->walk([&](memref::AllocaOp op) {
182  allocsAndAllocas.push_back(op.getMemref());
183  });
184 
185  for (auto allocValue : allocsAndAllocas) {
186  if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
187  continue;
188  Operation *definingOp = allocValue.getDefiningOp();
189  assert(definingOp && "No defining op");
190  auto operands = definingOp->getOperands();
191  auto resultAliases = aliases.resolve(allocValue);
192  // Determine the common dominator block of all aliases.
193  Block *dominatorBlock =
194  findCommonDominator(allocValue, resultAliases, dominators);
195  // Init the initial hoisting state.
196  StateT state(&dominators, allocValue, allocValue.getParentBlock());
197  // Check for additional allocation dependencies to compute an upper bound
198  // for hoisting.
199  Block *dependencyBlock = nullptr;
200  // If this node has dependencies, check all dependent nodes. This ensures
201  // that all dependency values have been computed before allocating the
202  // buffer.
203  for (Value depValue : operands) {
204  Block *depBlock = depValue.getParentBlock();
205  if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
206  dependencyBlock = depBlock;
207  }
208 
209  // Find the actual placement block and determine the start operation using
210  // an upper placement-block boundary. The idea is that placement block
211  // cannot be moved any further upwards than the given upper bound.
212  Block *placementBlock = findPlacementBlock(
213  state, state.computeUpperBound(dominatorBlock, dependencyBlock));
215  allocValue, placementBlock, liveness);
216 
217  // Move the alloc in front of the start operation.
218  Operation *allocOperation = allocValue.getDefiningOp();
219  allocOperation->moveBefore(startOperation);
220  }
221  }
222 
223 private:
224  /// Finds a valid placement block by walking upwards in the CFG until we
225  /// either cannot continue our walk due to constraints (given by the StateT
226  /// implementation) or we have reached the upper-most dominator block.
227  Block *findPlacementBlock(StateT &state, Block *upperBound) {
228  Block *currentBlock = state.placementBlock;
229  // Walk from the innermost regions/loops to the outermost regions/loops and
230  // find an appropriate placement block that satisfies the constraint of the
231  // current StateT implementation. Walk until we reach the upperBound block
232  // (if any).
233 
234  // If we are not able to find a valid parent operation or an associated
235  // parent block, break the walk loop.
236  Operation *parentOp;
237  Block *parentBlock;
238  while ((parentOp = currentBlock->getParentOp()) &&
239  (parentBlock = parentOp->getBlock()) &&
240  (!upperBound ||
241  dominators.properlyDominates(upperBound, currentBlock))) {
242  // Try to find an immediate dominator and check whether the parent block
243  // is above the immediate dominator (if any).
244  DominanceInfoNode *idom = nullptr;
245 
246  // DominanceInfo doesn't support getNode queries for single-block regions.
247  if (!currentBlock->isEntryBlock())
248  idom = dominators.getNode(currentBlock)->getIDom();
249 
250  if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
251  // If the current immediate dominator is below the placement block, move
252  // to the immediate dominator block.
253  currentBlock = idom->getBlock();
254  state.recordMoveToDominator(currentBlock);
255  } else {
256  // We have to move to our parent block since an immediate dominator does
257  // either not exist or is above our parent block. If we cannot move to
258  // our parent operation due to constraints given by the StateT
259  // implementation, break the walk loop. Furthermore, we should not move
260  // allocations out of unknown region-based control-flow operations.
261  if (!isKnownControlFlowInterface(parentOp) ||
262  !state.isLegalPlacement(parentOp))
263  break;
264  // Move to our parent block by notifying the current StateT
265  // implementation.
266  currentBlock = parentBlock;
267  state.recordMoveToParent(currentBlock);
268  }
269  }
270  // Return the finally determined placement block.
271  return state.placementBlock;
272  }
273 
274  /// The dominator info to find the appropriate start operation to move the
275  /// allocs.
276  DominanceInfo dominators;
277 
278  /// The post dominator info to move the dependent allocs in the right
279  /// position.
280  PostDominanceInfo postDominators;
281 
282  /// The map storing the final placement blocks of a given alloc value.
283  llvm::DenseMap<Value, Block *> placementBlocks;
284 
285  /// The operation that this transformation is working on. It is used to also
286  /// gather allocas.
287  Operation *scopeOp;
288 };
289 
290 /// A state implementation compatible with the `BufferAllocationHoisting` class
291 /// that hoists allocations into dominator blocks while keeping them inside of
292 /// loops.
293 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
294  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
295 
296  /// Computes the upper bound for the placement block search.
297  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
298  // If we do not have a dependency block, the upper bound is given by the
299  // dominator block.
300  if (!dependencyBlock)
301  return dominatorBlock;
302 
303  // Find the "lower" block of the dominator and the dependency block to
304  // ensure that we do not move allocations above this block.
305  return dominators->properlyDominates(dominatorBlock, dependencyBlock)
306  ? dependencyBlock
307  : dominatorBlock;
308  }
309 
310  /// Returns true if the given operation does not represent a loop.
311  bool isLegalPlacement(Operation *op) { return !isLoop(op); }
312 
313  /// Returns true if the given operation should be considered for hoisting.
314  static bool shouldHoistOpType(Operation *op) {
316  }
317 
318  /// Sets the current placement block to the given block.
319  void recordMoveToDominator(Block *block) { placementBlock = block; }
320 
321  /// Sets the current placement block to the given block.
322  void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
323 };
324 
325 /// A state implementation compatible with the `BufferAllocationHoisting` class
326 /// that hoists allocations out of loops.
327 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
328  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
329 
330  /// Remembers the dominator block of all aliases.
331  Block *aliasDominatorBlock = nullptr;
332 
333  /// Computes the upper bound for the placement block search.
334  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
335  aliasDominatorBlock = dominatorBlock;
336  // If there is a dependency block, we have to use this block as an upper
337  // bound to satisfy all allocation value dependencies.
338  return dependencyBlock ? dependencyBlock : nullptr;
339  }
340 
341  /// Returns true if the given operation represents a loop and one of the
342  /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
343  /// given loop operation. If this is the case, it indicates that the
344  /// allocation is passed via a back edge.
345  bool isLegalPlacement(Operation *op) {
346  return isLoop(op) &&
347  !dominators->dominates(aliasDominatorBlock, op->getBlock());
348  }
349 
350  /// Returns true if the given operation should be considered for hoisting.
351  static bool shouldHoistOpType(Operation *op) {
352  return allowAllocLoopHoisting(op);
353  }
354 
355  /// Does not change the internal placement block, as we want to move
356  /// operations out of loops only.
357  void recordMoveToDominator(Block *block) {}
358 
359  /// Sets the current placement block to the given block.
360  void recordMoveToParent(Block *block) { placementBlock = block; }
361 };
362 
363 //===----------------------------------------------------------------------===//
364 // BufferPlacementPromotion
365 //===----------------------------------------------------------------------===//
366 
367 /// Promotes heap-based allocations to stack-based allocations (if possible).
368 class BufferPlacementPromotion : BufferPlacementTransformationBase {
369 public:
370  BufferPlacementPromotion(Operation *op)
372 
373  /// Promote buffers to stack-based allocations.
374  void promote(function_ref<bool(Value)> isSmallAlloc) {
375  for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
376  Value alloc = std::get<0>(entry);
377  Operation *dealloc = std::get<1>(entry);
378  // Checking several requirements to transform an AllocOp into an AllocaOp.
379  // The transformation is done if the allocation is limited to a given
380  // size. Furthermore, a deallocation must not be defined for this
381  // allocation entry and a parent allocation scope must exist.
382  if (!isSmallAlloc(alloc) || dealloc ||
383  !hasAllocationScope(alloc, aliases))
384  continue;
385 
387  alloc, alloc.getParentBlock(), liveness);
388  // Build a new alloca that is associated with its parent
389  // `AutomaticAllocationScope` determined during the initialization phase.
390  OpBuilder builder(startOperation);
391  Operation *allocOp = alloc.getDefiningOp();
392  if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
393  Operation *alloca =
394  allocInterface.buildPromotedAlloc(builder, alloc).value();
395  if (!alloca)
396  continue;
397  // Replace the original alloc by a newly created alloca.
398  allocOp->replaceAllUsesWith(alloca);
399  allocOp->erase();
400  }
401  }
402  }
403 };
404 
405 //===----------------------------------------------------------------------===//
406 // BufferOptimizationPasses
407 //===----------------------------------------------------------------------===//
408 
409 /// The buffer hoisting pass that hoists allocation nodes into dominating
410 /// blocks.
411 struct BufferHoistingPass
412  : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> {
413 
414  void runOnOperation() override {
415  // Hoist all allocations into dominator blocks.
416  BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
417  getOperation());
418  optimizer.hoist();
419  }
420 };
421 
422 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
423 struct BufferLoopHoistingPass
424  : public bufferization::impl::BufferLoopHoistingBase<
425  BufferLoopHoistingPass> {
426 
427  void runOnOperation() override {
428  // Hoist all allocations out of loops.
429  hoistBuffersFromLoops(getOperation());
430  }
431 };
432 
433 /// The promote buffer to stack pass that tries to convert alloc nodes into
434 /// alloca nodes.
435 class PromoteBuffersToStackPass
436  : public bufferization::impl::PromoteBuffersToStackBase<
437  PromoteBuffersToStackPass> {
438 public:
439  PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
440  unsigned maxRankOfAllocatedMemRef) {
441  this->maxAllocSizeInBytes = maxAllocSizeInBytes;
442  this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
443  }
444 
445  explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
446  : isSmallAlloc(std::move(isSmallAlloc)) {}
447 
448  LogicalResult initialize(MLIRContext *context) override {
449  if (isSmallAlloc == nullptr) {
450  isSmallAlloc = [=](Value alloc) {
451  return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
452  maxRankOfAllocatedMemRef);
453  };
454  }
455  return success();
456  }
457 
458  void runOnOperation() override {
459  // Move all allocation nodes and convert candidates into allocas.
460  BufferPlacementPromotion optimizer(getOperation());
461  optimizer.promote(isSmallAlloc);
462  }
463 
464 private:
465  std::function<bool(Value)> isSmallAlloc;
466 };
467 
468 } // namespace
469 
471  BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
472  optimizer.hoist();
473 }
474 
476  return std::make_unique<BufferHoistingPass>();
477 }
478 
480  return std::make_unique<BufferLoopHoistingPass>();
481 }
482 
484  unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
485  return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
486  maxRankOfAllocatedMemRef);
487 }
488 
490  std::function<bool(Value)> isSmallAlloc) {
491  return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
492 }
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
static bool allowAllocDominateBlockHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the dominate...
static bool allowAllocLoopHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the loop hoi...
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
Block represents an ordered list of Operations.
Definition: Block.h:30
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition: Block.cpp:35
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
A class for computing basic dominance information.
Definition: Dominance.h:136
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:209
A trait of region holding operations that define a new scope for automatic allocations,...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:555
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
A class for computing basic postdominance information.
Definition: Dominance.h:195
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block.
Definition: BufferUtils.cpp:34
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition: BufferUtils.h:37
The base class for all BufferPlacement transformations.
Definition: BufferUtils.h:102
void hoistBuffersFromLoops(Operation *op)
Within the given operation, hoist buffers from loops where possible.
std::unique_ptr< Pass > createBufferHoistingPass()
Creates a pass that moves allocations upwards to reduce the number of required copies that are insert...
Block * findCommonDominator(Value value, const BufferViewFlowAnalysis::ValueSetT &values, const DominatorT &doms)
Finds a common dominator for the given value while taking the positions of the values in the value se...
Definition: BufferUtils.h:81
std::unique_ptr< Pass > createBufferLoopHoistingPass()
Creates a pass that moves allocations upwards out of loops.
std::unique_ptr< Pass > createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes=1024, unsigned maxRankOfAllocatedMemRef=1)
Creates a pass that promotes heap-based allocations to stack-based ones.
void promote(RewriterBase &rewriter, scf::ForallOp forallOp)
Promotes the loop body of a scf::ForallOp to its containing block.
Definition: SCF.cpp:644
Include the generated interface declarations.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition: Dominance.h:30
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26