MLIR  18.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
15 
21 #include "mlir/IR/Operation.h"
23 #include "mlir/Pass/Pass.h"
24 
25 namespace mlir {
26 namespace bufferization {
27 #define GEN_PASS_DEF_BUFFERHOISTING
28 #define GEN_PASS_DEF_BUFFERLOOPHOISTING
29 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK
30 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31 } // namespace bufferization
32 } // namespace mlir
33 
34 using namespace mlir;
35 using namespace mlir::bufferization;
36 
37 /// Returns true if the given operation implements a known high-level region-
38 /// based control-flow interface.
40  return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41 }
42 
43 /// Returns true if the given operation implements the AllocationOpInterface
44 /// and it supports the dominate block hoisting.
46  auto allocOp = dyn_cast<AllocationOpInterface>(op);
47  return allocOp &&
48  static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
49 }
50 
51 /// Returns true if the given operation implements the AllocationOpInterface
52 /// and it supports the loop hoisting.
54  auto allocOp = dyn_cast<AllocationOpInterface>(op);
55  return allocOp &&
56  static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
57 }
58 
59 /// Check if the size of the allocation is less than the given size. The
60 /// transformation is only applied to small buffers since large buffers could
61 /// exceed the stack space.
62 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
63  unsigned maxRankOfAllocatedMemRef) {
64  auto type = dyn_cast<ShapedType>(alloc.getType());
65  if (!type || !alloc.getDefiningOp<memref::AllocOp>())
66  return false;
67  if (!type.hasStaticShape()) {
68  // Check if the dynamic shape dimension of the alloc is produced by
69  // `memref.rank`. If this is the case, it is likely to be small.
70  // Furthermore, the dimension is limited to the maximum rank of the
71  // allocated memref to avoid large values by multiplying several small
72  // values.
73  if (type.getRank() <= maxRankOfAllocatedMemRef) {
74  return llvm::all_of(alloc.getDefiningOp()->getOperands(),
75  [&](Value operand) {
76  return operand.getDefiningOp<memref::RankOp>();
77  });
78  }
79  return false;
80  }
81  unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
82  .getTypeSizeInBits(type.getElementType());
83  return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
84 }
85 
86 /// Checks whether the given aliases leave the allocation scope.
87 static bool
89  const BufferViewFlowAnalysis::ValueSetT &aliases) {
90  for (Value alias : aliases) {
91  for (auto *use : alias.getUsers()) {
92  // If there is at least one alias that leaves the parent region, we know
93  // that this alias escapes the whole region and hence the associated
94  // allocation leaves allocation scope.
95  if (isa<RegionBranchTerminatorOpInterface>(use) &&
96  use->getParentRegion() == parentRegion)
97  return true;
98  }
99  }
100  return false;
101 }
102 
103 /// Checks, if an automated allocation scope for a given alloc value exists.
104 static bool hasAllocationScope(Value alloc,
105  const BufferViewFlowAnalysis &aliasAnalysis) {
106  Region *region = alloc.getParentRegion();
107  do {
108  if (Operation *parentOp = region->getParentOp()) {
109  // Check if the operation is an automatic allocation scope and whether an
110  // alias leaves the scope. This means, an allocation yields out of
111  // this scope and can not be transformed in a stack-based allocation.
112  if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
113  !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
114  return true;
115  // Check if the operation is a known control flow interface and break the
116  // loop to avoid transformation in loops. Furthermore skip transformation
117  // if the operation does not implement a RegionBeanchOpInterface.
119  !isKnownControlFlowInterface(parentOp))
120  break;
121  }
122  } while ((region = region->getParentRegion()));
123  return false;
124 }
125 
126 namespace {
127 
128 //===----------------------------------------------------------------------===//
129 // BufferAllocationHoisting
130 //===----------------------------------------------------------------------===//
131 
132 /// A base implementation compatible with the `BufferAllocationHoisting` class.
133 struct BufferAllocationHoistingStateBase {
134  /// A pointer to the current dominance info.
135  DominanceInfo *dominators;
136 
137  /// The current allocation value.
138  Value allocValue;
139 
140  /// The current placement block (if any).
141  Block *placementBlock;
142 
143  /// Initializes the state base.
144  BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
145  Block *placementBlock)
146  : dominators(dominators), allocValue(allocValue),
147  placementBlock(placementBlock) {}
148 };
149 
150 /// Implements the actual hoisting logic for allocation nodes.
151 template <typename StateT>
152 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
153 public:
154  BufferAllocationHoisting(Operation *op)
155  : BufferPlacementTransformationBase(op), dominators(op),
156  postDominators(op), scopeOp(op) {}
157 
158  /// Moves allocations upwards.
159  void hoist() {
160  SmallVector<Value> allocsAndAllocas;
161  for (BufferPlacementAllocs::AllocEntry &entry : allocs)
162  allocsAndAllocas.push_back(std::get<0>(entry));
163  scopeOp->walk([&](memref::AllocaOp op) {
164  allocsAndAllocas.push_back(op.getMemref());
165  });
166 
167  for (auto allocValue : allocsAndAllocas) {
168  if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
169  continue;
170  Operation *definingOp = allocValue.getDefiningOp();
171  assert(definingOp && "No defining op");
172  auto operands = definingOp->getOperands();
173  auto resultAliases = aliases.resolve(allocValue);
174  // Determine the common dominator block of all aliases.
175  Block *dominatorBlock =
176  findCommonDominator(allocValue, resultAliases, dominators);
177  // Init the initial hoisting state.
178  StateT state(&dominators, allocValue, allocValue.getParentBlock());
179  // Check for additional allocation dependencies to compute an upper bound
180  // for hoisting.
181  Block *dependencyBlock = nullptr;
182  // If this node has dependencies, check all dependent nodes. This ensures
183  // that all dependency values have been computed before allocating the
184  // buffer.
185  for (Value depValue : operands) {
186  Block *depBlock = depValue.getParentBlock();
187  if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
188  dependencyBlock = depBlock;
189  }
190 
191  // Find the actual placement block and determine the start operation using
192  // an upper placement-block boundary. The idea is that placement block
193  // cannot be moved any further upwards than the given upper bound.
194  Block *placementBlock = findPlacementBlock(
195  state, state.computeUpperBound(dominatorBlock, dependencyBlock));
197  allocValue, placementBlock, liveness);
198 
199  // Move the alloc in front of the start operation.
200  Operation *allocOperation = allocValue.getDefiningOp();
201  allocOperation->moveBefore(startOperation);
202  }
203  }
204 
205 private:
206  /// Finds a valid placement block by walking upwards in the CFG until we
207  /// either cannot continue our walk due to constraints (given by the StateT
208  /// implementation) or we have reached the upper-most dominator block.
209  Block *findPlacementBlock(StateT &state, Block *upperBound) {
210  Block *currentBlock = state.placementBlock;
211  // Walk from the innermost regions/loops to the outermost regions/loops and
212  // find an appropriate placement block that satisfies the constraint of the
213  // current StateT implementation. Walk until we reach the upperBound block
214  // (if any).
215 
216  // If we are not able to find a valid parent operation or an associated
217  // parent block, break the walk loop.
218  Operation *parentOp;
219  Block *parentBlock;
220  while ((parentOp = currentBlock->getParentOp()) &&
221  (parentBlock = parentOp->getBlock()) &&
222  (!upperBound ||
223  dominators.properlyDominates(upperBound, currentBlock))) {
224  // Try to find an immediate dominator and check whether the parent block
225  // is above the immediate dominator (if any).
226  DominanceInfoNode *idom = nullptr;
227 
228  // DominanceInfo doesn't support getNode queries for single-block regions.
229  if (!currentBlock->isEntryBlock())
230  idom = dominators.getNode(currentBlock)->getIDom();
231 
232  if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
233  // If the current immediate dominator is below the placement block, move
234  // to the immediate dominator block.
235  currentBlock = idom->getBlock();
236  state.recordMoveToDominator(currentBlock);
237  } else {
238  // We have to move to our parent block since an immediate dominator does
239  // either not exist or is above our parent block. If we cannot move to
240  // our parent operation due to constraints given by the StateT
241  // implementation, break the walk loop. Furthermore, we should not move
242  // allocations out of unknown region-based control-flow operations.
243  if (!isKnownControlFlowInterface(parentOp) ||
244  !state.isLegalPlacement(parentOp))
245  break;
246  // Move to our parent block by notifying the current StateT
247  // implementation.
248  currentBlock = parentBlock;
249  state.recordMoveToParent(currentBlock);
250  }
251  }
252  // Return the finally determined placement block.
253  return state.placementBlock;
254  }
255 
256  /// The dominator info to find the appropriate start operation to move the
257  /// allocs.
258  DominanceInfo dominators;
259 
260  /// The post dominator info to move the dependent allocs in the right
261  /// position.
262  PostDominanceInfo postDominators;
263 
264  /// The map storing the final placement blocks of a given alloc value.
265  llvm::DenseMap<Value, Block *> placementBlocks;
266 
267  /// The operation that this transformation is working on. It is used to also
268  /// gather allocas.
269  Operation *scopeOp;
270 };
271 
272 /// A state implementation compatible with the `BufferAllocationHoisting` class
273 /// that hoists allocations into dominator blocks while keeping them inside of
274 /// loops.
275 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
276  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
277 
278  /// Computes the upper bound for the placement block search.
279  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
280  // If we do not have a dependency block, the upper bound is given by the
281  // dominator block.
282  if (!dependencyBlock)
283  return dominatorBlock;
284 
285  // Find the "lower" block of the dominator and the dependency block to
286  // ensure that we do not move allocations above this block.
287  return dominators->properlyDominates(dominatorBlock, dependencyBlock)
288  ? dependencyBlock
289  : dominatorBlock;
290  }
291 
292  /// Returns true if the given operation does not represent a loop.
293  bool isLegalPlacement(Operation *op) {
295  }
296 
297  /// Returns true if the given operation should be considered for hoisting.
298  static bool shouldHoistOpType(Operation *op) {
300  }
301 
302  /// Sets the current placement block to the given block.
303  void recordMoveToDominator(Block *block) { placementBlock = block; }
304 
305  /// Sets the current placement block to the given block.
306  void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
307 };
308 
309 /// A state implementation compatible with the `BufferAllocationHoisting` class
310 /// that hoists allocations out of loops.
311 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
312  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
313 
314  /// Remembers the dominator block of all aliases.
315  Block *aliasDominatorBlock = nullptr;
316 
317  /// Computes the upper bound for the placement block search.
318  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
319  aliasDominatorBlock = dominatorBlock;
320  // If there is a dependency block, we have to use this block as an upper
321  // bound to satisfy all allocation value dependencies.
322  return dependencyBlock ? dependencyBlock : nullptr;
323  }
324 
325  /// Returns true if the given operation represents a loop and one of the
326  /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
327  /// given loop operation. If this is the case, it indicates that the
328  /// allocation is passed via a back edge.
329  bool isLegalPlacement(Operation *op) {
331  !dominators->dominates(aliasDominatorBlock, op->getBlock());
332  }
333 
334  /// Returns true if the given operation should be considered for hoisting.
335  static bool shouldHoistOpType(Operation *op) {
336  return allowAllocLoopHoisting(op);
337  }
338 
339  /// Does not change the internal placement block, as we want to move
340  /// operations out of loops only.
341  void recordMoveToDominator(Block *block) {}
342 
343  /// Sets the current placement block to the given block.
344  void recordMoveToParent(Block *block) { placementBlock = block; }
345 };
346 
347 //===----------------------------------------------------------------------===//
348 // BufferPlacementPromotion
349 //===----------------------------------------------------------------------===//
350 
351 /// Promotes heap-based allocations to stack-based allocations (if possible).
352 class BufferPlacementPromotion : BufferPlacementTransformationBase {
353 public:
354  BufferPlacementPromotion(Operation *op)
356 
357  /// Promote buffers to stack-based allocations.
358  void promote(function_ref<bool(Value)> isSmallAlloc) {
359  for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
360  Value alloc = std::get<0>(entry);
361  Operation *dealloc = std::get<1>(entry);
362  // Checking several requirements to transform an AllocOp into an AllocaOp.
363  // The transformation is done if the allocation is limited to a given
364  // size. Furthermore, a deallocation must not be defined for this
365  // allocation entry and a parent allocation scope must exist.
366  if (!isSmallAlloc(alloc) || dealloc ||
367  !hasAllocationScope(alloc, aliases))
368  continue;
369 
371  alloc, alloc.getParentBlock(), liveness);
372  // Build a new alloca that is associated with its parent
373  // `AutomaticAllocationScope` determined during the initialization phase.
374  OpBuilder builder(startOperation);
375  Operation *allocOp = alloc.getDefiningOp();
376  if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
377  Operation *alloca =
378  allocInterface.buildPromotedAlloc(builder, alloc).value();
379  if (!alloca)
380  continue;
381  // Replace the original alloc by a newly created alloca.
382  allocOp->replaceAllUsesWith(alloca);
383  allocOp->erase();
384  }
385  }
386  }
387 };
388 
389 //===----------------------------------------------------------------------===//
390 // BufferOptimizationPasses
391 //===----------------------------------------------------------------------===//
392 
393 /// The buffer hoisting pass that hoists allocation nodes into dominating
394 /// blocks.
395 struct BufferHoistingPass
396  : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> {
397 
398  void runOnOperation() override {
399  // Hoist all allocations into dominator blocks.
400  BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
401  getOperation());
402  optimizer.hoist();
403  }
404 };
405 
406 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
407 struct BufferLoopHoistingPass
408  : public bufferization::impl::BufferLoopHoistingBase<
409  BufferLoopHoistingPass> {
410 
411  void runOnOperation() override {
412  // Hoist all allocations out of loops.
413  hoistBuffersFromLoops(getOperation());
414  }
415 };
416 
417 /// The promote buffer to stack pass that tries to convert alloc nodes into
418 /// alloca nodes.
419 class PromoteBuffersToStackPass
420  : public bufferization::impl::PromoteBuffersToStackBase<
421  PromoteBuffersToStackPass> {
422 public:
423  PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
424  unsigned maxRankOfAllocatedMemRef) {
425  this->maxAllocSizeInBytes = maxAllocSizeInBytes;
426  this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
427  }
428 
429  explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
430  : isSmallAlloc(std::move(isSmallAlloc)) {}
431 
432  LogicalResult initialize(MLIRContext *context) override {
433  if (isSmallAlloc == nullptr) {
434  isSmallAlloc = [=](Value alloc) {
435  return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
436  maxRankOfAllocatedMemRef);
437  };
438  }
439  return success();
440  }
441 
442  void runOnOperation() override {
443  // Move all allocation nodes and convert candidates into allocas.
444  BufferPlacementPromotion optimizer(getOperation());
445  optimizer.promote(isSmallAlloc);
446  }
447 
448 private:
449  std::function<bool(Value)> isSmallAlloc;
450 };
451 
452 } // namespace
453 
455  BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
456  optimizer.hoist();
457 }
458 
460  return std::make_unique<BufferHoistingPass>();
461 }
462 
464  return std::make_unique<BufferLoopHoistingPass>();
465 }
466 
468  unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
469  return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
470  maxRankOfAllocatedMemRef);
471 }
472 
474  std::function<bool(Value)> isSmallAlloc) {
475  return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
476 }
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
static bool allowAllocDominateBlockHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the dominate...
static bool allowAllocLoopHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the loop hoi...
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
Block represents an ordered list of Operations.
Definition: Block.h:30
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition: Block.cpp:35
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
A class for computing basic dominance information.
Definition: Dominance.h:121
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:206
A trait of region holding operations that define a new scope for automatic allocations,...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:554
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:538
A class for computing basic postdominance information.
Definition: Dominance.h:180
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:125
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block.
Definition: BufferUtils.cpp:34
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition: BufferUtils.h:37
The base class for all BufferPlacement transformations.
Definition: BufferUtils.h:77
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
void hoistBuffersFromLoops(Operation *op)
Within the given operation, hoist buffers from loops where possible.
std::unique_ptr< Pass > createBufferHoistingPass()
Creates a pass that moves allocations upwards to reduce the number of required copies that are insert...
std::unique_ptr< Pass > createBufferLoopHoistingPass()
Creates a pass that moves allocations upwards out of loops.
std::unique_ptr< Pass > createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes=1024, unsigned maxRankOfAllocatedMemRef=1)
Creates a pass that promotes heap-based allocations to stack-based ones.
void promote(RewriterBase &rewriter, scf::ForallOp forallOp)
Promotes the loop body of a scf::ForallOp to its containing block.
Definition: SCF.cpp:627
Include the generated interface declarations.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition: Dominance.h:30
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26