MLIR  20.0.0git
BufferOptimizations.cpp
Go to the documentation of this file.
1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
15 
21 #include "mlir/IR/Operation.h"
23 #include "mlir/Pass/Pass.h"
24 
25 namespace mlir {
26 namespace bufferization {
27 #define GEN_PASS_DEF_BUFFERHOISTING
28 #define GEN_PASS_DEF_BUFFERLOOPHOISTING
29 #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK
30 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
31 } // namespace bufferization
32 } // namespace mlir
33 
34 using namespace mlir;
35 using namespace mlir::bufferization;
36 
37 /// Returns true if the given operation implements a known high-level region-
38 /// based control-flow interface.
40  return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
41 }
42 
43 /// Returns true if the given operation represents a loop by testing whether it
44 /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In
45 /// the case of a `RegionBranchOpInterface`, it checks all region-based control-
46 /// flow edges for cycles.
47 static bool isLoop(Operation *op) {
48  // If the operation implements the `LoopLikeOpInterface` it can be considered
49  // a loop.
50  if (isa<LoopLikeOpInterface>(op))
51  return true;
52 
53  // If the operation does not implement the `RegionBranchOpInterface`, it is
54  // (currently) not possible to detect a loop.
55  auto regionInterface = dyn_cast<RegionBranchOpInterface>(op);
56  if (!regionInterface)
57  return false;
58 
59  return regionInterface.hasLoop();
60 }
61 
62 /// Return whether the given operation is a loop with sequential execution
63 /// semantics.
64 static bool isSequentialLoop(Operation *op) {
65  return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op);
66 }
67 
68 /// Returns true if the given operation implements the AllocationOpInterface
69 /// and it supports the dominate block hoisting.
71  auto allocOp = dyn_cast<AllocationOpInterface>(op);
72  return allocOp &&
73  static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block);
74 }
75 
76 /// Returns true if the given operation implements the AllocationOpInterface
77 /// and it supports the loop hoisting.
79  auto allocOp = dyn_cast<AllocationOpInterface>(op);
80  return allocOp &&
81  static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop);
82 }
83 
84 /// Check if the size of the allocation is less than the given size. The
85 /// transformation is only applied to small buffers since large buffers could
86 /// exceed the stack space.
87 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
88  unsigned maxRankOfAllocatedMemRef) {
89  auto type = dyn_cast<ShapedType>(alloc.getType());
90  if (!type || !alloc.getDefiningOp<memref::AllocOp>())
91  return false;
92  if (!type.hasStaticShape()) {
93  // Check if the dynamic shape dimension of the alloc is produced by
94  // `memref.rank`. If this is the case, it is likely to be small.
95  // Furthermore, the dimension is limited to the maximum rank of the
96  // allocated memref to avoid large values by multiplying several small
97  // values.
98  if (type.getRank() <= maxRankOfAllocatedMemRef) {
99  return llvm::all_of(alloc.getDefiningOp()->getOperands(),
100  [&](Value operand) {
101  return operand.getDefiningOp<memref::RankOp>();
102  });
103  }
104  return false;
105  }
106  unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
107  .getTypeSizeInBits(type.getElementType());
108  return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
109 }
110 
111 /// Checks whether the given aliases leave the allocation scope.
112 static bool
114  const BufferViewFlowAnalysis::ValueSetT &aliases) {
115  for (Value alias : aliases) {
116  for (auto *use : alias.getUsers()) {
117  // If there is at least one alias that leaves the parent region, we know
118  // that this alias escapes the whole region and hence the associated
119  // allocation leaves allocation scope.
120  if (isa<RegionBranchTerminatorOpInterface>(use) &&
121  use->getParentRegion() == parentRegion)
122  return true;
123  }
124  }
125  return false;
126 }
127 
128 /// Checks, if an automated allocation scope for a given alloc value exists.
129 static bool hasAllocationScope(Value alloc,
130  const BufferViewFlowAnalysis &aliasAnalysis) {
131  Region *region = alloc.getParentRegion();
132  do {
133  if (Operation *parentOp = region->getParentOp()) {
134  // Check if the operation is an automatic allocation scope and whether an
135  // alias leaves the scope. This means, an allocation yields out of
136  // this scope and can not be transformed in a stack-based allocation.
137  if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
138  !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
139  return true;
140  // Check if the operation is a known control flow interface and break the
141  // loop to avoid transformation in loops. Furthermore skip transformation
142  // if the operation does not implement a RegionBeanchOpInterface.
143  if (isLoop(parentOp) || !isKnownControlFlowInterface(parentOp))
144  break;
145  }
146  } while ((region = region->getParentRegion()));
147  return false;
148 }
149 
150 namespace {
151 
152 //===----------------------------------------------------------------------===//
153 // BufferAllocationHoisting
154 //===----------------------------------------------------------------------===//
155 
156 /// A base implementation compatible with the `BufferAllocationHoisting` class.
157 struct BufferAllocationHoistingStateBase {
158  /// A pointer to the current dominance info.
159  DominanceInfo *dominators;
160 
161  /// The current allocation value.
162  Value allocValue;
163 
164  /// The current placement block (if any).
165  Block *placementBlock;
166 
167  /// Initializes the state base.
168  BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
169  Block *placementBlock)
170  : dominators(dominators), allocValue(allocValue),
171  placementBlock(placementBlock) {}
172 };
173 
174 /// Implements the actual hoisting logic for allocation nodes.
175 template <typename StateT>
176 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
177 public:
178  BufferAllocationHoisting(Operation *op)
179  : BufferPlacementTransformationBase(op), dominators(op),
180  postDominators(op), scopeOp(op) {}
181 
182  /// Moves allocations upwards.
183  void hoist() {
184  SmallVector<Value> allocsAndAllocas;
185  for (BufferPlacementAllocs::AllocEntry &entry : allocs)
186  allocsAndAllocas.push_back(std::get<0>(entry));
187  scopeOp->walk([&](memref::AllocaOp op) {
188  allocsAndAllocas.push_back(op.getMemref());
189  });
190 
191  for (auto allocValue : allocsAndAllocas) {
192  if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
193  continue;
194  Operation *definingOp = allocValue.getDefiningOp();
195  assert(definingOp && "No defining op");
196  auto operands = definingOp->getOperands();
197  auto resultAliases = aliases.resolve(allocValue);
198  // Determine the common dominator block of all aliases.
199  Block *dominatorBlock =
200  findCommonDominator(allocValue, resultAliases, dominators);
201  // Init the initial hoisting state.
202  StateT state(&dominators, allocValue, allocValue.getParentBlock());
203  // Check for additional allocation dependencies to compute an upper bound
204  // for hoisting.
205  Block *dependencyBlock = nullptr;
206  // If this node has dependencies, check all dependent nodes. This ensures
207  // that all dependency values have been computed before allocating the
208  // buffer.
209  for (Value depValue : operands) {
210  Block *depBlock = depValue.getParentBlock();
211  if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
212  dependencyBlock = depBlock;
213  }
214 
215  // Find the actual placement block and determine the start operation using
216  // an upper placement-block boundary. The idea is that placement block
217  // cannot be moved any further upwards than the given upper bound.
218  Block *placementBlock = findPlacementBlock(
219  state, state.computeUpperBound(dominatorBlock, dependencyBlock));
221  allocValue, placementBlock, liveness);
222 
223  // Move the alloc in front of the start operation.
224  Operation *allocOperation = allocValue.getDefiningOp();
225  allocOperation->moveBefore(startOperation);
226  }
227  }
228 
229 private:
230  /// Finds a valid placement block by walking upwards in the CFG until we
231  /// either cannot continue our walk due to constraints (given by the StateT
232  /// implementation) or we have reached the upper-most dominator block.
233  Block *findPlacementBlock(StateT &state, Block *upperBound) {
234  Block *currentBlock = state.placementBlock;
235  // Walk from the innermost regions/loops to the outermost regions/loops and
236  // find an appropriate placement block that satisfies the constraint of the
237  // current StateT implementation. Walk until we reach the upperBound block
238  // (if any).
239 
240  // If we are not able to find a valid parent operation or an associated
241  // parent block, break the walk loop.
242  Operation *parentOp;
243  Block *parentBlock;
244  while ((parentOp = currentBlock->getParentOp()) &&
245  (parentBlock = parentOp->getBlock()) &&
246  (!upperBound ||
247  dominators.properlyDominates(upperBound, currentBlock))) {
248  // Try to find an immediate dominator and check whether the parent block
249  // is above the immediate dominator (if any).
250  DominanceInfoNode *idom = nullptr;
251 
252  // DominanceInfo doesn't support getNode queries for single-block regions.
253  if (!currentBlock->isEntryBlock())
254  idom = dominators.getNode(currentBlock)->getIDom();
255 
256  if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
257  // If the current immediate dominator is below the placement block, move
258  // to the immediate dominator block.
259  currentBlock = idom->getBlock();
260  state.recordMoveToDominator(currentBlock);
261  } else {
262  // We have to move to our parent block since an immediate dominator does
263  // either not exist or is above our parent block. If we cannot move to
264  // our parent operation due to constraints given by the StateT
265  // implementation, break the walk loop. Furthermore, we should not move
266  // allocations out of unknown region-based control-flow operations.
267  if (!isKnownControlFlowInterface(parentOp) ||
268  !state.isLegalPlacement(parentOp))
269  break;
270  // Move to our parent block by notifying the current StateT
271  // implementation.
272  currentBlock = parentBlock;
273  state.recordMoveToParent(currentBlock);
274  }
275  }
276  // Return the finally determined placement block.
277  return state.placementBlock;
278  }
279 
280  /// The dominator info to find the appropriate start operation to move the
281  /// allocs.
282  DominanceInfo dominators;
283 
284  /// The post dominator info to move the dependent allocs in the right
285  /// position.
286  PostDominanceInfo postDominators;
287 
288  /// The map storing the final placement blocks of a given alloc value.
289  llvm::DenseMap<Value, Block *> placementBlocks;
290 
291  /// The operation that this transformation is working on. It is used to also
292  /// gather allocas.
293  Operation *scopeOp;
294 };
295 
296 /// A state implementation compatible with the `BufferAllocationHoisting` class
297 /// that hoists allocations into dominator blocks while keeping them inside of
298 /// loops.
299 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
300  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
301 
302  /// Computes the upper bound for the placement block search.
303  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
304  // If we do not have a dependency block, the upper bound is given by the
305  // dominator block.
306  if (!dependencyBlock)
307  return dominatorBlock;
308 
309  // Find the "lower" block of the dominator and the dependency block to
310  // ensure that we do not move allocations above this block.
311  return dominators->properlyDominates(dominatorBlock, dependencyBlock)
312  ? dependencyBlock
313  : dominatorBlock;
314  }
315 
316  /// Returns true if the given operation does not represent a loop.
317  bool isLegalPlacement(Operation *op) { return !isLoop(op); }
318 
319  /// Returns true if the given operation should be considered for hoisting.
320  static bool shouldHoistOpType(Operation *op) {
322  }
323 
324  /// Sets the current placement block to the given block.
325  void recordMoveToDominator(Block *block) { placementBlock = block; }
326 
327  /// Sets the current placement block to the given block.
328  void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
329 };
330 
331 /// A state implementation compatible with the `BufferAllocationHoisting` class
332 /// that hoists allocations out of loops.
333 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
334  using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
335 
336  /// Remembers the dominator block of all aliases.
337  Block *aliasDominatorBlock = nullptr;
338 
339  /// Computes the upper bound for the placement block search.
340  Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
341  aliasDominatorBlock = dominatorBlock;
342  // If there is a dependency block, we have to use this block as an upper
343  // bound to satisfy all allocation value dependencies.
344  return dependencyBlock ? dependencyBlock : nullptr;
345  }
346 
347  /// Returns true if the given operation represents a loop with sequential
348  /// execution semantics and one of the aliases caused the
349  /// `aliasDominatorBlock` to be "above" the block of the given loop operation.
350  /// If this is the case, it indicates that the allocation is passed via a back
351  /// edge.
352  bool isLegalPlacement(Operation *op) {
353  return isSequentialLoop(op) &&
354  !dominators->dominates(aliasDominatorBlock, op->getBlock());
355  }
356 
357  /// Returns true if the given operation should be considered for hoisting.
358  static bool shouldHoistOpType(Operation *op) {
359  return allowAllocLoopHoisting(op);
360  }
361 
362  /// Does not change the internal placement block, as we want to move
363  /// operations out of loops only.
364  void recordMoveToDominator(Block *block) {}
365 
366  /// Sets the current placement block to the given block.
367  void recordMoveToParent(Block *block) { placementBlock = block; }
368 };
369 
370 //===----------------------------------------------------------------------===//
371 // BufferPlacementPromotion
372 //===----------------------------------------------------------------------===//
373 
374 /// Promotes heap-based allocations to stack-based allocations (if possible).
375 class BufferPlacementPromotion : BufferPlacementTransformationBase {
376 public:
377  BufferPlacementPromotion(Operation *op)
379 
380  /// Promote buffers to stack-based allocations.
381  void promote(function_ref<bool(Value)> isSmallAlloc) {
382  for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
383  Value alloc = std::get<0>(entry);
384  Operation *dealloc = std::get<1>(entry);
385  // Checking several requirements to transform an AllocOp into an AllocaOp.
386  // The transformation is done if the allocation is limited to a given
387  // size. Furthermore, a deallocation must not be defined for this
388  // allocation entry and a parent allocation scope must exist.
389  if (!isSmallAlloc(alloc) || dealloc ||
390  !hasAllocationScope(alloc, aliases))
391  continue;
392 
394  alloc, alloc.getParentBlock(), liveness);
395  // Build a new alloca that is associated with its parent
396  // `AutomaticAllocationScope` determined during the initialization phase.
397  OpBuilder builder(startOperation);
398  Operation *allocOp = alloc.getDefiningOp();
399  if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) {
400  std::optional<Operation *> alloca =
401  allocInterface.buildPromotedAlloc(builder, alloc);
402  if (!alloca)
403  continue;
404  // Replace the original alloc by a newly created alloca.
405  allocOp->replaceAllUsesWith(alloca.value());
406  allocOp->erase();
407  }
408  }
409  }
410 };
411 
412 //===----------------------------------------------------------------------===//
413 // BufferOptimizationPasses
414 //===----------------------------------------------------------------------===//
415 
416 /// The buffer hoisting pass that hoists allocation nodes into dominating
417 /// blocks.
418 struct BufferHoistingPass
419  : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> {
420 
421  void runOnOperation() override {
422  // Hoist all allocations into dominator blocks.
423  BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
424  getOperation());
425  optimizer.hoist();
426  }
427 };
428 
429 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
430 struct BufferLoopHoistingPass
431  : public bufferization::impl::BufferLoopHoistingBase<
432  BufferLoopHoistingPass> {
433 
434  void runOnOperation() override {
435  // Hoist all allocations out of loops.
436  hoistBuffersFromLoops(getOperation());
437  }
438 };
439 
440 /// The promote buffer to stack pass that tries to convert alloc nodes into
441 /// alloca nodes.
442 class PromoteBuffersToStackPass
443  : public bufferization::impl::PromoteBuffersToStackBase<
444  PromoteBuffersToStackPass> {
445 public:
446  PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
447  unsigned maxRankOfAllocatedMemRef) {
448  this->maxAllocSizeInBytes = maxAllocSizeInBytes;
449  this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
450  }
451 
452  explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
453  : isSmallAlloc(std::move(isSmallAlloc)) {}
454 
455  LogicalResult initialize(MLIRContext *context) override {
456  if (isSmallAlloc == nullptr) {
457  isSmallAlloc = [=](Value alloc) {
458  return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
459  maxRankOfAllocatedMemRef);
460  };
461  }
462  return success();
463  }
464 
465  void runOnOperation() override {
466  // Move all allocation nodes and convert candidates into allocas.
467  BufferPlacementPromotion optimizer(getOperation());
468  optimizer.promote(isSmallAlloc);
469  }
470 
471 private:
472  std::function<bool(Value)> isSmallAlloc;
473 };
474 
475 } // namespace
476 
478  BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op);
479  optimizer.hoist();
480 }
481 
483  return std::make_unique<BufferHoistingPass>();
484 }
485 
487  return std::make_unique<BufferLoopHoistingPass>();
488 }
489 
491  unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
492  return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
493  maxRankOfAllocatedMemRef);
494 }
495 
497  std::function<bool(Value)> isSmallAlloc) {
498  return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
499 }
static bool leavesAllocationScope(Region *parentRegion, const BufferViewFlowAnalysis::ValueSetT &aliases)
Checks whether the given aliases leave the allocation scope.
static bool isKnownControlFlowInterface(Operation *op)
Returns true if the given operation implements a known high-level region- based control-flow interfac...
static bool hasAllocationScope(Value alloc, const BufferViewFlowAnalysis &aliasAnalysis)
Checks, if an automated allocation scope for a given alloc value exists.
static bool isSequentialLoop(Operation *op)
Return whether the given operation is a loop with sequential execution semantics.
static bool isLoop(Operation *op)
Returns true if the given operation represents a loop by testing whether it implements the LoopLikeOp...
static bool allowAllocDominateBlockHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the dominate...
static bool allowAllocLoopHoisting(Operation *op)
Returns true if the given operation implements the AllocationOpInterface and it supports the loop hoi...
static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, unsigned maxRankOfAllocatedMemRef)
Check if the size of the allocation is less than the given size.
Block represents an ordered list of Operations.
Definition: Block.h:33
bool isEntryBlock()
Return if this block is the entry block in the parent region.
Definition: Block.cpp:38
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:33
A straight-forward alias analysis which ensures that all dependencies of all values will be determine...
ValueSetT resolve(Value value) const
Find all immediate and indirect views upon this value.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSizeInBits(Type t) const
Returns the size in bits of the given type in the current scope.
A class for computing basic dominance information.
Definition: Dominance.h:140
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:216
A trait of region holding operations that define a new scope for automatic allocations,...
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:745
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:555
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
A class for computing basic postdominance information.
Definition: Dominance.h:197
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
Region * getParentRegion()
Return the region containing this region or nullptr if the region is attached to a top-level operatio...
Definition: Region.cpp:45
Operation * getParentOp()
Return the parent operation this region is attached to.
Definition: Region.h:200
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
static Operation * getStartOperation(Value allocValue, Block *placementBlock, const Liveness &liveness)
Get the start operation to place the given alloc value within the specified placement block.
Definition: BufferUtils.cpp:36
std::tuple< Value, Operation * > AllocEntry
Represents a tuple of allocValue and deallocOperation.
Definition: BufferUtils.h:37
The base class for all BufferPlacement transformations.
Definition: BufferUtils.h:102
void hoistBuffersFromLoops(Operation *op)
Within the given operation, hoist buffers from loops where possible.
std::unique_ptr< Pass > createBufferHoistingPass()
Creates a pass that moves allocations upwards to reduce the number of required copies that are insert...
Block * findCommonDominator(Value value, const BufferViewFlowAnalysis::ValueSetT &values, const DominatorT &doms)
Finds a common dominator for the given value while taking the positions of the values in the value se...
Definition: BufferUtils.h:81
std::unique_ptr< Pass > createBufferLoopHoistingPass()
Creates a pass that moves allocations upwards out of loops.
std::unique_ptr< Pass > createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes=1024, unsigned maxRankOfAllocatedMemRef=1)
Creates a pass that promotes heap-based allocations to stack-based ones.
void promote(RewriterBase &rewriter, scf::ForallOp forallOp)
Promotes the loop body of a scf::ForallOp to its containing block.
Definition: SCF.cpp:644
Include the generated interface declarations.
llvm::DomTreeNodeBase< Block > DominanceInfoNode
Definition: Dominance.h:30