//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc
// positions. Furthermore, buffer deallocation also adds the clone operations
// required to ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is crucial
// to place them in the correct blocks. However, the liveness analysis does not
// pay attention to aliases, which can occur due to branches (and their
// associated block arguments) in general. For this purpose, BufferDeallocation
// first finds all possible aliases of a value (using the
// BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in ^exit. However, since the
// buffer cannot be freed in the post-dominator, it has to be freed in the
// block that defines it. This, in turn, requires a new clone buffer for %arg1
// that will carry the actual contents. Using the BufferViewFlowAnalysis class,
// we find out that %new_value has a potential alias %arg1. To find the dealloc
// position we have to find all potential aliases, iterate over their uses and
// find the common post-dominator block (note that additional clones and
// buffers remove potential aliases and will influence the placement of the
// deallocs). In all cases, the computed block can be safely used to free the
// %new_value buffer (this may be ^exit or ^bb2) as the value dies there, and
// liveness information determines the exact operation after which the dealloc
// has to be inserted. The algorithm introduces clone buffers and places
// deallocs in safe locations so that all buffers are eventually freed.
//
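// For illustration only (a sketch, not part of the original comment; types
// elided): assuming %arg1 is consumed in ^exit instead of being returned
// (returning buffers is not supported, see the TODO below), the pass rewrites
// the example above roughly into the following form, with clones in both
// predecessors and deallocs placed after the last uses:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %0 = bufferization.clone %arg0
//   cf.br ^exit(%0)
// ^bb2:
//   %new_value = ...
//   %1 = bufferization.clone %new_value
//   memref.dealloc %new_value
//   cf.br ^exit(%1)
// ^exit(%arg1):
//   ... use %arg1 ...
//   memref.dealloc %arg1
//   return
//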
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported. Furthermore,
// functions that already return buffers are not accepted.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Bufferization/Transforms/Passes.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

namespace mlir {
namespace bufferization {
#define GEN_PASS_DEF_BUFFERDEALLOCATION
#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
} // namespace bufferization
} // namespace mlir

using namespace mlir;
using namespace mlir::bufferization;

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult walkReturnOperations(
    Region *region,
    llvm::function_ref<LogicalResult(RegionBranchTerminatorOpInterface)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non-region-return-like terminators.
    if (auto regionTerminator =
            dyn_cast<RegionBranchTerminatorOpInterface>(terminator)) {
      if (failed(func(regionTerminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations that have at least one attached region implement
/// the RegionBranchOpInterface. This is not required in the edge case where
/// an operation has a single attached region and no results.
static bool validateSupportedControlFlow(Operation *op) {
  WalkResult result = op->walk([&](Operation *operation) {
    // Only check ops that are inside a function.
    if (!operation->getParentOfType<func::FuncOp>())
      return WalkResult::advance();

    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases where a region
    // returns no results, since, in that case, the intra-region control flow
    // does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
    }

    return WalkResult::advance();
  });
  return !result.wasSkipped();
}
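
// Illustrative example (the op name below is hypothetical, not a real
// dialect): inside a func.func, an op with a result and an attached region
// that does not implement RegionBranchOpInterface triggers the error above,
// whereas ops such as scf.if, which implement the interface, are accepted:
//
//   %r = "my.region_op"() ({
//     ...
//   }) : () -> memref<2xf32>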

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straightforward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};
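
// Illustrative example: for the CFG below, the depth-first traversal above
// finds ^bb1 already on the processing stack when following the cf.cond_br
// back from ^bb2, and records the backedge (^bb2, ^bb1) in `edgeSet`:
//
//   ^bb0:
//     cf.br ^bb1
//   ^bb1:
//     cf.br ^bb2
//   ^bb2:
//     cf.cond_br %cond, ^bb1, ^bb3
//   ^bb3:
//     return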

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that in turn lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      for (Value alias : aliases.resolve(alloc)) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      }
    }
    return success();
  }
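
  // Illustrative example ("test.use" is a hypothetical op): the allocation
  // below has no explicit dealloc, which is accepted as long as an
  // AllocationOpInterface implementation is registered for memref.alloc;
  // without such an implementation (and without an explicit dealloc),
  // prepare() emits the error above.
  //
  //   %0 = memref.alloc() : memref<2xf32>
  //   "test.use"(%0) : (memref<2xf32>) -> ()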

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SetVector<Value> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && isa<BlockArgument>(value))) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(isa<BlockArgument>(value)
                     ? introduceBlockArgCopy(cast<BlockArgument>(value))
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }
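
  // Worked example (using the IR from the file header): %new_value is defined
  // in ^bb2 and has the alias %arg1, the block argument of ^exit. Since ^bb2
  // does not dominate ^exit (which is also reachable via ^bb1), %arg1 ends up
  // in `valuesToFree`. introduceBlockArgCopy then materializes a clone in each
  // predecessor of ^exit, and %arg1 is registered as an allocation that needs
  // its own dealloc.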

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      SuccessorOperands operands =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex());

      // Query the associated source value.
      Value sourceValue = operands[blockArg.getArgNumber()];
      if (!sourceValue) {
        return failure();
      }
      // Wire new clone and successor operand.
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      operands.slice(blockArg.getArgNumber(), 1).assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*point=*/RegionBranchPoint::parent(),
                                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands = regionInterface.getEntrySuccessorOperands(argRegion);
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }
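
  // Illustrative example (types elided; only applies if %buf was marked as
  // requiring a dedicated free): for a region entry argument such as the
  // iter_args argument %buf below, the operand feeding the entry region is
  // rewired to a clone:
  //
  //   %res = scf.for %i = %lb to %ub step %s iter_args(%buf = %init) -> ...
  //
  // becomes, roughly,
  //
  //   %0 = bufferization.clone %init
  //   %res = scf.for %i = %lb to %ub step %s iter_args(%buf = %0) -> ...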

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }
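
  // Illustrative example (types elided; only applies if %r was marked as
  // requiring a dedicated free): each buffer yielded to the parent op is
  // cloned right before the terminator, so the parent's result always refers
  // to a fresh buffer:
  //
  //   %r = scf.if %cond -> (memref<2xf32>) {
  //     scf.yield %a
  //   } else {
  //     scf.yield %b
  //   }
  //
  // becomes, roughly,
  //
  //   %r = scf.if %cond -> (memref<2xf32>) {
  //     %0 = bufferization.clone %a
  //     scf.yield %0
  //   } else {
  //     %1 = bufferization.clone %b
  //     scf.yield %1
  //   }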

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region, successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(
              &region, [&](RegionBranchTerminatorOpInterface terminator) {
                // Get the actual mutable operands for this terminator op.
                auto terminatorOperands =
                    terminator.getMutableSuccessorOperands(*regionSuccessor);
                // Extract the source value from the current terminator.
                // This conversion needs to exist on a separate line due to a
                // bug in GCC conversion analysis.
                OperandRange immutableTerminatorOperands = terminatorOperands;
                Value sourceValue = immutableTerminatorOperands[operandIndex];
                // Create a new clone at the current location of the
                // terminator.
                auto clone = introduceCloneBuffers(sourceValue, terminator);
                if (failed(clone))
                  return failure();
                // Wire clone and terminator operand.
                terminatorOperands.slice(operandIndex, 1).assign(*clone);
                return success();
              })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying already-copied buffers can introduce memory leaks since the
    // invariant of BufferDeallocation assumes that a buffer will be cloned
    // only once into a temporary buffer. Hence, the construction of clone
    // chains introduces additional allocations that are not tracked
    // automatically by the algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of the original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation after which we can safely insert
      // the dealloc, taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc position would be behind the block terminator (i.e.
        // endOperation is the last operation in the block), the value escapes
        // this block and we do not insert a dealloc for it.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }
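
  // Illustrative example ("test.use" is a hypothetical op): for a single
  // allocation whose last use is inside the same block, the dealloc is
  // inserted right after that last use:
  //
  //   %0 = memref.alloc() : memref<2xf32>
  //   "test.use"(%0) : (memref<2xf32>) -> ()
  //   memref.dealloc %0 : memref<2xf32>     // inserted by placeDeallocs
  //   return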

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs into the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass
    : public bufferization::impl::BufferDeallocationBase<
          BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
  }

  void runOnOperation() override {
    func::FuncOp func = getOperation();
    if (func.isExternal())
      return;

    if (failed(deallocateBuffers(func)))
      signalPassFailure();
  }
};

} // namespace

LogicalResult bufferization::deallocateBuffers(Operation *op) {
  if (isa<ModuleOp>(op)) {
    WalkResult result = op->walk([&](func::FuncOp funcOp) {
      if (failed(deallocateBuffers(funcOp)))
        return WalkResult::interrupt();
      return WalkResult::advance();
    });
    return success(!result.wasInterrupted());
  }

  // Ensure that there are only supported loops.
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  // Check that the control-flow structures are supported.
  if (!validateSupportedControlFlow(op))
    return failure();

  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op);

  // Check for supported AllocationOpInterface implementations and prepare the
  // internal deallocation pass.
  if (failed(deallocation.prepare()))
    return failure();

  // Place all required temporary clone and dealloc nodes.
  if (failed(deallocation.deallocate()))
    return failure();

  return success();
}

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}
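
// Typical usage (a sketch, not part of this file): the pass can be added to a
// pass pipeline over func.func operations, e.g.
//
//   PassManager pm(&context);
//   pm.addNestedPass<func::FuncOp>(
//       bufferization::createBufferDeallocationPass());
//
// or invoked from the command line via `mlir-opt --buffer-deallocation`.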