MLIR  20.0.0git
InlinerInterfaceImpl.cpp
Go to the documentation of this file.
1 //===- InlinerInterfaceImpl.cpp - Inlining for the LLVM dialect -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Logic for inlining LLVM functions and the definition of the
10 // LLVMInliningInterface.
11 //
12 //===----------------------------------------------------------------------===//
13 
// NOTE(review): the extracted source dropped several include lines (inner
// lines 14-16 and 18-20); reconstructed from the names used below (walkSlice/
// WalkContinuation, LLVM dialect ops, ViewLikeOpInterface) — verify against
// the original file.
#include "mlir/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.h"

#include "mlir/Analysis/SliceWalk.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Debug.h"
23 
24 #define DEBUG_TYPE "llvm-inliner"
25 
26 using namespace mlir;
27 
28 /// Check whether the given alloca is an input to a lifetime intrinsic,
29 /// optionally passing through one or more casts on the way. This is not
30 /// transitive through block arguments.
31 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
32  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
33  allocaOp->getUsers().end());
34  while (!stack.empty()) {
35  Operation *op = stack.pop_back_val();
36  if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
37  return true;
38  if (isa<LLVM::BitcastOp>(op))
39  stack.append(op->getUsers().begin(), op->getUsers().end());
40  }
41  return false;
42 }
43 
44 /// Handles alloca operations in the inlined blocks:
45 /// - Moves all alloca operations with a constant size in the former entry block
46 /// of the callee into the entry block of the caller, so they become part of
47 /// the function prologue/epilogue during code generation.
48 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
49 /// to the inlined blocks.
50 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
51 /// inlined.
52 static void
54  iterator_range<Region::iterator> inlinedBlocks) {
55  // Locate the entry block of the closest callsite ancestor that has either the
56  // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
57  // programs, this is the LLVMFuncOp containing the call site. However, in
58  // mixed-dialect programs, the callsite might be nested in another operation
59  // that carries one of these traits. In such scenarios, this traversal stops
60  // at the closest ancestor with either trait, ensuring visibility post
61  // relocation and respecting allocation scopes.
62  Block *callerEntryBlock = nullptr;
63  Operation *currentOp = call;
64  while (Operation *parentOp = currentOp->getParentOp()) {
65  if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
66  parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
67  callerEntryBlock = &currentOp->getParentRegion()->front();
68  break;
69  }
70  currentOp = parentOp;
71  }
72 
73  // Avoid relocating the alloca operations if the call has been inlined into
74  // the entry block already, which is typically the encompassing
75  // LLVM function, or if the relevant entry block cannot be identified.
76  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
77  if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
78  return;
79 
81  bool shouldInsertLifetimes = false;
82  bool hasDynamicAlloca = false;
83  // Conservatively only move static alloca operations that are part of the
84  // entry block and do not inspect nested regions, since they may execute
85  // conditionally or have other unknown semantics.
86  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
87  IntegerAttr arraySize;
88  if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
89  hasDynamicAlloca = true;
90  continue;
91  }
92  bool shouldInsertLifetime =
93  arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
94  shouldInsertLifetimes |= shouldInsertLifetime;
95  allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
96  }
97  // Check the remaining inlined blocks for dynamic allocas as well.
98  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
99  if (hasDynamicAlloca)
100  break;
101  hasDynamicAlloca =
102  llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
103  return !matchPattern(allocaOp.getArraySize(), m_Constant());
104  });
105  }
106  if (allocasToMove.empty() && !hasDynamicAlloca)
107  return;
108  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
109  Value stackPtr;
110  if (hasDynamicAlloca) {
111  // This may result in multiple stacksave/stackrestore intrinsics in the same
112  // scope if some are already present in the body of the caller. This is not
113  // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
114  // other cases where the stacksave/stackrestore is redundant.
115  stackPtr = builder.create<LLVM::StackSaveOp>(
116  call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
117  }
118  builder.setInsertionPoint(callerEntryBlock, callerEntryBlock->begin());
119  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
120  auto newConstant = builder.create<LLVM::ConstantOp>(
121  allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
122  // Insert a lifetime start intrinsic where the alloca was before moving it.
123  if (shouldInsertLifetime) {
124  OpBuilder::InsertionGuard insertionGuard(builder);
125  builder.setInsertionPoint(allocaOp);
126  builder.create<LLVM::LifetimeStartOp>(
127  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
128  allocaOp.getResult());
129  }
130  allocaOp->moveAfter(newConstant);
131  allocaOp.getArraySizeMutable().assign(newConstant.getResult());
132  }
133  if (!shouldInsertLifetimes && !hasDynamicAlloca)
134  return;
135  // Insert a lifetime end intrinsic before each return in the callee function.
136  for (Block &block : inlinedBlocks) {
137  if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
138  continue;
139  builder.setInsertionPoint(block.getTerminator());
140  if (hasDynamicAlloca)
141  builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
142  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
143  if (shouldInsertLifetime)
144  builder.create<LLVM::LifetimeEndOp>(
145  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
146  allocaOp.getResult());
147  }
148  }
149 }
150 
151 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
152 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
153 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
154 static void
157 
158  // Register handles in the walker to create the deep clones.
159  // The walker ensures that an attribute is only ever walked once and does a
160  // post-order walk, ensuring the domain is visited prior to the scope.
161  AttrTypeWalker walker;
162 
163  // Perform the deep clones while visiting. Builders create a distinct
164  // attribute to make sure that new instances are always created by the
165  // uniquer.
166  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
167  mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
168  domainAttr.getContext(), domainAttr.getDescription());
169  });
170 
171  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
172  mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
173  cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
174  scopeAttr.getDescription());
175  });
176 
177  // Map an array of scopes to an array of deep clones.
178  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
179  if (!arrayAttr)
180  return nullptr;
181 
182  // Create the deep clones if necessary.
183  walker.walk(arrayAttr);
184 
185  return ArrayAttr::get(arrayAttr.getContext(),
186  llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
187  return mapping.lookup(attr);
188  }));
189  };
190 
191  for (Block &block : inlinedBlocks) {
192  block.walk([&](Operation *op) {
193  if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
194  aliasInterface.setAliasScopes(
195  convertScopeList(aliasInterface.getAliasScopesOrNull()));
196  aliasInterface.setNoAliasScopes(
197  convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
198  }
199 
200  if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
201  // Create the deep clones if necessary.
202  walker.walk(noAliasScope.getScopeAttr());
203 
204  noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
205  mapping.lookup(noAliasScope.getScopeAttr())));
206  }
207  });
208  }
209 }
210 
211 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
212 /// Returns null if both parameters are null. If only one attribute is null,
213 /// return the other.
214 static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
215  if (!lhs)
216  return rhs;
217  if (!rhs)
218  return lhs;
219 
220  SmallVector<Attribute> result;
221  llvm::append_range(result, lhs);
222  llvm::append_range(result, rhs);
223  return ArrayAttr::get(lhs.getContext(), result);
224 }
225 
226 /// Attempts to return the set of all underlying pointer values that
227 /// `pointerValue` is based on. This function traverses through select
228 /// operations and block arguments.
229 static FailureOr<SmallVector<Value>>
231  SmallVector<Value> result;
232  WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
233  // Attempt to advance to the source of the underlying view-like operation.
234  // Examples of view-like operations include GEPOp and AddrSpaceCastOp.
235  if (auto viewOp = val.getDefiningOp<ViewLikeOpInterface>())
236  return WalkContinuation::advanceTo(viewOp.getViewSource());
237 
238  // Attempt to advance to control flow predecessors.
239  std::optional<SmallVector<Value>> controlFlowPredecessors =
241  if (controlFlowPredecessors)
242  return WalkContinuation::advanceTo(*controlFlowPredecessors);
243 
244  // For all non-control flow results, consider `val` an underlying object.
245  if (isa<OpResult>(val)) {
246  result.push_back(val);
247  return WalkContinuation::skip();
248  }
249 
250  // If this place is reached, `val` is a block argument that is not
251  // understood. Therefore, we conservatively interrupt.
252  // Note: Dealing with function arguments is not necessary, as the slice
253  // would have to go through an SSACopyOp first.
255  });
256 
257  if (walkResult.wasInterrupted())
258  return failure();
259 
260  return result;
261 }
262 
263 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
264 /// the appropriate inlined memory operations in an attempt to preserve the
265 /// original semantics of the parameter attribute.
267  Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
268 
269  // First, collect all ssa copy operations, which correspond to function
270  // parameters, and additionally store the noalias parameters. All parameters
271  // have been marked by the `handleArgument` implementation by using the
272  // `ssa.copy` intrinsic. Additionally, noalias parameters have an attached
273  // `noalias` attribute to the intrinsics. These intrinsics are only meant to
274  // be temporary and should therefore be deleted after we're done using them
275  // here.
276  SetVector<LLVM::SSACopyOp> ssaCopies;
277  SetVector<LLVM::SSACopyOp> noAliasParams;
278  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
279  for (Operation *user : argument.getUsers()) {
280  auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
281  if (!ssaCopy)
282  continue;
283  ssaCopies.insert(ssaCopy);
284 
285  if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
286  continue;
287  noAliasParams.insert(ssaCopy);
288  }
289  }
290 
291  // Scope exit block to make it impossible to forget to get rid of the
292  // intrinsics.
293  auto exit = llvm::make_scope_exit([&] {
294  for (LLVM::SSACopyOp ssaCopyOp : ssaCopies) {
295  ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
296  ssaCopyOp->erase();
297  }
298  });
299 
300  // If there were no noalias parameters, we have nothing to do here.
301  if (noAliasParams.empty())
302  return;
303 
304  // Create a new domain for this specific inlining and a new scope for every
305  // noalias parameter.
306  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
307  call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
309  for (LLVM::SSACopyOp copyOp : noAliasParams) {
310  auto scope = LLVM::AliasScopeAttr::get(functionDomain);
311  pointerScopes[copyOp] = scope;
312 
313  OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope);
314  }
315 
316  // Go through every instruction and attempt to find which noalias parameters
317  // it is definitely based on and definitely not based on.
318  for (Block &inlinedBlock : inlinedBlocks) {
319  inlinedBlock.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
320  // Collect the pointer arguments affected by the alias scopes.
321  SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
322 
323  // Find the set of underlying pointers that this pointer is based on.
324  SmallPtrSet<Value, 4> basedOnPointers;
325  for (Value pointer : pointerArgs) {
326  FailureOr<SmallVector<Value>> underlyingObjectSet =
327  getUnderlyingObjectSet(pointer);
328  if (failed(underlyingObjectSet))
329  return;
330  llvm::copy(*underlyingObjectSet,
331  std::inserter(basedOnPointers, basedOnPointers.begin()));
332  }
333 
334  bool aliasesOtherKnownObject = false;
335  // Go through the based on pointers and check that they are either:
336  // * Constants that can be ignored (undef, poison, null pointer).
337  // * Based on a pointer parameter.
338  // * Other pointers that we know can't alias with our noalias parameter.
339  //
340  // Any other value might be a pointer based on any noalias parameter that
341  // hasn't been identified. In that case conservatively don't add any
342  // scopes to this operation indicating either aliasing or not aliasing
343  // with any parameter.
344  if (llvm::any_of(basedOnPointers, [&](Value object) {
345  if (matchPattern(object, m_Constant()))
346  return false;
347 
348  if (auto ssaCopy = object.getDefiningOp<LLVM::SSACopyOp>()) {
349  // If that value is based on a noalias parameter, it is guaranteed
350  // to not alias with any other object.
351  aliasesOtherKnownObject |= !noAliasParams.contains(ssaCopy);
352  return false;
353  }
354 
355  if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
356  object.getDefiningOp())) {
357  aliasesOtherKnownObject = true;
358  return false;
359  }
360  return true;
361  }))
362  return;
363 
364  // Add all noalias parameter scopes to the noalias scope list that we are
365  // not based on.
366  SmallVector<Attribute> noAliasScopes;
367  for (LLVM::SSACopyOp noAlias : noAliasParams) {
368  if (basedOnPointers.contains(noAlias))
369  continue;
370 
371  noAliasScopes.push_back(pointerScopes[noAlias]);
372  }
373 
374  if (!noAliasScopes.empty())
375  aliasInterface.setNoAliasScopes(
376  concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
377  ArrayAttr::get(call->getContext(), noAliasScopes)));
378 
379  // Don't add alias scopes to call operations or operations that might
380  // operate on pointers not based on any noalias parameter.
381  // Since we add all scopes to an operation's noalias list that it
382  // definitely doesn't alias, we mustn't do the same for the alias.scope
383  // list if other objects are involved.
384  //
385  // Consider the following case:
386  // %0 = llvm.alloca
387  // %1 = select %magic, %0, %noalias_param
388  // store 5, %1 (1) noalias=[scope(...)]
389  // ...
390  // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
391  //
392  // We can add the scopes of any noalias parameters that aren't
393  // noalias_param's scope to (1) and add all of them to (2). We mustn't add
394  // the scope of noalias_param to the alias.scope list of (1) since
395  // that would mean (2) cannot alias with (1) which is wrong since both may
396  // store to %0.
397  //
398  // In conclusion, only add scopes to the alias.scope list if all pointers
399  // have a corresponding scope.
400  // Call operations are included in this list since we do not know whether
401  // the callee accesses any memory besides the ones passed as its
402  // arguments.
403  if (aliasesOtherKnownObject ||
404  isa<LLVM::CallOp>(aliasInterface.getOperation()))
405  return;
406 
407  SmallVector<Attribute> aliasScopes;
408  for (LLVM::SSACopyOp noAlias : noAliasParams)
409  if (basedOnPointers.contains(noAlias))
410  aliasScopes.push_back(pointerScopes[noAlias]);
411 
412  if (!aliasScopes.empty())
413  aliasInterface.setAliasScopes(
414  concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
415  ArrayAttr::get(call->getContext(), aliasScopes)));
416  });
417  }
418 }
419 
420 /// Appends any alias scopes of the call operation to any inlined memory
421 /// operation.
422 static void
424  iterator_range<Region::iterator> inlinedBlocks) {
425  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
426  if (!callAliasInterface)
427  return;
428 
429  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
430  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
431  // If the call has neither alias scopes or noalias scopes we have nothing to
432  // do here.
433  if (!aliasScopes && !noAliasScopes)
434  return;
435 
436  // Simply append the call op's alias and noalias scopes to any operation
437  // implementing AliasAnalysisOpInterface.
438  for (Block &block : inlinedBlocks) {
439  block.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
440  if (aliasScopes)
441  aliasInterface.setAliasScopes(concatArrayAttr(
442  aliasInterface.getAliasScopesOrNull(), aliasScopes));
443 
444  if (noAliasScopes)
445  aliasInterface.setNoAliasScopes(concatArrayAttr(
446  aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
447  });
448  }
449 }
450 
/// Handles all interactions with alias scopes during inlining. The order of
/// the steps matters: callee-defined scopes are deep-cloned first so that
/// repeated inlining of the same callee does not share scope attributes, then
/// fresh scopes are synthesized for noalias parameters, and finally the call
/// op's own (alias/noalias) scopes are propagated onto the inlined ops.
static void handleAliasScopes(Operation *call,
                              iterator_range<Region::iterator> inlinedBlocks) {
  deepCloneAliasScopes(inlinedBlocks);
  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
  appendCallOpAliasScopes(call, inlinedBlocks);
}
458 
459 /// Appends any access groups of the call operation to any inlined memory
460 /// operation.
461 static void handleAccessGroups(Operation *call,
462  iterator_range<Region::iterator> inlinedBlocks) {
463  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
464  if (!callAccessGroupInterface)
465  return;
466 
467  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
468  if (!accessGroups)
469  return;
470 
471  // Simply append the call op's access groups to any operation implementing
472  // AccessGroupOpInterface.
473  for (Block &block : inlinedBlocks)
474  for (auto accessGroupOpInterface :
475  block.getOps<LLVM::AccessGroupOpInterface>())
476  accessGroupOpInterface.setAccessGroups(concatArrayAttr(
477  accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
478 }
479 
480 /// Updates locations inside loop annotations to reflect that they were inlined.
481 static void
483  iterator_range<Region::iterator> inlinedBlocks) {
484  // Attempt to extract a DISubprogram from the callee.
485  auto func = call->getParentOfType<FunctionOpInterface>();
486  if (!func)
487  return;
488  LocationAttr funcLoc = func->getLoc();
489  auto fusedLoc = dyn_cast_if_present<FusedLoc>(funcLoc);
490  if (!fusedLoc)
491  return;
492  auto scope =
493  dyn_cast_if_present<LLVM::DISubprogramAttr>(fusedLoc.getMetadata());
494  if (!scope)
495  return;
496 
497  // Helper to build a new fused location that reflects the inlining of the loop
498  // annotation.
499  auto updateLoc = [&](FusedLoc loc) -> FusedLoc {
500  if (!loc)
501  return {};
502  Location callSiteLoc = CallSiteLoc::get(loc, call->getLoc());
503  return FusedLoc::get(loc.getContext(), callSiteLoc, scope);
504  };
505 
506  AttrTypeReplacer replacer;
507  replacer.addReplacement([&](LLVM::LoopAnnotationAttr loopAnnotation)
508  -> std::pair<Attribute, WalkResult> {
509  FusedLoc newStartLoc = updateLoc(loopAnnotation.getStartLoc());
510  FusedLoc newEndLoc = updateLoc(loopAnnotation.getEndLoc());
511  if (!newStartLoc && !newEndLoc)
512  return {loopAnnotation, WalkResult::advance()};
513  auto newLoopAnnotation = LLVM::LoopAnnotationAttr::get(
514  loopAnnotation.getContext(), loopAnnotation.getDisableNonforced(),
515  loopAnnotation.getVectorize(), loopAnnotation.getInterleave(),
516  loopAnnotation.getUnroll(), loopAnnotation.getUnrollAndJam(),
517  loopAnnotation.getLicm(), loopAnnotation.getDistribute(),
518  loopAnnotation.getPipeline(), loopAnnotation.getPeeled(),
519  loopAnnotation.getUnswitch(), loopAnnotation.getMustProgress(),
520  loopAnnotation.getIsVectorized(), newStartLoc, newEndLoc,
521  loopAnnotation.getParallelAccesses());
522  // Needs to advance, as loop annotations can be nested.
523  return {newLoopAnnotation, WalkResult::advance()};
524  });
525 
526  for (Block &block : inlinedBlocks)
527  for (Operation &op : block)
528  replacer.recursivelyReplaceElementsIn(&op);
529 }
530 
531 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
532 /// realigns `alloca` if this does not exceed the natural stack alignment.
533 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
534 static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
535  uint64_t requestedAlignment,
536  DataLayout const &dataLayout) {
537  uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
538  if (requestedAlignment <= allocaAlignment)
539  // No realignment necessary.
540  return allocaAlignment;
541  uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
542  // If the natural stack alignment is not specified, the data layout returns
543  // zero. Optimistically allow realignment in this case.
544  if (naturalStackAlignmentBits == 0 ||
545  // If the requested alignment exceeds the natural stack alignment, this
546  // will trigger a dynamic stack realignment, so we prefer to copy...
547  8 * requestedAlignment <= naturalStackAlignmentBits ||
548  // ...unless the alloca already triggers dynamic stack realignment. Then
549  // we might as well further increase the alignment to avoid a copy.
550  8 * allocaAlignment > naturalStackAlignmentBits) {
551  alloca.setAlignment(requestedAlignment);
552  allocaAlignment = requestedAlignment;
553  }
554  return allocaAlignment;
555 }
556 
557 /// Tries to find and return the alignment of the pointer `value` by looking for
558 /// an alignment attribute on the defining allocation op or function argument.
559 /// If the found alignment is lower than `requestedAlignment`, tries to realign
560 /// the pointer, then returns the resulting post-alignment, regardless of
561 /// whether it was realigned or not. If no existing alignment attribute is
562 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
563 static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
564  DataLayout const &dataLayout) {
565  if (Operation *definingOp = value.getDefiningOp()) {
566  if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
567  return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
568  dataLayout);
569  if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
570  if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
571  definingOp, addressOf.getGlobalNameAttr()))
572  return global.getAlignment().value_or(1);
573  // We don't currently handle this operation; assume no alignment.
574  return 1;
575  }
576  // Since there is no defining op, this is a block argument. Probably this
577  // comes directly from a function argument, so check that this is the case.
578  Operation *parentOp = value.getParentBlock()->getParentOp();
579  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
580  // Use the alignment attribute set for this argument in the parent function
581  // if it has been set.
582  auto blockArg = llvm::cast<BlockArgument>(value);
583  if (Attribute alignAttr = func.getArgAttr(
584  blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
585  return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
586  }
587  // We didn't find anything useful; assume no alignment.
588  return 1;
589 }
590 
591 /// Introduces a new alloca and copies the memory pointed to by `argument` to
592 /// the address of the new alloca, then returns the value of the new alloca.
594  Value argument, Type elementType,
595  uint64_t elementTypeSize,
596  uint64_t targetAlignment) {
597  // Allocate the new value on the stack.
598  Value allocaOp;
599  {
600  // Since this is a static alloca, we can put it directly in the entry block,
601  // so they can be absorbed into the prologue/epilogue at code generation.
602  OpBuilder::InsertionGuard insertionGuard(builder);
603  Block *entryBlock = &(*argument.getParentRegion()->begin());
604  builder.setInsertionPointToStart(entryBlock);
605  Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
606  builder.getI64IntegerAttr(1));
607  allocaOp = builder.create<LLVM::AllocaOp>(
608  loc, argument.getType(), elementType, one, targetAlignment);
609  }
610  // Copy the pointee to the newly allocated value.
611  Value copySize = builder.create<LLVM::ConstantOp>(
612  loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize));
613  builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize,
614  /*isVolatile=*/false);
615  return allocaOp;
616 }
617 
618 /// Handles a function argument marked with the byval attribute by introducing a
619 /// memcpy or realigning the defining operation, if required either due to the
620 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
621 /// `requestedAlignment` specifies the alignment set in the "align" argument
622 /// attribute (or 1 if no align attribute was set).
623 static Value handleByValArgument(OpBuilder &builder, Operation *callable,
624  Value argument, Type elementType,
625  uint64_t requestedAlignment) {
626  auto func = cast<LLVM::LLVMFuncOp>(callable);
627  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryEffectsAttr();
628  // If there is no memory effects attribute, assume that the function is
629  // not read-only.
630  bool isReadOnly = memoryEffects &&
631  memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
632  memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
633  // Check if there's an alignment mismatch requiring us to copy.
634  DataLayout dataLayout = DataLayout::closest(callable);
635  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
636  if (isReadOnly) {
637  if (requestedAlignment <= minimumAlignment)
638  return argument;
639  uint64_t currentAlignment =
640  tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
641  if (currentAlignment >= requestedAlignment)
642  return argument;
643  }
644  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
645  return handleByValArgumentInit(builder, func.getLoc(), argument, elementType,
646  dataLayout.getTypeSize(elementType),
647  targetAlignment);
648 }
649 
650 namespace {
651 struct LLVMInlinerInterface : public DialectInlinerInterface {
653 
654  LLVMInlinerInterface(Dialect *dialect)
655  : DialectInlinerInterface(dialect),
656  // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
657  disallowedFunctionAttrs({
658  StringAttr::get(dialect->getContext(), "noduplicate"),
659  StringAttr::get(dialect->getContext(), "presplitcoroutine"),
660  StringAttr::get(dialect->getContext(), "returns_twice"),
661  StringAttr::get(dialect->getContext(), "strictfp"),
662  }) {}
663 
664  bool isLegalToInline(Operation *call, Operation *callable,
665  bool wouldBeCloned) const final {
666  if (!wouldBeCloned)
667  return false;
668  if (!isa<LLVM::CallOp>(call)) {
669  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: call is not an '"
670  << LLVM::CallOp::getOperationName() << "' op\n");
671  return false;
672  }
673  auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
674  if (!funcOp) {
675  LLVM_DEBUG(llvm::dbgs()
676  << "Cannot inline: callable is not an '"
677  << LLVM::LLVMFuncOp::getOperationName() << "' op\n");
678  return false;
679  }
680  if (funcOp.isNoInline()) {
681  LLVM_DEBUG(llvm::dbgs()
682  << "Cannot inline: function is marked no_inline\n");
683  return false;
684  }
685  if (funcOp.isVarArg()) {
686  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: callable is variadic\n");
687  return false;
688  }
689  // TODO: Generate aliasing metadata from noalias result attributes.
690  if (auto attrs = funcOp.getArgAttrs()) {
691  for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
692  if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
693  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
694  << ": inalloca arguments not supported\n");
695  return false;
696  }
697  }
698  }
699  // TODO: Handle exceptions.
700  if (funcOp.getPersonality()) {
701  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
702  << ": unhandled function personality\n");
703  return false;
704  }
705  if (funcOp.getPassthrough()) {
706  // TODO: Used attributes should not be passthrough.
707  if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
708  auto stringAttr = dyn_cast<StringAttr>(attr);
709  if (!stringAttr)
710  return false;
711  if (disallowedFunctionAttrs.contains(stringAttr)) {
712  LLVM_DEBUG(llvm::dbgs()
713  << "Cannot inline " << funcOp.getSymName()
714  << ": found disallowed function attribute "
715  << stringAttr << "\n");
716  return true;
717  }
718  return false;
719  }))
720  return false;
721  }
722  return true;
723  }
724 
725  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
726  return true;
727  }
728 
729  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
730  // The inliner cannot handle variadic function arguments.
731  return !isa<LLVM::VaStartOp>(op);
732  }
733 
734  /// Handle the given inlined return by replacing it with a branch. This
735  /// overload is called when the inlined region has more than one block.
736  void handleTerminator(Operation *op, Block *newDest) const final {
737  // Only return needs to be handled here.
738  auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
739  if (!returnOp)
740  return;
741 
742  // Replace the return with a branch to the dest.
743  OpBuilder builder(op);
744  builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
745  op->erase();
746  }
747 
748  /// Handle the given inlined return by replacing the uses of the call with the
749  /// operands of the return. This overload is called when the inlined region
750  /// only contains one block.
751  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
752  // Return will be the only terminator present.
753  auto returnOp = cast<LLVM::ReturnOp>(op);
754 
755  // Replace the values directly with the return operands.
756  assert(returnOp.getNumOperands() == valuesToRepl.size());
757  for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
758  dst.replaceAllUsesWith(src);
759  }
760 
761  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
762  Value argument,
763  DictionaryAttr argumentAttrs) const final {
764  if (std::optional<NamedAttribute> attr =
765  argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
766  Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
767  uint64_t requestedAlignment = 1;
768  if (std::optional<NamedAttribute> alignAttr =
769  argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
770  requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
771  .getValue()
772  .getLimitedValue();
773  }
774  return handleByValArgument(builder, callable, argument, elementType,
775  requestedAlignment);
776  }
777 
778  // This code is essentially a workaround for deficiencies in the inliner
779  // interface: We need to transform operations *after* inlined based on the
780  // argument attributes of the parameters *before* inlining. This method runs
781  // prior to actual inlining and thus cannot transform the post-inlining
782  // code, while `processInlinedCallBlocks` does not have access to
783  // pre-inlining function arguments. Additionally, it is required to
784  // distinguish which parameter an SSA value originally came from. As a
785  // workaround until this is changed: Create an ssa.copy intrinsic with the
786  // noalias attribute (when it was present before) that can easily be found,
787  // and is extremely unlikely to exist in the code prior to inlining, using
788  // this to communicate between this method and `processInlinedCallBlocks`.
789  // TODO: Fix this by refactoring the inliner interface.
790  auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument);
791  if (argumentAttrs.contains(LLVM::LLVMDialect::getNoAliasAttrName()))
792  copyOp->setDiscardableAttr(
793  builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
794  builder.getUnitAttr());
795  return copyOp;
796  }
797 
798  void processInlinedCallBlocks(
799  Operation *call,
800  iterator_range<Region::iterator> inlinedBlocks) const override {
801  handleInlinedAllocas(call, inlinedBlocks);
802  handleAliasScopes(call, inlinedBlocks);
803  handleAccessGroups(call, inlinedBlocks);
804  handleLoopAnnotations(call, inlinedBlocks);
805  }
806 
807  // Keeping this (immutable) state on the interface allows us to look up
808  // StringAttrs instead of looking up strings, since StringAttrs are bound to
809  // the current context and thus cannot be initialized as static fields.
810  const DenseSet<StringAttr> disallowedFunctionAttrs;
811 };
812 
813 } // end anonymous namespace
814 
816  registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
817  dialect->addInterfaces<LLVMInlinerInterface>();
818  });
819 }
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or more casts on the way. This is not transitive through block arguments.
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static void handleLoopAnnotations(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Updates locations inside loop annotations to reflect that they were inlined.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined m...
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static FailureOr< SmallVector< Value > > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This is an attribute/type replacer that is naively cached.
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:31
iterator begin()
Definition: Block.h:141
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:191
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
IntegerType getI64Type()
Definition: Builders.cpp:109
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:152
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:44
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:46
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the entire group.
Definition: Dialect.h:38
MLIRContext * getContext() const
Definition: Dialect.h:52
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
Location objects represent source locations information in MLIR.
Definition: Location.h:31
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:356
This class helps build Operations.
Definition: Builders.h:215
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:439
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:406
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:497
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-level operation.
Definition: Operation.h:234
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:869
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
void moveAfter(Operation *existingOp)
Unlink this operation from its current block and insert it right after existingOp which may be in the same or another block in the same function.
Definition: Operation.cpp:569
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
A class to signal how to proceed with the walk of the backward slice:
Definition: SliceWalk.h:20
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: SliceWalk.h:60
static WalkContinuation skip()
Creates a continuation that advances the walk without adding any predecessor values to the work list.
Definition: SliceWalk.h:55
static WalkContinuation advanceTo(mlir::ValueRange nextValues)
Creates a continuation that adds the user-specified nextValues to the work list and advances the walk...
Definition: SliceWalk.h:49
static WalkContinuation interrupt()
Creates a continuation that interrupts the walk.
Definition: SliceWalk.h:43
static WalkResult advance()
Definition: Visitors.h:51
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
Register a replacement function for mapping a given attribute or type.
void registerInlinerInterface(DialectRegistry &registry)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:485
std::optional< SmallVector< Value > > getControlFlowPredecessors(Value value)
Computes a vector of all control predecessors of value.
Definition: SliceWalk.cpp:106
WalkContinuation walkSlice(mlir::ValueRange rootValues, WalkCallback walkCallback)
Walks the slice starting from the rootValues using a depth-first traversal.
Definition: SliceWalk.cpp:6
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369
This trait indicates that a terminator operation is "return-like".