MLIR  22.0.0git
InlinerInterfaceImpl.cpp
Go to the documentation of this file.
1 //===- InlinerInterfaceImpl.cpp - Inlining for LLVM the dialect -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Logic for inlining LLVM functions and the definition of the
10 // LLVMInliningInterface.
11 //
12 //===----------------------------------------------------------------------===//
13 
18 #include "mlir/IR/Matchers.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/Support/Debug.h"
24 
25 #define DEBUG_TYPE "llvm-inliner"
26 
27 using namespace mlir;
28 
29 /// Check whether the given alloca is an input to a lifetime intrinsic,
30 /// optionally passing through one or more casts on the way. This is not
31 /// transitive through block arguments.
32 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
33  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
34  allocaOp->getUsers().end());
35  while (!stack.empty()) {
36  Operation *op = stack.pop_back_val();
37  if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
38  return true;
39  if (isa<LLVM::BitcastOp>(op))
40  stack.append(op->getUsers().begin(), op->getUsers().end());
41  }
42  return false;
43 }
44 
45 /// Handles alloca operations in the inlined blocks:
46 /// - Moves all alloca operations with a constant size in the former entry block
47 /// of the callee into the entry block of the caller, so they become part of
48 /// the function prologue/epilogue during code generation.
49 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
50 /// to the inlined blocks.
51 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
52 /// inlined.
// NOTE(review): the line naming this function and its first parameter is
// missing from this extraction; the body below uses `call`, so presumably
// this is `handleInlinedAllocas(Operation *call,` — TODO confirm upstream.
53 static void
55  iterator_range<Region::iterator> inlinedBlocks) {
56  // Locate the entry block of the closest callsite ancestor that has either the
57  // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
58  // programs, this is the LLVMFuncOp containing the call site. However, in
59  // mixed-dialect programs, the callsite might be nested in another operation
60  // that carries one of these traits. In such scenarios, this traversal stops
61  // at the closest ancestor with either trait, ensuring visibility post
62  // relocation and respecting allocation scopes.
63  Block *callerEntryBlock = nullptr;
64  Operation *currentOp = call;
65  while (Operation *parentOp = currentOp->getParentOp()) {
66  if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
67  parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
68  callerEntryBlock = &currentOp->getParentRegion()->front();
69  break;
70  }
71  currentOp = parentOp;
72  }
73 
74  // Avoid relocating the alloca operations if the call has been inlined into
75  // the entry block already, which is typically the encompassing
76  // LLVM function, or if the relevant entry block cannot be identified.
77  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
78  if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
79  return;
80 
// NOTE(review): the declaration of `allocasToMove` is missing from this
// extraction; from the emplace_back and structured bindings below it is a
// vector of (alloca op, constant array size, needs-lifetime flag) tuples.
82  bool shouldInsertLifetimes = false;
83  bool hasDynamicAlloca = false;
84  // Conservatively only move static alloca operations that are part of the
85  // entry block and do not inspect nested regions, since they may execute
86  // conditionally or have other unknown semantics.
87  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
88  IntegerAttr arraySize;
89  if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
90  hasDynamicAlloca = true;
91  continue;
92  }
93  bool shouldInsertLifetime =
94  arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
95  shouldInsertLifetimes |= shouldInsertLifetime;
96  allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
97  }
98  // Check the remaining inlined blocks for dynamic allocas as well.
99  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
100  if (hasDynamicAlloca)
101  break;
102  hasDynamicAlloca =
103  llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
104  return !matchPattern(allocaOp.getArraySize(), m_Constant());
105  });
106  }
107  if (allocasToMove.empty() && !hasDynamicAlloca)
108  return;
 // Insert at the start of the (former) callee entry block so that a
 // stacksave, if needed, executes before any inlined code.
109  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
110  Value stackPtr;
111  if (hasDynamicAlloca) {
112  // This may result in multiple stacksave/stackrestore intrinsics in the same
113  // scope if some are already present in the body of the caller. This is not
114  // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
115  // other cases where the stacksave/stackrestore is redundant.
116  stackPtr = LLVM::StackSaveOp::create(
117  builder, call->getLoc(),
// NOTE(review): the line supplying the stacksave result type (presumably an
// LLVM pointer type) is missing from this extraction — TODO confirm.
119  }
 // Relocate all collected static allocas (and their new constant sizes) to
 // the caller's entry block so codegen can fold them into the prologue.
120  builder.setInsertionPointToStart(callerEntryBlock);
121  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
122  auto newConstant =
123  LLVM::ConstantOp::create(builder, allocaOp->getLoc(),
124  allocaOp.getArraySize().getType(), arraySize);
125  // Insert a lifetime start intrinsic where the alloca was before moving it.
126  if (shouldInsertLifetime) {
127  OpBuilder::InsertionGuard insertionGuard(builder);
128  builder.setInsertionPoint(allocaOp);
129  LLVM::LifetimeStartOp::create(builder, allocaOp.getLoc(),
130  arraySize.getValue().getLimitedValue(),
131  allocaOp.getResult());
132  }
133  allocaOp->moveAfter(newConstant);
134  allocaOp.getArraySizeMutable().assign(newConstant.getResult());
135  }
136  if (!shouldInsertLifetimes && !hasDynamicAlloca)
137  return;
138  // Insert a lifetime end intrinsic before each return in the callee function.
139  for (Block &block : inlinedBlocks) {
140  if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
141  continue;
142  builder.setInsertionPoint(block.getTerminator());
143  if (hasDynamicAlloca)
144  LLVM::StackRestoreOp::create(builder, call->getLoc(), stackPtr);
145  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
146  if (shouldInsertLifetime)
147  LLVM::LifetimeEndOp::create(builder, allocaOp.getLoc(),
148  arraySize.getValue().getLimitedValue(),
149  allocaOp.getResult());
150  }
151  }
152 }
153 
154 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
155 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
156 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
// NOTE(review): the lines naming this function (`deepCloneAliasScopes`, per
// its caller in handleAliasScopes) and declaring the attribute-to-clone
// `mapping` used below are missing from this extraction — TODO restore.
157 static void
160 
161  // Register handles in the walker to create the deep clones.
162  // The walker ensures that an attribute is only ever walked once and does a
163  // post-order walk, ensuring the domain is visited prior to the scope.
164  AttrTypeWalker walker;
165 
166  // Perform the deep clones while visiting. Builders create a distinct
167  // attribute to make sure that new instances are always created by the
168  // uniquer.
169  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
170  mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
171  domainAttr.getContext(), domainAttr.getDescription());
172  });
173 
 // Post-order guarantees the scope's domain was cloned above, so the lookup
 // of the cloned domain here always succeeds.
174  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
175  mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
176  cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
177  scopeAttr.getDescription());
178  });
179 
180  // Map an array of scopes to an array of deep clones.
181  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
182  if (!arrayAttr)
183  return nullptr;
184 
185  // Create the deep clones if necessary.
186  walker.walk(arrayAttr);
187 
188  return ArrayAttr::get(arrayAttr.getContext(),
189  llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
190  return mapping.lookup(attr);
191  }));
192  };
193 
 // Rewrite both the alias-scope lists on memory operations and the scopes
 // referenced by noalias scope declarations.
194  for (Block &block : inlinedBlocks) {
195  block.walk([&](Operation *op) {
196  if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
197  aliasInterface.setAliasScopes(
198  convertScopeList(aliasInterface.getAliasScopesOrNull()));
199  aliasInterface.setNoAliasScopes(
200  convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
201  }
202 
203  if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
204  // Create the deep clones if necessary.
205  walker.walk(noAliasScope.getScopeAttr());
206 
207  noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
208  mapping.lookup(noAliasScope.getScopeAttr())));
209  }
210  });
211  }
212 }
213 
214 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
215 /// Returns null if both parameters are null. If only one attribute is null,
216 /// return the other.
217 static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
218  if (!lhs)
219  return rhs;
220  if (!rhs)
221  return lhs;
222 
223  SmallVector<Attribute> result;
224  llvm::append_range(result, lhs);
225  llvm::append_range(result, rhs);
226  return ArrayAttr::get(lhs.getContext(), result);
227 }
228 
229 /// Attempts to return the set of all underlying pointer values that
230 /// `pointerValue` is based on. This function traverses through select
231 /// operations and block arguments.
232 static FailureOr<SmallVector<Value>>
// NOTE(review): the line naming this function and its parameter is missing
// from this extraction; the caller below uses `getUnderlyingObjectSet(
// pointer)`, so presumably `getUnderlyingObjectSet(Value pointerValue) {`.
234  SmallVector<Value> result;
235  WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
236  // Attempt to advance to the source of the underlying view-like operation.
237  // Examples of view-like operations include GEPOp and AddrSpaceCastOp.
238  if (auto viewOp = val.getDefiningOp<ViewLikeOpInterface>())
239  return WalkContinuation::advanceTo(viewOp.getViewSource());
240 
241  // Attempt to advance to control flow predecessors.
242  std::optional<SmallVector<Value>> controlFlowPredecessors =
// NOTE(review): the initializer line is missing from this extraction;
// presumably a call like `getControlFlowPredecessors(val)` — TODO confirm.
244  if (controlFlowPredecessors)
245  return WalkContinuation::advanceTo(*controlFlowPredecessors);
246 
247  // For all non-control flow results, consider `val` an underlying object.
248  if (isa<OpResult>(val)) {
249  result.push_back(val);
250  return WalkContinuation::skip();
251  }
252 
253  // If this place is reached, `val` is a block argument that is not
254  // understood. Therefore, we conservatively interrupt.
255  // Note: Dealing with function arguments is not necessary, as the slice
256  // would have to go through an SSACopyOp first.
// NOTE(review): the `return WalkContinuation::interrupt();` line is missing
// from this extraction — implied by the comment above and the
// `walkResult.wasInterrupted()` check below.
258  });
259 
260  if (walkResult.wasInterrupted())
261  return failure();
262 
263  return result;
264 }
265 
266 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
267 /// the appropriate inlined memory operations in an attempt to preserve the
268 /// original semantics of the parameter attribute.
// NOTE(review): the signature line (`static void
// createNewAliasScopesFromNoAliasParameter(`, per the caller in
// handleAliasScopes) is missing from this extraction.
270  Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
271 
272  // First, collect all ssa copy operations, which correspond to function
273  // parameters, and additionally store the noalias parameters. All parameters
274  // have been marked by the `handleArgument` implementation by using the
275  // `ssa.copy` intrinsic. Additionally, noalias parameters have an attached
276  // `noalias` attribute to the intrinsics. These intrinsics are only meant to
277  // be temporary and should therefore be deleted after we're done using them
278  // here.
279  SetVector<LLVM::SSACopyOp> ssaCopies;
280  SetVector<LLVM::SSACopyOp> noAliasParams;
281  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
282  for (Operation *user : argument.getUsers()) {
283  auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
284  if (!ssaCopy)
285  continue;
286  ssaCopies.insert(ssaCopy);
287 
288  if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
289  continue;
290  noAliasParams.insert(ssaCopy);
291  }
292  }
293 
294  // Scope exit block to make it impossible to forget to get rid of the
295  // intrinsics.
296  auto exit = llvm::make_scope_exit([&] {
297  for (LLVM::SSACopyOp ssaCopyOp : ssaCopies) {
298  ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
299  ssaCopyOp->erase();
300  }
301  });
302 
303  // If there were no noalias parameters, we have nothing to do here.
304  if (noAliasParams.empty())
305  return;
306 
307  // Create a new domain for this specific inlining and a new scope for every
308  // noalias parameter.
309  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
310  call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
// NOTE(review): the declaration of `pointerScopes` (a map from each noalias
// ssa.copy value to its newly created alias scope, used below) is missing
// from this extraction — TODO restore.
312  for (LLVM::SSACopyOp copyOp : noAliasParams) {
313  auto scope = LLVM::AliasScopeAttr::get(functionDomain);
314  pointerScopes[copyOp] = scope;
315 
316  auto builder = OpBuilder(call);
317  LLVM::NoAliasScopeDeclOp::create(builder, call->getLoc(), scope);
318  }
319 
320  // Go through every instruction and attempt to find which noalias parameters
321  // it is definitely based on and definitely not based on.
322  for (Block &inlinedBlock : inlinedBlocks) {
323  inlinedBlock.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
324  // Collect the pointer arguments affected by the alias scopes.
325  SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
326 
327  // Find the set of underlying pointers that this pointer is based on.
328  SmallPtrSet<Value, 4> basedOnPointers;
329  for (Value pointer : pointerArgs) {
330  FailureOr<SmallVector<Value>> underlyingObjectSet =
331  getUnderlyingObjectSet(pointer);
 // Bail out on this operation entirely if any underlying object is
 // unknown; adding partial scope info would be unsound.
332  if (failed(underlyingObjectSet))
333  return;
334  llvm::copy(*underlyingObjectSet,
335  std::inserter(basedOnPointers, basedOnPointers.begin()));
336  }
337 
338  bool aliasesOtherKnownObject = false;
339  // Go through the based on pointers and check that they are either:
340  // * Constants that can be ignored (undef, poison, null pointer).
341  // * Based on a pointer parameter.
342  // * Other pointers that we know can't alias with our noalias parameter.
343  //
344  // Any other value might be a pointer based on any noalias parameter that
345  // hasn't been identified. In that case conservatively don't add any
346  // scopes to this operation indicating either aliasing or not aliasing
347  // with any parameter.
348  if (llvm::any_of(basedOnPointers, [&](Value object) {
349  if (matchPattern(object, m_Constant()))
350  return false;
351 
352  if (auto ssaCopy = object.getDefiningOp<LLVM::SSACopyOp>()) {
353  // If that value is based on a noalias parameter, it is guaranteed
354  // to not alias with any other object.
355  aliasesOtherKnownObject |= !noAliasParams.contains(ssaCopy);
356  return false;
357  }
358 
359  if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
360  object.getDefiningOp())) {
361  aliasesOtherKnownObject = true;
362  return false;
363  }
364  return true;
365  }))
366  return;
367 
368  // Add all noalias parameter scopes to the noalias scope list that we are
369  // not based on.
370  SmallVector<Attribute> noAliasScopes;
371  for (LLVM::SSACopyOp noAlias : noAliasParams) {
372  if (basedOnPointers.contains(noAlias))
373  continue;
374 
375  noAliasScopes.push_back(pointerScopes[noAlias]);
376  }
377 
378  if (!noAliasScopes.empty())
379  aliasInterface.setNoAliasScopes(
380  concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
381  ArrayAttr::get(call->getContext(), noAliasScopes)));
382 
383  // Don't add alias scopes to call operations or operations that might
384  // operate on pointers not based on any noalias parameter.
385  // Since we add all scopes to an operation's noalias list that it
386  // definitely doesn't alias, we mustn't do the same for the alias.scope
387  // list if other objects are involved.
388  //
389  // Consider the following case:
390  // %0 = llvm.alloca
391  // %1 = select %magic, %0, %noalias_param
392  // store 5, %1 (1) noalias=[scope(...)]
393  // ...
394  // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
395  //
396  // We can add the scopes of any noalias parameters that aren't
397  // noalias_param's scope to (1) and add all of them to (2). We mustn't add
398  // the scope of noalias_param to the alias.scope list of (1) since
399  // that would mean (2) cannot alias with (1) which is wrong since both may
400  // store to %0.
401  //
402  // In conclusion, only add scopes to the alias.scope list if all pointers
403  // have a corresponding scope.
404  // Call operations are included in this list since we do not know whether
405  // the callee accesses any memory besides the ones passed as its
406  // arguments.
407  if (aliasesOtherKnownObject ||
408  isa<LLVM::CallOp>(aliasInterface.getOperation()))
409  return;
410 
411  SmallVector<Attribute> aliasScopes;
412  for (LLVM::SSACopyOp noAlias : noAliasParams)
413  if (basedOnPointers.contains(noAlias))
414  aliasScopes.push_back(pointerScopes[noAlias]);
415 
416  if (!aliasScopes.empty())
417  aliasInterface.setAliasScopes(
418  concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
419  ArrayAttr::get(call->getContext(), aliasScopes)));
420  });
421  }
422 }
423 
424 /// Appends any alias scopes of the call operation to any inlined memory
425 /// operation.
426 static void
// NOTE(review): the signature line (`appendCallOpAliasScopes(Operation *call,`,
// per the caller in handleAliasScopes) is missing from this extraction.
428  iterator_range<Region::iterator> inlinedBlocks) {
429  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
430  if (!callAliasInterface)
431  return;
432 
433  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
434  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
435  // If the call has neither alias scopes or noalias scopes we have nothing to
436  // do here.
437  if (!aliasScopes && !noAliasScopes)
438  return;
439 
440  // Simply append the call op's alias and noalias scopes to any operation
441  // implementing AliasAnalysisOpInterface.
442  for (Block &block : inlinedBlocks) {
443  block.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
444  if (aliasScopes)
445  aliasInterface.setAliasScopes(concatArrayAttr(
446  aliasInterface.getAliasScopesOrNull(), aliasScopes));
447 
448  if (noAliasScopes)
449  aliasInterface.setNoAliasScopes(concatArrayAttr(
450  aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
451  });
452  }
453 }
454 
455 /// Handles all interactions with alias scopes during inlining.
456 static void handleAliasScopes(Operation *call,
457  iterator_range<Region::iterator> inlinedBlocks) {
458  deepCloneAliasScopes(inlinedBlocks);
459  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
460  appendCallOpAliasScopes(call, inlinedBlocks);
461 }
462 
463 /// Appends any access groups of the call operation to any inlined memory
464 /// operation.
465 static void handleAccessGroups(Operation *call,
466  iterator_range<Region::iterator> inlinedBlocks) {
467  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
468  if (!callAccessGroupInterface)
469  return;
470 
471  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
472  if (!accessGroups)
473  return;
474 
475  // Simply append the call op's access groups to any operation implementing
476  // AccessGroupOpInterface.
477  for (Block &block : inlinedBlocks)
478  for (auto accessGroupOpInterface :
479  block.getOps<LLVM::AccessGroupOpInterface>())
480  accessGroupOpInterface.setAccessGroups(concatArrayAttr(
481  accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
482 }
483 
484 /// Updates locations inside loop annotations to reflect that they were inlined.
485 static void
// NOTE(review): the signature line (`handleLoopAnnotations(Operation *call,`,
// per the caller in processInlinedCallBlocks) is missing from this extraction.
487  iterator_range<Region::iterator> inlinedBlocks) {
488  // Attempt to extract a DISubprogram from the callee.
489  auto func = call->getParentOfType<FunctionOpInterface>();
490  if (!func)
491  return;
492  LocationAttr funcLoc = func->getLoc();
493  auto fusedLoc = dyn_cast_if_present<FusedLoc>(funcLoc);
494  if (!fusedLoc)
495  return;
496  auto scope =
497  dyn_cast_if_present<LLVM::DISubprogramAttr>(fusedLoc.getMetadata());
498  if (!scope)
499  return;
500 
501  // Helper to build a new fused location that reflects the inlining of the loop
502  // annotation.
503  auto updateLoc = [&](FusedLoc loc) -> FusedLoc {
504  if (!loc)
505  return {};
 // Wrap the original location in a CallSiteLoc anchored at the call, and
 // re-fuse it with the caller's DISubprogram scope.
506  Location callSiteLoc = CallSiteLoc::get(loc, call->getLoc());
507  return FusedLoc::get(loc.getContext(), callSiteLoc, scope);
508  };
509 
510  AttrTypeReplacer replacer;
511  replacer.addReplacement([&](LLVM::LoopAnnotationAttr loopAnnotation)
512  -> std::pair<Attribute, WalkResult> {
513  FusedLoc newStartLoc = updateLoc(loopAnnotation.getStartLoc());
514  FusedLoc newEndLoc = updateLoc(loopAnnotation.getEndLoc());
515  if (!newStartLoc && !newEndLoc)
516  return {loopAnnotation, WalkResult::advance()};
 // Rebuild the annotation with only the start/end locations replaced; all
 // other fields are carried over unchanged.
517  auto newLoopAnnotation = LLVM::LoopAnnotationAttr::get(
518  loopAnnotation.getContext(), loopAnnotation.getDisableNonforced(),
519  loopAnnotation.getVectorize(), loopAnnotation.getInterleave(),
520  loopAnnotation.getUnroll(), loopAnnotation.getUnrollAndJam(),
521  loopAnnotation.getLicm(), loopAnnotation.getDistribute(),
522  loopAnnotation.getPipeline(), loopAnnotation.getPeeled(),
523  loopAnnotation.getUnswitch(), loopAnnotation.getMustProgress(),
524  loopAnnotation.getIsVectorized(), newStartLoc, newEndLoc,
525  loopAnnotation.getParallelAccesses());
526  // Needs to advance, as loop annotations can be nested.
527  return {newLoopAnnotation, WalkResult::advance()};
528  });
529 
530  for (Block &block : inlinedBlocks)
531  for (Operation &op : block)
532  replacer.recursivelyReplaceElementsIn(&op);
533 }
534 
535 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
536 /// realigns `alloca` if this does not exceed the natural stack alignment.
537 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
538 static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
539  uint64_t requestedAlignment,
540  DataLayout const &dataLayout) {
541  uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
542  if (requestedAlignment <= allocaAlignment)
543  // No realignment necessary.
544  return allocaAlignment;
545  uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
546  // If the natural stack alignment is not specified, the data layout returns
547  // zero. Optimistically allow realignment in this case.
548  if (naturalStackAlignmentBits == 0 ||
549  // If the requested alignment exceeds the natural stack alignment, this
550  // will trigger a dynamic stack realignment, so we prefer to copy...
551  8 * requestedAlignment <= naturalStackAlignmentBits ||
552  // ...unless the alloca already triggers dynamic stack realignment. Then
553  // we might as well further increase the alignment to avoid a copy.
554  8 * allocaAlignment > naturalStackAlignmentBits) {
555  alloca.setAlignment(requestedAlignment);
556  allocaAlignment = requestedAlignment;
557  }
558  return allocaAlignment;
559 }
560 
561 /// Tries to find and return the alignment of the pointer `value` by looking for
562 /// an alignment attribute on the defining allocation op or function argument.
563 /// If the found alignment is lower than `requestedAlignment`, tries to realign
564 /// the pointer, then returns the resulting post-alignment, regardless of
565 /// whether it was realigned or not. If no existing alignment attribute is
566 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
567 static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
568  DataLayout const &dataLayout) {
569  if (Operation *definingOp = value.getDefiningOp()) {
570  if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
571  return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
572  dataLayout);
573  if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
574  if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
575  definingOp, addressOf.getGlobalNameAttr()))
576  return global.getAlignment().value_or(1);
577  // We don't currently handle this operation; assume no alignment.
578  return 1;
579  }
580  // Since there is no defining op, this is a block argument. Probably this
581  // comes directly from a function argument, so check that this is the case.
582  Operation *parentOp = value.getParentBlock()->getParentOp();
583  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
584  // Use the alignment attribute set for this argument in the parent function
585  // if it has been set.
586  auto blockArg = llvm::cast<BlockArgument>(value);
587  if (Attribute alignAttr = func.getArgAttr(
588  blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
589  return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
590  }
591  // We didn't find anything useful; assume no alignment.
592  return 1;
593 }
594 
595 /// Introduces a new alloca and copies the memory pointed to by `argument` to
596 /// the address of the new alloca, then returns the value of the new alloca.
// NOTE(review): the signature line naming this function is missing from this
// extraction; the parameters below and the call site in handleByValArgument
// suggest `handleByValArgumentInit(OpBuilder &builder, Location loc,` — TODO
// confirm upstream.
598  Value argument, Type elementType,
599  uint64_t elementTypeSize,
600  uint64_t targetAlignment) {
601  // Allocate the new value on the stack.
602  Value allocaOp;
603  {
604  // Since this is a static alloca, we can put it directly in the entry block,
605  // so they can be absorbed into the prologue/epilogue at code generation.
606  OpBuilder::InsertionGuard insertionGuard(builder);
607  Block *entryBlock = &(*argument.getParentRegion()->begin());
608  builder.setInsertionPointToStart(entryBlock);
 // Single-element alloca: array size constant 1 of i64.
609  Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
610  builder.getI64IntegerAttr(1));
611  allocaOp = LLVM::AllocaOp::create(builder, loc, argument.getType(),
612  elementType, one, targetAlignment);
613  }
614  // Copy the pointee to the newly allocated value.
615  Value copySize =
616  LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
617  builder.getI64IntegerAttr(elementTypeSize));
618  LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
619  /*isVolatile=*/false);
620  return allocaOp;
621 }
622 
623 /// Handles a function argument marked with the byval attribute by introducing a
624 /// memcpy or realigning the defining operation, if required either due to the
625 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
626 /// `requestedAlignment` specifies the alignment set in the "align" argument
627 /// attribute (or 1 if no align attribute was set).
628 static Value handleByValArgument(OpBuilder &builder, Operation *callable,
629  Value argument, Type elementType,
630  uint64_t requestedAlignment) {
631  auto func = cast<LLVM::LLVMFuncOp>(callable);
632  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryEffectsAttr();
633  // If there is no memory effects attribute, assume that the function is
634  // not read-only.
 // "Read-only" here means the callee's argument-memory effect excludes both
 // Mod and ModRef, i.e. it never writes through the argument.
635  bool isReadOnly = memoryEffects &&
636  memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
637  memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
638  // Check if there's an alignment mismatch requiring us to copy.
639  DataLayout dataLayout = DataLayout::closest(callable);
640  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
641  if (isReadOnly) {
 // For read-only pointees a copy is only needed when the requested
 // alignment cannot be satisfied by (re)aligning the existing pointer.
642  if (requestedAlignment <= minimumAlignment)
643  return argument;
644  uint64_t currentAlignment =
645  tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
646  if (currentAlignment >= requestedAlignment)
647  return argument;
648  }
649  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
// NOTE(review): the `return` line invoking the copy helper is missing from
// this extraction; given the argument list below it is presumably
// `return handleByValArgumentInit(` — TODO confirm upstream.
651  builder, argument.getLoc(), argument, elementType,
652  dataLayout.getTypeSize(elementType), targetAlignment);
653 }
654 
655 namespace {
656 struct LLVMInlinerInterface : public DialectInlinerInterface {
// NOTE(review): line 657 is missing from this extraction (likely an
// inherited-constructor or using declaration) — TODO confirm upstream.
658 
659  LLVMInlinerInterface(Dialect *dialect)
660  : DialectInlinerInterface(dialect),
661  // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
662  disallowedFunctionAttrs({
663  StringAttr::get(dialect->getContext(), "noduplicate"),
664  StringAttr::get(dialect->getContext(), "presplitcoroutine"),
665  StringAttr::get(dialect->getContext(), "returns_twice"),
666  StringAttr::get(dialect->getContext(), "strictfp"),
667  }) {}
668 
 /// Checks call-level legality: the call must be an LLVM::CallOp without
 /// no_inline, and the callable a non-variadic LLVMFuncOp that is not marked
 /// no_inline and has no inalloca arguments, no personality, and no
 /// disallowed passthrough attribute.
669  bool isLegalToInline(Operation *call, Operation *callable,
670  bool wouldBeCloned) const final {
671  auto callOp = dyn_cast<LLVM::CallOp>(call);
672  if (!callOp) {
673  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: call is not an '"
674  << LLVM::CallOp::getOperationName() << "' op\n");
675  return false;
676  }
677  if (callOp.getNoInline()) {
678  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: call is marked no_inline\n");
679  return false;
680  }
681  auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
682  if (!funcOp) {
683  LLVM_DEBUG(llvm::dbgs()
684  << "Cannot inline: callable is not an '"
685  << LLVM::LLVMFuncOp::getOperationName() << "' op\n");
686  return false;
687  }
688  if (funcOp.isNoInline()) {
689  LLVM_DEBUG(llvm::dbgs()
690  << "Cannot inline: function is marked no_inline\n");
691  return false;
692  }
693  if (funcOp.isVarArg()) {
694  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: callable is variadic\n");
695  return false;
696  }
697  // TODO: Generate aliasing metadata from noalias result attributes.
698  if (auto attrs = funcOp.getArgAttrs()) {
699  for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
700  if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
701  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
702  << ": inalloca arguments not supported\n");
703  return false;
704  }
705  }
706  }
707  // TODO: Handle exceptions.
708  if (funcOp.getPersonality()) {
709  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
710  << ": unhandled function personality\n");
711  return false;
712  }
713  if (funcOp.getPassthrough()) {
714  // TODO: Used attributes should not be passthrough.
715  if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
716  auto stringAttr = dyn_cast<StringAttr>(attr);
717  if (!stringAttr)
718  return false;
719  if (disallowedFunctionAttrs.contains(stringAttr)) {
720  LLVM_DEBUG(llvm::dbgs()
721  << "Cannot inline " << funcOp.getSymName()
722  << ": found disallowed function attribute "
723  << stringAttr << "\n");
724  return true;
725  }
726  return false;
727  }))
728  return false;
729  }
730  return true;
731  }
732 
 /// Region-to-region inlining carries no additional LLVM restrictions.
733  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
734  return true;
735  }
736 
737  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
738  // The inliner cannot handle variadic function arguments and blocktag
739  // operations prevent inlining since the blockaddress operations
740  // reference them via the callee symbol.
741  return !(isa<LLVM::VaStartOp>(op) || isa<LLVM::BlockTagOp>(op));
742  }
743 
744  /// Handle the given inlined return by replacing it with a branch. This
745  /// overload is called when the inlined region has more than one block.
746  void handleTerminator(Operation *op, Block *newDest) const final {
747  // Only return needs to be handled here.
748  auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
749  if (!returnOp)
750  return;
751 
752  // Replace the return with a branch to the dest.
753  OpBuilder builder(op);
754  LLVM::BrOp::create(builder, op->getLoc(), returnOp.getOperands(), newDest);
755  op->erase();
756  }
757 
 /// Permits the single-block fast path only when the inlined block does not
 /// end in an `llvm.unreachable` terminator.
758  bool allowSingleBlockOptimization(
759  iterator_range<Region::iterator> inlinedBlocks) const final {
760  if (!inlinedBlocks.empty() &&
761  isa<LLVM::UnreachableOp>(inlinedBlocks.begin()->getTerminator()))
762  return false;
763  return true;
764  }
765 
766  /// Handle the given inlined return by replacing the uses of the call with the
767  /// operands of the return. This overload is called when the inlined region
768  /// only contains one block.
769  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
770  // Return will be the only terminator present.
771  auto returnOp = cast<LLVM::ReturnOp>(op);
772 
773  // Replace the values directly with the return operands.
774  assert(returnOp.getNumOperands() == valuesToRepl.size());
775  for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
776  dst.replaceAllUsesWith(src);
777  }
778 
 /// Copies byval arguments when necessary and wraps every other argument in
 /// a temporary `ssa.copy` marker so `processInlinedCallBlocks` can identify
 /// parameters after inlining (see the workaround comment below).
779  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
780  Value argument,
781  DictionaryAttr argumentAttrs) const final {
782  if (std::optional<NamedAttribute> attr =
783  argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
784  Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
785  uint64_t requestedAlignment = 1;
786  if (std::optional<NamedAttribute> alignAttr =
787  argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
788  requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
789  .getValue()
790  .getLimitedValue();
791  }
792  return handleByValArgument(builder, callable, argument, elementType,
793  requestedAlignment);
794  }
795 
796  // This code is essentially a workaround for deficiencies in the inliner
797  // interface: We need to transform operations *after* inlined based on the
798  // argument attributes of the parameters *before* inlining. This method runs
799  // prior to actual inlining and thus cannot transform the post-inlining
800  // code, while `processInlinedCallBlocks` does not have access to
801  // pre-inlining function arguments. Additionally, it is required to
802  // distinguish which parameter an SSA value originally came from. As a
803  // workaround until this is changed: Create an ssa.copy intrinsic with the
804  // noalias attribute (when it was present before) that can easily be found,
805  // and is extremely unlikely to exist in the code prior to inlining, using
806  // this to communicate between this method and `processInlinedCallBlocks`.
807  // TODO: Fix this by refactoring the inliner interface.
808  auto copyOp = LLVM::SSACopyOp::create(builder, call->getLoc(), argument);
809  if (argumentAttrs.contains(LLVM::LLVMDialect::getNoAliasAttrName()))
810  copyOp->setDiscardableAttr(
811  builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
812  builder.getUnitAttr());
813  return copyOp;
814  }
815 
 /// Runs all LLVM-specific post-inlining rewrites on the inlined blocks:
 /// alloca relocation, alias scopes, access groups, and loop annotations.
816  void processInlinedCallBlocks(
817  Operation *call,
818  iterator_range<Region::iterator> inlinedBlocks) const override {
819  handleInlinedAllocas(call, inlinedBlocks);
820  handleAliasScopes(call, inlinedBlocks);
821  handleAccessGroups(call, inlinedBlocks);
822  handleLoopAnnotations(call, inlinedBlocks);
823  }
824 
825  // Keeping this (immutable) state on the interface allows us to look up
826  // StringAttrs instead of looking up strings, since StringAttrs are bound to
827  // the current context and thus cannot be initialized as static fields.
828  const DenseSet<StringAttr> disallowedFunctionAttrs;
829 };
830 
831 } // end anonymous namespace
832 
// NOTE(review): the signature line of this registration function is missing
// from this extraction; the body registers `LLVMInlinerInterface` on the
// LLVM dialect, so this is presumably the LLVM-dialect inliner-interface
// registration entry point — TODO confirm the exact name upstream.
834  registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
835  dialect->addInterfaces<LLVMInlinerInterface>();
836  });
837 }
838 
// NOTE(review): the signature line is missing from this extraction; the body
// registers the same `LLVMInlinerInterface` for the NVVM dialect — TODO
// confirm the exact function name upstream.
840  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
841  dialect->addInterfaces<LLVMInlinerInterface>();
842  });
843 }
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or more casts on the way. This is not transitive through block arguments.
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static void handleLoopAnnotations(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Updates locations inside loop annotations to reflect that they were inlined.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined memory operations.
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static FailureOr< SmallVector< Value > > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This is an attribute/type replacer that is naively cached.
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:33
iterator begin()
Definition: Block.h:143
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:193
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:31
IntegerType getI64Type()
Definition: Builders.cpp:64
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:107
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:89
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:44
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:46
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the entire group.
Definition: Dialect.h:38
MLIRContext * getContext() const
Definition: Dialect.h:52
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
Location objects represent source locations information in MLIR.
Definition: Location.h:32
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation, or nullptr if this is a top-level operation.
Definition: Operation.h:234
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:46
Location getLoc() const
Return the location of this value.
Definition: Value.cpp:24
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:39
A class to signal how to proceed with the walk of the backward slice:
Definition: SliceWalk.h:20
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: SliceWalk.h:60
static WalkContinuation skip()
Creates a continuation that advances the walk without adding any predecessor values to the work list.
Definition: SliceWalk.h:55
static WalkContinuation advanceTo(mlir::ValueRange nextValues)
Creates a continuation that adds the user-specified nextValues to the work list and advances the walk...
Definition: SliceWalk.h:49
static WalkContinuation interrupt()
Creates a continuation that interrupts the walk.
Definition: SliceWalk.h:43
static WalkResult advance()
Definition: WalkResult.h:47
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
Register a replacement function for mapping a given attribute or type.
void registerInlinerInterface(DialectRegistry &registry)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
void registerInlinerInterface(DialectRegistry &registry)
Register the NVVMInlinerInterface implementation of DialectInlinerInterface with the NVVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:490
std::optional< SmallVector< Value > > getControlFlowPredecessors(Value value)
Computes a vector of all control predecessors of value.
Definition: SliceWalk.cpp:106
WalkContinuation walkSlice(mlir::ValueRange rootValues, WalkCallback walkCallback)
Walks the slice starting from the rootValues using a depth-first traversal.
Definition: SliceWalk.cpp:6
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369
This trait indicates that a terminator operation is "return-like".