MLIR  22.0.0git
InlinerInterfaceImpl.cpp
Go to the documentation of this file.
1 //===- InlinerInterfaceImpl.cpp - Inlining for the LLVM dialect ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Logic for inlining LLVM functions and the definition of the
10 // LLVMInliningInterface.
11 //
12 //===----------------------------------------------------------------------===//
13 
18 #include "mlir/IR/Matchers.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/Support/Debug.h"
24 
25 #include "llvm/Support/DebugLog.h"
26 
27 #define DEBUG_TYPE "llvm-inliner"
28 
29 using namespace mlir;
30 
31 /// Check whether the given alloca is an input to a lifetime intrinsic,
32 /// optionally passing through one or more casts on the way. This is not
33 /// transitive through block arguments.
34 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
35  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
36  allocaOp->getUsers().end());
37  while (!stack.empty()) {
38  Operation *op = stack.pop_back_val();
39  if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
40  return true;
41  if (isa<LLVM::BitcastOp>(op))
42  stack.append(op->getUsers().begin(), op->getUsers().end());
43  }
44  return false;
45 }
46 
47 /// Handles alloca operations in the inlined blocks:
48 /// - Moves all alloca operations with a constant size in the former entry block
49 /// of the callee into the entry block of the caller, so they become part of
50 /// the function prologue/epilogue during code generation.
51 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
52 /// to the inlined blocks.
53 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
54 /// inlined.
55 static void
57  iterator_range<Region::iterator> inlinedBlocks) {
58  // Locate the entry block of the closest callsite ancestor that has either the
59  // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
60  // programs, this is the LLVMFuncOp containing the call site. However, in
61  // mixed-dialect programs, the callsite might be nested in another operation
62  // that carries one of these traits. In such scenarios, this traversal stops
63  // at the closest ancestor with either trait, ensuring visibility post
64  // relocation and respecting allocation scopes.
65  Block *callerEntryBlock = nullptr;
66  Operation *currentOp = call;
67  while (Operation *parentOp = currentOp->getParentOp()) {
68  if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
69  parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
70  callerEntryBlock = &currentOp->getParentRegion()->front();
71  break;
72  }
73  currentOp = parentOp;
74  }
75 
76  // Avoid relocating the alloca operations if the call has been inlined into
77  // the entry block already, which is typically the encompassing
78  // LLVM function, or if the relevant entry block cannot be identified.
79  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
80  if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
81  return;
82 
84  bool shouldInsertLifetimes = false;
85  bool hasDynamicAlloca = false;
86  // Conservatively only move static alloca operations that are part of the
87  // entry block and do not inspect nested regions, since they may execute
88  // conditionally or have other unknown semantics.
89  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
90  IntegerAttr arraySize;
91  if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
92  hasDynamicAlloca = true;
93  continue;
94  }
95  bool shouldInsertLifetime =
96  arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
97  shouldInsertLifetimes |= shouldInsertLifetime;
98  allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
99  }
100  // Check the remaining inlined blocks for dynamic allocas as well.
101  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
102  if (hasDynamicAlloca)
103  break;
104  hasDynamicAlloca =
105  llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
106  return !matchPattern(allocaOp.getArraySize(), m_Constant());
107  });
108  }
109  if (allocasToMove.empty() && !hasDynamicAlloca)
110  return;
111  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
112  Value stackPtr;
113  if (hasDynamicAlloca) {
114  // This may result in multiple stacksave/stackrestore intrinsics in the same
115  // scope if some are already present in the body of the caller. This is not
116  // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
117  // other cases where the stacksave/stackrestore is redundant.
118  stackPtr = LLVM::StackSaveOp::create(
119  builder, call->getLoc(),
121  }
122  builder.setInsertionPointToStart(callerEntryBlock);
123  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
124  auto newConstant =
125  LLVM::ConstantOp::create(builder, allocaOp->getLoc(),
126  allocaOp.getArraySize().getType(), arraySize);
127  // Insert a lifetime start intrinsic where the alloca was before moving it.
128  if (shouldInsertLifetime) {
129  OpBuilder::InsertionGuard insertionGuard(builder);
130  builder.setInsertionPoint(allocaOp);
131  LLVM::LifetimeStartOp::create(builder, allocaOp.getLoc(),
132  allocaOp.getResult());
133  }
134  allocaOp->moveAfter(newConstant);
135  allocaOp.getArraySizeMutable().assign(newConstant.getResult());
136  }
137  if (!shouldInsertLifetimes && !hasDynamicAlloca)
138  return;
139  // Insert a lifetime end intrinsic before each return in the callee function.
140  for (Block &block : inlinedBlocks) {
141  if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
142  continue;
143  builder.setInsertionPoint(block.getTerminator());
144  if (hasDynamicAlloca)
145  LLVM::StackRestoreOp::create(builder, call->getLoc(), stackPtr);
146  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
147  if (shouldInsertLifetime)
148  LLVM::LifetimeEndOp::create(builder, allocaOp.getLoc(),
149  allocaOp.getResult());
150  }
151  }
152 }
153 
154 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
155 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
156 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
157 static void
160 
161  // Register handles in the walker to create the deep clones.
162  // The walker ensures that an attribute is only ever walked once and does a
163  // post-order walk, ensuring the domain is visited prior to the scope.
164  AttrTypeWalker walker;
165 
166  // Perform the deep clones while visiting. Builders create a distinct
167  // attribute to make sure that new instances are always created by the
168  // uniquer.
169  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
170  mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
171  domainAttr.getContext(), domainAttr.getDescription());
172  });
173 
174  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
175  mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
176  cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
177  scopeAttr.getDescription());
178  });
179 
180  // Map an array of scopes to an array of deep clones.
181  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
182  if (!arrayAttr)
183  return nullptr;
184 
185  // Create the deep clones if necessary.
186  walker.walk(arrayAttr);
187 
188  return ArrayAttr::get(arrayAttr.getContext(),
189  llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
190  return mapping.lookup(attr);
191  }));
192  };
193 
194  for (Block &block : inlinedBlocks) {
195  block.walk([&](Operation *op) {
196  if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
197  aliasInterface.setAliasScopes(
198  convertScopeList(aliasInterface.getAliasScopesOrNull()));
199  aliasInterface.setNoAliasScopes(
200  convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
201  }
202 
203  if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
204  // Create the deep clones if necessary.
205  walker.walk(noAliasScope.getScopeAttr());
206 
207  noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
208  mapping.lookup(noAliasScope.getScopeAttr())));
209  }
210  });
211  }
212 }
213 
214 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
215 /// Returns null if both parameters are null. If only one attribute is null,
216 /// return the other.
217 static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
218  if (!lhs)
219  return rhs;
220  if (!rhs)
221  return lhs;
222 
223  SmallVector<Attribute> result;
224  llvm::append_range(result, lhs);
225  llvm::append_range(result, rhs);
226  return ArrayAttr::get(lhs.getContext(), result);
227 }
228 
229 /// Attempts to return the set of all underlying pointer values that
230 /// `pointerValue` is based on. This function traverses through select
231 /// operations and block arguments.
232 static FailureOr<SmallVector<Value>>
234  SmallVector<Value> result;
235  WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
236  // Attempt to advance to the source of the underlying view-like operation.
237  // Examples of view-like operations include GEPOp and AddrSpaceCastOp.
238  if (auto viewOp = val.getDefiningOp<ViewLikeOpInterface>()) {
239  if (val == viewOp.getViewDest())
240  return WalkContinuation::advanceTo(viewOp.getViewSource());
241  }
242 
243  // Attempt to advance to control flow predecessors.
244  std::optional<SmallVector<Value>> controlFlowPredecessors =
246  if (controlFlowPredecessors)
247  return WalkContinuation::advanceTo(*controlFlowPredecessors);
248 
249  // For all non-control flow results, consider `val` an underlying object.
250  if (isa<OpResult>(val)) {
251  result.push_back(val);
252  return WalkContinuation::skip();
253  }
254 
255  // If this place is reached, `val` is a block argument that is not
256  // understood. Therefore, we conservatively interrupt.
257  // Note: Dealing with function arguments is not necessary, as the slice
258  // would have to go through an SSACopyOp first.
260  });
261 
262  if (walkResult.wasInterrupted())
263  return failure();
264 
265  return result;
266 }
267 
268 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
269 /// the appropriate inlined memory operations in an attempt to preserve the
270 /// original semantics of the parameter attribute.
272  Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
273 
274  // First, collect all ssa copy operations, which correspond to function
275  // parameters, and additionally store the noalias parameters. All parameters
276  // have been marked by the `handleArgument` implementation by using the
277  // `ssa.copy` intrinsic. Additionally, noalias parameters have an attached
278  // `noalias` attribute to the intrinsics. These intrinsics are only meant to
279  // be temporary and should therefore be deleted after we're done using them
280  // here.
281  SetVector<LLVM::SSACopyOp> ssaCopies;
282  SetVector<LLVM::SSACopyOp> noAliasParams;
283  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
284  for (Operation *user : argument.getUsers()) {
285  auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
286  if (!ssaCopy)
287  continue;
288  ssaCopies.insert(ssaCopy);
289 
290  if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
291  continue;
292  noAliasParams.insert(ssaCopy);
293  }
294  }
295 
296  // Scope exit block to make it impossible to forget to get rid of the
297  // intrinsics.
298  auto exit = llvm::make_scope_exit([&] {
299  for (LLVM::SSACopyOp ssaCopyOp : ssaCopies) {
300  ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
301  ssaCopyOp->erase();
302  }
303  });
304 
305  // If there were no noalias parameters, we have nothing to do here.
306  if (noAliasParams.empty())
307  return;
308 
309  // Create a new domain for this specific inlining and a new scope for every
310  // noalias parameter.
311  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
312  call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
314  for (LLVM::SSACopyOp copyOp : noAliasParams) {
315  auto scope = LLVM::AliasScopeAttr::get(functionDomain);
316  pointerScopes[copyOp] = scope;
317 
318  auto builder = OpBuilder(call);
319  LLVM::NoAliasScopeDeclOp::create(builder, call->getLoc(), scope);
320  }
321 
322  // Go through every instruction and attempt to find which noalias parameters
323  // it is definitely based on and definitely not based on.
324  for (Block &inlinedBlock : inlinedBlocks) {
325  inlinedBlock.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
326  // Collect the pointer arguments affected by the alias scopes.
327  SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
328 
329  // Find the set of underlying pointers that this pointer is based on.
330  SmallPtrSet<Value, 4> basedOnPointers;
331  for (Value pointer : pointerArgs) {
332  FailureOr<SmallVector<Value>> underlyingObjectSet =
333  getUnderlyingObjectSet(pointer);
334  if (failed(underlyingObjectSet))
335  return;
336  llvm::copy(*underlyingObjectSet,
337  std::inserter(basedOnPointers, basedOnPointers.begin()));
338  }
339 
340  bool aliasesOtherKnownObject = false;
341  // Go through the based on pointers and check that they are either:
342  // * Constants that can be ignored (undef, poison, null pointer).
343  // * Based on a pointer parameter.
344  // * Other pointers that we know can't alias with our noalias parameter.
345  //
346  // Any other value might be a pointer based on any noalias parameter that
347  // hasn't been identified. In that case conservatively don't add any
348  // scopes to this operation indicating either aliasing or not aliasing
349  // with any parameter.
350  if (llvm::any_of(basedOnPointers, [&](Value object) {
351  if (matchPattern(object, m_Constant()))
352  return false;
353 
354  if (auto ssaCopy = object.getDefiningOp<LLVM::SSACopyOp>()) {
355  // If that value is based on a noalias parameter, it is guaranteed
356  // to not alias with any other object.
357  aliasesOtherKnownObject |= !noAliasParams.contains(ssaCopy);
358  return false;
359  }
360 
361  if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
362  object.getDefiningOp())) {
363  aliasesOtherKnownObject = true;
364  return false;
365  }
366  return true;
367  }))
368  return;
369 
370  // Add all noalias parameter scopes to the noalias scope list that we are
371  // not based on.
372  SmallVector<Attribute> noAliasScopes;
373  for (LLVM::SSACopyOp noAlias : noAliasParams) {
374  if (basedOnPointers.contains(noAlias))
375  continue;
376 
377  noAliasScopes.push_back(pointerScopes[noAlias]);
378  }
379 
380  if (!noAliasScopes.empty())
381  aliasInterface.setNoAliasScopes(
382  concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
383  ArrayAttr::get(call->getContext(), noAliasScopes)));
384 
385  // Don't add alias scopes to call operations or operations that might
386  // operate on pointers not based on any noalias parameter.
387  // Since we add all scopes to an operation's noalias list that it
388  // definitely doesn't alias, we mustn't do the same for the alias.scope
389  // list if other objects are involved.
390  //
391  // Consider the following case:
392  // %0 = llvm.alloca
393  // %1 = select %magic, %0, %noalias_param
394  // store 5, %1 (1) noalias=[scope(...)]
395  // ...
396  // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
397  //
398  // We can add the scopes of any noalias parameters that aren't
399  // noalias_param's scope to (1) and add all of them to (2). We mustn't add
400  // the scope of noalias_param to the alias.scope list of (1) since
401  // that would mean (2) cannot alias with (1) which is wrong since both may
402  // store to %0.
403  //
404  // In conclusion, only add scopes to the alias.scope list if all pointers
405  // have a corresponding scope.
406  // Call operations are included in this list since we do not know whether
407  // the callee accesses any memory besides the ones passed as its
408  // arguments.
409  if (aliasesOtherKnownObject ||
410  isa<LLVM::CallOp>(aliasInterface.getOperation()))
411  return;
412 
413  SmallVector<Attribute> aliasScopes;
414  for (LLVM::SSACopyOp noAlias : noAliasParams)
415  if (basedOnPointers.contains(noAlias))
416  aliasScopes.push_back(pointerScopes[noAlias]);
417 
418  if (!aliasScopes.empty())
419  aliasInterface.setAliasScopes(
420  concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
421  ArrayAttr::get(call->getContext(), aliasScopes)));
422  });
423  }
424 }
425 
426 /// Appends any alias scopes of the call operation to any inlined memory
427 /// operation.
428 static void
430  iterator_range<Region::iterator> inlinedBlocks) {
431  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
432  if (!callAliasInterface)
433  return;
434 
435  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
436  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
437  // If the call has neither alias scopes or noalias scopes we have nothing to
438  // do here.
439  if (!aliasScopes && !noAliasScopes)
440  return;
441 
442  // Simply append the call op's alias and noalias scopes to any operation
443  // implementing AliasAnalysisOpInterface.
444  for (Block &block : inlinedBlocks) {
445  block.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
446  if (aliasScopes)
447  aliasInterface.setAliasScopes(concatArrayAttr(
448  aliasInterface.getAliasScopesOrNull(), aliasScopes));
449 
450  if (noAliasScopes)
451  aliasInterface.setNoAliasScopes(concatArrayAttr(
452  aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
453  });
454  }
455 }
456 
457 /// Handles all interactions with alias scopes during inlining.
458 static void handleAliasScopes(Operation *call,
459  iterator_range<Region::iterator> inlinedBlocks) {
460  deepCloneAliasScopes(inlinedBlocks);
461  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
462  appendCallOpAliasScopes(call, inlinedBlocks);
463 }
464 
465 /// Appends any access groups of the call operation to any inlined memory
466 /// operation.
467 static void handleAccessGroups(Operation *call,
468  iterator_range<Region::iterator> inlinedBlocks) {
469  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
470  if (!callAccessGroupInterface)
471  return;
472 
473  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
474  if (!accessGroups)
475  return;
476 
477  // Simply append the call op's access groups to any operation implementing
478  // AccessGroupOpInterface.
479  for (Block &block : inlinedBlocks)
480  for (auto accessGroupOpInterface :
481  block.getOps<LLVM::AccessGroupOpInterface>())
482  accessGroupOpInterface.setAccessGroups(concatArrayAttr(
483  accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
484 }
485 
486 /// Updates locations inside loop annotations to reflect that they were inlined.
487 static void
489  iterator_range<Region::iterator> inlinedBlocks) {
490  // Attempt to extract a DISubprogram from the callee.
491  auto func = call->getParentOfType<FunctionOpInterface>();
492  if (!func)
493  return;
494  LocationAttr funcLoc = func->getLoc();
495  auto fusedLoc = dyn_cast_if_present<FusedLoc>(funcLoc);
496  if (!fusedLoc)
497  return;
498  auto scope =
499  dyn_cast_if_present<LLVM::DISubprogramAttr>(fusedLoc.getMetadata());
500  if (!scope)
501  return;
502 
503  // Helper to build a new fused location that reflects the inlining of the loop
504  // annotation.
505  auto updateLoc = [&](FusedLoc loc) -> FusedLoc {
506  if (!loc)
507  return {};
508  Location callSiteLoc = CallSiteLoc::get(loc, call->getLoc());
509  return FusedLoc::get(loc.getContext(), callSiteLoc, scope);
510  };
511 
512  AttrTypeReplacer replacer;
513  replacer.addReplacement([&](LLVM::LoopAnnotationAttr loopAnnotation)
514  -> std::pair<Attribute, WalkResult> {
515  FusedLoc newStartLoc = updateLoc(loopAnnotation.getStartLoc());
516  FusedLoc newEndLoc = updateLoc(loopAnnotation.getEndLoc());
517  if (!newStartLoc && !newEndLoc)
518  return {loopAnnotation, WalkResult::advance()};
519  auto newLoopAnnotation = LLVM::LoopAnnotationAttr::get(
520  loopAnnotation.getContext(), loopAnnotation.getDisableNonforced(),
521  loopAnnotation.getVectorize(), loopAnnotation.getInterleave(),
522  loopAnnotation.getUnroll(), loopAnnotation.getUnrollAndJam(),
523  loopAnnotation.getLicm(), loopAnnotation.getDistribute(),
524  loopAnnotation.getPipeline(), loopAnnotation.getPeeled(),
525  loopAnnotation.getUnswitch(), loopAnnotation.getMustProgress(),
526  loopAnnotation.getIsVectorized(), newStartLoc, newEndLoc,
527  loopAnnotation.getParallelAccesses());
528  // Needs to advance, as loop annotations can be nested.
529  return {newLoopAnnotation, WalkResult::advance()};
530  });
531 
532  for (Block &block : inlinedBlocks)
533  for (Operation &op : block)
534  replacer.recursivelyReplaceElementsIn(&op);
535 }
536 
537 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
538 /// realigns `alloca` if this does not exceed the natural stack alignment.
539 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
540 static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
541  uint64_t requestedAlignment,
542  DataLayout const &dataLayout) {
543  uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
544  if (requestedAlignment <= allocaAlignment)
545  // No realignment necessary.
546  return allocaAlignment;
547  uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
548  // If the natural stack alignment is not specified, the data layout returns
549  // zero. Optimistically allow realignment in this case.
550  if (naturalStackAlignmentBits == 0 ||
551  // If the requested alignment exceeds the natural stack alignment, this
552  // will trigger a dynamic stack realignment, so we prefer to copy...
553  8 * requestedAlignment <= naturalStackAlignmentBits ||
554  // ...unless the alloca already triggers dynamic stack realignment. Then
555  // we might as well further increase the alignment to avoid a copy.
556  8 * allocaAlignment > naturalStackAlignmentBits) {
557  alloca.setAlignment(requestedAlignment);
558  allocaAlignment = requestedAlignment;
559  }
560  return allocaAlignment;
561 }
562 
563 /// Tries to find and return the alignment of the pointer `value` by looking for
564 /// an alignment attribute on the defining allocation op or function argument.
565 /// If the found alignment is lower than `requestedAlignment`, tries to realign
566 /// the pointer, then returns the resulting post-alignment, regardless of
567 /// whether it was realigned or not. If no existing alignment attribute is
568 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
569 static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
570  DataLayout const &dataLayout) {
571  if (Operation *definingOp = value.getDefiningOp()) {
572  if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
573  return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
574  dataLayout);
575  if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
576  if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
577  definingOp, addressOf.getGlobalNameAttr()))
578  return global.getAlignment().value_or(1);
579  // We don't currently handle this operation; assume no alignment.
580  return 1;
581  }
582  // Since there is no defining op, this is a block argument. Probably this
583  // comes directly from a function argument, so check that this is the case.
584  Operation *parentOp = value.getParentBlock()->getParentOp();
585  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
586  // Use the alignment attribute set for this argument in the parent function
587  // if it has been set.
588  auto blockArg = llvm::cast<BlockArgument>(value);
589  if (Attribute alignAttr = func.getArgAttr(
590  blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
591  return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
592  }
593  // We didn't find anything useful; assume no alignment.
594  return 1;
595 }
596 
597 /// Introduces a new alloca and copies the memory pointed to by `argument` to
598 /// the address of the new alloca, then returns the value of the new alloca.
600  Value argument, Type elementType,
601  uint64_t elementTypeSize,
602  uint64_t targetAlignment) {
603  // Allocate the new value on the stack.
604  Value allocaOp;
605  {
606  // Since this is a static alloca, we can put it directly in the entry block,
607  // so they can be absorbed into the prologue/epilogue at code generation.
608  OpBuilder::InsertionGuard insertionGuard(builder);
609  Block *entryBlock = &(*argument.getParentRegion()->begin());
610  builder.setInsertionPointToStart(entryBlock);
611  Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
612  builder.getI64IntegerAttr(1));
613  allocaOp = LLVM::AllocaOp::create(builder, loc, argument.getType(),
614  elementType, one, targetAlignment);
615  }
616  // Copy the pointee to the newly allocated value.
617  Value copySize =
618  LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
619  builder.getI64IntegerAttr(elementTypeSize));
620  LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
621  /*isVolatile=*/false);
622  return allocaOp;
623 }
624 
625 /// Handles a function argument marked with the byval attribute by introducing a
626 /// memcpy or realigning the defining operation, if required either due to the
627 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
628 /// `requestedAlignment` specifies the alignment set in the "align" argument
629 /// attribute (or 1 if no align attribute was set).
630 static Value handleByValArgument(OpBuilder &builder, Operation *callable,
631  Value argument, Type elementType,
632  uint64_t requestedAlignment) {
633  auto func = cast<LLVM::LLVMFuncOp>(callable);
634  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryEffectsAttr();
635  // If there is no memory effects attribute, assume that the function is
636  // not read-only.
637  bool isReadOnly = memoryEffects &&
638  memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
639  memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
640  // Check if there's an alignment mismatch requiring us to copy.
641  DataLayout dataLayout = DataLayout::closest(callable);
642  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
643  if (isReadOnly) {
644  if (requestedAlignment <= minimumAlignment)
645  return argument;
646  uint64_t currentAlignment =
647  tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
648  if (currentAlignment >= requestedAlignment)
649  return argument;
650  }
651  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
653  builder, argument.getLoc(), argument, elementType,
654  dataLayout.getTypeSize(elementType), targetAlignment);
655 }
656 
657 namespace {
658 struct LLVMInlinerInterface : public DialectInlinerInterface {
660 
  /// Constructs the interface and pre-interns the set of passthrough function
  /// attribute names that block inlining, so `isLegalToInline` can test
  /// membership with StringAttr pointer comparisons instead of string lookups.
  LLVMInlinerInterface(Dialect *dialect)
      : DialectInlinerInterface(dialect),
        // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
        disallowedFunctionAttrs({
            StringAttr::get(dialect->getContext(), "noduplicate"),
            StringAttr::get(dialect->getContext(), "presplitcoroutine"),
            StringAttr::get(dialect->getContext(), "returns_twice"),
            StringAttr::get(dialect->getContext(), "strictfp"),
        }) {}
670 
  /// Decides whether `call` may be inlined with body of `callable`. Each
  /// rejection logs its reason under the "llvm-inliner" debug type.
  bool isLegalToInline(Operation *call, Operation *callable,
                       bool wouldBeCloned) const final {
    // Only llvm.call sites into llvm.func bodies are supported.
    auto callOp = dyn_cast<LLVM::CallOp>(call);
    if (!callOp) {
      LDBG() << "Cannot inline: call is not an '"
             << LLVM::CallOp::getOperationName() << "' op";
      return false;
    }
    // Respect a no_inline marker on the call site itself.
    if (callOp.getNoInline()) {
      LDBG() << "Cannot inline: call is marked no_inline";
      return false;
    }
    auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
    if (!funcOp) {
      LDBG() << "Cannot inline: callable is not an '"
             << LLVM::LLVMFuncOp::getOperationName() << "' op";
      return false;
    }
    // Respect a no_inline marker on the callee.
    if (funcOp.isNoInline()) {
      LDBG() << "Cannot inline: function is marked no_inline";
      return false;
    }
    // Variadic callees are not supported by this inliner.
    if (funcOp.isVarArg()) {
      LDBG() << "Cannot inline: callable is variadic";
      return false;
    }
    // TODO: Generate aliasing metadata from noalias result attributes.
    // inalloca argument semantics are not implemented; bail out if any
    // parameter carries the attribute.
    if (auto attrs = funcOp.getArgAttrs()) {
      for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
        if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
          LDBG() << "Cannot inline " << funcOp.getSymName()
                 << ": inalloca arguments not supported";
          return false;
        }
      }
    }
    // TODO: Handle exceptions.
    if (funcOp.getPersonality()) {
      LDBG() << "Cannot inline " << funcOp.getSymName()
             << ": unhandled function personality";
      return false;
    }
    // Reject callees whose passthrough attributes contain any of the cached
    // disallowed names (noduplicate, presplitcoroutine, returns_twice,
    // strictfp).
    if (funcOp.getPassthrough()) {
      // TODO: Used attributes should not be passthrough.
      if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
            auto stringAttr = dyn_cast<StringAttr>(attr);
            if (!stringAttr)
              return false;
            if (disallowedFunctionAttrs.contains(stringAttr)) {
              LDBG() << "Cannot inline " << funcOp.getSymName()
                     << ": found disallowed function attribute " << stringAttr;
              return true;
            }
            return false;
          }))
        return false;
    }
    return true;
  }
730 
  /// Any LLVM dialect region may be inlined into any other region.
  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
    return true;
  }
734 
735  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
736  // The inliner cannot handle variadic function arguments and blocktag
737  // operations prevent inlining since they the blockaddress operations
738  // reference them via the callee symbol.
739  return !(isa<LLVM::VaStartOp>(op) || isa<LLVM::BlockTagOp>(op));
740  }
741 
742  /// Handle the given inlined return by replacing it with a branch. This
743  /// overload is called when the inlined region has more than one block.
744  void handleTerminator(Operation *op, Block *newDest) const final {
745  // Only return needs to be handled here.
746  auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
747  if (!returnOp)
748  return;
749 
750  // Replace the return with a branch to the dest.
751  OpBuilder builder(op);
752  LLVM::BrOp::create(builder, op->getLoc(), returnOp.getOperands(), newDest);
753  op->erase();
754  }
755 
756  bool allowSingleBlockOptimization(
757  iterator_range<Region::iterator> inlinedBlocks) const final {
758  if (!inlinedBlocks.empty() &&
759  isa<LLVM::UnreachableOp>(inlinedBlocks.begin()->getTerminator()))
760  return false;
761  return true;
762  }
763 
764  /// Handle the given inlined return by replacing the uses of the call with the
765  /// operands of the return. This overload is called when the inlined region
766  /// only contains one block.
767  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
768  // Return will be the only terminator present.
769  auto returnOp = cast<LLVM::ReturnOp>(op);
770 
771  // Replace the values directly with the return operands.
772  assert(returnOp.getNumOperands() == valuesToRepl.size());
773  for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
774  dst.replaceAllUsesWith(src);
775  }
776 
777  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
778  Value argument,
779  DictionaryAttr argumentAttrs) const final {
780  if (std::optional<NamedAttribute> attr =
781  argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
782  Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
783  uint64_t requestedAlignment = 1;
784  if (std::optional<NamedAttribute> alignAttr =
785  argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
786  requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
787  .getValue()
788  .getLimitedValue();
789  }
790  return handleByValArgument(builder, callable, argument, elementType,
791  requestedAlignment);
792  }
793 
794  // This code is essentially a workaround for deficiencies in the inliner
795  // interface: We need to transform operations *after* inlined based on the
796  // argument attributes of the parameters *before* inlining. This method runs
797  // prior to actual inlining and thus cannot transform the post-inlining
798  // code, while `processInlinedCallBlocks` does not have access to
799  // pre-inlining function arguments. Additionally, it is required to
800  // distinguish which parameter an SSA value originally came from. As a
801  // workaround until this is changed: Create an ssa.copy intrinsic with the
802  // noalias attribute (when it was present before) that can easily be found,
803  // and is extremely unlikely to exist in the code prior to inlining, using
804  // this to communicate between this method and `processInlinedCallBlocks`.
805  // TODO: Fix this by refactoring the inliner interface.
806  auto copyOp = LLVM::SSACopyOp::create(builder, call->getLoc(), argument);
807  if (argumentAttrs.contains(LLVM::LLVMDialect::getNoAliasAttrName()))
808  copyOp->setDiscardableAttr(
809  builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
810  builder.getUnitAttr());
811  return copyOp;
812  }
813 
  /// Post-processes the blocks that were inlined for `call`: relocates static
  /// allocas, rewrites alias scope metadata, propagates access groups, and
  /// updates loop annotation locations to reflect the inlining.
  void processInlinedCallBlocks(
      Operation *call,
      iterator_range<Region::iterator> inlinedBlocks) const override {
    handleInlinedAllocas(call, inlinedBlocks);
    handleAliasScopes(call, inlinedBlocks);
    handleAccessGroups(call, inlinedBlocks);
    handleLoopAnnotations(call, inlinedBlocks);
  }
822 
823  // Keeping this (immutable) state on the interface allows us to look up
824  // StringAttrs instead of looking up strings, since StringAttrs are bound to
825  // the current context and thus cannot be initialized as static fields.
826  const DenseSet<StringAttr> disallowedFunctionAttrs;
827 };
828 
829 } // end anonymous namespace
830 
832  registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
833  dialect->addInterfaces<LLVMInlinerInterface>();
834  });
835 }
836 
838  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
839  dialect->addInterfaces<LLVMInlinerInterface>();
840  });
841 }
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or more casts on the way. This is not transitive through block arguments.
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static void handleLoopAnnotations(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Updates locations inside loop annotations to reflect that they were inlined.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined m...
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does not exceed the natural stack alignment.
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static FailureOr< SmallVector< Value > > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This is an attribute/type replacer that is naively cached.
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:33
iterator begin()
Definition: Block.h:143
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:193
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:31
IntegerType getI64Type()
Definition: Builders.cpp:64
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:111
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:91
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:44
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:46
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition: Dialect.h:38
MLIRContext * getContext() const
Definition: Dialect.h:52
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
Location objects represent source locations information in MLIR.
Definition: Location.h:32
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:348
This class helps build Operations.
Definition: Builders.h:207
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:431
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:398
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation, or nullptr if this is a top-level operation.
Definition: Operation.h:234
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:46
Location getLoc() const
Return the location of this value.
Definition: Value.cpp:24
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:39
A class to signal how to proceed with the walk of the backward slice:
Definition: SliceWalk.h:20
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition: SliceWalk.h:60
static WalkContinuation skip()
Creates a continuation that advances the walk without adding any predecessor values to the work list.
Definition: SliceWalk.h:55
static WalkContinuation advanceTo(mlir::ValueRange nextValues)
Creates a continuation that adds the user-specified nextValues to the work list and advances the walk...
Definition: SliceWalk.h:49
static WalkContinuation interrupt()
Creates a continuation that interrupts the walk.
Definition: SliceWalk.h:43
static WalkResult advance()
Definition: WalkResult.h:47
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
Register a replacement function for mapping a given attribute or type.
void registerInlinerInterface(DialectRegistry &registry)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
void registerInlinerInterface(DialectRegistry &registry)
Register the NVVMInlinerInterface implementation of DialectInlinerInterface with the NVVM dialect.
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:490
std::optional< SmallVector< Value > > getControlFlowPredecessors(Value value)
Computes a vector of all control predecessors of value.
Definition: SliceWalk.cpp:106
WalkContinuation walkSlice(mlir::ValueRange rootValues, WalkCallback walkCallback)
Walks the slice starting from the rootValues using a depth-first traversal.
Definition: SliceWalk.cpp:6
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction methods.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369
This trait indicates that a terminator operation is "return-like".