MLIR  19.0.0git
LLVMInlining.cpp
Go to the documentation of this file.
1 //===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Logic for inlining LLVM functions and the definition of the
10 // LLVMInliningInterface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LLVMInlining.h"
16 #include "mlir/IR/Matchers.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "llvm-inliner"
23 
24 using namespace mlir;
25 
26 /// Check whether the given alloca is an input to a lifetime intrinsic,
27 /// optionally passing through one or more casts on the way. This is not
28 /// transitive through block arguments.
29 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
30  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
31  allocaOp->getUsers().end());
32  while (!stack.empty()) {
33  Operation *op = stack.pop_back_val();
34  if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
35  return true;
36  if (isa<LLVM::BitcastOp>(op))
37  stack.append(op->getUsers().begin(), op->getUsers().end());
38  }
39  return false;
40 }
41 
42 /// Handles alloca operations in the inlined blocks:
43 /// - Moves all alloca operations with a constant size in the former entry block
44 /// of the callee into the entry block of the caller, so they become part of
45 /// the function prologue/epilogue during code generation.
46 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
47 /// to the inlined blocks.
48 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
49 /// inlined.
50 static void
52  iterator_range<Region::iterator> inlinedBlocks) {
53  // Locate the entry block of the closest callsite ancestor that has either the
54  // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
55  // programs, this is the LLVMFuncOp containing the call site. However, in
56  // mixed-dialect programs, the callsite might be nested in another operation
57  // that carries one of these traits. In such scenarios, this traversal stops
58  // at the closest ancestor with either trait, ensuring visibility post
59  // relocation and respecting allocation scopes.
60  Block *callerEntryBlock = nullptr;
61  Operation *currentOp = call;
62  while (Operation *parentOp = currentOp->getParentOp()) {
63  if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
64  parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
65  callerEntryBlock = &currentOp->getParentRegion()->front();
66  break;
67  }
68  currentOp = parentOp;
69  }
70 
71  // Avoid relocating the alloca operations if the call has been inlined into
72  // the entry block already, which is typically the encompassing
73  // LLVM function, or if the relevant entry block cannot be identified.
74  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
75  if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
76  return;
77 
79  bool shouldInsertLifetimes = false;
80  bool hasDynamicAlloca = false;
81  // Conservatively only move static alloca operations that are part of the
82  // entry block and do not inspect nested regions, since they may execute
83  // conditionally or have other unknown semantics.
84  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
85  IntegerAttr arraySize;
86  if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
87  hasDynamicAlloca = true;
88  continue;
89  }
90  bool shouldInsertLifetime =
91  arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
92  shouldInsertLifetimes |= shouldInsertLifetime;
93  allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
94  }
95  // Check the remaining inlined blocks for dynamic allocas as well.
96  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
97  if (hasDynamicAlloca)
98  break;
99  hasDynamicAlloca =
100  llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
101  return !matchPattern(allocaOp.getArraySize(), m_Constant());
102  });
103  }
104  if (allocasToMove.empty() && !hasDynamicAlloca)
105  return;
106  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
107  Value stackPtr;
108  if (hasDynamicAlloca) {
109  // This may result in multiple stacksave/stackrestore intrinsics in the same
110  // scope if some are already present in the body of the caller. This is not
111  // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
112  // other cases where the stacksave/stackrestore is redundant.
113  stackPtr = builder.create<LLVM::StackSaveOp>(
114  call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
115  }
116  builder.setInsertionPoint(callerEntryBlock, callerEntryBlock->begin());
117  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
118  auto newConstant = builder.create<LLVM::ConstantOp>(
119  allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
120  // Insert a lifetime start intrinsic where the alloca was before moving it.
121  if (shouldInsertLifetime) {
122  OpBuilder::InsertionGuard insertionGuard(builder);
123  builder.setInsertionPoint(allocaOp);
124  builder.create<LLVM::LifetimeStartOp>(
125  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
126  allocaOp.getResult());
127  }
128  allocaOp->moveAfter(newConstant);
129  allocaOp.getArraySizeMutable().assign(newConstant.getResult());
130  }
131  if (!shouldInsertLifetimes && !hasDynamicAlloca)
132  return;
133  // Insert a lifetime end intrinsic before each return in the callee function.
134  for (Block &block : inlinedBlocks) {
135  if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
136  continue;
137  builder.setInsertionPoint(block.getTerminator());
138  if (hasDynamicAlloca)
139  builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
140  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
141  if (shouldInsertLifetime)
142  builder.create<LLVM::LifetimeEndOp>(
143  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
144  allocaOp.getResult());
145  }
146  }
147 }
148 
149 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
150 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
151 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
152 static void
155 
156  // Register handles in the walker to create the deep clones.
157  // The walker ensures that an attribute is only ever walked once and does a
158  // post-order walk, ensuring the domain is visited prior to the scope.
159  AttrTypeWalker walker;
160 
161  // Perform the deep clones while visiting. Builders create a distinct
162  // attribute to make sure that new instances are always created by the
163  // uniquer.
164  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
165  mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
166  domainAttr.getContext(), domainAttr.getDescription());
167  });
168 
169  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
170  mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
171  cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
172  scopeAttr.getDescription());
173  });
174 
175  // Map an array of scopes to an array of deep clones.
176  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
177  if (!arrayAttr)
178  return nullptr;
179 
180  // Create the deep clones if necessary.
181  walker.walk(arrayAttr);
182 
183  return ArrayAttr::get(arrayAttr.getContext(),
184  llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
185  return mapping.lookup(attr);
186  }));
187  };
188 
189  for (Block &block : inlinedBlocks) {
190  for (Operation &op : block) {
191  if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
192  aliasInterface.setAliasScopes(
193  convertScopeList(aliasInterface.getAliasScopesOrNull()));
194  aliasInterface.setNoAliasScopes(
195  convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
196  }
197 
198  if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
199  // Create the deep clones if necessary.
200  walker.walk(noAliasScope.getScopeAttr());
201 
202  noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
203  mapping.lookup(noAliasScope.getScopeAttr())));
204  }
205  }
206  }
207 }
208 
209 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
210 /// Returns null if both parameters are null. If only one attribute is null,
211 /// return the other.
212 static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
213  if (!lhs)
214  return rhs;
215  if (!rhs)
216  return lhs;
217 
218  SmallVector<Attribute> result;
219  llvm::append_range(result, lhs);
220  llvm::append_range(result, rhs);
221  return ArrayAttr::get(lhs.getContext(), result);
222 }
223 
224 /// Attempts to return the underlying pointer value that `pointerValue` is based
225 /// on. This traverses down the chain of operations to the last operation
226 /// producing the base pointer and returns it. If it encounters an operation it
227 /// cannot further traverse through, returns the operation's result.
228 static Value getUnderlyingObject(Value pointerValue) {
229  while (true) {
230  if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
231  pointerValue = gepOp.getBase();
232  continue;
233  }
234 
235  if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
236  pointerValue = addrCast.getOperand();
237  continue;
238  }
239 
240  break;
241  }
242 
243  return pointerValue;
244 }
245 
246 /// Attempts to return the set of all underlying pointer values that
247 /// `pointerValue` is based on. This function traverses through select
248 /// operations and block arguments unlike getUnderlyingObject.
250  SmallVector<Value> result;
251 
252  SmallVector<Value> workList{pointerValue};
253  // Avoid dataflow loops.
255  do {
256  Value current = workList.pop_back_val();
257  current = getUnderlyingObject(current);
258 
259  if (!seen.insert(current).second)
260  continue;
261 
262  if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
263  workList.push_back(selectOp.getTrueValue());
264  workList.push_back(selectOp.getFalseValue());
265  continue;
266  }
267 
268  if (auto blockArg = dyn_cast<BlockArgument>(current)) {
269  Block *parentBlock = blockArg.getParentBlock();
270 
271  // Attempt to find all block argument operands for every predecessor.
272  // If any operand to the block argument wasn't found in a predecessor,
273  // conservatively add the block argument to the result set.
274  SmallVector<Value> operands;
275  bool anyUnknown = false;
276  for (auto iter = parentBlock->pred_begin();
277  iter != parentBlock->pred_end(); iter++) {
278  auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
279  if (!branch) {
280  result.push_back(blockArg);
281  anyUnknown = true;
282  break;
283  }
284 
285  Value operand = branch.getSuccessorOperands(
286  iter.getSuccessorIndex())[blockArg.getArgNumber()];
287  if (!operand) {
288  result.push_back(blockArg);
289  anyUnknown = true;
290  break;
291  }
292 
293  operands.push_back(operand);
294  }
295 
296  if (!anyUnknown)
297  llvm::append_range(workList, operands);
298 
299  continue;
300  }
301 
302  result.push_back(current);
303  } while (!workList.empty());
304 
305  return result;
306 }
307 
308 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
309 /// the appropriate inlined memory operations in an attempt to preserve the
310 /// original semantics of the parameter attribute.
312  Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
313 
314  // First collect all noalias parameters. These have been specially marked by
315  // the `handleArgument` implementation by using the `ssa.copy` intrinsic and
316  // attaching a `noalias` attribute to it.
317  // These are only meant to be temporary and should therefore be deleted after
318  // we're done using them here.
319  SetVector<LLVM::SSACopyOp> noAliasParams;
320  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
321  for (Operation *user : argument.getUsers()) {
322  auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
323  if (!ssaCopy)
324  continue;
325  if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
326  continue;
327 
328  noAliasParams.insert(ssaCopy);
329  }
330  }
331 
332  // If there were none, we have nothing to do here.
333  if (noAliasParams.empty())
334  return;
335 
336  // Scope exit block to make it impossible to forget to get rid of the
337  // intrinsics.
338  auto exit = llvm::make_scope_exit([&] {
339  for (LLVM::SSACopyOp ssaCopyOp : noAliasParams) {
340  ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
341  ssaCopyOp->erase();
342  }
343  });
344 
345  // Create a new domain for this specific inlining and a new scope for every
346  // noalias parameter.
347  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
348  call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
350  for (LLVM::SSACopyOp copyOp : noAliasParams) {
351  auto scope = LLVM::AliasScopeAttr::get(functionDomain);
352  pointerScopes[copyOp] = scope;
353 
354  OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope);
355  }
356 
357  // Go through every instruction and attempt to find which noalias parameters
358  // it is definitely based on and definitely not based on.
359  for (Block &inlinedBlock : inlinedBlocks) {
360  for (auto aliasInterface :
361  inlinedBlock.getOps<LLVM::AliasAnalysisOpInterface>()) {
362 
363  // Collect the pointer arguments affected by the alias scopes.
364  SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
365 
366  // Find the set of underlying pointers that this pointer is based on.
367  SmallPtrSet<Value, 4> basedOnPointers;
368  for (Value pointer : pointerArgs)
370  std::inserter(basedOnPointers, basedOnPointers.begin()));
371 
372  bool aliasesOtherKnownObject = false;
373  // Go through the based on pointers and check that they are either:
374  // * Constants that can be ignored (undef, poison, null pointer).
375  // * Based on a noalias parameter.
376  // * Other pointers that we know can't alias with our noalias parameter.
377  //
378  // Any other value might be a pointer based on any noalias parameter that
379  // hasn't been identified. In that case conservatively don't add any
380  // scopes to this operation indicating either aliasing or not aliasing
381  // with any parameter.
382  if (llvm::any_of(basedOnPointers, [&](Value object) {
383  if (matchPattern(object, m_Constant()))
384  return false;
385 
386  if (noAliasParams.contains(object.getDefiningOp<LLVM::SSACopyOp>()))
387  return false;
388 
389  // TODO: This should include other arguments from the inlined
390  // callable.
391  if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
392  object.getDefiningOp())) {
393  aliasesOtherKnownObject = true;
394  return false;
395  }
396  return true;
397  }))
398  continue;
399 
400  // Add all noalias parameter scopes to the noalias scope list that we are
401  // not based on.
402  SmallVector<Attribute> noAliasScopes;
403  for (LLVM::SSACopyOp noAlias : noAliasParams) {
404  if (basedOnPointers.contains(noAlias))
405  continue;
406 
407  noAliasScopes.push_back(pointerScopes[noAlias]);
408  }
409 
410  if (!noAliasScopes.empty())
411  aliasInterface.setNoAliasScopes(
412  concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
413  ArrayAttr::get(call->getContext(), noAliasScopes)));
414 
415  // Don't add alias scopes to call operations or operations that might
416  // operate on pointers not based on any noalias parameter.
417  // Since we add all scopes to an operation's noalias list that it
418  // definitely doesn't alias, we mustn't do the same for the alias.scope
419  // list if other objects are involved.
420  //
421  // Consider the following case:
422  // %0 = llvm.alloca
423  // %1 = select %magic, %0, %noalias_param
424  // store 5, %1 (1) noalias=[scope(...)]
425  // ...
426  // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
427  //
428  // We can add the scopes of any noalias parameters that aren't
429  // noalias_param's scope to (1) and add all of them to (2). We mustn't add
430  // the scope of noalias_param to the alias.scope list of (1) since
431  // that would mean (2) cannot alias with (1) which is wrong since both may
432  // store to %0.
433  //
434  // In conclusion, only add scopes to the alias.scope list if all pointers
435  // have a corresponding scope.
436  // Call operations are included in this list since we do not know whether
437  // the callee accesses any memory besides the ones passed as its
438  // arguments.
439  if (aliasesOtherKnownObject ||
440  isa<LLVM::CallOp>(aliasInterface.getOperation()))
441  continue;
442 
443  SmallVector<Attribute> aliasScopes;
444  for (LLVM::SSACopyOp noAlias : noAliasParams)
445  if (basedOnPointers.contains(noAlias))
446  aliasScopes.push_back(pointerScopes[noAlias]);
447 
448  if (!aliasScopes.empty())
449  aliasInterface.setAliasScopes(
450  concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
451  ArrayAttr::get(call->getContext(), aliasScopes)));
452  }
453  }
454 }
455 
456 /// Appends any alias scopes of the call operation to any inlined memory
457 /// operation.
458 static void
460  iterator_range<Region::iterator> inlinedBlocks) {
461  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
462  if (!callAliasInterface)
463  return;
464 
465  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
466  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
467  // If the call has neither alias scopes or noalias scopes we have nothing to
468  // do here.
469  if (!aliasScopes && !noAliasScopes)
470  return;
471 
472  // Simply append the call op's alias and noalias scopes to any operation
473  // implementing AliasAnalysisOpInterface.
474  for (Block &block : inlinedBlocks) {
475  for (auto aliasInterface : block.getOps<LLVM::AliasAnalysisOpInterface>()) {
476  if (aliasScopes)
477  aliasInterface.setAliasScopes(concatArrayAttr(
478  aliasInterface.getAliasScopesOrNull(), aliasScopes));
479 
480  if (noAliasScopes)
481  aliasInterface.setNoAliasScopes(concatArrayAttr(
482  aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
483  }
484  }
485 }
486 
487 /// Handles all interactions with alias scopes during inlining.
488 static void handleAliasScopes(Operation *call,
489  iterator_range<Region::iterator> inlinedBlocks) {
490  deepCloneAliasScopes(inlinedBlocks);
491  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
492  appendCallOpAliasScopes(call, inlinedBlocks);
493 }
494 
495 /// Appends any access groups of the call operation to any inlined memory
496 /// operation.
497 static void handleAccessGroups(Operation *call,
498  iterator_range<Region::iterator> inlinedBlocks) {
499  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
500  if (!callAccessGroupInterface)
501  return;
502 
503  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
504  if (!accessGroups)
505  return;
506 
507  // Simply append the call op's access groups to any operation implementing
508  // AccessGroupOpInterface.
509  for (Block &block : inlinedBlocks)
510  for (auto accessGroupOpInterface :
511  block.getOps<LLVM::AccessGroupOpInterface>())
512  accessGroupOpInterface.setAccessGroups(concatArrayAttr(
513  accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
514 }
515 
516 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
517 /// realigns `alloca` if this does not exceed the natural stack alignment.
518 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
519 static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
520  uint64_t requestedAlignment,
521  DataLayout const &dataLayout) {
522  uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
523  if (requestedAlignment <= allocaAlignment)
524  // No realignment necessary.
525  return allocaAlignment;
526  uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
527  // If the natural stack alignment is not specified, the data layout returns
528  // zero. Optimistically allow realignment in this case.
529  if (naturalStackAlignmentBits == 0 ||
530  // If the requested alignment exceeds the natural stack alignment, this
531  // will trigger a dynamic stack realignment, so we prefer to copy...
532  8 * requestedAlignment <= naturalStackAlignmentBits ||
533  // ...unless the alloca already triggers dynamic stack realignment. Then
534  // we might as well further increase the alignment to avoid a copy.
535  8 * allocaAlignment > naturalStackAlignmentBits) {
536  alloca.setAlignment(requestedAlignment);
537  allocaAlignment = requestedAlignment;
538  }
539  return allocaAlignment;
540 }
541 
542 /// Tries to find and return the alignment of the pointer `value` by looking for
543 /// an alignment attribute on the defining allocation op or function argument.
544 /// If the found alignment is lower than `requestedAlignment`, tries to realign
545 /// the pointer, then returns the resulting post-alignment, regardless of
546 /// whether it was realigned or not. If no existing alignment attribute is
547 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
548 static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
549  DataLayout const &dataLayout) {
550  if (Operation *definingOp = value.getDefiningOp()) {
551  if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
552  return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
553  dataLayout);
554  if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
555  if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
556  definingOp, addressOf.getGlobalNameAttr()))
557  return global.getAlignment().value_or(1);
558  // We don't currently handle this operation; assume no alignment.
559  return 1;
560  }
561  // Since there is no defining op, this is a block argument. Probably this
562  // comes directly from a function argument, so check that this is the case.
563  Operation *parentOp = value.getParentBlock()->getParentOp();
564  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
565  // Use the alignment attribute set for this argument in the parent function
566  // if it has been set.
567  auto blockArg = llvm::cast<BlockArgument>(value);
568  if (Attribute alignAttr = func.getArgAttr(
569  blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
570  return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
571  }
572  // We didn't find anything useful; assume no alignment.
573  return 1;
574 }
575 
576 /// Introduces a new alloca and copies the memory pointed to by `argument` to
577 /// the address of the new alloca, then returns the value of the new alloca.
579  Value argument, Type elementType,
580  uint64_t elementTypeSize,
581  uint64_t targetAlignment) {
582  // Allocate the new value on the stack.
583  Value allocaOp;
584  {
585  // Since this is a static alloca, we can put it directly in the entry block,
586  // so they can be absorbed into the prologue/epilogue at code generation.
587  OpBuilder::InsertionGuard insertionGuard(builder);
588  Block *entryBlock = &(*argument.getParentRegion()->begin());
589  builder.setInsertionPointToStart(entryBlock);
590  Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
591  builder.getI64IntegerAttr(1));
592  allocaOp = builder.create<LLVM::AllocaOp>(
593  loc, argument.getType(), elementType, one, targetAlignment);
594  }
595  // Copy the pointee to the newly allocated value.
596  Value copySize = builder.create<LLVM::ConstantOp>(
597  loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize));
598  builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize,
599  /*isVolatile=*/false);
600  return allocaOp;
601 }
602 
603 /// Handles a function argument marked with the byval attribute by introducing a
604 /// memcpy or realigning the defining operation, if required either due to the
605 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
606 /// `requestedAlignment` specifies the alignment set in the "align" argument
607 /// attribute (or 1 if no align attribute was set).
608 static Value handleByValArgument(OpBuilder &builder, Operation *callable,
609  Value argument, Type elementType,
610  uint64_t requestedAlignment) {
611  auto func = cast<LLVM::LLVMFuncOp>(callable);
612  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr();
613  // If there is no memory effects attribute, assume that the function is
614  // not read-only.
615  bool isReadOnly = memoryEffects &&
616  memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
617  memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
618  // Check if there's an alignment mismatch requiring us to copy.
619  DataLayout dataLayout = DataLayout::closest(callable);
620  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
621  if (isReadOnly) {
622  if (requestedAlignment <= minimumAlignment)
623  return argument;
624  uint64_t currentAlignment =
625  tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
626  if (currentAlignment >= requestedAlignment)
627  return argument;
628  }
629  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
630  return handleByValArgumentInit(builder, func.getLoc(), argument, elementType,
631  dataLayout.getTypeSize(elementType),
632  targetAlignment);
633 }
634 
635 namespace {
636 struct LLVMInlinerInterface : public DialectInlinerInterface {
638 
639  LLVMInlinerInterface(Dialect *dialect)
640  : DialectInlinerInterface(dialect),
641  // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
642  disallowedFunctionAttrs({
643  StringAttr::get(dialect->getContext(), "noduplicate"),
644  StringAttr::get(dialect->getContext(), "noinline"),
645  StringAttr::get(dialect->getContext(), "optnone"),
646  StringAttr::get(dialect->getContext(), "presplitcoroutine"),
647  StringAttr::get(dialect->getContext(), "returns_twice"),
648  StringAttr::get(dialect->getContext(), "strictfp"),
649  }) {}
650 
651  bool isLegalToInline(Operation *call, Operation *callable,
652  bool wouldBeCloned) const final {
653  if (!wouldBeCloned)
654  return false;
655  if (!isa<LLVM::CallOp>(call)) {
656  LLVM_DEBUG(llvm::dbgs()
657  << "Cannot inline: call is not an LLVM::CallOp\n");
658  return false;
659  }
660  auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
661  if (!funcOp) {
662  LLVM_DEBUG(llvm::dbgs()
663  << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n");
664  return false;
665  }
666  if (funcOp.isVarArg()) {
667  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: callable is variadic\n");
668  return false;
669  }
670  // TODO: Generate aliasing metadata from noalias argument/result attributes.
671  if (auto attrs = funcOp.getArgAttrs()) {
672  for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
673  if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
674  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
675  << ": inalloca arguments not supported\n");
676  return false;
677  }
678  }
679  }
680  // TODO: Handle exceptions.
681  if (funcOp.getPersonality()) {
682  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
683  << ": unhandled function personality\n");
684  return false;
685  }
686  if (funcOp.getPassthrough()) {
687  // TODO: Used attributes should not be passthrough.
688  if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
689  auto stringAttr = dyn_cast<StringAttr>(attr);
690  if (!stringAttr)
691  return false;
692  if (disallowedFunctionAttrs.contains(stringAttr)) {
693  LLVM_DEBUG(llvm::dbgs()
694  << "Cannot inline " << funcOp.getSymName()
695  << ": found disallowed function attribute "
696  << stringAttr << "\n");
697  return true;
698  }
699  return false;
700  }))
701  return false;
702  }
703  return true;
704  }
705 
706  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
707  return true;
708  }
709 
710  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
711  // The inliner cannot handle variadic function arguments.
712  return !isa<LLVM::VaStartOp>(op);
713  }
714 
715  /// Handle the given inlined return by replacing it with a branch. This
716  /// overload is called when the inlined region has more than one block.
717  void handleTerminator(Operation *op, Block *newDest) const final {
718  // Only return needs to be handled here.
719  auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
720  if (!returnOp)
721  return;
722 
723  // Replace the return with a branch to the dest.
724  OpBuilder builder(op);
725  builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
726  op->erase();
727  }
728 
729  /// Handle the given inlined return by replacing the uses of the call with the
730  /// operands of the return. This overload is called when the inlined region
731  /// only contains one block.
732  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
733  // Return will be the only terminator present.
734  auto returnOp = cast<LLVM::ReturnOp>(op);
735 
736  // Replace the values directly with the return operands.
737  assert(returnOp.getNumOperands() == valuesToRepl.size());
738  for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
739  dst.replaceAllUsesWith(src);
740  }
741 
742  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
743  Value argument,
744  DictionaryAttr argumentAttrs) const final {
745  if (std::optional<NamedAttribute> attr =
746  argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
747  Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
748  uint64_t requestedAlignment = 1;
749  if (std::optional<NamedAttribute> alignAttr =
750  argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
751  requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
752  .getValue()
753  .getLimitedValue();
754  }
755  return handleByValArgument(builder, callable, argument, elementType,
756  requestedAlignment);
757  }
758  if ([[maybe_unused]] std::optional<NamedAttribute> attr =
759  argumentAttrs.getNamed(LLVM::LLVMDialect::getNoAliasAttrName())) {
760  if (argument.use_empty())
761  return argument;
762 
763  // This code is essentially a workaround for deficiencies in the
764  // inliner interface: We need to transform operations *after* inlined
765  // based on the argument attributes of the parameters *before* inlining.
766  // This method runs prior to actual inlining and thus cannot transform the
767  // post-inlining code, while `processInlinedCallBlocks` does not have
768  // access to pre-inlining function arguments. Additionally, it is required
769  // to distinguish which parameter an SSA value originally came from.
770  // As a workaround until this is changed: Create an ssa.copy intrinsic
771  // with the noalias attribute that can easily be found, and is extremely
772  // unlikely to exist in the code prior to inlining, using this to
773  // communicate between this method and `processInlinedCallBlocks`.
774  // TODO: Fix this by refactoring the inliner interface.
775  auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument);
776  copyOp->setDiscardableAttr(
777  builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
778  builder.getUnitAttr());
779  return copyOp;
780  }
781  return argument;
782  }
783 
784  void processInlinedCallBlocks(
785  Operation *call,
786  iterator_range<Region::iterator> inlinedBlocks) const override {
787  handleInlinedAllocas(call, inlinedBlocks);
788  handleAliasScopes(call, inlinedBlocks);
789  handleAccessGroups(call, inlinedBlocks);
790  }
791 
792  // Keeping this (immutable) state on the interface allows us to look up
793  // StringAttrs instead of looking up strings, since StringAttrs are bound to
794  // the current context and thus cannot be initialized as static fields.
795  const DenseSet<StringAttr> disallowedFunctionAttrs;
796 };
797 
798 } // end anonymous namespace
799 
800 void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
801  dialect->addInterfaces<LLVMInlinerInterface>();
802 }
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or more casts on the way.
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined m...
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static SmallVector< Value > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static Value getUnderlyingObject(Value pointerValue)
Attempts to return the underlying pointer value that pointerValue is based on.
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:31
pred_iterator pred_begin()
Definition: Block.h:231
iterator begin()
Definition: Block.h:141
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:191
pred_iterator pred_end()
Definition: Block.h:234
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
IntegerType getI64Type()
Definition: Builders.cpp:85
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:128
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:44
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:46
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition: Dialect.h:41
MLIRContext * getContext() const
Definition: Dialect.h:55
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:350
This class helps build Operations.
Definition: Builders.h:209
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:433
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:400
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:464
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:869
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
void moveAfter(Operation *existingOp)
Unlink this operation from its current block and insert it right after existingOp which may be in the...
Definition: Operation.cpp:569
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
void addLLVMInlinerInterface(LLVMDialect *dialect)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:401
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:310
This trait indicates that a terminator operation is "return-like".