MLIR  18.0.0git
LLVMInlining.cpp
Go to the documentation of this file.
//===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Logic for inlining LLVM functions and the definition of the
// LLVMInliningInterface.
//
//===----------------------------------------------------------------------===//

#include "LLVMInlining.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "llvm-inliner"
23 
24 using namespace mlir;
25 
26 /// Check whether the given alloca is an input to a lifetime intrinsic,
27 /// optionally passing through one or more casts on the way. This is not
28 /// transitive through block arguments.
29 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
30  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
31  allocaOp->getUsers().end());
32  while (!stack.empty()) {
33  Operation *op = stack.pop_back_val();
34  if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
35  return true;
36  if (isa<LLVM::BitcastOp>(op))
37  stack.append(op->getUsers().begin(), op->getUsers().end());
38  }
39  return false;
40 }
41 
42 /// Handles alloca operations in the inlined blocks:
43 /// - Moves all alloca operations with a constant size in the former entry block
44 /// of the callee into the entry block of the caller, so they become part of
45 /// the function prologue/epilogue during code generation.
46 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
47 /// to the inlined blocks.
48 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
49 /// inlined.
50 static void
52  iterator_range<Region::iterator> inlinedBlocks) {
53  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
54  Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin());
55  if (calleeEntryBlock == callerEntryBlock)
56  // Nothing to do.
57  return;
59  bool shouldInsertLifetimes = false;
60  bool hasDynamicAlloca = false;
61  // Conservatively only move static alloca operations that are part of the
62  // entry block and do not inspect nested regions, since they may execute
63  // conditionally or have other unknown semantics.
64  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
65  IntegerAttr arraySize;
66  if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
67  hasDynamicAlloca = true;
68  continue;
69  }
70  bool shouldInsertLifetime =
71  arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
72  shouldInsertLifetimes |= shouldInsertLifetime;
73  allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
74  }
75  // Check the remaining inlined blocks for dynamic allocas as well.
76  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
77  if (hasDynamicAlloca)
78  break;
79  hasDynamicAlloca =
80  llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
81  return !matchPattern(allocaOp.getArraySize(), m_Constant());
82  });
83  }
84  if (allocasToMove.empty() && !hasDynamicAlloca)
85  return;
86  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
87  Value stackPtr;
88  if (hasDynamicAlloca) {
89  // This may result in multiple stacksave/stackrestore intrinsics in the same
90  // scope if some are already present in the body of the caller. This is not
91  // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
92  // other cases where the stacksave/stackrestore is redundant.
93  stackPtr = builder.create<LLVM::StackSaveOp>(
94  call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
95  }
96  builder.setInsertionPoint(callerEntryBlock, callerEntryBlock->begin());
97  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
98  auto newConstant = builder.create<LLVM::ConstantOp>(
99  allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
100  // Insert a lifetime start intrinsic where the alloca was before moving it.
101  if (shouldInsertLifetime) {
102  OpBuilder::InsertionGuard insertionGuard(builder);
103  builder.setInsertionPoint(allocaOp);
104  builder.create<LLVM::LifetimeStartOp>(
105  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
106  allocaOp.getResult());
107  }
108  allocaOp->moveAfter(newConstant);
109  allocaOp.getArraySizeMutable().assign(newConstant.getResult());
110  }
111  if (!shouldInsertLifetimes && !hasDynamicAlloca)
112  return;
113  // Insert a lifetime end intrinsic before each return in the callee function.
114  for (Block &block : inlinedBlocks) {
115  if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
116  continue;
117  builder.setInsertionPoint(block.getTerminator());
118  if (hasDynamicAlloca)
119  builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
120  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
121  if (shouldInsertLifetime)
122  builder.create<LLVM::LifetimeEndOp>(
123  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
124  allocaOp.getResult());
125  }
126  }
127 }
128 
129 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
130 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
131 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
132 static void
135 
136  // Register handles in the walker to create the deep clones.
137  // The walker ensures that an attribute is only ever walked once and does a
138  // post-order walk, ensuring the domain is visited prior to the scope.
139  AttrTypeWalker walker;
140 
141  // Perform the deep clones while visiting. Builders create a distinct
142  // attribute to make sure that new instances are always created by the
143  // uniquer.
144  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
145  mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
146  domainAttr.getContext(), domainAttr.getDescription());
147  });
148 
149  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
150  mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
151  cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
152  scopeAttr.getDescription());
153  });
154 
155  // Map an array of scopes to an array of deep clones.
156  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
157  if (!arrayAttr)
158  return nullptr;
159 
160  // Create the deep clones if necessary.
161  walker.walk(arrayAttr);
162 
163  return ArrayAttr::get(arrayAttr.getContext(),
164  llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
165  return mapping.lookup(attr);
166  }));
167  };
168 
169  for (Block &block : inlinedBlocks) {
170  for (Operation &op : block) {
171  if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
172  aliasInterface.setAliasScopes(
173  convertScopeList(aliasInterface.getAliasScopesOrNull()));
174  aliasInterface.setNoAliasScopes(
175  convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
176  }
177 
178  if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
179  // Create the deep clones if necessary.
180  walker.walk(noAliasScope.getScopeAttr());
181 
182  noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
183  mapping.lookup(noAliasScope.getScopeAttr())));
184  }
185  }
186  }
187 }
188 
189 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
190 /// Returns null if both parameters are null. If only one attribute is null,
191 /// return the other.
192 static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
193  if (!lhs)
194  return rhs;
195  if (!rhs)
196  return lhs;
197 
198  SmallVector<Attribute> result;
199  llvm::append_range(result, lhs);
200  llvm::append_range(result, rhs);
201  return ArrayAttr::get(lhs.getContext(), result);
202 }
203 
204 /// Attempts to return the underlying pointer value that `pointerValue` is based
205 /// on. This traverses down the chain of operations to the last operation
206 /// producing the base pointer and returns it. If it encounters an operation it
207 /// cannot further traverse through, returns the operation's result.
208 static Value getUnderlyingObject(Value pointerValue) {
209  while (true) {
210  if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
211  pointerValue = gepOp.getBase();
212  continue;
213  }
214 
215  if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
216  pointerValue = addrCast.getOperand();
217  continue;
218  }
219 
220  break;
221  }
222 
223  return pointerValue;
224 }
225 
226 /// Attempts to return the set of all underlying pointer values that
227 /// `pointerValue` is based on. This function traverses through select
228 /// operations and block arguments unlike getUnderlyingObject.
230  SmallVector<Value> result;
231 
232  SmallVector<Value> workList{pointerValue};
233  // Avoid dataflow loops.
235  do {
236  Value current = workList.pop_back_val();
237  current = getUnderlyingObject(current);
238 
239  if (!seen.insert(current).second)
240  continue;
241 
242  if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
243  workList.push_back(selectOp.getTrueValue());
244  workList.push_back(selectOp.getFalseValue());
245  continue;
246  }
247 
248  if (auto blockArg = dyn_cast<BlockArgument>(current)) {
249  Block *parentBlock = blockArg.getParentBlock();
250 
251  // Attempt to find all block argument operands for every predecessor.
252  // If any operand to the block argument wasn't found in a predecessor,
253  // conservatively add the block argument to the result set.
254  SmallVector<Value> operands;
255  bool anyUnknown = false;
256  for (auto iter = parentBlock->pred_begin();
257  iter != parentBlock->pred_end(); iter++) {
258  auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
259  if (!branch) {
260  result.push_back(blockArg);
261  anyUnknown = true;
262  break;
263  }
264 
265  Value operand = branch.getSuccessorOperands(
266  iter.getSuccessorIndex())[blockArg.getArgNumber()];
267  if (!operand) {
268  result.push_back(blockArg);
269  anyUnknown = true;
270  break;
271  }
272 
273  operands.push_back(operand);
274  }
275 
276  if (!anyUnknown)
277  llvm::append_range(workList, operands);
278 
279  continue;
280  }
281 
282  result.push_back(current);
283  } while (!workList.empty());
284 
285  return result;
286 }
287 
288 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
289 /// the appropriate inlined memory operations in an attempt to preserve the
290 /// original semantics of the parameter attribute.
292  Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
293 
294  // First collect all noalias parameters. These have been specially marked by
295  // the `handleArgument` implementation by using the `ssa.copy` intrinsic and
296  // attaching a `noalias` attribute to it.
297  // These are only meant to be temporary and should therefore be deleted after
298  // we're done using them here.
299  SetVector<LLVM::SSACopyOp> noAliasParams;
300  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
301  for (Operation *user : argument.getUsers()) {
302  auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
303  if (!ssaCopy)
304  continue;
305  if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
306  continue;
307 
308  noAliasParams.insert(ssaCopy);
309  }
310  }
311 
312  // If there were none, we have nothing to do here.
313  if (noAliasParams.empty())
314  return;
315 
316  // Scope exit block to make it impossible to forget to get rid of the
317  // intrinsics.
318  auto exit = llvm::make_scope_exit([&] {
319  for (LLVM::SSACopyOp ssaCopyOp : noAliasParams) {
320  ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
321  ssaCopyOp->erase();
322  }
323  });
324 
325  // Create a new domain for this specific inlining and a new scope for every
326  // noalias parameter.
327  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
328  call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
330  for (LLVM::SSACopyOp copyOp : noAliasParams) {
331  auto scope = LLVM::AliasScopeAttr::get(functionDomain);
332  pointerScopes[copyOp] = scope;
333 
334  OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope);
335  }
336 
337  // Go through every instruction and attempt to find which noalias parameters
338  // it is definitely based on and definitely not based on.
339  for (Block &inlinedBlock : inlinedBlocks) {
340  for (auto aliasInterface :
341  inlinedBlock.getOps<LLVM::AliasAnalysisOpInterface>()) {
342 
343  // Collect the pointer arguments affected by the alias scopes.
344  SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
345 
346  // Find the set of underlying pointers that this pointer is based on.
347  SmallPtrSet<Value, 4> basedOnPointers;
348  for (Value pointer : pointerArgs)
350  std::inserter(basedOnPointers, basedOnPointers.begin()));
351 
352  bool aliasesOtherKnownObject = false;
353  // Go through the based on pointers and check that they are either:
354  // * Constants that can be ignored (undef, poison, null pointer).
355  // * Based on a noalias parameter.
356  // * Other pointers that we know can't alias with our noalias parameter.
357  //
358  // Any other value might be a pointer based on any noalias parameter that
359  // hasn't been identified. In that case conservatively don't add any
360  // scopes to this operation indicating either aliasing or not aliasing
361  // with any parameter.
362  if (llvm::any_of(basedOnPointers, [&](Value object) {
363  if (matchPattern(object, m_Constant()))
364  return false;
365 
366  if (noAliasParams.contains(object.getDefiningOp<LLVM::SSACopyOp>()))
367  return false;
368 
369  // TODO: This should include other arguments from the inlined
370  // callable.
371  if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
372  object.getDefiningOp())) {
373  aliasesOtherKnownObject = true;
374  return false;
375  }
376  return true;
377  }))
378  continue;
379 
380  // Add all noalias parameter scopes to the noalias scope list that we are
381  // not based on.
382  SmallVector<Attribute> noAliasScopes;
383  for (LLVM::SSACopyOp noAlias : noAliasParams) {
384  if (basedOnPointers.contains(noAlias))
385  continue;
386 
387  noAliasScopes.push_back(pointerScopes[noAlias]);
388  }
389 
390  if (!noAliasScopes.empty())
391  aliasInterface.setNoAliasScopes(
392  concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
393  ArrayAttr::get(call->getContext(), noAliasScopes)));
394 
395  // Don't add alias scopes to call operations or operations that might
396  // operate on pointers not based on any noalias parameter.
397  // Since we add all scopes to an operation's noalias list that it
398  // definitely doesn't alias, we mustn't do the same for the alias.scope
399  // list if other objects are involved.
400  //
401  // Consider the following case:
402  // %0 = llvm.alloca
403  // %1 = select %magic, %0, %noalias_param
404  // store 5, %1 (1) noalias=[scope(...)]
405  // ...
406  // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
407  //
408  // We can add the scopes of any noalias parameters that aren't
409  // noalias_param's scope to (1) and add all of them to (2). We mustn't add
410  // the scope of noalias_param to the alias.scope list of (1) since
411  // that would mean (2) cannot alias with (1) which is wrong since both may
412  // store to %0.
413  //
414  // In conclusion, only add scopes to the alias.scope list if all pointers
415  // have a corresponding scope.
416  // Call operations are included in this list since we do not know whether
417  // the callee accesses any memory besides the ones passed as its
418  // arguments.
419  if (aliasesOtherKnownObject ||
420  isa<LLVM::CallOp>(aliasInterface.getOperation()))
421  continue;
422 
423  SmallVector<Attribute> aliasScopes;
424  for (LLVM::SSACopyOp noAlias : noAliasParams)
425  if (basedOnPointers.contains(noAlias))
426  aliasScopes.push_back(pointerScopes[noAlias]);
427 
428  if (!aliasScopes.empty())
429  aliasInterface.setAliasScopes(
430  concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
431  ArrayAttr::get(call->getContext(), aliasScopes)));
432  }
433  }
434 }
435 
436 /// Appends any alias scopes of the call operation to any inlined memory
437 /// operation.
438 static void
440  iterator_range<Region::iterator> inlinedBlocks) {
441  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
442  if (!callAliasInterface)
443  return;
444 
445  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
446  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
447  // If the call has neither alias scopes or noalias scopes we have nothing to
448  // do here.
449  if (!aliasScopes && !noAliasScopes)
450  return;
451 
452  // Simply append the call op's alias and noalias scopes to any operation
453  // implementing AliasAnalysisOpInterface.
454  for (Block &block : inlinedBlocks) {
455  for (auto aliasInterface : block.getOps<LLVM::AliasAnalysisOpInterface>()) {
456  if (aliasScopes)
457  aliasInterface.setAliasScopes(concatArrayAttr(
458  aliasInterface.getAliasScopesOrNull(), aliasScopes));
459 
460  if (noAliasScopes)
461  aliasInterface.setNoAliasScopes(concatArrayAttr(
462  aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
463  }
464  }
465 }
466 
467 /// Handles all interactions with alias scopes during inlining.
468 static void handleAliasScopes(Operation *call,
469  iterator_range<Region::iterator> inlinedBlocks) {
470  deepCloneAliasScopes(inlinedBlocks);
471  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
472  appendCallOpAliasScopes(call, inlinedBlocks);
473 }
474 
475 /// Appends any access groups of the call operation to any inlined memory
476 /// operation.
477 static void handleAccessGroups(Operation *call,
478  iterator_range<Region::iterator> inlinedBlocks) {
479  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
480  if (!callAccessGroupInterface)
481  return;
482 
483  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
484  if (!accessGroups)
485  return;
486 
487  // Simply append the call op's access groups to any operation implementing
488  // AccessGroupOpInterface.
489  for (Block &block : inlinedBlocks)
490  for (auto accessGroupOpInterface :
491  block.getOps<LLVM::AccessGroupOpInterface>())
492  accessGroupOpInterface.setAccessGroups(concatArrayAttr(
493  accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
494 }
495 
496 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
497 /// realigns `alloca` if this does not exceed the natural stack alignment.
498 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
499 static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
500  uint64_t requestedAlignment,
501  DataLayout const &dataLayout) {
502  uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
503  if (requestedAlignment <= allocaAlignment)
504  // No realignment necessary.
505  return allocaAlignment;
506  uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
507  // If the natural stack alignment is not specified, the data layout returns
508  // zero. Optimistically allow realignment in this case.
509  if (naturalStackAlignmentBits == 0 ||
510  // If the requested alignment exceeds the natural stack alignment, this
511  // will trigger a dynamic stack realignment, so we prefer to copy...
512  8 * requestedAlignment <= naturalStackAlignmentBits ||
513  // ...unless the alloca already triggers dynamic stack realignment. Then
514  // we might as well further increase the alignment to avoid a copy.
515  8 * allocaAlignment > naturalStackAlignmentBits) {
516  alloca.setAlignment(requestedAlignment);
517  allocaAlignment = requestedAlignment;
518  }
519  return allocaAlignment;
520 }
521 
522 /// Tries to find and return the alignment of the pointer `value` by looking for
523 /// an alignment attribute on the defining allocation op or function argument.
524 /// If the found alignment is lower than `requestedAlignment`, tries to realign
525 /// the pointer, then returns the resulting post-alignment, regardless of
526 /// whether it was realigned or not. If no existing alignment attribute is
527 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
528 static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
529  DataLayout const &dataLayout) {
530  if (Operation *definingOp = value.getDefiningOp()) {
531  if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
532  return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
533  dataLayout);
534  if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
535  if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
536  definingOp, addressOf.getGlobalNameAttr()))
537  return global.getAlignment().value_or(1);
538  // We don't currently handle this operation; assume no alignment.
539  return 1;
540  }
541  // Since there is no defining op, this is a block argument. Probably this
542  // comes directly from a function argument, so check that this is the case.
543  Operation *parentOp = value.getParentBlock()->getParentOp();
544  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
545  // Use the alignment attribute set for this argument in the parent function
546  // if it has been set.
547  auto blockArg = llvm::cast<BlockArgument>(value);
548  if (Attribute alignAttr = func.getArgAttr(
549  blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
550  return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
551  }
552  // We didn't find anything useful; assume no alignment.
553  return 1;
554 }
555 
556 /// Introduces a new alloca and copies the memory pointed to by `argument` to
557 /// the address of the new alloca, then returns the value of the new alloca.
559  Value argument, Type elementType,
560  uint64_t elementTypeSize,
561  uint64_t targetAlignment) {
562  // Allocate the new value on the stack.
563  Value allocaOp;
564  {
565  // Since this is a static alloca, we can put it directly in the entry block,
566  // so they can be absorbed into the prologue/epilogue at code generation.
567  OpBuilder::InsertionGuard insertionGuard(builder);
568  Block *entryBlock = &(*argument.getParentRegion()->begin());
569  builder.setInsertionPointToStart(entryBlock);
570  Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
571  builder.getI64IntegerAttr(1));
572  allocaOp = builder.create<LLVM::AllocaOp>(
573  loc, argument.getType(), elementType, one, targetAlignment);
574  }
575  // Copy the pointee to the newly allocated value.
576  Value copySize = builder.create<LLVM::ConstantOp>(
577  loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize));
578  builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize,
579  /*isVolatile=*/false);
580  return allocaOp;
581 }
582 
583 /// Handles a function argument marked with the byval attribute by introducing a
584 /// memcpy or realigning the defining operation, if required either due to the
585 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
586 /// `requestedAlignment` specifies the alignment set in the "align" argument
587 /// attribute (or 1 if no align attribute was set).
588 static Value handleByValArgument(OpBuilder &builder, Operation *callable,
589  Value argument, Type elementType,
590  uint64_t requestedAlignment) {
591  auto func = cast<LLVM::LLVMFuncOp>(callable);
592  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr();
593  // If there is no memory effects attribute, assume that the function is
594  // not read-only.
595  bool isReadOnly = memoryEffects &&
596  memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
597  memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
598  // Check if there's an alignment mismatch requiring us to copy.
599  DataLayout dataLayout = DataLayout::closest(callable);
600  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
601  if (isReadOnly) {
602  if (requestedAlignment <= minimumAlignment)
603  return argument;
604  uint64_t currentAlignment =
605  tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
606  if (currentAlignment >= requestedAlignment)
607  return argument;
608  }
609  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
610  return handleByValArgumentInit(builder, func.getLoc(), argument, elementType,
611  dataLayout.getTypeSize(elementType),
612  targetAlignment);
613 }
614 
615 namespace {
616 struct LLVMInlinerInterface : public DialectInlinerInterface {
618 
619  LLVMInlinerInterface(Dialect *dialect)
620  : DialectInlinerInterface(dialect),
621  // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
622  disallowedFunctionAttrs({
623  StringAttr::get(dialect->getContext(), "noduplicate"),
624  StringAttr::get(dialect->getContext(), "noinline"),
625  StringAttr::get(dialect->getContext(), "optnone"),
626  StringAttr::get(dialect->getContext(), "presplitcoroutine"),
627  StringAttr::get(dialect->getContext(), "returns_twice"),
628  StringAttr::get(dialect->getContext(), "strictfp"),
629  }) {}
630 
631  bool isLegalToInline(Operation *call, Operation *callable,
632  bool wouldBeCloned) const final {
633  if (!wouldBeCloned)
634  return false;
635  auto callOp = dyn_cast<LLVM::CallOp>(call);
636  if (!callOp) {
637  LLVM_DEBUG(llvm::dbgs()
638  << "Cannot inline: call is not an LLVM::CallOp\n");
639  return false;
640  }
641  auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
642  if (!funcOp) {
643  LLVM_DEBUG(llvm::dbgs()
644  << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n");
645  return false;
646  }
647  // TODO: Generate aliasing metadata from noalias argument/result attributes.
648  if (auto attrs = funcOp.getArgAttrs()) {
649  for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
650  if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
651  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
652  << ": inalloca arguments not supported\n");
653  return false;
654  }
655  }
656  }
657  // TODO: Handle exceptions.
658  if (funcOp.getPersonality()) {
659  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
660  << ": unhandled function personality\n");
661  return false;
662  }
663  if (funcOp.getPassthrough()) {
664  // TODO: Used attributes should not be passthrough.
665  if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
666  auto stringAttr = dyn_cast<StringAttr>(attr);
667  if (!stringAttr)
668  return false;
669  if (disallowedFunctionAttrs.contains(stringAttr)) {
670  LLVM_DEBUG(llvm::dbgs()
671  << "Cannot inline " << funcOp.getSymName()
672  << ": found disallowed function attribute "
673  << stringAttr << "\n");
674  return true;
675  }
676  return false;
677  }))
678  return false;
679  }
680  return true;
681  }
682 
683  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
684  return true;
685  }
686 
687  /// Conservative allowlist of operations supported so far.
688  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
689  if (isPure(op))
690  return true;
691  // clang-format off
692  if (isa<LLVM::AllocaOp,
693  LLVM::AssumeOp,
694  LLVM::AtomicRMWOp,
695  LLVM::AtomicCmpXchgOp,
696  LLVM::CallOp,
697  LLVM::CallIntrinsicOp,
698  LLVM::DbgDeclareOp,
699  LLVM::DbgLabelOp,
700  LLVM::DbgValueOp,
701  LLVM::FenceOp,
702  LLVM::InlineAsmOp,
703  LLVM::LifetimeEndOp,
704  LLVM::LifetimeStartOp,
705  LLVM::LoadOp,
706  LLVM::MemcpyOp,
707  LLVM::MemcpyInlineOp,
708  LLVM::MemmoveOp,
709  LLVM::MemsetOp,
710  LLVM::NoAliasScopeDeclOp,
711  LLVM::StackRestoreOp,
712  LLVM::StackSaveOp,
713  LLVM::StoreOp,
714  LLVM::UnreachableOp>(op))
715  return true;
716  // clang-format on
717  LLVM_DEBUG(llvm::dbgs()
718  << "Cannot inline: unhandled side effecting operation \""
719  << op->getName() << "\"\n");
720  return false;
721  }
722 
723  /// Handle the given inlined return by replacing it with a branch. This
724  /// overload is called when the inlined region has more than one block.
725  void handleTerminator(Operation *op, Block *newDest) const final {
726  // Only return needs to be handled here.
727  auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
728  if (!returnOp)
729  return;
730 
731  // Replace the return with a branch to the dest.
732  OpBuilder builder(op);
733  builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
734  op->erase();
735  }
736 
737  /// Handle the given inlined return by replacing the uses of the call with the
738  /// operands of the return. This overload is called when the inlined region
739  /// only contains one block.
740  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
741  // Return will be the only terminator present.
742  auto returnOp = cast<LLVM::ReturnOp>(op);
743 
744  // Replace the values directly with the return operands.
745  assert(returnOp.getNumOperands() == valuesToRepl.size());
746  for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
747  dst.replaceAllUsesWith(src);
748  }
749 
750  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
751  Value argument,
752  DictionaryAttr argumentAttrs) const final {
753  if (std::optional<NamedAttribute> attr =
754  argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
755  Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
756  uint64_t requestedAlignment = 1;
757  if (std::optional<NamedAttribute> alignAttr =
758  argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
759  requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
760  .getValue()
761  .getLimitedValue();
762  }
763  return handleByValArgument(builder, callable, argument, elementType,
764  requestedAlignment);
765  }
766  if ([[maybe_unused]] std::optional<NamedAttribute> attr =
767  argumentAttrs.getNamed(LLVM::LLVMDialect::getNoAliasAttrName())) {
768  if (argument.use_empty())
769  return argument;
770 
771  // This code is essentially a workaround for deficiencies in the
772  // inliner interface: We need to transform operations *after* inlined
773  // based on the argument attributes of the parameters *before* inlining.
774  // This method runs prior to actual inlining and thus cannot transform the
775  // post-inlining code, while `processInlinedCallBlocks` does not have
776  // access to pre-inlining function arguments. Additionally, it is required
777  // to distinguish which parameter an SSA value originally came from.
778  // As a workaround until this is changed: Create an ssa.copy intrinsic
779  // with the noalias attribute that can easily be found, and is extremely
780  // unlikely to exist in the code prior to inlining, using this to
781  // communicate between this method and `processInlinedCallBlocks`.
782  // TODO: Fix this by refactoring the inliner interface.
783  auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument);
784  copyOp->setDiscardableAttr(
785  builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
786  builder.getUnitAttr());
787  return copyOp;
788  }
789  return argument;
790  }
791 
792  void processInlinedCallBlocks(
793  Operation *call,
794  iterator_range<Region::iterator> inlinedBlocks) const override {
795  handleInlinedAllocas(call, inlinedBlocks);
796  handleAliasScopes(call, inlinedBlocks);
797  handleAccessGroups(call, inlinedBlocks);
798  }
799 
800  // Keeping this (immutable) state on the interface allows us to look up
801  // StringAttrs instead of looking up strings, since StringAttrs are bound to
802  // the current context and thus cannot be initialized as static fields.
803  const DenseSet<StringAttr> disallowedFunctionAttrs;
804 };
805 
806 } // end anonymous namespace
807 
808 void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
809  dialect->addInterfaces<LLVMInlinerInterface>();
810 }
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or more casts on the way. This is not transitive through block arguments.
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined memory operations.
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static SmallVector< Value > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static Value getUnderlyingObject(Value pointerValue)
Attempts to return the underlying pointer value that pointerValue is based on.
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca, then returns the value of the new alloca.
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:30
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition: Block.cpp:26
pred_iterator pred_begin()
Definition: Block.h:226
iterator begin()
Definition: Block.h:136
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:186
pred_iterator pred_end()
Definition: Block.h:229
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
IntegerType getI64Type()
Definition: Builders.cpp:85
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:128
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:44
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:46
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the entire group.
Definition: Dialect.h:41
MLIRContext * getContext() const
Definition: Dialect.h:55
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:333
This class helps build Operations.
Definition: Builders.h:206
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:416
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:383
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:446
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:852
void moveAfter(Operation *existingOp)
Unlink this operation from its current block and insert it right after existingOp which may be in the same or another block in the same function.
Definition: Operation.cpp:568
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:538
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:378
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:125
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
void addLLVMInlinerInterface(LLVMDialect *dialect)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:401
bool isPure(Operation *op)
Returns true if the given operation is pure, i.e., is speculatable that does not touch memory.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute construction methods.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:310
This trait indicates that a terminator operation is "return-like".