MLIR  20.0.0git
LLVMInlining.cpp
Go to the documentation of this file.
1 //===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Logic for inlining LLVM functions and the definition of the
10 // LLVMInliningInterface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LLVMInlining.h"
16 #include "mlir/IR/Matchers.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "llvm-inliner"
23 
24 using namespace mlir;
25 
26 /// Check whether the given alloca is an input to a lifetime intrinsic,
27 /// optionally passing through one or more casts on the way. This is not
28 /// transitive through block arguments.
29 static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
30  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
31  allocaOp->getUsers().end());
32  while (!stack.empty()) {
33  Operation *op = stack.pop_back_val();
34  if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
35  return true;
36  if (isa<LLVM::BitcastOp>(op))
37  stack.append(op->getUsers().begin(), op->getUsers().end());
38  }
39  return false;
40 }
41 
42 /// Handles alloca operations in the inlined blocks:
43 /// - Moves all alloca operations with a constant size in the former entry block
44 /// of the callee into the entry block of the caller, so they become part of
45 /// the function prologue/epilogue during code generation.
46 /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
47 /// to the inlined blocks.
48 /// - Inserts StackSave and StackRestore operations if dynamic allocas were
49 /// inlined.
50 static void
52  iterator_range<Region::iterator> inlinedBlocks) {
53  // Locate the entry block of the closest callsite ancestor that has either the
54  // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
55  // programs, this is the LLVMFuncOp containing the call site. However, in
56  // mixed-dialect programs, the callsite might be nested in another operation
57  // that carries one of these traits. In such scenarios, this traversal stops
58  // at the closest ancestor with either trait, ensuring visibility post
59  // relocation and respecting allocation scopes.
60  Block *callerEntryBlock = nullptr;
61  Operation *currentOp = call;
62  while (Operation *parentOp = currentOp->getParentOp()) {
63  if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
64  parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
65  callerEntryBlock = &currentOp->getParentRegion()->front();
66  break;
67  }
68  currentOp = parentOp;
69  }
70 
71  // Avoid relocating the alloca operations if the call has been inlined into
72  // the entry block already, which is typically the encompassing
73  // LLVM function, or if the relevant entry block cannot be identified.
74  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
75  if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
76  return;
77 
79  bool shouldInsertLifetimes = false;
80  bool hasDynamicAlloca = false;
81  // Conservatively only move static alloca operations that are part of the
82  // entry block and do not inspect nested regions, since they may execute
83  // conditionally or have other unknown semantics.
84  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
85  IntegerAttr arraySize;
86  if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
87  hasDynamicAlloca = true;
88  continue;
89  }
90  bool shouldInsertLifetime =
91  arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
92  shouldInsertLifetimes |= shouldInsertLifetime;
93  allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
94  }
95  // Check the remaining inlined blocks for dynamic allocas as well.
96  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
97  if (hasDynamicAlloca)
98  break;
99  hasDynamicAlloca =
100  llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
101  return !matchPattern(allocaOp.getArraySize(), m_Constant());
102  });
103  }
104  if (allocasToMove.empty() && !hasDynamicAlloca)
105  return;
106  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
107  Value stackPtr;
108  if (hasDynamicAlloca) {
109  // This may result in multiple stacksave/stackrestore intrinsics in the same
110  // scope if some are already present in the body of the caller. This is not
111  // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
112  // other cases where the stacksave/stackrestore is redundant.
113  stackPtr = builder.create<LLVM::StackSaveOp>(
114  call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
115  }
116  builder.setInsertionPoint(callerEntryBlock, callerEntryBlock->begin());
117  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
118  auto newConstant = builder.create<LLVM::ConstantOp>(
119  allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
120  // Insert a lifetime start intrinsic where the alloca was before moving it.
121  if (shouldInsertLifetime) {
122  OpBuilder::InsertionGuard insertionGuard(builder);
123  builder.setInsertionPoint(allocaOp);
124  builder.create<LLVM::LifetimeStartOp>(
125  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
126  allocaOp.getResult());
127  }
128  allocaOp->moveAfter(newConstant);
129  allocaOp.getArraySizeMutable().assign(newConstant.getResult());
130  }
131  if (!shouldInsertLifetimes && !hasDynamicAlloca)
132  return;
133  // Insert a lifetime end intrinsic before each return in the callee function.
134  for (Block &block : inlinedBlocks) {
135  if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
136  continue;
137  builder.setInsertionPoint(block.getTerminator());
138  if (hasDynamicAlloca)
139  builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
140  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
141  if (shouldInsertLifetime)
142  builder.create<LLVM::LifetimeEndOp>(
143  allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
144  allocaOp.getResult());
145  }
146  }
147 }
148 
149 /// Maps all alias scopes in the inlined operations to deep clones of the scopes
150 /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
151 /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
152 static void
155 
156  // Register handles in the walker to create the deep clones.
157  // The walker ensures that an attribute is only ever walked once and does a
158  // post-order walk, ensuring the domain is visited prior to the scope.
159  AttrTypeWalker walker;
160 
161  // Perform the deep clones while visiting. Builders create a distinct
162  // attribute to make sure that new instances are always created by the
163  // uniquer.
164  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
165  mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
166  domainAttr.getContext(), domainAttr.getDescription());
167  });
168 
169  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
170  mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
171  cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
172  scopeAttr.getDescription());
173  });
174 
175  // Map an array of scopes to an array of deep clones.
176  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
177  if (!arrayAttr)
178  return nullptr;
179 
180  // Create the deep clones if necessary.
181  walker.walk(arrayAttr);
182 
183  return ArrayAttr::get(arrayAttr.getContext(),
184  llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
185  return mapping.lookup(attr);
186  }));
187  };
188 
189  for (Block &block : inlinedBlocks) {
190  block.walk([&](Operation *op) {
191  if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
192  aliasInterface.setAliasScopes(
193  convertScopeList(aliasInterface.getAliasScopesOrNull()));
194  aliasInterface.setNoAliasScopes(
195  convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
196  }
197 
198  if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
199  // Create the deep clones if necessary.
200  walker.walk(noAliasScope.getScopeAttr());
201 
202  noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
203  mapping.lookup(noAliasScope.getScopeAttr())));
204  }
205  });
206  }
207 }
208 
209 /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
210 /// Returns null if both parameters are null. If only one attribute is null,
211 /// return the other.
212 static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
213  if (!lhs)
214  return rhs;
215  if (!rhs)
216  return lhs;
217 
218  SmallVector<Attribute> result;
219  llvm::append_range(result, lhs);
220  llvm::append_range(result, rhs);
221  return ArrayAttr::get(lhs.getContext(), result);
222 }
223 
224 /// Attempts to return the underlying pointer value that `pointerValue` is based
225 /// on. This traverses down the chain of operations to the last operation
226 /// producing the base pointer and returns it. If it encounters an operation it
227 /// cannot further traverse through, returns the operation's result.
228 static Value getUnderlyingObject(Value pointerValue) {
229  while (true) {
230  if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
231  pointerValue = gepOp.getBase();
232  continue;
233  }
234 
235  if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
236  pointerValue = addrCast.getOperand();
237  continue;
238  }
239 
240  break;
241  }
242 
243  return pointerValue;
244 }
245 
246 /// Attempts to return the set of all underlying pointer values that
247 /// `pointerValue` is based on. This function traverses through select
248 /// operations and block arguments unlike getUnderlyingObject.
250  SmallVector<Value> result;
251 
252  SmallVector<Value> workList{pointerValue};
253  // Avoid dataflow loops.
255  do {
256  Value current = workList.pop_back_val();
257  current = getUnderlyingObject(current);
258 
259  if (!seen.insert(current).second)
260  continue;
261 
262  if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
263  workList.push_back(selectOp.getTrueValue());
264  workList.push_back(selectOp.getFalseValue());
265  continue;
266  }
267 
268  if (auto blockArg = dyn_cast<BlockArgument>(current)) {
269  Block *parentBlock = blockArg.getParentBlock();
270 
271  // Attempt to find all block argument operands for every predecessor.
272  // If any operand to the block argument wasn't found in a predecessor,
273  // conservatively add the block argument to the result set.
274  SmallVector<Value> operands;
275  bool anyUnknown = false;
276  for (auto iter = parentBlock->pred_begin();
277  iter != parentBlock->pred_end(); iter++) {
278  auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
279  if (!branch) {
280  result.push_back(blockArg);
281  anyUnknown = true;
282  break;
283  }
284 
285  Value operand = branch.getSuccessorOperands(
286  iter.getSuccessorIndex())[blockArg.getArgNumber()];
287  if (!operand) {
288  result.push_back(blockArg);
289  anyUnknown = true;
290  break;
291  }
292 
293  operands.push_back(operand);
294  }
295 
296  if (!anyUnknown)
297  llvm::append_range(workList, operands);
298 
299  continue;
300  }
301 
302  result.push_back(current);
303  } while (!workList.empty());
304 
305  return result;
306 }
307 
308 /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
309 /// the appropriate inlined memory operations in an attempt to preserve the
310 /// original semantics of the parameter attribute.
312  Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
313 
314  // First collect all noalias parameters. These have been specially marked by
315  // the `handleArgument` implementation by using the `ssa.copy` intrinsic and
316  // attaching a `noalias` attribute to it.
317  // These are only meant to be temporary and should therefore be deleted after
318  // we're done using them here.
319  SetVector<LLVM::SSACopyOp> noAliasParams;
320  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
321  for (Operation *user : argument.getUsers()) {
322  auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
323  if (!ssaCopy)
324  continue;
325  if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
326  continue;
327 
328  noAliasParams.insert(ssaCopy);
329  }
330  }
331 
332  // If there were none, we have nothing to do here.
333  if (noAliasParams.empty())
334  return;
335 
336  // Scope exit block to make it impossible to forget to get rid of the
337  // intrinsics.
338  auto exit = llvm::make_scope_exit([&] {
339  for (LLVM::SSACopyOp ssaCopyOp : noAliasParams) {
340  ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
341  ssaCopyOp->erase();
342  }
343  });
344 
345  // Create a new domain for this specific inlining and a new scope for every
346  // noalias parameter.
347  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
348  call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
350  for (LLVM::SSACopyOp copyOp : noAliasParams) {
351  auto scope = LLVM::AliasScopeAttr::get(functionDomain);
352  pointerScopes[copyOp] = scope;
353 
354  OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope);
355  }
356 
357  // Go through every instruction and attempt to find which noalias parameters
358  // it is definitely based on and definitely not based on.
359  for (Block &inlinedBlock : inlinedBlocks) {
360  inlinedBlock.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
361  // Collect the pointer arguments affected by the alias scopes.
362  SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
363 
364  // Find the set of underlying pointers that this pointer is based on.
365  SmallPtrSet<Value, 4> basedOnPointers;
366  for (Value pointer : pointerArgs)
368  std::inserter(basedOnPointers, basedOnPointers.begin()));
369 
370  bool aliasesOtherKnownObject = false;
371  // Go through the based on pointers and check that they are either:
372  // * Constants that can be ignored (undef, poison, null pointer).
373  // * Based on a noalias parameter.
374  // * Other pointers that we know can't alias with our noalias parameter.
375  //
376  // Any other value might be a pointer based on any noalias parameter that
377  // hasn't been identified. In that case conservatively don't add any
378  // scopes to this operation indicating either aliasing or not aliasing
379  // with any parameter.
380  if (llvm::any_of(basedOnPointers, [&](Value object) {
381  if (matchPattern(object, m_Constant()))
382  return false;
383 
384  if (noAliasParams.contains(object.getDefiningOp<LLVM::SSACopyOp>()))
385  return false;
386 
387  // TODO: This should include other arguments from the inlined
388  // callable.
389  if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
390  object.getDefiningOp())) {
391  aliasesOtherKnownObject = true;
392  return false;
393  }
394  return true;
395  }))
396  return;
397 
398  // Add all noalias parameter scopes to the noalias scope list that we are
399  // not based on.
400  SmallVector<Attribute> noAliasScopes;
401  for (LLVM::SSACopyOp noAlias : noAliasParams) {
402  if (basedOnPointers.contains(noAlias))
403  continue;
404 
405  noAliasScopes.push_back(pointerScopes[noAlias]);
406  }
407 
408  if (!noAliasScopes.empty())
409  aliasInterface.setNoAliasScopes(
410  concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
411  ArrayAttr::get(call->getContext(), noAliasScopes)));
412 
413  // Don't add alias scopes to call operations or operations that might
414  // operate on pointers not based on any noalias parameter.
415  // Since we add all scopes to an operation's noalias list that it
416  // definitely doesn't alias, we mustn't do the same for the alias.scope
417  // list if other objects are involved.
418  //
419  // Consider the following case:
420  // %0 = llvm.alloca
421  // %1 = select %magic, %0, %noalias_param
422  // store 5, %1 (1) noalias=[scope(...)]
423  // ...
424  // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
425  //
426  // We can add the scopes of any noalias parameters that aren't
427  // noalias_param's scope to (1) and add all of them to (2). We mustn't add
428  // the scope of noalias_param to the alias.scope list of (1) since
429  // that would mean (2) cannot alias with (1) which is wrong since both may
430  // store to %0.
431  //
432  // In conclusion, only add scopes to the alias.scope list if all pointers
433  // have a corresponding scope.
434  // Call operations are included in this list since we do not know whether
435  // the callee accesses any memory besides the ones passed as its
436  // arguments.
437  if (aliasesOtherKnownObject ||
438  isa<LLVM::CallOp>(aliasInterface.getOperation()))
439  return;
440 
441  SmallVector<Attribute> aliasScopes;
442  for (LLVM::SSACopyOp noAlias : noAliasParams)
443  if (basedOnPointers.contains(noAlias))
444  aliasScopes.push_back(pointerScopes[noAlias]);
445 
446  if (!aliasScopes.empty())
447  aliasInterface.setAliasScopes(
448  concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
449  ArrayAttr::get(call->getContext(), aliasScopes)));
450  });
451  }
452 }
453 
454 /// Appends any alias scopes of the call operation to any inlined memory
455 /// operation.
456 static void
458  iterator_range<Region::iterator> inlinedBlocks) {
459  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
460  if (!callAliasInterface)
461  return;
462 
463  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
464  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
465  // If the call has neither alias scopes or noalias scopes we have nothing to
466  // do here.
467  if (!aliasScopes && !noAliasScopes)
468  return;
469 
470  // Simply append the call op's alias and noalias scopes to any operation
471  // implementing AliasAnalysisOpInterface.
472  for (Block &block : inlinedBlocks) {
473  block.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
474  if (aliasScopes)
475  aliasInterface.setAliasScopes(concatArrayAttr(
476  aliasInterface.getAliasScopesOrNull(), aliasScopes));
477 
478  if (noAliasScopes)
479  aliasInterface.setNoAliasScopes(concatArrayAttr(
480  aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
481  });
482  }
483 }
484 
485 /// Handles all interactions with alias scopes during inlining.
486 static void handleAliasScopes(Operation *call,
487  iterator_range<Region::iterator> inlinedBlocks) {
488  deepCloneAliasScopes(inlinedBlocks);
489  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
490  appendCallOpAliasScopes(call, inlinedBlocks);
491 }
492 
493 /// Appends any access groups of the call operation to any inlined memory
494 /// operation.
495 static void handleAccessGroups(Operation *call,
496  iterator_range<Region::iterator> inlinedBlocks) {
497  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
498  if (!callAccessGroupInterface)
499  return;
500 
501  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
502  if (!accessGroups)
503  return;
504 
505  // Simply append the call op's access groups to any operation implementing
506  // AccessGroupOpInterface.
507  for (Block &block : inlinedBlocks)
508  for (auto accessGroupOpInterface :
509  block.getOps<LLVM::AccessGroupOpInterface>())
510  accessGroupOpInterface.setAccessGroups(concatArrayAttr(
511  accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
512 }
513 
514 /// Updates locations inside loop annotations to reflect that they were inlined.
515 static void
517  iterator_range<Region::iterator> inlinedBlocks) {
518  // Attempt to extract a DISubprogram from the callee.
519  auto func = call->getParentOfType<FunctionOpInterface>();
520  if (!func)
521  return;
522  LocationAttr funcLoc = func->getLoc();
523  auto fusedLoc = dyn_cast_if_present<FusedLoc>(funcLoc);
524  if (!fusedLoc)
525  return;
526  auto scope =
527  dyn_cast_if_present<LLVM::DISubprogramAttr>(fusedLoc.getMetadata());
528  if (!scope)
529  return;
530 
531  // Helper to build a new fused location that reflects the inlining of the loop
532  // annotation.
533  auto updateLoc = [&](FusedLoc loc) -> FusedLoc {
534  if (!loc)
535  return {};
536  Location callSiteLoc = CallSiteLoc::get(loc, call->getLoc());
537  return FusedLoc::get(loc.getContext(), callSiteLoc, scope);
538  };
539 
540  AttrTypeReplacer replacer;
541  replacer.addReplacement([&](LLVM::LoopAnnotationAttr loopAnnotation)
542  -> std::pair<Attribute, WalkResult> {
543  FusedLoc newStartLoc = updateLoc(loopAnnotation.getStartLoc());
544  FusedLoc newEndLoc = updateLoc(loopAnnotation.getEndLoc());
545  if (!newStartLoc && !newEndLoc)
546  return {loopAnnotation, WalkResult::advance()};
547  auto newLoopAnnotation = LLVM::LoopAnnotationAttr::get(
548  loopAnnotation.getContext(), loopAnnotation.getDisableNonforced(),
549  loopAnnotation.getVectorize(), loopAnnotation.getInterleave(),
550  loopAnnotation.getUnroll(), loopAnnotation.getUnrollAndJam(),
551  loopAnnotation.getLicm(), loopAnnotation.getDistribute(),
552  loopAnnotation.getPipeline(), loopAnnotation.getPeeled(),
553  loopAnnotation.getUnswitch(), loopAnnotation.getMustProgress(),
554  loopAnnotation.getIsVectorized(), newStartLoc, newEndLoc,
555  loopAnnotation.getParallelAccesses());
556  // Needs to advance, as loop annotations can be nested.
557  return {newLoopAnnotation, WalkResult::advance()};
558  });
559 
560  for (Block &block : inlinedBlocks)
561  for (Operation &op : block)
562  replacer.recursivelyReplaceElementsIn(&op);
563 }
564 
565 /// If `requestedAlignment` is higher than the alignment specified on `alloca`,
566 /// realigns `alloca` if this does not exceed the natural stack alignment.
567 /// Returns the post-alignment of `alloca`, whether it was realigned or not.
568 static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
569  uint64_t requestedAlignment,
570  DataLayout const &dataLayout) {
571  uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
572  if (requestedAlignment <= allocaAlignment)
573  // No realignment necessary.
574  return allocaAlignment;
575  uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
576  // If the natural stack alignment is not specified, the data layout returns
577  // zero. Optimistically allow realignment in this case.
578  if (naturalStackAlignmentBits == 0 ||
579  // If the requested alignment exceeds the natural stack alignment, this
580  // will trigger a dynamic stack realignment, so we prefer to copy...
581  8 * requestedAlignment <= naturalStackAlignmentBits ||
582  // ...unless the alloca already triggers dynamic stack realignment. Then
583  // we might as well further increase the alignment to avoid a copy.
584  8 * allocaAlignment > naturalStackAlignmentBits) {
585  alloca.setAlignment(requestedAlignment);
586  allocaAlignment = requestedAlignment;
587  }
588  return allocaAlignment;
589 }
590 
591 /// Tries to find and return the alignment of the pointer `value` by looking for
592 /// an alignment attribute on the defining allocation op or function argument.
593 /// If the found alignment is lower than `requestedAlignment`, tries to realign
594 /// the pointer, then returns the resulting post-alignment, regardless of
595 /// whether it was realigned or not. If no existing alignment attribute is
596 /// found, returns 1 (i.e., assume that no alignment is guaranteed).
597 static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
598  DataLayout const &dataLayout) {
599  if (Operation *definingOp = value.getDefiningOp()) {
600  if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
601  return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
602  dataLayout);
603  if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
604  if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
605  definingOp, addressOf.getGlobalNameAttr()))
606  return global.getAlignment().value_or(1);
607  // We don't currently handle this operation; assume no alignment.
608  return 1;
609  }
610  // Since there is no defining op, this is a block argument. Probably this
611  // comes directly from a function argument, so check that this is the case.
612  Operation *parentOp = value.getParentBlock()->getParentOp();
613  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
614  // Use the alignment attribute set for this argument in the parent function
615  // if it has been set.
616  auto blockArg = llvm::cast<BlockArgument>(value);
617  if (Attribute alignAttr = func.getArgAttr(
618  blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
619  return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
620  }
621  // We didn't find anything useful; assume no alignment.
622  return 1;
623 }
624 
625 /// Introduces a new alloca and copies the memory pointed to by `argument` to
626 /// the address of the new alloca, then returns the value of the new alloca.
628  Value argument, Type elementType,
629  uint64_t elementTypeSize,
630  uint64_t targetAlignment) {
631  // Allocate the new value on the stack.
632  Value allocaOp;
633  {
634  // Since this is a static alloca, we can put it directly in the entry block,
635  // so they can be absorbed into the prologue/epilogue at code generation.
636  OpBuilder::InsertionGuard insertionGuard(builder);
637  Block *entryBlock = &(*argument.getParentRegion()->begin());
638  builder.setInsertionPointToStart(entryBlock);
639  Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
640  builder.getI64IntegerAttr(1));
641  allocaOp = builder.create<LLVM::AllocaOp>(
642  loc, argument.getType(), elementType, one, targetAlignment);
643  }
644  // Copy the pointee to the newly allocated value.
645  Value copySize = builder.create<LLVM::ConstantOp>(
646  loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize));
647  builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize,
648  /*isVolatile=*/false);
649  return allocaOp;
650 }
651 
652 /// Handles a function argument marked with the byval attribute by introducing a
653 /// memcpy or realigning the defining operation, if required either due to the
654 /// pointee being writeable in the callee, and/or due to an alignment mismatch.
655 /// `requestedAlignment` specifies the alignment set in the "align" argument
656 /// attribute (or 1 if no align attribute was set).
657 static Value handleByValArgument(OpBuilder &builder, Operation *callable,
658  Value argument, Type elementType,
659  uint64_t requestedAlignment) {
660  auto func = cast<LLVM::LLVMFuncOp>(callable);
661  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryEffectsAttr();
662  // If there is no memory effects attribute, assume that the function is
663  // not read-only.
664  bool isReadOnly = memoryEffects &&
665  memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
666  memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
667  // Check if there's an alignment mismatch requiring us to copy.
668  DataLayout dataLayout = DataLayout::closest(callable);
669  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
670  if (isReadOnly) {
671  if (requestedAlignment <= minimumAlignment)
672  return argument;
673  uint64_t currentAlignment =
674  tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
675  if (currentAlignment >= requestedAlignment)
676  return argument;
677  }
678  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
679  return handleByValArgumentInit(builder, func.getLoc(), argument, elementType,
680  dataLayout.getTypeSize(elementType),
681  targetAlignment);
682 }
683 
684 namespace {
685 struct LLVMInlinerInterface : public DialectInlinerInterface {
687 
688  LLVMInlinerInterface(Dialect *dialect)
689  : DialectInlinerInterface(dialect),
690  // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
691  disallowedFunctionAttrs({
692  StringAttr::get(dialect->getContext(), "noduplicate"),
693  StringAttr::get(dialect->getContext(), "presplitcoroutine"),
694  StringAttr::get(dialect->getContext(), "returns_twice"),
695  StringAttr::get(dialect->getContext(), "strictfp"),
696  }) {}
697 
698  bool isLegalToInline(Operation *call, Operation *callable,
699  bool wouldBeCloned) const final {
700  if (!wouldBeCloned)
701  return false;
702  if (!isa<LLVM::CallOp>(call)) {
703  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: call is not an '"
704  << LLVM::CallOp::getOperationName() << "' op\n");
705  return false;
706  }
707  auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
708  if (!funcOp) {
709  LLVM_DEBUG(llvm::dbgs()
710  << "Cannot inline: callable is not an '"
711  << LLVM::LLVMFuncOp::getOperationName() << "' op\n");
712  return false;
713  }
714  if (funcOp.isNoInline()) {
715  LLVM_DEBUG(llvm::dbgs()
716  << "Cannot inline: function is marked no_inline\n");
717  return false;
718  }
719  if (funcOp.isVarArg()) {
720  LLVM_DEBUG(llvm::dbgs() << "Cannot inline: callable is variadic\n");
721  return false;
722  }
723  // TODO: Generate aliasing metadata from noalias result attributes.
724  if (auto attrs = funcOp.getArgAttrs()) {
725  for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
726  if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
727  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
728  << ": inalloca arguments not supported\n");
729  return false;
730  }
731  }
732  }
733  // TODO: Handle exceptions.
734  if (funcOp.getPersonality()) {
735  LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
736  << ": unhandled function personality\n");
737  return false;
738  }
739  if (funcOp.getPassthrough()) {
740  // TODO: Used attributes should not be passthrough.
741  if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
742  auto stringAttr = dyn_cast<StringAttr>(attr);
743  if (!stringAttr)
744  return false;
745  if (disallowedFunctionAttrs.contains(stringAttr)) {
746  LLVM_DEBUG(llvm::dbgs()
747  << "Cannot inline " << funcOp.getSymName()
748  << ": found disallowed function attribute "
749  << stringAttr << "\n");
750  return true;
751  }
752  return false;
753  }))
754  return false;
755  }
756  return true;
757  }
758 
759  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
760  return true;
761  }
762 
763  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
764  // The inliner cannot handle variadic function arguments.
765  return !isa<LLVM::VaStartOp>(op);
766  }
767 
768  /// Handle the given inlined return by replacing it with a branch. This
769  /// overload is called when the inlined region has more than one block.
770  void handleTerminator(Operation *op, Block *newDest) const final {
771  // Only return needs to be handled here.
772  auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
773  if (!returnOp)
774  return;
775 
776  // Replace the return with a branch to the dest.
777  OpBuilder builder(op);
778  builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
779  op->erase();
780  }
781 
782  /// Handle the given inlined return by replacing the uses of the call with the
783  /// operands of the return. This overload is called when the inlined region
784  /// only contains one block.
785  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
786  // Return will be the only terminator present.
787  auto returnOp = cast<LLVM::ReturnOp>(op);
788 
789  // Replace the values directly with the return operands.
790  assert(returnOp.getNumOperands() == valuesToRepl.size());
791  for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
792  dst.replaceAllUsesWith(src);
793  }
794 
795  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
796  Value argument,
797  DictionaryAttr argumentAttrs) const final {
798  if (std::optional<NamedAttribute> attr =
799  argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
800  Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
801  uint64_t requestedAlignment = 1;
802  if (std::optional<NamedAttribute> alignAttr =
803  argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
804  requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
805  .getValue()
806  .getLimitedValue();
807  }
808  return handleByValArgument(builder, callable, argument, elementType,
809  requestedAlignment);
810  }
811  if (argumentAttrs.contains(LLVM::LLVMDialect::getNoAliasAttrName())) {
812  if (argument.use_empty())
813  return argument;
814 
815  // This code is essentially a workaround for deficiencies in the
816  // inliner interface: We need to transform operations *after* inlined
817  // based on the argument attributes of the parameters *before* inlining.
818  // This method runs prior to actual inlining and thus cannot transform the
819  // post-inlining code, while `processInlinedCallBlocks` does not have
820  // access to pre-inlining function arguments. Additionally, it is required
821  // to distinguish which parameter an SSA value originally came from.
822  // As a workaround until this is changed: Create an ssa.copy intrinsic
823  // with the noalias attribute that can easily be found, and is extremely
824  // unlikely to exist in the code prior to inlining, using this to
825  // communicate between this method and `processInlinedCallBlocks`.
826  // TODO: Fix this by refactoring the inliner interface.
827  auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument);
828  copyOp->setDiscardableAttr(
829  builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
830  builder.getUnitAttr());
831  return copyOp;
832  }
833  return argument;
834  }
835 
836  void processInlinedCallBlocks(
837  Operation *call,
838  iterator_range<Region::iterator> inlinedBlocks) const override {
839  handleInlinedAllocas(call, inlinedBlocks);
840  handleAliasScopes(call, inlinedBlocks);
841  handleAccessGroups(call, inlinedBlocks);
842  handleLoopAnnotations(call, inlinedBlocks);
843  }
844 
845  // Keeping this (immutable) state on the interface allows us to look up
846  // StringAttrs instead of looking up strings, since StringAttrs are bound to
847  // the current context and thus cannot be initialized as static fields.
848  const DenseSet<StringAttr> disallowedFunctionAttrs;
849 };
850 
851 } // end anonymous namespace
852 
853 void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
854  dialect->addInterfaces<LLVMInlinerInterface>();
855 }
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or...
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static void handleLoopAnnotations(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Updates locations inside loop annotations to reflect that they were inlined.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined m...
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static SmallVector< Value > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static Value getUnderlyingObject(Value pointerValue)
Attempts to return the underlying pointer value that pointerValue is based on.
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This is an attribute/type replacer that is naively cached.
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
Block represents an ordered list of Operations.
Definition: Block.h:31
pred_iterator pred_begin()
Definition: Block.h:231
iterator begin()
Definition: Block.h:141
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition: Block.h:191
pred_iterator pred_end()
Definition: Block.h:234
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:30
IntegerType getI64Type()
Definition: Builders.cpp:89
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:132
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
Definition: InliningUtils.h:44
DialectInlinerInterface(Dialect *dialect)
Definition: InliningUtils.h:46
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition: Dialect.h:38
MLIRContext * getContext() const
Definition: Dialect.h:52
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
Location objects represent source locations information in MLIR.
Definition: Location.h:31
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:351
This class helps build Operations.
Definition: Builders.h:210
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:434
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:401
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:468
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:869
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition: Operation.h:230
void moveAfter(Operation *existingOp)
Unlink this operation from its current block and insert it right after existingOp which may be in the...
Definition: Operation.cpp:569
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
Block & front()
Definition: Region.h:65
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:129
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition: Value.cpp:41
static WalkResult advance()
Definition: Visitors.h:51
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
Register a replacement function for mapping a given attribute or type.
void addLLVMInlinerInterface(LLVMDialect *dialect)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:401
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:310
This trait indicates that a terminator operation is "return-like".