MLIR 23.0.0git
InlinerInterfaceImpl.cpp
Go to the documentation of this file.
1//===- InlinerInterfaceImpl.cpp - Inlining for LLVM the dialect -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Logic for inlining LLVM functions and the definition of the
10// LLVMInliningInterface.
11//
12//===----------------------------------------------------------------------===//
13
18#include "mlir/IR/Matchers.h"
22#include "llvm/ADT/ScopeExit.h"
23#include "llvm/Support/Debug.h"
24
25#include "llvm/Support/DebugLog.h"
26
27#define DEBUG_TYPE "llvm-inliner"
28
29using namespace mlir;
30
31/// Check whether the given alloca is an input to a lifetime intrinsic,
32/// optionally passing through one or more casts on the way. This is not
33/// transitive through block arguments.
34static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
35 SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
36 allocaOp->getUsers().end());
37 while (!stack.empty()) {
38 Operation *op = stack.pop_back_val();
39 if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
40 return true;
41 if (isa<LLVM::BitcastOp>(op))
42 stack.append(op->getUsers().begin(), op->getUsers().end());
43 }
44 return false;
45}
46
/// Handles alloca operations in the inlined blocks:
/// - Moves all alloca operations with a constant size in the former entry block
///   of the callee into the entry block of the caller, so they become part of
///   the function prologue/epilogue during code generation.
/// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
///   to the inlined blocks.
/// - Inserts StackSave and StackRestore operations if dynamic allocas were
///   inlined.
static void
// NOTE(review): the parameter list (the call site operation `call` and the
// inlined block range `inlinedBlocks`) is missing from this extraction —
// verify against upstream.
  // Locate the entry block of the closest callsite ancestor that has either the
  // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
  // programs, this is the LLVMFuncOp containing the call site. However, in
  // mixed-dialect programs, the callsite might be nested in another operation
  // that carries one of these traits. In such scenarios, this traversal stops
  // at the closest ancestor with either trait, ensuring visibility post
  // relocation and respecting allocation scopes.
  Block *callerEntryBlock = nullptr;
  Operation *currentOp = call;
  while (Operation *parentOp = currentOp->getParentOp()) {
    if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
        parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
      callerEntryBlock = &currentOp->getParentRegion()->front();
      break;
    }
    currentOp = parentOp;
  }

  // Avoid relocating the alloca operations if the call has been inlined into
  // the entry block already, which is typically the encompassing
  // LLVM function, or if the relevant entry block cannot be identified.
  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
  if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
    return;

  // NOTE(review): the declaration of `allocasToMove` — a vector of
  // (alloca, constant size attr, needs-lifetime flag) tuples, judging by its
  // uses below — is missing from this extraction; verify against upstream.
  bool shouldInsertLifetimes = false;
  bool hasDynamicAlloca = false;
  // Conservatively only move static alloca operations that are part of the
  // entry block and do not inspect nested regions, since they may execute
  // conditionally or have other unknown semantics.
  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
    IntegerAttr arraySize;
    // A non-constant array size means a dynamic alloca; such allocas cannot be
    // hoisted and instead require stacksave/stackrestore bracketing.
    if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
      hasDynamicAlloca = true;
      continue;
    }
    // Zero-sized allocas get no lifetime markers; allocas that already have
    // markers keep them rather than receiving duplicates.
    bool shouldInsertLifetime =
        arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
    shouldInsertLifetimes |= shouldInsertLifetime;
    allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
  }
  // Check the remaining inlined blocks for dynamic allocas as well.
  for (Block &block : llvm::drop_begin(inlinedBlocks)) {
    if (hasDynamicAlloca)
      break;
    hasDynamicAlloca =
        llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
          return !matchPattern(allocaOp.getArraySize(), m_Constant());
        });
  }
  if (allocasToMove.empty() && !hasDynamicAlloca)
    return;
  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
  Value stackPtr;
  if (hasDynamicAlloca) {
    // This may result in multiple stacksave/stackrestore intrinsics in the same
    // scope if some are already present in the body of the caller. This is not
    // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
    // other cases where the stacksave/stackrestore is redundant.
    stackPtr = LLVM::StackSaveOp::create(
        builder, call->getLoc(),
        LLVM::LLVMPointerType::get(call->getContext()));
  }
  builder.setInsertionPointToStart(callerEntryBlock);
  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
    // Materialize the constant array size in the caller's entry block so the
    // relocated alloca's operand dominates it.
    auto newConstant =
        LLVM::ConstantOp::create(builder, allocaOp->getLoc(),
                                 allocaOp.getArraySize().getType(), arraySize);
    // Insert a lifetime start intrinsic where the alloca was before moving it.
    if (shouldInsertLifetime) {
      OpBuilder::InsertionGuard insertionGuard(builder);
      builder.setInsertionPoint(allocaOp);
      LLVM::LifetimeStartOp::create(builder, allocaOp.getLoc(),
                                    allocaOp.getResult());
    }
    allocaOp->moveAfter(newConstant);
    allocaOp.getArraySizeMutable().assign(newConstant.getResult());
  }
  if (!shouldInsertLifetimes && !hasDynamicAlloca)
    return;
  // Insert a lifetime end intrinsic before each return in the callee function.
  for (Block &block : inlinedBlocks) {
    if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
      continue;
    builder.setInsertionPoint(block.getTerminator());
    // Undo any dynamic allocations on every exit path of the inlined region.
    if (hasDynamicAlloca)
      LLVM::StackRestoreOp::create(builder, call->getLoc(), stackPtr);
    for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
      if (shouldInsertLifetime)
        LLVM::LifetimeEndOp::create(builder, allocaOp.getLoc(),
                                    allocaOp.getResult());
    }
  }
}
153
/// Maps all alias scopes in the inlined operations to deep clones of the scopes
/// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
/// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
static void
// NOTE(review): the parameter list (the inlined block range) and the
// declaration of the `mapping` attribute map used below are missing from this
// extraction — verify against upstream.

  // Register handles in the walker to create the deep clones.
  // The walker ensures that an attribute is only ever walked once and does a
  // post-order walk, ensuring the domain is visited prior to the scope.
  AttrTypeWalker walker;

  // Perform the deep clones while visiting. Builders create a distinct
  // attribute to make sure that new instances are always created by the
  // uniquer.
  walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
    mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
        domainAttr.getContext(), domainAttr.getDescription());
  });

  walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
    // The domain clone already exists thanks to the post-order walk above.
    mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
        cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
        scopeAttr.getDescription());
  });

  // Map an array of scopes to an array of deep clones.
  auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
    if (!arrayAttr)
      return nullptr;

    // Create the deep clones if necessary.
    walker.walk(arrayAttr);

    return ArrayAttr::get(arrayAttr.getContext(),
                          llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
                            return mapping.lookup(attr);
                          }));
  };

  // Rewrite every inlined operation that carries alias metadata to use the
  // cloned scopes.
  for (Block &block : inlinedBlocks) {
    block.walk([&](Operation *op) {
      if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
        aliasInterface.setAliasScopes(
            convertScopeList(aliasInterface.getAliasScopesOrNull()));
        aliasInterface.setNoAliasScopes(
            convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
      }

      if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
        // Create the deep clones if necessary.
        walker.walk(noAliasScope.getScopeAttr());

        noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
            mapping.lookup(noAliasScope.getScopeAttr())));
      }
    });
  }
}
213
/// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
/// Returns null if both parameters are null. If only one attribute is null,
/// return the other.
// NOTE(review): the signature line (`static ArrayAttr concatArrayAttr(
// ArrayAttr lhs, ArrayAttr rhs)`, per the trailing index) and the declaration
// of the `result` vector are missing from this extraction — verify upstream.
  if (!lhs)
    return rhs;
  if (!rhs)
    return lhs;

  // Copy the elements of both arrays into one new array.
  llvm::append_range(result, lhs);
  llvm::append_range(result, rhs);
  return ArrayAttr::get(lhs.getContext(), result);
}
228
/// Attempts to return the set of all underlying pointer values that
/// `pointerValue` is based on. This function traverses through select
/// operations and block arguments.
static FailureOr<SmallVector<Value>>
// NOTE(review): the function name / parameter line and the declaration of the
// `result` vector are missing from this extraction — verify against upstream.
  WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
    // Attempt to advance to the source of the underlying view-like operation.
    // Examples of view-like operations include GEPOp and AddrSpaceCastOp.
    if (auto viewOp = val.getDefiningOp<ViewLikeOpInterface>()) {
      if (val == viewOp.getViewDest())
        return WalkContinuation::advanceTo(viewOp.getViewSource());
    }

    // Attempt to advance to control flow predecessors.
    std::optional<SmallVector<Value>> controlFlowPredecessors =
    // NOTE(review): the call producing the predecessors is missing from this
    // extraction — verify against upstream.
    if (controlFlowPredecessors)
      return WalkContinuation::advanceTo(*controlFlowPredecessors);

    // For all non-control flow results, consider `val` an underlying object.
    if (isa<OpResult>(val)) {
      result.push_back(val);
      return WalkContinuation::skip();
    }

    // If this place is reached, `val` is a block argument that is not
    // understood. Therefore, we conservatively interrupt.
    // Note: Dealing with function arguments is not necessary, as the slice
    // would have to go through an SSACopyOp first.
    // NOTE(review): the interrupting return statement is missing from this
    // extraction — verify against upstream.
  });

  // An interrupted walk means at least one underlying object could not be
  // identified, so the whole query fails conservatively.
  if (walkResult.wasInterrupted())
    return failure();

  return result;
}
267
/// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
/// the appropriate inlined memory operations in an attempt to preserve the
/// original semantics of the parameter attribute.
// NOTE(review): the first line of the signature (function name) is missing
// from this extraction — verify against upstream.
    Operation *call, iterator_range<Region::iterator> inlinedBlocks) {

  // First, collect all ssa copy operations, which correspond to function
  // parameters, and additionally store the noalias parameters. All parameters
  // have been marked by the `handleArgument` implementation by using the
  // `ssa.copy` intrinsic. Additionally, noalias parameters have an attached
  // `noalias` attribute to the intrinsics. These intrinsics are only meant to
  // be temporary and should therefore be deleted after we're done using them
  // here.
  // NOTE(review): the declaration of the `ssaCopies` set is missing from this
  // extraction — verify against upstream.
  SetVector<LLVM::SSACopyOp> noAliasParams;
  for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
    for (Operation *user : argument.getUsers()) {
      auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
      if (!ssaCopy)
        continue;
      ssaCopies.insert(ssaCopy);

      if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
        continue;
      noAliasParams.insert(ssaCopy);
    }
  }

  // Scope exit block to make it impossible to forget to get rid of the
  // intrinsics.
  llvm::scope_exit exit([&] {
    for (LLVM::SSACopyOp ssaCopyOp : ssaCopies) {
      ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
      ssaCopyOp->erase();
    }
  });

  // If there were no noalias parameters, we have nothing to do here.
  if (noAliasParams.empty())
    return;

  // Create a new domain for this specific inlining and a new scope for every
  // noalias parameter.
  auto functionDomain = LLVM::AliasScopeDomainAttr::get(
      call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
  // NOTE(review): the declaration of the `pointerScopes` map (ssa.copy ->
  // scope) is missing from this extraction — verify against upstream.
  for (LLVM::SSACopyOp copyOp : noAliasParams) {
    auto scope = LLVM::AliasScopeAttr::get(functionDomain);
    pointerScopes[copyOp] = scope;

    // Declare the scope right before the (not yet erased) call.
    auto builder = OpBuilder(call);
    LLVM::NoAliasScopeDeclOp::create(builder, call->getLoc(), scope);
  }

  // Go through every instruction and attempt to find which noalias parameters
  // it is definitely based on and definitely not based on.
  for (Block &inlinedBlock : inlinedBlocks) {
    inlinedBlock.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
      // Collect the pointer arguments affected by the alias scopes.
      SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();

      // Find the set of underlying pointers that this pointer is based on.
      SmallPtrSet<Value, 4> basedOnPointers;
      for (Value pointer : pointerArgs) {
        FailureOr<SmallVector<Value>> underlyingObjectSet =
            getUnderlyingObjectSet(pointer);
        // Unknown underlying objects: conservatively attach no scopes at all.
        if (failed(underlyingObjectSet))
          return;
        llvm::copy(*underlyingObjectSet,
                   std::inserter(basedOnPointers, basedOnPointers.begin()));
      }

      bool aliasesOtherKnownObject = false;
      // Go through the based on pointers and check that they are either:
      // * Constants that can be ignored (undef, poison, null pointer).
      // * Based on a pointer parameter.
      // * Other pointers that we know can't alias with our noalias parameter.
      //
      // Any other value might be a pointer based on any noalias parameter that
      // hasn't been identified. In that case conservatively don't add any
      // scopes to this operation indicating either aliasing or not aliasing
      // with any parameter.
      if (llvm::any_of(basedOnPointers, [&](Value object) {
            if (matchPattern(object, m_Constant()))
              return false;

            if (auto ssaCopy = object.getDefiningOp<LLVM::SSACopyOp>()) {
              // If that value is based on a noalias parameter, it is guaranteed
              // to not alias with any other object.
              aliasesOtherKnownObject |= !noAliasParams.contains(ssaCopy);
              return false;
            }

            if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
                    object.getDefiningOp())) {
              aliasesOtherKnownObject = true;
              return false;
            }
            return true;
          }))
        return;

      // Add all noalias parameter scopes to the noalias scope list that we are
      // not based on.
      SmallVector<Attribute> noAliasScopes;
      for (LLVM::SSACopyOp noAlias : noAliasParams) {
        if (basedOnPointers.contains(noAlias))
          continue;

        noAliasScopes.push_back(pointerScopes[noAlias]);
      }

      if (!noAliasScopes.empty())
        aliasInterface.setNoAliasScopes(
            concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
                            ArrayAttr::get(call->getContext(), noAliasScopes)));

      // Don't add alias scopes to call operations or operations that might
      // operate on pointers not based on any noalias parameter.
      // Since we add all scopes to an operation's noalias list that it
      // definitely doesn't alias, we mustn't do the same for the alias.scope
      // list if other objects are involved.
      //
      // Consider the following case:
      // %0 = llvm.alloca
      // %1 = select %magic, %0, %noalias_param
      // store 5, %1  (1) noalias=[scope(...)]
      // ...
      // store 3, %0  (2) noalias=[scope(noalias_param), scope(...)]
      //
      // We can add the scopes of any noalias parameters that aren't
      // noalias_param's scope to (1) and add all of them to (2). We mustn't add
      // the scope of noalias_param to the alias.scope list of (1) since
      // that would mean (2) cannot alias with (1) which is wrong since both may
      // store to %0.
      //
      // In conclusion, only add scopes to the alias.scope list if all pointers
      // have a corresponding scope.
      // Call operations are included in this list since we do not know whether
      // the callee accesses any memory besides the ones passed as its
      // arguments.
      if (aliasesOtherKnownObject ||
          isa<LLVM::CallOp>(aliasInterface.getOperation()))
        return;

      SmallVector<Attribute> aliasScopes;
      for (LLVM::SSACopyOp noAlias : noAliasParams)
        if (basedOnPointers.contains(noAlias))
          aliasScopes.push_back(pointerScopes[noAlias]);

      if (!aliasScopes.empty())
        aliasInterface.setAliasScopes(
            concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
                            ArrayAttr::get(call->getContext(), aliasScopes)));
    });
  }
}
425
/// Appends any alias scopes of the call operation to any inlined memory
/// operation.
static void
// NOTE(review): the function name / first parameter line (per the trailing
// index: `appendCallOpAliasScopes(Operation *call, ...)`) is missing from
// this extraction — verify against upstream.
                        iterator_range<Region::iterator> inlinedBlocks) {
  auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
  if (!callAliasInterface)
    return;

  ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
  ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
  // If the call has neither alias scopes or noalias scopes we have nothing to
  // do here.
  if (!aliasScopes && !noAliasScopes)
    return;

  // Simply append the call op's alias and noalias scopes to any operation
  // implementing AliasAnalysisOpInterface.
  for (Block &block : inlinedBlocks) {
    block.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
      if (aliasScopes)
        aliasInterface.setAliasScopes(concatArrayAttr(
            aliasInterface.getAliasScopesOrNull(), aliasScopes));

      if (noAliasScopes)
        aliasInterface.setNoAliasScopes(concatArrayAttr(
            aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
    });
  }
}
456
/// Handles all interactions with alias scopes during inlining.
static void handleAliasScopes(Operation *call,
                              iterator_range<Region::iterator> inlinedBlocks) {
  // Clone existing scopes first, so the scopes created and appended by the two
  // subsequent steps operate on this inlining's private copies.
  deepCloneAliasScopes(inlinedBlocks);
  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
  appendCallOpAliasScopes(call, inlinedBlocks);
}
464
/// Appends any access groups of the call operation to any inlined memory
/// operation.
// NOTE(review): the function name / first parameter line (per the trailing
// index: `static void handleAccessGroups(Operation *call, ...)`) is missing
// from this extraction — verify against upstream.
                               iterator_range<Region::iterator> inlinedBlocks) {
  auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
  if (!callAccessGroupInterface)
    return;

  auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
  if (!accessGroups)
    return;

  // Simply append the call op's access groups to any operation implementing
  // AccessGroupOpInterface.
  for (Block &block : inlinedBlocks)
    for (auto accessGroupOpInterface :
         block.getOps<LLVM::AccessGroupOpInterface>())
      accessGroupOpInterface.setAccessGroups(concatArrayAttr(
          accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
}
485
/// Updates locations inside loop annotations to reflect that they were inlined.
static void
// NOTE(review): the function name / first parameter line (per the trailing
// index: `handleLoopAnnotations(Operation *call, ...)`) is missing from this
// extraction — verify against upstream.
                      iterator_range<Region::iterator> inlinedBlocks) {
  // Attempt to extract a DISubprogram from the callee.
  auto func = call->getParentOfType<FunctionOpInterface>();
  if (!func)
    return;
  LocationAttr funcLoc = func->getLoc();
  auto fusedLoc = dyn_cast_if_present<FusedLoc>(funcLoc);
  if (!fusedLoc)
    return;
  auto scope =
      dyn_cast_if_present<LLVM::DISubprogramAttr>(fusedLoc.getMetadata());
  if (!scope)
    return;

  // Helper to build a new fused location that reflects the inlining of the loop
  // annotation.
  auto updateLoc = [&](FusedLoc loc) -> FusedLoc {
    if (!loc)
      return {};
    // Wrap the original location in a callsite location pointing at the call.
    Location callSiteLoc = CallSiteLoc::get(loc, call->getLoc());
    return FusedLoc::get(loc.getContext(), callSiteLoc, scope);
  };

  AttrTypeReplacer replacer;
  replacer.addReplacement([&](LLVM::LoopAnnotationAttr loopAnnotation)
                              -> std::pair<Attribute, WalkResult> {
    FusedLoc newStartLoc = updateLoc(loopAnnotation.getStartLoc());
    FusedLoc newEndLoc = updateLoc(loopAnnotation.getEndLoc());
    // Nothing to rewrite if the annotation carries no locations.
    if (!newStartLoc && !newEndLoc)
      return {loopAnnotation, WalkResult::advance()};
    // Rebuild the annotation with only the start/end locations replaced.
    auto newLoopAnnotation = LLVM::LoopAnnotationAttr::get(
        loopAnnotation.getContext(), loopAnnotation.getDisableNonforced(),
        loopAnnotation.getVectorize(), loopAnnotation.getInterleave(),
        loopAnnotation.getUnroll(), loopAnnotation.getUnrollAndJam(),
        loopAnnotation.getLicm(), loopAnnotation.getDistribute(),
        loopAnnotation.getPipeline(), loopAnnotation.getPeeled(),
        loopAnnotation.getUnswitch(), loopAnnotation.getMustProgress(),
        loopAnnotation.getIsVectorized(), newStartLoc, newEndLoc,
        loopAnnotation.getParallelAccesses());
    // Needs to advance, as loop annotations can be nested.
    return {newLoopAnnotation, WalkResult::advance()};
  });

  for (Block &block : inlinedBlocks)
    for (Operation &op : block)
      replacer.recursivelyReplaceElementsIn(&op);
}
536
537/// If `requestedAlignment` is higher than the alignment specified on `alloca`,
538/// realigns `alloca` if this does not exceed the natural stack alignment.
539/// Returns the post-alignment of `alloca`, whether it was realigned or not.
540static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
541 uint64_t requestedAlignment,
542 DataLayout const &dataLayout) {
543 uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
544 if (requestedAlignment <= allocaAlignment)
545 // No realignment necessary.
546 return allocaAlignment;
547 uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
548 // If the natural stack alignment is not specified, the data layout returns
549 // zero. Optimistically allow realignment in this case.
550 if (naturalStackAlignmentBits == 0 ||
551 // If the requested alignment exceeds the natural stack alignment, this
552 // will trigger a dynamic stack realignment, so we prefer to copy...
553 8 * requestedAlignment <= naturalStackAlignmentBits ||
554 // ...unless the alloca already triggers dynamic stack realignment. Then
555 // we might as well further increase the alignment to avoid a copy.
556 8 * allocaAlignment > naturalStackAlignmentBits) {
557 alloca.setAlignment(requestedAlignment);
558 allocaAlignment = requestedAlignment;
559 }
560 return allocaAlignment;
561}
562
/// Tries to find and return the alignment of the pointer `value` by looking for
/// an alignment attribute on the defining allocation op or function argument.
/// If the found alignment is lower than `requestedAlignment`, tries to realign
/// the pointer, then returns the resulting post-alignment, regardless of
/// whether it was realigned or not. If no existing alignment attribute is
/// found, returns 1 (i.e., assume that no alignment is guaranteed).
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
                                      DataLayout const &dataLayout) {
  if (Operation *definingOp = value.getDefiningOp()) {
    if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
      return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
                                         dataLayout);
    if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
      // NOTE(review): the symbol-table lookup line resolving the referenced
      // global is missing from this extraction — verify against upstream.
                definingOp, addressOf.getGlobalNameAttr()))
        return global.getAlignment().value_or(1);
    // We don't currently handle this operation; assume no alignment.
    return 1;
  }
  // Since there is no defining op, this is a block argument. Probably this
  // comes directly from a function argument, so check that this is the case.
  Operation *parentOp = value.getParentBlock()->getParentOp();
  if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
    // Use the alignment attribute set for this argument in the parent function
    // if it has been set.
    auto blockArg = llvm::cast<BlockArgument>(value);
    if (Attribute alignAttr = func.getArgAttr(
            blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
      return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
  }
  // We didn't find anything useful; assume no alignment.
  return 1;
}
596
/// Introduces a new alloca and copies the memory pointed to by `argument` to
/// the address of the new alloca, then returns the value of the new alloca.
// NOTE(review): the first line of the signature (function name plus builder
// and location parameters) is missing from this extraction — verify upstream.
                                 Value argument, Type elementType,
                                 uint64_t elementTypeSize,
                                 uint64_t targetAlignment) {
  // Allocate the new value on the stack.
  Value allocaOp;
  {
    // Walk up from the call site to find the innermost AutomaticAllocationScope
    // (e.g. an llvm.func or scf.forall). Placing the alloca at the entry block
    // of that scope keeps it inside parallel regions rather than hoisting it
    // out, while still landing at the function entry block for the common
    // non-parallel case.
    OpBuilder::InsertionGuard insertionGuard(builder);
    Operation *scope = builder.getInsertionBlock()->getParentOp();
    // NOTE(review): the loop walking `scope` up to the enclosing automatic
    // allocation scope is missing from this extraction — verify upstream.
    Block *entryBlock = &scope->getRegion(0).front();
    builder.setInsertionPointToStart(entryBlock);
    Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                         builder.getI64IntegerAttr(1));
    allocaOp = LLVM::AllocaOp::create(builder, loc, argument.getType(),
                                      elementType, one, targetAlignment);
  }
  // Copy the pointee to the newly allocated value.
  Value copySize =
      LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                               builder.getI64IntegerAttr(elementTypeSize));
  // Preserve the alignment of the destination (alloca) in the memcpy's
  // arg_attrs.
  NamedAttribute dstAlignAttr =
      builder.getNamedAttr(LLVM::LLVMDialect::getAlignAttrName(),
                           builder.getI64IntegerAttr(targetAlignment));
  ArrayAttr argAttrs =
      builder.getArrayAttr({builder.getDictionaryAttr({dstAlignAttr})});
  LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
                         /*isVolatile=*/false,
                         /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
                         /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr, argAttrs,
                         /*res_attrs=*/nullptr);
  return allocaOp;
}
640
/// Handles a function argument marked with the byval attribute by introducing a
/// memcpy or realigning the defining operation, if required either due to the
/// pointee being writeable in the callee, and/or due to an alignment mismatch.
/// `requestedAlignment` specifies the alignment set in the "align" argument
/// attribute (or 1 if no align attribute was set).
static Value handleByValArgument(OpBuilder &builder, Operation *callable,
                                 Value argument, Type elementType,
                                 uint64_t requestedAlignment) {
  auto func = cast<LLVM::LLVMFuncOp>(callable);
  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryEffectsAttr();
  // If there is no memory effects attribute, assume that the function is
  // not read-only.
  bool isReadOnly = memoryEffects &&
                    memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
                    memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
  // Check if there's an alignment mismatch requiring us to copy.
  DataLayout dataLayout = DataLayout::closest(callable);
  uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
  if (isReadOnly) {
    // A read-only pointee never needs a copy; it only may need realignment.
    if (requestedAlignment <= minimumAlignment)
      return argument;
    uint64_t currentAlignment =
        tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
    if (currentAlignment >= requestedAlignment)
      return argument;
  }
  // Writable or insufficiently aligned: copy the pointee into a fresh,
  // suitably aligned alloca.
  uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
  // NOTE(review): the line invoking the copy helper (returning the new
  // alloca) is missing from this extraction — verify against upstream.
      builder, argument.getLoc(), argument, elementType,
      dataLayout.getTypeSize(elementType), targetAlignment);
}
672
namespace {
/// Implements the inliner interface for the LLVM dialect: decides which calls
/// and callees may be inlined, rewrites callee terminators, and lowers
/// byval/noalias argument attributes during inlining.
struct LLVMInlinerInterface : public DialectInlinerInterface {
  using DialectInlinerInterface::DialectInlinerInterface;

  LLVMInlinerInterface(Dialect *dialect)
      : DialectInlinerInterface(dialect),
        // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
        disallowedFunctionAttrs({
            StringAttr::get(dialect->getContext(), "noduplicate"),
            StringAttr::get(dialect->getContext(), "presplitcoroutine"),
            StringAttr::get(dialect->getContext(), "returns_twice"),
            StringAttr::get(dialect->getContext(), "strictfp"),
        }) {}

  /// Checks a series of preconditions (call/callee op kinds, no_inline
  /// markers, varargs, unsupported argument attributes, personality,
  /// disallowed passthrough attributes, well-formed terminators) and rejects
  /// inlining with a debug log message when any of them fails.
  bool isLegalToInline(Operation *call, Operation *callable,
                       bool wouldBeCloned) const final {
    auto callOp = dyn_cast<LLVM::CallOp>(call);
    if (!callOp) {
      LDBG() << "Cannot inline: call is not an '"
             << LLVM::CallOp::getOperationName() << "' op";
      return false;
    }
    if (callOp.getNoInline()) {
      LDBG() << "Cannot inline: call is marked no_inline";
      return false;
    }
    auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
    if (!funcOp) {
      LDBG() << "Cannot inline: callable is not an '"
             << LLVM::LLVMFuncOp::getOperationName() << "' op";
      return false;
    }
    if (funcOp.isNoInline()) {
      LDBG() << "Cannot inline: function is marked no_inline";
      return false;
    }
    if (funcOp.isVarArg()) {
      LDBG() << "Cannot inline: callable is variadic";
      return false;
    }
    // TODO: Generate aliasing metadata from noalias result attributes.
    if (auto attrs = funcOp.getArgAttrs()) {
      for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
        if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
          LDBG() << "Cannot inline " << funcOp.getSymName()
                 << ": inalloca arguments not supported";
          return false;
        }
      }
    }
    // TODO: Handle exceptions.
    if (funcOp.getPersonality()) {
      LDBG() << "Cannot inline " << funcOp.getSymName()
             << ": unhandled function personality";
      return false;
    }
    if (funcOp.getPassthrough()) {
      // TODO: Used attributes should not be passthrough.
      if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
            auto stringAttr = dyn_cast<StringAttr>(attr);
            if (!stringAttr)
              return false;
            if (disallowedFunctionAttrs.contains(stringAttr)) {
              LDBG() << "Cannot inline " << funcOp.getSymName()
                     << ": found disallowed function attribute " << stringAttr;
              return true;
            }
            return false;
          }))
        return false;
    }
    // Refuse to inline if any block in the callee ends with an op that does
    // not have the terminator trait. The MLIR verifier conservatively accepts
    // unregistered ops as potential terminators (via mightHaveTrait), but
    // handleTerminator uses cast<LLVM::ReturnOp> in the single-block path and
    // would crash on such ops. Registered terminators from other dialects
    // (e.g. cf.br) are safe: the multi-block path uses dyn_cast and skips
    // non-llvm.return ops gracefully.
    for (Block &block : funcOp.getBody()) {
      if (!block.empty() && !block.back().hasTrait<OpTrait::IsTerminator>()) {
        LDBG() << "Cannot inline " << funcOp.getSymName()
               << ": block ends with non-terminator op";
        return false;
      }
    }
    return true;
  }

  /// Region-to-region inlining is always allowed for the LLVM dialect.
  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
    return true;
  }

  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
    // The inliner cannot handle variadic function arguments, and blocktag
    // operations prevent inlining since the blockaddress operations
    // reference them via the callee symbol.
    return !(isa<LLVM::VaStartOp>(op) || isa<LLVM::BlockTagOp>(op));
  }

  /// Handle the given inlined return by replacing it with a branch. This
  /// overload is called when the inlined region has more than one block.
  void handleTerminator(Operation *op, Block *newDest) const final {
    // Only return needs to be handled here.
    auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
    if (!returnOp)
      return;

    // Replace the return with a branch to the dest.
    OpBuilder builder(op);
    LLVM::BrOp::create(builder, op->getLoc(), returnOp.getOperands(), newDest);
    op->erase();
  }

  /// Disallows collapsing a single-block inlined region when its terminator is
  /// llvm.unreachable, since that op carries no operands to forward.
  bool allowSingleBlockOptimization(
      iterator_range<Region::iterator> inlinedBlocks) const final {
    return !(!inlinedBlocks.empty() &&
             isa<LLVM::UnreachableOp>(inlinedBlocks.begin()->getTerminator()));
  }

  /// Handle the given inlined return by replacing the uses of the call with the
  /// operands of the return. This overload is called when the inlined region
  /// only contains one block.
  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
    // Return will be the only terminator present.
    auto returnOp = cast<LLVM::ReturnOp>(op);

    // Replace the values directly with the return operands.
    assert(returnOp.getNumOperands() == valuesToRepl.size());
    for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
      dst.replaceAllUsesWith(src);
  }

  /// Rewrites a call argument prior to inlining: byval arguments may be copied
  /// or realigned, and every argument is wrapped in a temporary ssa.copy
  /// intrinsic so `processInlinedCallBlocks` can recover parameter identity.
  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
                       Value argument,
                       DictionaryAttr argumentAttrs) const final {
    if (std::optional<NamedAttribute> attr =
            argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
      Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
      // Default to an alignment of 1 when no "align" attribute is present.
      uint64_t requestedAlignment = 1;
      if (std::optional<NamedAttribute> alignAttr =
              argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
        requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
                                 .getValue()
                                 .getLimitedValue();
      }
      return handleByValArgument(builder, callable, argument, elementType,
                                 requestedAlignment);
    }

    // This code is essentially a workaround for deficiencies in the inliner
    // interface: We need to transform operations *after* inlined based on the
    // argument attributes of the parameters *before* inlining. This method runs
    // prior to actual inlining and thus cannot transform the post-inlining
    // code, while `processInlinedCallBlocks` does not have access to
    // pre-inlining function arguments. Additionally, it is required to
    // distinguish which parameter an SSA value originally came from. As a
    // workaround until this is changed: Create an ssa.copy intrinsic with the
    // noalias attribute (when it was present before) that can easily be found,
    // and is extremely unlikely to exist in the code prior to inlining, using
    // this to communicate between this method and `processInlinedCallBlocks`.
    // TODO: Fix this by refactoring the inliner interface.
    auto copyOp = LLVM::SSACopyOp::create(builder, call->getLoc(), argument);
    if (argumentAttrs.contains(LLVM::LLVMDialect::getNoAliasAttrName()))
      copyOp->setDiscardableAttr(
          builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
          builder.getUnitAttr());
    return copyOp;
  }

  /// Post-inlining fixups: relocate/annotate allocas, rewrite alias scopes,
  /// propagate access groups, and update loop-annotation locations.
  void processInlinedCallBlocks(
      Operation *call,
      iterator_range<Region::iterator> inlinedBlocks) const override {
    handleInlinedAllocas(call, inlinedBlocks);
    handleAliasScopes(call, inlinedBlocks);
    handleAccessGroups(call, inlinedBlocks);
    handleLoopAnnotations(call, inlinedBlocks);
  }

  // Keeping this (immutable) state on the interface allows us to look up
  // StringAttrs instead of looking up strings, since StringAttrs are bound to
  // the current context and thus cannot be initialized as static fields.
  const DenseSet<StringAttr> disallowedFunctionAttrs;
};

} // end anonymous namespace
858
// NOTE(review): the enclosing function's signature line (the registration
// entry point taking a DialectRegistry) is missing from this extraction —
// verify against upstream.
  // Attach the LLVM inliner interface to the LLVM dialect once it is loaded.
  registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
    dialect->addInterfaces<LLVMInlinerInterface>();
  });
}
864
// NOTE(review): the enclosing function's signature line (the NVVM registration
// entry point taking a DialectRegistry) is missing from this extraction —
// verify against upstream.
  // Reuse the same inliner interface for the NVVM dialect once it is loaded.
  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
    dialect->addInterfaces<LLVMInlinerInterface>();
  });
}
lhs
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or...
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static void handleLoopAnnotations(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Updates locations inside loop annotations to reflect that they were inlined.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined m...
static FailureOr< SmallVector< Value > > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks:
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
ArrayAttr()
This is an attribute/type replacer that is naively cached.
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition Attributes.h:25
Block represents an ordered list of Operations.
Definition Block.h:33
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition Block.h:203
iterator begin()
Definition Block.h:153
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
IntegerType getI64Type()
Definition Builders.cpp:69
IntegerAttr getI64IntegerAttr(int64_t value)
Definition Builders.cpp:116
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition Builders.h:93
ArrayAttr getArrayAttr(ArrayRef< Attribute > value)
Definition Builders.cpp:270
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
Definition Builders.cpp:108
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Definition Builders.cpp:98
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
MLIRContext * getContext() const
Definition Dialect.h:52
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
Location objects represent source locations information in MLIR.
Definition Location.h:32
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
NamedAttribute represents a combination of a name and an Attribute value.
Definition Attributes.h:164
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:350
This class helps build Operations.
Definition Builders.h:209
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:433
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:400
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition Builders.h:444
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition Operation.h:715
bool mightHaveTrait()
Returns true if the operation might have the provided trait.
Definition Operation.h:786
Operation * getParentWithTrait()
Returns the closest surrounding parent operation with trait Trait.
Definition Operation.h:277
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:244
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition Operation.h:255
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition Operation.h:259
user_range getUsers()
Returns a range of all users.
Definition Operation.h:902
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition Operation.h:251
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:237
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
static Operation * lookupNearestSymbolFrom(Operation *from, StringAttr symbol)
Returns the operation registered with the given symbol name within the closest parent operation of,...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition Value.cpp:46
Location getLoc() const
Return the location of this value.
Definition Value.cpp:24
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
A class to signal how to proceed with the walk of the backward slice:
Definition SliceWalk.h:20
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition SliceWalk.h:60
static WalkContinuation skip()
Creates a continuation that advances the walk without adding any predecessor values to the work list.
Definition SliceWalk.h:55
static WalkContinuation advanceTo(mlir::ValueRange nextValues)
Creates a continuation that adds the user-specified nextValues to the work list and advances the walk...
Definition SliceWalk.h:49
static WalkContinuation interrupt()
Creates a continuation that interrupts the walk.
Definition SliceWalk.h:43
static WalkResult advance()
Definition WalkResult.h:47
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
AttrTypeReplacerBase.
void registerInlinerInterface(DialectRegistry &registry)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
void registerInlinerInterface(DialectRegistry &registry)
Register the NVVMInlinerInterface implementation of DialectInlinerInterface with the NVVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition Matchers.h:490
std::optional< SmallVector< Value > > getControlFlowPredecessors(Value value)
Computes a vector of all control predecessors of value.
Definition SliceWalk.cpp:60
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
Definition LLVM.h:120
llvm::SetVector< T, Vector, Set, N > SetVector
Definition LLVM.h:123
WalkContinuation walkSlice(mlir::ValueRange rootValues, WalkCallback walkCallback)
Walks the slice starting from the rootValues using a depth-first traversal.
Definition SliceWalk.cpp:6
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:118
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition Matchers.h:369
This trait indicates that a terminator operation is "return-like".