MLIR 22.0.0git
InlinerInterfaceImpl.cpp
Go to the documentation of this file.
1//===- InlinerInterfaceImpl.cpp - Inlining for LLVM the dialect -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Logic for inlining LLVM functions and the definition of the
10// LLVMInliningInterface.
11//
12//===----------------------------------------------------------------------===//
13
18#include "mlir/IR/Matchers.h"
22#include "llvm/ADT/ScopeExit.h"
23#include "llvm/Support/Debug.h"
24
25#include "llvm/Support/DebugLog.h"
26
27#define DEBUG_TYPE "llvm-inliner"
28
29using namespace mlir;
30
31/// Check whether the given alloca is an input to a lifetime intrinsic,
32/// optionally passing through one or more casts on the way. This is not
33/// transitive through block arguments.
34static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
35 SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
36 allocaOp->getUsers().end());
37 while (!stack.empty()) {
38 Operation *op = stack.pop_back_val();
39 if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
40 return true;
41 if (isa<LLVM::BitcastOp>(op))
42 stack.append(op->getUsers().begin(), op->getUsers().end());
43 }
44 return false;
45}
46
47/// Handles alloca operations in the inlined blocks:
48/// - Moves all alloca operations with a constant size in the former entry block
49/// of the callee into the entry block of the caller, so they become part of
50/// the function prologue/epilogue during code generation.
51/// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
52/// to the inlined blocks.
53/// - Inserts StackSave and StackRestore operations if dynamic allocas were
54/// inlined.
55static void
58 // Locate the entry block of the closest callsite ancestor that has either the
59 // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
60 // programs, this is the LLVMFuncOp containing the call site. However, in
61 // mixed-dialect programs, the callsite might be nested in another operation
62 // that carries one of these traits. In such scenarios, this traversal stops
63 // at the closest ancestor with either trait, ensuring visibility post
64 // relocation and respecting allocation scopes.
65 Block *callerEntryBlock = nullptr;
66 Operation *currentOp = call;
67 while (Operation *parentOp = currentOp->getParentOp()) {
68 if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
69 parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
70 callerEntryBlock = &currentOp->getParentRegion()->front();
71 break;
72 }
73 currentOp = parentOp;
74 }
75
76 // Avoid relocating the alloca operations if the call has been inlined into
77 // the entry block already, which is typically the encompassing
78 // LLVM function, or if the relevant entry block cannot be identified.
79 Block *calleeEntryBlock = &(*inlinedBlocks.begin());
80 if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
81 return;
82
84 bool shouldInsertLifetimes = false;
85 bool hasDynamicAlloca = false;
86 // Conservatively only move static alloca operations that are part of the
87 // entry block and do not inspect nested regions, since they may execute
88 // conditionally or have other unknown semantics.
89 for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
90 IntegerAttr arraySize;
91 if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
92 hasDynamicAlloca = true;
93 continue;
94 }
95 bool shouldInsertLifetime =
96 arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
97 shouldInsertLifetimes |= shouldInsertLifetime;
98 allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
99 }
100 // Check the remaining inlined blocks for dynamic allocas as well.
101 for (Block &block : llvm::drop_begin(inlinedBlocks)) {
102 if (hasDynamicAlloca)
103 break;
104 hasDynamicAlloca =
105 llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
106 return !matchPattern(allocaOp.getArraySize(), m_Constant());
107 });
108 }
109 if (allocasToMove.empty() && !hasDynamicAlloca)
110 return;
111 OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
112 Value stackPtr;
113 if (hasDynamicAlloca) {
114 // This may result in multiple stacksave/stackrestore intrinsics in the same
115 // scope if some are already present in the body of the caller. This is not
116 // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
117 // other cases where the stacksave/stackrestore is redundant.
118 stackPtr = LLVM::StackSaveOp::create(
119 builder, call->getLoc(),
120 LLVM::LLVMPointerType::get(call->getContext()));
121 }
122 builder.setInsertionPointToStart(callerEntryBlock);
123 for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
124 auto newConstant =
125 LLVM::ConstantOp::create(builder, allocaOp->getLoc(),
126 allocaOp.getArraySize().getType(), arraySize);
127 // Insert a lifetime start intrinsic where the alloca was before moving it.
128 if (shouldInsertLifetime) {
129 OpBuilder::InsertionGuard insertionGuard(builder);
130 builder.setInsertionPoint(allocaOp);
131 LLVM::LifetimeStartOp::create(builder, allocaOp.getLoc(),
132 allocaOp.getResult());
133 }
134 allocaOp->moveAfter(newConstant);
135 allocaOp.getArraySizeMutable().assign(newConstant.getResult());
136 }
137 if (!shouldInsertLifetimes && !hasDynamicAlloca)
138 return;
139 // Insert a lifetime end intrinsic before each return in the callee function.
140 for (Block &block : inlinedBlocks) {
141 if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
142 continue;
143 builder.setInsertionPoint(block.getTerminator());
144 if (hasDynamicAlloca)
145 LLVM::StackRestoreOp::create(builder, call->getLoc(), stackPtr);
146 for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
147 if (shouldInsertLifetime)
148 LLVM::LifetimeEndOp::create(builder, allocaOp.getLoc(),
149 allocaOp.getResult());
150 }
151 }
152}
153
154/// Maps all alias scopes in the inlined operations to deep clones of the scopes
155/// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
156/// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
157static void
160
161 // Register handles in the walker to create the deep clones.
162 // The walker ensures that an attribute is only ever walked once and does a
163 // post-order walk, ensuring the domain is visited prior to the scope.
164 AttrTypeWalker walker;
165
166 // Perform the deep clones while visiting. Builders create a distinct
167 // attribute to make sure that new instances are always created by the
168 // uniquer.
169 walker.addWalk([&](LLVM::AliasScopeDomainAttr domainAttr) {
170 mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
171 domainAttr.getContext(), domainAttr.getDescription());
172 });
173
174 walker.addWalk([&](LLVM::AliasScopeAttr scopeAttr) {
175 mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
176 cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
177 scopeAttr.getDescription());
178 });
179
180 // Map an array of scopes to an array of deep clones.
181 auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
182 if (!arrayAttr)
183 return nullptr;
184
185 // Create the deep clones if necessary.
186 walker.walk(arrayAttr);
187
188 return ArrayAttr::get(arrayAttr.getContext(),
189 llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
190 return mapping.lookup(attr);
191 }));
192 };
193
194 for (Block &block : inlinedBlocks) {
195 block.walk([&](Operation *op) {
196 if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
197 aliasInterface.setAliasScopes(
198 convertScopeList(aliasInterface.getAliasScopesOrNull()));
199 aliasInterface.setNoAliasScopes(
200 convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
201 }
202
203 if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
204 // Create the deep clones if necessary.
205 walker.walk(noAliasScope.getScopeAttr());
206
207 noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
208 mapping.lookup(noAliasScope.getScopeAttr())));
209 }
210 });
211 }
212}
213
214/// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
215/// Returns null if both parameters are null. If only one attribute is null,
216/// return the other.
218 if (!lhs)
219 return rhs;
220 if (!rhs)
221 return lhs;
222
224 llvm::append_range(result, lhs);
225 llvm::append_range(result, rhs);
226 return ArrayAttr::get(lhs.getContext(), result);
227}
228
229/// Attempts to return the set of all underlying pointer values that
230/// `pointerValue` is based on. This function traverses through select
231/// operations and block arguments.
232static FailureOr<SmallVector<Value>>
235 WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
236 // Attempt to advance to the source of the underlying view-like operation.
237 // Examples of view-like operations include GEPOp and AddrSpaceCastOp.
238 if (auto viewOp = val.getDefiningOp<ViewLikeOpInterface>()) {
239 if (val == viewOp.getViewDest())
240 return WalkContinuation::advanceTo(viewOp.getViewSource());
241 }
242
243 // Attempt to advance to control flow predecessors.
244 std::optional<SmallVector<Value>> controlFlowPredecessors =
246 if (controlFlowPredecessors)
247 return WalkContinuation::advanceTo(*controlFlowPredecessors);
248
249 // For all non-control flow results, consider `val` an underlying object.
250 if (isa<OpResult>(val)) {
251 result.push_back(val);
252 return WalkContinuation::skip();
253 }
254
255 // If this place is reached, `val` is a block argument that is not
256 // understood. Therefore, we conservatively interrupt.
257 // Note: Dealing with function arguments is not necessary, as the slice
258 // would have to go through an SSACopyOp first.
260 });
261
262 if (walkResult.wasInterrupted())
263 return failure();
264
265 return result;
266}
267
268/// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
269/// the appropriate inlined memory operations in an attempt to preserve the
270/// original semantics of the parameter attribute.
272 Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
273
274 // First, collect all ssa copy operations, which correspond to function
275 // parameters, and additionally store the noalias parameters. All parameters
276 // have been marked by the `handleArgument` implementation by using the
277 // `ssa.copy` intrinsic. Additionally, noalias parameters have an attached
278 // `noalias` attribute to the intrinsics. These intrinsics are only meant to
279 // be temporary and should therefore be deleted after we're done using them
280 // here.
282 SetVector<LLVM::SSACopyOp> noAliasParams;
283 for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
284 for (Operation *user : argument.getUsers()) {
285 auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
286 if (!ssaCopy)
287 continue;
288 ssaCopies.insert(ssaCopy);
289
290 if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
291 continue;
292 noAliasParams.insert(ssaCopy);
293 }
294 }
295
296 // Scope exit block to make it impossible to forget to get rid of the
297 // intrinsics.
298 auto exit = llvm::make_scope_exit([&] {
299 for (LLVM::SSACopyOp ssaCopyOp : ssaCopies) {
300 ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
301 ssaCopyOp->erase();
302 }
303 });
304
305 // If there were no noalias parameters, we have nothing to do here.
306 if (noAliasParams.empty())
307 return;
308
309 // Create a new domain for this specific inlining and a new scope for every
310 // noalias parameter.
311 auto functionDomain = LLVM::AliasScopeDomainAttr::get(
312 call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
314 for (LLVM::SSACopyOp copyOp : noAliasParams) {
315 auto scope = LLVM::AliasScopeAttr::get(functionDomain);
316 pointerScopes[copyOp] = scope;
317
318 auto builder = OpBuilder(call);
319 LLVM::NoAliasScopeDeclOp::create(builder, call->getLoc(), scope);
320 }
321
322 // Go through every instruction and attempt to find which noalias parameters
323 // it is definitely based on and definitely not based on.
324 for (Block &inlinedBlock : inlinedBlocks) {
325 inlinedBlock.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
326 // Collect the pointer arguments affected by the alias scopes.
327 SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
328
329 // Find the set of underlying pointers that this pointer is based on.
330 SmallPtrSet<Value, 4> basedOnPointers;
331 for (Value pointer : pointerArgs) {
332 FailureOr<SmallVector<Value>> underlyingObjectSet =
333 getUnderlyingObjectSet(pointer);
334 if (failed(underlyingObjectSet))
335 return;
336 llvm::copy(*underlyingObjectSet,
337 std::inserter(basedOnPointers, basedOnPointers.begin()));
338 }
339
340 bool aliasesOtherKnownObject = false;
341 // Go through the based on pointers and check that they are either:
342 // * Constants that can be ignored (undef, poison, null pointer).
343 // * Based on a pointer parameter.
344 // * Other pointers that we know can't alias with our noalias parameter.
345 //
346 // Any other value might be a pointer based on any noalias parameter that
347 // hasn't been identified. In that case conservatively don't add any
348 // scopes to this operation indicating either aliasing or not aliasing
349 // with any parameter.
350 if (llvm::any_of(basedOnPointers, [&](Value object) {
351 if (matchPattern(object, m_Constant()))
352 return false;
353
354 if (auto ssaCopy = object.getDefiningOp<LLVM::SSACopyOp>()) {
355 // If that value is based on a noalias parameter, it is guaranteed
356 // to not alias with any other object.
357 aliasesOtherKnownObject |= !noAliasParams.contains(ssaCopy);
358 return false;
359 }
360
361 if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
362 object.getDefiningOp())) {
363 aliasesOtherKnownObject = true;
364 return false;
365 }
366 return true;
367 }))
368 return;
369
370 // Add all noalias parameter scopes to the noalias scope list that we are
371 // not based on.
372 SmallVector<Attribute> noAliasScopes;
373 for (LLVM::SSACopyOp noAlias : noAliasParams) {
374 if (basedOnPointers.contains(noAlias))
375 continue;
376
377 noAliasScopes.push_back(pointerScopes[noAlias]);
378 }
379
380 if (!noAliasScopes.empty())
381 aliasInterface.setNoAliasScopes(
382 concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
383 ArrayAttr::get(call->getContext(), noAliasScopes)));
384
385 // Don't add alias scopes to call operations or operations that might
386 // operate on pointers not based on any noalias parameter.
387 // Since we add all scopes to an operation's noalias list that it
388 // definitely doesn't alias, we mustn't do the same for the alias.scope
389 // list if other objects are involved.
390 //
391 // Consider the following case:
392 // %0 = llvm.alloca
393 // %1 = select %magic, %0, %noalias_param
394 // store 5, %1 (1) noalias=[scope(...)]
395 // ...
396 // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
397 //
398 // We can add the scopes of any noalias parameters that aren't
399 // noalias_param's scope to (1) and add all of them to (2). We mustn't add
400 // the scope of noalias_param to the alias.scope list of (1) since
401 // that would mean (2) cannot alias with (1) which is wrong since both may
402 // store to %0.
403 //
404 // In conclusion, only add scopes to the alias.scope list if all pointers
405 // have a corresponding scope.
406 // Call operations are included in this list since we do not know whether
407 // the callee accesses any memory besides the ones passed as its
408 // arguments.
409 if (aliasesOtherKnownObject ||
410 isa<LLVM::CallOp>(aliasInterface.getOperation()))
411 return;
412
413 SmallVector<Attribute> aliasScopes;
414 for (LLVM::SSACopyOp noAlias : noAliasParams)
415 if (basedOnPointers.contains(noAlias))
416 aliasScopes.push_back(pointerScopes[noAlias]);
417
418 if (!aliasScopes.empty())
419 aliasInterface.setAliasScopes(
420 concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
421 ArrayAttr::get(call->getContext(), aliasScopes)));
422 });
423 }
424}
425
426/// Appends any alias scopes of the call operation to any inlined memory
427/// operation.
428static void
430 iterator_range<Region::iterator> inlinedBlocks) {
431 auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
432 if (!callAliasInterface)
433 return;
434
435 ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
436 ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
437 // If the call has neither alias scopes or noalias scopes we have nothing to
438 // do here.
439 if (!aliasScopes && !noAliasScopes)
440 return;
441
442 // Simply append the call op's alias and noalias scopes to any operation
443 // implementing AliasAnalysisOpInterface.
444 for (Block &block : inlinedBlocks) {
445 block.walk([&](LLVM::AliasAnalysisOpInterface aliasInterface) {
446 if (aliasScopes)
447 aliasInterface.setAliasScopes(concatArrayAttr(
448 aliasInterface.getAliasScopesOrNull(), aliasScopes));
449
450 if (noAliasScopes)
451 aliasInterface.setNoAliasScopes(concatArrayAttr(
452 aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
453 });
454 }
455}
456
/// Handles all interactions with alias scopes during inlining.
static void handleAliasScopes(Operation *call,
                              iterator_range<Region::iterator> inlinedBlocks) {
  // Clone the scopes used inside the inlined blocks first, so that the scopes
  // appended by the two subsequent steps (which belong to this particular
  // inlining) are not themselves replaced by clones.
  deepCloneAliasScopes(inlinedBlocks);
  createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
  appendCallOpAliasScopes(call, inlinedBlocks);
}
464
465/// Appends any access groups of the call operation to any inlined memory
466/// operation.
468 iterator_range<Region::iterator> inlinedBlocks) {
469 auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
470 if (!callAccessGroupInterface)
471 return;
472
473 auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
474 if (!accessGroups)
475 return;
476
477 // Simply append the call op's access groups to any operation implementing
478 // AccessGroupOpInterface.
479 for (Block &block : inlinedBlocks)
480 for (auto accessGroupOpInterface :
481 block.getOps<LLVM::AccessGroupOpInterface>())
482 accessGroupOpInterface.setAccessGroups(concatArrayAttr(
483 accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
484}
485
486/// Updates locations inside loop annotations to reflect that they were inlined.
487static void
489 iterator_range<Region::iterator> inlinedBlocks) {
490 // Attempt to extract a DISubprogram from the callee.
491 auto func = call->getParentOfType<FunctionOpInterface>();
492 if (!func)
493 return;
494 LocationAttr funcLoc = func->getLoc();
495 auto fusedLoc = dyn_cast_if_present<FusedLoc>(funcLoc);
496 if (!fusedLoc)
497 return;
498 auto scope =
499 dyn_cast_if_present<LLVM::DISubprogramAttr>(fusedLoc.getMetadata());
500 if (!scope)
501 return;
502
503 // Helper to build a new fused location that reflects the inlining of the loop
504 // annotation.
505 auto updateLoc = [&](FusedLoc loc) -> FusedLoc {
506 if (!loc)
507 return {};
508 Location callSiteLoc = CallSiteLoc::get(loc, call->getLoc());
509 return FusedLoc::get(loc.getContext(), callSiteLoc, scope);
510 };
511
512 AttrTypeReplacer replacer;
513 replacer.addReplacement([&](LLVM::LoopAnnotationAttr loopAnnotation)
514 -> std::pair<Attribute, WalkResult> {
515 FusedLoc newStartLoc = updateLoc(loopAnnotation.getStartLoc());
516 FusedLoc newEndLoc = updateLoc(loopAnnotation.getEndLoc());
517 if (!newStartLoc && !newEndLoc)
518 return {loopAnnotation, WalkResult::advance()};
519 auto newLoopAnnotation = LLVM::LoopAnnotationAttr::get(
520 loopAnnotation.getContext(), loopAnnotation.getDisableNonforced(),
521 loopAnnotation.getVectorize(), loopAnnotation.getInterleave(),
522 loopAnnotation.getUnroll(), loopAnnotation.getUnrollAndJam(),
523 loopAnnotation.getLicm(), loopAnnotation.getDistribute(),
524 loopAnnotation.getPipeline(), loopAnnotation.getPeeled(),
525 loopAnnotation.getUnswitch(), loopAnnotation.getMustProgress(),
526 loopAnnotation.getIsVectorized(), newStartLoc, newEndLoc,
527 loopAnnotation.getParallelAccesses());
528 // Needs to advance, as loop annotations can be nested.
529 return {newLoopAnnotation, WalkResult::advance()};
530 });
531
532 for (Block &block : inlinedBlocks)
533 for (Operation &op : block)
534 replacer.recursivelyReplaceElementsIn(&op);
535}
536
537/// If `requestedAlignment` is higher than the alignment specified on `alloca`,
538/// realigns `alloca` if this does not exceed the natural stack alignment.
539/// Returns the post-alignment of `alloca`, whether it was realigned or not.
540static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
541 uint64_t requestedAlignment,
542 DataLayout const &dataLayout) {
543 uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
544 if (requestedAlignment <= allocaAlignment)
545 // No realignment necessary.
546 return allocaAlignment;
547 uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
548 // If the natural stack alignment is not specified, the data layout returns
549 // zero. Optimistically allow realignment in this case.
550 if (naturalStackAlignmentBits == 0 ||
551 // If the requested alignment exceeds the natural stack alignment, this
552 // will trigger a dynamic stack realignment, so we prefer to copy...
553 8 * requestedAlignment <= naturalStackAlignmentBits ||
554 // ...unless the alloca already triggers dynamic stack realignment. Then
555 // we might as well further increase the alignment to avoid a copy.
556 8 * allocaAlignment > naturalStackAlignmentBits) {
557 alloca.setAlignment(requestedAlignment);
558 allocaAlignment = requestedAlignment;
559 }
560 return allocaAlignment;
561}
562
563/// Tries to find and return the alignment of the pointer `value` by looking for
564/// an alignment attribute on the defining allocation op or function argument.
565/// If the found alignment is lower than `requestedAlignment`, tries to realign
566/// the pointer, then returns the resulting post-alignment, regardless of
567/// whether it was realigned or not. If no existing alignment attribute is
568/// found, returns 1 (i.e., assume that no alignment is guaranteed).
569static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
570 DataLayout const &dataLayout) {
571 if (Operation *definingOp = value.getDefiningOp()) {
572 if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
573 return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
574 dataLayout);
575 if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
577 definingOp, addressOf.getGlobalNameAttr()))
578 return global.getAlignment().value_or(1);
579 // We don't currently handle this operation; assume no alignment.
580 return 1;
581 }
582 // Since there is no defining op, this is a block argument. Probably this
583 // comes directly from a function argument, so check that this is the case.
584 Operation *parentOp = value.getParentBlock()->getParentOp();
585 if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
586 // Use the alignment attribute set for this argument in the parent function
587 // if it has been set.
588 auto blockArg = llvm::cast<BlockArgument>(value);
589 if (Attribute alignAttr = func.getArgAttr(
590 blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
591 return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
592 }
593 // We didn't find anything useful; assume no alignment.
594 return 1;
595}
596
597/// Introduces a new alloca and copies the memory pointed to by `argument` to
598/// the address of the new alloca, then returns the value of the new alloca.
600 Value argument, Type elementType,
601 uint64_t elementTypeSize,
602 uint64_t targetAlignment) {
603 // Allocate the new value on the stack.
604 Value allocaOp;
605 {
606 // Since this is a static alloca, we can put it directly in the entry block,
607 // so they can be absorbed into the prologue/epilogue at code generation.
608 OpBuilder::InsertionGuard insertionGuard(builder);
609 Block *entryBlock = &(*argument.getParentRegion()->begin());
610 builder.setInsertionPointToStart(entryBlock);
611 Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
612 builder.getI64IntegerAttr(1));
613 allocaOp = LLVM::AllocaOp::create(builder, loc, argument.getType(),
614 elementType, one, targetAlignment);
615 }
616 // Copy the pointee to the newly allocated value.
617 Value copySize =
618 LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
619 builder.getI64IntegerAttr(elementTypeSize));
620 LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
621 /*isVolatile=*/false);
622 return allocaOp;
623}
624
625/// Handles a function argument marked with the byval attribute by introducing a
626/// memcpy or realigning the defining operation, if required either due to the
627/// pointee being writeable in the callee, and/or due to an alignment mismatch.
628/// `requestedAlignment` specifies the alignment set in the "align" argument
629/// attribute (or 1 if no align attribute was set).
630static Value handleByValArgument(OpBuilder &builder, Operation *callable,
631 Value argument, Type elementType,
632 uint64_t requestedAlignment) {
633 auto func = cast<LLVM::LLVMFuncOp>(callable);
634 LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryEffectsAttr();
635 // If there is no memory effects attribute, assume that the function is
636 // not read-only.
637 bool isReadOnly = memoryEffects &&
638 memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
639 memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
640 // Check if there's an alignment mismatch requiring us to copy.
641 DataLayout dataLayout = DataLayout::closest(callable);
642 uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(elementType);
643 if (isReadOnly) {
644 if (requestedAlignment <= minimumAlignment)
645 return argument;
646 uint64_t currentAlignment =
647 tryToEnforceAlignment(argument, requestedAlignment, dataLayout);
648 if (currentAlignment >= requestedAlignment)
649 return argument;
650 }
651 uint64_t targetAlignment = std::max(requestedAlignment, minimumAlignment);
653 builder, argument.getLoc(), argument, elementType,
654 dataLayout.getTypeSize(elementType), targetAlignment);
655}
656
657namespace {
658struct LLVMInlinerInterface : public DialectInlinerInterface {
660
661 LLVMInlinerInterface(Dialect *dialect)
662 : DialectInlinerInterface(dialect),
663 // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
664 disallowedFunctionAttrs({
665 StringAttr::get(dialect->getContext(), "noduplicate"),
666 StringAttr::get(dialect->getContext(), "presplitcoroutine"),
667 StringAttr::get(dialect->getContext(), "returns_twice"),
668 StringAttr::get(dialect->getContext(), "strictfp"),
669 }) {}
670
671 bool isLegalToInline(Operation *call, Operation *callable,
672 bool wouldBeCloned) const final {
673 auto callOp = dyn_cast<LLVM::CallOp>(call);
674 if (!callOp) {
675 LDBG() << "Cannot inline: call is not an '"
676 << LLVM::CallOp::getOperationName() << "' op";
677 return false;
678 }
679 if (callOp.getNoInline()) {
680 LDBG() << "Cannot inline: call is marked no_inline";
681 return false;
682 }
683 auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
684 if (!funcOp) {
685 LDBG() << "Cannot inline: callable is not an '"
686 << LLVM::LLVMFuncOp::getOperationName() << "' op";
687 return false;
688 }
689 if (funcOp.isNoInline()) {
690 LDBG() << "Cannot inline: function is marked no_inline";
691 return false;
692 }
693 if (funcOp.isVarArg()) {
694 LDBG() << "Cannot inline: callable is variadic";
695 return false;
696 }
697 // TODO: Generate aliasing metadata from noalias result attributes.
698 if (auto attrs = funcOp.getArgAttrs()) {
699 for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
700 if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
701 LDBG() << "Cannot inline " << funcOp.getSymName()
702 << ": inalloca arguments not supported";
703 return false;
704 }
705 }
706 }
707 // TODO: Handle exceptions.
708 if (funcOp.getPersonality()) {
709 LDBG() << "Cannot inline " << funcOp.getSymName()
710 << ": unhandled function personality";
711 return false;
712 }
713 if (funcOp.getPassthrough()) {
714 // TODO: Used attributes should not be passthrough.
715 if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
716 auto stringAttr = dyn_cast<StringAttr>(attr);
717 if (!stringAttr)
718 return false;
719 if (disallowedFunctionAttrs.contains(stringAttr)) {
720 LDBG() << "Cannot inline " << funcOp.getSymName()
721 << ": found disallowed function attribute " << stringAttr;
722 return true;
723 }
724 return false;
725 }))
726 return false;
727 }
728 return true;
729 }
730
731 bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
732 return true;
733 }
734
735 bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
736 // The inliner cannot handle variadic function arguments and blocktag
737 // operations prevent inlining since they the blockaddress operations
738 // reference them via the callee symbol.
739 return !(isa<LLVM::VaStartOp>(op) || isa<LLVM::BlockTagOp>(op));
740 }
741
742 /// Handle the given inlined return by replacing it with a branch. This
743 /// overload is called when the inlined region has more than one block.
744 void handleTerminator(Operation *op, Block *newDest) const final {
745 // Only return needs to be handled here.
746 auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
747 if (!returnOp)
748 return;
749
750 // Replace the return with a branch to the dest.
751 OpBuilder builder(op);
752 LLVM::BrOp::create(builder, op->getLoc(), returnOp.getOperands(), newDest);
753 op->erase();
754 }
755
756 bool allowSingleBlockOptimization(
757 iterator_range<Region::iterator> inlinedBlocks) const final {
758 return !(!inlinedBlocks.empty() &&
759 isa<LLVM::UnreachableOp>(inlinedBlocks.begin()->getTerminator()));
760 }
761
762 /// Handle the given inlined return by replacing the uses of the call with the
763 /// operands of the return. This overload is called when the inlined region
764 /// only contains one block.
765 void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
766 // Return will be the only terminator present.
767 auto returnOp = cast<LLVM::ReturnOp>(op);
768
769 // Replace the values directly with the return operands.
770 assert(returnOp.getNumOperands() == valuesToRepl.size());
771 for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
772 dst.replaceAllUsesWith(src);
773 }
774
775 Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
776 Value argument,
777 DictionaryAttr argumentAttrs) const final {
778 if (std::optional<NamedAttribute> attr =
779 argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
780 Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
781 uint64_t requestedAlignment = 1;
782 if (std::optional<NamedAttribute> alignAttr =
783 argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
784 requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
785 .getValue()
786 .getLimitedValue();
787 }
788 return handleByValArgument(builder, callable, argument, elementType,
789 requestedAlignment);
790 }
791
792 // This code is essentially a workaround for deficiencies in the inliner
793 // interface: We need to transform operations *after* inlined based on the
794 // argument attributes of the parameters *before* inlining. This method runs
795 // prior to actual inlining and thus cannot transform the post-inlining
796 // code, while `processInlinedCallBlocks` does not have access to
797 // pre-inlining function arguments. Additionally, it is required to
798 // distinguish which parameter an SSA value originally came from. As a
799 // workaround until this is changed: Create an ssa.copy intrinsic with the
800 // noalias attribute (when it was present before) that can easily be found,
801 // and is extremely unlikely to exist in the code prior to inlining, using
802 // this to communicate between this method and `processInlinedCallBlocks`.
803 // TODO: Fix this by refactoring the inliner interface.
804 auto copyOp = LLVM::SSACopyOp::create(builder, call->getLoc(), argument);
805 if (argumentAttrs.contains(LLVM::LLVMDialect::getNoAliasAttrName()))
806 copyOp->setDiscardableAttr(
807 builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
808 builder.getUnitAttr());
809 return copyOp;
810 }
811
812 void processInlinedCallBlocks(
813 Operation *call,
814 iterator_range<Region::iterator> inlinedBlocks) const override {
815 handleInlinedAllocas(call, inlinedBlocks);
816 handleAliasScopes(call, inlinedBlocks);
817 handleAccessGroups(call, inlinedBlocks);
818 handleLoopAnnotations(call, inlinedBlocks);
819 }
820
821 // Keeping this (immutable) state on the interface allows us to look up
822 // StringAttrs instead of looking up strings, since StringAttrs are bound to
823 // the current context and thus cannot be initialized as static fields.
824 const DenseSet<StringAttr> disallowedFunctionAttrs;
825};
826
827} // end anonymous namespace
828
830 registry.addExtension(+[](MLIRContext *ctx, LLVM::LLVMDialect *dialect) {
831 dialect->addInterfaces<LLVMInlinerInterface>();
832 });
833}
834
836 registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
837 dialect->addInterfaces<LLVMInlinerInterface>();
838 });
839}
lhs
static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp)
Check whether the given alloca is an input to a lifetime intrinsic, optionally passing through one or more casts on the way. This is not transitive through block arguments.
static void appendCallOpAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any alias scopes of the call operation to any inlined memory operation.
static void handleLoopAnnotations(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Updates locations inside loop annotations to reflect that they were inlined.
static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs)
Creates a new ArrayAttr by concatenating lhs with rhs.
static void createNewAliasScopesFromNoAliasParameter(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Creates a new AliasScopeAttr for every noalias parameter and attaches it to the appropriate inlined memory operations.
static FailureOr< SmallVector< Value > > getUnderlyingObjectSet(Value pointerValue)
Attempts to return the set of all underlying pointer values that pointerValue is based on.
static void handleAccessGroups(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Appends any access groups of the call operation to any inlined memory operation.
static Value handleByValArgument(OpBuilder &builder, Operation *callable, Value argument, Type elementType, uint64_t requestedAlignment)
Handles a function argument marked with the byval attribute by introducing a memcpy or realigning the...
static void handleAliasScopes(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles all interactions with alias scopes during inlining.
static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, uint64_t requestedAlignment, DataLayout const &dataLayout)
If requestedAlignment is higher than the alignment specified on alloca, realigns alloca if this does ...
static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, DataLayout const &dataLayout)
Tries to find and return the alignment of the pointer value by looking for an alignment attribute on ...
static void deepCloneAliasScopes(iterator_range< Region::iterator > inlinedBlocks)
Maps all alias scopes in the inlined operations to deep clones of the scopes and domain.
static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, uint64_t elementTypeSize, uint64_t targetAlignment)
Introduces a new alloca and copies the memory pointed to by argument to the address of the new alloca...
static void handleInlinedAllocas(Operation *call, iterator_range< Region::iterator > inlinedBlocks)
Handles alloca operations in the inlined blocks: moves alloca operations with a constant size from the callee's former entry block into the caller's entry block, inserts lifetime intrinsics that limit the scope of inlined static allocas to the inlined blocks, and inserts StackSave and StackRestore operations if dynamic allocas were inlined.
static bool isLegalToInline(InlinerInterface &interface, Region *src, Region *insertRegion, bool shouldCloneInlinedRegion, IRMapping &valueMapping)
Utility to check that all of the operations within 'src' can be inlined.
ArrayAttr()
This is an attribute/type replacer that is naively cached.
void addWalk(WalkFn< Attribute > &&fn)
Register a walk function for a given attribute or type.
WalkResult walk(T element)
Walk the given attribute/type, and recursively walk any sub elements.
Attributes are known-constant values of operations.
Definition Attributes.h:25
Block represents an ordered list of Operations.
Definition Block.h:33
iterator_range< op_iterator< OpT > > getOps()
Return an iterator range over the operations within this block that are of 'OpT'.
Definition Block.h:193
iterator begin()
Definition Block.h:143
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition Block.cpp:31
IntegerType getI64Type()
Definition Builders.cpp:65
IntegerAttr getI64IntegerAttr(int64_t value)
Definition Builders.cpp:112
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition Builders.h:91
The main mechanism for performing data layout queries.
static DataLayout closest(Operation *op)
Returns the layout of the closest parent operation carrying layout info.
llvm::TypeSize getTypeSize(Type t) const
Returns the size of the given type in the current scope.
uint64_t getStackAlignment() const
Returns the natural alignment of the stack in bits.
uint64_t getTypeABIAlignment(Type t) const
Returns the required alignment of the given type in the current scope.
This is the interface that must be implemented by the dialects of operations to be inlined.
DialectInlinerInterface(Dialect *dialect)
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
MLIRContext * getContext() const
Definition Dialect.h:52
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
Location objects represent source locations information in MLIR.
Definition Location.h:32
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:348
This class helps build Operations.
Definition Builders.h:207
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:431
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:398
A trait of region holding operations that define a new scope for automatic allocations,...
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-level operation.
Definition Operation.h:234
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition Operation.h:238
user_range getUsers()
Returns a range of all users.
Definition Operation.h:873
Region * getParentRegion()
Returns the region to which the instruction belongs.
Definition Operation.h:230
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:216
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
static Operation * lookupNearestSymbolFrom(Operation *from, StringAttr symbol)
Returns the operation registered with the given symbol name within the closest parent operation of,...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition Value.cpp:46
Location getLoc() const
Return the location of this value.
Definition Value.cpp:24
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Region * getParentRegion()
Return the Region in which this Value is defined.
Definition Value.cpp:39
A class to signal how to proceed with the walk of the backward slice:
Definition SliceWalk.h:20
bool wasInterrupted() const
Returns true if the walk was interrupted.
Definition SliceWalk.h:60
static WalkContinuation skip()
Creates a continuation that advances the walk without adding any predecessor values to the work list.
Definition SliceWalk.h:55
static WalkContinuation advanceTo(mlir::ValueRange nextValues)
Creates a continuation that adds the user-specified nextValues to the work list and advances the walk...
Definition SliceWalk.h:49
static WalkContinuation interrupt()
Creates a continuation that interrupts the walk.
Definition SliceWalk.h:43
static WalkResult advance()
Definition WalkResult.h:47
void recursivelyReplaceElementsIn(Operation *op, bool replaceAttrs=true, bool replaceLocs=false, bool replaceTypes=false)
Replace the elements within the given operation, and all nested operations.
void addReplacement(ReplaceFn< Attribute > fn)
AttrTypeReplacerBase.
void registerInlinerInterface(DialectRegistry &registry)
Register the LLVMInlinerInterface implementation of DialectInlinerInterface with the LLVM dialect.
void registerInlinerInterface(DialectRegistry &registry)
Register the NVVMInlinerInterface implementation of DialectInlinerInterface with the NVVM dialect.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition Matchers.h:490
std::optional< SmallVector< Value > > getControlFlowPredecessors(Value value)
Computes a vector of all control predecessors of value.
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
Definition LLVM.h:128
llvm::SetVector< T, Vector, Set, N > SetVector
Definition LLVM.h:131
WalkContinuation walkSlice(mlir::ValueRange rootValues, WalkCallback walkCallback)
Walks the slice starting from the rootValues using a depth-first traversal.
Definition SliceWalk.cpp:6
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:126
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition Matchers.h:369
This trait indicates that a terminator operation is "return-like".