// MLIR 23.0.0git — GPUOpsLowering.h (Doxygen source listing; see the generated
// documentation page for this file).
//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
10

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
namespace mlir {

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// Note that these functions don't take a `SymbolTable` because GPU module
/// lowerings can have name collisions as an intermediate state.
25/// Find or create an external function declaration in the given module.
26LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc,
27 OpBuilder &b, StringRef name,
28 LLVM::LLVMFunctionType type);
29
30/// Create a global that contains the given string. If a global with the same
31/// string already exists in the module, return that global.
33 Operation *moduleOp, Type llvmI8,
34 StringRef namePrefix, StringRef str,
35 uint64_t alignment = 0,
36 unsigned addrSpace = 0);
37
//===----------------------------------------------------------------------===//
// Lowering Patterns
//===----------------------------------------------------------------------===//
41
42/// Lowering for gpu.dynamic.shared.memory to LLVM dialect. The pattern first
43/// create a 0-sized global array symbol similar as LLVM expects. It constructs
44/// a memref descriptor with these values and return it.
46 : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
48 gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
50 unsigned alignmentBit = 0,
51 PatternBenefit benefit = 1)
52 : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
53 alignmentBit(alignmentBit) {}
54
55 LogicalResult
56 matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
57 ConversionPatternRewriter &rewriter) const override;
58
59private:
60 // Alignment bit
61 unsigned alignmentBit;
62};
63
65 /// The address space to use for `alloca`s in private memory.
67 /// The address space to use declaring workgroup memory.
69
70 /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
71 /// should be used.
73 /// The attribute name to to set block size. Null if no attribute should be
74 /// used.
76 /// The attribute name to to set cluster size. Null if no attribute should be
77 /// used.
79
80 /// The calling convention to use for kernel functions.
81 LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
82 /// The calling convention to use for non-kernel functions.
83 LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;
84
85 /// Whether to encode workgroup attributions as additional arguments instead
86 /// of a global variable.
88};
89
90struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
93 PatternBenefit benefit = 1)
94 : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
95 allocaAddrSpace(options.allocaAddrSpace),
96 workgroupAddrSpace(options.workgroupAddrSpace),
97 kernelAttributeName(options.kernelAttributeName),
98 kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
99 kernelClusterSizeAttributeName(options.kernelClusterSizeAttributeName),
100 kernelCallingConvention(options.kernelCallingConvention),
101 nonKernelCallingConvention(options.nonKernelCallingConvention),
102 encodeWorkgroupAttributionsAsArguments(
103 options.encodeWorkgroupAttributionsAsArguments) {}
104
105 LogicalResult
106 matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
107 ConversionPatternRewriter &rewriter) const override;
108
109private:
110 /// The address space to use for `alloca`s in private memory.
111 unsigned allocaAddrSpace;
112 /// The address space to use declaring workgroup memory.
113 unsigned workgroupAddrSpace;
114
115 /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
116 /// should be used.
117 StringAttr kernelAttributeName;
118 /// The attribute name to to set block size. Null if no attribute should be
119 /// used.
120 StringAttr kernelBlockSizeAttributeName;
121 /// The attribute name to to set cluster size. Null if no attribute should be
122 /// used.
123 StringAttr kernelClusterSizeAttributeName;
124
125 /// The calling convention to use for kernel functions
126 LLVM::CConv kernelCallingConvention;
127 /// The calling convention to use for non-kernel functions
128 LLVM::CConv nonKernelCallingConvention;
129
130 /// Whether to encode workgroup attributions as additional arguments instead
131 /// of a global variable.
132 bool encodeWorkgroupAttributionsAsArguments;
133};
134
135/// The lowering of gpu.printf to a call to HIP hostcalls
136///
137/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
138/// to deal with %s (even if there were first-class strings in MLIR, they're not
139/// legal input to gpu.printf) or non-constant format strings
140struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
142
143 LogicalResult
144 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
145 ConversionPatternRewriter &rewriter) const override;
146};
147
148/// The lowering of gpu.printf to a call to an external printf() function
149///
150/// This pass will add a declaration of printf() to the GPUModule if needed
151/// and separate out the format strings into global constants. For some
152/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
153/// will lower printf calls to appropriate device-side code.
154/// However not all backends use the same calling convention and function
155/// naming.
156/// For example, the LLVM SPIRV backend requires calling convention
157/// LLVM::cconv::CConv::SPIR_FUNC and function name needs to be
158/// mangled as "_Z6printfPU3AS2Kcz".
159/// Default callingConvention is LLVM::cconv::CConv::C and
160/// funcName is "printf" but they can be customized as needed.
162 : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
164 const LLVMTypeConverter &converter, int addressSpace = 0,
165 LLVM::cconv::CConv callingConvention = LLVM::cconv::CConv::C,
166 StringRef funcName = "printf")
167 : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
168 addressSpace(addressSpace), callingConvention(callingConvention),
169 funcName(funcName) {}
170
171 LogicalResult
172 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
173 ConversionPatternRewriter &rewriter) const override;
174
175private:
176 int addressSpace;
177 LLVM::cconv::CConv callingConvention;
178 StringRef funcName;
179};
180
181/// Lowering of gpu.printf to a vprintf standard library.
183 : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
185
186 LogicalResult
187 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
188 ConversionPatternRewriter &rewriter) const override;
189};
190
191struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
193
194 LogicalResult
195 matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
196 ConversionPatternRewriter &rewriter) const override;
197};
198
199namespace impl {
200/// Unrolls op to array/vector elements.
201LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
202 ConversionPatternRewriter &rewriter,
203 const LLVMTypeConverter &converter);
204} // namespace impl
205
206/// Unrolls SourceOp to array/vector elements.
207template <typename SourceOp>
209public:
211
212 LogicalResult
213 matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
214 ConversionPatternRewriter &rewriter) const override {
215 return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
216 *this->getTypeConverter());
217 }
218};
219
} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static llvm::ManagedStatic< PassManagerOptions > options
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Definition Pattern.h:222
typename gpu::DynamicSharedMemoryOp::Adaptor OpAdaptor
Definition Pattern.h:218
Conversion from types to the LLVM IR dialect.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:207
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
Include the generated interface declarations.
LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Note that these functions don't take a SymbolTable because GPU module lowerings can have name collisi...
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, Operation *moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter, unsigned alignmentBit=0, PatternBenefit benefit=1)
unsigned allocaAddrSpace
The address space to use for allocas in private memory.
LLVM::CConv nonKernelCallingConvention
The calling convention to use for non-kernel functions.
StringAttr kernelBlockSizeAttributeName
The attribute name to to set block size.
LLVM::CConv kernelCallingConvention
The calling convention to use for kernel functions.
unsigned workgroupAddrSpace
The address space to use declaring workgroup memory.
bool encodeWorkgroupAttributionsAsArguments
Whether to encode workgroup attributions as additional arguments instead of a global variable.
StringAttr kernelClusterSizeAttributeName
The attribute name to to set cluster size.
StringAttr kernelAttributeName
The attribute name to use instead of gpu.kernel.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
GPUFuncOpLowering(const LLVMTypeConverter &converter, const GPUFuncOpLoweringOptions &options, PatternBenefit benefit=1)
The lowering of gpu.printf to a call to HIP hostcalls.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter, int addressSpace=0, LLVM::cconv::CConv callingConvention=LLVM::cconv::CConv::C, StringRef funcName="printf")
Lowering of gpu.printf to a vprintf standard library.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Unrolls SourceOp to array/vector elements.
LogicalResult matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.