MLIR 23.0.0git
GPUOpsLowering.h
Go to the documentation of this file.
1//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
9#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
10
16
17namespace mlir {
18
19//===----------------------------------------------------------------------===//
20// Helper Functions
21//===----------------------------------------------------------------------===//
22
23/// Note that these functions don't take a `SymbolTable` because GPU module
24/// lowerings can have name collisions as an intermediate state.
25
26/// Find or create an external function declaration in the given module.
27LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc,
28 OpBuilder &b, StringRef name,
29 LLVM::LLVMFunctionType type);
30
31/// Create a global that contains the given string. If a global with the same
32/// string already exists in the module, return that global.
34 Operation *moduleOp, Type llvmI8,
35 StringRef namePrefix, StringRef str,
36 uint64_t alignment = 0,
37 unsigned addrSpace = 0);
38
39//===----------------------------------------------------------------------===//
40// Lowering Patterns
41//===----------------------------------------------------------------------===//
42
43/// Lowering for gpu.dynamic.shared.memory to LLVM dialect. The pattern first
44/// creates a 0-sized global array symbol, similar to what LLVM expects. It then
45/// constructs a memref descriptor with these values and returns it.
47 : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
49 gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
51 unsigned alignmentBit = 0,
52 PatternBenefit benefit = 1)
53 : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
54 alignmentBit(alignmentBit) {}
55
56 LogicalResult
57 matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
58 ConversionPatternRewriter &rewriter) const override;
59
60private:
61 // Alignment bit
62 unsigned alignmentBit;
63};
64
66 /// The address space to use for `alloca`s in private memory.
68 /// The address space to use declaring workgroup memory.
70
71 /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
72 /// should be used.
74 /// The attribute name to set block size. Null if no attribute should be
75 /// used.
77 /// The attribute name to set cluster size. Null if no attribute should be
78 /// used.
80
81 /// The calling convention to use for kernel functions.
82 LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
83 /// The calling convention to use for non-kernel functions.
84 LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;
85
86 /// Whether to encode workgroup attributions as additional arguments instead
87 /// of a global variable.
89};
90
91struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
94 PatternBenefit benefit = 1)
95 : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
96 allocaAddrSpace(options.allocaAddrSpace),
97 workgroupAddrSpace(options.workgroupAddrSpace),
98 kernelAttributeName(options.kernelAttributeName),
99 kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
100 kernelClusterSizeAttributeName(options.kernelClusterSizeAttributeName),
101 kernelCallingConvention(options.kernelCallingConvention),
102 nonKernelCallingConvention(options.nonKernelCallingConvention),
103 encodeWorkgroupAttributionsAsArguments(
104 options.encodeWorkgroupAttributionsAsArguments) {}
105
106 LogicalResult
107 matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
108 ConversionPatternRewriter &rewriter) const override;
109
110 /// Lower discardable attrs like `func` lowering, then set `llvm.func`
111 /// properties and append GPU / target-specific discardable metadata.
112 FailureOr<LoweredLLVMFuncAttrs>
113 buildLoweredGPULLVMFuncAttrs(gpu::GPUFuncOp gpuFuncOp, Type llvmFuncType,
114 OpBuilder &rewriter) const;
115
116private:
117 /// The address space to use for `alloca`s in private memory.
118 unsigned allocaAddrSpace;
119 /// The address space to use declaring workgroup memory.
120 unsigned workgroupAddrSpace;
121
122 /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
123 /// should be used.
124 StringAttr kernelAttributeName;
125 /// The attribute name to set block size. Null if no attribute should be
126 /// used.
127 StringAttr kernelBlockSizeAttributeName;
128 /// The attribute name to set cluster size. Null if no attribute should be
129 /// used.
130 StringAttr kernelClusterSizeAttributeName;
131
132 /// The calling convention to use for kernel functions
133 LLVM::CConv kernelCallingConvention;
134 /// The calling convention to use for non-kernel functions
135 LLVM::CConv nonKernelCallingConvention;
136
137 /// Whether to encode workgroup attributions as additional arguments instead
138 /// of a global variable.
139 bool encodeWorkgroupAttributionsAsArguments;
140};
141
142/// The lowering of gpu.printf to a call to HIP hostcalls
143///
144/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
145/// to deal with %s (even if there were first-class strings in MLIR, they're not
146/// legal input to gpu.printf) or non-constant format strings
147struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
149
150 LogicalResult
151 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
152 ConversionPatternRewriter &rewriter) const override;
153};
154
155/// The lowering of gpu.printf to a call to an external printf() function
156///
157/// This pattern will add a declaration of printf() to the GPUModule if needed
158/// and separate out the format strings into global constants. For some
159/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
160/// will lower printf calls to appropriate device-side code.
161/// However, not all backends use the same calling convention and function
162/// naming.
163/// For example, the LLVM SPIRV backend requires calling convention
164/// LLVM::cconv::CConv::SPIR_FUNC and the function name needs to be
165/// mangled as "_Z6printfPU3AS2Kcz".
166/// Default callingConvention is LLVM::cconv::CConv::C and
167/// funcName is "printf" but they can be customized as needed.
169 : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
171 const LLVMTypeConverter &converter, int addressSpace = 0,
172 LLVM::cconv::CConv callingConvention = LLVM::cconv::CConv::C,
173 StringRef funcName = "printf")
174 : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
175 addressSpace(addressSpace), callingConvention(callingConvention),
176 funcName(funcName) {}
177
178 LogicalResult
179 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
180 ConversionPatternRewriter &rewriter) const override;
181
182private:
183 int addressSpace;
184 LLVM::cconv::CConv callingConvention;
185 StringRef funcName;
186};
187
188/// Lowering of gpu.printf to a call to the vprintf standard library function.
190 : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
192
193 LogicalResult
194 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
195 ConversionPatternRewriter &rewriter) const override;
196};
197
198struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
200
201 LogicalResult
202 matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
203 ConversionPatternRewriter &rewriter) const override;
204};
205
206namespace impl {
207/// Unrolls op to array/vector elements.
208LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
209 ConversionPatternRewriter &rewriter,
210 const LLVMTypeConverter &converter);
211} // namespace impl
212
213/// Unrolls SourceOp to array/vector elements.
214template <typename SourceOp>
216public:
218
219 LogicalResult
220 matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
221 ConversionPatternRewriter &rewriter) const override {
222 return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
223 *this->getTypeConverter());
224 }
225};
226
227} // namespace mlir
228
229#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static llvm::ManagedStatic< PassManagerOptions > options
ConvertOpToLLVMPattern(const LLVMTypeConverter &typeConverter, PatternBenefit benefit=1)
Definition Pattern.h:233
typename gpu::DynamicSharedMemoryOp::Adaptor OpAdaptor
Definition Pattern.h:229
Conversion from types to the LLVM IR dialect.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
Include the generated interface declarations.
LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Note that these functions don't take a SymbolTable because GPU module lowerings can have name collisi...
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, Operation *moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter, unsigned alignmentBit=0, PatternBenefit benefit=1)
unsigned allocaAddrSpace
The address space to use for allocas in private memory.
LLVM::CConv nonKernelCallingConvention
The calling convention to use for non-kernel functions.
StringAttr kernelBlockSizeAttributeName
The attribute name to set block size.
LLVM::CConv kernelCallingConvention
The calling convention to use for kernel functions.
unsigned workgroupAddrSpace
The address space to use declaring workgroup memory.
bool encodeWorkgroupAttributionsAsArguments
Whether to encode workgroup attributions as additional arguments instead of a global variable.
StringAttr kernelClusterSizeAttributeName
The attribute name to set cluster size.
StringAttr kernelAttributeName
The attribute name to use instead of gpu.kernel.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
FailureOr< LoweredLLVMFuncAttrs > buildLoweredGPULLVMFuncAttrs(gpu::GPUFuncOp gpuFuncOp, Type llvmFuncType, OpBuilder &rewriter) const
Lower discardable attrs like func lowering, then set llvm.func properties and append GPU / target-spe...
GPUFuncOpLowering(const LLVMTypeConverter &converter, const GPUFuncOpLoweringOptions &options, PatternBenefit benefit=1)
The lowering of gpu.printf to a call to HIP hostcalls.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter, int addressSpace=0, LLVM::cconv::CConv callingConvention=LLVM::cconv::CConv::C, StringRef funcName="printf")
Lowering of gpu.printf to a call to the vprintf standard library function.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Unrolls SourceOp to array/vector elements.
LogicalResult matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.