//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace mlir {

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// Note that these functions don't take a `SymbolTable` because GPU module
/// lowerings can have name collisions as an intermediate state.

/// Find or create an external function declaration in the given module.
LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc,
                                     OpBuilder &b, StringRef name,
                                     LLVM::LLVMFunctionType type);

/// Create a global that contains the given string. If a global with the same
/// string already exists in the module, return that global.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                         Operation *moduleOp, Type llvmI8,
                                         StringRef namePrefix, StringRef str,
                                         uint64_t alignment = 0,
                                         unsigned addrSpace = 0);
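
// Example (hypothetical usage sketch, not part of the upstream header): how a
// lowering pattern might combine the two helpers above to declare an external
// function once per module and emit a call that takes a string constant. The
// helper name `emitPutsCall` and the choice of `puts` are illustrative
// assumptions only.
//
//   static void emitPutsCall(Operation *moduleOp, Location loc,
//                            ConversionPatternRewriter &rewriter,
//                            StringRef message) {
//     MLIRContext *ctx = rewriter.getContext();
//     Type llvmI8 = IntegerType::get(ctx, 8);
//     Type ptrTy = LLVM::LLVMPointerType::get(ctx);
//     Type i32 = IntegerType::get(ctx, 32);
//     // Declare `int puts(ptr)` if it is not already in the module.
//     LLVM::LLVMFuncOp putsDecl = getOrDefineFunction(
//         moduleOp, loc, rewriter, "puts",
//         LLVM::LLVMFunctionType::get(i32, {ptrTy}));
//     // Reuse (or create) a global holding the message text.
//     LLVM::GlobalOp msgGlobal = getOrCreateStringConstant(
//         rewriter, loc, moduleOp, llvmI8, "msg", message);
//     Value msgPtr = rewriter.create<LLVM::AddressOfOp>(loc, msgGlobal);
//     rewriter.create<LLVM::CallOp>(loc, putsDecl, ValueRange{msgPtr});
//   }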

//===----------------------------------------------------------------------===//
// Lowering Patterns
//===----------------------------------------------------------------------===//

/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, mirroring what LLVM expects
/// for dynamic shared memory. It then constructs a memref descriptor from
/// these values and returns it.
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0,
                                   PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  // Alignment of the generated global array, in bits.
  unsigned alignmentBit;
};
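
// Example (hypothetical registration sketch): adding this pattern to a
// RewritePatternSet during a GPU-to-LLVM conversion. The 64-bit alignment is
// an illustrative assumption, not a requirement of the pattern.
//
//   void addDynamicSharedMemoryPattern(const LLVMTypeConverter &converter,
//                                      RewritePatternSet &patterns) {
//     // `alignmentBit` is expressed in bits, so 64 means an 8-byte alignment.
//     patterns.add<GPUDynamicSharedMemoryOpLowering>(converter,
//                                                    /*alignmentBit=*/64);
//   }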

struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};

struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options,
                    PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};
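
// Example (hypothetical configuration sketch): populating
// GPUFuncOpLoweringOptions and registering GPUFuncOpLowering. The concrete
// address spaces (0 for private allocas, 3 for workgroup memory) and the
// "nvvm.kernel" / "nvvm.maxntid" attribute names are illustrative assumptions;
// a real lowering picks target-specific values.
//
//   void addGpuFuncOpLowering(const LLVMTypeConverter &converter,
//                             RewritePatternSet &patterns, MLIRContext *ctx) {
//     GPUFuncOpLoweringOptions options;
//     options.allocaAddrSpace = 0;
//     options.workgroupAddrSpace = 3;
//     options.kernelAttributeName = StringAttr::get(ctx, "nvvm.kernel");
//     options.kernelBlockSizeAttributeName =
//         StringAttr::get(ctx, "nvvm.maxntid");
//     patterns.add<GPUFuncOpLowering>(converter, options);
//   }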

/// The lowering of gpu.printf to a call to HIP hostcalls.
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're not
/// legal input to gpu.printf) or with non-constant format strings.
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

/// The lowering of gpu.printf to a call to an external printf() function.
///
/// This pattern adds a declaration of printf() to the GPUModule if needed and
/// separates the format strings out into global constants. For some runtimes,
/// such as OpenCL on AMD, this is sufficient setup, as the compiler will lower
/// the printf calls to appropriate device-side code.
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
        addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space in which the format-string globals are created.
  int addressSpace;
};

/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
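
// Example (hypothetical selection sketch): the three gpu.printf lowerings
// above are alternatives, and a conversion normally registers only one of
// them, chosen per runtime. The mapping below (HIP hostcalls for ROCm,
// vprintf for NVVM/CUDA, an address space of 4 for the OpenCL path) reflects
// common usage but is an illustrative assumption here.
//
//   enum class PrintfLoweringKind { HIPHostcall, LLVMCall, VPrintf };
//
//   void addPrintfLowering(const LLVMTypeConverter &converter,
//                          RewritePatternSet &patterns,
//                          PrintfLoweringKind kind) {
//     switch (kind) {
//     case PrintfLoweringKind::HIPHostcall:
//       patterns.add<GPUPrintfOpToHIPLowering>(converter);
//       break;
//     case PrintfLoweringKind::LLVMCall:
//       // Address space used for the generated format-string globals.
//       patterns.add<GPUPrintfOpToLLVMCallLowering>(converter,
//                                                   /*addressSpace=*/4);
//       break;
//     case PrintfLoweringKind::VPrintf:
//       patterns.add<GPUPrintfOpToVPrintfLowering>(converter);
//       break;
//     }
//   }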

struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

namespace impl {
/// Unrolls op to array/vector elements.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl

/// Unrolls SourceOp to array/vector elements.
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                   *this->getTypeConverter());
  }
};
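
// Example (hypothetical instantiation sketch): ScalarizeVectorOpLowering is a
// template, so it is registered once per op that should be unrolled to scalar
// element-wise operations. Using math::ExpOp and math::SqrtOp here is an
// illustrative assumption about which ops a target would scalarize.
//
//   void addScalarizationPatterns(const LLVMTypeConverter &converter,
//                                 RewritePatternSet &patterns) {
//     patterns.add<ScalarizeVectorOpLowering<math::ExpOp>,
//                  ScalarizeVectorOpLowering<math::SqrtOp>>(converter);
//   }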

} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_