MLIR 23.0.0git
ROCDLToLLVMIRTranslation.cpp
Go to the documentation of this file.
1//===- ROCDLToLLVMIRTranslation.cpp - Translate ROCDL to LLVM IR ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a translation between the MLIR ROCDL dialect and
10// LLVM IR.
11//
12//===----------------------------------------------------------------------===//
13
17#include "mlir/IR/Operation.h"
19
20#include "llvm/IR/IRBuilder.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace mlir;
25using namespace mlir::LLVM;
27
28namespace {
29/// Implementation of the dialect interface that converts operations belonging
30/// to the ROCDL dialect to LLVM IR.
31class ROCDLDialectLLVMIRTranslationInterface
32 : public LLVMTranslationDialectInterface {
33public:
34 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
35
36 /// Translates the given operation to LLVM IR using the provided IR builder
37 /// and saving the state in `moduleTranslation`.
38 LogicalResult
39 convertOperation(Operation *op, llvm::IRBuilderBase &builder,
40 LLVM::ModuleTranslation &moduleTranslation) const final {
41 Operation &opInst = *op;
42#include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"
43
44 return failure();
45 }
46
47 /// Attaches module-level metadata for functions marked as kernels.
48 LogicalResult
49 amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
50 NamedAttribute attribute,
51 LLVM::ModuleTranslation &moduleTranslation) const final {
52 auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
53 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
54 if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
55 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
56 if (!func)
57 return op->emitOpError(Twine(attribute.getName()) +
58 " is only supported on `llvm.func` operations");
59 ;
60
61 // For GPU kernels,
62 // 1. Insert AMDGPU_KERNEL calling convention.
63 // 2. Insert amdgpu-flat-work-group-size(1, 256) attribute unless the user
64 // has overriden this value - 256 is the default in clang
65 llvm::Function *llvmFunc =
66 moduleTranslation.lookupFunction(func.getName());
67 llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
68 if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
69 llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256");
70 }
71
72 // MLIR's GPU kernel APIs all assume and produce uniformly-sized
73 // workgroups, so the lowering of the `rocdl.kernel` marker encodes this
74 // assumption. This assumption may be overridden by setting
75 // `rocdl.uniform_work_group_size` on a given function.
76 if (!llvmFunc->hasFnAttribute("uniform-work-group-size"))
77 llvmFunc->addFnAttr("uniform-work-group-size");
78 }
79 // Override flat-work-group-size
80 // TODO: update clients to rocdl.flat_work_group_size instead,
81 // then remove this half of the branch
82 if (dialect->getMaxFlatWorkGroupSizeAttrHelper().getName() ==
83 attribute.getName()) {
84 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
85 if (!func)
86 return op->emitOpError(Twine(attribute.getName()) +
87 " is only supported on `llvm.func` operations");
88 auto value = dyn_cast<IntegerAttr>(attribute.getValue());
89 if (!value)
90 return op->emitOpError(Twine(attribute.getName()) +
91 " must be an integer");
92
93 llvm::Function *llvmFunc =
94 moduleTranslation.lookupFunction(func.getName());
95 llvm::SmallString<8> llvmAttrValue;
96 llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
97 attrValueStream << "1," << value.getInt();
98 llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
99 }
100 if (dialect->getWavesPerEuAttrHelper().getName() == attribute.getName()) {
101 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
102 if (!func)
103 return op->emitOpError(Twine(attribute.getName()) +
104 " is only supported on `llvm.func` operations");
105 auto value = dyn_cast<IntegerAttr>(attribute.getValue());
106 if (!value)
107 return op->emitOpError(Twine(attribute.getName()) +
108 " must be an integer");
109
110 llvm::Function *llvmFunc =
111 moduleTranslation.lookupFunction(func.getName());
112 llvm::SmallString<8> llvmAttrValue;
113 llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
114 attrValueStream << value.getInt();
115 llvmFunc->addFnAttr("amdgpu-waves-per-eu", llvmAttrValue);
116 }
117 if (dialect->getFlatWorkGroupSizeAttrHelper().getName() ==
118 attribute.getName()) {
119 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
120 if (!func)
121 return op->emitOpError(Twine(attribute.getName()) +
122 " is only supported on `llvm.func` operations");
123 auto value = dyn_cast<StringAttr>(attribute.getValue());
124 if (!value)
125 return op->emitOpError(Twine(attribute.getName()) +
126 " must be a string");
127
128 llvm::Function *llvmFunc =
129 moduleTranslation.lookupFunction(func.getName());
130 llvm::SmallString<8> llvmAttrValue;
131 llvmAttrValue.append(value.getValue());
132 llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
133 }
134 if (ROCDL::ROCDLDialect::getUniformWorkGroupSizeAttrName() ==
135 attribute.getName()) {
136 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
137 if (!func)
138 return op->emitOpError(Twine(attribute.getName()) +
139 " is only supported on `llvm.func` operations");
140 auto value = dyn_cast<BoolAttr>(attribute.getValue());
141 if (!value)
142 return op->emitOpError(Twine(attribute.getName()) +
143 " must be a boolean");
144 llvm::Function *llvmFunc =
145 moduleTranslation.lookupFunction(func.getName());
146 if (value.getValue())
147 llvmFunc->addFnAttr("uniform-work-group-size");
148 else
149 llvmFunc->removeFnAttr("uniform-work-group-size");
150 }
151 if (dialect->getUnsafeFpAtomicsAttrHelper().getName() ==
152 attribute.getName()) {
153 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
154 if (!func)
155 return op->emitOpError(Twine(attribute.getName()) +
156 " is only supported on `llvm.func` operations");
157 auto value = dyn_cast<BoolAttr>(attribute.getValue());
158 if (!value)
159 return op->emitOpError(Twine(attribute.getName()) +
160 " must be a boolean");
161 llvm::Function *llvmFunc =
162 moduleTranslation.lookupFunction(func.getName());
163 llvmFunc->addFnAttr("amdgpu-unsafe-fp-atomics",
164 value.getValue() ? "true" : "false");
165 }
166 // Set reqd_work_group_size metadata
167 if (dialect->getReqdWorkGroupSizeAttrHelper().getName() ==
168 attribute.getName()) {
169 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
170 if (!func)
171 return op->emitOpError(Twine(attribute.getName()) +
172 " is only supported on `llvm.func` operations");
173 auto value = dyn_cast<DenseI32ArrayAttr>(attribute.getValue());
174 if (!value)
175 return op->emitOpError(Twine(attribute.getName()) +
176 " must be a dense i32 array attribute");
177 SmallVector<llvm::Metadata *, 3> metadata;
178 llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
179 for (int32_t i : value.asArrayRef()) {
180 llvm::Constant *constant = llvm::ConstantInt::get(i32, i);
181 metadata.push_back(llvm::ConstantAsMetadata::get(constant));
182 }
183 llvm::Function *llvmFunc =
184 moduleTranslation.lookupFunction(func.getName());
185 llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
186 llvmFunc->setMetadata("reqd_work_group_size", node);
187 }
188
189 // Atomic and nontemporal metadata
190 if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
191 for (llvm::Instruction *i : instructions)
192 i->setMetadata("amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
193 }
194 if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
195 attribute.getName()) {
196 for (llvm::Instruction *i : instructions)
197 i->setMetadata("amdgpu.no.remote.memory",
198 llvm::MDNode::get(llvmContext, {}));
199 }
200 if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
201 attribute.getName()) {
202 for (llvm::Instruction *i : instructions)
203 i->setMetadata("amdgpu.no.fine.grained.memory",
204 llvm::MDNode::get(llvmContext, {}));
205 }
206 if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
207 attribute.getName()) {
208 for (llvm::Instruction *i : instructions)
209 i->setMetadata("amdgpu.ignore.denormal.mode",
210 llvm::MDNode::get(llvmContext, {}));
211 }
212
213 return success();
214 }
215};
216} // namespace
217
219 registry.insert<ROCDL::ROCDLDialect>();
220 registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
221 dialect->addInterfaces<ROCDLDialectLLVMIRTranslationInterface>();
222 });
223}
224
return success()
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
Include the generated interface declarations.
void registerROCDLDialectTranslation(DialectRegistry &registry)
Register the ROCDL dialect and the translation from it to the LLVM IR in the given registry.