20#include "llvm/IR/IRBuilder.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/Support/raw_ostream.h"
/// Translation interface for the ROCDL dialect, derived from
/// LLVMTranslationDialectInterface. Its visible members are
/// `convertOperation` and `amendOperation`.
31class ROCDLDialectLLVMIRTranslationInterface
32 :
public LLVMTranslationDialectInterface {
// Reuse the constructors of the base interface.
34 using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
/// Converts `op` using `builder` and `moduleTranslation`; the conversion code
/// itself is supplied by the included ROCDLConversions.inc file.
/// NOTE(review): the return type and everything after the include are not
/// visible in this listing.
39 convertOperation(Operation *op, llvm::IRBuilderBase &builder,
40 LLVM::ModuleTranslation &moduleTranslation)
const final {
// Reference alias for the operation; presumably the name the included
// conversion snippets expect -- confirm against ROCDLConversions.inc.
41 Operation &opInst = *op;
// Textually pulls in the conversion code at this point of the function body.
42#include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"
/// Applies one ROCDL dialect attribute (`attribute`) found on `op` to the
/// LLVM IR created for it. The attribute name is compared against each known
/// ROCDL attribute in turn: function-level attributes become attributes or
/// metadata on the llvm::Function looked up by the function's name, the
/// remaining ones attach metadata to every entry of `instructions`.
/// NOTE(review): this listing omits some source lines (the embedded line
/// numbers skip), including the return type, the conditions guarding the
/// `emitOpError` returns, the closing braces and the final return.
49 amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
50 NamedAttribute attribute,
51 LLVM::ModuleTranslation &moduleTranslation)
const final {
// NOTE(review): the dyn_cast result is dereferenced on line 54 with no null
// check in between.
52 auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
53 llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
// Kernel marker attribute: give the function the AMDGPU kernel calling
// convention and fill in default work-group attributes.
54 if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
55 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
// Error path for ops that are not `llvm.func` (its guard is not shown here).
57 return op->emitOpError(Twine(attribute.getName()) +
58 " is only supported on `llvm.func` operations");
65 llvm::Function *llvmFunc =
66 moduleTranslation.lookupFunction(func.getName());
67 llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
// Only set the "1,256" flat work-group size when no value is present yet.
68 if (!llvmFunc->hasFnAttribute(
"amdgpu-flat-work-group-size")) {
69 llvmFunc->addFnAttr(
"amdgpu-flat-work-group-size",
"1,256");
// Likewise add "uniform-work-group-size" only if the function lacks it.
76 if (!llvmFunc->hasFnAttribute(
"uniform-work-group-size"))
77 llvmFunc->addFnAttr(
"uniform-work-group-size");
// Maximum flat work-group size: an integer N is emitted as the string "1,N".
82 if (dialect->getMaxFlatWorkGroupSizeAttrHelper().getName() ==
83 attribute.getName()) {
84 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
86 return op->emitOpError(Twine(attribute.getName()) +
87 " is only supported on `llvm.func` operations");
88 auto value = dyn_cast<IntegerAttr>(attribute.getValue());
// Error path for a non-integer attribute value (guard not shown here).
90 return op->emitOpError(Twine(attribute.getName()) +
91 " must be an integer");
93 llvm::Function *llvmFunc =
94 moduleTranslation.lookupFunction(func.getName());
// Build the attribute string in a small buffer through a stream.
95 llvm::SmallString<8> llvmAttrValue;
96 llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
97 attrValueStream <<
"1," << value.getInt();
98 llvmFunc->addFnAttr(
"amdgpu-flat-work-group-size", llvmAttrValue);
// Waves per EU: the integer value is printed as "amdgpu-waves-per-eu".
100 if (dialect->getWavesPerEuAttrHelper().getName() == attribute.getName()) {
101 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
103 return op->emitOpError(Twine(attribute.getName()) +
104 " is only supported on `llvm.func` operations");
105 auto value = dyn_cast<IntegerAttr>(attribute.getValue());
107 return op->emitOpError(Twine(attribute.getName()) +
108 " must be an integer");
110 llvm::Function *llvmFunc =
111 moduleTranslation.lookupFunction(func.getName());
112 llvm::SmallString<8> llvmAttrValue;
113 llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
114 attrValueStream << value.getInt();
115 llvmFunc->addFnAttr(
"amdgpu-waves-per-eu", llvmAttrValue);
// Flat work-group size: the string value is copied unchanged into
// "amdgpu-flat-work-group-size".
117 if (dialect->getFlatWorkGroupSizeAttrHelper().getName() ==
118 attribute.getName()) {
119 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
121 return op->emitOpError(Twine(attribute.getName()) +
122 " is only supported on `llvm.func` operations");
123 auto value = dyn_cast<StringAttr>(attribute.getValue());
// Error path for a non-string attribute value (guard not shown here).
125 return op->emitOpError(Twine(attribute.getName()) +
126 " must be a string");
128 llvm::Function *llvmFunc =
129 moduleTranslation.lookupFunction(func.getName());
130 llvm::SmallString<8> llvmAttrValue;
131 llvmAttrValue.append(value.getValue());
132 llvmFunc->addFnAttr(
"amdgpu-flat-work-group-size", llvmAttrValue);
// Uniform work-group size: a boolean that controls the presence of the
// "uniform-work-group-size" function attribute. Unlike the other cases the
// attribute name comes from a static accessor instead of an attr helper.
134 if (ROCDL::ROCDLDialect::getUniformWorkGroupSizeAttrName() ==
135 attribute.getName()) {
136 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
138 return op->emitOpError(Twine(attribute.getName()) +
139 " is only supported on `llvm.func` operations");
140 auto value = dyn_cast<BoolAttr>(attribute.getValue());
// Error path for a non-boolean attribute value (guard not shown here).
142 return op->emitOpError(Twine(attribute.getName()) +
143 " must be a boolean");
144 llvm::Function *llvmFunc =
145 moduleTranslation.lookupFunction(func.getName());
// true adds the attribute. NOTE(review): line 148 is missing from this
// listing; the removeFnAttr call presumably sits in an else branch -- confirm.
146 if (value.getValue())
147 llvmFunc->addFnAttr(
"uniform-work-group-size");
149 llvmFunc->removeFnAttr(
"uniform-work-group-size");
// Unsafe FP atomics: the boolean is emitted as the string "true" or "false".
151 if (dialect->getUnsafeFpAtomicsAttrHelper().getName() ==
152 attribute.getName()) {
153 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
155 return op->emitOpError(Twine(attribute.getName()) +
156 " is only supported on `llvm.func` operations");
157 auto value = dyn_cast<BoolAttr>(attribute.getValue());
159 return op->emitOpError(Twine(attribute.getName()) +
160 " must be a boolean");
161 llvm::Function *llvmFunc =
162 moduleTranslation.lookupFunction(func.getName());
163 llvmFunc->addFnAttr(
"amdgpu-unsafe-fp-atomics",
164 value.getValue() ?
"true" :
"false");
// Required work-group size: each element of the dense i32 array becomes an
// i32 constant, and the resulting node is stored as "reqd_work_group_size"
// metadata on the function.
167 if (dialect->getReqdWorkGroupSizeAttrHelper().getName() ==
168 attribute.getName()) {
169 auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
171 return op->emitOpError(Twine(attribute.getName()) +
172 " is only supported on `llvm.func` operations");
173 auto value = dyn_cast<DenseI32ArrayAttr>(attribute.getValue());
// Error path for a value of any other attribute kind (guard not shown here).
175 return op->emitOpError(Twine(attribute.getName()) +
176 " must be a dense i32 array attribute");
177 SmallVector<llvm::Metadata *, 3> metadata;
178 llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
179 for (int32_t i : value.asArrayRef()) {
180 llvm::Constant *constant = llvm::ConstantInt::get(i32, i);
181 metadata.push_back(llvm::ConstantAsMetadata::get(constant));
183 llvm::Function *llvmFunc =
184 moduleTranslation.lookupFunction(func.getName());
185 llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
186 llvmFunc->setMetadata(
"reqd_work_group_size", node);
// The remaining attributes are instruction-level: each one attaches an empty
// metadata node under a fixed name to every entry of `instructions`.
190 if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
191 for (llvm::Instruction *i : instructions)
192 i->setMetadata(
"amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
// Same pattern for "amdgpu.no.remote.memory".
194 if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
195 attribute.getName()) {
196 for (llvm::Instruction *i : instructions)
197 i->setMetadata(
"amdgpu.no.remote.memory",
198 llvm::MDNode::get(llvmContext, {}));
// Same pattern for "amdgpu.no.fine.grained.memory".
200 if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
201 attribute.getName()) {
202 for (llvm::Instruction *i : instructions)
203 i->setMetadata(
"amdgpu.no.fine.grained.memory",
204 llvm::MDNode::get(llvmContext, {}));
// Same pattern for "amdgpu.ignore.denormal.mode".
206 if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
207 attribute.getName()) {
208 for (llvm::Instruction *i : instructions)
209 i->setMetadata(
"amdgpu.ignore.denormal.mode",
210 llvm::MDNode::get(llvmContext, {}));
// NOTE(review): the enclosing function header(s) are missing from this
// listing; only these body statements are visible.
// Insert the ROCDL dialect into the registry.
219 registry.
insert<ROCDL::ROCDLDialect>();
// Attach the ROCDL translation interface to the dialect object.
221 dialect->addInterfaces<ROCDLDialectLLVMIRTranslationInterface>();
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
Include the generated interface declarations.
void registerROCDLDialectTranslation(DialectRegistry &registry)
Register the ROCDL dialect and the translation from it to the LLVM IR in the given registry.