MLIR  18.0.0git
ROCDLToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- ROCDLToLLVMIRTranslation.cpp - Translate ROCDL to LLVM IR ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR ROCDL dialect and
10 // LLVM IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;
using namespace mlir::LLVM;
using mlir::LLVM::detail::createIntrinsicCall;
28 
29 static llvm::Value *createIntrinsicCallWithRange(llvm::IRBuilderBase &builder,
30  llvm::Intrinsic::ID intrinsic,
31  DenseI32ArrayAttr maybeRange) {
32  auto *inst = llvm::cast<llvm::CallInst>(
33  createIntrinsicCall(builder, intrinsic, {}, {}));
34  if (maybeRange) {
36  for (int32_t i : maybeRange.asArrayRef())
37  apInts.push_back(llvm::APInt(32, i));
38  llvm::MDBuilder mdBuilder(builder.getContext());
39  llvm::MDNode *range = mdBuilder.createRange(apInts[0], apInts[1]);
40  inst->setMetadata(llvm::LLVMContext::MD_range, range);
41  }
42  return inst;
43 }
44 
45 // Create a call to ROCm-Device-Library function
46 // Currently this routine will work only for calling ROCDL functions that
47 // take a single int32 argument. It is likely that the interface of this
48 // function will change to make it more generic.
49 static llvm::Value *createDeviceFunctionCall(llvm::IRBuilderBase &builder,
50  StringRef fnName, int parameter) {
51  llvm::Module *module = builder.GetInsertBlock()->getModule();
52  llvm::FunctionType *functionType = llvm::FunctionType::get(
53  llvm::Type::getInt64Ty(module->getContext()), // return type.
54  llvm::Type::getInt32Ty(module->getContext()), // parameter type.
55  false); // no variadic arguments.
56  llvm::Function *fn = dyn_cast<llvm::Function>(
57  module->getOrInsertFunction(fnName, functionType).getCallee());
58  llvm::Value *fnOp0 = llvm::ConstantInt::get(
59  llvm::Type::getInt32Ty(module->getContext()), parameter);
60  return builder.CreateCall(fn, ArrayRef<llvm::Value *>(fnOp0));
61 }
62 
63 namespace {
64 /// Implementation of the dialect interface that converts operations belonging
65 /// to the ROCDL dialect to LLVM IR.
66 class ROCDLDialectLLVMIRTranslationInterface
68 public:
70 
71  /// Translates the given operation to LLVM IR using the provided IR builder
72  /// and saving the state in `moduleTranslation`.
74  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
75  LLVM::ModuleTranslation &moduleTranslation) const final {
76  Operation &opInst = *op;
77 #include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"
78 
79  return failure();
80  }
81 
82  /// Attaches module-level metadata for functions marked as kernels.
84  amendOperation(Operation *op, NamedAttribute attribute,
85  LLVM::ModuleTranslation &moduleTranslation) const final {
86  if (attribute.getName() == ROCDL::ROCDLDialect::getKernelFuncAttrName()) {
87  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
88  if (!func)
89  return failure();
90 
91  // For GPU kernels,
92  // 1. Insert AMDGPU_KERNEL calling convention.
93  // 2. Insert amdgpu-flat-work-group-size(1, 256) attribute unless the user
94  // has overriden this value - 256 is the default in clang
95  // 3. Insert amdgpu-implicitarg-num-bytes=56 (which must be set on OpenCL
96  // and HIP kernels per Clang)
97  llvm::Function *llvmFunc =
98  moduleTranslation.lookupFunction(func.getName());
99  llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
100  if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
101  llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256");
102  }
103  llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
104  }
105  // Override flat-work-group-size
106  // TODO: update clients to rocdl.flat_work_group_size instead,
107  // then remove this half of the branch
108  if ("rocdl.max_flat_work_group_size" == attribute.getName()) {
109  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
110  if (!func)
111  return failure();
112  auto value = dyn_cast<IntegerAttr>(attribute.getValue());
113  if (!value)
114  return failure();
115 
116  llvm::Function *llvmFunc =
117  moduleTranslation.lookupFunction(func.getName());
118  llvm::SmallString<8> llvmAttrValue;
119  llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
120  attrValueStream << "1," << value.getInt();
121  llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
122  }
123  if (ROCDL::ROCDLDialect::getFlatWorkGroupSizeAttrName() ==
124  attribute.getName()) {
125  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
126  if (!func)
127  return failure();
128  auto value = dyn_cast<StringAttr>(attribute.getValue());
129  if (!value)
130  return failure();
131 
132  llvm::Function *llvmFunc =
133  moduleTranslation.lookupFunction(func.getName());
134  llvm::SmallString<8> llvmAttrValue;
135  llvmAttrValue.append(value.getValue());
136  llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
137  }
138 
139  // Set reqd_work_group_size metadata
140  if (ROCDL::ROCDLDialect::getReqdWorkGroupSizeAttrName() ==
141  attribute.getName()) {
142  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
143  if (!func)
144  return failure();
145  auto value = dyn_cast<DenseI32ArrayAttr>(attribute.getValue());
146  if (!value)
147  return failure();
148  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
150  llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
151  for (int32_t i : value.asArrayRef()) {
152  llvm::Constant *constant = llvm::ConstantInt::get(i32, i);
153  metadata.push_back(llvm::ConstantAsMetadata::get(constant));
154  }
155  llvm::Function *llvmFunc =
156  moduleTranslation.lookupFunction(func.getName());
157  llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
158  llvmFunc->setMetadata("reqd_work_group_size", node);
159  }
160  return success();
161  }
162 };
163 } // namespace
164 
166  registry.insert<ROCDL::ROCDLDialect>();
167  registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
168  dialect->addInterfaces<ROCDLDialectLLVMIRTranslationInterface>();
169  });
170 }
171 
173  DialectRegistry registry;
175  context.appendDialectRegistry(registry);
176 }
static llvm::Value * createDeviceFunctionCall(llvm::IRBuilderBase &builder, StringRef fnName, int parameter)
static llvm::Value * createIntrinsicCallWithRange(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, DenseI32ArrayAttr maybeRange)
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Base class for dialect interfaces providing translation to LLVM IR.
Implementation class for module translation.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:198
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects the context only if needed; this helps unify some of the attribute construction code.
void registerROCDLDialectTranslation(DialectRegistry &registry)
Register the ROCDL dialect and the translation from it to the LLVM IR in the given registry.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26