MLIR  20.0.0git
ROCDLToLLVMIRTranslation.cpp
Go to the documentation of this file.
1 //===- ROCDLToLLVMIRTranslation.cpp - Translate ROCDL to LLVM IR ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR ROCDL dialect and
10 // LLVM IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
17 #include "mlir/IR/Operation.h"
19 
20 #include "llvm/IR/ConstantRange.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/IntrinsicsAMDGPU.h"
23 #include "llvm/Support/raw_ostream.h"
24 
25 using namespace mlir;
26 using namespace mlir::LLVM;
28 
29 static llvm::Value *createIntrinsicCallWithRange(llvm::IRBuilderBase &builder,
30  llvm::Intrinsic::ID intrinsic,
31  DenseI32ArrayAttr maybeRange) {
32  auto *inst = llvm::cast<llvm::CallInst>(
33  createIntrinsicCall(builder, intrinsic, {}, {}));
34  if (maybeRange) {
35  llvm::ConstantRange Range(APInt(32, maybeRange[0]),
36  APInt(32, maybeRange[1]));
37  inst->addRangeRetAttr(Range);
38  }
39  return inst;
40 }
41 
42 // Create a call to ROCm-Device-Library function
43 // Currently this routine will work only for calling ROCDL functions that
44 // take a single int32 argument. It is likely that the interface of this
45 // function will change to make it more generic.
46 static llvm::Value *createDeviceFunctionCall(llvm::IRBuilderBase &builder,
47  StringRef fnName, int parameter) {
48  llvm::Module *module = builder.GetInsertBlock()->getModule();
49  llvm::FunctionType *functionType = llvm::FunctionType::get(
50  llvm::Type::getInt64Ty(module->getContext()), // return type.
51  llvm::Type::getInt32Ty(module->getContext()), // parameter type.
52  false); // no variadic arguments.
53  llvm::Function *fn = dyn_cast<llvm::Function>(
54  module->getOrInsertFunction(fnName, functionType).getCallee());
55  llvm::Value *fnOp0 = llvm::ConstantInt::get(
56  llvm::Type::getInt32Ty(module->getContext()), parameter);
57  return builder.CreateCall(fn, ArrayRef<llvm::Value *>(fnOp0));
58 }
59 
60 namespace {
61 /// Implementation of the dialect interface that converts operations belonging
62 /// to the ROCDL dialect to LLVM IR.
63 class ROCDLDialectLLVMIRTranslationInterface
65 public:
67 
68  /// Translates the given operation to LLVM IR using the provided IR builder
69  /// and saving the state in `moduleTranslation`.
///
/// The per-operation conversion code is not written here; it is textually
/// included from `ROCDLConversions.inc` inside the method body. This method
/// returns `failure()` only when control falls through that included code.
/// NOTE(review): the included file is presumably generated and expected to
/// return early for every operation it handles -- confirm against the .inc.
70  LogicalResult
71  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
72  LLVM::ModuleTranslation &moduleTranslation) const final {
// Reference alias for `*op`. NOTE(review): the included conversions
// presumably refer to the operation by the exact name `opInst` (and to
// `builder` / `moduleTranslation`), so these names must not be changed
// without checking the .inc file.
73  Operation &opInst = *op;
74 #include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"
75 
// Reached only if the included code above did not return.
76  return failure();
77  }
78 
79  /// Attaches module-level metadata for functions marked as kernels.
80  LogicalResult
81  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
82  NamedAttribute attribute,
83  LLVM::ModuleTranslation &moduleTranslation) const final {
84  auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
85  if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
86  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
87  if (!func)
88  return op->emitOpError(Twine(attribute.getName()) +
89  " is only supported on `llvm.func` operations");
90  ;
91 
92  // For GPU kernels,
93  // 1. Insert AMDGPU_KERNEL calling convention.
94  // 2. Insert amdgpu-flat-work-group-size(1, 256) attribute unless the user
95  // has overriden this value - 256 is the default in clang
96  llvm::Function *llvmFunc =
97  moduleTranslation.lookupFunction(func.getName());
98  llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
99  if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
100  llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256");
101  }
102 
103  // MLIR's GPU kernel APIs all assume and produce uniformly-sized
104  // workgroups, so the lowering of the `rocdl.kernel` marker encodes this
105  // assumption. This assumption may be overridden by setting
106  // `rocdl.uniform_work_group_size` on a given function.
107  if (!llvmFunc->hasFnAttribute("uniform-work-group-size"))
108  llvmFunc->addFnAttr("uniform-work-group-size", "true");
109  }
110  // Override flat-work-group-size
111  // TODO: update clients to rocdl.flat_work_group_size instead,
112  // then remove this half of the branch
113  if (dialect->getMaxFlatWorkGroupSizeAttrHelper().getName() ==
114  attribute.getName()) {
115  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
116  if (!func)
117  return op->emitOpError(Twine(attribute.getName()) +
118  " is only supported on `llvm.func` operations");
119  auto value = dyn_cast<IntegerAttr>(attribute.getValue());
120  if (!value)
121  return op->emitOpError(Twine(attribute.getName()) +
122  " must be an integer");
123 
124  llvm::Function *llvmFunc =
125  moduleTranslation.lookupFunction(func.getName());
126  llvm::SmallString<8> llvmAttrValue;
127  llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
128  attrValueStream << "1," << value.getInt();
129  llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
130  }
131  if (dialect->getFlatWorkGroupSizeAttrHelper().getName() ==
132  attribute.getName()) {
133  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
134  if (!func)
135  return op->emitOpError(Twine(attribute.getName()) +
136  " is only supported on `llvm.func` operations");
137  auto value = dyn_cast<StringAttr>(attribute.getValue());
138  if (!value)
139  return op->emitOpError(Twine(attribute.getName()) +
140  " must be a string");
141 
142  llvm::Function *llvmFunc =
143  moduleTranslation.lookupFunction(func.getName());
144  llvm::SmallString<8> llvmAttrValue;
145  llvmAttrValue.append(value.getValue());
146  llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
147  }
148  if (ROCDL::ROCDLDialect::getUniformWorkGroupSizeAttrName() ==
149  attribute.getName()) {
150  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
151  if (!func)
152  return op->emitOpError(Twine(attribute.getName()) +
153  " is only supported on `llvm.func` operations");
154  auto value = dyn_cast<BoolAttr>(attribute.getValue());
155  if (!value)
156  return op->emitOpError(Twine(attribute.getName()) +
157  " must be a boolean");
158  llvm::Function *llvmFunc =
159  moduleTranslation.lookupFunction(func.getName());
160  llvmFunc->addFnAttr("uniform-work-group-size",
161  value.getValue() ? "true" : "false");
162  }
163  // Set reqd_work_group_size metadata
164  if (dialect->getReqdWorkGroupSizeAttrHelper().getName() ==
165  attribute.getName()) {
166  auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
167  if (!func)
168  return op->emitOpError(Twine(attribute.getName()) +
169  " is only supported on `llvm.func` operations");
170  auto value = dyn_cast<DenseI32ArrayAttr>(attribute.getValue());
171  if (!value)
172  return op->emitOpError(Twine(attribute.getName()) +
173  " must be a dense i32 array attribute");
174  llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
176  llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
177  for (int32_t i : value.asArrayRef()) {
178  llvm::Constant *constant = llvm::ConstantInt::get(i32, i);
179  metadata.push_back(llvm::ConstantAsMetadata::get(constant));
180  }
181  llvm::Function *llvmFunc =
182  moduleTranslation.lookupFunction(func.getName());
183  llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
184  llvmFunc->setMetadata("reqd_work_group_size", node);
185  }
186  return success();
187  }
188 };
189 } // namespace
190 
192  registry.insert<ROCDL::ROCDLDialect>();
193  registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
194  dialect->addInterfaces<ROCDLDialectLLVMIRTranslationInterface>();
195  });
196 }
197 
199  DialectRegistry registry;
201  context.appendDialectRegistry(registry);
202 }
static llvm::Value * createDeviceFunctionCall(llvm::IRBuilderBase &builder, StringRef fnName, int parameter)
static llvm::Value * createIntrinsicCallWithRange(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, DenseI32ArrayAttr maybeRange)
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Base class for dialect interfaces providing translation to LLVM IR.
Implementation class for module translation.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:207
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:671
llvm::CallInst * createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef< llvm::Value * > args={}, ArrayRef< llvm::Type * > tys={})
Creates a call to an LLVM IR intrinsic function with the given arguments.
Include the generated interface declarations.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
void registerROCDLDialectTranslation(DialectRegistry &registry)
Register the ROCDL dialect and the translation from it to the LLVM IR in the given registry.
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...