22#include "llvm/Config/Targets.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/MC/MCAsmBackend.h"
25#include "llvm/MC/MCAsmInfo.h"
26#include "llvm/MC/MCCodeEmitter.h"
27#include "llvm/MC/MCContext.h"
28#include "llvm/MC/MCInstrInfo.h"
29#include "llvm/MC/MCObjectFileInfo.h"
30#include "llvm/MC/MCObjectWriter.h"
31#include "llvm/MC/MCParser/MCTargetAsmParser.h"
32#include "llvm/MC/MCRegisterInfo.h"
33#include "llvm/MC/MCStreamer.h"
34#include "llvm/MC/MCSubtargetInfo.h"
35#include "llvm/MC/TargetRegistry.h"
36#include "llvm/Support/FileSystem.h"
37#include "llvm/Support/FileUtilities.h"
38#include "llvm/Support/Path.h"
39#include "llvm/Support/Program.h"
40#include "llvm/Support/SourceMgr.h"
41#include "llvm/Support/TargetSelect.h"
42#include "llvm/TargetParser/TargetParser.h"
50#ifndef __DEFAULT_ROCM_PATH__
51#define __DEFAULT_ROCM_PATH__ ""
56class ROCDLTargetAttrImpl
57 :
public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
59 std::optional<mlir::gpu::SerializedObject>
73 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
86 if (
const char *var = std::getenv(
"ROCM_PATH"))
88 if (
const char *var = std::getenv(
"ROCM_ROOT"))
90 if (
const char *var = std::getenv(
"ROCM_HOME"))
113 static llvm::once_flag initializeBackendOnce;
114 llvm::call_once(initializeBackendOnce, []() {
116#if LLVM_HAS_AMDGPU_TARGET
117 LLVMInitializeAMDGPUTarget();
118 LLVMInitializeAMDGPUTargetInfo();
119 LLVMInitializeAMDGPUTargetMC();
120 LLVMInitializeAMDGPUAsmParser();
121 LLVMInitializeAMDGPUAsmPrinter();
141 path.insert(path.begin(), pathRef.begin(), pathRef.end());
142 llvm::sys::path::append(path,
"amdgcn",
"bitcode");
143 pathRef = StringRef(path.data(), path.size());
146 if (!llvm::sys::fs::is_directory(pathRef)) {
148 <<
" does not exist or is not a directory";
153 auto addLib = [&](
const Twine &lib) ->
bool {
154 auto baseSize = path.size();
155 llvm::sys::path::append(path, lib);
156 StringRef pathRef(path.data(), path.size());
157 if (!llvm::sys::fs::is_regular_file(pathRef)) {
159 <<
" does not exist or is not a file";
163 path.truncate(baseSize);
177std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
188 return std::move(bcFiles);
193 if (
auto *openclVersion =
module.getNamedMetadata(
"opencl.ocl.version"))
194 module.eraseNamedMetadata(openclVersion);
196 if (
auto *ident =
module.getNamedMetadata(
"llvm.ident"))
197 module.eraseNamedMetadata(ident);
208 for (llvm::Function &f :
module.functions()) {
209 if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
210 StringRef funcName = f.getName();
211 if (
"printf" == funcName)
214 if (funcName.starts_with(
"__ockl_"))
216 if (funcName.starts_with(
"__ocml_"))
218 if (funcName ==
"__atomic_work_item_fence")
231 bool finiteOnly,
bool unsafeMath,
bool fastMath,
bool correctSqrt,
234 auto addControlVariable = [&
module](StringRef name, uint32_t value,
236 if (
module.getNamedGlobal(name))
238 llvm::IntegerType *type =
239 llvm::IntegerType::getIntNTy(
module.getContext(), bitwidth);
240 llvm::GlobalVariable *controlVariable =
new llvm::GlobalVariable(
242 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
243 llvm::ConstantInt::get(type, value), name,
nullptr,
244 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
246 controlVariable->setVisibility(
247 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
248 controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
249 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
254 abiVer.getAsInteger(0, abi);
255 module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi);
261 addControlVariable(
"__oclc_finite_only_opt", finiteOnly || fastMath, 8);
262 addControlVariable(
"__oclc_daz_opt", daz || fastMath, 8);
263 addControlVariable(
"__oclc_correctly_rounded_sqrt32",
264 correctSqrt && !fastMath, 8);
265 addControlVariable(
"__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
269 addControlVariable(
"__oclc_wavefrontsize64", wave64, 8);
271 llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(
chip);
273 addControlVariable(
"__oclc_ISA_version",
274 isaVersion.Minor + 100 * isaVersion.Stepping +
275 1000 * isaVersion.Major,
277 addControlVariable(
"__oclc_ABI_version", abi, 32);
281FailureOr<SmallVector<char, 0>>
286 llvm::raw_svector_ostream os(
result);
288 llvm::Triple triple(llvm::Triple::normalize(targetTriple));
290 const llvm::Target *
target =
291 llvm::TargetRegistry::lookupTarget(triple, error);
293 return emitError() <<
"failed to lookup target: " << error;
295 llvm::SourceMgr srcMgr;
297 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBufferCopy(isa), SMLoc());
299 const llvm::MCTargetOptions mcOptions;
300 std::unique_ptr<llvm::MCRegisterInfo> mri(
target->createMCRegInfo(triple));
301 std::unique_ptr<llvm::MCAsmInfo> mai(
302 target->createMCAsmInfo(*mri, triple, mcOptions));
303 std::unique_ptr<llvm::MCSubtargetInfo> sti(
304 target->createMCSubtargetInfo(triple, chip, features));
306 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
308 std::unique_ptr<llvm::MCObjectFileInfo> mofi(
target->createMCObjectFileInfo(
310 ctx.setObjectFileInfo(mofi.get());
313 if (!llvm::sys::fs::current_path(cwd))
314 ctx.setCompilationDir(cwd);
316 std::unique_ptr<llvm::MCStreamer> mcStreamer;
317 std::unique_ptr<llvm::MCInstrInfo> mcii(
target->createMCInstrInfo());
319 llvm::MCCodeEmitter *ce =
target->createMCCodeEmitter(*mcii, ctx);
320 llvm::MCAsmBackend *mab =
target->createMCAsmBackend(*sti, *mri, mcOptions);
321 mcStreamer.reset(
target->createMCObjectStreamer(
322 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
323 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
326 std::unique_ptr<llvm::MCAsmParser> parser(
327 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
328 std::unique_ptr<llvm::MCTargetAsmParser> tap(
329 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
332 return emitError() <<
"assembler initialization error";
334 parser->setTargetParser(*tap);
339FailureOr<SmallVector<char, 0>>
343 int tempIsaBinaryFd = -1;
345 if (llvm::sys::fs::createTemporaryFile(
"kernel%%",
"o", tempIsaBinaryFd,
346 tempIsaBinaryFilename))
348 <<
"failed to create a temporary file for dumping the ISA binary";
350 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
352 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd,
true);
353 tempIsaBinaryOs << StringRef(objectCode.data(), objectCode.size());
354 tempIsaBinaryOs.flush();
359 if (llvm::sys::fs::createTemporaryFile(
"kernel",
"hsaco", tempHsacoFilename))
361 <<
"failed to create a temporary file for the HSA code object";
363 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
365 int lldResult = llvm::sys::ExecuteAndWait(
367 {
"ld.lld",
"-shared", tempIsaBinaryFilename,
"-o", tempHsacoFilename});
369 return emitError() <<
"lld invocation failed";
373 llvm::MemoryBuffer::getFile(tempHsacoFilename,
false);
376 <<
"failed to read the HSA code object from the temp file";
378 StringRef buffer = (*hsacoFile)->getBuffer();
383FailureOr<SmallVector<char, 0>>
388 serializedISA, this->
triple, this->chip, this->features, errCallback);
390 if (failed(isaBinary))
395 llvm::sys::path::append(lldPath,
"llvm",
"bin",
"ld.lld");
396 FailureOr<SmallVector<char, 0>> linkedCode =
398 if (failed(linkedCode))
407#define DEBUG_TYPE "serialize-to-llvm"
409 llvm::dbgs() <<
"LLVM IR for module: "
410 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
411 << llvmModule <<
"\n";
418 if (failed(targetMachine))
420 <<
"target Machine unavailable for triple " <<
triple
421 <<
", can't compile with LLVM";
424 FailureOr<SmallString<0>> serializedISA =
427 if (failed(serializedISA))
430#define DEBUG_TYPE "serialize-to-isa"
432 llvm::dbgs() <<
"ISA for module: "
433 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
434 << *serializedISA <<
"\n";
444 <<
"invalid ROCm path, please set a valid path";
450#if LLVM_HAS_AMDGPU_TARGET
457 FailureOr<SmallVector<char, 0>>
458 moduleToObject(llvm::Module &llvmModule)
override;
466AMDGPUSerializer::AMDGPUSerializer(
Operation &module, ROCDLTargetAttr
target,
469 targetOptions(targetOptions) {}
471FailureOr<SmallVector<char, 0>>
472AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
473 return moduleToObjectImpl(targetOptions, llvmModule);
477std::optional<mlir::gpu::SerializedObject>
478ROCDLTargetAttrImpl::serializeToObject(
479 Attribute attribute, Operation *module,
480 const gpu::TargetOptions &
options)
const {
481 assert(module &&
"The module must be non null.");
484 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
485 module->emitError("module must be a GPU module");
488#if LLVM_HAS_AMDGPU_TARGET
489 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
492 std::optional<SmallVector<char, 0>> binary = serializer.run();
495 return gpu::SerializedObject{std::move(*binary)};
497 module->emitError("the `AMDGPU` target was not built. Please enable it when "
504ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
505 const mlir::gpu::SerializedObject &
object,
506 const gpu::TargetOptions &
options)
const {
507 gpu::CompilationTarget format =
options.getCompilationTarget();
510 gpu::KernelTableAttr kernels;
511 if (format > gpu::CompilationTarget::Binary) {
512 format = gpu::CompilationTarget::Binary;
515 DictionaryAttr properties{};
517 StringAttr objectStr = builder.getStringAttr(
518 StringRef(
object.getObject().data(),
object.getObject().size()));
519 return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
520 properties, kernels);
#define __DEFAULT_ROCM_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This class represents a diagnostic that is inflight and set to be reported.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module > > &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
FailureOr< llvm::TargetMachine * > getOrCreateTargetMachine()
Create the target machine based on the target triple and chip.
virtual void setDataLayoutAndTriple(llvm::Module &module)
Hook for computing the Datalayout.
virtual FailureOr< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
StringRef triple
Target triple.
Operation & getOperation()
Returns the operation being serialized.
static FailureOr< SmallString< 0 > > translateModuleToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine, function_ref< InFlightDiagnostic()> emitError)
Translate LLVM module to textual ISA.
StringRef chip
Target chip.
ModuleToObject(Operation &module, StringRef triple, StringRef chip, StringRef features={}, int optLevel=3, function_ref< void(llvm::Module &)> initialLlvmIRCallback={}, function_ref< void(llvm::Module &)> linkedLlvmIRCallback={}, function_ref< void(llvm::Module &)> optimizedLlvmIRCallback={}, function_ref< void(StringRef)> isaCallback={})
Operation & module
Module to transform to a binary object.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Operation is the basic unit of execution within MLIR.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
Base class for all ROCDL serializations from GPU modules into binary strings.
ROCDLTargetAttr getTarget() const
Returns the target attribute.
ArrayRef< Attribute > getLibrariesToLink() const
Returns the LLVM bitcode libraries to be linked.
AMDGCNLibraries deviceLibs
AMD GCN libraries to use when linking, the default is using none.
ROCDLTargetAttr target
ROCDL target attribute.
SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getROCMPath.
FailureOr< SmallVector< char, 0 > > moduleToObjectImpl(const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule)
Default implementation of ModuleToObject::moduleToObject.
virtual FailureOr< SmallVector< char, 0 > > compileToBinary(StringRef serializedISA)
Compiles assembly to a binary.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer)
Adds oclc control variables to the LLVM Module if needed.
std::string toolkitPath
ROCM toolkit path.
SmallVector< Attribute > librariesToLink
List of LLVM bitcode files to link to.
static void init()
Initializes the LLVM AMDGPU target by safely calling LLVMInitializeAMDGPU* methods if available.
StringRef getToolkitPath() const
Returns the ROCM toolkit path.
LogicalResult appendStandardLibs(AMDGCNLibraries libs)
Appends standard ROCm device libraries to fileList.
LogicalResult handleBitcodeFile(llvm::Module &module) override
Removes unnecessary metadata from the loaded bitcode files.
void handleModulePreLink(llvm::Module &module) override
Determines required Device Libraries and adds oclc control variables to the LLVM Module if needed.
This class represents a serialized object (GPU binary) with metadata.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
CompilationTarget getCompilationTarget() const
Returns the compilation target.
FailureOr< SmallVector< char, 0 > > assembleIsa(StringRef isa, StringRef targetTriple, StringRef chip, StringRef features, function_ref< InFlightDiagnostic()> emitError)
Assembles ISA to an object code.
FailureOr< SmallVector< char, 0 > > linkObjectCode(ArrayRef< char > objectCode, StringRef lldPath, function_ref< InFlightDiagnostic()> emitError)
void registerROCDLTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #rocdl.target attribute in the given registry.
AMDGCNLibraries
Helper enum for specifying the AMD GCN device libraries required for compilation.
gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule, ArrayRef< char > elfData={})
Returns a #gpu.kernel_table containing kernel metadata for each of the kernels in gpuModule.
StringRef getROCMPath()
Searches for and returns the ROCM toolkit path; the search order starts with the ROCM_PATH, ROCM_ROOT and ROCM_HOME environment variables, checked in that order.
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
llvm::function_ref< Fn > function_ref