22#include "llvm/IR/Constants.h"
23#include "llvm/MC/MCAsmBackend.h"
24#include "llvm/MC/MCAsmInfo.h"
25#include "llvm/MC/MCCodeEmitter.h"
26#include "llvm/MC/MCContext.h"
27#include "llvm/MC/MCInstrInfo.h"
28#include "llvm/MC/MCObjectFileInfo.h"
29#include "llvm/MC/MCObjectWriter.h"
30#include "llvm/MC/MCParser/MCTargetAsmParser.h"
31#include "llvm/MC/MCRegisterInfo.h"
32#include "llvm/MC/MCStreamer.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/FileUtilities.h"
37#include "llvm/Support/Path.h"
38#include "llvm/Support/Program.h"
39#include "llvm/Support/SourceMgr.h"
40#include "llvm/Support/TargetSelect.h"
41#include "llvm/TargetParser/TargetParser.h"
49#ifndef __DEFAULT_ROCM_PATH__
50#define __DEFAULT_ROCM_PATH__ ""
55class ROCDLTargetAttrImpl
56 :
public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
58 std::optional<SmallVector<char, 0>>
72 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
85 if (
const char *var = std::getenv(
"ROCM_PATH"))
87 if (
const char *var = std::getenv(
"ROCM_ROOT"))
89 if (
const char *var = std::getenv(
"ROCM_HOME"))
112 static llvm::once_flag initializeBackendOnce;
113 llvm::call_once(initializeBackendOnce, []() {
115#if MLIR_ENABLE_ROCM_CONVERSIONS
116 LLVMInitializeAMDGPUTarget();
117 LLVMInitializeAMDGPUTargetInfo();
118 LLVMInitializeAMDGPUTargetMC();
119 LLVMInitializeAMDGPUAsmParser();
120 LLVMInitializeAMDGPUAsmPrinter();
140 path.insert(path.begin(), pathRef.begin(), pathRef.end());
141 llvm::sys::path::append(path,
"amdgcn",
"bitcode");
142 pathRef = StringRef(path.data(), path.size());
145 if (!llvm::sys::fs::is_directory(pathRef)) {
147 <<
" does not exist or is not a directory";
152 auto addLib = [&](
const Twine &lib) ->
bool {
153 auto baseSize = path.size();
154 llvm::sys::path::append(path, lib);
155 StringRef pathRef(path.data(), path.size());
156 if (!llvm::sys::fs::is_regular_file(pathRef)) {
158 <<
" does not exist or is not a file";
162 path.truncate(baseSize);
176std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
187 return std::move(bcFiles);
192 if (
auto *openclVersion =
module.getNamedMetadata(
"opencl.ocl.version"))
193 module.eraseNamedMetadata(openclVersion);
195 if (
auto *ident =
module.getNamedMetadata(
"llvm.ident"))
196 module.eraseNamedMetadata(ident);
207 for (llvm::Function &f :
module.functions()) {
208 if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
209 StringRef funcName = f.getName();
210 if (
"printf" == funcName)
213 if (funcName.starts_with(
"__ockl_"))
215 if (funcName.starts_with(
"__ocml_"))
217 if (funcName ==
"__atomic_work_item_fence")
230 bool finiteOnly,
bool unsafeMath,
bool fastMath,
bool correctSqrt,
233 auto addControlVariable = [&
module](StringRef name, uint32_t value,
235 if (
module.getNamedGlobal(name))
237 llvm::IntegerType *type =
238 llvm::IntegerType::getIntNTy(
module.getContext(), bitwidth);
239 llvm::GlobalVariable *controlVariable =
new llvm::GlobalVariable(
241 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
242 llvm::ConstantInt::get(type, value), name,
nullptr,
243 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
245 controlVariable->setVisibility(
246 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
247 controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
248 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
253 abiVer.getAsInteger(0, abi);
254 module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi);
260 addControlVariable(
"__oclc_finite_only_opt", finiteOnly || fastMath, 8);
261 addControlVariable(
"__oclc_daz_opt", daz || fastMath, 8);
262 addControlVariable(
"__oclc_correctly_rounded_sqrt32",
263 correctSqrt && !fastMath, 8);
264 addControlVariable(
"__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
268 addControlVariable(
"__oclc_wavefrontsize64", wave64, 8);
270 llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(
chip);
272 addControlVariable(
"__oclc_ISA_version",
273 isaVersion.Minor + 100 * isaVersion.Stepping +
274 1000 * isaVersion.Major,
276 addControlVariable(
"__oclc_ABI_version", abi, 32);
280FailureOr<SmallVector<char, 0>>
285 llvm::raw_svector_ostream os(
result);
287 llvm::Triple triple(llvm::Triple::normalize(targetTriple));
289 const llvm::Target *
target =
290 llvm::TargetRegistry::lookupTarget(triple, error);
292 return emitError() <<
"failed to lookup target: " << error;
294 llvm::SourceMgr srcMgr;
296 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBufferCopy(isa), SMLoc());
298 const llvm::MCTargetOptions mcOptions;
299 std::unique_ptr<llvm::MCRegisterInfo> mri(
target->createMCRegInfo(triple));
300 std::unique_ptr<llvm::MCAsmInfo> mai(
301 target->createMCAsmInfo(*mri, triple, mcOptions));
302 std::unique_ptr<llvm::MCSubtargetInfo> sti(
303 target->createMCSubtargetInfo(triple, chip, features));
305 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
307 std::unique_ptr<llvm::MCObjectFileInfo> mofi(
target->createMCObjectFileInfo(
309 ctx.setObjectFileInfo(mofi.get());
312 if (!llvm::sys::fs::current_path(cwd))
313 ctx.setCompilationDir(cwd);
315 std::unique_ptr<llvm::MCStreamer> mcStreamer;
316 std::unique_ptr<llvm::MCInstrInfo> mcii(
target->createMCInstrInfo());
318 llvm::MCCodeEmitter *ce =
target->createMCCodeEmitter(*mcii, ctx);
319 llvm::MCAsmBackend *mab =
target->createMCAsmBackend(*sti, *mri, mcOptions);
320 mcStreamer.reset(
target->createMCObjectStreamer(
321 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
322 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
325 std::unique_ptr<llvm::MCAsmParser> parser(
326 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
327 std::unique_ptr<llvm::MCTargetAsmParser> tap(
328 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
331 return emitError() <<
"assembler initialization error";
333 parser->setTargetParser(*tap);
338FailureOr<SmallVector<char, 0>>
342 int tempIsaBinaryFd = -1;
344 if (llvm::sys::fs::createTemporaryFile(
"kernel%%",
"o", tempIsaBinaryFd,
345 tempIsaBinaryFilename))
347 <<
"failed to create a temporary file for dumping the ISA binary";
349 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
351 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd,
true);
352 tempIsaBinaryOs << StringRef(objectCode.data(), objectCode.size());
353 tempIsaBinaryOs.flush();
358 if (llvm::sys::fs::createTemporaryFile(
"kernel",
"hsaco", tempHsacoFilename))
360 <<
"failed to create a temporary file for the HSA code object";
362 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
365 llvm::sys::path::append(lldPath,
"llvm",
"bin",
"ld.lld");
366 int lldResult = llvm::sys::ExecuteAndWait(
368 {
"ld.lld",
"-shared", tempIsaBinaryFilename,
"-o", tempHsacoFilename});
370 return emitError() <<
"lld invocation failed";
374 llvm::MemoryBuffer::getFile(tempHsacoFilename,
false);
377 <<
"failed to read the HSA code object from the temp file";
379 StringRef buffer = (*hsacoFile)->getBuffer();
384FailureOr<SmallVector<char, 0>>
389 serializedISA, this->
triple, this->chip, this->features, errCallback);
391 if (failed(isaBinary))
395 FailureOr<SmallVector<char, 0>> linkedCode =
397 if (failed(linkedCode))
406#define DEBUG_TYPE "serialize-to-llvm"
408 llvm::dbgs() <<
"LLVM IR for module: "
409 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
410 << llvmModule <<
"\n";
417 if (failed(targetMachine))
419 <<
"target Machine unavailable for triple " <<
triple
420 <<
", can't compile with LLVM";
423 FailureOr<SmallString<0>> serializedISA =
426 if (failed(serializedISA))
429#define DEBUG_TYPE "serialize-to-isa"
431 llvm::dbgs() <<
"ISA for module: "
432 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
433 << *serializedISA <<
"\n";
443 <<
"invalid ROCm path, please set a valid path";
449#if MLIR_ENABLE_ROCM_CONVERSIONS
456 FailureOr<SmallVector<char, 0>>
457 moduleToObject(llvm::Module &llvmModule)
override;
465AMDGPUSerializer::AMDGPUSerializer(
Operation &module, ROCDLTargetAttr
target,
468 targetOptions(targetOptions) {}
470FailureOr<SmallVector<char, 0>>
471AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
472 return moduleToObjectImpl(targetOptions, llvmModule);
476std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
477 Attribute attribute, Operation *module,
478 const gpu::TargetOptions &
options)
const {
479 assert(module &&
"The module must be non null.");
482 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
483 module->emitError("module must be a GPU module");
486#if MLIR_ENABLE_ROCM_CONVERSIONS
487 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
490 return serializer.run();
492 module->emitError("the `AMDGPU` target was not built. Please enable it when "
499ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
500 const SmallVector<char, 0> &
object,
501 const gpu::TargetOptions &
options)
const {
502 gpu::CompilationTarget format =
options.getCompilationTarget();
505 gpu::KernelTableAttr kernels;
506 if (format > gpu::CompilationTarget::Binary) {
507 format = gpu::CompilationTarget::Binary;
510 DictionaryAttr properties{};
512 StringAttr objectStr =
513 builder.getStringAttr(StringRef(
object.data(),
object.size()));
514 return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
515 properties, kernels);
#define __DEFAULT_ROCM_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This class represents a diagnostic that is inflight and set to be reported.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module > > &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
FailureOr< llvm::TargetMachine * > getOrCreateTargetMachine()
Create the target machine based on the target triple and chip.
virtual void setDataLayoutAndTriple(llvm::Module &module)
Hook for computing the Datalayout.
virtual FailureOr< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file; by default, it serializes to LLVM bitcode.
StringRef triple
Target triple.
Operation & getOperation()
Returns the operation being serialized.
static FailureOr< SmallString< 0 > > translateModuleToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine, function_ref< InFlightDiagnostic()> emitError)
Translate LLVM module to textual ISA.
StringRef chip
Target chip.
ModuleToObject(Operation &module, StringRef triple, StringRef chip, StringRef features={}, int optLevel=3, function_ref< void(llvm::Module &)> initialLlvmIRCallback={}, function_ref< void(llvm::Module &)> linkedLlvmIRCallback={}, function_ref< void(llvm::Module &)> optimizedLlvmIRCallback={}, function_ref< void(StringRef)> isaCallback={})
Operation & module
Module to transform to a binary object.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Operation is the basic unit of execution within MLIR.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
Base class for all ROCDL serializations from GPU modules into binary strings.
ROCDLTargetAttr getTarget() const
Returns the target attribute.
ArrayRef< Attribute > getLibrariesToLink() const
Returns the LLVM bitcode libraries to be linked.
AMDGCNLibraries deviceLibs
AMD GCN libraries to use when linking, the default is using none.
ROCDLTargetAttr target
ROCDL target attribute.
SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getROCMPath.
FailureOr< SmallVector< char, 0 > > moduleToObjectImpl(const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule)
Default implementation of ModuleToObject::moduleToObject.
virtual FailureOr< SmallVector< char, 0 > > compileToBinary(StringRef serializedISA)
Compiles assembly to a binary.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer)
Adds oclc control variables to the LLVM Module if needed.
std::string toolkitPath
ROCM toolkit path.
SmallVector< Attribute > librariesToLink
List of LLVM bitcode files to link to.
static void init()
Initializes the LLVM AMDGPU target by safely calling LLVMInitializeAMDGPU* methods if available.
StringRef getToolkitPath() const
Returns the ROCM toolkit path.
LogicalResult appendStandardLibs(AMDGCNLibraries libs)
Appends standard ROCm device libraries to fileList.
LogicalResult handleBitcodeFile(llvm::Module &module) override
Removes unnecessary metadata from the loaded bitcode files.
void handleModulePreLink(llvm::Module &module) override
Determines required Device Libraries and adds oclc control variables to the LLVM Module if needed.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
CompilationTarget getCompilationTarget() const
Returns the compilation target.
FailureOr< SmallVector< char, 0 > > assembleIsa(StringRef isa, StringRef targetTriple, StringRef chip, StringRef features, function_ref< InFlightDiagnostic()> emitError)
Assembles ISA to an object code.
void registerROCDLTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #rocdl.target attribute in the given registry.
AMDGCNLibraries
Helper enum for specifying the AMD GCN device libraries required for compilation.
gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule, ArrayRef< char > elfData={})
Returns a #gpu.kernel_table containing kernel metadata for each of the kernels in gpuModule.
StringRef getROCMPath()
Searches for and returns the ROCM toolkit path; the search order begins with the ROCM_PATH, ROCM_ROOT, and ROCM_HOME environment variables, in that order.
FailureOr< SmallVector< char, 0 > > linkObjectCode(ArrayRef< char > objectCode, StringRef toolkitPath, function_ref< InFlightDiagnostic()> emitError)
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
llvm::function_ref< Fn > function_ref