22 #include "llvm/IR/Constants.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCCodeEmitter.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/FileUtilities.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Program.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetSelect.h"
41 #include "llvm/TargetParser/TargetParser.h"
49 #ifndef __DEFAULT_ROCM_PATH__
50 #define __DEFAULT_ROCM_PATH__ ""
55 class ROCDLTargetAttrImpl
56 :
public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
58 std::optional<SmallVector<char, 0>>
72 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
85 if (
const char *var = std::getenv(
"ROCM_PATH"))
87 if (
const char *var = std::getenv(
"ROCM_ROOT"))
89 if (
const char *var = std::getenv(
"ROCM_HOME"))
95 Operation &module, ROCDLTargetAttr target,
97 : ModuleToObject(module, target.getTriple(), target.getChip(),
98 target.getFeatures(), target.getO()),
99 target(target), toolkitPath(targetOptions.getToolkitPath()),
100 librariesToLink(targetOptions.getLibrariesToLink()) {
112 static llvm::once_flag initializeBackendOnce;
113 llvm::call_once(initializeBackendOnce, []() {
115 #if MLIR_ENABLE_ROCM_CONVERSIONS
116 LLVMInitializeAMDGPUTarget();
117 LLVMInitializeAMDGPUTargetInfo();
118 LLVMInitializeAMDGPUTargetMC();
119 LLVMInitializeAMDGPUAsmParser();
120 LLVMInitializeAMDGPUAsmPrinter();
140 path.insert(path.begin(), pathRef.begin(), pathRef.end());
141 llvm::sys::path::append(path,
"amdgcn",
"bitcode");
142 pathRef = StringRef(path.data(), path.size());
145 if (!llvm::sys::fs::is_directory(pathRef)) {
147 <<
" does not exist or is not a directory";
152 auto addLib = [&](
const Twine &lib) ->
bool {
153 auto baseSize = path.size();
154 llvm::sys::path::append(path, lib);
155 StringRef pathRef(path.data(), path.size());
156 if (!llvm::sys::fs::is_regular_file(pathRef)) {
158 <<
" does not exist or is not a file";
162 path.truncate(baseSize);
176 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
187 return std::move(bcFiles);
192 if (
auto *openclVersion =
module.getNamedMetadata(
"opencl.ocl.version"))
193 module.eraseNamedMetadata(openclVersion);
195 if (
auto *ident =
module.getNamedMetadata(
"llvm.ident"))
196 module.eraseNamedMetadata(ident);
207 for (llvm::Function &f :
module.functions()) {
208 if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
209 StringRef funcName = f.getName();
210 if (
"printf" == funcName)
213 if (funcName.starts_with(
"__ockl_"))
215 if (funcName.starts_with(
"__ocml_"))
217 if (funcName ==
"__atomic_work_item_fence")
230 bool finiteOnly,
bool unsafeMath,
bool fastMath,
bool correctSqrt,
233 auto addControlVariable = [&
module](StringRef name, uint32_t value,
235 if (
module.getNamedGlobal(name))
237 llvm::IntegerType *type =
239 llvm::GlobalVariable *controlVariable =
new llvm::GlobalVariable(
241 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
243 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
245 controlVariable->setVisibility(
246 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
247 controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
248 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
252 abiVer.getAsInteger(0, abi);
259 addControlVariable(
"__oclc_finite_only_opt", finiteOnly || fastMath, 8);
260 addControlVariable(
"__oclc_daz_opt", daz || fastMath, 8);
261 addControlVariable(
"__oclc_correctly_rounded_sqrt32",
262 correctSqrt && !fastMath, 8);
263 addControlVariable(
"__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
267 addControlVariable(
"__oclc_wavefrontsize64", wave64, 8);
269 llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(
chip);
271 addControlVariable(
"__oclc_ISA_version",
272 isaVersion.Minor + 100 * isaVersion.Stepping +
273 1000 * isaVersion.Major,
275 addControlVariable(
"__oclc_ABI_version", abi, 32);
279 std::optional<SmallVector<char, 0>>
283 StringRef targetTriple = this->
triple;
286 llvm::raw_svector_ostream os(result);
288 llvm::Triple
triple(llvm::Triple::normalize(targetTriple));
290 const llvm::Target *
target =
291 llvm::TargetRegistry::lookupTarget(
triple.normalize(), error);
293 emitError(loc, Twine(
"failed to lookup target: ") + error);
297 llvm::SourceMgr srcMgr;
298 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
300 const llvm::MCTargetOptions mcOptions;
301 std::unique_ptr<llvm::MCRegisterInfo> mri(
302 target->createMCRegInfo(targetTriple));
303 std::unique_ptr<llvm::MCAsmInfo> mai(
304 target->createMCAsmInfo(*mri, targetTriple, mcOptions));
305 std::unique_ptr<llvm::MCSubtargetInfo> sti(
308 llvm::MCContext ctx(
triple, mai.get(), mri.get(), sti.get(), &srcMgr,
310 std::unique_ptr<llvm::MCObjectFileInfo> mofi(
target->createMCObjectFileInfo(
312 ctx.setObjectFileInfo(mofi.get());
315 if (!llvm::sys::fs::current_path(cwd))
316 ctx.setCompilationDir(cwd);
318 std::unique_ptr<llvm::MCStreamer> mcStreamer;
319 std::unique_ptr<llvm::MCInstrInfo> mcii(
target->createMCInstrInfo());
321 llvm::MCCodeEmitter *ce =
target->createMCCodeEmitter(*mcii, ctx);
322 llvm::MCAsmBackend *mab =
target->createMCAsmBackend(*sti, *mri, mcOptions);
323 mcStreamer.reset(
target->createMCObjectStreamer(
324 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
325 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
328 std::unique_ptr<llvm::MCAsmParser> parser(
329 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
330 std::unique_ptr<llvm::MCTargetAsmParser> tap(
331 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
334 emitError(loc,
"assembler initialization error");
338 parser->setTargetParser(*tap);
340 return std::move(result);
343 std::optional<SmallVector<char, 0>>
346 std::optional<SmallVector<char, 0>> isaBinary =
assembleIsa(serializedISA);
354 int tempIsaBinaryFd = -1;
356 if (llvm::sys::fs::createTemporaryFile(
"kernel%%",
"o", tempIsaBinaryFd,
357 tempIsaBinaryFilename)) {
359 <<
"failed to create a temporary file for dumping the ISA binary";
362 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
364 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd,
true);
365 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
366 tempIsaBinaryOs.flush();
371 if (llvm::sys::fs::createTemporaryFile(
"kernel",
"hsaco",
372 tempHsacoFilename)) {
374 <<
"failed to create a temporary file for the HSA code object";
377 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
380 llvm::sys::path::append(lldPath,
"llvm",
"bin",
"ld.lld");
381 int lldResult = llvm::sys::ExecuteAndWait(
383 {
"ld.lld",
"-shared", tempIsaBinaryFilename,
"-o", tempHsacoFilename});
384 if (lldResult != 0) {
391 llvm::MemoryBuffer::getFile(tempHsacoFilename,
false);
394 <<
"failed to read the HSA code object from the temp file";
398 StringRef buffer = (*hsacoFile)->getBuffer();
406 #define DEBUG_TYPE "serialize-to-llvm"
408 llvm::dbgs() <<
"LLVM IR for module: "
409 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
410 << llvmModule <<
"\n";
416 std::optional<llvm::TargetMachine *> targetMachine =
418 if (!targetMachine) {
420 <<
triple <<
", can't compile with LLVM";
425 std::optional<std::string> serializedISA =
427 if (!serializedISA) {
431 #define DEBUG_TYPE "serialize-to-isa"
433 llvm::dbgs() <<
"ISA for module: "
434 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
435 << *serializedISA <<
"\n";
452 #if MLIR_ENABLE_ROCM_CONVERSIONS
456 AMDGPUSerializer(
Operation &module, ROCDLTargetAttr target,
459 std::optional<SmallVector<char, 0>>
460 moduleToObject(llvm::Module &llvmModule)
override;
468 AMDGPUSerializer::AMDGPUSerializer(
Operation &module, ROCDLTargetAttr target,
471 targetOptions(targetOptions) {}
473 std::optional<SmallVector<char, 0>>
474 AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
475 return moduleToObjectImpl(targetOptions, llvmModule);
479 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
482 assert(module &&
"The module must be non null.");
485 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
486 module->
emitError(
"module must be a GPU module");
489 #if MLIR_ENABLE_ROCM_CONVERSIONS
490 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
493 return serializer.run();
495 module->
emitError(
"the `AMDGPU` target was not built. Please enable it when "
505 gpu::CompilationTarget format =
options.getCompilationTarget();
508 gpu::KernelTableAttr kernels;
509 if (format > gpu::CompilationTarget::Binary) {
510 format = gpu::CompilationTarget::Binary;
513 DictionaryAttr properties{};
515 StringAttr objectStr =
516 builder.getStringAttr(StringRef(
object.data(),
object.size()));
517 return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
518 properties, kernels);
#define __DEFAULT_ROCM_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
This class is a general helper class for creating context-global objects like types,...
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
StringRef features
Target features.
static std::optional< std::string > translateToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine)
Utility function for translating to ISA, returns std::nullopt on failure.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
virtual void setDataLayoutAndTriple(llvm::Module &module)
Hook for computing the Datalayout.
StringRef triple
Target triple.
std::optional< llvm::TargetMachine * > getOrCreateTargetMachine()
Create the target machine based on the target triple and chip.
Operation & getOperation()
Returns the operation being serialized.
StringRef chip
Target chip.
Operation & module
Module to transform to a binary object.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Operation is the basic unit of execution within MLIR.
MLIRContext * getContext()
Return the context this operation is associated with.
Location getLoc()
The source location the operation was defined or derived from.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
Base class for all ROCDL serializations from GPU modules into binary strings.
ROCDLTargetAttr getTarget() const
Returns the target attribute.
ArrayRef< Attribute > getLibrariesToLink() const
Returns the LLVM bitcode libraries to be linked.
AMDGCNLibraries deviceLibs
AMD GCN libraries to use when linking, the default is using none.
ROCDLTargetAttr target
ROCDL target attribute.
SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getROCMPath.
std::optional< SmallVector< char, 0 > > moduleToObjectImpl(const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule)
Default implementation of ModuleToObject::moduleToObject.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer)
Adds oclc control variables to the LLVM Module if needed.
std::optional< SmallVector< char, 0 > > assembleIsa(StringRef isa)
Returns the assembled ISA.
std::string toolkitPath
ROCM toolkit path.
SmallVector< Attribute > librariesToLink
List of LLVM bitcode files to link to.
static void init()
Initializes the LLVM AMDGPU target by safely calling LLVMInitializeAMDGPU* methods if available.
StringRef getToolkitPath() const
Returns the ROCM toolkit path.
LogicalResult appendStandardLibs(AMDGCNLibraries libs)
Appends standard ROCm device libraries to fileList.
LogicalResult handleBitcodeFile(llvm::Module &module) override
Removes unnecessary metadata from the loaded bitcode files.
virtual std::optional< SmallVector< char, 0 > > compileToBinary(const std::string &serializedISA)
Compiles assembly to a binary.
void handleModulePreLink(llvm::Module &module) override
Determines required Device Libraries and adds oclc control variables to the LLVM Module if needed.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
CompilationTarget getCompilationTarget() const
Returns the compilation target.
void registerROCDLTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #rocdl.target attribute in the given registry.
AMDGCNLibraries
Helper enum for specifying the AMD GCN device libraries required for compilation.
gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule, ArrayRef< char > elfData={})
Returns a #gpu.kernel_table containing kernel metadata for each of the kernels in gpuModule.
StringRef getROCMPath()
Searches for and returns the ROCM toolkit path; the search order is:
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...