22 #include "llvm/IR/Constants.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCCodeEmitter.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/FileUtilities.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Program.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetSelect.h"
41 #include "llvm/TargetParser/TargetParser.h"
49 #ifndef __DEFAULT_ROCM_PATH__
50 #define __DEFAULT_ROCM_PATH__ ""
55 class ROCDLTargetAttrImpl
56 :
public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
58 std::optional<SmallVector<char, 0>>
72 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
85 if (
const char *var = std::getenv(
"ROCM_PATH"))
87 if (
const char *var = std::getenv(
"ROCM_ROOT"))
89 if (
const char *var = std::getenv(
"ROCM_HOME"))
95 Operation &module, ROCDLTargetAttr target,
97 : ModuleToObject(module, target.getTriple(), target.getChip(),
98 target.getFeatures(), target.getO()),
99 target(target), toolkitPath(targetOptions.getToolkitPath()),
100 fileList(targetOptions.getLinkFiles()) {
107 if (ArrayAttr files =
target.getLink())
109 if (
auto file = dyn_cast<StringAttr>(attr))
114 static llvm::once_flag initializeBackendOnce;
115 llvm::call_once(initializeBackendOnce, []() {
117 #if MLIR_ENABLE_ROCM_CONVERSIONS
118 LLVMInitializeAMDGPUTarget();
119 LLVMInitializeAMDGPUTargetInfo();
120 LLVMInitializeAMDGPUTargetMC();
121 LLVMInitializeAMDGPUAsmParser();
122 LLVMInitializeAMDGPUAsmPrinter();
142 path.insert(path.begin(), pathRef.begin(), pathRef.end());
143 llvm::sys::path::append(path,
"amdgcn",
"bitcode");
144 pathRef = StringRef(path.data(), path.size());
147 if (!llvm::sys::fs::is_directory(pathRef)) {
149 <<
" does not exist or is not a directory";
154 auto addLib = [&](
const Twine &lib) ->
bool {
155 auto baseSize = path.size();
156 llvm::sys::path::append(path, lib);
157 StringRef pathRef(path.data(), path.size());
158 if (!llvm::sys::fs::is_regular_file(pathRef)) {
160 <<
" does not exist or is not a file";
164 path.truncate(baseSize);
178 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
189 return std::move(bcFiles);
194 if (
auto *openclVersion =
module.getNamedMetadata(
"opencl.ocl.version"))
195 module.eraseNamedMetadata(openclVersion);
197 if (
auto *ident =
module.getNamedMetadata(
"llvm.ident"))
198 module.eraseNamedMetadata(ident);
209 for (llvm::Function &f :
module.functions()) {
210 if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
211 StringRef funcName = f.getName();
212 if (
"printf" == funcName)
215 if (funcName.starts_with(
"__ockl_"))
217 if (funcName.starts_with(
"__ocml_"))
219 if (funcName ==
"__atomic_work_item_fence")
232 bool finiteOnly,
bool unsafeMath,
bool fastMath,
bool correctSqrt,
235 auto addControlVariable = [&
module](StringRef name, uint32_t value,
237 if (
module.getNamedGlobal(name))
239 llvm::IntegerType *type =
241 llvm::GlobalVariable *controlVariable =
new llvm::GlobalVariable(
243 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
245 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
247 controlVariable->setVisibility(
248 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
249 controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
250 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
254 abiVer.getAsInteger(0, abi);
261 addControlVariable(
"__oclc_finite_only_opt", finiteOnly || fastMath, 8);
262 addControlVariable(
"__oclc_daz_opt", daz || fastMath, 8);
263 addControlVariable(
"__oclc_correctly_rounded_sqrt32",
264 correctSqrt && !fastMath, 8);
265 addControlVariable(
"__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
269 addControlVariable(
"__oclc_wavefrontsize64", wave64, 8);
271 llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(
chip);
273 addControlVariable(
"__oclc_ISA_version",
274 isaVersion.Minor + 100 * isaVersion.Stepping +
275 1000 * isaVersion.Major,
277 addControlVariable(
"__oclc_ABI_version", abi, 32);
281 std::optional<SmallVector<char, 0>>
285 StringRef targetTriple = this->
triple;
288 llvm::raw_svector_ostream os(result);
290 llvm::Triple
triple(llvm::Triple::normalize(targetTriple));
292 const llvm::Target *
target =
293 llvm::TargetRegistry::lookupTarget(
triple.normalize(), error);
295 emitError(loc, Twine(
"failed to lookup target: ") + error);
299 llvm::SourceMgr srcMgr;
300 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
302 const llvm::MCTargetOptions mcOptions;
303 std::unique_ptr<llvm::MCRegisterInfo> mri(
304 target->createMCRegInfo(targetTriple));
305 std::unique_ptr<llvm::MCAsmInfo> mai(
306 target->createMCAsmInfo(*mri, targetTriple, mcOptions));
307 std::unique_ptr<llvm::MCSubtargetInfo> sti(
310 llvm::MCContext ctx(
triple, mai.get(), mri.get(), sti.get(), &srcMgr,
312 std::unique_ptr<llvm::MCObjectFileInfo> mofi(
target->createMCObjectFileInfo(
314 ctx.setObjectFileInfo(mofi.get());
317 if (!llvm::sys::fs::current_path(cwd))
318 ctx.setCompilationDir(cwd);
320 std::unique_ptr<llvm::MCStreamer> mcStreamer;
321 std::unique_ptr<llvm::MCInstrInfo> mcii(
target->createMCInstrInfo());
323 llvm::MCCodeEmitter *ce =
target->createMCCodeEmitter(*mcii, ctx);
324 llvm::MCAsmBackend *mab =
target->createMCAsmBackend(*sti, *mri, mcOptions);
325 mcStreamer.reset(
target->createMCObjectStreamer(
326 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
327 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
330 std::unique_ptr<llvm::MCAsmParser> parser(
331 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
332 std::unique_ptr<llvm::MCTargetAsmParser> tap(
333 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
336 emitError(loc,
"assembler initialization error");
340 parser->setTargetParser(*tap);
342 return std::move(result);
345 std::optional<SmallVector<char, 0>>
348 std::optional<SmallVector<char, 0>> isaBinary =
assembleIsa(serializedISA);
356 int tempIsaBinaryFd = -1;
358 if (llvm::sys::fs::createTemporaryFile(
"kernel%%",
"o", tempIsaBinaryFd,
359 tempIsaBinaryFilename)) {
361 <<
"failed to create a temporary file for dumping the ISA binary";
364 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
366 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd,
true);
367 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
368 tempIsaBinaryOs.flush();
373 if (llvm::sys::fs::createTemporaryFile(
"kernel",
"hsaco",
374 tempHsacoFilename)) {
376 <<
"failed to create a temporary file for the HSA code object";
379 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
382 llvm::sys::path::append(lldPath,
"llvm",
"bin",
"ld.lld");
383 int lldResult = llvm::sys::ExecuteAndWait(
385 {
"ld.lld",
"-shared", tempIsaBinaryFilename,
"-o", tempHsacoFilename});
386 if (lldResult != 0) {
393 llvm::MemoryBuffer::getFile(tempHsacoFilename,
false);
396 <<
"failed to read the HSA code object from the temp file";
400 StringRef buffer = (*hsacoFile)->getBuffer();
408 #define DEBUG_TYPE "serialize-to-llvm"
410 llvm::dbgs() <<
"LLVM IR for module: "
411 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
412 << llvmModule <<
"\n";
418 std::optional<llvm::TargetMachine *> targetMachine =
420 if (!targetMachine) {
422 <<
triple <<
", can't compile with LLVM";
427 std::optional<std::string> serializedISA =
429 if (!serializedISA) {
433 #define DEBUG_TYPE "serialize-to-isa"
435 llvm::dbgs() <<
"ISA for module: "
436 << cast<gpu::GPUModuleOp>(
getOperation()).getNameAttr() <<
"\n"
437 << *serializedISA <<
"\n";
454 #if MLIR_ENABLE_ROCM_CONVERSIONS
458 AMDGPUSerializer(
Operation &module, ROCDLTargetAttr target,
461 std::optional<SmallVector<char, 0>>
462 moduleToObject(llvm::Module &llvmModule)
override;
470 AMDGPUSerializer::AMDGPUSerializer(
Operation &module, ROCDLTargetAttr target,
473 targetOptions(targetOptions) {}
475 std::optional<SmallVector<char, 0>>
476 AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
477 return moduleToObjectImpl(targetOptions, llvmModule);
481 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
484 assert(module &&
"The module must be non null.");
487 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
488 module->
emitError(
"module must be a GPU module");
491 #if MLIR_ENABLE_ROCM_CONVERSIONS
492 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
495 return serializer.run();
497 module->
emitError(
"the `AMDGPU` target was not built. Please enable it when "
507 gpu::CompilationTarget format =
options.getCompilationTarget();
510 gpu::KernelTableAttr kernels;
511 if (format > gpu::CompilationTarget::Binary) {
512 format = gpu::CompilationTarget::Binary;
515 DictionaryAttr properties{};
517 StringAttr objectStr =
518 builder.getStringAttr(StringRef(
object.data(),
object.size()));
519 return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
520 properties, kernels);
#define __DEFAULT_ROCM_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
This class is a general helper class for creating context-global objects like types,...
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
StringRef features
Target features.
static std::optional< std::string > translateToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine)
Utility function for translating to ISA, returns std::nullopt on failure.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
virtual void setDataLayoutAndTriple(llvm::Module &module)
Hook for computing the Datalayout.
StringRef triple
Target triple.
std::optional< llvm::TargetMachine * > getOrCreateTargetMachine()
Create the target machine based on the target triple and chip.
Operation & getOperation()
Returns the operation being serialized.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< std::string > fileList, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
StringRef chip
Target chip.
Operation & module
Module to transform to a binary object.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Operation is the basic unit of execution within MLIR.
MLIRContext * getContext()
Return the context this operation is associated with.
Location getLoc()
The source location the operation was defined or derived from.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
Base class for all ROCDL serializations from GPU modules into binary strings.
ROCDLTargetAttr getTarget() const
Returns the target attribute.
ArrayRef< std::string > getFileList() const
Returns the bitcode files to be loaded.
AMDGCNLibraries deviceLibs
AMD GCN libraries to use when linking, the default is using none.
ROCDLTargetAttr target
ROCDL target attribute.
SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getROCMPath.
std::optional< SmallVector< char, 0 > > moduleToObjectImpl(const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule)
Default implementation of ModuleToObject::moduleToObject.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer)
Adds oclc control variables to the LLVM Module if needed.
std::optional< SmallVector< char, 0 > > assembleIsa(StringRef isa)
Returns the assembled ISA.
SmallVector< std::string > fileList
List of LLVM bitcode files to link to.
std::string toolkitPath
ROCM toolkit path.
static void init()
Initializes the LLVM AMDGPU target by safely calling LLVMInitializeAMDGPU* methods if available.
StringRef getToolkitPath() const
Returns the ROCM toolkit path.
LogicalResult appendStandardLibs(AMDGCNLibraries libs)
Appends standard ROCm device libraries to fileList.
LogicalResult handleBitcodeFile(llvm::Module &module) override
Removes unnecessary metadata from the loaded bitcode files.
virtual std::optional< SmallVector< char, 0 > > compileToBinary(const std::string &serializedISA)
Compiles assembly to a binary.
void handleModulePreLink(llvm::Module &module) override
Determines required Device Libraries and adds oclc control variables to the LLVM Module if needed.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
CompilationTarget getCompilationTarget() const
Returns the compilation target.
void registerROCDLTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #rocdl.target attribute in the given registry.
AMDGCNLibraries
Helper enum for specifying the AMD GCN device libraries required for compilation.
gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule, ArrayRef< char > elfData={})
Returns a #gpu.kernel_table containing kernel metadata for each of the kernels in gpuModule.
StringRef getROCMPath()
Searches for and returns the ROCM toolkit path; the search order is:
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...