25 #include "llvm/IR/Constants.h"
26 #include "llvm/MC/MCAsmBackend.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCCodeEmitter.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCInstrInfo.h"
31 #include "llvm/MC/MCObjectFileInfo.h"
32 #include "llvm/MC/MCObjectWriter.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCRegisterInfo.h"
35 #include "llvm/MC/MCStreamer.h"
36 #include "llvm/MC/MCSubtargetInfo.h"
37 #include "llvm/MC/TargetRegistry.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/Support/FileUtilities.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/Program.h"
42 #include "llvm/Support/SourceMgr.h"
43 #include "llvm/Support/TargetSelect.h"
44 #include "llvm/TargetParser/TargetParser.h"
52 #ifndef __DEFAULT_ROCM_PATH__
53 #define __DEFAULT_ROCM_PATH__ ""
58 class ROCDLTargetAttrImpl
59 :
public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
61 std::optional<SmallVector<char, 0>>
75 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
88 if (
const char *var = std::getenv(
"ROCM_PATH"))
90 if (
const char *var = std::getenv(
"ROCM_ROOT"))
92 if (
const char *var = std::getenv(
"ROCM_HOME"))
98 Operation &module, ROCDLTargetAttr target,
100 : ModuleToObject(module, target.getTriple(), target.getChip(),
101 target.getFeatures(), target.getO()),
102 target(target), toolkitPath(targetOptions.getToolkitPath()),
103 fileList(targetOptions.getLinkFiles()) {
110 if (ArrayAttr files =
target.getLink())
112 if (
auto file = dyn_cast<StringAttr>(attr))
120 static llvm::once_flag initializeBackendOnce;
121 llvm::call_once(initializeBackendOnce, []() {
123 #if MLIR_ENABLE_ROCM_CONVERSIONS
124 LLVMInitializeAMDGPUTarget();
125 LLVMInitializeAMDGPUTargetInfo();
126 LLVMInitializeAMDGPUTargetMC();
127 LLVMInitializeAMDGPUAsmParser();
128 LLVMInitializeAMDGPUAsmPrinter();
143 if (!pathRef.empty()) {
145 path.insert(path.begin(), pathRef.begin(), pathRef.end());
146 llvm::sys::path::append(path,
"amdgcn",
"bitcode");
147 pathRef = StringRef(path.data(), path.size());
148 if (!llvm::sys::fs::is_directory(pathRef)) {
150 <<
" does not exist or is not a directory.";
153 StringRef isaVersion =
154 llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(
chip));
155 isaVersion.consume_front(
"gfx");
161 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
167 return std::move(bcFiles);
172 if (
auto *openclVersion =
module.getNamedMetadata(
"opencl.ocl.version"))
173 module.eraseNamedMetadata(openclVersion);
175 if (
auto *ident =
module.getNamedMetadata(
"llvm.ident"))
176 module.eraseNamedMetadata(ident);
181 [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
183 assert(targetMachine &&
"expect a TargetMachine");
193 StringRef isaVersion) {
194 auto addLib = [&](StringRef path) ->
bool {
195 if (!llvm::sys::fs::is_regular_file(path)) {
197 <<
" does not exist or is not a file.\n";
200 libs.push_back(path.str());
203 auto getLibPath = [&libPath](Twine lib) {
204 auto baseSize = libPath.size();
205 llvm::sys::path::append(libPath, lib +
".bc");
206 std::string path(StringRef(libPath.data(), libPath.size()).str());
207 libPath.truncate(baseSize);
212 if (addLib(getLibPath(
"ocml")) || addLib(getLibPath(
"ockl")) ||
213 addLib(getLibPath(
"hip")) || addLib(getLibPath(
"opencl")) ||
214 addLib(getLibPath(
"oclc_isa_version_" + isaVersion)))
220 llvm::Module &module,
bool wave64,
bool daz,
bool finiteOnly,
221 bool unsafeMath,
bool fastMath,
bool correctSqrt, StringRef abiVer) {
223 auto addControlVariable = [i8Ty, &
module](StringRef name,
bool enable) {
224 llvm::GlobalVariable *controlVariable =
new llvm::GlobalVariable(
225 module, i8Ty,
true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
227 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
228 controlVariable->setVisibility(
229 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
230 controlVariable->setAlignment(llvm::MaybeAlign(1));
231 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
233 addControlVariable(
"__oclc_finite_only_opt", finiteOnly || fastMath);
234 addControlVariable(
"__oclc_unsafe_math_opt", unsafeMath || fastMath);
235 addControlVariable(
"__oclc_daz_opt", daz || fastMath);
236 addControlVariable(
"__oclc_correctly_rounded_sqrt32",
237 correctSqrt && !fastMath);
238 addControlVariable(
"__oclc_wavefrontsize64", wave64);
242 abiVer.getAsInteger(0, abi);
243 llvm::GlobalVariable *abiVersion =
new llvm::GlobalVariable(
244 module, i32Ty,
true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
246 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
247 abiVersion->setVisibility(
248 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
249 abiVersion->setAlignment(llvm::MaybeAlign(4));
250 abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
253 std::optional<SmallVector<char, 0>>
257 StringRef targetTriple = this->
triple;
260 llvm::raw_svector_ostream os(result);
262 llvm::Triple
triple(llvm::Triple::normalize(targetTriple));
264 const llvm::Target *
target =
265 llvm::TargetRegistry::lookupTarget(
triple.normalize(), error);
267 emitError(loc, Twine(
"failed to lookup target: ") + error);
271 llvm::SourceMgr srcMgr;
272 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
274 const llvm::MCTargetOptions mcOptions;
275 std::unique_ptr<llvm::MCRegisterInfo> mri(
276 target->createMCRegInfo(targetTriple));
277 std::unique_ptr<llvm::MCAsmInfo> mai(
278 target->createMCAsmInfo(*mri, targetTriple, mcOptions));
279 std::unique_ptr<llvm::MCSubtargetInfo> sti(
282 llvm::MCContext ctx(
triple, mai.get(), mri.get(), sti.get(), &srcMgr,
284 std::unique_ptr<llvm::MCObjectFileInfo> mofi(
target->createMCObjectFileInfo(
286 ctx.setObjectFileInfo(mofi.get());
289 if (!llvm::sys::fs::current_path(cwd))
290 ctx.setCompilationDir(cwd);
292 std::unique_ptr<llvm::MCStreamer> mcStreamer;
293 std::unique_ptr<llvm::MCInstrInfo> mcii(
target->createMCInstrInfo());
295 llvm::MCCodeEmitter *ce =
target->createMCCodeEmitter(*mcii, ctx);
296 llvm::MCAsmBackend *mab =
target->createMCAsmBackend(*sti, *mri, mcOptions);
297 mcStreamer.reset(
target->createMCObjectStreamer(
298 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
299 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
300 *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
302 mcStreamer->setUseAssemblerInfoForParsing(
true);
304 std::unique_ptr<llvm::MCAsmParser> parser(
305 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
306 std::unique_ptr<llvm::MCTargetAsmParser> tap(
307 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
310 emitError(loc,
"assembler initialization error");
314 parser->setTargetParser(*tap);
320 #if MLIR_ENABLE_ROCM_CONVERSIONS
324 AMDGPUSerializer(
Operation &module, ROCDLTargetAttr target,
327 gpu::GPUModuleOp getOperation();
330 std::optional<SmallVector<char, 0>>
331 compileToBinary(
const std::string &serializedISA);
333 std::optional<SmallVector<char, 0>>
334 moduleToObject(llvm::Module &llvmModule)
override;
342 AMDGPUSerializer::AMDGPUSerializer(
Operation &module, ROCDLTargetAttr target,
345 targetOptions(targetOptions) {}
347 gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
351 std::optional<SmallVector<char, 0>>
352 AMDGPUSerializer::compileToBinary(
const std::string &serializedISA) {
354 std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
357 getOperation().emitError() <<
"Failed during ISA assembling.";
362 int tempIsaBinaryFd = -1;
364 if (llvm::sys::fs::createTemporaryFile(
"kernel%%",
"o", tempIsaBinaryFd,
365 tempIsaBinaryFilename)) {
366 getOperation().emitError()
367 <<
"Failed to create a temporary file for dumping the ISA binary.";
370 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
372 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd,
true);
373 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
374 tempIsaBinaryOs.flush();
379 if (llvm::sys::fs::createTemporaryFile(
"kernel",
"hsaco",
380 tempHsacoFilename)) {
381 getOperation().emitError()
382 <<
"Failed to create a temporary file for the HSA code object.";
385 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
388 llvm::sys::path::append(lldPath,
"llvm",
"bin",
"ld.lld");
389 int lldResult = llvm::sys::ExecuteAndWait(
391 {
"ld.lld",
"-shared", tempIsaBinaryFilename,
"-o", tempHsacoFilename});
392 if (lldResult != 0) {
393 getOperation().emitError() <<
"lld invocation failed.";
399 llvm::MemoryBuffer::getFile(tempHsacoFilename,
false);
401 getOperation().emitError()
402 <<
"Failed to read the HSA code object from the temp file.";
406 StringRef buffer = (*hsacoFile)->getBuffer();
411 std::optional<SmallVector<char, 0>>
412 AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
414 #define DEBUG_TYPE "serialize-to-llvm"
416 llvm::dbgs() <<
"LLVM IR for module: " << getOperation().getNameAttr()
418 << llvmModule <<
"\n";
421 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
424 std::optional<llvm::TargetMachine *> targetMachine =
425 getOrCreateTargetMachine();
426 if (!targetMachine) {
427 getOperation().emitError() <<
"Target Machine unavailable for triple "
428 << triple <<
", can't compile with LLVM\n";
433 std::optional<std::string> serializedISA =
434 translateToISA(llvmModule, **targetMachine);
435 if (!serializedISA) {
436 getOperation().emitError() <<
"Failed translating the module to ISA.";
439 #define DEBUG_TYPE "serialize-to-isa"
441 llvm::dbgs() <<
"ISA for module: " << getOperation().getNameAttr() <<
"\n"
442 << *serializedISA <<
"\n";
446 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
450 return compileToBinary(*serializedISA);
454 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
457 assert(module &&
"The module must be non null.");
460 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
461 module->
emitError(
"Module must be a GPU module.");
464 #if MLIR_ENABLE_ROCM_CONVERSIONS
465 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
468 return serializer.run();
470 module->
emitError(
"The `AMDGPU` target was not built. Please enable it when "
477 ROCDLTargetAttrImpl::createObject(
Attribute attribute,
480 gpu::CompilationTarget format =
options.getCompilationTarget();
482 return builder.getAttr<gpu::ObjectAttr>(
484 format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
486 builder.getStringAttr(StringRef(
object.data(),
object.size())),
nullptr);
#define __DEFAULT_ROCM_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
This class is a general helper class for creating context-global objects like types,...
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
StringRef features
Target features.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
StringRef triple
Target triple.
std::optional< llvm::TargetMachine * > getOrCreateTargetMachine()
Create the target machine based on the target triple and chip.
Operation & getOperation()
Returns the operation being serialized.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< std::string > fileList, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
StringRef chip
Target chip.
Operation & module
Module to transform to a binary object.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Operation is the basic unit of execution within MLIR.
MLIRContext * getContext()
Return the context this operation is associated with.
Location getLoc()
The source location the operation was defined or derived from.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
InFlightDiagnostic emitRemark(const Twine &message={})
Emit a remark about this operation, reporting up to any diagnostic handlers that may be listening.
Base class for all ROCDL serializations from GPU modules into binary strings.
ROCDLTargetAttr getTarget() const
Returns the target attribute.
ArrayRef< std::string > getFileList() const
Returns the bitcode files to be loaded.
void addControlVariables(llvm::Module &module, bool wave64, bool daz, bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer)
Adds oclc control variables to the LLVM module.
ROCDLTargetAttr target
ROCDL target attribute.
SerializeGPUModuleBase(Operation &module, ROCDLTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or, if that is empty, with the path returned by getROCMPath.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
std::optional< SmallVector< char, 0 > > assembleIsa(StringRef isa)
Returns the assembled ISA.
SmallVector< std::string > fileList
List of LLVM bitcode files to link to.
std::string toolkitPath
ROCM toolkit path.
LogicalResult appendStandardLibs()
Appends standard ROCm device libraries like ocml.bc, ockl.bc, etc.
LogicalResult getCommonBitcodeLibs(llvm::SmallVector< std::string > &libs, SmallVector< char, 256 > &libPath, StringRef isaVersion)
Appends the paths of common ROCm device libraries to libs.
static void init()
Initializes the LLVM AMDGPU target by safely calling LLVMInitializeAMDGPU* methods if available.
StringRef getToolkitPath() const
Returns the ROCM toolkit path.
LogicalResult handleBitcodeFile(llvm::Module &module) override
Removes unnecessary metadata from the loaded bitcode files.
void handleModulePreLink(llvm::Module &module) override
Adds oclc control variables to the LLVM module.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerROCDLTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #rocdl.target attribute in the given registry.
StringRef getROCMPath()
Searches for and returns the ROCM toolkit path; the search order starts with the `ROCM_PATH`, `ROCM_ROOT`, and `ROCM_HOME` environment variables, in that order.
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
This class represents an efficient way to signal success or failure.