#include "llvm/Config/llvm-config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#ifndef __DEFAULT_CUDATOOLKIT_PATH__
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif
// Implementation of the `gpu::TargetAttrInterface` model for `#nvvm.target`.
class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
public:
  std::optional<SmallVector<char, 0>>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;
    NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
StringRef mlir::NVVM::getCUDAToolkitPath() {
  if (const char *var = std::getenv("CUDA_ROOT"))
    return var;
  if (const char *var = std::getenv("CUDA_HOME"))
    return var;
  if (const char *var = std::getenv("CUDA_PATH"))
    return var;
  return __DEFAULT_CUDATOOLKIT_PATH__;
}
SerializeGPUModuleBase::SerializeGPUModuleBase(
    Operation &module, NVVMTargetAttr target,
    const gpu::TargetOptions &targetOptions)
    : ModuleToObject(module, target.getTriple(), target.getChip(),
                     target.getFeatures(), target.getO(),
                     targetOptions.getInitialLlvmIRCallback(),
                     targetOptions.getLinkedLlvmIRCallback(),
                     targetOptions.getOptimizedLlvmIRCallback(),
                     targetOptions.getISACallback()),
      target(target), toolkitPath(targetOptions.getToolkitPath()),
      librariesToLink(targetOptions.getLibrariesToLink()) {
void SerializeGPUModuleBase::init() {
  static llvm::once_flag initializeBackendOnce;
  llvm::call_once(initializeBackendOnce, []() {
  // If the `NVPTX` target was built, initialize it.
#if LLVM_HAS_NVPTX_TARGET
    LLVMInitializeNVPTXTarget();
    LLVMInitializeNVPTXTargetInfo();
    LLVMInitializeNVPTXTargetMC();
    LLVMInitializeNVPTXAsmPrinter();
#endif
  });
}
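// The `llvm::call_once` above makes backend registration thread-safe and
// idempotent: `init()` may be reached from several serialization threads, but
// the LLVMInitializeNVPTX* entry points run exactly once per process.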
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
#if MLIR_NVVM_EMBED_LIBDEVICE
  // Use the libdevice bitcode embedded in the MLIR binary: reuse the blob if
  // it was already registered with the resource manager, otherwise insert it.
  // ...
  DialectResourceBlobManager::BlobEntry *blob =
      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
  // ...
  librariesToLink.push_back(DenseResourceElementsAttr::get(
      type, resourceManager.insert("_mlir_embedded_libdevice",
                                   std::move(unmanagedBlob))));
#else
  StringRef pathRef = getToolkitPath();
  if (!pathRef.empty()) {
    SmallVector<char, 256> path;
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_directory(pathRef)) {
      getOperation().emitError() << "CUDA path: " << pathRef
                                 << " does not exist or is not a directory.\n";
      return failure();
    }
    llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_regular_file(pathRef)) {
      getOperation().emitError() << "LibDevice path: " << pathRef
                                 << " does not exist or is not a file.\n";
      return failure();
    }
    // ...
  }
#endif
  return success();
}

std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
  if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
                                      bcFiles, true)))
    return std::nullopt;
  return std::move(bcFiles);
}
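// On-disk layout expected by the lookup above (the libdevice version shown is
// the one the code appends; the toolkit root itself is whatever
// `getToolkitPath()` resolves to):
//   <toolkit>/nvvm/libdevice/libdevice.10.bc
// The attributes collected in `librariesToLink` are later materialized by
// `loadBitcodeFiles` and linked into the GPU module after translation to
// LLVM IR.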
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module op being serialized.
  gpu::GPUModuleOp getOperation();

  /// Compiles PTX to a binary by invoking the `ptxas` and `fatbinary` tools.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  /// Compiles PTX to a binary using the `nvptxcompiler` library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

  /// Temporary file paired with its remover.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  /// Creates a temporary file.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  /// Finds the `tool` binary.
  std::optional<std::string> findTool(StringRef tool);
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions) {}
std::optional<NVPTXSerializer::TmpFile>
NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
  llvm::SmallString<128> filename;
  std::error_code ec =
      llvm::sys::fs::createTemporaryFile(name, suffix, filename);
  if (ec) {
    getOperation().emitError() << "Couldn't create the temp file: `" << filename
                               << "`, error message: " << ec.message();
    return std::nullopt;
  }
  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
}

gpu::GPUModuleOp NVPTXSerializer::getOperation() {
  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
  // 1. Check the toolkit path given in the target options.
  StringRef pathRef = targetOptions.getToolkitPath();
  SmallVector<char, 256> path;
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }

  // 2. Check $PATH.
  if (std::optional<std::string> toolPath =
          llvm::sys::Process::FindInEnvPath("PATH", tool))
    return *toolPath;

  // 3. Check `getCUDAToolkitPath()`.
  pathRef = getCUDAToolkitPath();
  if (!pathRef.empty()) {
    path.clear();
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }
  getOperation().emitError()
      << "Couldn't find the `" << tool
      << "` binary. Please specify the toolkit "
         "path, add the compiler to $PATH, or set one of the environment "
         "variables in `NVVM::getCUDAToolkitPath()`.";
  return std::nullopt;
}
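// Resolution order implemented above: the toolkit path from the target
// options, then $PATH, then `getCUDAToolkitPath()`. For example (hypothetical
// installation), `findTool("ptxas")` may resolve to
// `/usr/local/cuda/bin/ptxas`.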
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  // Determine whether to embed the PTX in a fatbinary or emit a plain cubin.
  const bool createFatbin =
      targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;

  // Find `ptxas`, and `fatbinary` if a fatbin was requested.
  std::optional<std::string> ptxasCompiler = findTool("ptxas");
  if (!ptxasCompiler)
    return std::nullopt;
  std::optional<std::string> fatbinaryTool;
  if (createFatbin) {
    fatbinaryTool = findTool("fatbinary");
    if (!fatbinaryTool)
      return std::nullopt;
  }
  Location loc = getOperation().getLoc();
  // Base name for all temporary files.
  std::string basename =
      llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
                    getTarget().getTriple(), getTarget().getChip());

  // Create temp files for the PTX input, the tool log, and the output binary.
  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
  if (!ptxFile)
    return std::nullopt;
  std::optional<TmpFile> logFile = createTemp(basename, "log");
  if (!logFile)
    return std::nullopt;
  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
  if (!binaryFile)
    return std::nullopt;
  // The cubin gets its own temp file for fatbins; otherwise it is the binary.
  TmpFile cubinFile;
  if (createFatbin) {
    Twine cubinFilename = ptxFile->first + ".cubin";
    cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
  } else {
    cubinFile.first = binaryFile->first;
  }
  // Dump the PTX to the temp file.
  std::error_code ec;
  llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
  if (ec) {
    emitError(loc) << "Couldn't open the file: `" << ptxFile->first
                   << "`, error message: " << ec.message();
    return std::nullopt;
  }
  ptxStream << ptxCode;
  if (ptxStream.has_error()) {
    emitError(loc) << "An error occurred while writing the PTX to: `"
                   << ptxFile->first << "`.";
    return std::nullopt;
  }
  ptxStream.flush();

  // Redirect the tools' stdout/stderr to the log file.
  std::optional<StringRef> redirects[] = {
      std::nullopt, logFile->first, logFile->first};
  // Extra command-line options passed in the target options.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();

  // Create the `ptxas` arguments.
  std::string optLevel = std::to_string(this->optLevel);
  SmallVector<StringRef, 12> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
       "--opt-level", optLevel});

  bool useFatbin32 = false;
  for (const auto *cArg : cmdOpts.second) {
    // Forward every option to `ptxas` except `-32`, which selects a 32-bit
    // fatbinary instead.
    if (StringRef arg(cArg); arg != "-32")
      ptxasArgs.push_back(arg);
    else
      useFatbin32 = true;
  }
  // Create the `fatbinary` arguments.
  StringRef chip = getTarget().getChip();
  // Remove the arch prefix to obtain the compute capability.
  chip.consume_front("sm_"), chip.consume_front("compute_");
  std::string cubinArg =
      llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
          .str();
  std::string ptxArg =
      llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
          .str();
  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
                                        useFatbin32 ? "-32" : "-64", cubinArg,
                                        ptxArg, "--create", binaryFile->first});
// Dump the tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::dbgs() << "Tool invocation for module: "
                 << getOperation().getNameAttr() << "\n";
    llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
    llvm::dbgs() << "\n";
    if (createFatbin) {
      llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
      llvm::dbgs() << "\n";
    }
  });
#undef DEBUG_TYPE
  // Helper that reports the tool's log (or error message) and bails out.
  std::string message;
  auto emitLogError =
      [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
    if (message.empty()) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
          llvm::MemoryBuffer::getFile(logFile->first);
      if (toolStderr)
        emitError(loc) << toolName << " invocation failed. Log:\n"
                       << toolStderr->get()->getBuffer();
      else
        emitError(loc) << toolName << " invocation failed.";
      return std::nullopt;
    }
    emitError(loc) << toolName
                   << " invocation failed, error message: " << message;
    return std::nullopt;
  };

  // Invoke `ptxas`.
  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
                                /*Env=*/std::nullopt, /*Redirects=*/redirects,
                                /*SecondsToWait=*/0, /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
// Dump the SASS by disassembling the cubin with `nvdisasm` (debug builds).
#define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
    SmallVector<StringRef> nvdisasmArgs(
        {StringRef("nvdisasm"), StringRef(cubinFile.first)});
    if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
                                  /*Env=*/std::nullopt, /*Redirects=*/redirects,
                                  /*SecondsToWait=*/0, /*MemoryLimit=*/0,
                                  /*ErrMsg=*/&message))
      return emitLogError("`nvdisasm`");
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  // Invoke `fatbinary` when a fatbin was requested.
  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
                                                /*Env=*/std::nullopt,
                                                /*Redirects=*/redirects,
                                                /*SecondsToWait=*/0,
                                                /*MemoryLimit=*/0,
                                                /*ErrMsg=*/&message))
    return emitLogError("`fatbinary`");

// Dump the tool output log.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  // Read the final binary (cubin or fatbin) back from disk.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
      llvm::MemoryBuffer::getFile(binaryFile->first);
  if (!binaryBuffer) {
    emitError(loc) << "Couldn't open the file: `" << binaryFile->first
                   << "`, error message: " << binaryBuffer.getError().message();
    return std::nullopt;
  }
  StringRef fatbin = (*binaryBuffer)->getBuffer();
  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
}
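// For reference, the invocations assembled above correspond roughly to the
// following command lines (file names, chip, and optimization level are
// illustrative placeholders, not values produced verbatim by this code):
//   ptxas -arch sm_80 <tmp>.ptx -o <tmp>.cubin --opt-level 3
//   fatbinary -64 --image3=kind=elf,sm=80,file=<tmp>.cubin
//             --image3=kind=ptx,sm=80,file=<tmp>.ptx --create <output>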
#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

#include "nvFatbin.h"

#define RETURN_ON_NVFATBIN_ERROR(expr)                                         \
  do {                                                                         \
    auto result = (expr);                                                      \
    if (result != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                     << nvFatbinGetErrorString(result);                        \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the compiler options from the target options plus any extra
  // command-line flags.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // On failure, report the compiler's error log.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else {
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    }
    return std::nullopt;
  }

  // Retrieve the compiled binary (cubin).
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
// Dump the compiler's info log.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Destroy the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
  // If a fatbin was requested, wrap the cubin and the PTX with nvFatbin.
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
    bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
      return llvm::StringRef(option) == "-32";
    });

    const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
    nvFatbinHandle handle;

    auto chip = getTarget().getChip();
    chip.consume_front("sm_");

    RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
        handle, binary.data(), binary.size(), chip.data(), nullptr));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
        handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));

    size_t fatbinSize;
    RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
    SmallVector<char, 0> fatbin(fatbinSize, 0);
    RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
    return fatbin;
  }

  return binary;
}
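// This library-based path mirrors the tool-based `compileToBinary` above:
// nvPTXCompiler stands in for the `ptxas` process and nvFatbin stands in for
// `fatbinary`, avoiding temporary files and subprocess creation.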
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
// Dump the LLVM IR about to be serialized.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
                 << "\n";
    llvm::dbgs() << llvmModule << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return LLVM bitcode if the compilation target is `offload`.
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule);

#if !LLVM_HAS_NVPTX_TARGET
  getOperation()->emitError(
      "The `NVPTX` target was not built. Please enable it when building LLVM.");
  return std::nullopt;
#endif // LLVM_HAS_NVPTX_TARGET

  // Emit PTX code.
  std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  if (!targetMachine) {
    getOperation().emitError() << "Target Machine unavailable for triple "
                               << triple << ", can't optimize with LLVM\n";
    return std::nullopt;
  }
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, **targetMachine);
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }

  if (isaCallback)
    isaCallback(serializedISA.value());

// Dump the PTX.
#define DEBUG_TYPE "serialize-to-isa"
  LLVM_DEBUG({
    llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << *serializedISA << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return PTX if the compilation target is `assembly`.
  if (targetOptions.getCompilationTarget() ==
      gpu::CompilationTarget::Assembly) {
    // Make sure to include the null terminator.
    StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
    return SmallVector<char, 0>(bin.begin(), bin.end());
  }

  // Compile to a binary (cubin or fatbin).
#if MLIR_ENABLE_NVPTXCOMPILER
  return compileToBinaryNVPTX(*serializedISA);
#else
  return compileToBinary(*serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER
}
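// Summary of the dispatch above, keyed on gpu::CompilationTarget:
//   Offload       -> LLVM bitcode (delegates to SerializeGPUModuleBase);
//   Assembly      -> the PTX string, null terminator included;
//   Binary/Fatbin -> compileToBinaryNVPTX when MLIR_ENABLE_NVPTXCOMPILER is
//                    set, otherwise the `ptxas`/`fatbinary` tool pipeline.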
std::optional<SmallVector<char, 0>>
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                      const gpu::TargetOptions &options) const {
  assert(module && "The module must be non null.");
  if (!module)
    return std::nullopt;
  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
    module->emitError("Module must be a GPU module.");
    return std::nullopt;
  }
  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
  serializer.init();
  return serializer.run();
}
Attribute
NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
                                 const SmallVector<char, 0> &object,
                                 const gpu::TargetOptions &options) const {
  auto target = cast<NVVMTargetAttr>(attribute);
  gpu::CompilationTarget format = options.getCompilationTarget();
  DictionaryAttr objectProps;
  Builder builder(attribute.getContext());
  SmallVector<NamedAttribute, 2> properties;
  if (format == gpu::CompilationTarget::Assembly)
    properties.push_back(
        builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));

  if (StringRef section = options.getELFSection(); !section.empty())
    properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                              builder.getStringAttr(section)));

  if (!properties.empty())
    objectProps = builder.getDictionaryAttr(properties);

  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
      builder.getStringAttr(StringRef(object.data(), object.size())),
      objectProps, nullptr);
}
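// A minimal usage sketch, assuming the interface registered above: a GPU
// module annotated with an NVVM target attribute is picked up by the
// `gpu-module-to-binary` pass, which calls `serializeToObject` and wraps the
// result with `createObject`. Illustrative IR (chip and O values are only
// examples):
//
//   gpu.module @kernels [#nvvm.target<chip = "sm_80", O = 3>] {
//     ...
//   }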