29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/FileUtilities.h"
32 #include "llvm/Support/FormatVariadic.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/Process.h"
36 #include "llvm/Support/Program.h"
37 #include "llvm/Support/TargetSelect.h"
38 #include "llvm/Support/Timer.h"
39 #include "llvm/Support/raw_ostream.h"
48 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
49 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
57 class NVVMTargetAttrImpl
58 :
public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
60 std::optional<SmallVector<char, 0>>
74 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
87 if (
const char *var = std::getenv(
"CUDA_ROOT"))
89 if (
const char *var = std::getenv(
"CUDA_HOME"))
91 if (
const char *var = std::getenv(
"CUDA_PATH"))
99 : ModuleToObject(module, target.getTriple(), target.getChip(),
100 target.getFeatures(), target.getO(),
101 targetOptions.getInitialLlvmIRCallback(),
102 targetOptions.getLinkedLlvmIRCallback(),
103 targetOptions.getOptimizedLlvmIRCallback(),
104 targetOptions.getISACallback()),
105 target(target), toolkitPath(targetOptions.getToolkitPath()),
106 librariesToLink(targetOptions.getLibrariesToLink()) {
121 static llvm::once_flag initializeBackendOnce;
122 llvm::call_once(initializeBackendOnce, []() {
124 #if LLVM_HAS_NVPTX_TARGET
125 LLVMInitializeNVPTXTarget();
126 LLVMInitializeNVPTXTargetInfo();
127 LLVMInitializeNVPTXTargetMC();
128 LLVMInitializeNVPTXAsmPrinter();
143 #if MLIR_NVVM_EMBED_LIBDEVICE
154 resourceManager.getBlobManager().lookup(
"_mlir_embedded_libdevice");
168 type, resourceManager.insert(
"_mlir_embedded_libdevice",
169 std::move(unmanagedBlob))));
172 if (!pathRef.empty()) {
174 path.insert(path.begin(), pathRef.begin(), pathRef.end());
175 pathRef = StringRef(path.data(), path.size());
176 if (!llvm::sys::fs::is_directory(pathRef)) {
178 <<
" does not exist or is not a directory.\n";
181 llvm::sys::path::append(path,
"nvvm",
"libdevice",
"libdevice.10.bc");
182 pathRef = StringRef(path.data(), path.size());
183 if (!llvm::sys::fs::is_regular_file(pathRef)) {
185 <<
" does not exist or is not a file.\n";
194 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
200 return std::move(bcFiles);
206 NVPTXSerializer(
Operation &module, NVVMTargetAttr target,
210 gpu::GPUModuleOp getOperation();
213 std::optional<SmallVector<char, 0>>
214 compileToBinary(
const std::string &ptxCode);
217 std::optional<SmallVector<char, 0>>
218 compileToBinaryNVPTX(
const std::string &ptxCode);
222 std::optional<SmallVector<char, 0>>
223 moduleToObject(llvm::Module &llvmModule)
override;
228 std::optional<int64_t> getLLVMIRToISATimeInMs();
233 std::optional<int64_t> getISAToBinaryTimeInMs();
236 using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
239 std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
246 std::optional<std::string> findTool(StringRef tool);
252 std::optional<int64_t> llvmToISATimeInMs;
255 std::optional<int64_t> isaToBinaryTimeInMs;
259 NVPTXSerializer::NVPTXSerializer(
Operation &module, NVVMTargetAttr target,
262 targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),
263 isaToBinaryTimeInMs(std::nullopt) {}
265 std::optional<NVPTXSerializer::TmpFile>
266 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
269 llvm::sys::fs::createTemporaryFile(name, suffix, filename);
271 getOperation().emitError() <<
"Couldn't create the temp file: `" << filename
272 <<
"`, error message: " << ec.message();
275 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
278 std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {
279 return llvmToISATimeInMs;
282 std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
283 return isaToBinaryTimeInMs;
286 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
290 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
293 StringRef pathRef = targetOptions.getToolkitPath();
295 if (!pathRef.empty()) {
296 path.insert(path.begin(), pathRef.begin(), pathRef.end());
297 llvm::sys::path::append(path,
"bin", tool);
298 if (llvm::sys::fs::can_execute(path))
299 return StringRef(path.data(), path.size()).str();
303 if (std::optional<std::string> toolPath =
304 llvm::sys::Process::FindInEnvPath(
"PATH", tool))
310 if (!pathRef.empty()) {
311 path.insert(path.begin(), pathRef.begin(), pathRef.end());
312 llvm::sys::path::append(path,
"bin", tool);
313 if (llvm::sys::fs::can_execute(path))
314 return StringRef(path.data(), path.size()).str();
316 getOperation().emitError()
317 <<
"Couldn't find the `" << tool
318 <<
"` binary. Please specify the toolkit "
319 "path, add the compiler to $PATH, or set one of the environment "
320 "variables in `NVVM::getCUDAToolkitPath()`.";
326 std::optional<SmallVector<char, 0>>
327 NVPTXSerializer::compileToBinary(
const std::string &ptxCode) {
330 const bool createFatbin =
331 targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
334 std::optional<std::string> ptxasCompiler = findTool(
"ptxas");
337 std::optional<std::string> fatbinaryTool;
339 fatbinaryTool = findTool(
"fatbinary");
343 Location loc = getOperation().getLoc();
346 std::string basename =
347 llvm::formatv(
"mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
348 getTarget().getTriple(), getTarget().getChip());
351 std::optional<TmpFile> ptxFile = createTemp(basename,
"ptx");
354 std::optional<TmpFile> logFile = createTemp(basename,
"log");
357 std::optional<TmpFile> binaryFile = createTemp(basename,
"bin");
362 Twine cubinFilename = ptxFile->first +
".cubin";
363 cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
365 cubinFile.first = binaryFile->first;
371 llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
373 emitError(loc) <<
"Couldn't open the file: `" << ptxFile->first
374 <<
"`, error message: " << ec.message();
377 ptxStream << ptxCode;
378 if (ptxStream.has_error()) {
379 emitError(loc) <<
"An error occurred while writing the PTX to: `"
380 << ptxFile->first <<
"`.";
387 std::optional<StringRef> redirects[] = {
394 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
395 targetOptions.tokenizeCmdOptions();
398 std::string optLevel = std::to_string(this->optLevel);
400 {StringRef(
"ptxas"), StringRef(
"-arch"), getTarget().getChip(),
401 StringRef(ptxFile->first), StringRef(
"-o"), StringRef(cubinFile.first),
402 "--opt-level", optLevel});
404 bool useFatbin32 =
false;
405 for (
const auto *cArg : cmdOpts.second) {
409 if (StringRef arg(cArg); arg !=
"-32")
410 ptxasArgs.push_back(arg);
416 StringRef chip = getTarget().getChip();
418 chip.consume_front(
"sm_"), chip.consume_front(
"compute_");
420 std::string cubinArg =
421 llvm::formatv(
"--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
425 llvm::formatv(
"--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
428 useFatbin32 ?
"-32" :
"-64", cubinArg,
429 ptxArg,
"--create", binaryFile->first});
432 #define DEBUG_TYPE "serialize-to-binary"
434 llvm::dbgs() <<
"Tool invocation for module: "
435 << getOperation().getNameAttr() <<
"\n";
436 llvm::interleave(ptxasArgs, llvm::dbgs(),
" ");
437 llvm::dbgs() <<
"\n";
439 llvm::interleave(fatbinArgs, llvm::dbgs(),
" ");
440 llvm::dbgs() <<
"\n";
449 if (message.empty()) {
450 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
451 llvm::MemoryBuffer::getFile(logFile->first);
453 emitError(loc) << toolName <<
" invocation failed. Log:\n"
454 << toolStderr->get()->getBuffer();
456 emitError(loc) << toolName <<
" invocation failed.";
460 <<
" invocation failed, error message: " << message;
465 if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
471 return emitLogError(
"`ptxas`");
472 #define DEBUG_TYPE "dump-sass"
474 std::optional<std::string> nvdisasm = findTool(
"nvdisasm");
476 {StringRef(
"nvdisasm"), StringRef(cubinFile.first)});
477 if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
483 return emitLogError(
"`nvdisasm`");
484 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
485 llvm::MemoryBuffer::getFile(logFile->first);
486 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
487 llvm::dbgs() <<
"Output:\n" << (*logBuffer)->getBuffer() <<
"\n";
488 llvm::dbgs().flush();
495 if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
501 return emitLogError(
"`fatbinary`");
504 #define DEBUG_TYPE "serialize-to-binary"
506 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
507 llvm::MemoryBuffer::getFile(logFile->first);
508 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
509 llvm::dbgs() <<
"Output:\n" << (*logBuffer)->getBuffer() <<
"\n";
510 llvm::dbgs().flush();
516 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
517 llvm::MemoryBuffer::getFile(binaryFile->first);
519 emitError(loc) <<
"Couldn't open the file: `" << binaryFile->first
520 <<
"`, error message: " << binaryBuffer.getError().message();
523 StringRef fatbin = (*binaryBuffer)->getBuffer();
527 #if MLIR_ENABLE_NVPTXCOMPILER
528 #include "nvPTXCompiler.h"
530 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
532 if (auto status = (expr)) { \
533 emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
535 return std::nullopt; \
539 #include "nvFatbin.h"
541 #define RETURN_ON_NVFATBIN_ERROR(expr) \
543 auto result = (expr); \
544 if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
545 emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
546 << nvFatbinGetErrorString(result); \
547 return std::nullopt; \
551 std::optional<SmallVector<char, 0>>
552 NVPTXSerializer::compileToBinaryNVPTX(
const std::string &ptxCode) {
553 Location loc = getOperation().getLoc();
554 nvPTXCompilerHandle compiler =
nullptr;
555 nvPTXCompileResult status;
559 std::string optLevel = std::to_string(this->optLevel);
560 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
561 targetOptions.tokenizeCmdOptions();
562 cmdOpts.second.append(
563 {
"-arch", getTarget().getChip().data(),
"--opt-level", optLevel.c_str()});
566 RETURN_ON_NVPTXCOMPILER_ERROR(
567 nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
570 status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
571 cmdOpts.second.data());
574 if (status != NVPTXCOMPILE_SUCCESS) {
575 RETURN_ON_NVPTXCOMPILER_ERROR(
576 nvPTXCompilerGetErrorLogSize(compiler, &logSize));
579 RETURN_ON_NVPTXCOMPILER_ERROR(
580 nvPTXCompilerGetErrorLog(compiler, log.data()));
581 emitError(loc) <<
"NVPTX compiler invocation failed, error log: "
584 emitError(loc) <<
"NVPTX compiler invocation failed with error code: "
591 RETURN_ON_NVPTXCOMPILER_ERROR(
592 nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
594 RETURN_ON_NVPTXCOMPILER_ERROR(
595 nvPTXCompilerGetCompiledProgram(compiler, (
void *)binary.data()));
598 #define DEBUG_TYPE "serialize-to-binary"
600 RETURN_ON_NVPTXCOMPILER_ERROR(
601 nvPTXCompilerGetInfoLogSize(compiler, &logSize));
604 RETURN_ON_NVPTXCOMPILER_ERROR(
605 nvPTXCompilerGetInfoLog(compiler, log.data()));
606 llvm::dbgs() <<
"NVPTX compiler invocation for module: "
607 << getOperation().getNameAttr() <<
"\n";
608 llvm::dbgs() <<
"Arguments: ";
609 llvm::interleave(cmdOpts.second, llvm::dbgs(),
" ");
610 llvm::dbgs() <<
"\nOutput\n" << log.data() <<
"\n";
611 llvm::dbgs().flush();
615 RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
617 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
618 bool useFatbin32 = llvm::any_of(cmdOpts.second, [](
const char *option) {
619 return llvm::StringRef(option) ==
"-32";
622 const char *cubinOpts[1] = {useFatbin32 ?
"-32" :
"-64"};
623 nvFatbinHandle handle;
625 auto chip = getTarget().getChip();
626 chip.consume_front(
"sm_");
628 RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
629 RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
630 handle, binary.data(), binary.size(), chip.data(),
nullptr));
631 RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
632 handle, ptxCode.data(), ptxCode.size(), chip.data(),
nullptr,
nullptr));
635 RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
637 RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (
void *)fatbin.data()));
638 RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
646 std::optional<SmallVector<char, 0>>
647 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
648 llvm::Timer moduleToObjectTimer(
649 "moduleToObjectTimer",
650 "Timer for perf llvm-ir -> isa and isa -> binary.");
651 moduleToObjectTimer.startTimer();
653 #define DEBUG_TYPE "serialize-to-llvm"
655 llvm::dbgs() <<
"LLVM IR for module: " << getOperation().getNameAttr()
657 llvm::dbgs() << llvmModule <<
"\n";
658 llvm::dbgs().flush();
661 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
664 #if !LLVM_HAS_NVPTX_TARGET
665 getOperation()->emitError(
666 "The `NVPTX` target was not built. Please enable it when building LLVM.");
671 std::optional<llvm::TargetMachine *> targetMachine =
672 getOrCreateTargetMachine();
673 if (!targetMachine) {
674 getOperation().emitError() <<
"Target Machine unavailable for triple "
675 << triple <<
", can't optimize with LLVM\n";
678 std::optional<std::string> serializedISA =
679 translateToISA(llvmModule, **targetMachine);
680 if (!serializedISA) {
681 getOperation().emitError() <<
"Failed translating the module to ISA.";
685 moduleToObjectTimer.stopTimer();
686 llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
687 moduleToObjectTimer.clear();
688 moduleToObjectTimer.startTimer();
690 isaCallback(serializedISA.value());
692 #define DEBUG_TYPE "serialize-to-isa"
694 llvm::dbgs() <<
"PTX for module: " << getOperation().getNameAttr() <<
"\n";
695 llvm::dbgs() << *serializedISA <<
"\n";
696 llvm::dbgs().flush();
701 if (targetOptions.getCompilationTarget() ==
702 gpu::CompilationTarget::Assembly) {
704 StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
708 std::optional<SmallVector<char, 0>> result;
710 #if MLIR_ENABLE_NVPTXCOMPILER
711 result = compileToBinaryNVPTX(*serializedISA);
713 result = compileToBinary(*serializedISA);
716 moduleToObjectTimer.stopTimer();
717 isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
718 moduleToObjectTimer.clear();
722 std::optional<SmallVector<char, 0>>
726 assert(module &&
"The module must be non null.");
729 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
730 module->
emitError(
"Module must be a GPU module.");
733 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute),
options);
735 std::optional<SmallVector<char, 0>> result = serializer.run();
736 auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
737 if (llvmToISATimeInMs.has_value())
738 module->
setAttr(
"LLVMIRToISATimeInMs",
739 builder.getI64IntegerAttr(*llvmToISATimeInMs));
740 auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
741 if (isaToBinaryTimeInMs.has_value())
742 module->
setAttr(
"ISAToBinaryTimeInMs",
743 builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
751 auto target = cast<NVVMTargetAttr>(attribute);
752 gpu::CompilationTarget format =
options.getCompilationTarget();
753 DictionaryAttr objectProps;
756 if (format == gpu::CompilationTarget::Assembly)
757 properties.push_back(
758 builder.getNamedAttr(
"O", builder.getI32IntegerAttr(target.getO())));
760 if (StringRef section =
options.getELFSection(); !section.empty())
761 properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
762 builder.getStringAttr(section)));
764 for (
const auto *perfName : {
"LLVMIRToISATimeInMs",
"ISAToBinaryTimeInMs"}) {
765 if (module->
hasAttr(perfName)) {
766 IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->
getAttr(perfName));
767 properties.push_back(builder.getNamedAttr(
768 perfName, builder.getI64IntegerAttr(attr.getInt())));
772 if (!properties.empty())
773 objectProps = builder.getDictionaryAttr(properties);
775 return builder.getAttr<gpu::ObjectAttr>(
777 builder.getStringAttr(StringRef(
object.data(),
object.size())),
778 objectProps,
nullptr);
const unsigned _mlir_embedded_libdevice_size
#define __DEFAULT_CUDATOOLKIT_PATH__
const unsigned char _mlir_embedded_libdevice[]
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
This class is a general helper class for creating context-global objects like types,...
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
The class represents an individual entry of a blob.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
Operation & module
Module to transform to a binary object.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Base class for all NVVM serializations from GPU modules into binary strings.
ArrayRef< Attribute > getLibrariesToLink() const
Returns the bitcode libraries to be linked into the gpu module after translation to LLVM IR.
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getCUDAToolki...
NVVMTargetAttr target
NVVM target attribute.
std::string toolkitPath
CUDA toolkit path.
SmallVector< Attribute > librariesToLink
List of LLVM bitcode libraries to link into the module after translation to LLVM IR.
std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in librariesToLink.
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into librariesToLink.
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
NVVMTargetAttr getTarget() const
Returns the target attribute.
Operation is the basic unit of execution within MLIR.
Attribute getAttr(StringAttr name)
Return the specified attribute if present, null otherwise.
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
MLIRContext * getContext()
Return the context this operation is associated with.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
static AsmResourceBlob allocateInferAlign(ArrayRef< T > data, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
StringRef getCUDAToolkitPath()
Searches for and returns the CUDA toolkit path; the search order is:
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
DialectResourceBlobHandle< BuiltinDialect > DenseResourceElementsHandle
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
static ManagerInterface & getManagerInterface(MLIRContext *ctx)
Get the interface for the dialect that owns handles of this type.