doxygen/LLVM_2NVVM_2Target_8cpp_source.html

 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file defines NVVM target related functions including registration

 // calls for the `#nvvm.target` compilation attribute.

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Target/LLVM/NVVM/Target.h"


 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"

 #include "mlir/Dialect/GPU/IR/GPUDialect.h"

 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"

 #include "mlir/IR/BuiltinDialect.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/DialectResourceBlobManager.h"

 #include "mlir/Target/LLVM/NVVM/Utils.h"

 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"

 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"

 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"

 #include "mlir/Target/LLVMIR/Export.h"

 #include "llvm/Support/InterleavedRange.h"


 #include "llvm/ADT/ScopeExit.h"

 #include "llvm/Config/Targets.h"

 #include "llvm/Support/DebugLog.h"

 #include "llvm/Support/FileSystem.h"

 #include "llvm/Support/FileUtilities.h"

 #include "llvm/Support/FormatVariadic.h"

 #include "llvm/Support/MemoryBuffer.h"

 #include "llvm/Support/Path.h"

 #include "llvm/Support/Process.h"

 #include "llvm/Support/Program.h"

 #include "llvm/Support/TargetSelect.h"

 #include "llvm/Support/Timer.h"

 #include "llvm/Support/raw_ostream.h"


 #include <cstdint>

 #include <cstdlib>

 #include <optional>


 using namespace mlir;

 using namespace mlir::NVVM;


 // Fallback used when the build does not define a default CUDA toolkit
 // location. With this empty default, `getCUDAToolkitPath()` returns an empty
 // path when none of the environment variables it checks are set.
 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
 #endif


 // Bytes of the embedded libdevice library and their count. Both symbols are
 // defined outside this file and are only referenced from
 // `appendStandardLibs()` when `MLIR_NVVM_EMBED_LIBDEVICE` is enabled.
 extern "C" const unsigned char _mlir_embedded_libdevice[];
 extern "C" const unsigned _mlir_embedded_libdevice_size;


 namespace {
 // Implementation of the `TargetAttrInterface` model.
 //
 // This fallback model is attached to `NVVMTargetAttr` by
 // `registerNVVMTargetInterfaceExternalModels`.
 class NVVMTargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
 public:
   // Serializes the GPU module `module` for the NVVM target `attribute` using
   // `options`. Returns the serialized bytes, or `std::nullopt` on failure.
   std::optional<SmallVector<char, 0>>
   serializeToObject(Attribute attribute, Operation *module,
                     const gpu::TargetOptions &options) const;

   // Wraps the serialized bytes in `object` into a `gpu::ObjectAttr` for the
   // target `attribute`, using the compilation target format from `options`.
   Attribute createObject(Attribute attribute, Operation *module,
                          const SmallVector<char, 0> &object,
                          const gpu::TargetOptions &options) const;
 };
 } // namespace


 // Adds a dialect extension to `registry` that attaches the
 // `NVVMTargetAttrImpl` target interface model to `NVVMTargetAttr`.
 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
     DialectRegistry &registry) {
   // The unary `+` converts the capture-less lambda to a function pointer.
   auto attachTargetModel = +[](MLIRContext *context, NVVM::NVVMDialect *) {
     NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*context);
   };
   registry.addExtension(attachTargetModel);
 }


 // Convenience overload: registers the NVVM target interface model in a
 // temporary registry and appends that registry to `context`.
 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
     MLIRContext &context) {
   DialectRegistry scratchRegistry;
   registerNVVMTargetInterfaceExternalModels(scratchRegistry);
   context.appendDialectRegistry(scratchRegistry);
 }


 // Returns the CUDA toolkit path: the value of the first environment variable
 // that is set among `CUDA_ROOT`, `CUDA_HOME` and `CUDA_PATH` (in that order),
 // or `__DEFAULT_CUDATOOLKIT_PATH__` if none of them is set.
 StringRef mlir::NVVM::getCUDAToolkitPath() {
   for (const char *envName : {"CUDA_ROOT", "CUDA_HOME", "CUDA_PATH"}) {
     if (const char *envValue = std::getenv(envName))
       return envValue;
   }
   return __DEFAULT_CUDATOOLKIT_PATH__;
 }


 // Builds the serializer state from the NVVM `target` attribute and
 // `targetOptions`: forwards triple, chip, features, optimization level and
 // the IR/ISA callbacks to `ModuleToObject`, then resolves the toolkit path
 // and the list of libraries to link.
 SerializeGPUModuleBase::SerializeGPUModuleBase(
     Operation &module, NVVMTargetAttr target,
     const gpu::TargetOptions &targetOptions)
     : ModuleToObject(module, target.getTriple(), target.getChip(),
                      target.getFeatures(), target.getO(),
                      targetOptions.getInitialLlvmIRCallback(),
                      targetOptions.getLinkedLlvmIRCallback(),
                      targetOptions.getOptimizedLlvmIRCallback(),
                      targetOptions.getISACallback()),
       target(target), toolkitPath(targetOptions.getToolkitPath()),
       librariesToLink(targetOptions.getLibrariesToLink()) {
   // Fall back to `getCUDAToolkitPath` when the options carry no toolkit path.
   if (toolkitPath.empty())
     toolkitPath = getCUDAToolkitPath();

   // Libraries listed on the target attribute come after the ones in the
   // options.
   if (auto linkFiles = target.getLink())
     librariesToLink.append(linkFiles.begin(), linkFiles.end());

   // Finally add libdevice; the result is intentionally ignored here.
   (void)appendStandardLibs();
 }


 void SerializeGPUModuleBase::init() {

   static llvm::once_flag initializeBackendOnce;

   llvm::call_once(initializeBackendOnce, []() {

   // If the `NVPTX` LLVM target was built, initialize it.

 #if LLVM_HAS_NVPTX_TARGET

     LLVMInitializeNVPTXTarget();

     LLVMInitializeNVPTXTargetInfo();

     LLVMInitializeNVPTXTargetMC();

     LLVMInitializeNVPTXAsmPrinter();

 #endif

   });

 }


 // Returns the NVVM target attribute this serializer was created with.
 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const {
   return this->target;
 }


 // Returns the CUDA toolkit path resolved by the constructor.
 StringRef SerializeGPUModuleBase::getToolkitPath() const {
   return this->toolkitPath;
 }


 // Returns a view of the libraries that will be linked into the module.
 ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
   return this->librariesToLink;
 }


 // Appends `libdevice` to the libraries to link.
 //
 // With `MLIR_NVVM_EMBED_LIBDEVICE`, the library comes from the bytes embedded
 // in this binary and is exposed as a dense resource attribute. Otherwise it is
 // looked up at `<toolkit path>/nvvm/libdevice/libdevice.10.bc`; an empty
 // toolkit path is not an error and simply adds nothing. Fails if the toolkit
 // path is not a directory or the libdevice file is not a regular file.
 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
 #if MLIR_NVVM_EMBED_LIBDEVICE
   // libdevice is part of the binary, so the filesystem is never consulted.
   MLIRContext *context = target.getContext();
   auto libdeviceType =
       RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
                             IntegerType::get(context, 8));
   auto blobInterface =
       DenseResourceElementsHandle::getManagerInterface(context);

   // Reuse the resource if it was already registered under this name.
   if (DialectResourceBlobManager::BlobEntry *existing =
           blobInterface.getBlobManager().lookup("_mlir_embedded_libdevice")) {
     DenseResourceElementsHandle existingHandle(
         existing, context->getLoadedDialect<BuiltinDialect>());
     librariesToLink.push_back(
         DenseResourceElementsAttr::get(libdeviceType, existingHandle));
     return success();
   }

   // First use: wrap the embedded bytes in an unmanaged blob and register it.
   ArrayRef<char> embeddedBytes((const char *)_mlir_embedded_libdevice,
                                _mlir_embedded_libdevice_size);
   auto wrappedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(embeddedBytes);
   auto insertedHandle =
       blobInterface.insert("_mlir_embedded_libdevice", std::move(wrappedBlob));
   librariesToLink.push_back(
       DenseResourceElementsAttr::get(libdeviceType, insertedHandle));
 #else
   StringRef toolkitDir = getToolkitPath();
   if (toolkitDir.empty())
     return success();

   // Mutable copy of the toolkit path, extended below to the libdevice file.
   SmallVector<char, 256> libdevicePath(toolkitDir.begin(), toolkitDir.end());
   auto currentPath = [&libdevicePath]() {
     return StringRef(libdevicePath.data(), libdevicePath.size());
   };

   if (!llvm::sys::fs::is_directory(currentPath())) {
     getOperation().emitError() << "CUDA path: " << currentPath()
                                << " does not exist or is not a directory.\n";
     return failure();
   }

   llvm::sys::path::append(libdevicePath, "nvvm", "libdevice",
                           "libdevice.10.bc");
   if (!llvm::sys::fs::is_regular_file(currentPath())) {
     getOperation().emitError() << "LibDevice path: " << currentPath()
                                << " does not exist or is not a file.\n";
     return failure();
   }
   librariesToLink.push_back(
       StringAttr::get(target.getContext(), currentPath()));
 #endif
   return success();
 }


 // Loads every library in `librariesToLink` as an LLVM module in the context
 // of `module`. Returns the loaded modules, or `std::nullopt` if loading
 // failed.
 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
   SmallVector<std::unique_ptr<llvm::Module>> loadedModules;
   LogicalResult loadStatus =
       loadBitcodeFilesFromList(module.getContext(), librariesToLink,
                                loadedModules, /*failureOnError=*/true);
   if (succeeded(loadStatus))
     return std::move(loadedModules);
   return std::nullopt;
 }


 namespace {
 /// Serializer for GPU modules targeting NVPTX. Depending on the compilation
 /// target in the target options, `moduleToObject` produces LLVM IR, PTX, or a
 /// compiled binary, and records how long the LLVM IR -> ISA and ISA -> binary
 /// steps took.
 class NVPTXSerializer : public SerializeGPUModuleBase {
 public:
   /// Creates a serializer for `module` with the given `target` attribute and
   /// keeps a copy of `targetOptions`.
   NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                   const gpu::TargetOptions &targetOptions);

   /// Returns the GPU module op being serialized.
   gpu::GPUModuleOp getOperation();

   /// Compiles PTX to cubin using `ptxas`.
   /// Returns `std::nullopt` on failure.
   std::optional<SmallVector<char, 0>>
   compileToBinary(const std::string &ptxCode);

   /// Compiles PTX to cubin using the `nvptxcompiler` library.
   /// The definition is only compiled when `MLIR_ENABLE_NVPTXCOMPILER` is set.
   std::optional<SmallVector<char, 0>>
   compileToBinaryNVPTX(const std::string &ptxCode);

   /// Serializes the LLVM module to an object format, depending on the
   /// compilation target selected in target options.
   std::optional<SmallVector<char, 0>>
   moduleToObject(llvm::Module &llvmModule) override;

   /// Get LLVMIR->ISA performance result.
   /// Return nullopt if moduleToObject has not been called or the target format
   /// is LLVMIR.
   std::optional<int64_t> getLLVMIRToISATimeInMs();

   /// Get ISA->Binary performance result.
   /// Return nullopt if moduleToObject has not been called or the target format
   /// is LLVMIR or ISA.
   std::optional<int64_t> getISAToBinaryTimeInMs();

 private:
   /// A temporary file path paired with the `FileRemover` that owns its
   /// cleanup.
   using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

   /// Creates a temp file.
   /// Emits an error on the module and returns `std::nullopt` on failure.
   std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

   /// Finds the `tool` path, where `tool` is the name of the binary to search,
   /// e.g. `ptxas` or `fatbinary`. The search order is:
   /// 1. The toolkit path in `targetOptions`.
   /// 2. In the system PATH.
   /// 3. The path from `getCUDAToolkitPath()`.
   /// Emits an error on the module and returns `std::nullopt` if not found.
   std::optional<std::string> findTool(StringRef tool);

   /// Target options.
   gpu::TargetOptions targetOptions;

   /// LLVMIR->ISA perf result.
   std::optional<int64_t> llvmToISATimeInMs;

   /// ISA->Binary perf result.
   std::optional<int64_t> isaToBinaryTimeInMs;
 };
 } // namespace


 NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,

                                  const gpu::TargetOptions &targetOptions)

     : SerializeGPUModuleBase(module, target, targetOptions),

       targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),

       isaToBinaryTimeInMs(std::nullopt) {}


 // Creates a temporary file named after `name` (truncated to its first 80
 // characters) with the given `suffix`. On success returns the file path
 // together with a `FileRemover` for it; on failure emits an error on the
 // module and returns `std::nullopt`.
 std::optional<NVPTXSerializer::TmpFile>
 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
   llvm::SmallString<128> tempPath;
   StringRef prefix = name.size() > 80 ? name.substr(0, 80) : name;
   if (std::error_code status =
           llvm::sys::fs::createTemporaryFile(prefix, suffix, tempPath)) {
     getOperation().emitError() << "Couldn't create the temp file: `" << tempPath
                                << "`, error message: " << status.message();
     return std::nullopt;
   }
   return TmpFile(tempPath, llvm::FileRemover(tempPath.c_str()));
 }


 std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {

   return llvmToISATimeInMs;

 }


 std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {

   return isaToBinaryTimeInMs;

 }


 // Returns the operation held by the base serializer as a `gpu::GPUModuleOp`
 // (a null op if it is not a GPU module).
 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
   Operation &baseOp = SerializeGPUModuleBase::getOperation();
   return dyn_cast<gpu::GPUModuleOp>(&baseOp);
 }


 // Locates the executable `tool`. Candidates are tried in this order:
 //   1. `<toolkit path from the target options>/bin/<tool>`
 //   2. `<tool>` found through the `PATH` environment variable
 //   3. `<getCUDAToolkitPath()>/bin/<tool>`
 // Emits an error on the module and returns `std::nullopt` if all of them
 // fail.
 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
   // Returns `<root>/bin/<tool>` when `root` is non-empty and that file can be
   // executed, `std::nullopt` otherwise.
   auto probeToolkit = [&tool](StringRef root) -> std::optional<std::string> {
     if (root.empty())
       return std::nullopt;
     SmallVector<char, 256> candidate(root.begin(), root.end());
     llvm::sys::path::append(candidate, "bin", tool);
     if (!llvm::sys::fs::can_execute(candidate))
       return std::nullopt;
     return std::string(candidate.data(), candidate.size());
   };

   // 1. Toolkit path given in the target options.
   if (std::optional<std::string> found =
           probeToolkit(targetOptions.getToolkitPath()))
     return found;

   // 2. PATH.
   if (std::optional<std::string> found =
           llvm::sys::Process::FindInEnvPath("PATH", tool))
     return found;

   // 3. `getCUDAToolkitPath()`.
   if (std::optional<std::string> found = probeToolkit(getCUDAToolkitPath()))
     return found;

   getOperation().emitError()
       << "Couldn't find the `" << tool
       << "` binary. Please specify the toolkit "
          "path, add the compiler to $PATH, or set one of the environment "
          "variables in `NVVM::getCUDAToolkitPath()`.";
   return std::nullopt;
 }


 /// Appends the string entries of the target's command-line options attribute
 /// to `ptxasArgs`. Non-string entries are skipped. `T` is expected to be
 /// `StringRef` or `const char *`; for any other type nothing is appended.
 template <typename T>
 static void setOptionalCommandlineArguments(NVVMTargetAttr target,
                                             SmallVectorImpl<T> &ptxasArgs) {
   if (!target.hasCmdOptions())
     return;

   std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
   for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
     auto strAttr = dyn_cast<StringAttr>(attr);
     if (!strAttr)
       continue;
     if constexpr (std::is_same_v<T, StringRef>)
       ptxasArgs.push_back(strAttr.getValue());
     else if constexpr (std::is_same_v<T, const char *>)
       ptxasArgs.push_back(strAttr.getValue().data());
   }
 }


 // TODO: clean this method & have a generic tool driver or never emit binaries

 // with this mechanism and let another stage take care of it.

 std::optional<SmallVector<char, 0>>

 NVPTXSerializer::compileToBinary(const std::string &ptxCode) {

   // Determine if the serializer should create a fatbinary with the PTX embeded

   // or a simple CUBIN binary.

   const bool createFatbin =

       targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;


   // Find the `ptxas` & `fatbinary` tools.

   std::optional<std::string> ptxasCompiler = findTool("ptxas");

   if (!ptxasCompiler)

     return std::nullopt;

   std::optional<std::string> fatbinaryTool;

   if (createFatbin) {

     fatbinaryTool = findTool("fatbinary");

     if (!fatbinaryTool)

       return std::nullopt;

   }

   Location loc = getOperation().getLoc();


   // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.

   std::string basename =

       llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),

                     getTarget().getTriple(), getTarget().getChip());


   // Create temp files:

   std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");

   if (!ptxFile)

     return std::nullopt;

   std::optional<TmpFile> logFile = createTemp(basename, "log");

   if (!logFile)

     return std::nullopt;

   std::optional<TmpFile> binaryFile = createTemp(basename, "bin");

   if (!binaryFile)

     return std::nullopt;

   TmpFile cubinFile;

   if (createFatbin) {

     std::string cubinFilename = (ptxFile->first + ".cubin").str();

     cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));

   } else {

     cubinFile.first = binaryFile->first;

   }


   std::error_code ec;

   // Dump the PTX to a temp file.

   {

     llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);

     if (ec) {

       emitError(loc) << "Couldn't open the file: `" << ptxFile->first

                      << "`, error message: " << ec.message();

       return std::nullopt;

     }

     ptxStream << ptxCode;

     if (ptxStream.has_error()) {

       emitError(loc) << "An error occurred while writing the PTX to: `"

                      << ptxFile->first << "`.";

       return std::nullopt;

     }

     ptxStream.flush();

   }


   // Command redirects.

   std::optional<StringRef> redirects[] = {

       std::nullopt,

       logFile->first,

       logFile->first,

   };


   // Get any extra args passed in `targetOptions`.

   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =

       targetOptions.tokenizeCmdOptions();


   // Create ptxas args.

   std::string optLevel = std::to_string(this->optLevel);

   SmallVector<StringRef, 12> ptxasArgs(

       {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),

        StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),

        "--opt-level", optLevel});


   bool useFatbin32 = false;

   for (const auto *cArg : cmdOpts.second) {

     // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to

     // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is

     // assumed.

     if (StringRef arg(cArg); arg != "-32")

       ptxasArgs.push_back(arg);

     else

       useFatbin32 = true;

   }


   // Set optional command line arguments

   setOptionalCommandlineArguments(getTarget(), ptxasArgs);


   // Create the `fatbinary` args.

   StringRef chip = getTarget().getChip();

   // Remove the arch prefix to obtain the compute capability.

   chip.consume_front("sm_"), chip.consume_front("compute_");

   // Embed the cubin object.

   std::string cubinArg =

       llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)

           .str();

   // Embed the PTX file so the driver can JIT if needed.

   std::string ptxArg =

       llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)

           .str();

   SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),

                                         useFatbin32 ? "-32" : "-64", cubinArg,

                                         ptxArg, "--create", binaryFile->first});


   // Dump tool invocation commands.

 #define DEBUG_TYPE "serialize-to-binary"

   LDBG() << "Tool invocation for module: " << getOperation().getNameAttr()

          << "\nptxas executable:" << ptxasCompiler.value()

          << "\nptxas args: " << llvm::interleaved(ptxasArgs, " ");

   if (createFatbin)

     LDBG() << "fatbin args: " << llvm::interleaved(fatbinArgs, " ");

 #undef DEBUG_TYPE


   // Helper function for printing tool error logs.

   std::string message;

   auto emitLogError =

       [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {

     if (message.empty()) {

       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =

           llvm::MemoryBuffer::getFile(logFile->first);

       if (toolStderr)

         emitError(loc) << toolName << " invocation failed. Log:\n"

                        << toolStderr->get()->getBuffer();

       else

         emitError(loc) << toolName << " invocation failed.";

       return std::nullopt;

     }

     emitError(loc) << toolName

                    << " invocation failed, error message: " << message;

     return std::nullopt;

   };


   // Invoke PTXAS.

   if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,

                                 /*Env=*/std::nullopt,

                                 /*Redirects=*/redirects,

                                 /*SecondsToWait=*/0,

                                 /*MemoryLimit=*/0,

                                 /*ErrMsg=*/&message))

     return emitLogError("`ptxas`");

 #define DEBUG_TYPE "dump-sass"

   LLVM_DEBUG({

     std::optional<std::string> nvdisasm = findTool("nvdisasm");

     SmallVector<StringRef> nvdisasmArgs(

         {StringRef("nvdisasm"), StringRef(cubinFile.first)});

     if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,

                                   /*Env=*/std::nullopt,

                                   /*Redirects=*/redirects,

                                   /*SecondsToWait=*/0,

                                   /*MemoryLimit=*/0,

                                   /*ErrMsg=*/&message))

       return emitLogError("`nvdisasm`");

     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =

         llvm::MemoryBuffer::getFile(logFile->first);

     if (logBuffer && !(*logBuffer)->getBuffer().empty()) {

       LDBG() << "Output:\n" << (*logBuffer)->getBuffer();

       llvm::dbgs().flush();

     }

   });

 #undef DEBUG_TYPE


   // Invoke `fatbin`.

   message.clear();

   if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,

                                                 /*Env=*/std::nullopt,

                                                 /*Redirects=*/redirects,

                                                 /*SecondsToWait=*/0,

                                                 /*MemoryLimit=*/0,

                                                 /*ErrMsg=*/&message))

     return emitLogError("`fatbinary`");


 // Dump the output of the tools, helpful if the verbose flag was passed.

 #define DEBUG_TYPE "serialize-to-binary"

   LLVM_DEBUG({

     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =

         llvm::MemoryBuffer::getFile(logFile->first);

     if (logBuffer && !(*logBuffer)->getBuffer().empty()) {

       LDBG() << "Output:\n" << (*logBuffer)->getBuffer();

       llvm::dbgs().flush();

     }

   });

 #undef DEBUG_TYPE


   // Read the fatbin.

   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =

       llvm::MemoryBuffer::getFile(binaryFile->first);

   if (!binaryBuffer) {

     emitError(loc) << "Couldn't open the file: `" << binaryFile->first

                    << "`, error message: " << binaryBuffer.getError().message();

     return std::nullopt;

   }

   StringRef fatbin = (*binaryBuffer)->getBuffer();

   return SmallVector<char, 0>(fatbin.begin(), fatbin.end());

 }


 #if MLIR_ENABLE_NVPTXCOMPILER

 #include "nvPTXCompiler.h"


 // Evaluates `expr` once. If the resulting status converts to `true`, emits an
 // error at `loc` naming the failed expression and its status code, and
 // returns `std::nullopt` from the enclosing function. A variable named `loc`
 // must be in scope at the point of use.
 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
   do {                                                                         \
     if (auto status = (expr)) {                                                \
       emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                      << status;                                                \
       return std::nullopt;                                                     \
     }                                                                          \
   } while (false)


 #include "nvFatbin.h"


 // Evaluates `expr` once. If the result is not `NVFATBIN_SUCCESS`, emits an
 // error at `loc` naming the failed expression and the string returned by
 // `nvFatbinGetErrorString`, and returns `std::nullopt` from the enclosing
 // function. A variable named `loc` must be in scope at the point of use.
 #define RETURN_ON_NVFATBIN_ERROR(expr)                                         \
   do {                                                                         \
     auto result = (expr);                                                      \
     if (result != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
       emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
                      << nvFatbinGetErrorString(result);                        \
       return std::nullopt;                                                     \
     }                                                                          \
   } while (false)


 std::optional<SmallVector<char, 0>>

 NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {

   Location loc = getOperation().getLoc();

   nvPTXCompilerHandle compiler = nullptr;

   nvPTXCompileResult status;

   size_t logSize;


   // Create the options.

   std::string optLevel = std::to_string(this->optLevel);

   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =

       targetOptions.tokenizeCmdOptions();

   cmdOpts.second.append(

       {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});


   // Set optional command line arguments

   setOptionalCommandlineArguments(getTarget(), cmdOpts.second);

   // Create the compiler handle.

   RETURN_ON_NVPTXCOMPILER_ERROR(

       nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));


   // Try to compile the binary.

   status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),

                                 cmdOpts.second.data());


   // Check if compilation failed.

   if (status != NVPTXCOMPILE_SUCCESS) {

     RETURN_ON_NVPTXCOMPILER_ERROR(

         nvPTXCompilerGetErrorLogSize(compiler, &logSize));

     if (logSize != 0) {

       SmallVector<char> log(logSize + 1, 0);

       RETURN_ON_NVPTXCOMPILER_ERROR(

           nvPTXCompilerGetErrorLog(compiler, log.data()));

       emitError(loc) << "NVPTX compiler invocation failed, error log: "

                      << log.data();

     } else {

       emitError(loc) << "NVPTX compiler invocation failed with error code: "

                      << status;

     }

     return std::nullopt;

   }


   // Retrieve the binary.

   size_t elfSize;

   RETURN_ON_NVPTXCOMPILER_ERROR(

       nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));

   SmallVector<char, 0> binary(elfSize, 0);

   RETURN_ON_NVPTXCOMPILER_ERROR(

       nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));


 // Dump the log of the compiler, helpful if the verbose flag was passed.

 #define DEBUG_TYPE "serialize-to-binary"

   LLVM_DEBUG({

     RETURN_ON_NVPTXCOMPILER_ERROR(

         nvPTXCompilerGetInfoLogSize(compiler, &logSize));

     if (logSize != 0) {

       SmallVector<char> log(logSize + 1, 0);

       RETURN_ON_NVPTXCOMPILER_ERROR(

           nvPTXCompilerGetInfoLog(compiler, log.data()));

       LDBG() << "NVPTX compiler invocation for module: "

              << getOperation().getNameAttr()

              << "\nArguments: " << llvm::interleaved(cmdOpts.second, " ")

              << "\nOutput\n"

              << log.data();

     }

   });

 #undef DEBUG_TYPE

   RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));


   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {

     bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {

       return llvm::StringRef(option) == "-32";

     });


     const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};

     nvFatbinHandle handle;


     auto chip = getTarget().getChip();

     chip.consume_front("sm_");


     RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));

     RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(

         handle, binary.data(), binary.size(), chip.data(), nullptr));

     RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(

         handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));


     size_t fatbinSize;

     RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));

     SmallVector<char, 0> fatbin(fatbinSize, 0);

     RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));

     RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));

     return fatbin;

   }


   return binary;

 }

 #endif // MLIR_ENABLE_NVPTXCOMPILER


 // Serializes `llvmModule` according to the compilation target in
 // `targetOptions`:
 //   - `Offload`: defers to `SerializeGPUModuleBase::moduleToObject`.
 //   - `Assembly`: returns the PTX text produced by `translateToISA`.
 //   - otherwise: compiles the PTX to a binary with `compileToBinaryNVPTX`
 //     (when `MLIR_ENABLE_NVPTXCOMPILER` is set) or `compileToBinary`.
 // The duration of the LLVM IR -> ISA and ISA -> binary steps is stored in
 // `llvmToISATimeInMs` and `isaToBinaryTimeInMs`. Returns `std::nullopt` after
 // emitting an error on failure.
 std::optional<SmallVector<char, 0>>
 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
   llvm::Timer moduleToObjectTimer(
       "moduleToObjectTimer",
       "Timer for perf llvm-ir -> isa and isa -> binary.");
   // Clear the timer on every exit path, including the early returns below.
   auto clear = llvm::make_scope_exit([&]() { moduleToObjectTimer.clear(); });
   // Debug-only dump of the incoming LLVM IR.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
     LDBG() << "LLVM IR for module: " << getOperation().getNameAttr();
     LDBG() << llvmModule;
   });
 #undef DEBUG_TYPE
   // Return LLVM IR if the compilation target is `offload`.
   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
     return SerializeGPUModuleBase::moduleToObject(llvmModule);

   // Without the NVPTX backend nothing past this point can work, so report an
   // error and stop.
 #if !LLVM_HAS_NVPTX_TARGET
   getOperation()->emitError(
       "The `NVPTX` target was not built. Please enable it when building LLVM.");
   return std::nullopt;
 #endif // LLVM_HAS_NVPTX_TARGET

   // Emit PTX code.
   std::optional<llvm::TargetMachine *> targetMachine =
       getOrCreateTargetMachine();
   if (!targetMachine) {
     getOperation().emitError() << "Target Machine unavailable for triple "
                                << triple << ", can't optimize with LLVM\n";
     return std::nullopt;
   }
   // Time the LLVM IR -> ISA translation. The timer is cleared afterwards so
   // it can be reused for the ISA -> binary step.
   moduleToObjectTimer.startTimer();
   std::optional<std::string> serializedISA =
       translateToISA(llvmModule, **targetMachine);
   moduleToObjectTimer.stopTimer();
   llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
   moduleToObjectTimer.clear();
   if (!serializedISA) {
     getOperation().emitError() << "Failed translating the module to ISA.";
     return std::nullopt;
   }

   // Hand the generated ISA to the callback, if one is set.
   if (isaCallback)
     isaCallback(serializedISA.value());

 #define DEBUG_TYPE "serialize-to-isa"
   LDBG() << "PTX for module: " << getOperation().getNameAttr() << "\n"
          << *serializedISA;
 #undef DEBUG_TYPE

   // Return PTX if the compilation target is `assembly`.
   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
     return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());

   std::optional<SmallVector<char, 0>> result;
   // Time the ISA -> binary compilation.
   moduleToObjectTimer.startTimer();
   // Compile to binary.
 #if MLIR_ENABLE_NVPTXCOMPILER
   result = compileToBinaryNVPTX(*serializedISA);
 #else
   result = compileToBinary(*serializedISA);
 #endif // MLIR_ENABLE_NVPTXCOMPILER

   moduleToObjectTimer.stopTimer();
   isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
   moduleToObjectTimer.clear();
   return result;
 }


 // Serializes the GPU module `module` for the NVVM target `attribute`.
 //
 // Returns `std::nullopt` if `module` is null or is not a `gpu::GPUModuleOp`
 // (the latter also emits an error), otherwise the result of running an
 // `NVPTXSerializer`. Any timing the serializer recorded is stored on `module`
 // as the `LLVMIRToISATimeInMs` / `ISAToBinaryTimeInMs` integer attributes.
 std::optional<SmallVector<char, 0>>
 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                       const gpu::TargetOptions &options) const {
   Builder builder(attribute.getContext());
   assert(module && "The module must be non null.");
   if (!module)
     return std::nullopt;
   if (!mlir::isa<gpu::GPUModuleOp>(module)) {
     module->emitError("Module must be a GPU module.");
     return std::nullopt;
   }

   NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
   serializer.init();
   std::optional<SmallVector<char, 0>> result = serializer.run();

   // Stores `timeInMs` on the module under `attrName` when a value is present.
   auto recordTiming = [&](StringRef attrName,
                           std::optional<int64_t> timeInMs) {
     if (timeInMs.has_value())
       module->setAttr(attrName, builder.getI64IntegerAttr(*timeInMs));
   };
   recordTiming("LLVMIRToISATimeInMs", serializer.getLLVMIRToISATimeInMs());
   recordTiming("ISAToBinaryTimeInMs", serializer.getISAToBinaryTimeInMs());
   return result;
 }


 // Wraps the serialized bytes in `object` into a `gpu::ObjectAttr` for the
 // target `attribute`, using the compilation target format from `options`.
 //
 // The object's property dictionary (left null when empty) may contain:
 //   - `O`: the target's optimization level, only for the `Assembly` format;
 //   - the ELF section name, when `options` specifies a non-empty one;
 //   - `LLVMIRToISATimeInMs` / `ISAToBinaryTimeInMs`, copied from integer
 //     attributes of those names on `module` when present.
 Attribute
 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
                                  const SmallVector<char, 0> &object,
                                  const gpu::TargetOptions &options) const {
   auto target = cast<NVVMTargetAttr>(attribute);
   gpu::CompilationTarget format = options.getCompilationTarget();
   DictionaryAttr objectProps;
   Builder builder(attribute.getContext());
   SmallVector<NamedAttribute, 4> properties;
   if (format == gpu::CompilationTarget::Assembly)
     properties.push_back(
         builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));

   if (StringRef section = options.getELFSection(); !section.empty())
     properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                               builder.getStringAttr(section)));

   for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
     // Only forward attributes that really are integers. A same-named attribute
     // of another kind is skipped instead of dereferencing a null `IntegerAttr`.
     if (auto attr = module->getAttrOfType<IntegerAttr>(perfName))
       properties.push_back(builder.getNamedAttr(
           perfName, builder.getI64IntegerAttr(attr.getInt())));
   }

   if (!properties.empty())
     objectProps = builder.getDictionaryAttr(properties);

   return builder.getAttr<gpu::ObjectAttr>(
       attribute, format,
       builder.getStringAttr(StringRef(object.data(), object.size())),
       objectProps, /*kernels=*/nullptr);
 }

BuiltinDialect.h

CompilationInterfaces.h

DialectResourceBlobManager.h

Export.h

GPUDialect.h

GPUToLLVMIRTranslation.h

LLVMToLLVMIRTranslation.h

setOptionalCommandlineArguments
static void setOptionalCommandlineArguments(NVVMTargetAttr target, SmallVectorImpl< T > &ptxasArgs)
Adds optional command-line arguments to existing arguments.
Definition: Target.cpp:330

_mlir_embedded_libdevice_size
const unsigned _mlir_embedded_libdevice_size
Definition: Target.cpp:55

__DEFAULT_CUDATOOLKIT_PATH__
#define __DEFAULT_CUDATOOLKIT_PATH__
Definition: Target.cpp:51

_mlir_embedded_libdevice
const unsigned char _mlir_embedded_libdevice[]
Definition: Target.cpp:54

Target.h

NVVMDialect.h

NVVMToLLVMIRTranslation.h

options
static llvm::ManagedStatic< PassManagerOptions > options
Definition: PassManagerOptions.cpp:89

Utils.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallString
Definition: LLVM.h:41

llvm::SmallVectorImpl
Definition: LLVM.h:74

llvm::SmallVector
Definition: LLVM.h:72

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::Attribute::getContext
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:37

mlir::Builder
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51

mlir::DialectRegistry
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
Definition: DialectRegistry.h:139

mlir::DialectRegistry::addExtension
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Definition: DialectRegistry.h:215

mlir::DialectResourceBlobManager::BlobEntry
The class represents an individual entry of a blob.
Definition: DialectResourceBlobManager.h:36

mlir::LLVM::ModuleToObject::loadBitcodeFilesFromList
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
Definition: ModuleToObject.cpp:92

mlir::LLVM::ModuleToObject::moduleToObject
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Definition: ModuleToObject.cpp:239

mlir::LLVM::ModuleToObject::getOperation
Operation & getOperation()
Returns the operation being serialized.
Definition: ModuleToObject.cpp:51

mlir::LLVM::ModuleToObject::module
Operation & module
Module to transform to a binary object.
Definition: ModuleToObject.h:108

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63

mlir::MLIRContext::appendDialectRegistry
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Definition: MLIRContext.cpp:416

mlir::MLIRContext::getLoadedDialect
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Definition: MLIRContext.cpp:454

mlir::NVVM::SerializeGPUModuleBase
Base class for all NVVM serializations from GPU modules into binary strings.
Definition: Utils.h:32

mlir::NVVM::SerializeGPUModuleBase::getLibrariesToLink
ArrayRef< Attribute > getLibrariesToLink() const
Returns the bitcode libraries to be linked into the gpu module after translation to LLVM IR.
Definition: Target.cpp:139

mlir::NVVM::SerializeGPUModuleBase::SerializeGPUModuleBase
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getCUDAToolki...
Definition: Target.cpp:98

mlir::NVVM::SerializeGPUModuleBase::target
NVVMTargetAttr target
NVVM target attribute.
Definition: Utils.h:63

mlir::NVVM::SerializeGPUModuleBase::toolkitPath
std::string toolkitPath
CUDA toolkit path.
Definition: Utils.h:66

mlir::NVVM::SerializeGPUModuleBase::librariesToLink
SmallVector< Attribute > librariesToLink
List of LLVM bitcode to link into after translation to LLVM IR.
Definition: Utils.h:71

mlir::NVVM::SerializeGPUModuleBase::loadBitcodeFiles
std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in librariesToLink.
Definition: Target.cpp:197

mlir::NVVM::SerializeGPUModuleBase::appendStandardLibs
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into librariesToLink.
Definition: Target.cpp:144

mlir::NVVM::SerializeGPUModuleBase::init
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
Definition: Target.cpp:122

mlir::NVVM::SerializeGPUModuleBase::getToolkitPath
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
Definition: Target.cpp:137

mlir::NVVM::SerializeGPUModuleBase::getTarget
NVVMTargetAttr getTarget() const
Returns the target attribute.
Definition: Target.cpp:135

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getAttr
Attribute getAttr(StringAttr name)
Return the specified attribute if present, null otherwise.
Definition: Operation.h:534

mlir::Operation::hasAttr
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition: Operation.h:560

mlir::Operation::getContext
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216

mlir::Operation::emitError
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268

mlir::Operation::setAttr
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:582

mlir::UnmanagedAsmResourceBlob::allocateInferAlign
static AsmResourceBlob allocateInferAlign(ArrayRef< T > data, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
Definition: AsmState.h:235

mlir::gpu::TargetOptions
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
Definition: CompilationInterfaces.h:47

BuiltinTypes.h

mlir::NVVM
Definition: GPUToNVVM.h:18

mlir::NVVM::registerNVVMTargetInterfaceExternalModels
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
Definition: Target.cpp:73

mlir::NVVM::getCUDAToolkitPath
StringRef getCUDAToolkitPath()
Searches for and returns the CUDA toolkit path; the search order is:
Definition: Target.cpp:88

mlir::remark::failed
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:561

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::emitError
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
Definition: Diagnostics.cpp:328

mlir::DenseResourceElementsHandle
DialectResourceBlobHandle< BuiltinDialect > DenseResourceElementsHandle
Definition: BuiltinAttributes.h:702

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::DialectResourceBlobHandle::getManagerInterface
static ManagerInterface & getManagerInterface(MLIRContext *ctx)
Get the interface for the dialect that owns handles of this type.
Definition: DialectResourceBlobManager.h:209