24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/FileUtilities.h"
27 #include "llvm/Support/FormatVariadic.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/TargetSelect.h"
39 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
40 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
45 class NVVMTargetAttrImpl
46 :
public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
48 std::optional<SmallVector<char, 0>>
62 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
75 if (
const char *var = std::getenv(
"CUDA_ROOT"))
77 if (
const char *var = std::getenv(
"CUDA_HOME"))
79 if (
const char *var = std::getenv(
"CUDA_PATH"))
87 : ModuleToObject(module, target.getTriple(), target.getChip(),
88 target.getFeatures(), target.getO()),
89 target(target), toolkitPath(targetOptions.getToolkitPath()),
90 fileList(targetOptions.getLinkFiles()) {
97 if (ArrayAttr files =
target.getLink())
99 if (
auto file = dyn_cast<StringAttr>(attr))
107 static llvm::once_flag initializeBackendOnce;
108 llvm::call_once(initializeBackendOnce, []() {
110 #if LLVM_HAS_NVPTX_TARGET
111 LLVMInitializeNVPTXTarget();
112 LLVMInitializeNVPTXTargetInfo();
113 LLVMInitializeNVPTXTargetMC();
114 LLVMInitializeNVPTXAsmPrinter();
130 if (!pathRef.empty()) {
132 path.insert(path.begin(), pathRef.begin(), pathRef.end());
133 pathRef = StringRef(path.data(), path.size());
134 if (!llvm::sys::fs::is_directory(pathRef)) {
136 <<
" does not exist or is not a directory.\n";
139 llvm::sys::path::append(path,
"nvvm",
"libdevice",
"libdevice.10.bc");
140 pathRef = StringRef(path.data(), path.size());
141 if (!llvm::sys::fs::is_regular_file(pathRef)) {
143 <<
" does not exist or is not a file.\n";
151 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
157 return std::move(bcFiles);
163 NVPTXSerializer(
Operation &module, NVVMTargetAttr target,
167 gpu::GPUModuleOp getOperation();
170 std::optional<SmallVector<char, 0>>
171 compileToBinary(
const std::string &ptxCode);
174 std::optional<SmallVector<char, 0>>
175 compileToBinaryNVPTX(
const std::string &ptxCode);
179 std::optional<SmallVector<char, 0>>
180 moduleToObject(llvm::Module &llvmModule)
override;
183 using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
186 std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
193 std::optional<std::string> findTool(StringRef tool);
200 NVPTXSerializer::NVPTXSerializer(
Operation &module, NVVMTargetAttr target,
203 targetOptions(targetOptions) {}
205 std::optional<NVPTXSerializer::TmpFile>
206 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
209 llvm::sys::fs::createTemporaryFile(name, suffix, filename);
211 getOperation().emitError() <<
"Couldn't create the temp file: `" << filename
212 <<
"`, error message: " << ec.message();
215 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
218 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
222 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
225 StringRef pathRef = targetOptions.getToolkitPath();
227 if (!pathRef.empty()) {
228 path.insert(path.begin(), pathRef.begin(), pathRef.end());
229 llvm::sys::path::append(path,
"bin", tool);
230 if (llvm::sys::fs::can_execute(path))
231 return StringRef(path.data(), path.size()).str();
235 if (std::optional<std::string> toolPath =
236 llvm::sys::Process::FindInEnvPath(
"PATH", tool))
242 if (!pathRef.empty()) {
243 path.insert(path.begin(), pathRef.begin(), pathRef.end());
244 llvm::sys::path::append(path,
"bin", tool);
245 if (llvm::sys::fs::can_execute(path))
246 return StringRef(path.data(), path.size()).str();
248 getOperation().emitError()
249 <<
"Couldn't find the `" << tool
250 <<
"` binary. Please specify the toolkit "
251 "path, add the compiler to $PATH, or set one of the environment "
252 "variables in `NVVM::getCUDAToolkitPath()`.";
258 std::optional<SmallVector<char, 0>>
259 NVPTXSerializer::compileToBinary(
const std::string &ptxCode) {
262 const bool createFatbin =
263 targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
266 std::optional<std::string> ptxasCompiler = findTool(
"ptxas");
269 std::optional<std::string> fatbinaryTool;
271 fatbinaryTool = findTool(
"fatbinary");
275 Location loc = getOperation().getLoc();
278 std::string basename =
279 llvm::formatv(
"mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
280 getTarget().getTriple(), getTarget().getChip());
283 std::optional<TmpFile> ptxFile = createTemp(basename,
"ptx");
286 std::optional<TmpFile> logFile = createTemp(basename,
"log");
289 std::optional<TmpFile> binaryFile = createTemp(basename,
"bin");
294 Twine cubinFilename = ptxFile->first +
".cubin";
295 cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
297 cubinFile.first = binaryFile->first;
303 llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
305 emitError(loc) <<
"Couldn't open the file: `" << ptxFile->first
306 <<
"`, error message: " << ec.message();
309 ptxStream << ptxCode;
310 if (ptxStream.has_error()) {
311 emitError(loc) <<
"An error occurred while writing the PTX to: `"
312 << ptxFile->first <<
"`.";
319 std::optional<StringRef> redirects[] = {
326 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
327 targetOptions.tokenizeCmdOptions();
330 std::string optLevel = std::to_string(this->optLevel);
332 {StringRef(
"ptxas"), StringRef(
"-arch"), getTarget().getChip(),
333 StringRef(ptxFile->first), StringRef(
"-o"), StringRef(cubinFile.first),
334 "--opt-level", optLevel});
336 bool useFatbin32 =
false;
337 for (
const auto *cArg : cmdOpts.second) {
341 if (StringRef arg(cArg); arg !=
"-32")
342 ptxasArgs.push_back(arg);
348 StringRef chip = getTarget().getChip();
350 chip.consume_front(
"sm_"), chip.consume_front(
"compute_");
352 std::string cubinArg =
353 llvm::formatv(
"--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
357 llvm::formatv(
"--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
360 useFatbin32 ?
"-32" :
"-64", cubinArg,
361 ptxArg,
"--create", binaryFile->first});
364 #define DEBUG_TYPE "serialize-to-binary"
366 llvm::dbgs() <<
"Tool invocation for module: "
367 << getOperation().getNameAttr() <<
"\n";
368 llvm::interleave(ptxasArgs, llvm::dbgs(),
" ");
369 llvm::dbgs() <<
"\n";
371 llvm::interleave(fatbinArgs, llvm::dbgs(),
" ");
372 llvm::dbgs() <<
"\n";
381 if (message.empty()) {
382 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
383 llvm::MemoryBuffer::getFile(logFile->first);
385 emitError(loc) << toolName <<
" invocation failed. Log:\n"
386 << toolStderr->get()->getBuffer();
388 emitError(loc) << toolName <<
" invocation failed.";
392 <<
" invocation failed, error message: " << message;
397 if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
403 return emitLogError(
"`ptxas`");
407 if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
413 return emitLogError(
"`fatbinary`");
416 #define DEBUG_TYPE "serialize-to-binary"
418 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
419 llvm::MemoryBuffer::getFile(logFile->first);
420 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
421 llvm::dbgs() <<
"Output:\n" << (*logBuffer)->getBuffer() <<
"\n";
422 llvm::dbgs().flush();
428 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
429 llvm::MemoryBuffer::getFile(binaryFile->first);
431 emitError(loc) <<
"Couldn't open the file: `" << binaryFile->first
432 <<
"`, error message: " << binaryBuffer.getError().message();
435 StringRef fatbin = (*binaryBuffer)->getBuffer();
439 #if MLIR_ENABLE_NVPTXCOMPILER
440 #include "nvPTXCompiler.h"
442 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
444 if (auto status = (expr)) { \
445 emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
447 return std::nullopt; \
451 std::optional<SmallVector<char, 0>>
452 NVPTXSerializer::compileToBinaryNVPTX(
const std::string &ptxCode) {
453 Location loc = getOperation().getLoc();
454 nvPTXCompilerHandle compiler =
nullptr;
455 nvPTXCompileResult status;
459 std::string optLevel = std::to_string(this->optLevel);
460 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
461 targetOptions.tokenizeCmdOptions();
462 cmdOpts.second.append(
463 {
"-arch", getTarget().getChip().data(),
"--opt-level", optLevel.c_str()});
466 RETURN_ON_NVPTXCOMPILER_ERROR(
467 nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
470 status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
471 cmdOpts.second.data());
474 if (status != NVPTXCOMPILE_SUCCESS) {
475 RETURN_ON_NVPTXCOMPILER_ERROR(
476 nvPTXCompilerGetErrorLogSize(compiler, &logSize));
479 RETURN_ON_NVPTXCOMPILER_ERROR(
480 nvPTXCompilerGetErrorLog(compiler, log.data()));
481 emitError(loc) <<
"NVPTX compiler invocation failed, error log: "
484 emitError(loc) <<
"NVPTX compiler invocation failed with error code: "
491 RETURN_ON_NVPTXCOMPILER_ERROR(
492 nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
494 RETURN_ON_NVPTXCOMPILER_ERROR(
495 nvPTXCompilerGetCompiledProgram(compiler, (
void *)binary.data()));
498 #define DEBUG_TYPE "serialize-to-binary"
500 RETURN_ON_NVPTXCOMPILER_ERROR(
501 nvPTXCompilerGetInfoLogSize(compiler, &logSize));
504 RETURN_ON_NVPTXCOMPILER_ERROR(
505 nvPTXCompilerGetInfoLog(compiler, log.data()));
506 llvm::dbgs() <<
"NVPTX compiler invocation for module: "
507 << getOperation().getNameAttr() <<
"\n";
508 llvm::dbgs() <<
"Arguments: ";
509 llvm::interleave(cmdOpts.second, llvm::dbgs(),
" ");
510 llvm::dbgs() <<
"\nOutput\n" << log.data() <<
"\n";
511 llvm::dbgs().flush();
515 RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
520 std::optional<SmallVector<char, 0>>
521 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
523 #define DEBUG_TYPE "serialize-to-llvm"
525 llvm::dbgs() <<
"LLVM IR for module: " << getOperation().getNameAttr()
527 llvm::dbgs() << llvmModule <<
"\n";
528 llvm::dbgs().flush();
531 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
534 #if !LLVM_HAS_NVPTX_TARGET
535 getOperation()->emitError(
536 "The `NVPTX` target was not built. Please enable it when building LLVM.");
541 std::optional<llvm::TargetMachine *> targetMachine =
542 getOrCreateTargetMachine();
543 if (!targetMachine) {
544 getOperation().emitError() <<
"Target Machine unavailable for triple "
545 << triple <<
", can't optimize with LLVM\n";
548 std::optional<std::string> serializedISA =
549 translateToISA(llvmModule, **targetMachine);
550 if (!serializedISA) {
551 getOperation().emitError() <<
"Failed translating the module to ISA.";
554 #define DEBUG_TYPE "serialize-to-isa"
556 llvm::dbgs() <<
"PTX for module: " << getOperation().getNameAttr() <<
"\n";
557 llvm::dbgs() << *serializedISA <<
"\n";
558 llvm::dbgs().flush();
563 if (targetOptions.getCompilationTarget() ==
564 gpu::CompilationTarget::Assembly) {
566 StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
571 #if MLIR_ENABLE_NVPTXCOMPILER
572 return compileToBinaryNVPTX(*serializedISA);
574 return compileToBinary(*serializedISA);
578 std::optional<SmallVector<char, 0>>
581 assert(module &&
"The module must be non null.");
584 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
585 module->
emitError(
"Module must be a GPU module.");
588 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute),
options);
590 return serializer.run();
594 NVVMTargetAttrImpl::createObject(
Attribute attribute,
597 auto target = cast<NVVMTargetAttr>(attribute);
598 gpu::CompilationTarget format =
options.getCompilationTarget();
599 DictionaryAttr objectProps;
601 if (format == gpu::CompilationTarget::Assembly)
602 objectProps = builder.getDictionaryAttr(
603 {builder.getNamedAttr(
"O", builder.getI32IntegerAttr(target.getO()))});
604 return builder.getAttr<gpu::ObjectAttr>(
606 builder.getStringAttr(StringRef(
object.data(),
object.size())),
#define __DEFAULT_CUDATOOLKIT_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
This class is a general helper class for creating context-global objects like types,...
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< std::string > fileList, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
Operation & module
Module to transform to a binary object.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Base class for all NVVM serializations from GPU modules into binary strings.
ArrayRef< std::string > getFileList() const
Returns the bitcode files to be loaded.
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or, if empty, with the path in getCUDAToolkitPath().
SmallVector< std::string > fileList
List of LLVM bitcode files to link to.
NVVMTargetAttr target
NVVM target attribute.
std::string toolkitPath
CUDA toolkit path.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into fileList.
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
NVVMTargetAttr getTarget() const
Returns the target attribute.
Operation is the basic unit of execution within MLIR.
MLIRContext * getContext()
Return the context this operation is associated with.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
StringRef getCUDAToolkitPath()
Searches & returns the path CUDA toolkit path, the search order is:
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.