16 #include "mlir/Config/mlir-config.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FileUtilities.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/Process.h"
32 #include "llvm/Support/Program.h"
33 #include "llvm/Support/TargetSelect.h"
40 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
41 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
46 class NVVMTargetAttrImpl
47 :
public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
49 std::optional<SmallVector<char, 0>>
63 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
76 if (
const char *var = std::getenv(
"CUDA_ROOT"))
78 if (
const char *var = std::getenv(
"CUDA_HOME"))
80 if (
const char *var = std::getenv(
"CUDA_PATH"))
88 : ModuleToObject(module, target.getTriple(), target.getChip(),
89 target.getFeatures(), target.getO()),
90 target(target), toolkitPath(targetOptions.getToolkitPath()),
91 fileList(targetOptions.getLinkFiles()) {
98 if (ArrayAttr files =
target.getLink())
100 if (
auto file = dyn_cast<StringAttr>(attr))
108 static llvm::once_flag initializeBackendOnce;
109 llvm::call_once(initializeBackendOnce, []() {
111 #if LLVM_HAS_NVPTX_TARGET
112 LLVMInitializeNVPTXTarget();
113 LLVMInitializeNVPTXTargetInfo();
114 LLVMInitializeNVPTXTargetMC();
115 LLVMInitializeNVPTXAsmPrinter();
131 if (!pathRef.empty()) {
133 path.insert(path.begin(), pathRef.begin(), pathRef.end());
134 pathRef = StringRef(path.data(), path.size());
135 if (!llvm::sys::fs::is_directory(pathRef)) {
137 <<
" does not exist or is not a directory.\n";
140 llvm::sys::path::append(path,
"nvvm",
"libdevice",
"libdevice.10.bc");
141 pathRef = StringRef(path.data(), path.size());
142 if (!llvm::sys::fs::is_regular_file(pathRef)) {
144 <<
" does not exist or is not a file.\n";
152 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
158 return std::move(bcFiles);
161 #if MLIR_ENABLE_CUDA_CONVERSIONS
165 NVPTXSerializer(
Operation &module, NVVMTargetAttr target,
168 gpu::GPUModuleOp getOperation();
171 std::optional<SmallVector<char, 0>>
172 compileToBinary(
const std::string &ptxCode);
175 std::optional<SmallVector<char, 0>>
176 compileToBinaryNVPTX(
const std::string &ptxCode);
178 std::optional<SmallVector<char, 0>>
179 moduleToObject(llvm::Module &llvmModule)
override;
182 using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
185 std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
192 std::optional<std::string> findTool(StringRef tool);
199 NVPTXSerializer::NVPTXSerializer(
Operation &module, NVVMTargetAttr target,
202 targetOptions(targetOptions) {}
204 std::optional<NVPTXSerializer::TmpFile>
205 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
208 llvm::sys::fs::createTemporaryFile(name, suffix, filename);
210 getOperation().emitError() <<
"Couldn't create the temp file: `" << filename
211 <<
"`, error message: " << ec.message();
214 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
217 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
221 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
224 StringRef pathRef = targetOptions.getToolkitPath();
226 if (!pathRef.empty()) {
227 path.insert(path.begin(), pathRef.begin(), pathRef.end());
228 llvm::sys::path::append(path,
"bin", tool);
229 if (llvm::sys::fs::can_execute(path))
230 return StringRef(path.data(), path.size()).str();
234 if (std::optional<std::string> toolPath =
235 llvm::sys::Process::FindInEnvPath(
"PATH", tool))
241 if (!pathRef.empty()) {
242 path.insert(path.begin(), pathRef.begin(), pathRef.end());
243 llvm::sys::path::append(path,
"bin", tool);
244 if (llvm::sys::fs::can_execute(path))
245 return StringRef(path.data(), path.size()).str();
247 getOperation().emitError()
248 <<
"Couldn't find the `" << tool
249 <<
"` binary. Please specify the toolkit "
250 "path, add the compiler to $PATH, or set one of the environment "
251 "variables in `NVVM::getCUDAToolkitPath()`.";
257 std::optional<SmallVector<char, 0>>
258 NVPTXSerializer::compileToBinary(
const std::string &ptxCode) {
261 const bool createFatbin =
262 targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
265 std::optional<std::string> ptxasCompiler = findTool(
"ptxas");
268 std::optional<std::string> fatbinaryTool = findTool(
"fatbinary");
269 if (createFatbin && !fatbinaryTool)
271 Location loc = getOperation().getLoc();
274 std::string basename =
275 llvm::formatv(
"mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
276 getTarget().getTriple(), getTarget().getChip());
279 std::optional<TmpFile> ptxFile = createTemp(basename,
"ptx");
282 std::optional<TmpFile> logFile = createTemp(basename,
"log");
285 std::optional<TmpFile> binaryFile = createTemp(basename,
"bin");
290 Twine cubinFilename = ptxFile->first +
".cubin";
291 cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
293 cubinFile.first = binaryFile->first;
299 llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
301 emitError(loc) <<
"Couldn't open the file: `" << ptxFile->first
302 <<
"`, error message: " << ec.message();
305 ptxStream << ptxCode;
306 if (ptxStream.has_error()) {
307 emitError(loc) <<
"An error occurred while writing the PTX to: `"
308 << ptxFile->first <<
"`.";
315 std::optional<StringRef> redirects[] = {
322 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
323 targetOptions.tokenizeCmdOptions();
326 std::string optLevel = std::to_string(this->optLevel);
328 {StringRef(
"ptxas"), StringRef(
"-arch"), getTarget().getChip(),
329 StringRef(ptxFile->first), StringRef(
"-o"), StringRef(cubinFile.first),
330 "--opt-level", optLevel});
332 bool useFatbin32 =
false;
333 for (
const auto *cArg : cmdOpts.second) {
337 if (StringRef arg(cArg); arg !=
"-32")
338 ptxasArgs.push_back(arg);
344 StringRef chip = getTarget().getChip();
346 chip.consume_front(
"sm_"), chip.consume_front(
"compute_");
348 std::string cubinArg =
349 llvm::formatv(
"--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
353 llvm::formatv(
"--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
356 useFatbin32 ?
"-32" :
"-64", cubinArg,
357 ptxArg,
"--create", binaryFile->first});
360 #define DEBUG_TYPE "serialize-to-binary"
362 llvm::dbgs() <<
"Tool invocation for module: "
363 << getOperation().getNameAttr() <<
"\n";
364 llvm::interleave(ptxasArgs, llvm::dbgs(),
" ");
365 llvm::dbgs() <<
"\n";
367 llvm::interleave(fatbinArgs, llvm::dbgs(),
" ");
368 llvm::dbgs() <<
"\n";
377 if (message.empty()) {
378 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
379 llvm::MemoryBuffer::getFile(logFile->first);
381 emitError(loc) << toolName <<
" invocation failed. Log:\n"
382 << toolStderr->get()->getBuffer();
384 emitError(loc) << toolName <<
" invocation failed.";
388 <<
" invocation failed, error message: " << message;
393 if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
399 return emitLogError(
"`ptxas`");
403 if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
409 return emitLogError(
"`fatbinary`");
412 #define DEBUG_TYPE "serialize-to-binary"
414 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
415 llvm::MemoryBuffer::getFile(logFile->first);
416 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
417 llvm::dbgs() <<
"Output:\n" << (*logBuffer)->getBuffer() <<
"\n";
418 llvm::dbgs().flush();
424 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
425 llvm::MemoryBuffer::getFile(binaryFile->first);
427 emitError(loc) <<
"Couldn't open the file: `" << binaryFile->first
428 <<
"`, error message: " << binaryBuffer.getError().message();
431 StringRef fatbin = (*binaryBuffer)->getBuffer();
435 #if MLIR_ENABLE_NVPTXCOMPILER
436 #include "nvPTXCompiler.h"
438 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
440 if (auto status = (expr)) { \
441 emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
443 return std::nullopt; \
447 std::optional<SmallVector<char, 0>>
448 NVPTXSerializer::compileToBinaryNVPTX(
const std::string &ptxCode) {
449 Location loc = getOperation().getLoc();
450 nvPTXCompilerHandle compiler =
nullptr;
451 nvPTXCompileResult status;
455 std::string optLevel = std::to_string(this->optLevel);
456 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
457 targetOptions.tokenizeCmdOptions();
458 cmdOpts.second.append(
459 {
"-arch", getTarget().getChip().data(),
"--opt-level", optLevel.c_str()});
462 RETURN_ON_NVPTXCOMPILER_ERROR(
463 nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
466 status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
467 cmdOpts.second.data());
470 if (status != NVPTXCOMPILE_SUCCESS) {
471 RETURN_ON_NVPTXCOMPILER_ERROR(
472 nvPTXCompilerGetErrorLogSize(compiler, &logSize));
475 RETURN_ON_NVPTXCOMPILER_ERROR(
476 nvPTXCompilerGetErrorLog(compiler, log.data()));
477 emitError(loc) <<
"NVPTX compiler invocation failed, error log: "
480 emitError(loc) <<
"NVPTX compiler invocation failed with error code: "
487 RETURN_ON_NVPTXCOMPILER_ERROR(
488 nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
490 RETURN_ON_NVPTXCOMPILER_ERROR(
491 nvPTXCompilerGetCompiledProgram(compiler, (
void *)binary.data()));
494 #define DEBUG_TYPE "serialize-to-binary"
496 RETURN_ON_NVPTXCOMPILER_ERROR(
497 nvPTXCompilerGetInfoLogSize(compiler, &logSize));
500 RETURN_ON_NVPTXCOMPILER_ERROR(
501 nvPTXCompilerGetInfoLog(compiler, log.data()));
502 llvm::dbgs() <<
"NVPTX compiler invocation for module: "
503 << getOperation().getNameAttr() <<
"\n";
504 llvm::dbgs() <<
"Arguments: ";
505 llvm::interleave(cmdOpts.second, llvm::dbgs(),
" ");
506 llvm::dbgs() <<
"\nOutput\n" << log.data() <<
"\n";
507 llvm::dbgs().flush();
511 RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
516 std::optional<SmallVector<char, 0>>
517 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
519 #define DEBUG_TYPE "serialize-to-llvm"
521 llvm::dbgs() <<
"LLVM IR for module: " << getOperation().getNameAttr()
523 llvm::dbgs() << llvmModule <<
"\n";
524 llvm::dbgs().flush();
527 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
531 std::optional<llvm::TargetMachine *> targetMachine =
532 getOrCreateTargetMachine();
533 if (!targetMachine) {
534 getOperation().emitError() <<
"Target Machine unavailable for triple "
535 << triple <<
", can't optimize with LLVM\n";
538 std::optional<std::string> serializedISA =
539 translateToISA(llvmModule, **targetMachine);
540 if (!serializedISA) {
541 getOperation().emitError() <<
"Failed translating the module to ISA.";
544 #define DEBUG_TYPE "serialize-to-isa"
546 llvm::dbgs() <<
"PTX for module: " << getOperation().getNameAttr() <<
"\n";
547 llvm::dbgs() << *serializedISA <<
"\n";
548 llvm::dbgs().flush();
553 if (targetOptions.getCompilationTarget() ==
554 gpu::CompilationTarget::Assembly) {
556 StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
561 #if MLIR_ENABLE_NVPTXCOMPILER
562 return compileToBinaryNVPTX(*serializedISA);
564 return compileToBinary(*serializedISA);
569 std::optional<SmallVector<char, 0>>
572 assert(module &&
"The module must be non null.");
575 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
576 module->
emitError(
"Module must be a GPU module.");
579 #if MLIR_ENABLE_CUDA_CONVERSIONS
580 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute),
options);
582 return serializer.run();
585 "The `NVPTX` target was not built. Please enable it when building LLVM.");
591 NVVMTargetAttrImpl::createObject(
Attribute attribute,
594 auto target = cast<NVVMTargetAttr>(attribute);
595 gpu::CompilationTarget format =
options.getCompilationTarget();
596 DictionaryAttr objectProps;
598 if (format == gpu::CompilationTarget::Assembly)
599 objectProps = builder.getDictionaryAttr(
600 {builder.getNamedAttr(
"O", builder.getI32IntegerAttr(target.getO()))});
601 return builder.getAttr<gpu::ObjectAttr>(
603 builder.getStringAttr(StringRef(
object.data(),
object.size())),
#define __DEFAULT_CUDATOOLKIT_PATH__
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
MLIRContext * getContext() const
Return the context this attribute belongs to.
This class is a general helper class for creating context-global objects like types, attributes, and affine expressions.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< std::string > fileList, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
Operation & module
Module to transform to a binary object.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around an Attribute.
MLIRContext is the top-level object for a collection of MLIR operations.
void appendDialectRegistry(const DialectRegistry ®istry)
Append the contents of the given dialect registry to the registry associated with this context.
Base class for all NVVM serializations from GPU modules into binary strings.
ArrayRef< std::string > getFileList() const
Returns the bitcode files to be loaded.
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or, if empty, with the path returned by getCUDAToolkitPath().
SmallVector< std::string > fileList
List of LLVM bitcode files to link to.
NVVMTargetAttr target
NVVM target attribute.
std::string toolkitPath
CUDA toolkit path.
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in fileList.
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into fileList.
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
NVVMTargetAttr getTarget() const
Returns the target attribute.
Operation is the basic unit of execution within MLIR.
MLIRContext * getContext()
Return the context this operation is associated with.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers that may be listening.
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry ®istry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
StringRef getCUDAToolkitPath()
Searches for & returns the CUDA toolkit path; the search order is:
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
This class represents an efficient way to signal success or failure.