#include "llvm/Config/llvm-config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#ifndef __DEFAULT_CUDATOOLKIT_PATH__
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif
class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
public:
  std::optional<SmallVector<char, 0>>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;
    NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
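// Illustrative only: a minimal sketch of how a client typically makes this
// registration visible to a context (the names below are standard MLIR APIs,
// but the snippet is not part of this file):
//
//   mlir::DialectRegistry registry;
//   registry.insert<mlir::NVVM::NVVMDialect, mlir::gpu::GPUDialect>();
//   mlir::NVVM::registerNVVMTargetInterfaceExternalModels(registry);
//   mlir::MLIRContext context(registry);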
StringRef mlir::NVVM::getCUDAToolkitPath() {
  if (const char *var = std::getenv("CUDA_ROOT"))
    return var;
  if (const char *var = std::getenv("CUDA_HOME"))
    return var;
  if (const char *var = std::getenv("CUDA_PATH"))
    return var;
  return __DEFAULT_CUDATOOLKIT_PATH__;
}
SerializeGPUModuleBase::SerializeGPUModuleBase(
    Operation &module, NVVMTargetAttr target,
    const gpu::TargetOptions &targetOptions)
    : ModuleToObject(module, target.getTriple(), target.getChip(),
                     target.getFeatures(), target.getO()),
      target(target), toolkitPath(targetOptions.getToolkitPath()),
      fileList(targetOptions.getLinkFiles()) {
  // Fall back to `getCUDAToolkitPath()` if no toolkit path was provided.
  if (toolkitPath.empty())
    toolkitPath = getCUDAToolkitPath();
  // Append the bitcode files listed in the `#nvvm.target` attribute.
  if (ArrayAttr files = target.getLink())
    for (Attribute attr : files.getValue())
      if (auto file = dyn_cast<StringAttr>(attr))
        fileList.push_back(file.str());
}
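// Note: `fileList` gathers bitcode files from both the target options and the
// `#nvvm.target` attribute; `appendStandardLibs()` later adds libdevice to the
// same list, and `loadBitcodeFiles()` loads and links all of them.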
void SerializeGPUModuleBase::init() {
  static llvm::once_flag initializeBackendOnce;
  llvm::call_once(initializeBackendOnce, []() {
  // If the `NVPTX` LLVM target was built, initialize it.
#if LLVM_HAS_NVPTX_TARGET
    LLVMInitializeNVPTXTarget();
    LLVMInitializeNVPTXTargetInfo();
    LLVMInitializeNVPTXTargetMC();
    LLVMInitializeNVPTXAsmPrinter();
#endif
  });
}
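// If LLVM was built without the NVPTX backend, the initializers above are
// compiled out and serialization fails later in `moduleToObject` with an
// explicit error.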
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
  StringRef pathRef = getToolkitPath();
  if (!pathRef.empty()) {
    SmallVector<char, 256> path;
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_directory(pathRef)) {
      getOperation().emitError()
          << pathRef << " does not exist or is not a directory.\n";
      return failure();
    }
    llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_regular_file(pathRef)) {
      getOperation().emitError()
          << pathRef << " does not exist or is not a file.\n";
      return failure();
    }
    fileList.push_back(pathRef.str());
  }
  return success();
}
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
  if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles)))
    return std::nullopt;
  return std::move(bcFiles);
}
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module op being serialized.
  gpu::GPUModuleOp getOperation();
  /// Compiles PTX to cubin by invoking the `ptxas` tool.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  /// Compiles PTX to cubin in-process through the nvPTXCompiler library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

  /// Temporary file on disk; the file is removed when the pair is destroyed.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  /// Creates a temporary file with the given name and suffix.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  /// Finds the path of the `tool` binary, e.g. `ptxas`.
  std::optional<std::string> findTool(StringRef tool);
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions) {}
std::optional<NVPTXSerializer::TmpFile>
NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
  llvm::SmallString<128> filename;
  std::error_code ec =
      llvm::sys::fs::createTemporaryFile(name, suffix, filename);
  if (ec) {
    getOperation().emitError() << "Couldn't create the temp file: `" << filename
                               << "`, error message: " << ec.message();
    return std::nullopt;
  }
  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
}
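// Note: the `llvm::FileRemover` inside `TmpFile` deletes the temporary file
// when the pair goes out of scope, so all intermediate files created during
// compilation are cleaned up automatically.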
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
  // 1. Check the toolkit path given in the command line.
  StringRef pathRef = targetOptions.getToolkitPath();
  SmallVector<char, 256> path;
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }

  // 2. Check PATH.
  if (std::optional<std::string> toolPath =
          llvm::sys::Process::FindInEnvPath("PATH", tool))
    return *toolPath;

  // 3. Check `getCUDAToolkitPath()`.
  pathRef = getCUDAToolkitPath();
  path.clear();
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }
  getOperation().emitError()
      << "Couldn't find the `" << tool
      << "` binary. Please specify the toolkit "
         "path, add the compiler to $PATH, or set one of the environment "
         "variables in `NVVM::getCUDAToolkitPath()`.";
  return std::nullopt;
}
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  const bool createFatbin =
      targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;

  std::optional<std::string> ptxasCompiler = findTool("ptxas");
  std::optional<std::string> fatbinaryTool;
  if (createFatbin)
    fatbinaryTool = findTool("fatbinary");
  Location loc = getOperation().getLoc();

  // Base name for all temporary files.
  std::string basename =
      llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
                    getTarget().getTriple(), getTarget().getChip());

  // Create the temp PTX, log, and binary files.
  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
  std::optional<TmpFile> logFile = createTemp(basename, "log");
  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");

  // When creating a fatbin the cubin is an intermediate, otherwise it is the
  // final binary.
  TmpFile cubinFile;
  if (createFatbin) {
    Twine cubinFilename = ptxFile->first + ".cubin";
    cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
  } else {
    cubinFile.first = binaryFile->first;
  }
  // Dump the PTX to the temp file.
  std::error_code ec;
  {
    llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
    if (ec) {
      emitError(loc) << "Couldn't open the file: `" << ptxFile->first
                     << "`, error message: " << ec.message();
      return std::nullopt;
    }
    ptxStream << ptxCode;
    if (ptxStream.has_error()) {
      emitError(loc) << "An error occurred while writing the PTX to: `"
                     << ptxFile->first << "`.";
      return std::nullopt;
    }
    ptxStream.flush();
  }
  // Redirect stdout/stderr of the tool invocations to the log file.
  std::optional<StringRef> redirects[] = {
      std::nullopt,
      logFile->first,
      logFile->first,
  };
  // Extra command-line options passed through the target options.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();

  // Create the `ptxas` args.
  std::string optLevel = std::to_string(this->optLevel);
  SmallVector<StringRef> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
       "--opt-level", optLevel});

  bool useFatbin32 = false;
  for (const auto *cArg : cmdOpts.second) {
    // Forward all command-line options to `ptxas` except `-32`, which selects
    // the 32-bit fatbinary format below.
    if (StringRef arg(cArg); arg != "-32")
      ptxasArgs.push_back(arg);
    else
      useFatbin32 = true;
  }
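  // For illustration only (paths and chip are hypothetical), the assembled
  // `ptxas` invocation looks roughly like:
  //   ptxas -arch sm_80 /tmp/mlir-kernels-nvptx64-sm_80.ptx \
  //         -o /tmp/mlir-kernels-nvptx64-sm_80.ptx.cubin --opt-level 3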
  // Create the `fatbinary` args.
  StringRef chip = getTarget().getChip();
  // Remove the arch prefix to obtain the compute capability.
  chip.consume_front("sm_"), chip.consume_front("compute_");
  // Embed the cubin object.
  std::string cubinArg =
      llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
          .str();
  // Embed the PTX so the driver can JIT it for newer architectures.
  std::string ptxArg =
      llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
          .str();
  SmallVector<StringRef> fatbinArgs({StringRef("fatbinary"),
                                     useFatbin32 ? "-32" : "-64", cubinArg,
                                     ptxArg, "--create", binaryFile->first});
// Dump the tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::dbgs() << "Tool invocation for module: "
                 << getOperation().getNameAttr() << "\n";
    llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
    llvm::dbgs() << "\n";
    if (createFatbin) {
      llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
      llvm::dbgs() << "\n";
    }
  });
#undef DEBUG_TYPE
  // Helper for reporting tool failures, preferring the tool's log file when no
  // error message was produced.
  std::string message;
  auto emitLogError =
      [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
    if (message.empty()) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
          llvm::MemoryBuffer::getFile(logFile->first);
      if (toolStderr)
        emitError(loc) << toolName << " invocation failed. Log:\n"
                       << toolStderr->get()->getBuffer();
      else
        emitError(loc) << toolName << " invocation failed.";
      return std::nullopt;
    }
    emitError(loc) << toolName
                   << " invocation failed, error message: " << message;
    return std::nullopt;
  };
  // Invoke `ptxas`.
  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
                                /*Env=*/std::nullopt, /*Redirects=*/redirects,
                                /*SecondsToWait=*/0, /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
// Dump the SASS if the debug flag is set.
#define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
    SmallVector<StringRef> nvdisasmArgs(
        {StringRef("nvdisasm"), StringRef(cubinFile.first)});
    if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
                                  /*Env=*/std::nullopt, /*Redirects=*/redirects,
                                  /*SecondsToWait=*/0, /*MemoryLimit=*/0,
                                  /*ErrMsg=*/&message))
      return emitLogError("`nvdisasm`");
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Invoke `fatbinary` when a fatbin was requested.
  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
                                                /*Env=*/std::nullopt,
                                                /*Redirects=*/redirects,
                                                /*SecondsToWait=*/0,
                                                /*MemoryLimit=*/0,
                                                /*ErrMsg=*/&message))
    return emitLogError("`fatbinary`");
// Dump the tool output, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  // Read the resulting binary back into memory.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
      llvm::MemoryBuffer::getFile(binaryFile->first);
  if (!binaryBuffer) {
    emitError(loc) << "Couldn't open the file: `" << binaryFile->first
                   << "`, error message: " << binaryBuffer.getError().message();
    return std::nullopt;
  }
  StringRef fatbin = (*binaryBuffer)->getBuffer();
  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
}
#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)
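// When MLIR is configured with MLIR_ENABLE_NVPTXCOMPILER, PTX is compiled
// in-process through the nvPTXCompiler library below instead of shelling out
// to `ptxas`; `moduleToObject` selects between the two paths.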
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the compiler options.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());
  // Check if compilation failed.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else {
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    }
    return std::nullopt;
  }

  // Retrieve the compiled binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
// Dump the compiler log, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Destroy the compiler handle and return the binary.
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
  return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
// Dump the LLVM IR if the debug flag is set.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
                 << "\n";
    llvm::dbgs() << llvmModule << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return LLVM IR if the compilation target is `offload`.
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule);

#if !LLVM_HAS_NVPTX_TARGET
  getOperation()->emitError(
      "The `NVPTX` target was not built. Please enable it when building LLVM.");
  return std::nullopt;
#endif
  // Emit PTX code.
  std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  if (!targetMachine) {
    getOperation().emitError() << "Target Machine unavailable for triple "
                               << triple << ", can't optimize with LLVM\n";
    return std::nullopt;
  }
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, **targetMachine);
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }

// Dump the PTX if the debug flag is set.
#define DEBUG_TYPE "serialize-to-isa"
  LLVM_DEBUG({
    llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << *serializedISA << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE
  // Return PTX if the compilation target is `assembly`.
  if (targetOptions.getCompilationTarget() ==
      gpu::CompilationTarget::Assembly) {
    // Make sure to include the null terminator.
    StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
    return SmallVector<char, 0>(bin.begin(), bin.end());
  }

#if MLIR_ENABLE_NVPTXCOMPILER
  return compileToBinaryNVPTX(*serializedISA);
#else
  return compileToBinary(*serializedISA);
#endif
}
std::optional<SmallVector<char, 0>>
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                      const gpu::TargetOptions &options) const {
  assert(module && "The module must be non null.");
  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
    module->emitError("Module must be a GPU module.");
    return std::nullopt;
  }
  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
  serializer.init();
  return serializer.run();
}
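// Illustrative only: this hook is normally reached through the
// `gpu-module-to-binary` pass, e.g. something like
//   mlir-opt --gpu-module-to-binary="format=fatbin" input.mlir
// (exact option spelling may differ between MLIR versions).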
Attribute
NVVMTargetAttrImpl::createObject(Attribute attribute,
                                 const SmallVector<char, 0> &object,
                                 const gpu::TargetOptions &options) const {
  auto target = cast<NVVMTargetAttr>(attribute);
  gpu::CompilationTarget format = options.getCompilationTarget();
  DictionaryAttr objectProps;
  Builder builder(attribute.getContext());
  if (format == gpu::CompilationTarget::Assembly)
    objectProps = builder.getDictionaryAttr(
        {builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO()))});
  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
      builder.getStringAttr(StringRef(object.data(), object.size())),
      objectProps, nullptr);
}
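// Illustrative only: the resulting object ends up embedded in a `gpu.binary`
// op, roughly of the form
//   gpu.binary @kernels [#gpu.object<#nvvm.target<chip = "sm_80">, "...">]
// where the string literal holds the serialized fatbin/cubin/PTX bytes.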