MLIR 22.0.0git
Target.cpp
Go to the documentation of this file.
1//===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines NVVM target related functions including registration
10// calls for the `#nvvm.target` compilation attribute.
11//
12//===----------------------------------------------------------------------===//
13
15
27#include "llvm/Support/InterleavedRange.h"
28
29#include "llvm/ADT/ScopeExit.h"
30#include "llvm/Config/Targets.h"
31#include "llvm/Support/DebugLog.h"
32#include "llvm/Support/FileSystem.h"
33#include "llvm/Support/FileUtilities.h"
34#include "llvm/Support/FormatVariadic.h"
35#include "llvm/Support/MemoryBuffer.h"
36#include "llvm/Support/Path.h"
37#include "llvm/Support/Process.h"
38#include "llvm/Support/Program.h"
39#include "llvm/Support/TargetSelect.h"
40#include "llvm/Support/Timer.h"
41#include "llvm/Support/raw_ostream.h"
42
43#include <cstdint>
44#include <cstdlib>
45#include <optional>
46
47using namespace mlir;
48using namespace mlir::NVVM;
49
50#ifndef __DEFAULT_CUDATOOLKIT_PATH__
51#define __DEFAULT_CUDATOOLKIT_PATH__ ""
52#endif
53
54extern "C" const unsigned char _mlir_embedded_libdevice[];
55extern "C" const unsigned _mlir_embedded_libdevice_size;
56
57namespace {
58// Implementation of the `TargetAttrInterface` model.
59class NVVMTargetAttrImpl
60 : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
61public:
62 std::optional<SmallVector<char, 0>>
63 serializeToObject(Attribute attribute, Operation *module,
64 const gpu::TargetOptions &options) const;
65
66 Attribute createObject(Attribute attribute, Operation *module,
67 const SmallVector<char, 0> &object,
68 const gpu::TargetOptions &options) const;
69};
70} // namespace
71
72// Register the NVVM dialect, the NVVM translation & the target interface.
74 DialectRegistry &registry) {
75 registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
76 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
77 });
78}
79
86
87// Search for the CUDA toolkit path.
89 if (const char *var = std::getenv("CUDA_ROOT"))
90 return var;
91 if (const char *var = std::getenv("CUDA_HOME"))
92 return var;
93 if (const char *var = std::getenv("CUDA_PATH"))
94 return var;
96}
97
99 Operation &module, NVVMTargetAttr target,
100 const gpu::TargetOptions &targetOptions)
101 : ModuleToObject(module, target.getTriple(), target.getChip(),
102 target.getFeatures(), target.getO(),
103 targetOptions.getInitialLlvmIRCallback(),
104 targetOptions.getLinkedLlvmIRCallback(),
105 targetOptions.getOptimizedLlvmIRCallback(),
106 targetOptions.getISACallback()),
107 target(target), toolkitPath(targetOptions.getToolkitPath()),
108 librariesToLink(targetOptions.getLibrariesToLink()) {
109
110 // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
111 if (toolkitPath.empty())
113
114 // Append the files in the target attribute.
115 if (target.getLink())
116 librariesToLink.append(target.getLink().begin(), target.getLink().end());
117
118 // Append libdevice to the files to be loaded.
119 (void)appendStandardLibs();
120}
121
123 static llvm::once_flag initializeBackendOnce;
124 llvm::call_once(initializeBackendOnce, []() {
125 // If the `NVPTX` LLVM target was built, initialize it.
126#if LLVM_HAS_NVPTX_TARGET
127 LLVMInitializeNVPTXTarget();
128 LLVMInitializeNVPTXTargetInfo();
129 LLVMInitializeNVPTXTargetMC();
130 LLVMInitializeNVPTXAsmPrinter();
131#endif
132 });
133}
134
135NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
136
138
142
143// Try to append `libdevice` from a CUDA toolkit installation.
145#if MLIR_NVVM_EMBED_LIBDEVICE
146 // If libdevice is embedded in the binary, we don't look it up on the
147 // filesystem.
148 MLIRContext *ctx = target.getContext();
149 auto type =
151 IntegerType::get(ctx, 8));
152 auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
153
154 // Lookup if we already loaded the resource, otherwise create it.
156 resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
157 if (blob) {
158 librariesToLink.push_back(DenseResourceElementsAttr::get(
160 blob, ctx->getLoadedDialect<BuiltinDialect>())));
161 return success();
162 }
163
164 // Allocate a resource using one of the UnManagedResourceBlob method to wrap
165 // the embedded data.
169 librariesToLink.push_back(DenseResourceElementsAttr::get(
170 type, resourceManager.insert("_mlir_embedded_libdevice",
171 std::move(unmanagedBlob))));
172#else
173 StringRef pathRef = getToolkitPath();
174 if (!pathRef.empty()) {
176 path.insert(path.begin(), pathRef.begin(), pathRef.end());
177 pathRef = StringRef(path.data(), path.size());
178 if (!llvm::sys::fs::is_directory(pathRef)) {
179 getOperation().emitError() << "CUDA path: " << pathRef
180 << " does not exist or is not a directory.\n";
181 return failure();
182 }
183 llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
184 pathRef = StringRef(path.data(), path.size());
185 if (!llvm::sys::fs::is_regular_file(pathRef)) {
186 getOperation().emitError() << "LibDevice path: " << pathRef
187 << " does not exist or is not a file.\n";
188 return failure();
189 }
190 librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
191 }
192#endif
193 return success();
194}
195
196std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
199 if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
200 bcFiles, true)))
201 return std::nullopt;
202 return std::move(bcFiles);
203}
204
205namespace {
206class NVPTXSerializer : public SerializeGPUModuleBase {
207public:
208 NVPTXSerializer(Operation &module, NVVMTargetAttr target,
209 const gpu::TargetOptions &targetOptions);
210
211 /// Returns the GPU module op being serialized.
212 gpu::GPUModuleOp getOperation();
213
214 /// Compiles PTX to cubin using `ptxas`.
215 std::optional<SmallVector<char, 0>>
216 compileToBinary(const std::string &ptxCode);
217
218 /// Compiles PTX to cubin using the `nvptxcompiler` library.
219 std::optional<SmallVector<char, 0>>
220 compileToBinaryNVPTX(const std::string &ptxCode);
221
222 /// Serializes the LLVM module to an object format, depending on the
223 /// compilation target selected in target options.
224 std::optional<SmallVector<char, 0>>
225 moduleToObject(llvm::Module &llvmModule) override;
226
227 /// Get LLVMIR->ISA performance result.
228 /// Return nullopt if moduleToObject has not been called or the target format
229 /// is LLVMIR.
230 std::optional<int64_t> getLLVMIRToISATimeInMs();
231
232 /// Get ISA->Binary performance result.
233 /// Return nullopt if moduleToObject has not been called or the target format
234 /// is LLVMIR or ISA.
235 std::optional<int64_t> getISAToBinaryTimeInMs();
236
237private:
238 using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
239
240 /// Creates a temp file.
241 std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
242
243 /// Finds the `tool` path, where `tool` is the name of the binary to search,
244 /// i.e. `ptxas` or `fatbinary`. The search order is:
245 /// 1. The toolkit path in `targetOptions`.
246 /// 2. In the system PATH.
247 /// 3. The path from `getCUDAToolkitPath()`.
248 std::optional<std::string> findTool(StringRef tool);
249
250 /// Target options.
251 gpu::TargetOptions targetOptions;
252
253 /// LLVMIR->ISA perf result.
254 std::optional<int64_t> llvmToISATimeInMs;
255
256 /// ISA->Binary perf result.
257 std::optional<int64_t> isaToBinaryTimeInMs;
258};
259} // namespace
260
261NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
262 const gpu::TargetOptions &targetOptions)
263 : SerializeGPUModuleBase(module, target, targetOptions),
264 targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),
265 isaToBinaryTimeInMs(std::nullopt) {}
266
267std::optional<NVPTXSerializer::TmpFile>
268NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
269 llvm::SmallString<128> filename;
270 if (name.size() > 80)
271 name = name.substr(0, 80);
272 std::error_code ec =
273 llvm::sys::fs::createTemporaryFile(name, suffix, filename);
274 if (ec) {
275 getOperation().emitError() << "Couldn't create the temp file: `" << filename
276 << "`, error message: " << ec.message();
277 return std::nullopt;
278 }
279 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
280}
281
282std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {
283 return llvmToISATimeInMs;
284}
285
286std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
287 return isaToBinaryTimeInMs;
288}
289
290gpu::GPUModuleOp NVPTXSerializer::getOperation() {
291 return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
292}
293
294std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
295 // Find the `tool` path.
296 // 1. Check the toolkit path given in the command line.
297 StringRef pathRef = targetOptions.getToolkitPath();
298 SmallVector<char, 256> path;
299 if (!pathRef.empty()) {
300 path.insert(path.begin(), pathRef.begin(), pathRef.end());
301 llvm::sys::path::append(path, "bin", tool);
302 if (llvm::sys::fs::can_execute(path))
303 return StringRef(path.data(), path.size()).str();
304 }
305
306 // 2. Check PATH.
307 if (std::optional<std::string> toolPath =
308 llvm::sys::Process::FindInEnvPath("PATH", tool))
309 return *toolPath;
310
311 // 3. Check `getCUDAToolkitPath()`.
312 pathRef = getCUDAToolkitPath();
313 path.clear();
314 if (!pathRef.empty()) {
315 path.insert(path.begin(), pathRef.begin(), pathRef.end());
316 llvm::sys::path::append(path, "bin", tool);
317 if (llvm::sys::fs::can_execute(path))
318 return StringRef(path.data(), path.size()).str();
319 }
320 getOperation().emitError()
321 << "Couldn't find the `" << tool
322 << "` binary. Please specify the toolkit "
323 "path, add the compiler to $PATH, or set one of the environment "
324 "variables in `NVVM::getCUDAToolkitPath()`.";
325 return std::nullopt;
326}
327
328/// Adds optional command-line arguments to existing arguments.
329template <typename T>
330static void setOptionalCommandlineArguments(NVVMTargetAttr target,
331 SmallVectorImpl<T> &ptxasArgs) {
332 if (!target.hasCmdOptions())
333 return;
334
335 std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
336 for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
337 if (auto strAttr = dyn_cast<StringAttr>(attr)) {
338 if constexpr (std::is_same_v<T, StringRef>) {
339 ptxasArgs.push_back(strAttr.getValue());
340 } else if constexpr (std::is_same_v<T, const char *>) {
341 ptxasArgs.push_back(strAttr.getValue().data());
342 }
343 }
344 }
345}
346
347// TODO: clean this method & have a generic tool driver or never emit binaries
348// with this mechanism and let another stage take care of it.
349std::optional<SmallVector<char, 0>>
350NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
351 // Determine if the serializer should create a fatbinary with the PTX embeded
352 // or a simple CUBIN binary.
353 const bool createFatbin =
354 targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
355
356 // Find the `ptxas` & `fatbinary` tools.
357 std::optional<std::string> ptxasCompiler = findTool("ptxas");
358 if (!ptxasCompiler)
359 return std::nullopt;
360 std::optional<std::string> fatbinaryTool;
361 if (createFatbin) {
362 fatbinaryTool = findTool("fatbinary");
363 if (!fatbinaryTool)
364 return std::nullopt;
365 }
366 Location loc = getOperation().getLoc();
367
368 // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
369 std::string basename =
370 llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
371 getTarget().getTriple(), getTarget().getChip());
372
373 // Create temp files:
374 std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
375 if (!ptxFile)
376 return std::nullopt;
377 std::optional<TmpFile> logFile = createTemp(basename, "log");
378 if (!logFile)
379 return std::nullopt;
380 std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
381 if (!binaryFile)
382 return std::nullopt;
383 TmpFile cubinFile;
384 if (createFatbin) {
385 std::string cubinFilename = (ptxFile->first + ".cubin").str();
386 cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
387 } else {
388 cubinFile.first = binaryFile->first;
389 }
390
391 std::error_code ec;
392 // Dump the PTX to a temp file.
393 {
394 llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
395 if (ec) {
396 emitError(loc) << "Couldn't open the file: `" << ptxFile->first
397 << "`, error message: " << ec.message();
398 return std::nullopt;
399 }
400 ptxStream << ptxCode;
401 if (ptxStream.has_error()) {
402 emitError(loc) << "An error occurred while writing the PTX to: `"
403 << ptxFile->first << "`.";
404 return std::nullopt;
405 }
406 ptxStream.flush();
407 }
408
409 // Command redirects.
410 std::optional<StringRef> redirects[] = {
411 std::nullopt,
412 logFile->first,
413 logFile->first,
414 };
415
416 // Get any extra args passed in `targetOptions`.
417 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
418 targetOptions.tokenizeCmdOptions();
419
420 // Create ptxas args.
421 std::string optLevel = std::to_string(this->optLevel);
422 SmallVector<StringRef, 12> ptxasArgs(
423 {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
424 StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
425 "--opt-level", optLevel});
426
427 bool useFatbin32 = false;
428 for (const auto *cArg : cmdOpts.second) {
429 // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
430 // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
431 // assumed.
432 if (StringRef arg(cArg); arg != "-32")
433 ptxasArgs.push_back(arg);
434 else
435 useFatbin32 = true;
436 }
437
438 // Set optional command line arguments
439 setOptionalCommandlineArguments(getTarget(), ptxasArgs);
440
441 // Create the `fatbinary` args.
442 StringRef chip = getTarget().getChip();
443 // Remove the arch prefix to obtain the compute capability.
444 chip.consume_front("sm_"), chip.consume_front("compute_");
445 // Embed the cubin object.
446 std::string cubinArg =
447 llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
448 .str();
449 // Embed the PTX file so the driver can JIT if needed.
450 std::string ptxArg =
451 llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
452 .str();
453 SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
454 useFatbin32 ? "-32" : "-64", cubinArg,
455 ptxArg, "--create", binaryFile->first});
456
457 // Dump tool invocation commands.
458#define DEBUG_TYPE "serialize-to-binary"
459 LDBG() << "Tool invocation for module: " << getOperation().getNameAttr()
460 << "\nptxas executable:" << ptxasCompiler.value()
461 << "\nptxas args: " << llvm::interleaved(ptxasArgs, " ");
462 if (createFatbin)
463 LDBG() << "fatbin args: " << llvm::interleaved(fatbinArgs, " ");
464#undef DEBUG_TYPE
465
466 // Helper function for printing tool error logs.
467 std::string message;
468 auto emitLogError =
469 [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
470 if (message.empty()) {
471 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
472 llvm::MemoryBuffer::getFile(logFile->first);
473 if (toolStderr)
474 emitError(loc) << toolName << " invocation failed. Log:\n"
475 << toolStderr->get()->getBuffer();
476 else
477 emitError(loc) << toolName << " invocation failed.";
478 return std::nullopt;
479 }
480 emitError(loc) << toolName
481 << " invocation failed, error message: " << message;
482 return std::nullopt;
483 };
484
485 // Invoke PTXAS.
486 if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
487 /*Env=*/std::nullopt,
488 /*Redirects=*/redirects,
489 /*SecondsToWait=*/0,
490 /*MemoryLimit=*/0,
491 /*ErrMsg=*/&message))
492 return emitLogError("`ptxas`");
493#define DEBUG_TYPE "dump-sass"
494 LLVM_DEBUG({
495 std::optional<std::string> nvdisasm = findTool("nvdisasm");
496 SmallVector<StringRef> nvdisasmArgs(
497 {StringRef("nvdisasm"), StringRef(cubinFile.first)});
498 if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
499 /*Env=*/std::nullopt,
500 /*Redirects=*/redirects,
501 /*SecondsToWait=*/0,
502 /*MemoryLimit=*/0,
503 /*ErrMsg=*/&message))
504 return emitLogError("`nvdisasm`");
505 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
506 llvm::MemoryBuffer::getFile(logFile->first);
507 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
508 LDBG() << "Output:\n" << (*logBuffer)->getBuffer();
509 llvm::dbgs().flush();
510 }
511 });
512#undef DEBUG_TYPE
513
514 // Invoke `fatbin`.
515 message.clear();
516 if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
517 /*Env=*/std::nullopt,
518 /*Redirects=*/redirects,
519 /*SecondsToWait=*/0,
520 /*MemoryLimit=*/0,
521 /*ErrMsg=*/&message))
522 return emitLogError("`fatbinary`");
523
524// Dump the output of the tools, helpful if the verbose flag was passed.
525#define DEBUG_TYPE "serialize-to-binary"
526 LLVM_DEBUG({
527 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
528 llvm::MemoryBuffer::getFile(logFile->first);
529 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
530 LDBG() << "Output:\n" << (*logBuffer)->getBuffer();
531 llvm::dbgs().flush();
532 }
533 });
534#undef DEBUG_TYPE
535
536 // Read the fatbin.
537 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
538 llvm::MemoryBuffer::getFile(binaryFile->first);
539 if (!binaryBuffer) {
540 emitError(loc) << "Couldn't open the file: `" << binaryFile->first
541 << "`, error message: " << binaryBuffer.getError().message();
542 return std::nullopt;
543 }
544 StringRef fatbin = (*binaryBuffer)->getBuffer();
545 return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
546}
547
548#if MLIR_ENABLE_NVPTXCOMPILER
549#include "nvPTXCompiler.h"
550
551#define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
552 do { \
553 if (auto status = (expr)) { \
554 emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
555 << status; \
556 return std::nullopt; \
557 } \
558 } while (false)
559
560#include "nvFatbin.h"
561
562#define RETURN_ON_NVFATBIN_ERROR(expr) \
563 do { \
564 auto result = (expr); \
565 if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
566 emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
567 << nvFatbinGetErrorString(result); \
568 return std::nullopt; \
569 } \
570 } while (false)
571
572std::optional<SmallVector<char, 0>>
573NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
574 Location loc = getOperation().getLoc();
575 nvPTXCompilerHandle compiler = nullptr;
576 nvPTXCompileResult status;
577 size_t logSize;
578
579 // Create the options.
580 std::string optLevel = std::to_string(this->optLevel);
581 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
582 targetOptions.tokenizeCmdOptions();
583 cmdOpts.second.append(
584 {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
585
586 // Set optional command line arguments
587 setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
588 // Create the compiler handle.
589 RETURN_ON_NVPTXCOMPILER_ERROR(
590 nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
591
592 // Try to compile the binary.
593 status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
594 cmdOpts.second.data());
595
596 // Check if compilation failed.
597 if (status != NVPTXCOMPILE_SUCCESS) {
598 RETURN_ON_NVPTXCOMPILER_ERROR(
599 nvPTXCompilerGetErrorLogSize(compiler, &logSize));
600 if (logSize != 0) {
601 SmallVector<char> log(logSize + 1, 0);
602 RETURN_ON_NVPTXCOMPILER_ERROR(
603 nvPTXCompilerGetErrorLog(compiler, log.data()));
604 emitError(loc) << "NVPTX compiler invocation failed, error log: "
605 << log.data();
606 } else {
607 emitError(loc) << "NVPTX compiler invocation failed with error code: "
608 << status;
609 }
610 return std::nullopt;
611 }
612
613 // Retrieve the binary.
614 size_t elfSize;
615 RETURN_ON_NVPTXCOMPILER_ERROR(
616 nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
617 SmallVector<char, 0> binary(elfSize, 0);
618 RETURN_ON_NVPTXCOMPILER_ERROR(
619 nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
620
621// Dump the log of the compiler, helpful if the verbose flag was passed.
622#define DEBUG_TYPE "serialize-to-binary"
623 LLVM_DEBUG({
624 RETURN_ON_NVPTXCOMPILER_ERROR(
625 nvPTXCompilerGetInfoLogSize(compiler, &logSize));
626 if (logSize != 0) {
627 SmallVector<char> log(logSize + 1, 0);
628 RETURN_ON_NVPTXCOMPILER_ERROR(
629 nvPTXCompilerGetInfoLog(compiler, log.data()));
630 LDBG() << "NVPTX compiler invocation for module: "
631 << getOperation().getNameAttr()
632 << "\nArguments: " << llvm::interleaved(cmdOpts.second, " ")
633 << "\nOutput\n"
634 << log.data();
635 }
636 });
637#undef DEBUG_TYPE
638 RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
639
640 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
641 bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
642 return llvm::StringRef(option) == "-32";
643 });
644
645 const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
646 nvFatbinHandle handle;
647
648 auto chip = getTarget().getChip();
649 chip.consume_front("sm_");
650
651 RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
652 RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
653 handle, binary.data(), binary.size(), chip.data(), nullptr));
654 RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
655 handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
656
657 size_t fatbinSize;
658 RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
659 SmallVector<char, 0> fatbin(fatbinSize, 0);
660 RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
661 RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
662 return fatbin;
663 }
664
665 return binary;
666}
667#endif // MLIR_ENABLE_NVPTXCOMPILER
668
669std::optional<SmallVector<char, 0>>
670NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
671 llvm::Timer moduleToObjectTimer(
672 "moduleToObjectTimer",
673 "Timer for perf llvm-ir -> isa and isa -> binary.");
674 auto clear = llvm::make_scope_exit([&]() { moduleToObjectTimer.clear(); });
675 // Return LLVM IR if the compilation target is `offload`.
676#define DEBUG_TYPE "serialize-to-llvm"
677 LLVM_DEBUG({
678 LDBG() << "LLVM IR for module: " << getOperation().getNameAttr();
679 LDBG() << llvmModule;
680 });
681#undef DEBUG_TYPE
682 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
684
685#if !LLVM_HAS_NVPTX_TARGET
686 getOperation()->emitError(
687 "The `NVPTX` target was not built. Please enable it when building LLVM.");
688 return std::nullopt;
689#endif // LLVM_HAS_NVPTX_TARGET
690
691 // Emit PTX code.
692 std::optional<llvm::TargetMachine *> targetMachine =
693 getOrCreateTargetMachine();
694 if (!targetMachine) {
695 getOperation().emitError() << "Target Machine unavailable for triple "
696 << triple << ", can't optimize with LLVM\n";
697 return std::nullopt;
698 }
699 moduleToObjectTimer.startTimer();
700 std::optional<std::string> serializedISA =
701 translateToISA(llvmModule, **targetMachine);
702 moduleToObjectTimer.stopTimer();
703 llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
704 moduleToObjectTimer.clear();
705 if (!serializedISA) {
706 getOperation().emitError() << "Failed translating the module to ISA.";
707 return std::nullopt;
708 }
709
710 if (isaCallback)
711 isaCallback(serializedISA.value());
712
713#define DEBUG_TYPE "serialize-to-isa"
714 LDBG() << "PTX for module: " << getOperation().getNameAttr() << "\n"
715 << *serializedISA;
716#undef DEBUG_TYPE
717
718 // Return PTX if the compilation target is `assembly`.
719 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
720 return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
721
722 std::optional<SmallVector<char, 0>> result;
723 moduleToObjectTimer.startTimer();
724 // Compile to binary.
725#if MLIR_ENABLE_NVPTXCOMPILER
726 result = compileToBinaryNVPTX(*serializedISA);
727#else
728 result = compileToBinary(*serializedISA);
729#endif // MLIR_ENABLE_NVPTXCOMPILER
730
731 moduleToObjectTimer.stopTimer();
732 isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
733 moduleToObjectTimer.clear();
734 return result;
735}
736
737std::optional<SmallVector<char, 0>>
738NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
739 const gpu::TargetOptions &options) const {
740 Builder builder(attribute.getContext());
741 assert(module && "The module must be non null.");
742 if (!module)
743 return std::nullopt;
744 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
745 module->emitError("Module must be a GPU module.");
746 return std::nullopt;
747 }
748 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
749 serializer.init();
750 std::optional<SmallVector<char, 0>> result = serializer.run();
751 auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
752 if (llvmToISATimeInMs.has_value())
753 module->setAttr("LLVMIRToISATimeInMs",
754 builder.getI64IntegerAttr(*llvmToISATimeInMs));
755 auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
756 if (isaToBinaryTimeInMs.has_value())
757 module->setAttr("ISAToBinaryTimeInMs",
758 builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
759 return result;
760}
761
762Attribute
763NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
764 const SmallVector<char, 0> &object,
765 const gpu::TargetOptions &options) const {
766 auto target = cast<NVVMTargetAttr>(attribute);
767 gpu::CompilationTarget format = options.getCompilationTarget();
768 DictionaryAttr objectProps;
769 Builder builder(attribute.getContext());
770 SmallVector<NamedAttribute, 4> properties;
771 if (format == gpu::CompilationTarget::Assembly)
772 properties.push_back(
773 builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
774
775 if (StringRef section = options.getELFSection(); !section.empty())
776 properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
777 builder.getStringAttr(section)));
778
779 for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
780 if (module->hasAttr(perfName)) {
781 IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
782 properties.push_back(builder.getNamedAttr(
783 perfName, builder.getI64IntegerAttr(attr.getInt())));
784 }
785 }
786
787 if (!properties.empty())
788 objectProps = builder.getDictionaryAttr(properties);
789
790 return builder.getAttr<gpu::ObjectAttr>(
791 attribute, format,
792 builder.getStringAttr(StringRef(object.data(), object.size())),
793 objectProps, /*kernels=*/nullptr);
794}
return success()
static void setOptionalCommandlineArguments(NVVMTargetAttr target, SmallVectorImpl< T > &ptxasArgs)
Adds optional command-line arguments to existing arguments.
Definition Target.cpp:330
const unsigned _mlir_embedded_libdevice_size
Definition Target.cpp:55
#define __DEFAULT_CUDATOOLKIT_PATH__
Definition Target.cpp:51
const unsigned char _mlir_embedded_libdevice[]
Definition Target.cpp:54
values clear()
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
Definition Attributes.h:25
MLIRContext * getContext() const
Return the context this attribute belongs to.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
The class represents an individual entry of a blob.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module > > &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
ModuleToObject(Operation &module, StringRef triple, StringRef chip, StringRef features={}, int optLevel=3, function_ref< void(llvm::Module &)> initialLlvmIRCallback={}, function_ref< void(llvm::Module &)> linkedLlvmIRCallback={}, function_ref< void(llvm::Module &)> optimizedLlvmIRCallback={}, function_ref< void(StringRef)> isaCallback={})
Operation & module
Module to transform to a binary object.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Base class for all NVVM serializations from GPU modules into binary strings.
Definition Utils.h:32
ArrayRef< Attribute > getLibrariesToLink() const
Returns the bitcode libraries to be linked into the gpu module after translation to LLVM IR.
Definition Target.cpp:139
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getCUDAToolki...
Definition Target.cpp:98
NVVMTargetAttr target
NVVM target attribute.
Definition Utils.h:63
std::string toolkitPath
CUDA toolkit path.
Definition Utils.h:66
SmallVector< Attribute > librariesToLink
List of LLVM bitcode to link into after translation to LLVM IR.
Definition Utils.h:71
std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in librariesToLink.
Definition Target.cpp:197
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into librariesToLink.
Definition Target.cpp:144
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
Definition Target.cpp:122
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
Definition Target.cpp:137
NVVMTargetAttr getTarget() const
Returns the target attribute.
Definition Target.cpp:135
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Attribute getAttr(StringAttr name)
Return the specified attribute if present, null otherwise.
Definition Operation.h:534
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition Operation.h:560
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
static AsmResourceBlob allocateInferAlign(ArrayRef< T > data, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
Definition AsmState.h:235
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
Definition Target.cpp:73
StringRef getCUDAToolkitPath()
Searches for and returns the CUDA toolkit path; the search order is:
Definition Target.cpp:88
constexpr StringLiteral elfSectionName
Include the generated interface declarations.
DialectResourceBlobHandle< BuiltinDialect > DenseResourceElementsHandle
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
static ManagerInterface & getManagerInterface(MLIRContext *ctx)