MLIR  20.0.0git
Target.cpp
Go to the documentation of this file.
1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
20 #include "mlir/IR/BuiltinDialect.h"
21 #include "mlir/IR/BuiltinTypes.h"
28 
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/FileUtilities.h"
32 #include "llvm/Support/FormatVariadic.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/Process.h"
36 #include "llvm/Support/Program.h"
37 #include "llvm/Support/TargetSelect.h"
38 #include "llvm/Support/raw_ostream.h"
39 
40 #include <cstdint>
41 #include <cstdlib>
42 
43 using namespace mlir;
44 using namespace mlir::NVVM;
45 
46 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
47 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
48 #endif
49 
50 extern "C" const char _mlir_embedded_libdevice[];
51 extern "C" const unsigned _mlir_embedded_libdevice_size;
52 
namespace {
// Implementation of the `TargetAttrInterface` model for `#nvvm.target`.
// Registered as an external (fallback) model so the interface lives with the
// serializer rather than the attribute definition.
class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
public:
  /// Serializes `module` (a `gpu.module`) to a binary object (PTX, cubin,
  /// fatbin, or LLVM bitcode, depending on `options`). Returns `std::nullopt`
  /// on failure.
  std::optional<SmallVector<char, 0>>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  /// Wraps the serialized `object` bytes in a `#gpu.object` attribute,
  /// attaching format-dependent properties.
  Attribute createObject(Attribute attribute, Operation *module,
                         const SmallVector<char, 0> &object,
                         const gpu::TargetOptions &options) const;
};
} // namespace
67 
68 // Register the NVVM dialect, the NVVM translation & the target interface.
70  DialectRegistry &registry) {
71  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
72  NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
73  });
74 }
75 
77  MLIRContext &context) {
78  DialectRegistry registry;
80  context.appendDialectRegistry(registry);
81 }
82 
83 // Search for the CUDA toolkit path.
85  if (const char *var = std::getenv("CUDA_ROOT"))
86  return var;
87  if (const char *var = std::getenv("CUDA_HOME"))
88  return var;
89  if (const char *var = std::getenv("CUDA_PATH"))
90  return var;
92 }
93 
95  Operation &module, NVVMTargetAttr target,
96  const gpu::TargetOptions &targetOptions)
97  : ModuleToObject(module, target.getTriple(), target.getChip(),
98  target.getFeatures(), target.getO(),
99  targetOptions.getInitialLlvmIRCallback(),
100  targetOptions.getLinkedLlvmIRCallback(),
101  targetOptions.getOptimizedLlvmIRCallback(),
102  targetOptions.getISACallback()),
103  target(target), toolkitPath(targetOptions.getToolkitPath()),
104  librariesToLink(targetOptions.getLibrariesToLink()) {
105 
106  // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
107  if (toolkitPath.empty())
109 
110  // Append the files in the target attribute.
111  if (target.getLink())
112  librariesToLink.append(target.getLink().begin(), target.getLink().end());
113 
114  // Append libdevice to the files to be loaded.
115  (void)appendStandardLibs();
116 }
117 
119  static llvm::once_flag initializeBackendOnce;
120  llvm::call_once(initializeBackendOnce, []() {
121  // If the `NVPTX` LLVM target was built, initialize it.
122 #if LLVM_HAS_NVPTX_TARGET
123  LLVMInitializeNVPTXTarget();
124  LLVMInitializeNVPTXTargetInfo();
125  LLVMInitializeNVPTXTargetMC();
126  LLVMInitializeNVPTXAsmPrinter();
127 #endif
128  });
129 }
130 
131 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
132 
134 
136  return librariesToLink;
137 }
138 
139 // Try to append `libdevice` from a CUDA toolkit installation.
141 #if MLIR_NVVM_EMBED_LIBDEVICE
142  // If libdevice is embedded in the binary, we don't look it up on the
143  // filesystem.
144  MLIRContext *ctx = target.getContext();
145  auto type =
147  IntegerType::get(ctx, 8));
148  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
149 
150  // Lookup if we already loaded the resource, otherwise create it.
152  resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
153  if (blob) {
156  blob, ctx->getLoadedDialect<BuiltinDialect>())));
157  return success();
158  }
159 
160  // Allocate a resource using one of the UnManagedResourceBlob method to wrap
161  // the embedded data.
165  type, resourceManager.insert("_mlir_embedded_libdevice",
166  std::move(unmanagedBlob))));
167 #else
168  StringRef pathRef = getToolkitPath();
169  if (!pathRef.empty()) {
171  path.insert(path.begin(), pathRef.begin(), pathRef.end());
172  pathRef = StringRef(path.data(), path.size());
173  if (!llvm::sys::fs::is_directory(pathRef)) {
174  getOperation().emitError() << "CUDA path: " << pathRef
175  << " does not exist or is not a directory.\n";
176  return failure();
177  }
178  llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
179  pathRef = StringRef(path.data(), path.size());
180  if (!llvm::sys::fs::is_regular_file(pathRef)) {
181  getOperation().emitError() << "LibDevice path: " << pathRef
182  << " does not exist or is not a file.\n";
183  return failure();
184  }
185  librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
186  }
187 #endif
188  return success();
189 }
190 
191 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
195  bcFiles, true)))
196  return std::nullopt;
197  return std::move(bcFiles);
198 }
199 
namespace {
/// Serializer for NVVM GPU modules: lowers the module to PTX through the
/// NVPTX backend, then to cubin/fatbin either via the CUDA command-line tools
/// (`ptxas`/`fatbinary`) or the `nvptxcompiler` library, depending on the
/// build configuration and requested compilation target.
class NVPTXSerializer : public SerializeGPUModuleBase {
public:
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module op being serialized.
  gpu::GPUModuleOp getOperation();

  /// Compiles PTX to cubin using `ptxas`.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  /// Compiles PTX to cubin using the `nvptxcompiler` library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  /// Serializes the LLVM module to an object format, depending on the
  /// compilation target selected in target options.
  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

private:
  /// A temporary file path paired with a remover that deletes the file when
  /// the pair is destroyed.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  /// Creates a temp file.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  /// Finds the `tool` path, where `tool` is the name of the binary to search,
  /// i.e. `ptxas` or `fatbinary`. The search order is:
  /// 1. The toolkit path in `targetOptions`.
  /// 2. In the system PATH.
  /// 3. The path from `getCUDAToolkitPath()`.
  std::optional<std::string> findTool(StringRef tool);

  /// Target options (stored by value; copied from the constructor argument).
  gpu::TargetOptions targetOptions;
};
} // namespace
239 
/// Constructor: delegates module/target setup to `SerializeGPUModuleBase` and
/// keeps a copy of the target options for later use by the compile helpers.
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions) {}
244 
245 std::optional<NVPTXSerializer::TmpFile>
246 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
247  llvm::SmallString<128> filename;
248  std::error_code ec =
249  llvm::sys::fs::createTemporaryFile(name, suffix, filename);
250  if (ec) {
251  getOperation().emitError() << "Couldn't create the temp file: `" << filename
252  << "`, error message: " << ec.message();
253  return std::nullopt;
254  }
255  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
256 }
257 
258 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
259  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
260 }
261 
262 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
263  // Find the `tool` path.
264  // 1. Check the toolkit path given in the command line.
265  StringRef pathRef = targetOptions.getToolkitPath();
267  if (!pathRef.empty()) {
268  path.insert(path.begin(), pathRef.begin(), pathRef.end());
269  llvm::sys::path::append(path, "bin", tool);
270  if (llvm::sys::fs::can_execute(path))
271  return StringRef(path.data(), path.size()).str();
272  }
273 
274  // 2. Check PATH.
275  if (std::optional<std::string> toolPath =
276  llvm::sys::Process::FindInEnvPath("PATH", tool))
277  return *toolPath;
278 
279  // 3. Check `getCUDAToolkitPath()`.
280  pathRef = getCUDAToolkitPath();
281  path.clear();
282  if (!pathRef.empty()) {
283  path.insert(path.begin(), pathRef.begin(), pathRef.end());
284  llvm::sys::path::append(path, "bin", tool);
285  if (llvm::sys::fs::can_execute(path))
286  return StringRef(path.data(), path.size()).str();
287  }
288  getOperation().emitError()
289  << "Couldn't find the `" << tool
290  << "` binary. Please specify the toolkit "
291  "path, add the compiler to $PATH, or set one of the environment "
292  "variables in `NVVM::getCUDAToolkitPath()`.";
293  return std::nullopt;
294 }
295 
// TODO: clean this method & have a generic tool driver or never emit binaries
// with this mechanism and let another stage take care of it.
//
// Drives the external CUDA tools: writes `ptxCode` to a temp file, runs
// `ptxas` to produce a cubin, optionally wraps cubin + PTX into a fatbin with
// `fatbinary`, and returns the resulting bytes. All temp files are cleaned up
// by `FileRemover`s when the `TmpFile` pairs go out of scope. Returns
// `std::nullopt` on any failure (errors are emitted on `loc`).
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  // Determine if the serializer should create a fatbinary with the PTX
  // embedded or a simple CUBIN binary.
  const bool createFatbin =
      targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;

  // Find the `ptxas` & `fatbinary` tools.
  std::optional<std::string> ptxasCompiler = findTool("ptxas");
  if (!ptxasCompiler)
    return std::nullopt;
  std::optional<std::string> fatbinaryTool;
  if (createFatbin) {
    fatbinaryTool = findTool("fatbinary");
    if (!fatbinaryTool)
      return std::nullopt;
  }
  Location loc = getOperation().getLoc();

  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
  std::string basename =
      llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
                    getTarget().getTriple(), getTarget().getChip());

  // Create temp files: PTX input, shared tool log, and final binary output.
  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
  if (!ptxFile)
    return std::nullopt;
  std::optional<TmpFile> logFile = createTemp(basename, "log");
  if (!logFile)
    return std::nullopt;
  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
  if (!binaryFile)
    return std::nullopt;
  TmpFile cubinFile;
  if (createFatbin) {
    // The cubin is only an intermediate when a fatbin was requested.
    // NOTE(review): storing a `Twine` in a local is discouraged by LLVM's own
    // guidance (Twine may reference temporaries) — confirm lifetime here.
    Twine cubinFilename = ptxFile->first + ".cubin";
    cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
  } else {
    // Without a fatbin, `ptxas` writes straight to the binary file.
    // `cubinFile.second` stays empty so only `binaryFile` removes the file.
    cubinFile.first = binaryFile->first;
  }

  std::error_code ec;
  // Dump the PTX to a temp file.
  {
    llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
    if (ec) {
      emitError(loc) << "Couldn't open the file: `" << ptxFile->first
                     << "`, error message: " << ec.message();
      return std::nullopt;
    }
    ptxStream << ptxCode;
    if (ptxStream.has_error()) {
      emitError(loc) << "An error occurred while writing the PTX to: `"
                     << ptxFile->first << "`.";
      return std::nullopt;
    }
    ptxStream.flush();
  }

  // Command redirects: stdin untouched; stdout and stderr go to the log file.
  std::optional<StringRef> redirects[] = {
      std::nullopt,
      logFile->first,
      logFile->first,
  };

  // Get any extra args passed in `targetOptions`.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();

  // Create ptxas args.
  std::string optLevel = std::to_string(this->optLevel);
  SmallVector<StringRef, 12> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
       "--opt-level", optLevel});

  bool useFatbin32 = false;
  for (const auto *cArg : cmdOpts.second) {
    // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
    // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
    // assumed.
    if (StringRef arg(cArg); arg != "-32")
      ptxasArgs.push_back(arg);
    else
      useFatbin32 = true;
  }

  // Create the `fatbinary` args.
  StringRef chip = getTarget().getChip();
  // Remove the arch prefix to obtain the compute capability.
  chip.consume_front("sm_"), chip.consume_front("compute_");
  // Embed the cubin object.
  std::string cubinArg =
      llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
          .str();
  // Embed the PTX file so the driver can JIT if needed.
  std::string ptxArg =
      llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
          .str();
  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
                                        useFatbin32 ? "-32" : "-64", cubinArg,
                                        ptxArg, "--create", binaryFile->first});

  // Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::dbgs() << "Tool invocation for module: "
                 << getOperation().getNameAttr() << "\n";
    llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
    llvm::dbgs() << "\n";
    if (createFatbin) {
      llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
      llvm::dbgs() << "\n";
    }
  });
#undef DEBUG_TYPE

  // Helper function for printing tool error logs.
  std::string message;
  auto emitLogError =
      [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
    if (message.empty()) {
      // No OS-level error message: surface the tool's captured log instead.
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
          llvm::MemoryBuffer::getFile(logFile->first);
      if (toolStderr)
        emitError(loc) << toolName << " invocation failed. Log:\n"
                       << toolStderr->get()->getBuffer();
      else
        emitError(loc) << toolName << " invocation failed.";
      return std::nullopt;
    }
    emitError(loc) << toolName
                   << " invocation failed, error message: " << message;
    return std::nullopt;
  };

  // Invoke PTXAS.
  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
                                /*Env=*/std::nullopt,
                                /*Redirects=*/redirects,
                                /*SecondsToWait=*/0,
                                /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
// With `--debug-only=dump-sass`, also disassemble the cubin with `nvdisasm`.
#define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
    SmallVector<StringRef> nvdisasmArgs(
        {StringRef("nvdisasm"), StringRef(cubinFile.first)});
    if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
                                  /*Env=*/std::nullopt,
                                  /*Redirects=*/redirects,
                                  /*SecondsToWait=*/0,
                                  /*MemoryLimit=*/0,
                                  /*ErrMsg=*/&message))
      return emitLogError("`nvdisasm`");
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Invoke `fatbin`.
  message.clear();
  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
                                                /*Env=*/std::nullopt,
                                                /*Redirects=*/redirects,
                                                /*SecondsToWait=*/0,
                                                /*MemoryLimit=*/0,
                                                /*ErrMsg=*/&message))
    return emitLogError("`fatbinary`");

// Dump the output of the tools, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Read the fatbin.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
      llvm::MemoryBuffer::getFile(binaryFile->first);
  if (!binaryBuffer) {
    emitError(loc) << "Couldn't open the file: `" << binaryFile->first
                   << "`, error message: " << binaryBuffer.getError().message();
    return std::nullopt;
  }
  StringRef fatbin = (*binaryBuffer)->getBuffer();
  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
}
498 
499 #if MLIR_ENABLE_NVPTXCOMPILER
500 #include "nvPTXCompiler.h"
501 
502 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
503  do { \
504  if (auto status = (expr)) { \
505  emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
506  << status; \
507  return std::nullopt; \
508  } \
509  } while (false)
510 
511 #include "nvFatbin.h"
512 
513 #define RETURN_ON_NVFATBIN_ERROR(expr) \
514  do { \
515  auto result = (expr); \
516  if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
517  emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
518  << nvFatbinGetErrorString(result); \
519  return std::nullopt; \
520  } \
521  } while (false)
522 
/// Compiles `ptxCode` in-process with the `nvptxcompiler` library, avoiding
/// external tool invocations. When the compilation target is `Fatbin`, the
/// resulting cubin (plus the PTX) is additionally wrapped into a fatbin via
/// the `nvFatbin` library. Returns the binary bytes, or `std::nullopt` on any
/// API failure (the RETURN_ON_* macros above emit the error and return).
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the options.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  // NOTE(review): `getChip().data()` assumes the chip StringRef is
  // null-terminated — confirm, since StringRef does not guarantee it.
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      // +1 so the log is null-terminated before printing via `data()`.
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

// Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));

  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
    // `-32` among the command options selects a 32-bit fatbin; default is 64.
    bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
      return llvm::StringRef(option) == "-32";
    });

    const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
    nvFatbinHandle handle;

    // Strip the arch prefix; nvFatbin expects the bare compute capability.
    auto chip = getTarget().getChip();
    chip.consume_front("sm_");

    RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
        handle, binary.data(), binary.size(), chip.data(), nullptr));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
        handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));

    size_t fatbinSize;
    RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
    SmallVector<char, 0> fatbin(fatbinSize, 0);
    RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
    RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
    return fatbin;
  }

  return binary;
}
616 #endif // MLIR_ENABLE_NVPTXCOMPILER
617 
/// Serializes `llvmModule` according to the compilation target in the
/// options: LLVM bitcode for `Offload` (delegated to the base class), PTX
/// text for `Assembly`, or cubin/fatbin otherwise (via `nvptxcompiler` or the
/// external tools, depending on the build). Returns `std::nullopt` on error.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
  // Return LLVM IR if the compilation target is `offload`.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
                 << "\n";
    llvm::dbgs() << llvmModule << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule);

  // Anything past this point requires the NVPTX backend to emit PTX.
#if !LLVM_HAS_NVPTX_TARGET
  getOperation()->emitError(
      "The `NVPTX` target was not built. Please enable it when building LLVM.");
  return std::nullopt;
#endif // LLVM_HAS_NVPTX_TARGET

  // Emit PTX code.
  std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  if (!targetMachine) {
    getOperation().emitError() << "Target Machine unavailable for triple "
                               << triple << ", can't optimize with LLVM\n";
    return std::nullopt;
  }
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, **targetMachine);
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }
  // Let clients observe the generated PTX if a callback was provided.
  if (isaCallback)
    isaCallback(serializedISA.value());

#define DEBUG_TYPE "serialize-to-isa"
  LLVM_DEBUG({
    llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << *serializedISA << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return PTX if the compilation target is `assembly`.
  if (targetOptions.getCompilationTarget() ==
      gpu::CompilationTarget::Assembly) {
    // Make sure to include the null terminator.
    StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
    return SmallVector<char, 0>(bin.begin(), bin.end());
  }

  // Compile to binary.
#if MLIR_ENABLE_NVPTXCOMPILER
  return compileToBinaryNVPTX(*serializedISA);
#else
  return compileToBinary(*serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER
}
678 
679 std::optional<SmallVector<char, 0>>
680 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
681  const gpu::TargetOptions &options) const {
682  assert(module && "The module must be non null.");
683  if (!module)
684  return std::nullopt;
685  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
686  module->emitError("Module must be a GPU module.");
687  return std::nullopt;
688  }
689  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
690  serializer.init();
691  return serializer.run();
692 }
693 
694 Attribute
695 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
696  const SmallVector<char, 0> &object,
697  const gpu::TargetOptions &options) const {
698  auto target = cast<NVVMTargetAttr>(attribute);
699  gpu::CompilationTarget format = options.getCompilationTarget();
700  DictionaryAttr objectProps;
701  Builder builder(attribute.getContext());
703  if (format == gpu::CompilationTarget::Assembly)
704  properties.push_back(
705  builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
706 
707  if (StringRef section = options.getELFSection(); !section.empty())
708  properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
709  builder.getStringAttr(section)));
710 
711  if (!properties.empty())
712  objectProps = builder.getDictionaryAttr(properties);
713 
714  return builder.getAttr<gpu::ObjectAttr>(
715  attribute, format,
716  builder.getStringAttr(StringRef(object.data(), object.size())),
717  objectProps, /*kernels=*/nullptr);
718 }
const char _mlir_embedded_libdevice[]
Definition: Target.cpp:50
const unsigned _mlir_embedded_libdevice_size
Definition: Target.cpp:51
#define __DEFAULT_CUDATOOLKIT_PATH__
Definition: Target.cpp:47
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
Definition: Attributes.h:25
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:37
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
The class represents an individual entry of a blob.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
Operation & module
Module to transform to a binary object.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Base class for all NVVM serializations from GPU modules into binary strings.
Definition: Utils.h:32
ArrayRef< Attribute > getLibrariesToLink() const
Returns the bitcode libraries to be linked into the gpu module after translation to LLVM IR.
Definition: Target.cpp:135
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getCUDAToolki...
Definition: Target.cpp:94
NVVMTargetAttr target
NVVM target attribute.
Definition: Utils.h:63
std::string toolkitPath
CUDA toolkit path.
Definition: Utils.h:66
SmallVector< Attribute > librariesToLink
List of LLVM bitcode to link into after translation to LLVM IR.
Definition: Utils.h:71
virtual std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in librariesToLink.
Definition: Target.cpp:192
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into librariesToLink.
Definition: Target.cpp:140
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
Definition: Target.cpp:118
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
Definition: Target.cpp:133
NVVMTargetAttr getTarget() const
Returns the target attribute.
Definition: Target.cpp:131
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
static AsmResourceBlob allocateInferAlign(ArrayRef< T > data, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
Definition: AsmState.h:234
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
Definition: Target.cpp:69
StringRef getCUDAToolkitPath()
Searches & returns the path CUDA toolkit path, the search order is:
Definition: Target.cpp:84
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
DialectResourceBlobHandle< BuiltinDialect > DenseResourceElementsHandle
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
static ManagerInterface & getManagerInterface(MLIRContext *ctx)
Get the interface for the dialect that owns handles of this type.