MLIR  22.0.0git
Target.cpp
Go to the documentation of this file.
1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
19 #include "mlir/IR/BuiltinDialect.h"
20 #include "mlir/IR/BuiltinTypes.h"
27 #include "llvm/Support/InterleavedRange.h"
28 
29 #include "llvm/ADT/ScopeExit.h"
30 #include "llvm/Config/Targets.h"
31 #include "llvm/Support/DebugLog.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/FileUtilities.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/Path.h"
37 #include "llvm/Support/Process.h"
38 #include "llvm/Support/Program.h"
39 #include "llvm/Support/TargetSelect.h"
40 #include "llvm/Support/Timer.h"
41 #include "llvm/Support/raw_ostream.h"
42 
43 #include <cstdint>
44 #include <cstdlib>
45 #include <optional>
46 
47 using namespace mlir;
48 using namespace mlir::NVVM;
49 
50 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
51 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
52 #endif
53 
54 extern "C" const unsigned char _mlir_embedded_libdevice[];
55 extern "C" const unsigned _mlir_embedded_libdevice_size;
56 
namespace {
// Implementation of the `TargetAttrInterface` model.
// Attached to `NVVMTargetAttr` (see the registration extension below) so that
// clients of the GPU target interface can serialize modules targeting NVVM.
class NVVMTargetAttrImpl
    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
public:
  // Serializes `module` (must be a `gpu.module`, checked in the
  // implementation) into an object blob; returns std::nullopt on failure.
  std::optional<SmallVector<char, 0>>
  serializeToObject(Attribute attribute, Operation *module,
                    const gpu::TargetOptions &options) const;

  // Wraps a serialized `object` into a `#gpu.object` attribute, attaching
  // properties (opt level, ELF section, perf timings) when available.
  Attribute createObject(Attribute attribute, Operation *module,
                         const SmallVector<char, 0> &object,
                         const gpu::TargetOptions &options) const;
};
} // namespace
71 
// Register the NVVM dialect, the NVVM translation & the target interface.
// NOTE(review): the opening signature line of this function is missing from
// this view; per the trailing index it is
// `void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(`.
  DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
    // Attach the serialization model to `#nvvm.target` attributes.
    NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
  });
}

// NOTE(review): the signature line of this overload (taking `MLIRContext &`)
// and the statement between the two lines below (presumably a call to the
// registry-based registration above) are missing from this view — confirm
// against the full source.
  MLIRContext &context) {
  DialectRegistry registry;
  context.appendDialectRegistry(registry);
}
86 
// Search for the CUDA toolkit path.
// Checks the `CUDA_ROOT`, `CUDA_HOME` and `CUDA_PATH` environment variables
// in that order and returns the first one that is set.
// NOTE(review): the function signature (`StringRef getCUDAToolkitPath()` per
// the trailing index) and the fallback return taken when no variable is set
// (presumably `__DEFAULT_CUDATOOLKIT_PATH__`, defined above) are missing from
// this view — confirm against the full source.
  if (const char *var = std::getenv("CUDA_ROOT"))
    return var;
  if (const char *var = std::getenv("CUDA_HOME"))
    return var;
  if (const char *var = std::getenv("CUDA_PATH"))
    return var;
}
97 
// Constructor of the base serializer: forwards the target's triple, chip,
// features and optimization level, plus the LLVM-IR/ISA callbacks from
// `targetOptions`, to the generic `ModuleToObject` driver, then resolves the
// toolkit path and the set of bitcode libraries to link.
// NOTE(review): the opening
// `SerializeGPUModuleBase::SerializeGPUModuleBase(` line is missing from
// this view.
  Operation &module, NVVMTargetAttr target,
  const gpu::TargetOptions &targetOptions)
    : ModuleToObject(module, target.getTriple(), target.getChip(),
                     target.getFeatures(), target.getO(),
                     targetOptions.getInitialLlvmIRCallback(),
                     targetOptions.getLinkedLlvmIRCallback(),
                     targetOptions.getOptimizedLlvmIRCallback(),
                     targetOptions.getISACallback()),
      target(target), toolkitPath(targetOptions.getToolkitPath()),
      librariesToLink(targetOptions.getLibrariesToLink()) {

  // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
  if (toolkitPath.empty())
    // NOTE(review): the statement guarded by this `if` (presumably
    // `toolkitPath = getCUDAToolkitPath();`) is missing from this view.

  // Append the files in the target attribute.
  if (target.getLink())
    librariesToLink.append(target.getLink().begin(), target.getLink().end());

  // Append libdevice to the files to be loaded.
  (void)appendStandardLibs();
}
121 
// NOTE(review): the `void SerializeGPUModuleBase::init()` signature line is
// missing from this view (the trailing index confirms this is `init()`).
  // `call_once` guards the backend registration so it runs exactly once even
  // when serialization is triggered from multiple threads.
  static llvm::once_flag initializeBackendOnce;
  llvm::call_once(initializeBackendOnce, []() {
  // If the `NVPTX` LLVM target was built, initialize it.
#if LLVM_HAS_NVPTX_TARGET
    LLVMInitializeNVPTXTarget();
    LLVMInitializeNVPTXTargetInfo();
    LLVMInitializeNVPTXTargetMC();
    LLVMInitializeNVPTXAsmPrinter();
#endif
  });
}
134 
// Accessor for the `#nvvm.target` attribute driving this serialization.
NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }

// NOTE(review): the `getToolkitPath()` definition (orig line 137) and the
// signature line of `getLibrariesToLink()` (orig line 139) are missing from
// this view — see the trailing index for their declared signatures.
  return librariesToLink;
}
142 
// Try to append `libdevice` from a CUDA toolkit installation.
// With MLIR_NVVM_EMBED_LIBDEVICE, libdevice is served from data embedded in
// the binary through a dialect resource blob; otherwise it is located on disk
// under `<toolkit>/nvvm/libdevice/libdevice.10.bc`.
// NOTE(review): several lines of this definition are missing from this view
// (the signature, the element-type/tensor construction, the blob lookup
// declaration, both `librariesToLink.push_back(...)` wrappers, the unmanaged
// blob allocation, and the `path` buffer declaration in the #else branch) —
// confirm against the full source before relying on the details below.
#if MLIR_NVVM_EMBED_LIBDEVICE
  // If libdevice is embedded in the binary, we don't look it up on the
  // filesystem.
  MLIRContext *ctx = target.getContext();
  auto type =
      IntegerType::get(ctx, 8));
  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);

  // Lookup if we already loaded the resource, otherwise create it.
      resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
  if (blob) {
        blob, ctx->getLoadedDialect<BuiltinDialect>())));
    return success();
  }

  // Allocate a resource using one of the UnManagedResourceBlob method to wrap
  // the embedded data.
      type, resourceManager.insert("_mlir_embedded_libdevice",
                                   std::move(unmanagedBlob))));
#else
  StringRef pathRef = getToolkitPath();
  if (!pathRef.empty()) {
    // Re-materialize the toolkit path into a mutable buffer, then verify it
    // is a directory before appending path components.
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_directory(pathRef)) {
      getOperation().emitError() << "CUDA path: " << pathRef
                                 << " does not exist or is not a directory.\n";
      return failure();
    }
    // The bitcode library lives at `<toolkit>/nvvm/libdevice/libdevice.10.bc`.
    llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
    pathRef = StringRef(path.data(), path.size());
    if (!llvm::sys::fs::is_regular_file(pathRef)) {
      getOperation().emitError() << "LibDevice path: " << pathRef
                                 << " does not exist or is not a file.\n";
      return failure();
    }
    librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
  }
#endif
  return success();
}
195 
// Loads the bitcode files listed in `librariesToLink` (per the trailing
// index, via `loadBitcodeFilesFromList` with `failureOnError=true`).
// NOTE(review): the method signature, the declaration of `bcFiles`, and the
// opening of the `if (failed(loadBitcodeFilesFromList(...` condition are
// missing from this view — confirm against the full source.
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
      bcFiles, true)))
    return std::nullopt;
  return std::move(bcFiles);
}
204 
namespace {
// Serializer for `#nvvm.target`: lowers a GPU module to PTX and then to a
// binary, either by driving the external CUDA tools (`ptxas`/`fatbinary`)
// or, when MLIR_ENABLE_NVPTXCOMPILER is set, the `nvptxcompiler` library.
class NVPTXSerializer : public SerializeGPUModuleBase {
public:
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module op being serialized.
  gpu::GPUModuleOp getOperation();

  /// Compiles PTX to cubin using `ptxas`.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  /// Compiles PTX to cubin using the `nvptxcompiler` library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  /// Serializes the LLVM module to an object format, depending on the
  /// compilation target selected in target options.
  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule) override;

  /// Get LLVMIR->ISA performance result.
  /// Return nullopt if moduleToObject has not been called or the target format
  /// is LLVMIR.
  std::optional<int64_t> getLLVMIRToISATimeInMs();

  /// Get ISA->Binary performance result.
  /// Return nullopt if moduleToObject has not been called or the target format
  /// is LLVMIR or ISA.
  std::optional<int64_t> getISAToBinaryTimeInMs();

private:
  /// A temporary file path paired with a remover that deletes it when the
  /// pair goes out of scope.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  /// Creates a temp file.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  /// Finds the `tool` path, where `tool` is the name of the binary to search,
  /// i.e. `ptxas` or `fatbinary`. The search order is:
  /// 1. The toolkit path in `targetOptions`.
  /// 2. In the system PATH.
  /// 3. The path from `getCUDAToolkitPath()`.
  std::optional<std::string> findTool(StringRef tool);

  /// Target options.
  gpu::TargetOptions targetOptions;

  /// LLVMIR->ISA perf result.
  std::optional<int64_t> llvmToISATimeInMs;

  /// ISA->Binary perf result.
  std::optional<int64_t> isaToBinaryTimeInMs;
};
} // namespace
260 
// Forwards module/target/options to the base serializer and caches
// `targetOptions`; the perf counters stay unset until `moduleToObject` runs.
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),
      isaToBinaryTimeInMs(std::nullopt) {}
266 
267 std::optional<NVPTXSerializer::TmpFile>
268 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
269  llvm::SmallString<128> filename;
270  if (name.size() > 80)
271  name = name.substr(0, 80);
272  std::error_code ec =
273  llvm::sys::fs::createTemporaryFile(name, suffix, filename);
274  if (ec) {
275  getOperation().emitError() << "Couldn't create the temp file: `" << filename
276  << "`, error message: " << ec.message();
277  return std::nullopt;
278  }
279  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
280 }
281 
// Wall-clock duration (ms) of the LLVM IR -> PTX translation; set by
// `moduleToObject`, nullopt until serialization has run.
std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {
  return llvmToISATimeInMs;
}

// Wall-clock duration (ms) of the PTX -> binary compilation; set by
// `moduleToObject`, nullopt until serialization has run.
std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
  return isaToBinaryTimeInMs;
}

// The serialized operation is expected to be a `gpu.module`
// (`serializeToObject` verifies this before constructing the serializer), so
// the dyn_cast is expected to succeed.
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
293 
// Locates `tool` (e.g. `ptxas`, `fatbinary`, `nvdisasm`), returning its full
// path, or std::nullopt (with a diagnostic) if it cannot be found.
std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
  // Find the `tool` path.
  // 1. Check the toolkit path given in the command line.
  StringRef pathRef = targetOptions.getToolkitPath();
  // NOTE(review): the declaration of `path` (presumably an
  // `llvm::SmallString` scratch buffer, orig line 298) is missing from this
  // view — confirm against the full source.
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }

  // 2. Check PATH.
  if (std::optional<std::string> toolPath =
          llvm::sys::Process::FindInEnvPath("PATH", tool))
    return *toolPath;

  // 3. Check `getCUDAToolkitPath()`.
  pathRef = getCUDAToolkitPath();
  path.clear();
  if (!pathRef.empty()) {
    path.insert(path.begin(), pathRef.begin(), pathRef.end());
    llvm::sys::path::append(path, "bin", tool);
    if (llvm::sys::fs::can_execute(path))
      return StringRef(path.data(), path.size()).str();
  }
  // Not found anywhere: tell the user every way they can fix the setup.
  getOperation().emitError()
      << "Couldn't find the `" << tool
      << "` binary. Please specify the toolkit "
         "path, add the compiler to $PATH, or set one of the environment "
         "variables in `NVVM::getCUDAToolkitPath()`.";
  return std::nullopt;
}
327 
328 /// Adds optional command-line arguments to existing arguments.
329 template <typename T>
330 static void setOptionalCommandlineArguments(NVVMTargetAttr target,
331  SmallVectorImpl<T> &ptxasArgs) {
332  if (!target.hasCmdOptions())
333  return;
334 
335  std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
336  for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
337  if (auto strAttr = dyn_cast<StringAttr>(attr)) {
338  if constexpr (std::is_same_v<T, StringRef>) {
339  ptxasArgs.push_back(strAttr.getValue());
340  } else if constexpr (std::is_same_v<T, const char *>) {
341  ptxasArgs.push_back(strAttr.getValue().data());
342  }
343  }
344  }
345 }
346 
// TODO: clean this method & have a generic tool driver or never emit binaries
// with this mechanism and let another stage take care of it.
//
// Drives the external CUDA tools: writes `ptxCode` to a temp file, runs
// `ptxas` to produce a cubin, optionally wraps cubin+PTX into a fatbin with
// `fatbinary`, and returns the resulting bytes. Returns std::nullopt on any
// tool or I/O failure, after emitting a diagnostic (including the tool's log
// when available).
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  // Determine if the serializer should create a fatbinary with the PTX embeded
  // or a simple CUBIN binary.
  const bool createFatbin =
      targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;

  // Find the `ptxas` & `fatbinary` tools.
  std::optional<std::string> ptxasCompiler = findTool("ptxas");
  if (!ptxasCompiler)
    return std::nullopt;
  std::optional<std::string> fatbinaryTool;
  if (createFatbin) {
    fatbinaryTool = findTool("fatbinary");
    if (!fatbinaryTool)
      return std::nullopt;
  }
  Location loc = getOperation().getLoc();

  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
  std::string basename =
      llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
                    getTarget().getTriple(), getTarget().getChip());

  // Create temp files:
  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
  if (!ptxFile)
    return std::nullopt;
  std::optional<TmpFile> logFile = createTemp(basename, "log");
  if (!logFile)
    return std::nullopt;
  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
  if (!binaryFile)
    return std::nullopt;
  // For a fatbin, the intermediate cubin gets its own removable temp file;
  // otherwise the cubin IS the final binary, so reuse `binaryFile`'s path
  // (its remover stays with `binaryFile`).
  TmpFile cubinFile;
  if (createFatbin) {
    std::string cubinFilename = (ptxFile->first + ".cubin").str();
    cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
  } else {
    cubinFile.first = binaryFile->first;
  }

  std::error_code ec;
  // Dump the PTX to a temp file.
  {
    llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
    if (ec) {
      emitError(loc) << "Couldn't open the file: `" << ptxFile->first
                     << "`, error message: " << ec.message();
      return std::nullopt;
    }
    ptxStream << ptxCode;
    if (ptxStream.has_error()) {
      emitError(loc) << "An error occurred while writing the PTX to: `"
                     << ptxFile->first << "`.";
      return std::nullopt;
    }
    ptxStream.flush();
  }

  // Command redirects. stdout and stderr of the spawned tools both go to the
  // log file; stdin is left untouched.
  std::optional<StringRef> redirects[] = {
      std::nullopt,
      logFile->first,
      logFile->first,
  };

  // Get any extra args passed in `targetOptions`.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();

  // Create ptxas args.
  std::string optLevel = std::to_string(this->optLevel);
  SmallVector<StringRef, 12> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
       "--opt-level", optLevel});

  bool useFatbin32 = false;
  for (const auto *cArg : cmdOpts.second) {
    // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
    // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
    // assumed.
    if (StringRef arg(cArg); arg != "-32")
      ptxasArgs.push_back(arg);
    else
      useFatbin32 = true;
  }

  // Set optional command line arguments
  setOptionalCommandlineArguments(getTarget(), ptxasArgs);

  // Create the `fatbinary` args.
  StringRef chip = getTarget().getChip();
  // Remove the arch prefix to obtain the compute capability.
  chip.consume_front("sm_"), chip.consume_front("compute_");
  // Embed the cubin object.
  std::string cubinArg =
      llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
          .str();
  // Embed the PTX file so the driver can JIT if needed.
  std::string ptxArg =
      llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
          .str();
  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
                                        useFatbin32 ? "-32" : "-64", cubinArg,
                                        ptxArg, "--create", binaryFile->first});

  // Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
  LDBG() << "Tool invocation for module: " << getOperation().getNameAttr()
         << "\nptxas executable:" << ptxasCompiler.value()
         << "\nptxas args: " << llvm::interleaved(ptxasArgs, " ");
  if (createFatbin)
    LDBG() << "fatbin args: " << llvm::interleaved(fatbinArgs, " ");
#undef DEBUG_TYPE

  // Helper function for printing tool error logs. Prefers the spawn error
  // `message` when set; otherwise falls back to the captured log file.
  std::string message;
  auto emitLogError =
      [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
    if (message.empty()) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
          llvm::MemoryBuffer::getFile(logFile->first);
      if (toolStderr)
        emitError(loc) << toolName << " invocation failed. Log:\n"
                       << toolStderr->get()->getBuffer();
      else
        emitError(loc) << toolName << " invocation failed.";
      return std::nullopt;
    }
    emitError(loc) << toolName
                   << " invocation failed, error message: " << message;
    return std::nullopt;
  };

  // Invoke PTXAS.
  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
                                /*Env=*/std::nullopt,
                                /*Redirects=*/redirects,
                                /*SecondsToWait=*/0,
                                /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message))
    return emitLogError("`ptxas`");
#define DEBUG_TYPE "dump-sass"
  LLVM_DEBUG({
    std::optional<std::string> nvdisasm = findTool("nvdisasm");
    SmallVector<StringRef> nvdisasmArgs(
        {StringRef("nvdisasm"), StringRef(cubinFile.first)});
    if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
                                  /*Env=*/std::nullopt,
                                  /*Redirects=*/redirects,
                                  /*SecondsToWait=*/0,
                                  /*MemoryLimit=*/0,
                                  /*ErrMsg=*/&message))
      return emitLogError("`nvdisasm`");
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      LDBG() << "Output:\n" << (*logBuffer)->getBuffer();
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Invoke `fatbin`.
  message.clear();
  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
                                                /*Env=*/std::nullopt,
                                                /*Redirects=*/redirects,
                                                /*SecondsToWait=*/0,
                                                /*MemoryLimit=*/0,
                                                /*ErrMsg=*/&message))
    return emitLogError("`fatbinary`");

// Dump the output of the tools, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
      LDBG() << "Output:\n" << (*logBuffer)->getBuffer();
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Read the fatbin.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
      llvm::MemoryBuffer::getFile(binaryFile->first);
  if (!binaryBuffer) {
    emitError(loc) << "Couldn't open the file: `" << binaryFile->first
                   << "`, error message: " << binaryBuffer.getError().message();
    return std::nullopt;
  }
  StringRef fatbin = (*binaryBuffer)->getBuffer();
  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
}
547 
548 #if MLIR_ENABLE_NVPTXCOMPILER
549 #include "nvPTXCompiler.h"
550 
551 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
552  do { \
553  if (auto status = (expr)) { \
554  emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
555  << status; \
556  return std::nullopt; \
557  } \
558  } while (false)
559 
560 #include "nvFatbin.h"
561 
562 #define RETURN_ON_NVFATBIN_ERROR(expr) \
563  do { \
564  auto result = (expr); \
565  if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
566  emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
567  << nvFatbinGetErrorString(result); \
568  return std::nullopt; \
569  } \
570  } while (false)
571 
572 std::optional<SmallVector<char, 0>>
573 NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
574  Location loc = getOperation().getLoc();
575  nvPTXCompilerHandle compiler = nullptr;
576  nvPTXCompileResult status;
577  size_t logSize;
578 
579  // Create the options.
580  std::string optLevel = std::to_string(this->optLevel);
581  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
582  targetOptions.tokenizeCmdOptions();
583  cmdOpts.second.append(
584  {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
585 
586  // Set optional command line arguments
587  setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
588  // Create the compiler handle.
589  RETURN_ON_NVPTXCOMPILER_ERROR(
590  nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
591 
592  // Try to compile the binary.
593  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
594  cmdOpts.second.data());
595 
596  // Check if compilation failed.
597  if (status != NVPTXCOMPILE_SUCCESS) {
598  RETURN_ON_NVPTXCOMPILER_ERROR(
599  nvPTXCompilerGetErrorLogSize(compiler, &logSize));
600  if (logSize != 0) {
601  SmallVector<char> log(logSize + 1, 0);
602  RETURN_ON_NVPTXCOMPILER_ERROR(
603  nvPTXCompilerGetErrorLog(compiler, log.data()));
604  emitError(loc) << "NVPTX compiler invocation failed, error log: "
605  << log.data();
606  } else {
607  emitError(loc) << "NVPTX compiler invocation failed with error code: "
608  << status;
609  }
610  return std::nullopt;
611  }
612 
613  // Retrieve the binary.
614  size_t elfSize;
615  RETURN_ON_NVPTXCOMPILER_ERROR(
616  nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
617  SmallVector<char, 0> binary(elfSize, 0);
618  RETURN_ON_NVPTXCOMPILER_ERROR(
619  nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
620 
621 // Dump the log of the compiler, helpful if the verbose flag was passed.
622 #define DEBUG_TYPE "serialize-to-binary"
623  LLVM_DEBUG({
624  RETURN_ON_NVPTXCOMPILER_ERROR(
625  nvPTXCompilerGetInfoLogSize(compiler, &logSize));
626  if (logSize != 0) {
627  SmallVector<char> log(logSize + 1, 0);
628  RETURN_ON_NVPTXCOMPILER_ERROR(
629  nvPTXCompilerGetInfoLog(compiler, log.data()));
630  LDBG() << "NVPTX compiler invocation for module: "
631  << getOperation().getNameAttr()
632  << "\nArguments: " << llvm::interleaved(cmdOpts.second, " ")
633  << "\nOutput\n"
634  << log.data();
635  }
636  });
637 #undef DEBUG_TYPE
638  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
639 
640  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
641  bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
642  return llvm::StringRef(option) == "-32";
643  });
644 
645  const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
646  nvFatbinHandle handle;
647 
648  auto chip = getTarget().getChip();
649  chip.consume_front("sm_");
650 
651  RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
652  RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
653  handle, binary.data(), binary.size(), chip.data(), nullptr));
654  RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
655  handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
656 
657  size_t fatbinSize;
658  RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
659  SmallVector<char, 0> fatbin(fatbinSize, 0);
660  RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
661  RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
662  return fatbin;
663  }
664 
665  return binary;
666 }
667 #endif // MLIR_ENABLE_NVPTXCOMPILER
668 
// Serializes `llvmModule` according to the selected compilation target:
// LLVM bitcode (Offload), PTX (Assembly), or a binary (Binary/Fatbin) via
// `ptxas` or the nvptxcompiler library. Also records the LLVMIR->ISA and
// ISA->binary wall-clock durations in the perf members.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
  llvm::Timer moduleToObjectTimer(
      "moduleToObjectTimer",
      "Timer for perf llvm-ir -> isa and isa -> binary.");
  // Make sure the timer is reset even on early-return paths.
  auto clear = llvm::make_scope_exit([&]() { moduleToObjectTimer.clear(); });
  // Return LLVM IR if the compilation target is `offload`.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    LDBG() << "LLVM IR for module: " << getOperation().getNameAttr();
    LDBG() << llvmModule;
  });
#undef DEBUG_TYPE
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule);

#if !LLVM_HAS_NVPTX_TARGET
  getOperation()->emitError(
      "The `NVPTX` target was not built. Please enable it when building LLVM.");
  return std::nullopt;
#endif // LLVM_HAS_NVPTX_TARGET

  // Emit PTX code.
  std::optional<llvm::TargetMachine *> targetMachine =
      getOrCreateTargetMachine();
  if (!targetMachine) {
    getOperation().emitError() << "Target Machine unavailable for triple "
                               << triple << ", can't optimize with LLVM\n";
    return std::nullopt;
  }
  moduleToObjectTimer.startTimer();
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, **targetMachine);
  moduleToObjectTimer.stopTimer();
  // Wall time is reported in seconds; convert to milliseconds. Clear the
  // timer so the next start/stop pair measures only the ISA->binary phase.
  llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
  moduleToObjectTimer.clear();
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }

  // Hand the PTX to the user-provided callback, if any.
  if (isaCallback)
    isaCallback(serializedISA.value());

#define DEBUG_TYPE "serialize-to-isa"
  LDBG() << "PTX for module: " << getOperation().getNameAttr() << "\n"
         << *serializedISA;
#undef DEBUG_TYPE

  // Return PTX if the compilation target is `assembly`.
  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
    return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());

  std::optional<SmallVector<char, 0>> result;
  moduleToObjectTimer.startTimer();
  // Compile to binary.
#if MLIR_ENABLE_NVPTXCOMPILER
  result = compileToBinaryNVPTX(*serializedISA);
#else
  result = compileToBinary(*serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER

  moduleToObjectTimer.stopTimer();
  isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
  moduleToObjectTimer.clear();
  return result;
}
736 
737 std::optional<SmallVector<char, 0>>
738 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
739  const gpu::TargetOptions &options) const {
740  Builder builder(attribute.getContext());
741  assert(module && "The module must be non null.");
742  if (!module)
743  return std::nullopt;
744  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
745  module->emitError("Module must be a GPU module.");
746  return std::nullopt;
747  }
748  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
749  serializer.init();
750  std::optional<SmallVector<char, 0>> result = serializer.run();
751  auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
752  if (llvmToISATimeInMs.has_value())
753  module->setAttr("LLVMIRToISATimeInMs",
754  builder.getI64IntegerAttr(*llvmToISATimeInMs));
755  auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
756  if (isaToBinaryTimeInMs.has_value())
757  module->setAttr("ISAToBinaryTimeInMs",
758  builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
759  return result;
760 }
761 
// Wraps a serialized blob into a `#gpu.object` attribute, collecting the
// optimization level (assembly only), the requested ELF section, and any
// perf-timing attributes previously stamped on the module into the object's
// properties dictionary.
Attribute
NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
                                 const SmallVector<char, 0> &object,
                                 const gpu::TargetOptions &options) const {
  auto target = cast<NVVMTargetAttr>(attribute);
  gpu::CompilationTarget format = options.getCompilationTarget();
  DictionaryAttr objectProps;
  Builder builder(attribute.getContext());
  // NOTE(review): the declaration of `properties` (presumably a
  // `SmallVector<NamedAttribute>`, orig line 770) is missing from this view —
  // confirm against the full source.
  if (format == gpu::CompilationTarget::Assembly)
    properties.push_back(
        builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));

  if (StringRef section = options.getELFSection(); !section.empty())
    properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                              builder.getStringAttr(section)));

  // Forward the perf timings recorded by `serializeToObject`, if present.
  for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
    if (module->hasAttr(perfName)) {
      IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
      properties.push_back(builder.getNamedAttr(
          perfName, builder.getI64IntegerAttr(attr.getInt())));
    }
  }

  if (!properties.empty())
    objectProps = builder.getDictionaryAttr(properties);

  return builder.getAttr<gpu::ObjectAttr>(
      attribute, format,
      builder.getStringAttr(StringRef(object.data(), object.size())),
      objectProps, /*kernels=*/nullptr);
}
static void setOptionalCommandlineArguments(NVVMTargetAttr target, SmallVectorImpl< T > &ptxasArgs)
Adds optional command-line arguments to existing arguments.
Definition: Target.cpp:330
const unsigned _mlir_embedded_libdevice_size
Definition: Target.cpp:55
#define __DEFAULT_CUDATOOLKIT_PATH__
Definition: Target.cpp:51
const unsigned char _mlir_embedded_libdevice[]
Definition: Target.cpp:54
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
Definition: Attributes.h:25
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:37
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
The class represents an individual entry of a blob.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
Operation & module
Module to transform to a binary object.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Base class for all NVVM serializations from GPU modules into binary strings.
Definition: Utils.h:32
ArrayRef< Attribute > getLibrariesToLink() const
Returns the bitcode libraries to be linked into the gpu module after translation to LLVM IR.
Definition: Target.cpp:139
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getCUDAToolki...
Definition: Target.cpp:98
NVVMTargetAttr target
NVVM target attribute.
Definition: Utils.h:63
std::string toolkitPath
CUDA toolkit path.
Definition: Utils.h:66
SmallVector< Attribute > librariesToLink
List of LLVM bitcode to link into after translation to LLVM IR.
Definition: Utils.h:71
std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in librariesToLink.
Definition: Target.cpp:197
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into librariesToLink.
Definition: Target.cpp:144
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
Definition: Target.cpp:122
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
Definition: Target.cpp:137
NVVMTargetAttr getTarget() const
Returns the target attribute.
Definition: Target.cpp:135
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Attribute getAttr(StringAttr name)
Return the specified attribute if present, null otherwise.
Definition: Operation.h:534
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition: Operation.h:560
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
void setAttr(StringAttr name, Attribute value)
If the an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:582
static AsmResourceBlob allocateInferAlign(ArrayRef< T > data, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
Definition: AsmState.h:235
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
Definition: Target.cpp:73
StringRef getCUDAToolkitPath()
Searches & returns the path CUDA toolkit path, the search order is:
Definition: Target.cpp:88
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
DialectResourceBlobHandle< BuiltinDialect > DenseResourceElementsHandle
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
static ManagerInterface & getManagerInterface(MLIRContext *ctx)
Get the interface for the dialect that owns handles of this type.