Target.cpp
1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines NVVM target related functions, including registration
10 // calls for the `#nvvm.target` compilation attribute.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 
20 #include "mlir/IR/BuiltinDialect.h"
21 #include "mlir/IR/BuiltinTypes.h"
28 
29 #include "llvm/ADT/ScopeExit.h"
30 #include "llvm/Config/Targets.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/FileUtilities.h"
33 #include "llvm/Support/FormatVariadic.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/Path.h"
36 #include "llvm/Support/Process.h"
37 #include "llvm/Support/Program.h"
38 #include "llvm/Support/TargetSelect.h"
39 #include "llvm/Support/Timer.h"
40 #include "llvm/Support/raw_ostream.h"
41 
42 #include <cstdint>
43 #include <cstdlib>
44 #include <optional>
45 
46 using namespace mlir;
47 using namespace mlir::NVVM;
48 
49 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
50 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
51 #endif
52 
53 extern "C" const unsigned char _mlir_embedded_libdevice[];
54 extern "C" const unsigned _mlir_embedded_libdevice_size;
55 
56 namespace {
57 // Implementation of the `TargetAttrInterface` model.
58 class NVVMTargetAttrImpl
59  : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
60 public:
61  std::optional<SmallVector<char, 0>>
62  serializeToObject(Attribute attribute, Operation *module,
63  const gpu::TargetOptions &options) const;
64 
65  Attribute createObject(Attribute attribute, Operation *module,
66  const SmallVector<char, 0> &object,
67  const gpu::TargetOptions &options) const;
68 };
69 } // namespace
70 
71 // Register the NVVM dialect, the NVVM translation & the target interface.
72 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
73     DialectRegistry &registry) {
74  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
75  NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
76  });
77 }
78 
79 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
80     MLIRContext &context) {
81  DialectRegistry registry;
82   registerNVVMTargetInterfaceExternalModels(registry);
83   context.appendDialectRegistry(registry);
84 }
85 
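// A minimal usage sketch (not part of this file): a tool that wants
// `#nvvm.target` attributes to be serializable registers the external model
// before creating the context, e.g.
//
//   mlir::DialectRegistry registry;
//   registry.insert<mlir::NVVM::NVVMDialect, mlir::gpu::GPUDialect>();
//   mlir::NVVM::registerNVVMTargetInterfaceExternalModels(registry);
//   mlir::MLIRContext context(registry);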
86 // Search for the CUDA toolkit path.
87 StringRef mlir::NVVM::getCUDAToolkitPath() {
88   if (const char *var = std::getenv("CUDA_ROOT"))
89  return var;
90  if (const char *var = std::getenv("CUDA_HOME"))
91  return var;
92  if (const char *var = std::getenv("CUDA_PATH"))
93  return var;
94   return __DEFAULT_CUDATOOLKIT_PATH__;
95 }
96 
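// Illustrative use of `getCUDAToolkitPath()` above (the path is an assumed
// example, not a default):
//
//   ::setenv("CUDA_ROOT", "/usr/local/cuda", /*overwrite=*/1);
//   llvm::StringRef toolkit = mlir::NVVM::getCUDAToolkitPath();
//   // `toolkit` is now "/usr/local/cuda"; with none of CUDA_ROOT, CUDA_HOME,
//   // or CUDA_PATH set, the build-time default (possibly empty) is returned.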
97 SerializeGPUModuleBase::SerializeGPUModuleBase(
98     Operation &module, NVVMTargetAttr target,
99  const gpu::TargetOptions &targetOptions)
100  : ModuleToObject(module, target.getTriple(), target.getChip(),
101  target.getFeatures(), target.getO(),
102  targetOptions.getInitialLlvmIRCallback(),
103  targetOptions.getLinkedLlvmIRCallback(),
104  targetOptions.getOptimizedLlvmIRCallback(),
105  targetOptions.getISACallback()),
106  target(target), toolkitPath(targetOptions.getToolkitPath()),
107  librariesToLink(targetOptions.getLibrariesToLink()) {
108 
109   // If `targetOptions` has an empty `toolkitPath`, use `getCUDAToolkitPath()`.
110   if (toolkitPath.empty())
111     toolkitPath = getCUDAToolkitPath();
112 
113  // Append the files in the target attribute.
114  if (target.getLink())
115  librariesToLink.append(target.getLink().begin(), target.getLink().end());
116 
117  // Append libdevice to the files to be loaded.
118  (void)appendStandardLibs();
119 }
120 
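// For orientation, the fields consumed by the constructor above come straight
// from the target attribute; an illustrative (hand-written) instance is:
//
//   gpu.module @kernels [#nvvm.target<O = 3, chip = "sm_90",
//                                     features = "+ptx80",
//                                     link = ["path/to/extra.bc"]>] {
//     ...
//   }
//
// where `link` supplies the extra bitcode libraries appended above.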
121 void SerializeGPUModuleBase::init() {
122   static llvm::once_flag initializeBackendOnce;
123  llvm::call_once(initializeBackendOnce, []() {
124  // If the `NVPTX` LLVM target was built, initialize it.
125 #if LLVM_HAS_NVPTX_TARGET
126  LLVMInitializeNVPTXTarget();
127  LLVMInitializeNVPTXTargetInfo();
128  LLVMInitializeNVPTXTargetMC();
129  LLVMInitializeNVPTXAsmPrinter();
130 #endif
131  });
132 }
133 
134 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
135 
136 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
137 
138 ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const {
139  return librariesToLink;
140 }
141 
142 // Try to append `libdevice` from a CUDA toolkit installation.
143 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
144 #if MLIR_NVVM_EMBED_LIBDEVICE
145  // If libdevice is embedded in the binary, we don't look it up on the
146  // filesystem.
147  MLIRContext *ctx = target.getContext();
148  auto type =
149       RankedTensorType::get(ArrayRef<int64_t>{_mlir_embedded_libdevice_size},
150                             IntegerType::get(ctx, 8));
151  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
152 
153   // Look up whether we already loaded the resource; otherwise create it.
154   DialectResourceBlobManager::BlobEntry *blob =
155       resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
156  if (blob) {
157     librariesToLink.push_back(DenseResourceElementsAttr::get(
158         type, DenseResourceElementsHandle(
159                   blob, ctx->getLoadedDialect<BuiltinDialect>())));
160  return success();
161  }
162 
163   // Allocate a resource using one of the UnmanagedAsmResourceBlob methods to
164   // wrap the embedded data.
165   auto unmanagedBlob =
166       UnmanagedAsmResourceBlob::allocateInferAlign(ArrayRef<unsigned char>{
167           _mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
168   librariesToLink.push_back(DenseResourceElementsAttr::get(
169       type, resourceManager.insert("_mlir_embedded_libdevice",
170  std::move(unmanagedBlob))));
171 #else
172  StringRef pathRef = getToolkitPath();
173  if (!pathRef.empty()) {
174     SmallVector<char, 256> path;
175     path.insert(path.begin(), pathRef.begin(), pathRef.end());
176  pathRef = StringRef(path.data(), path.size());
177  if (!llvm::sys::fs::is_directory(pathRef)) {
178  getOperation().emitError() << "CUDA path: " << pathRef
179  << " does not exist or is not a directory.\n";
180  return failure();
181  }
182  llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
183  pathRef = StringRef(path.data(), path.size());
184  if (!llvm::sys::fs::is_regular_file(pathRef)) {
185  getOperation().emitError() << "LibDevice path: " << pathRef
186  << " does not exist or is not a file.\n";
187  return failure();
188  }
189  librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
190  }
191 #endif
192  return success();
193 }
194 
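// As a concrete example of the non-embedded branch above: with the toolkit
// resolved to /usr/local/cuda (an assumed location), the library linked in is
// /usr/local/cuda/nvvm/libdevice/libdevice.10.bc; a missing directory or file
// is reported as an error on the module being serialized.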
195 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
196 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
197   SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
198   if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink,
199                                       bcFiles, true)))
200  return std::nullopt;
201  return std::move(bcFiles);
202 }
203 
204 namespace {
205 class NVPTXSerializer : public SerializeGPUModuleBase {
206 public:
207  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
208  const gpu::TargetOptions &targetOptions);
209 
210  /// Returns the GPU module op being serialized.
211  gpu::GPUModuleOp getOperation();
212 
213  /// Compiles PTX to cubin using `ptxas`.
214  std::optional<SmallVector<char, 0>>
215  compileToBinary(const std::string &ptxCode);
216 
217  /// Compiles PTX to cubin using the `nvptxcompiler` library.
218  std::optional<SmallVector<char, 0>>
219  compileToBinaryNVPTX(const std::string &ptxCode);
220 
221  /// Serializes the LLVM module to an object format, depending on the
222  /// compilation target selected in target options.
223  std::optional<SmallVector<char, 0>>
224  moduleToObject(llvm::Module &llvmModule) override;
225 
226  /// Get LLVMIR->ISA performance result.
227  /// Return nullopt if moduleToObject has not been called or the target format
228  /// is LLVMIR.
229  std::optional<int64_t> getLLVMIRToISATimeInMs();
230 
231  /// Get ISA->Binary performance result.
232  /// Return nullopt if moduleToObject has not been called or the target format
233  /// is LLVMIR or ISA.
234  std::optional<int64_t> getISAToBinaryTimeInMs();
235 
236 private:
237  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
238 
239  /// Creates a temp file.
240  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
241 
242  /// Finds the `tool` path, where `tool` is the name of the binary to search,
243  /// e.g. `ptxas` or `fatbinary`. The search order is:
244  /// 1. The toolkit path in `targetOptions`.
245  /// 2. In the system PATH.
246  /// 3. The path from `getCUDAToolkitPath()`.
247  std::optional<std::string> findTool(StringRef tool);
248 
249  /// Target options.
250  gpu::TargetOptions targetOptions;
251 
252  /// LLVMIR->ISA perf result.
253  std::optional<int64_t> llvmToISATimeInMs;
254 
255  /// ISA->Binary perf result.
256  std::optional<int64_t> isaToBinaryTimeInMs;
257 };
258 } // namespace
259 
260 NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
261  const gpu::TargetOptions &targetOptions)
262  : SerializeGPUModuleBase(module, target, targetOptions),
263  targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),
264  isaToBinaryTimeInMs(std::nullopt) {}
265 
266 std::optional<NVPTXSerializer::TmpFile>
267 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
268  llvm::SmallString<128> filename;
269  std::error_code ec =
270  llvm::sys::fs::createTemporaryFile(name, suffix, filename);
271  if (ec) {
272  getOperation().emitError() << "Couldn't create the temp file: `" << filename
273  << "`, error message: " << ec.message();
274  return std::nullopt;
275  }
276  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
277 }
278 
279 std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {
280  return llvmToISATimeInMs;
281 }
282 
283 std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
284  return isaToBinaryTimeInMs;
285 }
286 
287 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
288  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
289 }
290 
291 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
292  // Find the `tool` path.
293  // 1. Check the toolkit path given in the command line.
294  StringRef pathRef = targetOptions.getToolkitPath();
295   SmallVector<char, 256> path;
296   if (!pathRef.empty()) {
297  path.insert(path.begin(), pathRef.begin(), pathRef.end());
298  llvm::sys::path::append(path, "bin", tool);
299  if (llvm::sys::fs::can_execute(path))
300  return StringRef(path.data(), path.size()).str();
301  }
302 
303  // 2. Check PATH.
304  if (std::optional<std::string> toolPath =
305  llvm::sys::Process::FindInEnvPath("PATH", tool))
306  return *toolPath;
307 
308  // 3. Check `getCUDAToolkitPath()`.
309  pathRef = getCUDAToolkitPath();
310  path.clear();
311  if (!pathRef.empty()) {
312  path.insert(path.begin(), pathRef.begin(), pathRef.end());
313  llvm::sys::path::append(path, "bin", tool);
314  if (llvm::sys::fs::can_execute(path))
315  return StringRef(path.data(), path.size()).str();
316  }
317  getOperation().emitError()
318  << "Couldn't find the `" << tool
319  << "` binary. Please specify the toolkit "
320         "path, add the compiler to $PATH, or set one of the environment "
321         "variables checked by `NVVM::getCUDAToolkitPath()`.";
322  return std::nullopt;
323 }
324 
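// Sketch of how the lookup above is usually satisfied in practice (the option
// spelling below follows the GPU module-to-binary pass and should be treated
// as an assumption here):
//
//   mlir-opt in.mlir --gpu-module-to-binary="toolkit=/usr/local/cuda"
//
// which populates `gpu::TargetOptions::getToolkitPath()` and therefore step 1,
// before the fallbacks to $PATH and the CUDA_* environment variables.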
325 /// Adds optional command-line arguments to existing arguments.
326 template <typename T>
327 static void setOptionalCommandlineArguments(NVVMTargetAttr target,
328  SmallVectorImpl<T> &ptxasArgs) {
329  if (!target.hasCmdOptions())
330  return;
331 
332  std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
333  for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
334  if (auto strAttr = dyn_cast<StringAttr>(attr)) {
335  if constexpr (std::is_same_v<T, StringRef>) {
336  ptxasArgs.push_back(strAttr.getValue());
337  } else if constexpr (std::is_same_v<T, const char *>) {
338  ptxasArgs.push_back(strAttr.getValue().data());
339  }
340  }
341  }
342 }
343 
344 // TODO: Clean up this method and provide a generic tool driver, or stop
345 // emitting binaries through this mechanism and let a later stage handle it.
346 std::optional<SmallVector<char, 0>>
347 NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
348   // Determine whether the serializer should create a fatbinary with the PTX
349   // embedded, or a plain cubin binary.
350  const bool createFatbin =
351  targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
352 
353  // Find the `ptxas` & `fatbinary` tools.
354  std::optional<std::string> ptxasCompiler = findTool("ptxas");
355  if (!ptxasCompiler)
356  return std::nullopt;
357  std::optional<std::string> fatbinaryTool;
358  if (createFatbin) {
359  fatbinaryTool = findTool("fatbinary");
360  if (!fatbinaryTool)
361  return std::nullopt;
362  }
363  Location loc = getOperation().getLoc();
364 
365  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
366  std::string basename =
367  llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
368  getTarget().getTriple(), getTarget().getChip());
369 
370  // Create temp files:
371  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
372  if (!ptxFile)
373  return std::nullopt;
374  std::optional<TmpFile> logFile = createTemp(basename, "log");
375  if (!logFile)
376  return std::nullopt;
377  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
378  if (!binaryFile)
379  return std::nullopt;
380  TmpFile cubinFile;
381  if (createFatbin) {
382  std::string cubinFilename = (ptxFile->first + ".cubin").str();
383  cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
384  } else {
385  cubinFile.first = binaryFile->first;
386  }
387 
388  std::error_code ec;
389  // Dump the PTX to a temp file.
390  {
391  llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
392  if (ec) {
393  emitError(loc) << "Couldn't open the file: `" << ptxFile->first
394  << "`, error message: " << ec.message();
395  return std::nullopt;
396  }
397  ptxStream << ptxCode;
398  if (ptxStream.has_error()) {
399  emitError(loc) << "An error occurred while writing the PTX to: `"
400  << ptxFile->first << "`.";
401  return std::nullopt;
402  }
403  ptxStream.flush();
404  }
405 
406  // Command redirects.
407  std::optional<StringRef> redirects[] = {
408  std::nullopt,
409  logFile->first,
410  logFile->first,
411  };
412 
413  // Get any extra args passed in `targetOptions`.
414  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
415  targetOptions.tokenizeCmdOptions();
416 
417  // Create ptxas args.
418  std::string optLevel = std::to_string(this->optLevel);
419  SmallVector<StringRef, 12> ptxasArgs(
420  {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
421  StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
422  "--opt-level", optLevel});
423 
424  bool useFatbin32 = false;
425  for (const auto *cArg : cmdOpts.second) {
426     // All `cmdOpts` are for `ptxas` except `-32`, which is forwarded to
427     // `fatbinary` to request a 32-bit target. By default a 64-bit target is
428     // assumed.
429  if (StringRef arg(cArg); arg != "-32")
430  ptxasArgs.push_back(arg);
431  else
432  useFatbin32 = true;
433  }
434 
435  // Set optional command line arguments
436  setOptionalCommandlineArguments(getTarget(), ptxasArgs);
437 
438  // Create the `fatbinary` args.
439  StringRef chip = getTarget().getChip();
440  // Remove the arch prefix to obtain the compute capability.
441  chip.consume_front("sm_"), chip.consume_front("compute_");
442  // Embed the cubin object.
443  std::string cubinArg =
444  llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
445  .str();
446  // Embed the PTX file so the driver can JIT if needed.
447  std::string ptxArg =
448  llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
449  .str();
450  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
451  useFatbin32 ? "-32" : "-64", cubinArg,
452  ptxArg, "--create", binaryFile->first});
453 
454  // Dump tool invocation commands.
455 #define DEBUG_TYPE "serialize-to-binary"
456  LLVM_DEBUG({
457  llvm::dbgs() << "Tool invocation for module: "
458  << getOperation().getNameAttr() << "\n";
459     llvm::dbgs() << "ptxas executable: " << ptxasCompiler.value() << "\n";
460  llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
461  llvm::dbgs() << "\n";
462  if (createFatbin) {
463  llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
464  llvm::dbgs() << "\n";
465  }
466  });
467 #undef DEBUG_TYPE
468 
469  // Helper function for printing tool error logs.
470  std::string message;
471  auto emitLogError =
472  [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
473  if (message.empty()) {
474  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
475  llvm::MemoryBuffer::getFile(logFile->first);
476  if (toolStderr)
477  emitError(loc) << toolName << " invocation failed. Log:\n"
478  << toolStderr->get()->getBuffer();
479  else
480  emitError(loc) << toolName << " invocation failed.";
481  return std::nullopt;
482  }
483  emitError(loc) << toolName
484  << " invocation failed, error message: " << message;
485  return std::nullopt;
486  };
487 
488  // Invoke PTXAS.
489  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
490  /*Env=*/std::nullopt,
491  /*Redirects=*/redirects,
492  /*SecondsToWait=*/0,
493  /*MemoryLimit=*/0,
494  /*ErrMsg=*/&message))
495  return emitLogError("`ptxas`");
496 #define DEBUG_TYPE "dump-sass"
497  LLVM_DEBUG({
498  std::optional<std::string> nvdisasm = findTool("nvdisasm");
499  SmallVector<StringRef> nvdisasmArgs(
500  {StringRef("nvdisasm"), StringRef(cubinFile.first)});
501  if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
502  /*Env=*/std::nullopt,
503  /*Redirects=*/redirects,
504  /*SecondsToWait=*/0,
505  /*MemoryLimit=*/0,
506  /*ErrMsg=*/&message))
507  return emitLogError("`nvdisasm`");
508  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
509  llvm::MemoryBuffer::getFile(logFile->first);
510  if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
511  llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
512  llvm::dbgs().flush();
513  }
514  });
515 #undef DEBUG_TYPE
516 
517   // Invoke `fatbinary`.
518  message.clear();
519  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
520  /*Env=*/std::nullopt,
521  /*Redirects=*/redirects,
522  /*SecondsToWait=*/0,
523  /*MemoryLimit=*/0,
524  /*ErrMsg=*/&message))
525  return emitLogError("`fatbinary`");
526 
527 // Dump the output of the tools, helpful if the verbose flag was passed.
528 #define DEBUG_TYPE "serialize-to-binary"
529  LLVM_DEBUG({
530  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
531  llvm::MemoryBuffer::getFile(logFile->first);
532  if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
533  llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
534  llvm::dbgs().flush();
535  }
536  });
537 #undef DEBUG_TYPE
538 
539  // Read the fatbin.
540  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
541  llvm::MemoryBuffer::getFile(binaryFile->first);
542  if (!binaryBuffer) {
543  emitError(loc) << "Couldn't open the file: `" << binaryFile->first
544  << "`, error message: " << binaryBuffer.getError().message();
545  return std::nullopt;
546  }
547  StringRef fatbin = (*binaryBuffer)->getBuffer();
548  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
549 }
550 
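// Putting the pieces above together: for chip "sm_90" at -O3 the spawned
// commands are roughly (temporary file names abbreviated):
//
//   ptxas -arch sm_90 <tmp>.ptx -o <tmp>.cubin --opt-level 3 <extra options>
//   fatbinary -64 --image3=kind=elf,sm=90,file=<tmp>.cubin
//             --image3=kind=ptx,sm=90,file=<tmp>.ptx --create <tmp>.bin
//
// with the `fatbinary` step skipped when a plain cubin was requested.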
551 #if MLIR_ENABLE_NVPTXCOMPILER
552 #include "nvPTXCompiler.h"
553 
554 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
555  do { \
556  if (auto status = (expr)) { \
557  emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
558  << status; \
559  return std::nullopt; \
560  } \
561  } while (false)
562 
563 #include "nvFatbin.h"
564 
565 #define RETURN_ON_NVFATBIN_ERROR(expr) \
566  do { \
567  auto result = (expr); \
568  if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
569  emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
570  << nvFatbinGetErrorString(result); \
571  return std::nullopt; \
572  } \
573  } while (false)
574 
575 std::optional<SmallVector<char, 0>>
576 NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
577  Location loc = getOperation().getLoc();
578  nvPTXCompilerHandle compiler = nullptr;
579  nvPTXCompileResult status;
580  size_t logSize;
581 
582  // Create the options.
583  std::string optLevel = std::to_string(this->optLevel);
584  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
585  targetOptions.tokenizeCmdOptions();
586  cmdOpts.second.append(
587  {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
588 
589  // Set optional command line arguments
590  setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
591  // Create the compiler handle.
592  RETURN_ON_NVPTXCOMPILER_ERROR(
593  nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
594 
595  // Try to compile the binary.
596  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
597  cmdOpts.second.data());
598 
599  // Check if compilation failed.
600  if (status != NVPTXCOMPILE_SUCCESS) {
601  RETURN_ON_NVPTXCOMPILER_ERROR(
602  nvPTXCompilerGetErrorLogSize(compiler, &logSize));
603  if (logSize != 0) {
604  SmallVector<char> log(logSize + 1, 0);
605  RETURN_ON_NVPTXCOMPILER_ERROR(
606  nvPTXCompilerGetErrorLog(compiler, log.data()));
607  emitError(loc) << "NVPTX compiler invocation failed, error log: "
608  << log.data();
609  } else
610  emitError(loc) << "NVPTX compiler invocation failed with error code: "
611  << status;
612  return std::nullopt;
613  }
614 
615  // Retrieve the binary.
616  size_t elfSize;
617  RETURN_ON_NVPTXCOMPILER_ERROR(
618  nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
619  SmallVector<char, 0> binary(elfSize, 0);
620  RETURN_ON_NVPTXCOMPILER_ERROR(
621  nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
622 
623 // Dump the log of the compiler, helpful if the verbose flag was passed.
624 #define DEBUG_TYPE "serialize-to-binary"
625  LLVM_DEBUG({
626  RETURN_ON_NVPTXCOMPILER_ERROR(
627  nvPTXCompilerGetInfoLogSize(compiler, &logSize));
628  if (logSize != 0) {
629  SmallVector<char> log(logSize + 1, 0);
630  RETURN_ON_NVPTXCOMPILER_ERROR(
631  nvPTXCompilerGetInfoLog(compiler, log.data()));
632  llvm::dbgs() << "NVPTX compiler invocation for module: "
633  << getOperation().getNameAttr() << "\n";
634  llvm::dbgs() << "Arguments: ";
635  llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
636  llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
637  llvm::dbgs().flush();
638  }
639  });
640 #undef DEBUG_TYPE
641  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
642 
643  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
644  bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
645  return llvm::StringRef(option) == "-32";
646  });
647 
648  const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
649  nvFatbinHandle handle;
650 
651  auto chip = getTarget().getChip();
652  chip.consume_front("sm_");
653 
654  RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
655  RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
656  handle, binary.data(), binary.size(), chip.data(), nullptr));
657  RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
658  handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
659 
660  size_t fatbinSize;
661  RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
662  SmallVector<char, 0> fatbin(fatbinSize, 0);
663  RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
664  RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
665  return fatbin;
666  }
667 
668  return binary;
669 }
670 #endif // MLIR_ENABLE_NVPTXCOMPILER
671 
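// Note on the two code paths: the `ptxas`/`fatbinary` subprocess route earlier
// in this file is always available, while the in-process route above is only
// compiled in when MLIR is configured with the CMake option that defines
// MLIR_ENABLE_NVPTXCOMPILER, e.g. (build sketch, assuming the CUDA SDK is
// discoverable):
//
//   cmake -DMLIR_ENABLE_NVPTXCOMPILER=ON <other options> ../llvm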
672 std::optional<SmallVector<char, 0>>
673 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
674  llvm::Timer moduleToObjectTimer(
675  "moduleToObjectTimer",
676  "Timer for perf llvm-ir -> isa and isa -> binary.");
677  auto clear = llvm::make_scope_exit([&]() { moduleToObjectTimer.clear(); });
678  // Return LLVM IR if the compilation target is `offload`.
679 #define DEBUG_TYPE "serialize-to-llvm"
680  LLVM_DEBUG({
681  llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
682  << "\n";
683  llvm::dbgs() << llvmModule << "\n";
684  llvm::dbgs().flush();
685  });
686 #undef DEBUG_TYPE
687  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
688  return SerializeGPUModuleBase::moduleToObject(llvmModule);
689 
690 #if !LLVM_HAS_NVPTX_TARGET
691  getOperation()->emitError(
692  "The `NVPTX` target was not built. Please enable it when building LLVM.");
693  return std::nullopt;
694 #endif // LLVM_HAS_NVPTX_TARGET
695 
696  // Emit PTX code.
697  std::optional<llvm::TargetMachine *> targetMachine =
698  getOrCreateTargetMachine();
699  if (!targetMachine) {
700  getOperation().emitError() << "Target Machine unavailable for triple "
701  << triple << ", can't optimize with LLVM\n";
702  return std::nullopt;
703  }
704  moduleToObjectTimer.startTimer();
705  std::optional<std::string> serializedISA =
706  translateToISA(llvmModule, **targetMachine);
707  moduleToObjectTimer.stopTimer();
708  llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
709  moduleToObjectTimer.clear();
710  if (!serializedISA) {
711  getOperation().emitError() << "Failed translating the module to ISA.";
712  return std::nullopt;
713  }
714 
715  if (isaCallback)
716  isaCallback(serializedISA.value());
717 
718 #define DEBUG_TYPE "serialize-to-isa"
719  LLVM_DEBUG({
720  llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
721  llvm::dbgs() << *serializedISA << "\n";
722  llvm::dbgs().flush();
723  });
724 #undef DEBUG_TYPE
725 
726  // Return PTX if the compilation target is `assembly`.
727  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
728  return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
729 
730  std::optional<SmallVector<char, 0>> result;
731  moduleToObjectTimer.startTimer();
732  // Compile to binary.
733 #if MLIR_ENABLE_NVPTXCOMPILER
734  result = compileToBinaryNVPTX(*serializedISA);
735 #else
736  result = compileToBinary(*serializedISA);
737 #endif // MLIR_ENABLE_NVPTXCOMPILER
738 
739  moduleToObjectTimer.stopTimer();
740  isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
741  moduleToObjectTimer.clear();
742  return result;
743 }
744 
745 std::optional<SmallVector<char, 0>>
746 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
747  const gpu::TargetOptions &options) const {
748  Builder builder(attribute.getContext());
749  assert(module && "The module must be non null.");
750  if (!module)
751  return std::nullopt;
752  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
753  module->emitError("Module must be a GPU module.");
754  return std::nullopt;
755  }
756  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
757  serializer.init();
758  std::optional<SmallVector<char, 0>> result = serializer.run();
759  auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
760  if (llvmToISATimeInMs.has_value())
761  module->setAttr("LLVMIRToISATimeInMs",
762  builder.getI64IntegerAttr(*llvmToISATimeInMs));
763  auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
764  if (isaToBinaryTimeInMs.has_value())
765  module->setAttr("ISAToBinaryTimeInMs",
766  builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
767  return result;
768 }
769 
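// A minimal sketch of invoking the hook above through the generic interface
// (the module and attribute names are placeholders):
//
//   auto iface = cast<gpu::TargetAttrInterface>(nvvmTargetAttr);
//   gpu::TargetOptions opts; // defaults produce a fatbinary
//   std::optional<SmallVector<char, 0>> obj =
//       iface.serializeToObject(gpuModuleOp, opts);
//   // On success, `LLVMIRToISATimeInMs` / `ISAToBinaryTimeInMs` attributes are
//   // attached to the module when those phases actually ran.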
770 Attribute
771 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
772  const SmallVector<char, 0> &object,
773  const gpu::TargetOptions &options) const {
774  auto target = cast<NVVMTargetAttr>(attribute);
775  gpu::CompilationTarget format = options.getCompilationTarget();
776  DictionaryAttr objectProps;
777  Builder builder(attribute.getContext());
778   SmallVector<NamedAttribute, 4> properties;
779   if (format == gpu::CompilationTarget::Assembly)
780  properties.push_back(
781  builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
782 
783  if (StringRef section = options.getELFSection(); !section.empty())
784  properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
785  builder.getStringAttr(section)));
786 
787  for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
788  if (module->hasAttr(perfName)) {
789  IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
790  properties.push_back(builder.getNamedAttr(
791  perfName, builder.getI64IntegerAttr(attr.getInt())));
792  }
793  }
794 
795  if (!properties.empty())
796  objectProps = builder.getDictionaryAttr(properties);
797 
798  return builder.getAttr<gpu::ObjectAttr>(
799  attribute, format,
800  builder.getStringAttr(StringRef(object.data(), object.size())),
801  objectProps, /*kernels=*/nullptr);
802 }
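// The attribute built above is what ends up inside a `gpu.binary` op; an
// illustrative (hand-written) form for an assembly-format compilation is:
//
//   gpu.binary @kernels [#gpu.object<#nvvm.target<chip = "sm_90">,
//                                    properties = {O = 3 : i32},
//                                    assembly = "...PTX...">]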