MLIR  21.0.0git
Target.cpp
Go to the documentation of this file.
1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This files defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "mlir/Target/LLVM/NVVM/Target.h"

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/Target/LLVM/NVVM/Utils.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"

#include "llvm/Config/llvm-config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <cstdlib>
#include <optional>

45 using namespace mlir;
46 using namespace mlir::NVVM;
47 
48 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
49 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
50 #endif
51 
52 extern "C" const unsigned char _mlir_embedded_libdevice[];
53 extern "C" const unsigned _mlir_embedded_libdevice_size;
54 
55 namespace {
56 // Implementation of the `TargetAttrInterface` model.
57 class NVVMTargetAttrImpl
58  : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
59 public:
60  std::optional<SmallVector<char, 0>>
61  serializeToObject(Attribute attribute, Operation *module,
62  const gpu::TargetOptions &options) const;
63 
64  Attribute createObject(Attribute attribute, Operation *module,
65  const SmallVector<char, 0> &object,
66  const gpu::TargetOptions &options) const;
67 };
68 } // namespace
69 
70 // Register the NVVM dialect, the NVVM translation & the target interface.
72  DialectRegistry &registry) {
73  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
74  NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
75  });
76 }
77 
79  MLIRContext &context) {
80  DialectRegistry registry;
82  context.appendDialectRegistry(registry);
83 }
84 
85 // Search for the CUDA toolkit path.
87  if (const char *var = std::getenv("CUDA_ROOT"))
88  return var;
89  if (const char *var = std::getenv("CUDA_HOME"))
90  return var;
91  if (const char *var = std::getenv("CUDA_PATH"))
92  return var;
94 }
95 
97  Operation &module, NVVMTargetAttr target,
98  const gpu::TargetOptions &targetOptions)
99  : ModuleToObject(module, target.getTriple(), target.getChip(),
100  target.getFeatures(), target.getO(),
101  targetOptions.getInitialLlvmIRCallback(),
102  targetOptions.getLinkedLlvmIRCallback(),
103  targetOptions.getOptimizedLlvmIRCallback(),
104  targetOptions.getISACallback()),
105  target(target), toolkitPath(targetOptions.getToolkitPath()),
106  librariesToLink(targetOptions.getLibrariesToLink()) {
107 
108  // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
109  if (toolkitPath.empty())
111 
112  // Append the files in the target attribute.
113  if (target.getLink())
114  librariesToLink.append(target.getLink().begin(), target.getLink().end());
115 
116  // Append libdevice to the files to be loaded.
117  (void)appendStandardLibs();
118 }
119 
121  static llvm::once_flag initializeBackendOnce;
122  llvm::call_once(initializeBackendOnce, []() {
123  // If the `NVPTX` LLVM target was built, initialize it.
124 #if LLVM_HAS_NVPTX_TARGET
125  LLVMInitializeNVPTXTarget();
126  LLVMInitializeNVPTXTargetInfo();
127  LLVMInitializeNVPTXTargetMC();
128  LLVMInitializeNVPTXAsmPrinter();
129 #endif
130  });
131 }
132 
133 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
134 
136 
138  return librariesToLink;
139 }
140 
141 // Try to append `libdevice` from a CUDA toolkit installation.
143 #if MLIR_NVVM_EMBED_LIBDEVICE
144  // If libdevice is embedded in the binary, we don't look it up on the
145  // filesystem.
146  MLIRContext *ctx = target.getContext();
147  auto type =
149  IntegerType::get(ctx, 8));
150  auto resourceManager = DenseResourceElementsHandle::getManagerInterface(ctx);
151 
152  // Lookup if we already loaded the resource, otherwise create it.
154  resourceManager.getBlobManager().lookup("_mlir_embedded_libdevice");
155  if (blob) {
158  blob, ctx->getLoadedDialect<BuiltinDialect>())));
159  return success();
160  }
161 
162  // Allocate a resource using one of the UnManagedResourceBlob method to wrap
163  // the embedded data.
168  type, resourceManager.insert("_mlir_embedded_libdevice",
169  std::move(unmanagedBlob))));
170 #else
171  StringRef pathRef = getToolkitPath();
172  if (!pathRef.empty()) {
174  path.insert(path.begin(), pathRef.begin(), pathRef.end());
175  pathRef = StringRef(path.data(), path.size());
176  if (!llvm::sys::fs::is_directory(pathRef)) {
177  getOperation().emitError() << "CUDA path: " << pathRef
178  << " does not exist or is not a directory.\n";
179  return failure();
180  }
181  llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
182  pathRef = StringRef(path.data(), path.size());
183  if (!llvm::sys::fs::is_regular_file(pathRef)) {
184  getOperation().emitError() << "LibDevice path: " << pathRef
185  << " does not exist or is not a file.\n";
186  return failure();
187  }
188  librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef));
189  }
190 #endif
191  return success();
192 }
193 
194 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
198  bcFiles, true)))
199  return std::nullopt;
200  return std::move(bcFiles);
201 }
202 
203 namespace {
204 class NVPTXSerializer : public SerializeGPUModuleBase {
205 public:
206  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
207  const gpu::TargetOptions &targetOptions);
208 
209  /// Returns the GPU module op being serialized.
210  gpu::GPUModuleOp getOperation();
211 
212  /// Compiles PTX to cubin using `ptxas`.
213  std::optional<SmallVector<char, 0>>
214  compileToBinary(const std::string &ptxCode);
215 
216  /// Compiles PTX to cubin using the `nvptxcompiler` library.
217  std::optional<SmallVector<char, 0>>
218  compileToBinaryNVPTX(const std::string &ptxCode);
219 
220  /// Serializes the LLVM module to an object format, depending on the
221  /// compilation target selected in target options.
222  std::optional<SmallVector<char, 0>>
223  moduleToObject(llvm::Module &llvmModule) override;
224 
225  /// Get LLVMIR->ISA performance result.
226  /// Return nullopt if moduleToObject has not been called or the target format
227  /// is LLVMIR.
228  std::optional<int64_t> getLLVMIRToISATimeInMs();
229 
230  /// Get ISA->Binary performance result.
231  /// Return nullopt if moduleToObject has not been called or the target format
232  /// is LLVMIR or ISA.
233  std::optional<int64_t> getISAToBinaryTimeInMs();
234 
235 private:
236  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
237 
238  /// Creates a temp file.
239  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
240 
241  /// Finds the `tool` path, where `tool` is the name of the binary to search,
242  /// i.e. `ptxas` or `fatbinary`. The search order is:
243  /// 1. The toolkit path in `targetOptions`.
244  /// 2. In the system PATH.
245  /// 3. The path from `getCUDAToolkitPath()`.
246  std::optional<std::string> findTool(StringRef tool);
247 
248  /// Target options.
249  gpu::TargetOptions targetOptions;
250 
251  /// LLVMIR->ISA perf result.
252  std::optional<int64_t> llvmToISATimeInMs;
253 
254  /// ISA->Binary perf result.
255  std::optional<int64_t> isaToBinaryTimeInMs;
256 };
257 } // namespace
258 
259 NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
260  const gpu::TargetOptions &targetOptions)
261  : SerializeGPUModuleBase(module, target, targetOptions),
262  targetOptions(targetOptions), llvmToISATimeInMs(std::nullopt),
263  isaToBinaryTimeInMs(std::nullopt) {}
264 
265 std::optional<NVPTXSerializer::TmpFile>
266 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
267  llvm::SmallString<128> filename;
268  std::error_code ec =
269  llvm::sys::fs::createTemporaryFile(name, suffix, filename);
270  if (ec) {
271  getOperation().emitError() << "Couldn't create the temp file: `" << filename
272  << "`, error message: " << ec.message();
273  return std::nullopt;
274  }
275  return TmpFile(filename, llvm::FileRemover(filename.c_str()));
276 }
277 
278 std::optional<int64_t> NVPTXSerializer::getLLVMIRToISATimeInMs() {
279  return llvmToISATimeInMs;
280 }
281 
282 std::optional<int64_t> NVPTXSerializer::getISAToBinaryTimeInMs() {
283  return isaToBinaryTimeInMs;
284 }
285 
286 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
287  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
288 }
289 
290 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
291  // Find the `tool` path.
292  // 1. Check the toolkit path given in the command line.
293  StringRef pathRef = targetOptions.getToolkitPath();
295  if (!pathRef.empty()) {
296  path.insert(path.begin(), pathRef.begin(), pathRef.end());
297  llvm::sys::path::append(path, "bin", tool);
298  if (llvm::sys::fs::can_execute(path))
299  return StringRef(path.data(), path.size()).str();
300  }
301 
302  // 2. Check PATH.
303  if (std::optional<std::string> toolPath =
304  llvm::sys::Process::FindInEnvPath("PATH", tool))
305  return *toolPath;
306 
307  // 3. Check `getCUDAToolkitPath()`.
308  pathRef = getCUDAToolkitPath();
309  path.clear();
310  if (!pathRef.empty()) {
311  path.insert(path.begin(), pathRef.begin(), pathRef.end());
312  llvm::sys::path::append(path, "bin", tool);
313  if (llvm::sys::fs::can_execute(path))
314  return StringRef(path.data(), path.size()).str();
315  }
316  getOperation().emitError()
317  << "Couldn't find the `" << tool
318  << "` binary. Please specify the toolkit "
319  "path, add the compiler to $PATH, or set one of the environment "
320  "variables in `NVVM::getCUDAToolkitPath()`.";
321  return std::nullopt;
322 }
323 
324 /// Adds optional command-line arguments to existing arguments.
325 template <typename T>
326 static void setOptionalCommandlineArguments(NVVMTargetAttr target,
327  SmallVectorImpl<T> &ptxasArgs) {
328  if (!target.hasCmdOptions())
329  return;
330 
331  std::optional<mlir::NamedAttribute> cmdOptions = target.getCmdOptions();
332  for (Attribute attr : cast<ArrayAttr>(cmdOptions->getValue())) {
333  if (auto strAttr = dyn_cast<StringAttr>(attr)) {
334  if constexpr (std::is_same_v<T, StringRef>) {
335  ptxasArgs.push_back(strAttr.getValue());
336  } else if constexpr (std::is_same_v<T, const char *>) {
337  ptxasArgs.push_back(strAttr.getValue().data());
338  }
339  }
340  }
341 }
342 
343 // TODO: clean this method & have a generic tool driver or never emit binaries
344 // with this mechanism and let another stage take care of it.
345 std::optional<SmallVector<char, 0>>
346 NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
347  // Determine if the serializer should create a fatbinary with the PTX embeded
348  // or a simple CUBIN binary.
349  const bool createFatbin =
350  targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
351 
352  // Find the `ptxas` & `fatbinary` tools.
353  std::optional<std::string> ptxasCompiler = findTool("ptxas");
354  if (!ptxasCompiler)
355  return std::nullopt;
356  std::optional<std::string> fatbinaryTool;
357  if (createFatbin) {
358  fatbinaryTool = findTool("fatbinary");
359  if (!fatbinaryTool)
360  return std::nullopt;
361  }
362  Location loc = getOperation().getLoc();
363 
364  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
365  std::string basename =
366  llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
367  getTarget().getTriple(), getTarget().getChip());
368 
369  // Create temp files:
370  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
371  if (!ptxFile)
372  return std::nullopt;
373  std::optional<TmpFile> logFile = createTemp(basename, "log");
374  if (!logFile)
375  return std::nullopt;
376  std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
377  if (!binaryFile)
378  return std::nullopt;
379  TmpFile cubinFile;
380  if (createFatbin) {
381  std::string cubinFilename = (ptxFile->first + ".cubin").str();
382  cubinFile = TmpFile(cubinFilename, llvm::FileRemover(cubinFilename));
383  } else {
384  cubinFile.first = binaryFile->first;
385  }
386 
387  std::error_code ec;
388  // Dump the PTX to a temp file.
389  {
390  llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
391  if (ec) {
392  emitError(loc) << "Couldn't open the file: `" << ptxFile->first
393  << "`, error message: " << ec.message();
394  return std::nullopt;
395  }
396  ptxStream << ptxCode;
397  if (ptxStream.has_error()) {
398  emitError(loc) << "An error occurred while writing the PTX to: `"
399  << ptxFile->first << "`.";
400  return std::nullopt;
401  }
402  ptxStream.flush();
403  }
404 
405  // Command redirects.
406  std::optional<StringRef> redirects[] = {
407  std::nullopt,
408  logFile->first,
409  logFile->first,
410  };
411 
412  // Get any extra args passed in `targetOptions`.
413  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
414  targetOptions.tokenizeCmdOptions();
415 
416  // Create ptxas args.
417  std::string optLevel = std::to_string(this->optLevel);
418  SmallVector<StringRef, 12> ptxasArgs(
419  {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
420  StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
421  "--opt-level", optLevel});
422 
423  bool useFatbin32 = false;
424  for (const auto *cArg : cmdOpts.second) {
425  // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
426  // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
427  // assumed.
428  if (StringRef arg(cArg); arg != "-32")
429  ptxasArgs.push_back(arg);
430  else
431  useFatbin32 = true;
432  }
433 
434  // Set optional command line arguments
435  setOptionalCommandlineArguments(getTarget(), ptxasArgs);
436 
437  // Create the `fatbinary` args.
438  StringRef chip = getTarget().getChip();
439  // Remove the arch prefix to obtain the compute capability.
440  chip.consume_front("sm_"), chip.consume_front("compute_");
441  // Embed the cubin object.
442  std::string cubinArg =
443  llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
444  .str();
445  // Embed the PTX file so the driver can JIT if needed.
446  std::string ptxArg =
447  llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
448  .str();
449  SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
450  useFatbin32 ? "-32" : "-64", cubinArg,
451  ptxArg, "--create", binaryFile->first});
452 
453  // Dump tool invocation commands.
454 #define DEBUG_TYPE "serialize-to-binary"
455  LLVM_DEBUG({
456  llvm::dbgs() << "Tool invocation for module: "
457  << getOperation().getNameAttr() << "\n";
458  llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
459  llvm::dbgs() << "\n";
460  if (createFatbin) {
461  llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
462  llvm::dbgs() << "\n";
463  }
464  });
465 #undef DEBUG_TYPE
466 
467  // Helper function for printing tool error logs.
468  std::string message;
469  auto emitLogError =
470  [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
471  if (message.empty()) {
472  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
473  llvm::MemoryBuffer::getFile(logFile->first);
474  if (toolStderr)
475  emitError(loc) << toolName << " invocation failed. Log:\n"
476  << toolStderr->get()->getBuffer();
477  else
478  emitError(loc) << toolName << " invocation failed.";
479  return std::nullopt;
480  }
481  emitError(loc) << toolName
482  << " invocation failed, error message: " << message;
483  return std::nullopt;
484  };
485 
486  // Invoke PTXAS.
487  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
488  /*Env=*/std::nullopt,
489  /*Redirects=*/redirects,
490  /*SecondsToWait=*/0,
491  /*MemoryLimit=*/0,
492  /*ErrMsg=*/&message))
493  return emitLogError("`ptxas`");
494 #define DEBUG_TYPE "dump-sass"
495  LLVM_DEBUG({
496  std::optional<std::string> nvdisasm = findTool("nvdisasm");
497  SmallVector<StringRef> nvdisasmArgs(
498  {StringRef("nvdisasm"), StringRef(cubinFile.first)});
499  if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
500  /*Env=*/std::nullopt,
501  /*Redirects=*/redirects,
502  /*SecondsToWait=*/0,
503  /*MemoryLimit=*/0,
504  /*ErrMsg=*/&message))
505  return emitLogError("`nvdisasm`");
506  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
507  llvm::MemoryBuffer::getFile(logFile->first);
508  if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
509  llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
510  llvm::dbgs().flush();
511  }
512  });
513 #undef DEBUG_TYPE
514 
515  // Invoke `fatbin`.
516  message.clear();
517  if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
518  /*Env=*/std::nullopt,
519  /*Redirects=*/redirects,
520  /*SecondsToWait=*/0,
521  /*MemoryLimit=*/0,
522  /*ErrMsg=*/&message))
523  return emitLogError("`fatbinary`");
524 
525 // Dump the output of the tools, helpful if the verbose flag was passed.
526 #define DEBUG_TYPE "serialize-to-binary"
527  LLVM_DEBUG({
528  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
529  llvm::MemoryBuffer::getFile(logFile->first);
530  if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
531  llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
532  llvm::dbgs().flush();
533  }
534  });
535 #undef DEBUG_TYPE
536 
537  // Read the fatbin.
538  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
539  llvm::MemoryBuffer::getFile(binaryFile->first);
540  if (!binaryBuffer) {
541  emitError(loc) << "Couldn't open the file: `" << binaryFile->first
542  << "`, error message: " << binaryBuffer.getError().message();
543  return std::nullopt;
544  }
545  StringRef fatbin = (*binaryBuffer)->getBuffer();
546  return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
547 }
548 
549 #if MLIR_ENABLE_NVPTXCOMPILER
550 #include "nvPTXCompiler.h"
551 
552 #define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
553  do { \
554  if (auto status = (expr)) { \
555  emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
556  << status; \
557  return std::nullopt; \
558  } \
559  } while (false)
560 
561 #include "nvFatbin.h"
562 
563 #define RETURN_ON_NVFATBIN_ERROR(expr) \
564  do { \
565  auto result = (expr); \
566  if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
567  emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
568  << nvFatbinGetErrorString(result); \
569  return std::nullopt; \
570  } \
571  } while (false)
572 
573 std::optional<SmallVector<char, 0>>
574 NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
575  Location loc = getOperation().getLoc();
576  nvPTXCompilerHandle compiler = nullptr;
577  nvPTXCompileResult status;
578  size_t logSize;
579 
580  // Create the options.
581  std::string optLevel = std::to_string(this->optLevel);
582  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
583  targetOptions.tokenizeCmdOptions();
584  cmdOpts.second.append(
585  {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
586 
587  // Set optional command line arguments
588  setOptionalCommandlineArguments(getTarget(), cmdOpts.second);
589  // Create the compiler handle.
590  RETURN_ON_NVPTXCOMPILER_ERROR(
591  nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
592 
593  // Try to compile the binary.
594  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
595  cmdOpts.second.data());
596 
597  // Check if compilation failed.
598  if (status != NVPTXCOMPILE_SUCCESS) {
599  RETURN_ON_NVPTXCOMPILER_ERROR(
600  nvPTXCompilerGetErrorLogSize(compiler, &logSize));
601  if (logSize != 0) {
602  SmallVector<char> log(logSize + 1, 0);
603  RETURN_ON_NVPTXCOMPILER_ERROR(
604  nvPTXCompilerGetErrorLog(compiler, log.data()));
605  emitError(loc) << "NVPTX compiler invocation failed, error log: "
606  << log.data();
607  } else
608  emitError(loc) << "NVPTX compiler invocation failed with error code: "
609  << status;
610  return std::nullopt;
611  }
612 
613  // Retrieve the binary.
614  size_t elfSize;
615  RETURN_ON_NVPTXCOMPILER_ERROR(
616  nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
617  SmallVector<char, 0> binary(elfSize, 0);
618  RETURN_ON_NVPTXCOMPILER_ERROR(
619  nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
620 
621 // Dump the log of the compiler, helpful if the verbose flag was passed.
622 #define DEBUG_TYPE "serialize-to-binary"
623  LLVM_DEBUG({
624  RETURN_ON_NVPTXCOMPILER_ERROR(
625  nvPTXCompilerGetInfoLogSize(compiler, &logSize));
626  if (logSize != 0) {
627  SmallVector<char> log(logSize + 1, 0);
628  RETURN_ON_NVPTXCOMPILER_ERROR(
629  nvPTXCompilerGetInfoLog(compiler, log.data()));
630  llvm::dbgs() << "NVPTX compiler invocation for module: "
631  << getOperation().getNameAttr() << "\n";
632  llvm::dbgs() << "Arguments: ";
633  llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
634  llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
635  llvm::dbgs().flush();
636  }
637  });
638 #undef DEBUG_TYPE
639  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
640 
641  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
642  bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
643  return llvm::StringRef(option) == "-32";
644  });
645 
646  const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
647  nvFatbinHandle handle;
648 
649  auto chip = getTarget().getChip();
650  chip.consume_front("sm_");
651 
652  RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
653  RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
654  handle, binary.data(), binary.size(), chip.data(), nullptr));
655  RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
656  handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
657 
658  size_t fatbinSize;
659  RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
660  SmallVector<char, 0> fatbin(fatbinSize, 0);
661  RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
662  RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
663  return fatbin;
664  }
665 
666  return binary;
667 }
668 #endif // MLIR_ENABLE_NVPTXCOMPILER
669 
670 std::optional<SmallVector<char, 0>>
671 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
672  llvm::Timer moduleToObjectTimer(
673  "moduleToObjectTimer",
674  "Timer for perf llvm-ir -> isa and isa -> binary.");
675  moduleToObjectTimer.startTimer();
676  // Return LLVM IR if the compilation target is `offload`.
677 #define DEBUG_TYPE "serialize-to-llvm"
678  LLVM_DEBUG({
679  llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
680  << "\n";
681  llvm::dbgs() << llvmModule << "\n";
682  llvm::dbgs().flush();
683  });
684 #undef DEBUG_TYPE
685  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
686  return SerializeGPUModuleBase::moduleToObject(llvmModule);
687 
688 #if !LLVM_HAS_NVPTX_TARGET
689  getOperation()->emitError(
690  "The `NVPTX` target was not built. Please enable it when building LLVM.");
691  return std::nullopt;
692 #endif // LLVM_HAS_NVPTX_TARGET
693 
694  // Emit PTX code.
695  std::optional<llvm::TargetMachine *> targetMachine =
696  getOrCreateTargetMachine();
697  if (!targetMachine) {
698  getOperation().emitError() << "Target Machine unavailable for triple "
699  << triple << ", can't optimize with LLVM\n";
700  return std::nullopt;
701  }
702  std::optional<std::string> serializedISA =
703  translateToISA(llvmModule, **targetMachine);
704  if (!serializedISA) {
705  getOperation().emitError() << "Failed translating the module to ISA.";
706  return std::nullopt;
707  }
708 
709  moduleToObjectTimer.stopTimer();
710  llvmToISATimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
711  moduleToObjectTimer.clear();
712  moduleToObjectTimer.startTimer();
713  if (isaCallback)
714  isaCallback(serializedISA.value());
715 
716 #define DEBUG_TYPE "serialize-to-isa"
717  LLVM_DEBUG({
718  llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
719  llvm::dbgs() << *serializedISA << "\n";
720  llvm::dbgs().flush();
721  });
722 #undef DEBUG_TYPE
723 
724  // Return PTX if the compilation target is `assembly`.
725  if (targetOptions.getCompilationTarget() ==
726  gpu::CompilationTarget::Assembly) {
727  // Make sure to include the null terminator.
728  StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
729  return SmallVector<char, 0>(bin.begin(), bin.end());
730  }
731 
732  std::optional<SmallVector<char, 0>> result;
733  // Compile to binary.
734 #if MLIR_ENABLE_NVPTXCOMPILER
735  result = compileToBinaryNVPTX(*serializedISA);
736 #else
737  result = compileToBinary(*serializedISA);
738 #endif // MLIR_ENABLE_NVPTXCOMPILER
739 
740  moduleToObjectTimer.stopTimer();
741  isaToBinaryTimeInMs = moduleToObjectTimer.getTotalTime().getWallTime() * 1000;
742  moduleToObjectTimer.clear();
743  return result;
744 }
745 
746 std::optional<SmallVector<char, 0>>
747 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
748  const gpu::TargetOptions &options) const {
749  Builder builder(attribute.getContext());
750  assert(module && "The module must be non null.");
751  if (!module)
752  return std::nullopt;
753  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
754  module->emitError("Module must be a GPU module.");
755  return std::nullopt;
756  }
757  NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
758  serializer.init();
759  std::optional<SmallVector<char, 0>> result = serializer.run();
760  auto llvmToISATimeInMs = serializer.getLLVMIRToISATimeInMs();
761  if (llvmToISATimeInMs.has_value())
762  module->setAttr("LLVMIRToISATimeInMs",
763  builder.getI64IntegerAttr(*llvmToISATimeInMs));
764  auto isaToBinaryTimeInMs = serializer.getISAToBinaryTimeInMs();
765  if (isaToBinaryTimeInMs.has_value())
766  module->setAttr("ISAToBinaryTimeInMs",
767  builder.getI64IntegerAttr(*isaToBinaryTimeInMs));
768  return result;
769 }
770 
771 Attribute
772 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
773  const SmallVector<char, 0> &object,
774  const gpu::TargetOptions &options) const {
775  auto target = cast<NVVMTargetAttr>(attribute);
776  gpu::CompilationTarget format = options.getCompilationTarget();
777  DictionaryAttr objectProps;
778  Builder builder(attribute.getContext());
780  if (format == gpu::CompilationTarget::Assembly)
781  properties.push_back(
782  builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
783 
784  if (StringRef section = options.getELFSection(); !section.empty())
785  properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
786  builder.getStringAttr(section)));
787 
788  for (const auto *perfName : {"LLVMIRToISATimeInMs", "ISAToBinaryTimeInMs"}) {
789  if (module->hasAttr(perfName)) {
790  IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
791  properties.push_back(builder.getNamedAttr(
792  perfName, builder.getI64IntegerAttr(attr.getInt())));
793  }
794  }
795 
796  if (!properties.empty())
797  objectProps = builder.getDictionaryAttr(properties);
798 
799  return builder.getAttr<gpu::ObjectAttr>(
800  attribute, format,
801  builder.getStringAttr(StringRef(object.data(), object.size())),
802  objectProps, /*kernels=*/nullptr);
803 }
static void setOptionalCommandlineArguments(NVVMTargetAttr target, SmallVectorImpl< T > &ptxasArgs)
Adds optional command-line arguments to existing arguments.
Definition: Target.cpp:326
const unsigned _mlir_embedded_libdevice_size
Definition: Target.cpp:53
#define __DEFAULT_CUDATOOLKIT_PATH__
Definition: Target.cpp:49
const unsigned char _mlir_embedded_libdevice[]
Definition: Target.cpp:52
static llvm::ManagedStatic< PassManagerOptions > options
Attributes are known-constant values of operations.
Definition: Attributes.h:25
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:37
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:51
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
The class represents an individual entry of a blob.
LogicalResult loadBitcodeFilesFromList(llvm::LLVMContext &context, ArrayRef< Attribute > librariesToLink, SmallVector< std::unique_ptr< llvm::Module >> &llvmModules, bool failureOnError=true)
Loads multiple bitcode files.
virtual std::optional< SmallVector< char, 0 > > moduleToObject(llvm::Module &llvmModule)
Serializes the LLVM IR bitcode to an object file, by default it serializes to LLVM bitcode.
Operation & getOperation()
Returns the operation being serialized.
Operation & module
Module to transform to a binary object.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
void appendDialectRegistry(const DialectRegistry &registry)
Append the contents of the given dialect registry to the registry associated with this context.
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Base class for all NVVM serializations from GPU modules into binary strings.
Definition: Utils.h:32
ArrayRef< Attribute > getLibrariesToLink() const
Returns the bitcode libraries to be linked into the gpu module after translation to LLVM IR.
Definition: Target.cpp:137
SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target, const gpu::TargetOptions &targetOptions={})
Initializes the toolkitPath with the path in targetOptions or if empty with the path in getCUDAToolki...
Definition: Target.cpp:96
NVVMTargetAttr target
NVVM target attribute.
Definition: Utils.h:63
std::string toolkitPath
CUDA toolkit path.
Definition: Utils.h:66
SmallVector< Attribute > librariesToLink
List of LLVM bitcode to link into after translation to LLVM IR.
Definition: Utils.h:71
std::optional< SmallVector< std::unique_ptr< llvm::Module > > > loadBitcodeFiles(llvm::Module &module) override
Loads the bitcode files in librariesToLink.
Definition: Target.cpp:195
LogicalResult appendStandardLibs()
Appends nvvm/libdevice.bc into librariesToLink.
Definition: Target.cpp:142
static void init()
Initializes the LLVM NVPTX target by safely calling LLVMInitializeNVPTX* methods if available.
Definition: Target.cpp:120
StringRef getToolkitPath() const
Returns the CUDA toolkit path.
Definition: Target.cpp:135
NVVMTargetAttr getTarget() const
Returns the target attribute.
Definition: Target.cpp:133
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Attribute getAttr(StringAttr name)
Return the specified attribute if present, null otherwise.
Definition: Operation.h:534
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition: Operation.h:560
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
void setAttr(StringAttr name, Attribute value)
If the an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:582
static AsmResourceBlob allocateInferAlign(ArrayRef< T > data, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
Definition: AsmState.h:234
This class serves as an opaque interface for passing options to the TargetAttrInterface methods.
void registerNVVMTargetInterfaceExternalModels(DialectRegistry &registry)
Registers the TargetAttrInterface for the #nvvm.target attribute in the given registry.
Definition: Target.cpp:71
StringRef getCUDAToolkitPath()
Searches & returns the path CUDA toolkit path, the search order is:
Definition: Target.cpp:86
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
DialectResourceBlobHandle< BuiltinDialect > DenseResourceElementsHandle
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
static ManagerInterface & getManagerInterface(MLIRContext *ctx)
Get the interface for the dialect that owns handles of this type.