MLIR  16.0.0git
SerializeToBlob.cpp
Go to the documentation of this file.
1 //===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a base class for a pass to serialize a gpu module
10 // into a binary blob that can be executed on a GPU. The binary blob is added
11 // as a string attribute to the gpu module.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "mlir/Pass/Pass.h"
19 #include "llvm/IR/LegacyPassManager.h"
20 #include "llvm/MC/TargetRegistry.h"
21 #include "llvm/Support/TargetSelect.h"
22 #include "llvm/Target/TargetMachine.h"
23 
24 #include <string>
25 
26 #define DEBUG_TYPE "serialize-to-blob"
27 
28 using namespace mlir;
29 
30 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
31 
33  : OperationPass<gpu::GPUModuleOp>(passID) {}
34 
36  : OperationPass<gpu::GPUModuleOp>(other) {}
37 
39 gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
40  llvm::TargetMachine &targetMachine) {
41  llvmModule.setDataLayout(targetMachine.createDataLayout());
42 
43  if (failed(optimizeLlvm(llvmModule, targetMachine)))
44  return llvm::None;
45 
46  std::string targetISA;
47  llvm::raw_string_ostream stream(targetISA);
48 
49  { // Drop pstream after this to prevent the ISA from being stuck buffering
50  llvm::buffer_ostream pstream(stream);
51  llvm::legacy::PassManager codegenPasses;
52 
53  if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
54  llvm::CGFT_AssemblyFile))
55  return llvm::None;
56 
57  codegenPasses.run(llvmModule);
58  }
59  return stream.str();
60 }
61 
63  // Lower the module to an LLVM IR module using a separate context to enable
64  // multi-threaded processing.
65  llvm::LLVMContext llvmContext;
66  std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
67  if (!llvmModule)
68  return signalPassFailure();
69 
70  // Lower the LLVM IR module to target ISA.
71  std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
72  if (!targetMachine)
73  return signalPassFailure();
74 
75  Optional<std::string> maybeTargetISA =
76  translateToISA(*llvmModule, *targetMachine);
77 
78  if (!maybeTargetISA.has_value())
79  return signalPassFailure();
80 
81  std::string targetISA = std::move(maybeTargetISA.value());
82 
83  LLVM_DEBUG({
84  llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
85  llvm::dbgs() << targetISA << "\n";
86  llvm::dbgs().flush();
87  });
88 
89  // Serialize the target ISA.
90  std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
91  if (!blob)
92  return signalPassFailure();
93 
94  // Add the blob as module attribute.
95  auto attr =
96  StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
97  getOperation()->setAttr(gpuBinaryAnnotation, attr);
98 }
99 
101 gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
102  llvm::TargetMachine &targetMachine) {
103  // TODO: If serializeToCubin ends up defining optimizations, factor them
104  // into here from SerializeToHsaco
105  return success();
106 }
107 
109  DialectRegistry &registry) const {
112 }
113 
114 std::unique_ptr<llvm::TargetMachine>
115 gpu::SerializeToBlobPass::createTargetMachine() {
116  Location loc = getOperation().getLoc();
117  std::string error;
118  const llvm::Target *target =
119  llvm::TargetRegistry::lookupTarget(triple, error);
120  if (!target) {
121  emitError(loc, Twine("failed to lookup target: ") + error);
122  return {};
123  }
124  llvm::TargetMachine *machine =
125  target->createTargetMachine(triple, chip, features, {}, {});
126  if (!machine) {
127  emitError(loc, "failed to create target machine");
128  return {};
129  }
130 
131  return std::unique_ptr<llvm::TargetMachine>{machine};
132 }
133 
134 std::unique_ptr<llvm::Module>
135 gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
136  return translateModuleToLLVMIR(getOperation(), llvmContext,
137  "LLVMDialectModule");
138 }
Include the generated interface declarations.
Base pass class to serialize kernel functions through LLVM into user-specified IR and add the resulti...
Definition: Passes.h:63
gpu::GPUModuleOp getOperation()
Return the current operation being transformed.
Definition: Pass.h:366
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value...
Definition: LogicalResult.h:72
Option< std::string > gpuBinaryAnnotation
Definition: Passes.h:101
MLIRContext & getContext()
Return the MLIR context for the current operation being transformed.
Definition: Pass.h:173
Option< std::string > chip
Definition: Passes.h:97
This class provides an efficient unique identifier for a specific C++ type.
Definition: TypeID.h:104
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:48
void signalPassFailure()
Signal that some invariant was broken when running.
Definition: Pass.h:212
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
Pass to transform an operation of a specific type.
Definition: AsyncToLLVM.h:19
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Option< std::string > triple
Definition: Passes.h:95
Option< std::string > features
Definition: Passes.h:99
virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine)
Hook allowing the application of optimizations before codegen. By default, does nothing.
void getDependentDialects(DialectRegistry &registry) const override
Register dependent dialects for the current pass.
void registerLLVMDialectTranslation(DialectRegistry &registry)
Register the LLVM dialect and the translation from it to the LLVM IR in the given registry;...
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
std::unique_ptr< llvm::Module > translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext, llvm::StringRef name="LLVMDialectModule")
Translate operation that satisfies LLVM dialect module requirements into an LLVM IR module living in ...
virtual void getDependentDialects(DialectRegistry &registry) const
Register dependent dialects for the current pass.
Definition: Pass.h:73
virtual std::unique_ptr< llvm::Module > translateToLLVMIR(llvm::LLVMContext &llvmContext)
Translates the 'getOperation()' result to an LLVM module.
std::string getDefaultGpuBinaryAnnotation()
Returns the default annotation name for GPU binary blobs.
void runOnOperation() final
The polymorphic API that runs the pass over the currently held operation.