Passes.h
//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes that expose pass constructors.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
#define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Pass/Pass.h"

namespace llvm {
class TargetMachine;
class LLVMContext;
class Module;
} // namespace llvm

namespace mlir {
namespace func {
class FuncOp;
} // namespace func

/// Pass that moves ops which are likely an index computation into gpu.launch
/// body.
std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass();

/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
/// a separate kernel function.
std::unique_ptr<OperationPass<ModuleOp>>
createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());

/// Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();

/// Maps the parallel loops found in the given function to workgroups. The first
/// loop encountered will be mapped to the global workgroup and the second loop
/// encountered to the local workgroup. Within each mapping, the first three
/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
/// mapped to sequential loops.
std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();

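// Illustrative usage sketch (not part of the upstream header): one possible
// way to wire the pass constructors above into a module-level pipeline.
// `buildGpuPipeline` is a hypothetical helper name, the ordering is only an
// example, and the snippet assumes the PassManager and func dialect headers
// are available.
//
//   void buildGpuPipeline(PassManager &pm) {
//     pm.addNestedPass<func::FuncOp>(createGpuMapParallelLoopsPass());
//     pm.addPass(createGpuKernelOutliningPass());
//     pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
//   }
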
/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuAllReducePatterns(RewritePatternSet &patterns);

/// Collect all patterns to rewrite ops within the GPU dialect.
inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
  populateGpuAllReducePatterns(patterns);
}
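
// Illustrative usage sketch (not part of the upstream header): the populate
// functions are typically called from a pass's runOnOperation() and handed to
// the greedy pattern driver (applyPatternsAndFoldGreedily, declared in
// mlir/Transforms/GreedyPatternRewriteDriver.h):
//
//   RewritePatternSet patterns(&getContext());
//   populateGpuRewritePatterns(patterns);
//   if (failed(applyPatternsAndFoldGreedily(getOperation(),
//                                           std::move(patterns))))
//     signalPassFailure();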

namespace gpu {
/// Returns the default annotation name for GPU binary blobs.
std::string getDefaultGpuBinaryAnnotation();

/// Base pass class to serialize kernel functions through LLVM into
/// user-specified IR and add the resulting blob as module attribute.
class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
public:
  SerializeToBlobPass(TypeID passID);
  SerializeToBlobPass(const SerializeToBlobPass &other);

  void runOnOperation() final;

protected:
  void getDependentDialects(DialectRegistry &registry) const override;

  /// Hook allowing the application of optimizations before codegen
  /// By default, does nothing
  virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule,
                                     llvm::TargetMachine &targetMachine);

  /// Translates the 'getOperation()' result to an LLVM module.
  virtual std::unique_ptr<llvm::Module>
  translateToLLVMIR(llvm::LLVMContext &llvmContext);

private:
  /// Creates the LLVM target machine to generate the ISA.
  std::unique_ptr<llvm::TargetMachine> createTargetMachine();

  /// Translates the module to ISA
  Optional<std::string> translateToISA(llvm::Module &llvmModule,
                                       llvm::TargetMachine &targetMachine);

  /// Serializes the target ISA to binary form.
  virtual std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) = 0;

protected:
  Option<std::string> triple{*this, "triple",
                             ::llvm::cl::desc("Target triple")};
  Option<std::string> chip{*this, "chip",
                           ::llvm::cl::desc("Target architecture")};
  Option<std::string> features{*this, "features",
                               ::llvm::cl::desc("Target features")};
  Option<std::string> gpuBinaryAnnotation{
      *this, "gpu-binary-annotation",
      llvm::cl::desc("Annotation attribute string for GPU binary"),
      llvm::cl::init(getDefaultGpuBinaryAnnotation())};
};
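
// Illustrative sketch (hypothetical class name, not part of upstream MLIR):
// a concrete serializer derives from SerializeToBlobPass and implements
// serializeISA(); optimizeLlvm() and translateToLLVMIR() may be overridden
// when the target needs custom handling. A real pass would also provide the
// TypeID-based constructor and registration boilerplate.
//
//   class SerializeToFooPass : public SerializeToBlobPass {
//     std::unique_ptr<std::vector<char>>
//     serializeISA(const std::string &isa) override {
//       // A real backend would invoke its assembler/linker here; this sketch
//       // simply returns the textual ISA as raw bytes.
//       return std::make_unique<std::vector<char>>(isa.begin(), isa.end());
//     }
//   };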
} // namespace gpu

//===----------------------------------------------------------------------===//
// Registration
//===----------------------------------------------------------------------===//

/// Register pass to serialize GPU kernel functions to a CUBIN binary
/// annotation.
void registerGpuSerializeToCubinPass();

/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.
void registerGpuSerializeToHsacoPass();

/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
                                                    StringRef arch,
                                                    StringRef features,
                                                    int optLevel);

/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
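
// Illustrative usage sketch (not part of the upstream header): a tool that
// wants these passes available on its command line calls the registration
// hooks during initialization, e.g.:
//
//   registerGpuSerializeToCubinPass();
//   registerGpuSerializeToHsacoPass();
//   // ...plus the entry points generated by GEN_PASS_REGISTRATION above.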

} // namespace mlir

#endif // MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_