MLIR  18.0.0git
Passes.h
Go to the documentation of this file.
1 //===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines prototypes that expose pass constructors.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
14 #define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
15 
16 #include "Utils.h"
18 #include "mlir/Pass/Pass.h"
19 #include <optional>
20 
21 namespace llvm {
22 class TargetMachine;
23 class LLVMContext;
24 class Module;
25 } // namespace llvm
26 
27 namespace mlir {
28 class TypeConverter;
29 class ConversionTarget;
30 namespace func {
31 class FuncOp;
32 } // namespace func
33 
34 #define GEN_PASS_DECL
35 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
36 
37 /// Pass that moves ops which are likely an index computation into gpu.launch
38 /// body.
39 std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass();
40 
41 /// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
42 /// a separate kernel function.
43 std::unique_ptr<OperationPass<ModuleOp>>
44 createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
45 
46 /// Rewrites a function region so that GPU ops execute asynchronously.
47 std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
48 
49 /// Maps the parallel loops found in the given function to workgroups. The first
50 /// loop encountered will be mapped to the global workgroup and the second loop
51 /// encountered to the local workgroup. Within each mapping, the first three
52 /// dimensions are mapped to x/y/z hardware ids and all following dimensions are
53 /// mapped to sequential loops.
54 std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
55 
56 /// Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect.
57 void populateGpuGlobalIdPatterns(RewritePatternSet &patterns);
58 
59 /// Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
60 void populateGpuShufflePatterns(RewritePatternSet &patterns);
61 
62 /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
63 void populateGpuAllReducePatterns(RewritePatternSet &patterns);
64 
65 /// Collect all patterns to rewrite ops within the GPU dialect.
66 inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
67  populateGpuAllReducePatterns(patterns);
68  populateGpuGlobalIdPatterns(patterns);
69  populateGpuShufflePatterns(patterns);
70 }
71 
72 namespace gpu {
73 /// Searches for all GPU modules in `op` and transforms them into GPU binary
74 /// operations. The resulting `gpu.binary` has `handler` as its offloading
75 /// handler attribute.
76 LogicalResult transformGpuModulesToBinaries(
77  Operation *op, OffloadingLLVMTranslationAttrInterface handler = nullptr,
78  const gpu::TargetOptions &options = {});
79 
80 /// Base pass class to serialize kernel functions through LLVM into
81 /// user-specified IR and add the resulting blob as module attribute.
82 class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
83 public:
84  SerializeToBlobPass(TypeID passID);
85  SerializeToBlobPass(const SerializeToBlobPass &other);
86 
87  void runOnOperation() final;
88 
89 protected:
90  /// Hook allowing the application of optimizations before codegen.
91  /// By default, does nothing.
92  virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule,
93  llvm::TargetMachine &targetMachine);
94 
95  /// Translates the 'getOperation()' result to an LLVM module.
96  virtual std::unique_ptr<llvm::Module>
97  translateToLLVMIR(llvm::LLVMContext &llvmContext);
98 
99 private:
100  /// Creates the LLVM target machine to generate the ISA.
101  std::unique_ptr<llvm::TargetMachine> createTargetMachine();
102 
103  /// Translates the module to ISA.
104  std::optional<std::string> translateToISA(llvm::Module &llvmModule,
105  llvm::TargetMachine &targetMachine);
106 
107  /// Serializes the target ISA to binary form.
108  virtual std::unique_ptr<std::vector<char>>
109  serializeISA(const std::string &isa) = 0;
110 
111 protected:
112  Option<std::string> triple{*this, "triple",
113  ::llvm::cl::desc("Target triple")};
114  Option<std::string> chip{*this, "chip",
115  ::llvm::cl::desc("Target architecture")};
116  Option<std::string> features{*this, "features",
117  ::llvm::cl::desc("Target features")};
118  Option<int> optLevel{*this, "opt-level",
119  llvm::cl::desc("Optimization level for compilation"),
120  llvm::cl::init(2)};
121  Option<std::string> gpuBinaryAnnotation{
122  *this, "gpu-binary-annotation",
123  llvm::cl::desc("Annotation attribute string for GPU binary"),
124  llvm::cl::init(getDefaultGpuBinaryAnnotation())};
125  Option<bool> dumpPtx{*this, "dump-ptx",
126  ::llvm::cl::desc("Dump generated PTX"),
127  llvm::cl::init(false)};
128 };
129 } // namespace gpu
130 
131 //===----------------------------------------------------------------------===//
132 // Registration
133 //===----------------------------------------------------------------------===//
134 
135 /// Register pass to serialize GPU kernel functions to a CUBIN binary
136 /// annotation.
137 LLVM_DEPRECATED("use Target attributes instead", "")
138 void registerGpuSerializeToCubinPass();
139 
140 /// Register pass to serialize GPU kernel functions to a HSAco binary
141 /// annotation.
142 LLVM_DEPRECATED("use Target attributes instead", "")
143 void registerGpuSerializeToHsacoPass();
144 
145 /// Create an instance of the GPU kernel function to CUBIN binary serialization
146 /// pass with optLevel (default level 2).
147 LLVM_DEPRECATED("use Target attributes instead", "")
148 std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
149  StringRef chip,
150  StringRef features,
151  int optLevel = 2,
152  bool dumpPtx = false);
153 
154 /// Create an instance of the GPU kernel function to HSAco binary serialization
155 /// pass.
156 LLVM_DEPRECATED("use Target attributes instead", "")
157 std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
158  StringRef arch,
159  StringRef features,
160  int optLevel);
161 
162 /// Collect a set of patterns to decompose memref ops.
163 void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns);
164 
165 /// Pass decomposes memref ops inside `gpu.launch` body.
166 std::unique_ptr<Pass> createGpuDecomposeMemrefsPass();
167 
168 /// Erase barriers that do not enforce conflicting memory side effects.
169 void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns);
170 
171 /// Generate the code for registering passes.
172 #define GEN_PASS_REGISTRATION
173 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
174 
175 } // namespace mlir
176 
177 #endif // MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
static llvm::ManagedStatic< PassManagerOptions > options
Pass to transform an operation of a specific type.
Definition: Pass.h:350
The abstract base pass class.
Definition: Pass.h:51
This class provides an efficient unique identifier for a specific C++ type.
Definition: TypeID.h:104
Base pass class to serialize kernel functions through LLVM into user-specified IR and add the resulting blob as module attribute.
Definition: Passes.h:82
Option< std::string > triple
Definition: Passes.h:112
void runOnOperation() final
The polymorphic API that runs the pass over the currently held operation.
Option< std::string > gpuBinaryAnnotation
Definition: Passes.h:121
Option< std::string > chip
Definition: Passes.h:114
Option< bool > dumpPtx
Definition: Passes.h:125
virtual std::unique_ptr< llvm::Module > translateToLLVMIR(llvm::LLVMContext &llvmContext)
Translates the 'getOperation()' result to an LLVM module.
virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine)
Hook allowing the application of optimizations before codegen. By default, does nothing.
Option< std::string > features
Definition: Passes.h:116
Include the generated interface declarations.
Definition: CallGraph.h:229
LogicalResult transformGpuModulesToBinaries(Operation *op, OffloadingLLVMTranslationAttrInterface handler=nullptr, const gpu::TargetOptions &options={})
Searches for all GPU modules in op and transforms them into GPU binary operations.
std::string getDefaultGpuBinaryAnnotation()
Returns the default annotation name for GPU binary blobs.
Include the generated interface declarations.
void populateGpuShufflePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
void registerGpuSerializeToHsacoPass()
Register pass to serialize GPU kernel functions to a HSAco binary annotation.
void populateGpuGlobalIdPatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect.
void registerGpuSerializeToCubinPass()
Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
Definition: Passes.h:66
std::unique_ptr< Pass > createGpuLauchSinkIndexComputationsPass()
Pass that moves ops which are likely an index computation into gpu.launch body.
std::unique_ptr< Pass > createGpuSerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, int optLevel)
Create an instance of the GPU kernel function to HSAco binary serialization pass.
std::unique_ptr< OperationPass< func::FuncOp > > createGpuAsyncRegionPass()
Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr< Pass > createGpuDecomposeMemrefsPass()
Pass decomposes memref ops inside gpu.launch body.
void populateGpuAllReducePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns)
Collect a set of patterns to decompose memrefs ops.
std::unique_ptr< Pass > createGpuSerializeToCubinPass(StringRef triple, StringRef chip, StringRef features, int optLevel=2, bool dumpPtx=false)
Create an instance of the GPU kernel function to CUBIN binary serialization pass with optLevel (default level 2).
std::unique_ptr< OperationPass< func::FuncOp > > createGpuMapParallelLoopsPass()
Maps the parallel loops found in the given function to workgroups.
std::unique_ptr< OperationPass< ModuleOp > > createGpuKernelOutliningPass(StringRef dataLayoutStr=StringRef())
Replaces gpu.launch with gpu.launch_func by moving the region into a separate kernel function.
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns)
Erase barriers that do not enforce conflicting memory side effects.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
This class represents a specific pass option, with a provided data type.
Definition: Pass.h:92