13 #ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
14 #define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
29 class ConversionTarget;
35 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
43 std::unique_ptr<OperationPass<ModuleOp>>
77 Operation *op, OffloadingLLVMTranslationAttrInterface handler =
nullptr,
78 const gpu::TargetOptions &
options = {});
93 llvm::TargetMachine &targetMachine);
96 virtual std::unique_ptr<llvm::Module>
101 std::unique_ptr<llvm::TargetMachine> createTargetMachine();
104 std::optional<std::string> translateToISA(llvm::Module &llvmModule,
105 llvm::TargetMachine &targetMachine);
108 virtual std::unique_ptr<std::vector<char>>
109 serializeISA(
const std::string &isa) = 0;
113 ::llvm::cl::desc(
"Target triple")};
115 ::llvm::cl::desc(
"Target architecture")};
117 ::llvm::cl::desc(
"Target features")};
119 llvm::cl::desc(
"Optimization level for compilation"),
122 *
this,
"gpu-binary-annotation",
123 llvm::cl::desc(
"Annotation attribute string for GPU binary"),
126 ::llvm::cl::desc(
"Dump generated PTX"),
127 llvm::cl::init(
false)};
137 LLVM_DEPRECATED(
"use Target attributes instead",
"")
142 LLVM_DEPRECATED("use Target attributes instead", "")
147 LLVM_DEPRECATED("use Target attributes instead", "")
152 bool dumpPtx = false);
156 LLVM_DEPRECATED("use Target attributes instead", "")
172 #define GEN_PASS_REGISTRATION
173 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
static llvm::ManagedStatic< PassManagerOptions > options
Pass to transform an operation of a specific type.
The abstract base pass class.
This class provides an efficient unique identifier for a specific C++ type.
Base pass class to serialize kernel functions through LLVM into user-specified IR and add the resulting blob as a module attribute.
Option< std::string > triple
void runOnOperation() final
The polymorphic API that runs the pass over the currently held operation.
Option< std::string > gpuBinaryAnnotation
SerializeToBlobPass(TypeID passID)
Option< std::string > chip
virtual std::unique_ptr< llvm::Module > translateToLLVMIR(llvm::LLVMContext &llvmContext)
Translates the 'getOperation()' result to an LLVM module.
virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine)
Hook allowing the application of optimizations before codegen. By default, does nothing.
Option< std::string > features
Include the generated interface declarations.
LogicalResult transformGpuModulesToBinaries(Operation *op, OffloadingLLVMTranslationAttrInterface handler=nullptr, const gpu::TargetOptions &options={})
Searches for all GPU modules in op and transforms them into GPU binary operations.
std::string getDefaultGpuBinaryAnnotation()
Returns the default annotation name for GPU binary blobs.
Include the generated interface declarations.
void populateGpuShufflePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
void registerGpuSerializeToHsacoPass()
Register pass to serialize GPU kernel functions to an HSAco binary annotation.
void populateGpuGlobalIdPatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite GlobalIdOp ops within the GPU dialect.
void registerGpuSerializeToCubinPass()
Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
std::unique_ptr< Pass > createGpuLauchSinkIndexComputationsPass()
Pass that moves ops which are likely an index computation into gpu.launch body.
std::unique_ptr< Pass > createGpuSerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, int optLevel)
Create an instance of the GPU kernel function to HSAco binary serialization pass.
std::unique_ptr< OperationPass< func::FuncOp > > createGpuAsyncRegionPass()
Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr< Pass > createGpuDecomposeMemrefsPass()
Pass that decomposes memref ops inside a gpu.launch body.
void populateGpuAllReducePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns)
Collect a set of patterns to decompose memref ops.
std::unique_ptr< Pass > createGpuSerializeToCubinPass(StringRef triple, StringRef chip, StringRef features, int optLevel=2, bool dumpPtx=false)
Create an instance of the GPU kernel function to CUBIN binary serialization pass with optLevel (default level 2).
std::unique_ptr< OperationPass< func::FuncOp > > createGpuMapParallelLoopsPass()
Maps the parallel loops found in the given function to workgroups.
std::unique_ptr< OperationPass< ModuleOp > > createGpuKernelOutliningPass(StringRef dataLayoutStr=StringRef())
Replaces gpu.launch with gpu.launch_func by moving the region into a separate kernel function.
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns)
Erase barriers that do not enforce conflicting memory side effects.
This class represents an efficient way to signal success or failure.
This class represents a specific pass option, with a provided data type.