13 #ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
14 #define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
30 class ConversionTarget;
36 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
44 std::unique_ptr<OperationPass<ModuleOp>>
70 unsigned maxShuffleBitwidth = 32,
71 PatternBenefit benefit = 1);
77 RewritePatternSet &patterns,
unsigned subgroupSize,
78 unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
92 Operation *op, OffloadingLLVMTranslationAttrInterface handler =
nullptr,
93 const gpu::TargetOptions &
options = {});
108 llvm::TargetMachine &targetMachine);
111 virtual std::unique_ptr<llvm::Module>
116 std::unique_ptr<llvm::TargetMachine> createTargetMachine();
119 std::optional<std::string> translateToISA(llvm::Module &llvmModule,
120 llvm::TargetMachine &targetMachine);
123 virtual std::unique_ptr<std::vector<char>>
124 serializeISA(
const std::string &isa) = 0;
128 ::llvm::cl::desc(
"Target triple")};
130 ::llvm::cl::desc(
"Target architecture")};
132 ::llvm::cl::desc(
"Target features")};
134 llvm::cl::desc(
"Optimization level for compilation"),
137 *
this,
"gpu-binary-annotation",
138 llvm::cl::desc(
"Annotation attribute string for GPU binary"),
141 ::llvm::cl::desc(
"Dump generated PTX"),
142 llvm::cl::init(
false)};
152 LLVM_DEPRECATED(
"use Target attributes instead",
"")
157 LLVM_DEPRECATED("use Target attributes instead", "")
173 #define GEN_PASS_REGISTRATION
174 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
static llvm::ManagedStatic< PassManagerOptions > options
Pass to transform an operation of a specific type.
The abstract base pass class.
This class provides an efficient unique identifier for a specific C++ type.
Base pass class to serialize kernel functions through LLVM into user-specified IR and add the resulti...
Option< std::string > triple
void runOnOperation() final
The polymorphic API that runs the pass over the currently held operation.
Option< std::string > gpuBinaryAnnotation
SerializeToBlobPass(TypeID passID)
Option< std::string > chip
virtual std::unique_ptr< llvm::Module > translateToLLVMIR(llvm::LLVMContext &llvmContext)
Translates the 'getOperation()' result to an LLVM module.
virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine)
Hook allowing the application of optimizations before codegen. By default, does nothing.
Option< std::string > features
Include the generated interface declarations.
LogicalResult transformGpuModulesToBinaries(Operation *op, OffloadingLLVMTranslationAttrInterface handler=nullptr, const gpu::TargetOptions &options={})
Searches for all GPU modules in op and transforms them into GPU binary operations.
std::string getDefaultGpuBinaryAnnotation()
Returns the default annotation name for GPU binary blobs.
Include the generated interface declarations.
void populateGpuShufflePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
void populateGpuLowerSubgroupReduceToShufflePattenrs(RewritePatternSet &patterns, unsigned subgroupSize, unsigned shuffleBitwidth=32, PatternBenefit benefit=1)
Collect a set of patterns to lower gpu.subgroup_reduce into gpu.shuffle ops over shuffleBitwidth scal...
void registerGpuSerializeToHsacoPass()
Register pass to serialize GPU kernel functions to a HSAco binary annotation.
void populateGpuGlobalIdPatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect.
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
std::unique_ptr< Pass > createGpuLauchSinkIndexComputationsPass()
Pass that moves ops that are likely index computations into the gpu.launch body.
std::unique_ptr< Pass > createGpuSerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, int optLevel)
Create an instance of the GPU kernel function to HSAco binary serialization pass.
std::unique_ptr< OperationPass< func::FuncOp > > createGpuAsyncRegionPass()
Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr< Pass > createGpuDecomposeMemrefsPass()
Pass that decomposes memref ops inside a gpu.launch body.
void populateGpuBreakDownSubgrupReducePatterns(RewritePatternSet &patterns, unsigned maxShuffleBitwidth=32, PatternBenefit benefit=1)
Collect a set of patterns to break down subgroup_reduce ops into smaller ones supported by the target...
void populateGpuAllReducePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns)
Collect a set of patterns to decompose memref ops.
std::unique_ptr< OperationPass< func::FuncOp > > createGpuMapParallelLoopsPass()
Maps the parallel loops found in the given function to workgroups.
std::unique_ptr< OperationPass< ModuleOp > > createGpuKernelOutliningPass(StringRef dataLayoutStr=StringRef())
Replaces gpu.launch with gpu.launch_func by moving the region into a separate kernel function.
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns)
Erase barriers that do not enforce conflicting memory side effects.
This class represents an efficient way to signal success or failure.
This class represents a specific pass option, with a provided data type.