13 #ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
14 #define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
30 class ConversionTarget;
36 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
44 std::unique_ptr<OperationPass<ModuleOp>>
70 unsigned maxShuffleBitwidth = 32,
71 PatternBenefit benefit = 1);
77 RewritePatternSet &patterns,
unsigned subgroupSize,
78 unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
92 Operation *op, OffloadingLLVMTranslationAttrInterface handler =
nullptr,
93 const gpu::TargetOptions &
options = {});
110 #define GEN_PASS_REGISTRATION
111 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
static llvm::ManagedStatic< PassManagerOptions > options
The OpAsmOpInterface, see OpAsmInterface.td for more details.
LogicalResult transformGpuModulesToBinaries(Operation *op, OffloadingLLVMTranslationAttrInterface handler=nullptr, const gpu::TargetOptions &options={})
Searches for all GPU modules in op and transforms them into GPU binary operations.
Include the generated interface declarations.
void populateGpuShufflePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
void populateGpuLowerSubgroupReduceToShufflePattenrs(RewritePatternSet &patterns, unsigned subgroupSize, unsigned shuffleBitwidth=32, PatternBenefit benefit=1)
Collect a set of patterns to lower gpu.subgroup_reduce into gpu.shuffle ops over shuffleBitwidth scal...
void populateGpuGlobalIdPatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect.
void populateGpuRewritePatterns(RewritePatternSet &patterns)
Collect all patterns to rewrite ops within the GPU dialect.
std::unique_ptr< Pass > createGpuLauchSinkIndexComputationsPass()
Pass that moves ops which are likely an index computation into gpu.launch body.
std::unique_ptr< OperationPass< func::FuncOp > > createGpuAsyncRegionPass()
Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr< Pass > createGpuDecomposeMemrefsPass()
Pass decomposes memref ops inside gpu.launch body.
void populateGpuBreakDownSubgrupReducePatterns(RewritePatternSet &patterns, unsigned maxShuffleBitwidth=32, PatternBenefit benefit=1)
Collect a set of patterns to break down subgroup_reduce ops into smaller ones supported by the target...
void populateGpuAllReducePatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns)
Collect a set of patterns to decompose memrefs ops.
std::unique_ptr< OperationPass< func::FuncOp > > createGpuMapParallelLoopsPass()
Maps the parallel loops found in the given function to workgroups.
std::unique_ptr< OperationPass< ModuleOp > > createGpuKernelOutliningPass(StringRef dataLayoutStr=StringRef())
Replaces gpu.launch with gpu.launch_func by moving the region into a separate kernel function.
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns)
Erase barriers that do not enforce conflicting memory side effects.