//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes that expose pass constructors.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_
#define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_

#include "Utils.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include <optional>

namespace llvm {
class TargetMachine;
class LLVMContext;
class Module;
} // namespace llvm

namespace mlir {
class TypeConverter;
class ConversionTarget;
namespace func {
class FuncOp;
} // namespace func

#define GEN_PASS_DECL
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
/// Pass that moves ops that are likely index computations into the gpu.launch
/// body.
std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass();

/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
/// a separate kernel function.
std::unique_ptr<OperationPass<ModuleOp>>
createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
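
// A hedged usage sketch (illustrative, not part of this header): scheduling
// these passes in a pipeline. Assumes an MLIRContext `context`, a loaded
// ModuleOp `module`, a surrounding function returning LogicalResult, and
// mlir/Pass/PassManager.h for PassManager.
//
//   PassManager pm(&context);
//   pm.addPass(createGpuLauchSinkIndexComputationsPass());
//   pm.addPass(createGpuKernelOutliningPass());
//   if (failed(pm.run(module)))
//     return failure();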

/// Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();

/// Maps the parallel loops found in the given function to workgroups. The first
/// loop encountered will be mapped to the global workgroup and the second loop
/// encountered to the local workgroup. Within each mapping, the first three
/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
/// mapped to sequential loops.
std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
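
// Since this returns an OperationPass<func::FuncOp>, it is typically scheduled
// nested on func.func ops (a sketch, with `pm` as in the example above):
//
//   pm.addNestedPass<func::FuncOp>(createGpuMapParallelLoopsPass());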

/// Collect a set of patterns to rewrite GlobalIdOp ops within the GPU dialect.
void populateGpuGlobalIdPatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
void populateGpuShufflePatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuAllReducePatterns(RewritePatternSet &patterns);
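
// The populate functions above follow the usual MLIR rewrite-pattern idiom:
// gather patterns into a RewritePatternSet, then hand them to a driver. A
// hedged sketch using the greedy driver from
// mlir/Transforms/GreedyPatternRewriteDriver.h (spelled applyPatternsGreedily
// in newer trees); `op` is the operation whose body should be rewritten:
//
//   RewritePatternSet patterns(op->getContext());
//   populateGpuAllReducePatterns(patterns);
//   if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
//     return failure();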

/// Collect a set of patterns to break down subgroup_reduce ops into smaller
/// ones supported by the target, i.e., with `size <= maxShuffleBitwidth`,
/// where `size` is the bitwidth of the subgroup_reduce value.
void populateGpuBreakDownSubgroupReducePatterns(
    RewritePatternSet &patterns, unsigned maxShuffleBitwidth = 32,
    PatternBenefit benefit = 1);

/// Collect a set of patterns to lower `gpu.subgroup_reduce` into `gpu.shuffle`
/// ops over `shuffleBitwidth` scalar types. Assumes that the subgroup has
/// `subgroupSize` lanes. Uses the butterfly shuffle algorithm.
///
/// The patterns populated by this function will ignore ops with the
/// `cluster_size` attribute;
/// `populateGpuLowerClusteredSubgroupReduceToShufflePatterns` handles exactly
/// those ops.
void populateGpuLowerSubgroupReduceToShufflePatterns(
    RewritePatternSet &patterns, unsigned subgroupSize,
    unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);

/// Disjoint counterpart of `populateGpuLowerSubgroupReduceToShufflePatterns`
/// that only matches `gpu.subgroup_reduce` ops with a `cluster_size`.
void populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
    RewritePatternSet &patterns, unsigned subgroupSize,
    unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
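
// Because the two populate functions above match disjoint op sets, they can be
// combined in one pattern set (illustrative; assumes a subgroup of 32 lanes
// and the default 32-bit shuffles):
//
//   RewritePatternSet patterns(ctx);
//   populateGpuLowerSubgroupReduceToShufflePatterns(patterns,
//                                                   /*subgroupSize=*/32);
//   populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
//       patterns, /*subgroupSize=*/32);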

/// Collect all patterns to rewrite ops within the GPU dialect.
inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
  populateGpuAllReducePatterns(patterns);
  populateGpuGlobalIdPatterns(patterns);
  populateGpuShufflePatterns(patterns);
}

namespace gpu {
/// Searches for all GPU modules in `op` and transforms them into GPU binary
/// operations. The resulting `gpu.binary` ops have `handler` as their
/// offloading handler attribute.
LogicalResult transformGpuModulesToBinaries(
    Operation *op, OffloadingLLVMTranslationAttrInterface handler = nullptr,
    const gpu::TargetOptions &options = {});
} // namespace gpu
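
// An illustrative call (assumes `module` is a ModuleOp whose gpu.module ops
// already carry target attributes, e.g. #nvvm.target):
//
//   if (failed(gpu::transformGpuModulesToBinaries(module.getOperation())))
//     return failure();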

//===----------------------------------------------------------------------===//
// Registration
//===----------------------------------------------------------------------===//

/// Collect a set of patterns to decompose memref ops.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns);

/// Pass that decomposes memref ops inside the `gpu.launch` body.
std::unique_ptr<Pass> createGpuDecomposeMemrefsPass();

/// Erase barriers that do not enforce conflicting memory side effects.
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns);
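
// These populate functions follow the same greedy-rewrite idiom sketched
// earlier, e.g. (illustrative):
//
//   RewritePatternSet patterns(ctx);
//   populateGpuDecomposeMemrefsPatterns(patterns);
//   populateGpuEliminateBarriersPatterns(patterns);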

/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"

} // namespace mlir

#endif // MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_