MLIR  22.0.0git
Utils.h
Go to the documentation of this file.
1 //===- Utils.h - Utils for GPU transform ops --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
10 #define MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
11 
16 #include "mlir/IR/PatternMatch.h"
17 
18 namespace mlir {
19 namespace gpu {
20 class GPUOp;
21 class LaunchOp;
22 enum class MappingId : uint64_t;
23 } // namespace gpu
24 namespace scf {
25 class ForallOp;
26 } // namespace scf
27 namespace transform {
28 namespace gpu {
29 
30 /// Helper type for functions that generate ids for the mapping of a scf.forall.
32  /// Error message; if non-empty, building the ids failed.
33  std::string errorMsg;
34  /// Values used to replace the forall induction variables.
36  /// Values used to predicate the forall body when activeMappingSizes is
37  /// smaller than the available mapping sizes.
39 };
40 
/// Stream an IdBuilderResult to `os`.
/// Writes the label "----mappingIdOps: " followed by `res.mappingIdOps`
/// interleaved with commas (via llvm::interleaveComma) and a newline, then the
/// label "----predicateOps: " followed by `res.predicateOps` in the same form
/// and a newline. Each label is streamed as part of the stream argument, so it
/// is written before the list elements. `res.errorMsg` is not printed.
/// Returns `os` so that calls can be chained.
41 inline raw_ostream &operator<<(raw_ostream &os, const IdBuilderResult &res) {
42  llvm::interleaveComma(res.mappingIdOps, os << "----mappingIdOps: ");
43  os << "\n";
44  llvm::interleaveComma(res.predicateOps, os << "----predicateOps: ");
45  os << "\n";
46  return os;
47 }
48 
49 /// Common gpu id builder type, allows the configuration of lowering for various
50 /// mapping schemes. Takes:
51 /// - A rewriter with insertion point set before the forall op to rewrite.
52 /// - The loc of the forall op to rewrite.
53 /// - A list of positive integers carrying the mapping sizes for the current
54 /// forall op to rewrite.
55 using GpuIdBuilderFnType = std::function<IdBuilderResult(
57 
58 /// Helper struct for configuring the rewrite of mapped scf.forall ops to
59 /// various gpu id configurations.
60 struct GpuIdBuilder {
61  using MappingIdBuilderFnType = std::function<DeviceMappingAttrInterface(
62  MLIRContext *, mlir::gpu::MappingId)>;
63 
64  GpuIdBuilder() = default;
65  GpuIdBuilder(MLIRContext *ctx, bool useLinearMapping,
66  const MappingIdBuilderFnType &builder);
67 
68  /// The mapping attributes targeted by this generator.
70 
71  /// The constructor that builds the concrete IR for mapping ids.
73 };
74 
75 /// Builder for gpu::BlockIdOps used to map scf.forall to blocks.
76 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
77 /// used for indexing rewrites as well as 3D sizes for predicate generation.
78 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
79 /// used for indexing rewrites as well as 1D sizes for predicate generation.
80 /// If `mask` is provided, it will be used to filter the active blocks.
82  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
83  DeviceMaskingAttrInterface mask = nullptr);
84 };
85 
86 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
87 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
88 /// used for indexing rewrites as well as 3D sizes for predicate generation.
89 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
90 /// used for indexing rewrites as well as 1D sizes for predicate generation.
91 /// If `mask` is provided, it will be used to filter the active warpgroups.
93  GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
94  bool useLinearMapping = false,
95  DeviceMaskingAttrInterface mask = nullptr);
96  int64_t warpSize = 32;
97  /// In the future this may be configured by the transformation.
98  static constexpr int64_t kNumWarpsPerGroup = 4;
99 };
100 
101 /// Builder for warp ids used to map scf.forall to reindexed warps.
102 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
103 /// used for indexing rewrites as well as 3D sizes for predicate generation.
104 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
105 /// used for indexing rewrites as well as 1D sizes for predicate generation.
106 /// If `mask` is provided, it will be used to filter the active warps.
108  GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
109  bool useLinearMapping = false,
110  DeviceMaskingAttrInterface mask = nullptr);
111  int64_t warpSize = 32;
112 };
113 
114 /// Builder for thread ids used to map scf.forall to reindexed threads.
115 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
116 /// used for indexing rewrites as well as 3D sizes for predicate generation.
117 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
118 /// used for indexing rewrites as well as 1D sizes for predicate generation.
119 /// If `mask` is provided, it will be used to filter the active threads.
121  GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
122  DeviceMaskingAttrInterface mask = nullptr);
123 };
124 
125 /// Builder for lane id.
126 /// The `idBuilder` method returns nD values used for indexing rewrites as well
127 /// as 1D sizes for predicate generation.
128 /// Only the linear mapping (`useLinearMapping`) case is supported.
129 /// If `mask` is provided, it will be used to filter the active lanes.
131  GpuLaneIdBuilder(MLIRContext *ctx, int64_t warpSize, bool unused,
132  DeviceMaskingAttrInterface mask = nullptr);
133  int64_t warpSize = 32;
134 };
135 
136 /// Determine if the size of the kernel configuration is supported by the
137 /// GPU architecture being used.
138 /// TODO: this is currently hardwired to CUDA; parameterize and generalize.
139 DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp,
140  std::optional<int64_t> gridDimX,
141  std::optional<int64_t> gridDimY,
142  std::optional<int64_t> gridDimZ,
143  std::optional<int64_t> blockDimX,
144  std::optional<int64_t> blockDimY,
145  std::optional<int64_t> blockDimZ);
146 
147 /// Create an empty-body gpu::LaunchOp using the provided kernel settings
148 /// and put a terminator within.
151  TransformOpInterface transformOp, mlir::gpu::LaunchOp &launchOp,
152  std::optional<int64_t> gridDimX = std::nullopt,
153  std::optional<int64_t> gridDimY = std::nullopt,
154  std::optional<int64_t> gridDimZ = std::nullopt,
155  std::optional<int64_t> blockDimX = std::nullopt,
156  std::optional<int64_t> blockDimY = std::nullopt,
157  std::optional<int64_t> blockDimZ = std::nullopt);
158 
159 /// Alter kernel configuration of the given kernel.
161 alterGpuLaunch(RewriterBase &rewriter, mlir::gpu::LaunchOp gpuLaunch,
162  TransformOpInterface transformOp,
163  std::optional<int64_t> gridDimX = std::nullopt,
164  std::optional<int64_t> gridDimY = std::nullopt,
165  std::optional<int64_t> gridDimZ = std::nullopt,
166  std::optional<int64_t> blockDimX = std::nullopt,
167  std::optional<int64_t> blockDimY = std::nullopt,
168  std::optional<int64_t> blockDimZ = std::nullopt);
169 
170 /// Find the unique top level scf::ForallOp within a given target op.
172 findTopLevelForallOp(Operation *target, scf::ForallOp &topLevelForallOp,
173  TransformOpInterface transformOp);
174 
175 } // namespace gpu
176 } // namespace transform
177 } // namespace mlir
178 
179 #endif // MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
The result of a transform IR operation application.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358
DiagnosedSilenceableFailure findTopLevelForallOp(Operation *target, scf::ForallOp &topLevelForallOp, TransformOpInterface transformOp)
Find the unique top level scf::ForallOp within a given target op.
DiagnosedSilenceableFailure alterGpuLaunch(RewriterBase &rewriter, mlir::gpu::LaunchOp gpuLaunch, TransformOpInterface transformOp, std::optional< int64_t > gridDimX=std::nullopt, std::optional< int64_t > gridDimY=std::nullopt, std::optional< int64_t > gridDimZ=std::nullopt, std::optional< int64_t > blockDimX=std::nullopt, std::optional< int64_t > blockDimY=std::nullopt, std::optional< int64_t > blockDimZ=std::nullopt)
Alter kernel configuration of the given kernel.
DiagnosedSilenceableFailure createGpuLaunch(RewriterBase &rewriter, Location loc, TransformOpInterface transformOp, mlir::gpu::LaunchOp &launchOp, std::optional< int64_t > gridDimX=std::nullopt, std::optional< int64_t > gridDimY=std::nullopt, std::optional< int64_t > gridDimZ=std::nullopt, std::optional< int64_t > blockDimX=std::nullopt, std::optional< int64_t > blockDimY=std::nullopt, std::optional< int64_t > blockDimZ=std::nullopt)
Create an empty-body gpu::LaunchOp using the provided kernel settings and put a terminator within.
DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
Determine if the size of the kernel configuration is supported by the GPU architecture being used.
Definition: Utils.cpp:360
std::function< IdBuilderResult(RewriterBase &, Location, ArrayRef< int64_t >, ArrayRef< int64_t >)> GpuIdBuilderFnType
Common gpu id builder type, allows the configuration of lowering for various mapping schemes.
Definition: Utils.h:56
Include the generated interface declarations.
raw_ostream & operator<<(raw_ostream &os, const AliasResult &result)
Definition: AliasAnalysis.h:78
Builder for gpu::BlockIdOps used to map scf.forall to blocks.
Definition: Utils.h:81
Helper struct for configuring the rewrite of mapped scf.forall ops to various gpu id configurations.
Definition: Utils.h:60
SmallVector< DeviceMappingAttrInterface > mappingAttributes
The mapping attributes targeted by this generator.
Definition: Utils.h:69
GpuIdBuilderFnType idBuilder
The constructor that builds the concrete IR for mapping ids.
Definition: Utils.h:72
std::function< DeviceMappingAttrInterface(MLIRContext *, mlir::gpu::MappingId)> MappingIdBuilderFnType
Definition: Utils.h:62
Builder for thread ids used to map scf.forall to reindexed threads.
Definition: Utils.h:120
Builder for warp ids used to map scf.forall to reindexed warps.
Definition: Utils.h:107
Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
Definition: Utils.h:92
Helper type for functions that generate ids for the mapping of a scf.forall.
Definition: Utils.h:31
std::string errorMsg
Error message, if not empty then building the ids failed.
Definition: Utils.h:33
SmallVector< Value > predicateOps
Values used to predicate the forall body when activeMappingSizes is smaller than the available mapping sizes.
Definition: Utils.h:38
SmallVector< Value > mappingIdOps
Values used to replace the forall induction variables.
Definition: Utils.h:35