MLIR 22.0.0git
Utils.h
Go to the documentation of this file.
1//===- Utils.h - Utils for GPU transform ops --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
10#define MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
11
17
18namespace mlir {
19namespace gpu {
20class GPUOp;
21class LaunchOp;
22enum class MappingId : uint64_t;
23} // namespace gpu
24namespace scf {
25class ForallOp;
26} // namespace scf
27namespace transform {
28namespace gpu {
29
30/// Helper type for functions that generate ids for the mapping of a scf.forall.
32 /// Error message, if not empty then building the ids failed.
33 std::string errorMsg;
34 /// Values used to replace the forall induction variables.
36 /// Values used to predicate the forall body when activeMappingSizes is
37 /// smaller than the available mapping sizes.
39};
40
42 llvm::interleaveComma(res.mappingIdOps, os << "----mappingIdOps: ");
43 os << "\n";
44 llvm::interleaveComma(res.predicateOps, os << "----predicateOps: ");
45 os << "\n";
46 return os;
47}
48
49/// Common gpu id builder type, allows the configuration of lowering for various
50/// mapping schemes. Takes:
51/// - A rewriter with insertion point set before the forall op to rewrite.
52/// - The loc of the forall op to rewrite.
53/// - A list of positive integers carrying the mapping sizes for the current
54/// forall op to rewrite.
55using GpuIdBuilderFnType = std::function<IdBuilderResult(
57
58/// Helper struct for configuring the rewrite of mapped scf.forall ops to
59/// various gpu id configurations.
61 using MappingIdBuilderFnType = std::function<DeviceMappingAttrInterface(
62 MLIRContext *, mlir::gpu::MappingId)>;
63
64 GpuIdBuilder() = default;
65 GpuIdBuilder(MLIRContext *ctx, bool useLinearMapping,
66 const MappingIdBuilderFnType &builder);
67
68 /// The mapping attributes targeted by this generator.
70
71 /// The constructor that builds the concrete IR for mapping ids.
73};
74
75/// Builder for gpu::BlockIdOps used to map scf.forall to blocks.
76/// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
77/// used for indexing rewrites as well as 3D sizes for predicate generation.
78/// If `useLinearMapping` is true, the `idBuilder` method returns nD values
79/// used for indexing rewrites as well as 1D sizes for predicate generation.
80/// If `mask` is provided, it will be used to filter the active blocks.
82 GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
83 DeviceMaskingAttrInterface mask = nullptr);
84};
85
86/// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
87/// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
88/// used for indexing rewrites as well as 3D sizes for predicate generation.
89/// If `useLinearMapping` is true, the `idBuilder` method returns nD values
90/// used for indexing rewrites as well as 1D sizes for predicate generation.
91/// If `mask` is provided, it will be used to filter the active warpgroups.
94 bool useLinearMapping = false,
95 DeviceMaskingAttrInterface mask = nullptr);
97 /// In the future this may be configured by the transformation.
98 static constexpr int64_t kNumWarpsPerGroup = 4;
99};
100
101/// Builder for warp ids used to map scf.forall to reindexed warps.
102/// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
103/// used for indexing rewrites as well as 3D sizes for predicate generation.
104/// If `useLinearMapping` is true, the `idBuilder` method returns nD values
105/// used for indexing rewrites as well as 1D sizes for predicate generation.
106/// If `mask` is provided, it will be used to filter the active warps.
109 bool useLinearMapping = false,
110 DeviceMaskingAttrInterface mask = nullptr);
112};
113
114/// Builder for thread ids used to map scf.forall to reindexed threads.
115/// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
116/// used for indexing rewrites as well as 3D sizes for predicate generation.
117/// If `useLinearMapping` is true, the `idBuilder` method returns nD values
118/// used for indexing rewrites as well as 1D sizes for predicate generation.
119/// If `mask` is provided, it will be used to filter the active threads.
121 GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
122 DeviceMaskingAttrInterface mask = nullptr);
123};
124
125/// Builder for lane id.
126/// The `idBuilder` method returns nD values used for indexing rewrites as well
127/// as 1D sizes for predicate generation.
128/// Linear mapping (the `useLinearMapping` case) is the only supported case.
129/// If `mask` is provided, it will be used to filter the active lanes.
131 GpuLaneIdBuilder(MLIRContext *ctx, int64_t warpSize, bool unused,
132 DeviceMaskingAttrInterface mask = nullptr);
134};
135
136/// Determine if the size of the kernel configuration is supported by the
137/// GPU architecture being used.
138/// TODO: this is currently hardwired to CUDA; parameterize and generalize.
139DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp,
140 std::optional<int64_t> gridDimX,
141 std::optional<int64_t> gridDimY,
142 std::optional<int64_t> gridDimZ,
143 std::optional<int64_t> blockDimX,
144 std::optional<int64_t> blockDimY,
145 std::optional<int64_t> blockDimZ);
146
147/// Create an empty-body gpu::LaunchOp using the provided kernel settings
148/// and put a terminator within.
151 TransformOpInterface transformOp, mlir::gpu::LaunchOp &launchOp,
152 std::optional<int64_t> gridDimX = std::nullopt,
153 std::optional<int64_t> gridDimY = std::nullopt,
154 std::optional<int64_t> gridDimZ = std::nullopt,
155 std::optional<int64_t> blockDimX = std::nullopt,
156 std::optional<int64_t> blockDimY = std::nullopt,
157 std::optional<int64_t> blockDimZ = std::nullopt);
158
159/// Alter kernel configuration of the given kernel.
161alterGpuLaunch(RewriterBase &rewriter, mlir::gpu::LaunchOp gpuLaunch,
162 TransformOpInterface transformOp,
163 std::optional<int64_t> gridDimX = std::nullopt,
164 std::optional<int64_t> gridDimY = std::nullopt,
165 std::optional<int64_t> gridDimZ = std::nullopt,
166 std::optional<int64_t> blockDimX = std::nullopt,
167 std::optional<int64_t> blockDimY = std::nullopt,
168 std::optional<int64_t> blockDimZ = std::nullopt);
169
170/// Find the unique top level scf::ForallOp within a given target op.
172findTopLevelForallOp(Operation *target, scf::ForallOp &topLevelForallOp,
173 TransformOpInterface transformOp);
174
175} // namespace gpu
176} // namespace transform
177} // namespace mlir
178
179#endif // MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
The result of a transform IR operation application.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to track mutations and create new operations.
DiagnosedSilenceableFailure findTopLevelForallOp(Operation *target, scf::ForallOp &topLevelForallOp, TransformOpInterface transformOp)
Find the unique top level scf::ForallOp within a given target op.
std::function< IdBuilderResult( RewriterBase &, Location, ArrayRef< int64_t >, ArrayRef< int64_t >)> GpuIdBuilderFnType
Common gpu id builder type, allows the configuration of lowering for various mapping schemes.
Definition Utils.h:55
DiagnosedSilenceableFailure alterGpuLaunch(RewriterBase &rewriter, mlir::gpu::LaunchOp gpuLaunch, TransformOpInterface transformOp, std::optional< int64_t > gridDimX=std::nullopt, std::optional< int64_t > gridDimY=std::nullopt, std::optional< int64_t > gridDimZ=std::nullopt, std::optional< int64_t > blockDimX=std::nullopt, std::optional< int64_t > blockDimY=std::nullopt, std::optional< int64_t > blockDimZ=std::nullopt)
Alter kernel configuration of the given kernel.
DiagnosedSilenceableFailure createGpuLaunch(RewriterBase &rewriter, Location loc, TransformOpInterface transformOp, mlir::gpu::LaunchOp &launchOp, std::optional< int64_t > gridDimX=std::nullopt, std::optional< int64_t > gridDimY=std::nullopt, std::optional< int64_t > gridDimZ=std::nullopt, std::optional< int64_t > blockDimX=std::nullopt, std::optional< int64_t > blockDimY=std::nullopt, std::optional< int64_t > blockDimZ=std::nullopt)
Create an empty-body gpu::LaunchOp using the provided kernel settings and put a terminator within.
raw_ostream & operator<<(raw_ostream &os, const IdBuilderResult &res)
Definition Utils.h:41
DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
Determine if the size of the kernel configuration is supported by the GPU architecture being used.
Definition Utils.cpp:360
Include the generated interface declarations.
GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping=false, DeviceMaskingAttrInterface mask=nullptr)
Definition Utils.cpp:296
std::function< DeviceMappingAttrInterface( MLIRContext *, mlir::gpu::MappingId)> MappingIdBuilderFnType
Definition Utils.h:61
SmallVector< DeviceMappingAttrInterface > mappingAttributes
The mapping attributes targeted by this generator.
Definition Utils.h:69
GpuIdBuilderFnType idBuilder
The constructor that builds the concrete IR for mapping ids.
Definition Utils.h:72
GpuLaneIdBuilder(MLIRContext *ctx, int64_t warpSize, bool unused, DeviceMaskingAttrInterface mask=nullptr)
Definition Utils.cpp:349
GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping=false, DeviceMaskingAttrInterface mask=nullptr)
Definition Utils.cpp:338
GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, bool useLinearMapping=false, DeviceMaskingAttrInterface mask=nullptr)
Definition Utils.cpp:323
GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, bool useLinearMapping=false, DeviceMaskingAttrInterface mask=nullptr)
Definition Utils.cpp:307
static constexpr int64_t kNumWarpsPerGroup
In the future this may be configured by the transformation.
Definition Utils.h:98
Helper type for functions that generate ids for the mapping of a scf.forall.
Definition Utils.h:31
std::string errorMsg
Error message, if not empty then building the ids failed.
Definition Utils.h:33
SmallVector< Value > predicateOps
Values used to predicate the forall body when activeMappingSizes is smaller than the available mapping sizes.
Definition Utils.h:38
SmallVector< Value > mappingIdOps
Values used to replace the forall induction variables.
Definition Utils.h:35