MLIR  19.0.0git
Utils.h
Go to the documentation of this file.
1 //===- Utils.h - Utils for GPU transform ops --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
10 #define MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
11 
16 #include "mlir/IR/PatternMatch.h"
17 
18 namespace mlir {
19 namespace gpu {
20 class GPUOp;
21 class LaunchOp;
22 enum class MappingId : uint64_t;
23 } // namespace gpu
24 namespace scf {
25 class ForallOp;
26 } // namespace scf
27 namespace transform {
28 namespace gpu {
29 
30 /// Helper type for functions that generate ids for the mapping of a scf.forall.
31 /// Operates on both 1) an "original" basis that represents the individual
32 /// thread and block ids and 2) a "scaled" basis that represents grouped ids
33 /// (e.g. block clusters, warpgroups and warps).
34 /// The mapping of ids is done in the "scaled" basis (i.e. when mapping to warps
35 /// a division by 32 occurs).
36 /// The predication is in the "original" basis using the "active" quantities
37 /// (`activeMappingSizes`, `availableMappingSizes` and `activeIdOps`).
39  // Ops used to replace the forall induction variables.
41  // Available mapping sizes used to predicate the forall body when they are
42  // larger than the active mapping sizes (`activeMappingSizes`).
44  // Actual mapping sizes used to predicate the forall body when they are
45  // smaller than the available mapping sizes.
47  // Ops used to predicate the forall body when activeMappingSizes is smaller
48  // than the available mapping sizes.
50 };
51 
52 /// Common gpu id builder type, allows the configuration of lowering for various
53 /// mapping schemes. Takes:
54 /// - A rewriter with insertion point set before the forall op to rewrite.
55 /// - The loc of the forall op to rewrite.
56 /// - A list of positive integers carrying the mapping sizes for the current
57 /// forall op to rewrite.
58 using GpuIdBuilderFnType = std::function<IdBuilderResult(
60 
61 /// Helper struct for configuring the rewrite of mapped scf.forall ops to
62 /// various gpu id configurations.
63 struct GpuIdBuilder {
64  using MappingIdBuilderFnType = std::function<DeviceMappingAttrInterface(
65  MLIRContext *, mlir::gpu::MappingId)>;
66 
67  GpuIdBuilder() = default;
68  GpuIdBuilder(MLIRContext *ctx, bool useLinearMapping,
69  const MappingIdBuilderFnType &builder);
70 
71  /// The mapping attributes targeted by this generator.
73 
74  /// The constructor that builds the concrete IR for mapping ids.
76 };
77 
78 /// Builder for gpu::BlockIdOps used to map scf.forall to blocks.
79 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
80 /// used for indexing rewrites as well as 3D sizes for predicate generation.
81 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
82 /// used for indexing rewrites as well as 1D sizes for predicate generation.
84  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
85 };
86 
87 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
88 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
89 /// used for indexing rewrites as well as 3D sizes for predicate generation.
90 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
91 /// used for indexing rewrites as well as 1D sizes for predicate generation.
93  GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
94  bool useLinearMapping = false);
95  int64_t warpSize = 32;
96  /// In the future this may be configured by the transformation.
97  static constexpr int64_t kNumWarpsPerGroup = 4;
98 };
99 
100 /// Builder for warp ids used to map scf.forall to reindexed warps.
101 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
102 /// used for indexing rewrites as well as 3D sizes for predicate generation.
103 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
104 /// used for indexing rewrites as well as 1D sizes for predicate generation.
106  GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
107  bool useLinearMapping = false);
108  int64_t warpSize = 32;
109 };
110 
111 /// Builder for thread ids used to map scf.forall to reindexed threads.
112 /// If `useLinearMapping` is false, the `idBuilder` method returns 3D values
113 /// used for indexing rewrites as well as 3D sizes for predicate generation.
114 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
115 /// used for indexing rewrites as well as 1D sizes for predicate generation.
117  GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
118 };
119 
120 /// Determine if the size of the kernel configuration, i.e. the optional grid
121 /// and block dimensions along x, y and z, is supported by the GPU architecture.
122 /// TODO: this is currently hardwired to CUDA; parameterize and generalize.
123 DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp,
124  std::optional<int64_t> gridDimX,
125  std::optional<int64_t> gridDimY,
126  std::optional<int64_t> gridDimZ,
127  std::optional<int64_t> blockDimX,
128  std::optional<int64_t> blockDimY,
129  std::optional<int64_t> blockDimZ);
130 
131 /// Create an empty-body gpu::LaunchOp using the provided kernel settings
132 /// and put a terminator within.
135  TransformOpInterface transformOp, mlir::gpu::LaunchOp &launchOp,
136  std::optional<int64_t> gridDimX = std::nullopt,
137  std::optional<int64_t> gridDimY = std::nullopt,
138  std::optional<int64_t> gridDimZ = std::nullopt,
139  std::optional<int64_t> blockDimX = std::nullopt,
140  std::optional<int64_t> blockDimY = std::nullopt,
141  std::optional<int64_t> blockDimZ = std::nullopt);
142 
143 /// Alter kernel configuration of the given kernel.
145 alterGpuLaunch(RewriterBase &rewriter, mlir::gpu::LaunchOp gpuLaunch,
146  TransformOpInterface transformOp,
147  std::optional<int64_t> gridDimX = std::nullopt,
148  std::optional<int64_t> gridDimY = std::nullopt,
149  std::optional<int64_t> gridDimZ = std::nullopt,
150  std::optional<int64_t> blockDimX = std::nullopt,
151  std::optional<int64_t> blockDimY = std::nullopt,
152  std::optional<int64_t> blockDimZ = std::nullopt);
153 
154 /// Find the unique top level scf::ForallOp within a given target op.
156 findTopLevelForallOp(Operation *target, scf::ForallOp &topLevelForallOp,
157  TransformOpInterface transformOp);
158 
159 } // namespace gpu
160 } // namespace transform
161 } // namespace mlir
162 
163 #endif // MLIR_DIALECT_GPU_TRANSFORMOPS_UTILS_H
The result of a transform IR operation application.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:400
DiagnosedSilenceableFailure findTopLevelForallOp(Operation *target, scf::ForallOp &topLevelForallOp, TransformOpInterface transformOp)
Find the unique top level scf::ForallOp within a given target op.
DiagnosedSilenceableFailure alterGpuLaunch(RewriterBase &rewriter, mlir::gpu::LaunchOp gpuLaunch, TransformOpInterface transformOp, std::optional< int64_t > gridDimX=std::nullopt, std::optional< int64_t > gridDimY=std::nullopt, std::optional< int64_t > gridDimZ=std::nullopt, std::optional< int64_t > blockDimX=std::nullopt, std::optional< int64_t > blockDimY=std::nullopt, std::optional< int64_t > blockDimZ=std::nullopt)
Alter kernel configuration of the given kernel.
DiagnosedSilenceableFailure createGpuLaunch(RewriterBase &rewriter, Location loc, TransformOpInterface transformOp, mlir::gpu::LaunchOp &launchOp, std::optional< int64_t > gridDimX=std::nullopt, std::optional< int64_t > gridDimY=std::nullopt, std::optional< int64_t > gridDimZ=std::nullopt, std::optional< int64_t > blockDimX=std::nullopt, std::optional< int64_t > blockDimY=std::nullopt, std::optional< int64_t > blockDimZ=std::nullopt)
Create an empty-body gpu::LaunchOp using the provided kernel settings and put a terminator within.
DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
Determine if the size of the kernel configuration is supported by the GPU architecture being used.
Definition: Utils.cpp:232
std::function< IdBuilderResult(RewriterBase &, Location, ArrayRef< int64_t >, ArrayRef< int64_t >)> GpuIdBuilderFnType
Common gpu id builder type, allows the configuration of lowering for various mapping schemes.
Definition: Utils.h:59
Include the generated interface declarations.
Builder for gpu::BlockIdOps used to map scf.forall to blocks.
Definition: Utils.h:83
Helper struct for configuring the rewrite of mapped scf.forall ops to various gpu id configurations.
Definition: Utils.h:63
SmallVector< DeviceMappingAttrInterface > mappingAttributes
The mapping attributes targeted by this generator.
Definition: Utils.h:72
GpuIdBuilderFnType idBuilder
The constructor that builds the concrete IR for mapping ids.
Definition: Utils.h:75
std::function< DeviceMappingAttrInterface(MLIRContext *, mlir::gpu::MappingId)> MappingIdBuilderFnType
Definition: Utils.h:65
Builder for thread ids used to map scf.forall to reindexed threads.
Definition: Utils.h:116
Builder for warp ids used to map scf.forall to reindexed warps.
Definition: Utils.h:105
Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
Definition: Utils.h:92
Helper type for functions that generate ids for the mapping of a scf.forall.
Definition: Utils.h:38
SmallVector< int64_t > availableMappingSizes
Definition: Utils.h:43
SmallVector< Value > mappingIdOps
Definition: Utils.h:40
SmallVector< Value > activeIdOps
Definition: Utils.h:49
SmallVector< int64_t > activeMappingSizes
Definition: Utils.h:46