MLIR 23.0.0git
XeGPUUtils.h
Go to the documentation of this file.
1//===- XeGPUUtils.h - XeGPU Utilities ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
10#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
11
12#include "mlir/Dialect/Vector/IR/VectorOps.h"
13#include "mlir/IR/BuiltinTypes.h"
14
15namespace mlir {
16
17class VectorType;
18class OpOperand;
19class OpResult;
20class OpBuilder;
21class ValueRange;
22class TypeConverter;
23class OpFoldResult;
24
25namespace xegpu {
26class DistributeLayoutAttr;
27class LayoutAttr;
28class TensorDescType;
29
30namespace uArch {
31struct uArch;
32} // namespace uArch
33} // namespace xegpu
34
35namespace xegpu {
36
37/// Flatten a set of ValueRange into a single SmallVector<Value>
38SmallVector<Value> flattenValues(ArrayRef<ValueRange> values);
39
40/// If tensor descriptor has a layout attribute it is used in SIMT mode.
41/// In this mode, the distributed vector shape is determined as follows:
42/// Definitions:
43/// lane_data_size = lane_data[0] × lane_data[1]
44/// subgroup_size = lane_layout[0] × lane_layout[1]
45/// distribution_unit_size = subgroup_size × lane_data_size
46///
47/// Case 1: Regular loads/stores.
48/// The following conditions must be met:
49/// * tensor_desc[0] == lane_layout[0]
50/// Distributed vector is a 1D vector with shape:
51/// [chunk_size]
52///
53/// Case 2: Block loads/stores
54/// Additional definitions:
55/// tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
56/// n_distribution_units = tensor_size / distribution_unit_size
57/// fragment_size = n_distribution_units * lane_data_size
58/// Given above definitions, the following conditions must be met:
59/// * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
60/// * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
61/// Distributed vector is a 1D vector with shape:
62/// [fragment_size]
63FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
64
65/// Helper to get the distributed vector type for a given vector type according
66/// to a given LayoutAttr.
67FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
68 LayoutAttr layout);
69
70/// Helper function to get distributed vector type for a source vector type
71/// according to the lane_layout. We simply divide each dimension of tensor
72/// descriptor shape by corresponding lane_layout dimension. If
73/// array_length > 1, that is appended to the front of the distributed shape.
74///
75/// Examples:
76/// | original vector shape | lane_layout | distributed vector shape |
77/// |-----------------------|-------------|--------------------------|
78/// | 32x16 | [1, 16] | 32x1 |
79/// | 32x16 | [2, 8] | 16x2 |
80/// | 2x32x16 | [1, 16] | 2x32x1 |
81FailureOr<VectorType>
82getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout,
83 VectorType originalType);
84
85/// Extract a set of small vectors from a value with a given shape using
86/// vector.extract_stride_slice
87SmallVector<Value> extractVectorsWithShapeFromValue(OpBuilder &builder,
88                                                    Location loc, Value value,
89                                                    ArrayRef<int64_t> shape);
90
91/// Create a vector of shape from a set of values using
92/// vector.insert_stride_slice.
93Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc,
94                                      ValueRange values,
95                                      ArrayRef<int64_t> shape);
96
97/// Do type conversion for SCF structural ops, e.g., scf.for using SCF
98/// structural type conversion patterns. Since VectorType cannot carry the layout
99/// attribute, which is needed to guide the type conversion for XeGPU, they are
100/// first converted into RankedTensorType, where the layout attribute can be
101/// attached. And then upstream SCF structural type conversion patterns are
102/// applied with the provided converter.
103/// TODO: This is a temporary solution. We should refactor it when context-aware
104/// type conversion is available.
105void doSCFStructuralTypeConversionWithTensorType(Operation *op,
106                                                 TypeConverter converter);
107
108/// Retrieves the chip string from the XeVM target attribute of the parent
109/// GPU module operation. Returns the chip identifier if found, or nullopt
110/// if no GPU module parent or XeVM target attribute exists.
111std::optional<std::string> getChipStr(Operation *op);
112
113/// Generates element-wise addition ops of two arrays with same length.
114SmallVector<OpFoldResult> addElementwise(OpBuilder &builder, Location loc,
115                                         ArrayRef<OpFoldResult> lhs,
116                                         ArrayRef<OpFoldResult> rhs);
117
118/// Generates element-wise addition ops of two arrays with automatic alignment.
119/// When the input arrays have different sizes, the shorter array is
120/// right-aligned with the longer array, and the unmatched leading elements from
121/// the longer array are preserved unchanged. This is commonly used for offset
122/// computation where higher-dimensional offsets need to be added to
123/// lower-dimensional adjustments.
124///
125/// Example:
126/// lhs = [l1, l2, l3], rhs = [r1, r2]
127/// Result: [l1, l2+r1, l3+r2]
128SmallVector<OpFoldResult> addWithRightAligned(OpBuilder &builder, Location loc,
129                                              ArrayRef<OpFoldResult> lhs,
130                                              ArrayRef<OpFoldResult> rhs);
131
132/// Given an `input` value representing per-lane data, this function returns the
133/// result after performing a reduction on the input over all lanes (number of
134/// lanes given by `size`). This uses butterfly shuffles to perform the
135/// reduction in a log2(size) number of steps.
136/// NOTE: Implementation taken from TestVectorTransforms.cpp
137Value subgroupReduction(Location loc, OpBuilder &builder, Value input,
138 vector::CombiningKind kind, uint32_t size);
139
140/// Given `src` and `acc` arguments from a vector::MultiDimReductionOp,
141/// lower to a set of vector::ReductionOp ops over 1D slices extracted from
142/// `src`. The reduction is performed along `reductionDim`. The result is a
143/// vector with the same shape as `acc`.
144/// TODO: Only 2D to 1D reduction is supported for now.
145Value lowerToVectorReductions(TypedValue<VectorType> src,
146                              TypedValue<VectorType> acc,
147                              vector::CombiningKind kind, int64_t reductionDim,
148 Location loc, PatternRewriter &rewriter);
149
150/// Lowers cross-lane reductions to shuffle operations on a 2D vector.
151/// Extracts slices along the reduction dimension, performs subgroup reductions
152/// with shuffles across reductionSize work-items, and inserts the results back
153/// into an accumulator vector.
154Value lowerCrossLaneReductionToShuffles(TypedValue<VectorType> src,
155                                        TypedValue<VectorType> acc,
156                                        vector::CombiningKind kind,
157 int64_t reductionDim,
158 int64_t reductionSize, Location loc,
159 PatternRewriter &rewriter);
160
161/// Helper Function to find a proper instruction multiple for the user-supplied
162/// sg-level data shape (given by `dim`). `candidates` are uArch allowed shapes.
163/// `candidateMultiples` are uArch multiples of such shapes (i.e. block count or
164/// array length).
165template <typename T>
166int getLargestDivisor(T dim, ArrayRef<T> candidates,
167 ArrayRef<T> candidateMultiples = {});
168
169/// Retrieves the DistributeLayoutAttr associated with a given Value. For
170/// TensorDescType values, the DistributeLayoutAttr is extracted from the
171/// TensorDescType itself. For other values, it is obtained from the attributes
172/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
173/// found.
174DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
175
176/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
177/// will first check the operand_layout_{id} of the owner operation. If not
178/// found, it will check the operand itself and its defining op.
179DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
180
181/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult
182/// user should use setAnchorLayout instead
183void setDistributeLayoutAttr(const OpResult &result,
184                             const DistributeLayoutAttr layout);
185
186/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpOperand
187/// user should use setAnchorLayout instead
188void setDistributeLayoutAttr(const OpOperand &opr,
189 const DistributeLayoutAttr layout);
190
191/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
192std::string getTemporaryLayoutName(const OpOperand &operand);
193
194/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
195std::string getTemporaryLayoutName(const OpResult result);
196
197/// get and set distribute layout attribute for non-anchor operations
198/// (and offsets/masks of load/store ops before we get rid of their temp attrs)
199template <typename T,
200 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
201 std::is_same_v<T, OpResult>>>
202DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult);
203
204template <typename T,
205 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
206 std::is_same_v<T, OpResult>>>
207void setTemporaryLayout(const T &operandOrResult,
208 const DistributeLayoutAttr layout);
209
210/// Helper function to check if the layout is packed. Layout is packed if it is
211/// 2D and lane_data[0] != 1 (data packed from col dimension).
212/// TODO: Move to target info.
213bool requirePacked(const LayoutAttr layout);
214
215/// Helper function to check if the layout requires a transpose effect.
216bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch);
217
218// Check if dst shape is an expansion of src shape by inserting unit dimensions.
219bool matchUnitDimExpansion(ArrayRef<int64_t> src, ArrayRef<int64_t> dst,
220                           SmallVector<int64_t> &expandedUnitDims);
221
222// Checks if dst shape is an expansion of src shape where each dimension in src
223// is split into one or more consecutive dimensions in dst
224bool matchSplitDimExpansion(ArrayRef<int64_t> src, ArrayRef<int64_t> dst,
225                            SmallVector<SmallVector<int64_t>> &splitDimGroups);
226
227} // namespace xegpu
228
229} // namespace mlir
230
231#endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
lhs
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given a src and an acc argumments from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type convertion patterns...
bool requirePacked(const LayoutAttr layout)
Helper function to check if the layout is packed.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:497