MLIR 23.0.0git
XeGPUUtils.h
Go to the documentation of this file.
1//===- XeGPUUtils.h - Vector Utilities --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
10#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
11
15namespace mlir {
16
17class VectorType;
18class OpOperand;
19class OpResult;
20class OpBuilder;
21class ValueRange;
22class TypeConverter;
23class OpFoldResult;
24
25namespace xegpu {
26class DistributeLayoutAttr;
27class LayoutAttr;
28class TensorDescType;
29
30namespace uArch {
31struct uArch;
32} // namespace uArch
33} // namespace xegpu
34
35namespace xegpu {
36
37/// Flatten a set of ValueRange into a single SmallVector<Value>
38SmallVector<Value> flattenValues(ArrayRef<ValueRange> values);
39
40/// If tensor descriptor has a layout attribute it is used in SIMT mode.
41/// In this mode, the distributed vector shape is determined as follows:
42/// Definitions:
43/// lane_data_size = lane_data[0] × lane_data[1]
44/// subgroup_size = lane_layout[0] × lane_layout[1]
45/// distribution_unit_size = subgroup_size × lane_data_size
46///
47/// Case 1: Regular loads/stores.
48/// The following conditions must be met:
49/// * tensor_desc[0] == lane_layout[0]
50/// Distributed vector is a 1D vector with shape:
51/// [chunk_size]
52///
53/// Case 2: Block loads/stores
54/// Additional definitions:
55/// tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
56/// n_distribution_units = tensor_size / distribution_unit_size
57/// fragment_size = n_distribution_units * lane_data_size
58/// Given above definitions, the following conditions must be met:
59/// * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
60/// * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
61/// Distributed vector is a 1D vector with shape:
62/// [fragment_size]
63FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
64
65/// Helper to get the distributed vector type for a given vector type according
66/// to a given LayoutAttr.
67FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
68 LayoutAttr layout);
69
70/// Helper function to get distributed vector type for a source vector type
71/// according to the lane_layout. We simply divide each dimension of tensor
72/// descriptor shape by corresponding lane_layout dimension. If
73/// array_length > 1, that is prepended to the distributed shape.
74///
75/// Examples:
76/// | original vector shape | lane_layout | distributed vector shape |
77/// |-----------------------|-------------|--------------------------|
78/// | 32x16 | [1, 16] | 32x1 |
79/// | 32x16 | [2, 8] | 16x2 |
80/// | 2x32x16 | [1, 16] | 2x32x1 |
81FailureOr<VectorType>
82getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout,
83 VectorType originalType);
84
85/// Extract a set of small vectors from a value with a given shape using
86/// vector.extract_strided_slice.
88 Location loc, Value value,
90
91/// Create a vector of shape from a set of values using
92/// vector.insert_strided_slice.
94 ValueRange values,
96
97/// Do type conversion for SCF structural ops, e.g., scf.for using SCF structure
98/// type conversion patterns. Since VectorType cannot carry the layout
99/// attribute, which is needed to guide the type conversion for XeGPU, they are
100/// first converted into RankedTensorType, where the layout attribute can be
101/// attached. And then upstream SCF structural type conversion patterns are
102/// applied with the provided converter.
103/// TODO: This is a temporary solution. We should refactor it when context-aware
104/// type conversion is available.
106 TypeConverter converter);
107
108/// Retrieves the chip string from the XeVM target attribute of the parent
109/// GPU module operation. Returns the chip identifier if found, or nullopt
110/// if no GPU module parent or XeVM target attribute exists.
111std::optional<std::string> getChipStr(Operation *op);
112
113/// Generates element-wise addition ops of two arrays with same length.
117
118/// Generates element-wise addition ops of two arrays with automatic alignment.
119/// When the input arrays have different sizes, the shorter array is
120/// right-aligned with the longer array, and the unmatched leading elements from
121/// the longer array are preserved unchanged. This is commonly used for offset
122/// computation where higher-dimensional offsets need to be added to
123/// lower-dimensional adjustments.
124///
125/// Example:
126/// lhs = [l1, l2, l3], rhs = [r1, r2]
127/// Result: [l1, l2+r1, l3+r2]
131
132/// Given an `input` value representing per-lane data, this function returns the
133/// result after performing a reduction on the input over all lanes (number of
134/// lanes given by `size`). This uses butterfly shuffles to perform the
135/// reduction in a log2(size) number of steps.
136/// NOTE: Implementation taken from TestVectorTransforms.cpp
137Value subgroupReduction(Location loc, OpBuilder &builder, Value input,
138 vector::CombiningKind kind, uint32_t size);
139
140/// Given the `src` and `acc` arguments of a vector::MultiDimReductionOp,
141/// lower to a set of vector::ReductionOp ops over 1D slices extracted from
142/// `src`. The reduction is performed along `reductionDim`. The result is a
143/// vector with the same shape as `acc`.
144/// TODO: Only 2D to 1D reduction is supported for now.
147 vector::CombiningKind kind, int64_t reductionDim,
148 Location loc, PatternRewriter &rewriter);
149
150/// Creates a constant filled with the neutral (identity) value for the
151/// given reduction kind. For example: 0 for ADD/OR/XOR, 1 for MUL/AND,
152/// max/min signed/unsigned int for MINSI/MINUI/MAXSI/MAXUI, and +/-infinity
153/// for float min/max operations. If \p type is a VectorType, returns a splat
154/// vector constant; otherwise returns a scalar constant. Returns nullptr if
155/// the element type is incompatible with the requested reduction kind.
157 vector::CombiningKind kind);
158
159/// Lowers cross-lane reductions to shuffle operations on a 2D vector.
160/// Extracts slices along the reduction dimension, performs subgroup reductions
161/// with shuffles across reductionSize work-items, and inserts the results back
162/// into an accumulator vector.
165 vector::CombiningKind kind,
166 int64_t reductionDim,
167 int64_t reductionSize, Location loc,
168 PatternRewriter &rewriter);
169
170/// Helper function to find a proper instruction multiple for the user-supplied
171/// sg-level data shape (given by `dim`). `candidates` are uArch allowed shapes.
172/// `candidateMultiples` are uArch multiples of such shapes (i.e. block count or
173/// array length).
174template <typename T>
175int getLargestDivisor(T dim, ArrayRef<T> candidates,
176 ArrayRef<T> candidateMultiples = {});
177
178/// Retrieves the DistributeLayoutAttr associated with a given Value. For
179/// TensorDescType values, the DistributeLayoutAttr is extracted from the
180/// TensorDescType itself. For other values, it is obtained from the attributes
181/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
182/// found.
183DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
184
185/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
186/// will first check the operand_layout_{id} of the owner operation. If not
187/// found, it will check the operand itself and its defining op.
188DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
189
190/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult.
191/// Users should use setAnchorLayout instead.
193 const DistributeLayoutAttr layout);
194
195/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpOperand.
196/// Users should use setAnchorLayout instead.
197void setDistributeLayoutAttr(const OpOperand &opr,
198 const DistributeLayoutAttr layout);
199
200/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
201std::string getTemporaryLayoutName(const OpOperand &operand);
202
203/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
204std::string getTemporaryLayoutName(const OpResult result);
205
206/// get and set distribute layout attribute for non-anchor operations
207/// (and offsets/masks of load/store ops before we get rid of their temp attrs)
208template <typename T,
209 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
210 std::is_same_v<T, OpResult>>>
211DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult);
212
213template <typename T,
214 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
215 std::is_same_v<T, OpResult>>>
216void setTemporaryLayout(const T &operandOrResult,
217 const DistributeLayoutAttr layout);
218
219/// Helper function to check if the layout is packed. Layout is packed if it is
220/// 2D and lane_data[0] != 1 (data packed from col dimension).
221/// TODO: Move to target info.
222bool requirePacked(const DistributeLayoutAttr layout);
223
224/// Helper function to check if the layout requires a transpose effect.
225bool requireTranspose(const DistributeLayoutAttr layout,
226 const uArch::uArch *uArch);
227
228// Check if dst shape is an expansion of src shape by inserting unit dimensions.
230 SmallVector<int64_t> &expandedUnitDims);
231
232// Checks if dst shape is an expansion of src shape where each dimension in src
233// is split into one or more consecutive dimensions in dst
235 SmallVector<SmallVector<int64_t>> &splitDimGroups);
236
237} // namespace xegpu
238
239} // namespace mlir
240
241#endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
lhs
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:254
This is a value defined by a result of an operation.
Definition Value.h:454
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
bool requirePacked(const DistributeLayoutAttr layout)
Helper function to check if the layout is packed.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
Value createReductionNeutralValue(OpBuilder &builder, Location loc, Type type, vector::CombiningKind kind)
Creates a constant filled with the neutral (identity) value for the given reduction kind.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper function to find a proper instruction multiple for the user-supplied sg-level data shape (give...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given the src and acc arguments of a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool requireTranspose(const DistributeLayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:494