MLIR 23.0.0git
XeGPUUtils.h
Go to the documentation of this file.
1//===- XeGPUUtils.h - XeGPU Utilities ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
10#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
11
15namespace mlir {
16
17class VectorType;
18class OpOperand;
19class OpResult;
20class OpBuilder;
21class ValueRange;
22class TypeConverter;
23class OpFoldResult;
24
25namespace xegpu {
26class DistributeLayoutAttr;
27class LayoutAttr;
28class TensorDescType;
29
30namespace uArch {
31struct uArch;
32} // namespace uArch
33} // namespace xegpu
34
35namespace xegpu {
36
37/// Flatten a set of ValueRange into a single SmallVector<Value>
38SmallVector<Value> flattenValues(ArrayRef<ValueRange> values);
39
40/// If tensor descriptor has a layout attribute it is used in SIMT mode.
41/// In this mode, the distributed vector shape is determined as follows:
42/// Definitions:
43/// lane_data_size = lane_data[0] × lane_data[1]
44/// subgroup_size = lane_layout[0] × lane_layout[1]
45/// distribution_unit_size = subgroup_size × lane_data_size
46///
47/// Case 1: Regular loads/stores.
48/// The following conditions must be met:
49/// * tensor_desc[0] == lane_layout[0]
50/// Distributed vector is a 1D vector with shape:
51/// [chunk_size]
52///
53/// Case 2: Block loads/stores
54/// Additional definitions:
55/// tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
56/// n_distribution_units = tensor_size / distribution_unit_size
57/// fragment_size = n_distribution_units * lane_data_size
58/// Given above definitions, the following conditions must be met:
59/// * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
60/// * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
61/// Distributed vector is a 1D vector with shape:
62/// [fragment_size]
63FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
64
65/// Helper to get the distributed vector type for a given vector type according
66/// to a given LayoutAttr.
67FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
68 LayoutAttr layout);
69
70/// Helper function to get distributed vector type for a source vector type
71/// according to the lane_layout. We simply divide each dimension of tensor
72/// descriptor shape by corresponding lane_layout dimension. If
73/// array_length > 1, it is prepended to the distributed shape.
74///
75/// Examples:
76/// | original vector shape | lane_layout | distributed vector shape |
77/// |-----------------------|-------------|--------------------------|
78/// | 32x16 | [1, 16] | 32x1 |
79/// | 32x16 | [2, 8] | 16x2 |
80/// | 2x32x16 | [1, 16] | 2x32x1 |
81FailureOr<VectorType>
82getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout,
83 VectorType originalType);
84
85/// Extract a set of small vectors from a value with a given shape using
86/// vector.extract_strided_slice.
88 Location loc, Value value,
90
91/// Create a vector of shape from a set of values using
92/// vector.insert_strided_slice.
94 ValueRange values,
96
97/// Do type conversion for SCF structural ops, e.g., scf.for using SCF structure
98/// type conversion patterns. Since VectorType cannot carry the layout
99/// attribute, which is needed to guide the type conversion for XeGPU, they are
100/// first converted into RankedTensorType, where the layout attribute can be
101/// attached. And then upstream SCF structural type conversion patterns are
102/// applied with the provided converter.
103/// TODO: This is a temporary solution. We should refactor it when context-aware
104/// type conversion is available.
106 TypeConverter converter);
107
108/// Retrieves the chip string from the XeVM target attribute of the parent
109/// GPU module operation. Returns the chip identifier if found, or nullopt
110/// if no GPU module parent or XeVM target attribute exists.
111std::optional<std::string> getChipStr(Operation *op);
112
113/// Generates element-wise addition ops of two arrays with same length.
117
118/// Generates element-wise addition ops of two arrays with automatic alignment.
119/// When the input arrays have different sizes, the shorter array is
120/// right-aligned with the longer array, and the unmatched leading elements from
121/// the longer array are preserved unchanged. This is commonly used for offset
122/// computation where higher-dimensional offsets need to be added to
123/// lower-dimensional adjustments.
124///
125/// Example:
126/// lhs = [l1, l2, l3], rhs = [r1, r2]
127/// Result: [l1, l2+r1, l3+r2]
131
132/// Helper Function to find a proper instruction multiple for the user-supplied
133/// sg-level data shape (given by `dim`). `candidates` are uArch allowed shapes.
134/// `candidateMultiples` are uArch multiples of such shapes (i.e. block count or
135/// array length).
136template <typename T>
137int getLargestDivisor(T dim, ArrayRef<T> candidates,
138 ArrayRef<T> candidateMultiples = {});
139
140/// Retrieves the DistributeLayoutAttr associated with a given Value. For
141/// TensorDescType values, the DistributeLayoutAttr is extracted from the
142/// TensorDescType itself. For other values, it is obtained from the attributes
143/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
144/// found.
145DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
146
147/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
148/// will first check the operand_layout_{id} of the owner operation. If not
149/// found, it will check the operand itself and its defining op.
150DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
151
152/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult.
153/// Users should use setAnchorLayout instead.
155 const DistributeLayoutAttr layout);
156
157/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpOperand.
158/// Users should use setAnchorLayout instead.
159void setDistributeLayoutAttr(const OpOperand &opr,
160 const DistributeLayoutAttr layout);
161
162/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
163std::string getTemporaryLayoutName(const OpOperand &operand);
164
165/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
166std::string getTemporaryLayoutName(const OpResult result);
167
168/// get and set distribute layout attribute for non-anchor operations
169/// (and offsets/masks of load/store ops before we get rid of their temp attrs)
170template <typename T,
171 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
172 std::is_same_v<T, OpResult>>>
173DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult);
174
175template <typename T,
176 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
177 std::is_same_v<T, OpResult>>>
178void setTemporaryLayout(const T &operandOrResult,
179 const DistributeLayoutAttr layout);
180
181/// Helper function to check if the layout is packed. Layout is packed if it is
182/// 2D and lane_data[0] != 1 (data packed from col dimension).
183/// TODO: Move to target info.
184bool requirePacked(const LayoutAttr layout);
185
186/// Helper function to check if the layout requires a transpose effect.
187bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch);
188
189// Check if dst shape is an expansion of src shape by inserting unit dimensions.
191 SmallVector<int64_t> &expandedUnitDims);
192
193// Checks if dst shape is an expansion of src shape where each dimension in src
194// is split into one or more consecutive dimensions in dst
196 SmallVector<SmallVector<int64_t>> &splitDimGroups);
197
198} // namespace xegpu
199
200} // namespace mlir
201
202#endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
lhs
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:209
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type conversion patterns...
bool requirePacked(const LayoutAttr layout)
Helper function to check if the layout is packed.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.