MLIR 22.0.0git
XeGPUUtils.h
Go to the documentation of this file.
1//===- XeGPUUtils.h - Vector Utilities --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
10#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
11
15namespace mlir {
16
17class VectorType;
18class OpOperand;
19class OpResult;
20class OpBuilder;
21class ValueRange;
22class TypeConverter;
23class OpFoldResult;
24
25namespace xegpu {
26class DistributeLayoutAttr;
27class LayoutAttr;
28class TensorDescType;
29} // namespace xegpu
30
31namespace xegpu {
32
33/// Flatten a set of ValueRange into a single SmallVector<Value>
34SmallVector<Value> flattenValues(ArrayRef<ValueRange> values);
35
36/// If tensor descriptor has a layout attribute it is used in SIMT mode.
37/// In this mode, the distributed vector shape is determined as follows:
38/// Definitions:
39/// lane_data_size = lane_data[0] × lane_data[1]
40/// subgroup_size = lane_layout[0] × lane_layout[1]
41/// distribution_unit_size = subgroup_size × lane_data_size
42///
43/// Case 1: Regular loads/stores.
44/// The following conditions must be met:
45/// * tensor_desc[0] == lane_layout[0]
46/// Distributed vector is a 1D vector with shape:
47/// [chunk_size]
48///
49/// Case 2: Block loads/stores
50/// Additional definitions:
51/// tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
52/// n_distribution_units = tensor_size / distribution_unit_size
53/// fragment_size = n_distribution_units * lane_data_size
54/// Given above definitions, the following conditions must be met:
55/// * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
56/// * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
57/// Distributed vector is a 1D vector with shape:
58/// [fragment_size]
59FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
60
61/// Helper to get the distributed vector type for a given vector type according
62/// to a given LayoutAttr.
63FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
64 LayoutAttr layout);
65
66/// Extract a set of small vectors from a value with a given shape using
67/// vector.extract_strided_slice.
69 Location loc, Value value,
71
72/// Create a vector of shape from a set of values using
73/// vector.insert_strided_slice.
75 ValueRange values,
77
78/// Do type conversion for SCF structural ops, e.g., scf.for using SCF structure
79/// type conversion patterns. Since VectorType cannot carry the layout
80/// attribute, which is needed to guide the type conversion for XeGPU, they are
81/// first converted into RankedTensorType, where the layout attribute can be
82/// attached. And then upstream SCF structural type conversion patterns are
83/// applied with the provided converter.
84/// TODO: This is a temporary solution. We should refactor it when context-aware
85/// type conversion is available.
87 TypeConverter converter);
88
89/// Retrieves the chip string from the XeVM target attribute of the parent
90/// GPU module operation. Returns the chip identifier if found, or nullopt
91/// if no GPU module parent or XeVM target attribute exists.
92std::optional<std::string> getChipStr(Operation *op);
93
94/// Generates element-wise addition ops of two arrays with same length.
98
99/// Generates element-wise addition ops of two arrays with automatic alignment.
100/// When the input arrays have different sizes, the shorter array is
101/// right-aligned with the longer array, and the unmatched leading elements from
102/// the longer array are preserved unchanged. This is commonly used for offset
103/// computation where higher-dimensional offsets need to be added to
104/// lower-dimensional adjustments.
105///
106/// Example:
107/// lhs = [l1, l2, l3], rhs = [r1, r2]
108/// Result: [l1, l2+r1, l3+r2]
112
113/// Helper Function to find a proper instruction multiple for the user-supplied
114/// sg-level data shape (given by `dim`). `candidates` are uArch allowed shapes.
115/// `candidateMultiples` are uArch multiples of such shapes (i.e. block count or
116/// array length).
117template <typename T>
118int getLargestDivisor(T dim, ArrayRef<T> candidates,
119 ArrayRef<T> candidateMultiples = {});
120
121/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
122std::string getTemporaryLayoutName(const OpOperand &operand);
123
124/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
125std::string getTemporaryLayoutName(const OpResult result);
126
127/// Retrieves the DistributeLayoutAttr associated with a given Value. For
128/// TensorDescType values, the DistributeLayoutAttr is extracted from the
129/// TensorDescType itself. For other values, it is obtained from the attributes
130/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
131/// found.
132DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
133
134/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
135/// will first check the operand_layout_{id} of the owner operation. If not
136/// found, it will check the operand itself and its defining op.
137DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
138
139/// Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
140template <typename T,
141 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
142 std::is_same_v<T, OpResult>>>
143void removeLayoutAttr(const T &operandOrResult);
144
145/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the
146/// given operation if they exist. If the operation contains regions, it is also
147/// applied recursively to the contained operations
149
150/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult
151/// user should use setAnchorLayout instead
153 const DistributeLayoutAttr layout);
154
155/// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpOperand
156/// user should use setAnchorLayout instead
157void setDistributeLayoutAttr(const OpOperand &opr,
158 const DistributeLayoutAttr layout);
159
160/// get and set distribute layout attribute for non-anchor operations
161/// (and offsets/masks of load/store ops before we get rid of their temp attrs)
162template <typename T,
163 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
164 std::is_same_v<T, OpResult>>>
165DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult);
166
167template <typename T,
168 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
169 std::is_same_v<T, OpResult>>>
170void setTemporaryLayout(const T &operandOrResult,
171 const DistributeLayoutAttr layout);
172
173/// [to-be-deprecated] Set the DistributeLayoutAttr for each OpOperand and
174/// OpResult of the given operation. If the operation contains regions, it is
175/// also applied recursively to the contained operations.
176/// TODO: To be replaced by recoverTemporaryLayouts()
178
179/// Attach layout attributes to all vector-type operands of operations within
180/// the given operation's region. Reports an error if any vector operand lacks
181/// a layout attribute.
183
184} // namespace xegpu
185
186} // namespace mlir
187
188#endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
lhs
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:207
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (give...
bool recoverTemporaryLayouts(Operation *rootOp)
Attach layout attributes to all vector-type operands of operations within the given operation's regio...
void recoverTemporaryLayoutsDeprecated(Operation *op)
[to-be-deprecated] Set the DistributeLayoutAttr for each OpOperand and OpResult of the given opera...
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.