MLIR 22.0.0git
XeGPUUtils.h
Go to the documentation of this file.
1//===- XeGPUUtils.h - XeGPU Utilities ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
10#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
11
15namespace mlir {
16
17class VectorType;
18class OpOperand;
19class OpResult;
20class OpBuilder;
21class ValueRange;
22class TypeConverter;
23class OpFoldResult;
24
25namespace xegpu {
26class DistributeLayoutAttr;
27class LayoutAttr;
28class TensorDescType;
29} // namespace xegpu
30
31namespace xegpu {
32
33/// Flatten a set of ValueRange into a single SmallVector<Value>
34SmallVector<Value> flattenValues(ArrayRef<ValueRange> values);
35
36/// If tensor descriptor has a layout attribute it is used in SIMT mode.
37/// In this mode, the distributed vector shape is determined as follows:
38/// Definitions:
39/// lane_data_size = lane_data[0] × lane_data[1]
40/// subgroup_size = lane_layout[0] × lane_layout[1]
41/// distribution_unit_size = subgroup_size × lane_data_size
42///
43/// Case 1: Regular loads/stores.
44/// The following conditions must be met:
45/// * tensor_desc[0] == lane_layout[0]
46/// Distributed vector is a 1D vector with shape:
47/// [chunk_size]
48///
49/// Case 2: Block loads/stores
50/// Additional definitions:
51/// tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
52/// n_distribution_units = tensor_size / distribution_unit_size
53/// fragment_size = n_distribution_units * lane_data_size
54/// Given above definitions, the following conditions must be met:
55/// * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
56/// * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
57/// Distributed vector is a 1D vector with shape:
58/// [fragment_size]
59FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
60
61/// Helper to get the distributed vector type for a given vector type according
62/// to a given LayoutAttr.
63FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
64 LayoutAttr layout);
65
66/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
67std::string getLayoutName(const OpOperand &operand);
68
69/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
70std::string getLayoutName(const OpResult result);
71
72/// Retrieves the DistributeLayoutAttr associated with a given Value. For
73/// TensorDescType values, the DistributeLayoutAttr is extracted from the
74/// TensorDescType itself. For other values, it is obtained from the attributes
75/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
76/// found.
77DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
78
79template <typename AttrTy>
81 return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(value));
82}
83
84/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
85/// will first check the operand_layout_{id} of the owner operation. If not
86/// found, it will check the operand itself and its defining op.
87DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
88
89template <typename AttrTy>
91 return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(opr));
92}
93
94/// Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
95template <typename T,
96 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
97 std::is_same_v<T, OpResult>>>
98void removeLayoutAttr(const T &operandOrResult);
99
100/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the
101/// given operation if they exist. If the operation contains regions, it is also
102/// applied recursively to the contained operations
104
105/// Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching
106/// it to the owner's dictionary attributes.
107/// If `respectPermLayout` is true the existing permanent layout
108/// attribute will be kept and assigned to the attribute dict instead
109/// of the provided layout.
110template <typename T,
111 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
112 std::is_same_v<T, OpResult>>>
113void setDistributeLayoutAttr(const T &operandOrResult,
114 const DistributeLayoutAttr layout,
115 bool respectPermLayout = false);
116
117/// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given
118/// operation. If the operation contains regions, it is also applied recursively
119/// to the contained operations
121 Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl);
122
123/// Extract a set of small vectors from a value with a given shape using
124/// vector.extract_strided_slice.
126 Location loc, Value value,
128
129/// Create a vector of shape from a set of values using
130/// vector.insert_strided_slice.
132 ValueRange values,
134
135/// Do type conversion for SCF structural ops, e.g., scf.for using SCF structure
136/// type conversion patterns. Since VectorType cannot carry the layout
137/// attribute, which is needed to guide the type conversion for XeGPU, they are
138/// first converted into RankedTensorType, where the layout attribute can be
139/// attached. And then upstream SCF structural type conversion patterns are
140/// applied with the provided converter.
141/// TODO: This is a temporary solution. We should refactor it when context-aware
142/// type conversion is available.
144 TypeConverter converter);
145
146/// Retrieves the chip string from the XeVM target attribute of the parent
147/// GPU module operation. Returns the chip identifier if found, or nullopt
148/// if no GPU module parent or XeVM target attribute exists.
149std::optional<std::string> getChipStr(Operation *op);
150
151/// Generates element-wise addition ops of two arrays with same length.
155
156/// Generates element-wise addition ops of two arrays with automatic alignment.
157/// When the input arrays have different sizes, the shorter array is
158/// right-aligned with the longer array, and the unmatched leading elements from
159/// the longer array are preserved unchanged. This is commonly used for offset
160/// computation where higher-dimensional offsets need to be added to
161/// lower-dimensional adjustments.
162///
163/// Example:
164/// lhs = [l1, l2, l3], rhs = [r1, r2]
165/// Result: [l1, l2+r1, l3+r2]
169
170/// Helper Function to find a proper instruction multiple for the user-supplied
171/// sg-level data shape (given by `dim`). `candidates` are uArch allowed shapes.
172/// `candidateMultiples` are uArch multiples of such shapes (i.e. block count or
173/// array length).
174template <typename T>
175int getLargestDivisor(T dim, ArrayRef<T> candidates,
176 ArrayRef<T> candidateMultiples = {});
177
178} // namespace xegpu
179
180} // namespace mlir
181
182#endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
lhs
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
This class helps build Operations.
Definition Builders.h:207
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
void setDistributeLayoutAttrs(Operation *op, function_ref< DistributeLayoutAttr(Value)> getLayoutImpl)
Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
AttrTy getDistributeLayoutAttrOfType(const Value value)
Definition XeGPUUtils.h:80
std::string getLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
void setDistributeLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout, bool respectPermLayout=false)
Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching it to the owner's dictio...
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152