MLIR  22.0.0git
XeGPUUtils.h
Go to the documentation of this file.
1 //===- XeGPUUtils.h - XeGPU Utilities ---------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
10 #define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
11 
13 #include "mlir/IR/BuiltinTypes.h"
14 #include "mlir/IR/OpDefinition.h"
15 namespace mlir {
16 
17 class VectorType;
18 class OpOperand;
19 class OpResult;
20 class OpBuilder;
21 class ValueRange;
22 class TypeConverter;
23 class OpFoldResult;
24 
25 namespace xegpu {
26 class DistributeLayoutAttr;
27 class LayoutAttr;
28 class TensorDescType;
29 } // namespace xegpu
30 
31 namespace xegpu {
32 
33 /// Flatten a set of ValueRange into a single SmallVector<Value>
34 SmallVector<Value> flattenValues(ArrayRef<ValueRange> values);
35 
36 /// If tensor descriptor has a layout attribute it is used in SIMT mode.
37 /// In this mode, the distributed vector shape is determined as follows:
38 /// Definitions:
39 /// lane_data_size = lane_data[0] × lane_data[1]
40 /// subgroup_size = lane_layout[0] × lane_layout[1]
41 /// distribution_unit_size = subgroup_size × lane_data_size
42 ///
43 /// Case 1: Regular loads/stores.
44 /// The following conditions must be met:
45 /// * tensor_desc[0] == lane_layout[0]
46 /// Distributed vector is a 1D vector with shape:
47 /// [chunk_size]
48 ///
49 /// Case 2: Block loads/stores
50 /// Additional definitions:
51 /// tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
52 /// n_distribution_units = tensor_size / distribution_unit_size
53 /// fragment_size = n_distribution_units * lane_data_size
54 /// Given above definitions, the following conditions must be met:
55 /// * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
56 /// * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
57 /// Distributed vector is a 1D vector with shape:
58 /// [fragment_size]
59 FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
60 
61 /// Helper to get the distributed vector type for a given vector type according
62 /// to a given LayoutAttr.
63 FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
64  LayoutAttr layout);
65 
66 /// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
67 std::string getLayoutName(const OpOperand &operand);
68 
69 /// Return the attribute name for the OpResult to attach DistributeLayoutAttr
70 std::string getLayoutName(const OpResult result);
71 
72 /// Retrieves the DistributeLayoutAttr associated with a given Value. For
73 /// TensorDescType values, the DistributeLayoutAttr is extracted from the
74 /// TensorDescType itself. For other values, it is obtained from the attributes
75 /// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
76 /// found.
77 DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
78 
/// Typed convenience wrapper around getDistributeLayoutAttr(Value): looks up
/// the layout attribute associated with `value` and casts it to `AttrTy` using
/// dyn_cast_if_present. Per LLVM casting semantics the result is expected to
/// be a null `AttrTy` when no layout is found or when the layout found is not
/// an `AttrTy`.
79 template <typename AttrTy>
80 AttrTy getDistributeLayoutAttrOfType(const Value value) {
81  return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(value));
82 }
83 
84 /// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
85 /// will first check the operand_layout_{id} of the owner operation. If not
86 /// found, it will check the operand itself and its defining op.
87 DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
88 
89 template <typename AttrTy>
91  return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(opr));
92 }
93 
94 /// Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
95 template <typename T,
96  typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
97  std::is_same_v<T, OpResult>>>
98 void removeLayoutAttr(const T &operandOrResult);
99 
100 /// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the
101 /// given operation if they exist. If the operation contains regions, it is also
102 /// applied recursively to the contained operations
103 void removeLayoutAttrs(Operation *op);
104 
105 /// Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching
106 /// it to the owner's dictionary attributes
107 template <typename T,
108  typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
109  std::is_same_v<T, OpResult>>>
110 void setDistributeLayoutAttr(const T &operandOrResult,
111  const DistributeLayoutAttr layout);
112 
113 /// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given
114 /// operation. If the operation contains regions, it is also applied recursively
115 /// to the contained operations
117  Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl);
118 
119 /// Extract a set of small vectors from a value with a given shape using
120 /// vector.extract_strided_slice
122  Location loc, Value value,
123  ArrayRef<int64_t> shape);
124 
125 /// Create a vector of shape from a set of values using
126 /// vector.insert_strided_slice.
128  ValueRange values,
129  ArrayRef<int64_t> shape);
130 
131 /// Do type conversion for SCF structural ops, e.g., scf.for using SCF structure
132 /// type conversion patterns. Since VectorType cannot carry the layout
133 /// attribute, which is needed to guide the type conversion for XeGPU, they are
134 /// first converted into RankedTensorType, where the layout attribute can be
135 /// attached. And then upstream SCF structural type conversion patterns are
136 /// applied with the provided converter.
137 /// TODO: This is a temporary solution. We should refactor it when context-aware
138 /// type conversion is available.
140  TypeConverter converter);
141 
142 /// Retrieves the chip string from the XeVM target attribute of the parent
143 /// GPU module operation. Returns the chip identifier if found, or nullopt
144 /// if no GPU module parent or XeVM target attribute exists.
145 std::optional<std::string> getChipStr(Operation *op);
146 
147 /// Generates element-wise addition ops of two arrays with same length.
151 
152 /// Generates element-wise addition ops of two arrays with automatic alignment.
153 /// When the input arrays have different sizes, the shorter array is
154 /// right-aligned with the longer array, and the unmatched leading elements from
155 /// the longer array are preserved unchanged. This is commonly used for offset
156 /// computation where higher-dimensional offsets need to be added to
157 /// lower-dimensional adjustments.
158 ///
159 /// Example:
160 /// lhs = [l1, l2, l3], rhs = [r1, r2]
161 /// Result: [l1, l2+r1, l3+r2]
165 } // namespace xegpu
166 
167 } // namespace mlir
168 
169 #endif // MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
This class helps build Operations.
Definition: Builders.h:207
This class represents an operand of an operation.
Definition: Value.h:257
This is a value defined by a result of an operation.
Definition: Value.h:447
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Type conversion class.
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
Definition: XeGPUUtils.cpp:260
void setDistributeLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout)
Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching it to the owner's dictio...
Definition: XeGPUUtils.cpp:179
void setDistributeLayoutAttrs(Operation *op, function_ref< DistributeLayoutAttr(Value)> getLayoutImpl)
Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
Definition: XeGPUUtils.cpp:197
AttrTy getDistributeLayoutAttrOfType(const Value value)
Definition: XeGPUUtils.h:80
std::string getLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
Definition: XeGPUUtils.cpp:106
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
Definition: XeGPUUtils.cpp:215
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type conversion patterns...
Definition: XeGPUUtils.cpp:285
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
Definition: XeGPUUtils.cpp:117
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
Definition: XeGPUUtils.cpp:432
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
Definition: XeGPUUtils.cpp:240
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
Definition: XeGPUUtils.cpp:230
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
Definition: XeGPUUtils.cpp:32
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
Definition: XeGPUUtils.cpp:476
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
Definition: XeGPUUtils.cpp:451
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Definition: XeGPUUtils.cpp:40
Include the generated interface declarations.