MLIR 23.0.0git
mlir::xegpu Namespace Reference

Namespaces

namespace  impl
namespace  uArch

Classes

struct  UnrollOptions
 Options to control the XeGPU unrolling.
struct  XeGPUPropagateLayoutOptions

Enumerations

enum class  LayoutKind { Lane , InstData , Subgroup }

Functions

void registerTransformDialectExtension (DialectRegistry &registry)
std::unique_ptr<::mlir::Pass > createXeGPUBlocking ()
std::unique_ptr<::mlir::Pass > createXeGPUFoldAliasOps ()
std::unique_ptr<::mlir::Pass > createXeGPUPeepHoleOptimizer ()
std::unique_ptr<::mlir::Pass > createXeGPUPropagateLayout ()
std::unique_ptr<::mlir::Pass > createXeGPUPropagateLayout (XeGPUPropagateLayoutOptions options)
std::unique_ptr<::mlir::Pass > createXeGPUSgToWiDistributeExperimental ()
std::unique_ptr<::mlir::Pass > createXeGPUSubgroupDistribute ()
std::unique_ptr<::mlir::Pass > createXeGPUVectorLinearize ()
std::unique_ptr<::mlir::Pass > createXeGPUWgToSgDistribute ()
void registerXeGPUBlocking ()
void registerXeGPUBlockingPass ()
void registerXeGPUFoldAliasOps ()
void registerXeGPUFoldAliasOpsPass ()
void registerXeGPUPeepHoleOptimizer ()
void registerXeGPUPeepHoleOptimizerPass ()
void registerXeGPUPropagateLayout ()
void registerXeGPUPropagateLayoutPass ()
void registerXeGPUSgToWiDistributeExperimental ()
void registerXeGPUSgToWiDistributeExperimentalPass ()
void registerXeGPUSubgroupDistribute ()
void registerXeGPUSubgroupDistributePass ()
void registerXeGPUVectorLinearize ()
void registerXeGPUVectorLinearizePass ()
void registerXeGPUWgToSgDistribute ()
void registerXeGPUWgToSgDistributePass ()
void registerXeGPUPasses ()
void populateXeGPUFoldAliasOpsPatterns (RewritePatternSet &patterns)
 Appends patterns that fold aliasing ops into XeGPU ops into patterns.
void populateXeGPUPeepHoleOptimizerPatterns (RewritePatternSet &patterns)
 Appends patterns for optimizing block load operations into patterns.
void populateXeGPUSubgroupDistributePatterns (RewritePatternSet &patterns)
 Appends patterns for XeGPU SIMT distribution into patterns.
void populateXeGPUMoveFuncBodyToWarpOpPatterns (RewritePatternSet &patterns)
 Appends patterns for moving a function body into a gpu.warp_execute_on_lane_0 op.
void populateXeGPUWgToSgDistributePatterns (RewritePatternSet &patterns)
 Appends patterns for XeGPU workgroup to subgroup distribution into patterns.
void populateXeGPUSgToWiDistributeTypeConversions (TypeConverter &typeConverter)
 Defines only the type conversions needed for XeGPU subgroup to workitem distribution.
void populateXeGPUSgToWiDistributeTypeConversionAndLegality (TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target)
 Defines type conversions and legality for XeGPU subgroup to workitem distribution and appends the required conversion patterns into patterns.
void populateXeGPUUnrollPatterns (RewritePatternSet &patterns, const UnrollOptions &options)
 Collect a set of patterns to unroll xegpu operations to smaller shapes.
LogicalResult propagateLayouts (OpBuilder &builder, Operation *target, LayoutKind layoutKind, bool printOnly=false)
LogicalResult resolveLayoutConflicts (Operation *target)
void recoverTemporaryLayoutsDeprecated (Operation *op)
 [to-be-deprecated] Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
bool recoverTemporaryLayouts (Operation *rootOp)
 Attach layout attributes to all vector-type operands of operations within the given operation's nested region.
template<typename T, typename = std::enable_if_t<std::is_same_v<T, OpOperand> || std::is_same_v<T, OpResult>>>
void removeLayoutAttr (const T &operandOrResult)
 Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
void removeLayoutAttrs (Operation *op)
 Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist.
SmallVector< NamedAttribute > dropSgLayoutAndDataOnAttrs (ArrayRef< NamedAttribute > attrs)
 Updates the NamedAttribute sequence by dropping sg-layout and sg-data information from any DistributeLayoutAttr found.
SmallVector< NamedAttribute > dropInstDataOnAttrs (ArrayRef< NamedAttribute > attrs)
 Updates the NamedAttribute sequence by dropping inst-data information from any DistributeLayoutAttr found.
DistributeLayoutAttr inferBroadcastSourceLayout (DistributeLayoutAttr resLayout, ArrayRef< int64_t > resShape, ArrayRef< int64_t > srcShape)
 Infers the source layout attribute for a broadcast operation given the result layout attribute, result shape, and source shape.
DistributeLayoutAttr inferMultiReductionSourceLayout (DistributeLayoutAttr resLayout, SmallVector< int64_t > reduceDims)
 Infers the source layout attribute for a reduction operation given the result layout attribute and reduced dims.
DistributeLayoutAttr inferBitCastSourceLayout (DistributeLayoutAttr resLayout, int resElemTyBitWidth, int srcElemTyBitWidth)
 Infers the source layout attribute for a bitcast operation given the result layout attribute, result element type bitwidth, and source element type bitwidth.
DistributeLayoutAttr inferShapeCastSourceLayout (DistributeLayoutAttr resLayout, ArrayRef< int64_t > resShape, ArrayRef< int64_t > srcShape)
 Infers the source layout attribute for a shape cast operation given the result layout attribute, result shape, and source shape.
DistributeLayoutAttr inferInsertStridedSliceSourceLayout (DistributeLayoutAttr resLayout, ArrayRef< int64_t > resShape, ArrayRef< int64_t > srcShape)
 Infers the source layout attribute for an insert strided slice operation given the result layout attribute, result shape, and source shape.
SliceAttr setupMultiReductionResultLayout (LayoutKind layoutKind, VectorType srcVectorTy, DistributeLayoutAttr consumerLayout, SmallVector< int64_t > reductionDims, const uArch::uArch *uArch)
 Sets up layout for reduction operations by creating a SliceAttr for the result.
DistributeLayoutAttr setupBitCastResultLayout (LayoutKind layoutKind, VectorType srcVectorTy, VectorType resVectorTy, DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch)
 Sets up the result layout attribute for a bitcast operation based on element type bitwidths.
DistributeLayoutAttr setupInsertStridedSliceResultLayout (LayoutKind layoutKind, VectorType srcVectorTy, VectorType resVectorTy, DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch)
 Sets up the result layout for an insert strided slice operation.
DistributeLayoutAttr setupLoadGatherAnchorLayout (LayoutKind layoutKind, VectorType vectorTy, int chunkSize, DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch)
 Sets up the anchor layout for a load gather operation.
DistributeLayoutAttr setupLoadMatrixAnchorLayout (LayoutKind layoutKind, VectorType vectorTy, DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch)
 Sets up the anchor layout for a load matrix operation.
DistributeLayoutAttr setupStoreScatterAnchorLayout (LayoutKind layoutKind, VectorType vectorTy, int chunkSize, const uArch::uArch *uArch)
 Sets up the anchor layout for a store scatter operation.
DistributeLayoutAttr setupStoreMatrixAnchorLayout (LayoutKind layoutKind, VectorType vectorTy, const uArch::uArch *uArch)
 Sets up the anchor layout for a store matrix operation.
std::optional< std::tuple< DistributeLayoutAttr, DistributeLayoutAttr, DistributeLayoutAttr > > setupDpasLayout (LayoutKind layoutKind, VectorType aTy, VectorType bTy, VectorType cdTy, DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch, int numSg)
 Sets up the anchor layouts for a dpas operands (A, B, and C/D).
SmallVector< Value > flattenValues (ArrayRef< ValueRange > values)
 Flatten a set of ValueRange into a single SmallVector<Value>
FailureOr< VectorType > getDistributedVectorType (xegpu::TensorDescType tdescTy)
 If the tensor descriptor has a layout attribute, it is used in SIMT mode.
FailureOr< VectorType > getDistributedVectorType (VectorType originalType, LayoutAttr layout)
 Helper to get the distributed vector type for a given vector type according to a given LayoutAttr.
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout (DistributeLayoutAttr layout, VectorType originalType)
 Helper function to get distributed vector type for a source vector type according to the lane_layout.
SmallVector< Value > extractVectorsWithShapeFromValue (OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
 Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
Value createVectorWithShapeFromValues (OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
 Create a vector of shape from a set of values using vector.insert_strided_slice.
void doSCFStructuralTypeConversionWithTensorType (Operation *op, TypeConverter converter)
 Do type conversion for SCF structural ops, e.g., scf.for, using SCF structural type conversion patterns.
std::optional< std::string > getChipStr (Operation *op)
 Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< OpFoldResult > addElementwise (OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
 Generates element-wise addition ops of two arrays of the same length.
SmallVector< OpFoldResult > addWithRightAligned (OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
 Generates element-wise addition ops of two arrays with automatic alignment.
template<typename T>
int getLargestDivisor (T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
 Helper function to find a proper instruction multiple for the user-supplied sg-level data shape (given by dim).
DistributeLayoutAttr getDistributeLayoutAttr (const Value value)
 Retrieves the DistributeLayoutAttr associated with a given Value.
DistributeLayoutAttr getDistributeLayoutAttr (const OpOperand &opr)
 Retrieves the DistributeLayoutAttr associated with a given OpOperand.
void setDistributeLayoutAttr (const OpResult &Result, const DistributeLayoutAttr layout)
 [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult; users should use setAnchorLayout instead.
void setDistributeLayoutAttr (const OpOperand &opr, const DistributeLayoutAttr layout)
 [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpOperand; users should use setAnchorLayout instead.
std::string getTemporaryLayoutName (const OpOperand &operand)
 Return the attribute name used to attach a DistributeLayoutAttr to the given OpOperand.
std::string getTemporaryLayoutName (const OpResult result)
 Return the attribute name used to attach a DistributeLayoutAttr to the given OpResult.
template<typename T, typename = std::enable_if_t<std::is_same_v<T, OpOperand> || std::is_same_v<T, OpResult>>>
DistributeLayoutAttr getTemporaryLayout (const T &operandOrResult)
 Get and set the distribute layout attribute for non-anchor operations (and for offsets/masks of load/store ops, until their temporary attributes are removed).
template<typename T, typename = std::enable_if_t<std::is_same_v<T, OpOperand> || std::is_same_v<T, OpResult>>>
void setTemporaryLayout (const T &operandOrResult, const DistributeLayoutAttr layout)
bool requirePacked (const LayoutAttr layout)
 Helper function to check if the layout is packed.
bool requireTranspose (const LayoutAttr layout, const uArch::uArch *uArch)
 Helper function to check if the layout requires a transpose effect.
bool matchUnitDimExpansion (ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
bool matchSplitDimExpansion (ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
static SmallVector< SmallVector< Value > > genCoordinates (OpBuilder &builder, Location loc, SmallVector< Value > delinearizedId, ArrayRef< int64_t > subShapesLayout, ArrayRef< int64_t > subShape, ArrayRef< int64_t > srcShape)
static SmallVector< int64_t > mapSlicedDimsToParentSpace (const SmallVector< int64_t > &dimsToMap, ArrayRef< int64_t > sliceDims)
template<typename ArithOp>
OpFoldResult genBinOp (OpFoldResult a, OpFoldResult b, Location loc, OpBuilder &builder)
SmallVector< OpFoldResult > getBlockedOffsets (OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > offsets, ArrayRef< int64_t > blockShape)

Enumeration Type Documentation

◆ LayoutKind

enum class mlir::xegpu::LayoutKind
strong
Enumerator
Lane 
InstData 
Subgroup 

Definition at line 36 of file XeGPULayoutImpl.h.

Function Documentation

◆ addElementwise()

SmallVector< OpFoldResult > mlir::xegpu::addElementwise ( OpBuilder & builder,
Location loc,
ArrayRef< OpFoldResult > lhs,
ArrayRef< OpFoldResult > rhs )

Generates element-wise addition ops of two arrays of the same length.

Definition at line 598 of file XeGPUUtils.cpp.

References mlir::OpBuilder::createOrFold(), mlir::getValueOrCreateConstantIndexOp(), lhs, and rhs.

Referenced by addWithRightAligned().

◆ addWithRightAligned()

SmallVector< OpFoldResult > mlir::xegpu::addWithRightAligned ( OpBuilder & builder,
Location loc,
ArrayRef< OpFoldResult > lhs,
ArrayRef< OpFoldResult > rhs )

Generates element-wise addition ops of two arrays with automatic alignment.

When the input arrays have different sizes, the shorter array is right-aligned with the longer array, and the unmatched leading elements from the longer array are preserved unchanged. This is commonly used for offset computation where higher-dimensional offsets need to be added to lower-dimensional adjustments.

Example: lhs = [l1, l2, l3], rhs = [r1, r2] Result: [l1, l2+r1, l3+r2]

Definition at line 623 of file XeGPUUtils.cpp.

References addElementwise(), b, lhs, and rhs.
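
A minimal usage sketch (values are hypothetical; builder and loc are assumed to be in scope, e.g. inside a rewrite pattern):

  // Add 1D block-local offsets (rhs) to 3D base offsets (lhs). Constant
  // attributes are used for clarity; in practice these OpFoldResults come
  // from earlier index computations.
  SmallVector<OpFoldResult> lhs = {builder.getIndexAttr(2),   // l1
                                   builder.getIndexAttr(4),   // l2
                                   builder.getIndexAttr(8)};  // l3
  SmallVector<OpFoldResult> rhs = {builder.getIndexAttr(1),   // r1
                                   builder.getIndexAttr(3)};  // r2
  // rhs is right-aligned against lhs; the unmatched leading element l1 is
  // preserved unchanged. Result: {2, 4+1, 8+3} == {2, 5, 11}.
  SmallVector<OpFoldResult> sum =
      xegpu::addWithRightAligned(builder, loc, lhs, rhs);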

◆ createVectorWithShapeFromValues()

Value mlir::xegpu::createVectorWithShapeFromValues ( OpBuilder & builder,
Location loc,
ValueRange values,
ArrayRef< int64_t > shape )

Create a vector of shape from a set of values using vector.insert_strided_slice.

Definition at line 407 of file XeGPUUtils.cpp.

References mlir::DenseElementsAttr::get(), mlir::getType(), mlir::ValueRange::getTypes(), mlir::Builder::getZeroAttr(), and result.

Referenced by mlir::xegpu::impl::XeGPUBlockingBase< DerivedT >::clonePass().

◆ createXeGPUBlocking()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUBlocking ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 89 of file XeGPUBlocking.cpp.

◆ createXeGPUFoldAliasOps()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUFoldAliasOps ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 165 of file XeGPUFoldAliasOps.cpp.

◆ createXeGPUPeepHoleOptimizer()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUPeepHoleOptimizer ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 242 of file XeGPUPeepHoleOptimizer.cpp.

References getChipStr(), mlir::getConstantIntValue(), mlir::Value::getType(), mlir::xegpu::uArch::getUArch(), and success().

◆ createXeGPUPropagateLayout() [1/2]

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUPropagateLayout ( )

Definition at line 338 of file XeGPUPropagateLayout.cpp.

References load.

◆ createXeGPUPropagateLayout() [2/2]

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUPropagateLayout ( XeGPUPropagateLayoutOptions options)

Definition at line 342 of file XeGPUPropagateLayout.cpp.

References broadcast(), and load.

◆ createXeGPUSgToWiDistributeExperimental()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUSgToWiDistributeExperimental ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 420 of file XeGPUSgToWiDistributeExperimental.cpp.

◆ createXeGPUSubgroupDistribute()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUSubgroupDistribute ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 498 of file XeGPUSubgroupDistribute.cpp.

◆ createXeGPUVectorLinearize()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUVectorLinearize ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 577 of file XeGPUVectorLinearize.cpp.

◆ createXeGPUWgToSgDistribute()

std::unique_ptr<::mlir::Pass > mlir::xegpu::createXeGPUWgToSgDistribute ( )

We declare an explicit private instantiation because Pass classes should only be visible to the current library.

Definition at line 657 of file XeGPUWgToSgDistribute.cpp.

References mlir::computeShapeRatio(), flattenValues(), mlir::getType(), and success().

◆ doSCFStructuralTypeConversionWithTensorType()

void mlir::xegpu::doSCFStructuralTypeConversionWithTensorType ( Operation * op,
TypeConverter converter )

Do type conversion for SCF structural ops, e.g., scf.for, using SCF structural type conversion patterns.

Since VectorType cannot carry the layout attribute, which is needed to guide the type conversion for XeGPU, vector values are first converted into RankedTensorType, where the layout attribute can be attached. Then the upstream SCF structural type conversion patterns are applied with the provided converter. TODO: This is a temporary solution. We should refactor it when context-aware type conversion is available.

Definition at line 432 of file XeGPUUtils.cpp.

References mlir::WalkResult::advance(), flattenValues(), mlir::Operation::getContext(), getDistributeLayoutAttr(), mlir::Operation::getOperandTypes(), mlir::Operation::getOpResults(), mlir::Operation::getParentOp(), mlir::Operation::getResultTypes(), mlir::Value::getType(), mlir::patterns, mlir::scf::populateSCFStructuralTypeConversionsAndLegality(), result, mlir::Value::setType(), mlir::WalkResult::skip(), success(), target, and mlir::Operation::walk().
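
A minimal calling sketch (illustrative only; the conversions shown are assumptions, not the exact ones used by the XeGPU transforms):

  // Map vector types to ranked tensor types so a layout attribute can be
  // carried through SCF structural ops, then run the conversion helper.
  TypeConverter converter;
  converter.addConversion([](Type type) { return type; }); // fallback
  converter.addConversion([](VectorType vecTy) -> Type {
    return RankedTensorType::get(vecTy.getShape(), vecTy.getElementType());
  });
  xegpu::doSCFStructuralTypeConversionWithTensorType(op, converter);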

◆ dropInstDataOnAttrs()

SmallVector< NamedAttribute > mlir::xegpu::dropInstDataOnAttrs ( ArrayRef< NamedAttribute > attrs)

Updates the NamedAttribute sequence by dropping inst-data information from any DistributeLayoutAttr found.

Definition at line 66 of file XeGPULayoutImpl.cpp.

◆ dropSgLayoutAndDataOnAttrs()

SmallVector< NamedAttribute > mlir::xegpu::dropSgLayoutAndDataOnAttrs ( ArrayRef< NamedAttribute > attrs)

Updates the NamedAttribute sequence by dropping sg-layout and sg-data information from any DistributeLayoutAttr found.

Definition at line 48 of file XeGPULayoutImpl.cpp.

◆ extractVectorsWithShapeFromValue()

SmallVector< Value > mlir::xegpu::extractVectorsWithShapeFromValue ( OpBuilder & builder,
Location loc,
Value value,
ArrayRef< int64_t > shape )

Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.

Definition at line 370 of file XeGPUUtils.cpp.

References mlir::computeShapeRatio(), mlir::Value::getType(), and result.

◆ flattenValues()

SmallVector< Value > mlir::xegpu::flattenValues ( ArrayRef< ValueRange > values)

Flatten a set of ValueRange into a single SmallVector<Value>

convert ArrayRef<ValueRange> into SmallVector<Value>

Definition at line 33 of file XeGPUUtils.cpp.

References result.

Referenced by createXeGPUWgToSgDistribute(), and doSCFStructuralTypeConversionWithTensorType().

◆ genBinOp()

template<typename ArithOp>
OpFoldResult mlir::xegpu::genBinOp ( OpFoldResult a,
OpFoldResult b,
Location loc,
OpBuilder & builder )

Definition at line 1191 of file XeGPUDialect.cpp.

References b, and mlir::getValueOrCreateConstantIndexOp().

◆ genCoordinates()

SmallVector< SmallVector< Value > > mlir::xegpu::genCoordinates ( OpBuilder & builder,
Location loc,
SmallVector< Value > delinearizedId,
ArrayRef< int64_t > subShapesLayout,
ArrayRef< int64_t > subShape,
ArrayRef< int64_t > srcShape )
static

◆ getBlockedOffsets()

SmallVector< OpFoldResult > mlir::xegpu::getBlockedOffsets ( OpBuilder & builder,
Location loc,
ArrayRef< OpFoldResult > offsets,
ArrayRef< int64_t > blockShape )

Definition at line 1216 of file XeGPUDialect.cpp.

References div, and rem.

◆ getChipStr()

std::optional< std::string > mlir::xegpu::getChipStr ( Operation * op)

Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.

Returns the chip identifier if found, or nullopt if no GPU module parent or XeVM target attribute exists.

Definition at line 579 of file XeGPUUtils.cpp.

References mlir::Operation::getParentOfType().

Referenced by createXeGPUPeepHoleOptimizer(), and mlir::xegpu::impl::XeGPUSubgroupDistributeBase< DerivedT >::~XeGPUSubgroupDistributeBase().
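
A minimal usage sketch (the chip name "pvc" is an illustrative value, not a guaranteed identifier):

  // Specialize behavior by chip when the op is nested under a gpu.module
  // carrying an XeVM target attribute.
  if (std::optional<std::string> chip = xegpu::getChipStr(op)) {
    if (*chip == "pvc") {
      // chip-specific handling goes here
    }
  } else {
    // no GPU module parent or no XeVM target attribute
  }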

◆ getDistributedVectorType() [1/2]

FailureOr< VectorType > mlir::xegpu::getDistributedVectorType ( VectorType originalType,
LayoutAttr layout )

Helper to get the distributed vector type for a given vector type according to a given LayoutAttr.

◆ getDistributedVectorType() [2/2]

FailureOr< VectorType > mlir::xegpu::getDistributedVectorType ( xegpu::TensorDescType tdescTy)

If the tensor descriptor has a layout attribute, it is used in SIMT mode.

In this mode, the distributed vector shape is determined as follows.

Definitions:
  lane_data_size = lane_data[0] × lane_data[1]
  subgroup_size = lane_layout[0] × lane_layout[1]
  distribution_unit_size = subgroup_size × lane_data_size

Case 1: Regular loads/stores. The following condition must be met:

  • tensor_desc[0] == lane_layout[0]

The distributed vector is a 1D vector with shape: [chunk_size]

Case 2: Block loads/stores. Additional definitions:
  tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
  n_distribution_units = tensor_size / distribution_unit_size
  fragment_size = n_distribution_units * lane_data_size
Given the above definitions, the following conditions must be met:

  • tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
  • tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0

The distributed vector is a 1D vector with shape: [fragment_size]

Definition at line 41 of file XeGPUUtils.cpp.
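
A worked example of the Case 2 formulas above (numbers are illustrative): for tensor_desc = 32x16, lane_layout = [1, 16], lane_data = [1, 1], array_length = 1, we get lane_data_size = 1, subgroup_size = 16, and distribution_unit_size = 16. Then tensor_size = 32 × 16 = 512, n_distribution_units = 512 / 16 = 32, and fragment_size = 32 × 1 = 32, so each lane receives a 1D vector of shape [32]. Both divisibility conditions hold: 32 % (1 × 1) == 0 and 16 % (16 × 1) == 0.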

◆ getDistributeLayoutAttr() [1/2]

xegpu::DistributeLayoutAttr mlir::xegpu::getDistributeLayoutAttr ( const OpOperand & opr)

Retrieves the DistributeLayoutAttr associated with a given OpOperand.

It will first check the operand_layout_{id} of the owner operation. If not found, it will check the operand itself and its defining op.

Definition at line 183 of file XeGPUUtils.cpp.

References mlir::Operation::getAttrOfType(), mlir::detail::IROperandBase::getOwner(), getTemporaryLayoutName(), and mlir::Operation::hasAttr().

◆ getDistributeLayoutAttr() [2/2]

xegpu::DistributeLayoutAttr mlir::xegpu::getDistributeLayoutAttr ( const Value value)

Retrieves the DistributeLayoutAttr associated with a given Value.

For TensorDescType values, the DistributeLayoutAttr is extracted from the TensorDescType itself. For other values, it is obtained from the attributes of the defining operation. Returns nullptr if no DistributeLayoutAttr is found.

Definition at line 146 of file XeGPUUtils.cpp.

References mlir::IROperand< DerivedT, IRValueT >::get(), mlir::Operation::getAttrOfType(), getDistributeLayoutAttr(), getTemporaryLayoutName(), mlir::Value::getType(), mlir::Operation::hasAttr(), and result.

Referenced by doSCFStructuralTypeConversionWithTensorType(), getDistributeLayoutAttr(), populateXeGPUSgToWiDistributeTypeConversions(), recoverTemporaryLayouts(), and recoverTemporaryLayoutsDeprecated().

◆ getDistVecTypeBasedOnLaneLayout()

FailureOr< VectorType > mlir::xegpu::getDistVecTypeBasedOnLaneLayout ( DistributeLayoutAttr layout,
VectorType originalType )

Helper function to get distributed vector type for a source vector type according to the lane_layout.

We simply divide each dimension of the tensor descriptor shape by the corresponding lane_layout dimension. If array_length > 1, it is appended to the front of the distributed shape.

Examples:

original vector shape   lane_layout   distributed vector shape
32x16                   [1, 16]       32x1
32x16                   [2, 8]        16x2
2x32x16                 [1, 16]       2x32x1

References lhs, and rhs.

Referenced by populateXeGPUSgToWiDistributeTypeConversions().

◆ getLargestDivisor()

template<typename T>
int mlir::xegpu::getLargestDivisor ( T dim,
ArrayRef< T > candidates,
ArrayRef< T > candidateMultiples = {} )

Helper function to find a proper instruction multiple for the user-supplied sg-level data shape (given by dim).

candidates are the uArch-allowed shapes. candidateMultiples are uArch multiples of such shapes (e.g., block count or array length).

Definition at line 636 of file XeGPUUtils.cpp.

◆ getTemporaryLayout()

template<typename T, typename = std::enable_if_t<std::is_same_v<T, OpOperand> || std::is_same_v<T, OpResult>>>
DistributeLayoutAttr mlir::xegpu::getTemporaryLayout ( const T & operandOrResult)

Get and set the distribute layout attribute for non-anchor operations (and for offsets/masks of load/store ops, until their temporary attributes are removed).

Referenced by populateXeGPUSgToWiDistributeTypeConversionAndLegality(), propagateLayouts(), xegpu::getTemporaryLayout< mlir::OpOperand >(), and xegpu::getTemporaryLayout< mlir::OpResult >().

◆ getTemporaryLayoutName() [1/2]

std::string mlir::xegpu::getTemporaryLayoutName ( const OpOperand & operand)

Return the attribute name used to attach a DistributeLayoutAttr to the given OpOperand.

Definition at line 135 of file XeGPUUtils.cpp.

Referenced by getDistributeLayoutAttr(), getDistributeLayoutAttr(), removeLayoutAttr(), and setDistributeLayoutAttr().

◆ getTemporaryLayoutName() [2/2]

std::string mlir::xegpu::getTemporaryLayoutName ( const OpResult result)

Return the attribute name used to attach a DistributeLayoutAttr to the given OpResult.

Definition at line 141 of file XeGPUUtils.cpp.

References result.

◆ inferBitCastSourceLayout()

DistributeLayoutAttr mlir::xegpu::inferBitCastSourceLayout ( DistributeLayoutAttr resLayout,
int resElemTyBitWidth,
int srcElemTyBitWidth )

Infers the source layout attribute for a bitcast operation given the result layout attribute, result element type bitwidth, and source element type bitwidth.

◆ inferBroadcastSourceLayout()

DistributeLayoutAttr mlir::xegpu::inferBroadcastSourceLayout ( DistributeLayoutAttr resLayout,
ArrayRef< int64_t > resShape,
ArrayRef< int64_t > srcShape )

Infers the source layout attribute for a broadcast operation given the result layout attribute, result shape, and source shape.

◆ inferInsertStridedSliceSourceLayout()

DistributeLayoutAttr mlir::xegpu::inferInsertStridedSliceSourceLayout ( DistributeLayoutAttr resLayout,
ArrayRef< int64_t > resShape,
ArrayRef< int64_t > srcShape )

Infers the source layout attribute for an insert strided slice operation given the result layout attribute, result shape, and source shape.

Removes leading dimensions from the result layout to match the source shape size.

◆ inferMultiReductionSourceLayout()

DistributeLayoutAttr mlir::xegpu::inferMultiReductionSourceLayout ( DistributeLayoutAttr resLayout,
SmallVector< int64_t > reduceDims )

Infers the source layout attribute for a reduction operation given the result layout attribute and reduced dims.

◆ inferShapeCastSourceLayout()

DistributeLayoutAttr mlir::xegpu::inferShapeCastSourceLayout ( DistributeLayoutAttr resLayout,
ArrayRef< int64_t > resShape,
ArrayRef< int64_t > srcShape )

Infers the source layout attribute for a shape cast operation given the result layout attribute, result shape, and source shape.

◆ mapSlicedDimsToParentSpace()

SmallVector< int64_t > mlir::xegpu::mapSlicedDimsToParentSpace ( const SmallVector< int64_t > & dimsToMap,
ArrayRef< int64_t > sliceDims )
static

Definition at line 793 of file XeGPUDialect.cpp.

◆ matchSplitDimExpansion()

bool mlir::xegpu::matchSplitDimExpansion ( ArrayRef< int64_t > src,
ArrayRef< int64_t > dst,
SmallVector< SmallVector< int64_t > > & splitDimGroups )

Definition at line 710 of file XeGPUUtils.cpp.

◆ matchUnitDimExpansion()

bool mlir::xegpu::matchUnitDimExpansion ( ArrayRef< int64_t > src,
ArrayRef< int64_t > dst,
SmallVector< int64_t > & expandedUnitDims )

Definition at line 690 of file XeGPUUtils.cpp.

◆ populateXeGPUFoldAliasOpsPatterns()

void mlir::xegpu::populateXeGPUFoldAliasOpsPatterns ( RewritePatternSet & patterns)

Appends patterns that fold aliasing ops into XeGPU ops into patterns.

Definition at line 63 of file XeGPUFoldAliasOps.cpp.

References mlir::patterns.
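
A typical driver sketch (assuming the upstream greedy driver entry point applyPatternsGreedily; the alias ops folded are, e.g., memref.subview ops feeding XeGPU ops):

  // Populate the alias-folding patterns and apply them greedily to a root op.
  RewritePatternSet patterns(op->getContext());
  xegpu::populateXeGPUFoldAliasOpsPatterns(patterns);
  (void)applyPatternsGreedily(op, std::move(patterns));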

◆ populateXeGPUMoveFuncBodyToWarpOpPatterns()

void mlir::xegpu::populateXeGPUMoveFuncBodyToWarpOpPatterns ( RewritePatternSet & patterns)

Appends patterns for moving a function body into a gpu.warp_execute_on_lane_0 op.

Definition at line 2085 of file XeGPUSubgroupDistribute.cpp.

References mlir::patterns.

◆ populateXeGPUPeepHoleOptimizerPatterns()

void mlir::xegpu::populateXeGPUPeepHoleOptimizerPatterns ( RewritePatternSet & patterns)

Appends patterns for optimizing block load operations into patterns.

Definition at line 506 of file XeGPUPeepHoleOptimizer.cpp.

References mlir::patterns.

◆ populateXeGPUSgToWiDistributeTypeConversionAndLegality()

void mlir::xegpu::populateXeGPUSgToWiDistributeTypeConversionAndLegality ( TypeConverter & typeConverter,
RewritePatternSet & patterns,
ConversionTarget & target )

Defines type conversions and legality for XeGPU subgroup to workitem distribution and appends the required conversion patterns into patterns.

Appends patterns for XeGPU subgroup to workitem distribution into patterns.

Definition at line 507 of file XeGPUSgToWiDistributeExperimental.cpp.

References getTemporaryLayout(), mlir::OpTrait::hasElementwiseMappableTraits(), mlir::patterns, populateXeGPUSgToWiDistributeTypeConversions(), and target.
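
A minimal dialect-conversion driver sketch (assumed to run inside a pass's runOnOperation; the helper supplies the type conversions, legality rules, and patterns):

  MLIRContext *ctx = &getContext();
  TypeConverter typeConverter;
  RewritePatternSet patterns(ctx);
  ConversionTarget target(*ctx);
  xegpu::populateXeGPUSgToWiDistributeTypeConversionAndLegality(
      typeConverter, patterns, target);
  if (failed(applyPartialConversion(getOperation(), target,
                                    std::move(patterns))))
    signalPassFailure();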

◆ populateXeGPUSgToWiDistributeTypeConversions()

void mlir::xegpu::populateXeGPUSgToWiDistributeTypeConversions ( TypeConverter & typeConverter)

Define only the type conversions needed for XeGPU subgroup to workitem distribution.

Definition at line 473 of file XeGPUSgToWiDistributeExperimental.cpp.

References getDistributeLayoutAttr(), getDistVecTypeBasedOnLaneLayout(), and mlir::Value::getType().

Referenced by populateXeGPUSgToWiDistributeTypeConversionAndLegality().

◆ populateXeGPUSubgroupDistributePatterns()

void mlir::xegpu::populateXeGPUSubgroupDistributePatterns ( RewritePatternSet & patterns)

Appends patterns for XeGPU SIMT distribution into patterns.

Definition at line 2065 of file XeGPUSubgroupDistribute.cpp.

References mlir::patterns.

◆ populateXeGPUUnrollPatterns()

void mlir::xegpu::populateXeGPUUnrollPatterns ( RewritePatternSet & patterns,
const UnrollOptions & options )

Collect a set of patterns to unroll xegpu operations to smaller shapes.

Users can control whether an operation should be unrolled, as well as its target shape, via the options structure (by setting filterConstraint and nativeShape respectively; both are function refs taking the op as input). An op is unrolled to the targetShape as follows, for each of its operands; see the configuration sketch after the definition reference below:

  1. the unrolled type unrolledType and the number of unrolled instances numUnrolledInstances are computed from the targetShape.
  2. pack each operand. ExtractStridedSlice ops are created to break up the vector operands, and builtin UnrealizedConversionCast ops are created to break up the TensorDesc operands.
  3. the original op is cloned numUnrolledInstances times, once per unrolled instance.
  4. unpack the results. InsertStridedSlice ops are inserted for VectorType results, and builtin UnrealizedConversionCast ops are inserted for TensorDescType results to re-assemble the slices into the original shape.

Definition at line 1037 of file XeGPUUnroll.cpp.

References options, and mlir::patterns.
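
A configuration sketch (the setters setFilterConstraint and setNativeShapeFn are assumed to mirror the filterConstraint and nativeShape fields named above; the 8x16x16 native shape is illustrative, not taken from any uArch):

  // Unroll only xegpu.dpas ops to a fixed native shape, then apply greedily.
  xegpu::UnrollOptions options;
  options.setFilterConstraint([](Operation *op) -> LogicalResult {
    return success(isa<xegpu::DpasOp>(op));
  });
  options.setNativeShapeFn(
      [](Operation *op) -> std::optional<SmallVector<int64_t>> {
        return SmallVector<int64_t>{8, 16, 16};
      });
  RewritePatternSet patterns(rootOp->getContext());
  xegpu::populateXeGPUUnrollPatterns(patterns, options);
  (void)applyPatternsGreedily(rootOp, std::move(patterns));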

◆ populateXeGPUWgToSgDistributePatterns()

void mlir::xegpu::populateXeGPUWgToSgDistributePatterns ( RewritePatternSet & patterns)

Appends patterns for XeGPU workgroup to subgroup distribution into patterns.

Definition at line 1637 of file XeGPUWgToSgDistribute.cpp.

References mlir::patterns.

◆ propagateLayouts()

LogicalResult mlir::xegpu::propagateLayouts ( OpBuilder & builder,
Operation * target,
LayoutKind layoutKind,
bool printOnly = false )

◆ recoverTemporaryLayouts()

bool mlir::xegpu::recoverTemporaryLayouts ( Operation * rootOp)

Attach layout attributes to all vector-type operands of operations within the given operation's nested region.

Reports an error if any vector operand lacks a layout attribute.

Definition at line 86 of file XeGPULayoutImpl.cpp.

References mlir::WalkResult::advance(), mlir::Operation::emitWarning(), getDistributeLayoutAttr(), mlir::Operation::getName(), mlir::Operation::getOpOperands(), result, setDistributeLayoutAttr(), and mlir::Operation::walk().

◆ recoverTemporaryLayoutsDeprecated()

void mlir::xegpu::recoverTemporaryLayoutsDeprecated ( Operation * op)

[to-be-deprecated] Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.

If the operation contains regions, this is also applied recursively to the contained operations. TODO: to be replaced by recoverTemporaryLayouts().

Definition at line 33 of file XeGPULayoutImpl.cpp.

References getDistributeLayoutAttr(), mlir::Operation::getOpOperands(), mlir::Operation::getOpResults(), result, setDistributeLayoutAttr(), and mlir::Operation::walk().

◆ registerTransformDialectExtension()

void mlir::xegpu::registerTransformDialectExtension ( DialectRegistry & registry)

◆ registerXeGPUBlocking()

void mlir::xegpu::registerXeGPUBlocking ( )
inline

Definition at line 679 of file Passes.h.

◆ registerXeGPUBlockingPass()

void mlir::xegpu::registerXeGPUBlockingPass ( )
inline

Definition at line 686 of file Passes.h.

◆ registerXeGPUFoldAliasOps()

void mlir::xegpu::registerXeGPUFoldAliasOps ( )
inline

Definition at line 700 of file Passes.h.

◆ registerXeGPUFoldAliasOpsPass()

void mlir::xegpu::registerXeGPUFoldAliasOpsPass ( )
inline

Definition at line 707 of file Passes.h.

◆ registerXeGPUPasses()

void mlir::xegpu::registerXeGPUPasses ( )
inline

Definition at line 847 of file Passes.h.

Referenced by mlir::registerAllPasses().

◆ registerXeGPUPeepHoleOptimizer()

void mlir::xegpu::registerXeGPUPeepHoleOptimizer ( )
inline

Definition at line 721 of file Passes.h.

◆ registerXeGPUPeepHoleOptimizerPass()

void mlir::xegpu::registerXeGPUPeepHoleOptimizerPass ( )
inline

Definition at line 728 of file Passes.h.

◆ registerXeGPUPropagateLayout()

void mlir::xegpu::registerXeGPUPropagateLayout ( )
inline

Definition at line 742 of file Passes.h.

◆ registerXeGPUPropagateLayoutPass()

void mlir::xegpu::registerXeGPUPropagateLayoutPass ( )
inline

Definition at line 749 of file Passes.h.

◆ registerXeGPUSgToWiDistributeExperimental()

void mlir::xegpu::registerXeGPUSgToWiDistributeExperimental ( )
inline

Definition at line 763 of file Passes.h.

◆ registerXeGPUSgToWiDistributeExperimentalPass()

void mlir::xegpu::registerXeGPUSgToWiDistributeExperimentalPass ( )
inline

Definition at line 770 of file Passes.h.

◆ registerXeGPUSubgroupDistribute()

void mlir::xegpu::registerXeGPUSubgroupDistribute ( )
inline

Definition at line 784 of file Passes.h.

◆ registerXeGPUSubgroupDistributePass()

void mlir::xegpu::registerXeGPUSubgroupDistributePass ( )
inline

Definition at line 791 of file Passes.h.

◆ registerXeGPUVectorLinearize()

void mlir::xegpu::registerXeGPUVectorLinearize ( )
inline

Definition at line 805 of file Passes.h.

◆ registerXeGPUVectorLinearizePass()

void mlir::xegpu::registerXeGPUVectorLinearizePass ( )
inline

Definition at line 812 of file Passes.h.

◆ registerXeGPUWgToSgDistribute()

void mlir::xegpu::registerXeGPUWgToSgDistribute ( )
inline

Definition at line 826 of file Passes.h.

◆ registerXeGPUWgToSgDistributePass()

void mlir::xegpu::registerXeGPUWgToSgDistributePass ( )
inline

Definition at line 833 of file Passes.h.

◆ removeLayoutAttr()

template<typename T, typename = std::enable_if_t<std::is_same_v<T, OpOperand> || std::is_same_v<T, OpResult>>>
void mlir::xegpu::removeLayoutAttr ( const T & operandOrResult)

◆ removeLayoutAttrs()

void mlir::xegpu::removeLayoutAttrs ( Operation * op)

Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist.

If the operation contains regions, this is also applied recursively to the contained operations.

Definition at line 125 of file XeGPULayoutImpl.cpp.

References mlir::Operation::getAttrs(), mlir::Operation::removeAttr(), and mlir::Operation::walk().

◆ requirePacked()

bool mlir::xegpu::requirePacked ( const LayoutAttr layout)

Helper function to check if the layout is packed.

Layout is packed if it is 2D and lane_data[0] != 1 (data packed from col dimension). TODO: Move to target info.

◆ requireTranspose()

bool mlir::xegpu::requireTranspose ( const LayoutAttr layout,
const uArch::uArch * uArch )

Helper function to check if the layout requires a transpose effect.

◆ resolveLayoutConflicts()

LogicalResult mlir::xegpu::resolveLayoutConflicts ( Operation * target)

Definition at line 1572 of file XeGPUPropagateLayout.cpp.

References target.

◆ setDistributeLayoutAttr() [1/2]

void mlir::xegpu::setDistributeLayoutAttr ( const OpOperand & opr,
const DistributeLayoutAttr layout )

[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpOperand; users should use setAnchorLayout instead.

Definition at line 283 of file XeGPUUtils.cpp.

References mlir::detail::IROperandBase::getOwner(), getTemporaryLayoutName(), mlir::Operation::hasAttrOfType(), and mlir::Operation::setAttr().

◆ setDistributeLayoutAttr() [2/2]

void mlir::xegpu::setDistributeLayoutAttr ( const OpResult & Result,
const DistributeLayoutAttr layout )

[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult; users should use setAnchorLayout instead.

References result.

Referenced by recoverTemporaryLayouts(), recoverTemporaryLayoutsDeprecated(), updateControlFlowOps(), and updateOp().

◆ setTemporaryLayout()

template<typename T, typename = std::enable_if_t<std::is_same_v<T, OpOperand> || std::is_same_v<T, OpResult>>>
void mlir::xegpu::setTemporaryLayout ( const T & operandOrResult,
const DistributeLayoutAttr layout )

◆ setupBitCastResultLayout()

xegpu::DistributeLayoutAttr mlir::xegpu::setupBitCastResultLayout ( xegpu::LayoutKind layoutKind,
VectorType srcVecTy,
VectorType resVecTy,
DistributeLayoutAttr consumerLayout,
const uArch::uArch * uArch )

Sets up the result layout attribute for a bitcast operation based on element type bitwidths.

Sets up the result layout for a bitcast operation.

This ensures the source layout can always be derived from the result layout.

When casting from a wider to a narrower element type (srcElemTyBitWidth > resElemTyBitWidth), the result layout's innermost dimension data sizes (inst_data, lane_data) are scaled up by the bitwidth ratio. This maintains the invariant that the source layout can be recovered by adjusting the result layout based on the bitwidth ratio of input vs output.

In other words, when casting to a smaller bitwidth, the layout dimensions (sgData, instData, or laneData) are multiplied by the bitwidth ratio so that the result layout can be correctly divided back to the source layout during inference; when casting to a wider element type, no adjustment is needed.

Examples:

  1. Casting f32 -> f16 (32-bit to 16-bit, bitWidthRatio = 2): Consumer layout: instData=[1, 16], subgroupSize=16. Source shape: [8, 32]. Result layout: instData=[1, 32] (16 × 2). The innermost dimension is multiplied by 2 to maintain consistency.
  2. Casting f32 -> i8 (32-bit to 8-bit, bitWidthRatio = 4): Consumer layout: instData=[1, 16], subgroupSize=16. Source shape: [4, 128]. Result layout: instData=[1, 64] (16 × 4).
  3. Casting i8 -> i32 (8-bit to 32-bit, bitWidthRatio = 1/4): Consumer layout: laneLayout=[1, 16], laneData=[1, 4]. No adjustment is needed; the consumer layout is returned directly.

Definition at line 536 of file XeGPULayoutImpl.cpp.

References InstData, Lane, and Subgroup.

◆ setupDpasLayout()

std::optional< std::tuple< DistributeLayoutAttr, DistributeLayoutAttr, DistributeLayoutAttr > > mlir::xegpu::setupDpasLayout ( LayoutKind layoutKind,
VectorType aTy,
VectorType bTy,
VectorType cdTy,
DistributeLayoutAttr consumerLayout,
const uArch::uArch * uArch,
int numSg )

Sets up the anchor layouts for the dpas operands (A, B, and C/D).

The numSg and (optional) consumerLayout are used only for subgroup layout creation.

◆ setupInsertStridedSliceResultLayout()

DistributeLayoutAttr mlir::xegpu::setupInsertStridedSliceResultLayout ( LayoutKind layoutKind,
VectorType srcVectorTy,
VectorType resVectorTy,
DistributeLayoutAttr consumerLayout,
const uArch::uArch * uArch )

Sets up the result layout for an insert strided slice operation.

Creates a result layout based on the specified layout kind (InstData or Lane).

◆ setupLoadGatherAnchorLayout()

DistributeLayoutAttr mlir::xegpu::setupLoadGatherAnchorLayout ( LayoutKind layoutKind,
VectorType vectorTy,
int chunkSize,
DistributeLayoutAttr consumerLayout,
const uArch::uArch * uArch )

Sets up the anchor layout for a load gather operation.

◆ setupLoadMatrixAnchorLayout()

DistributeLayoutAttr mlir::xegpu::setupLoadMatrixAnchorLayout ( LayoutKind layoutKind,
VectorType vectorTy,
DistributeLayoutAttr consumerLayout,
const uArch::uArch * uArch )

Sets up the anchor layout for a load matrix operation.

◆ setupMultiReductionResultLayout()

xegpu::SliceAttr mlir::xegpu::setupMultiReductionResultLayout ( xegpu::LayoutKind layoutKind,
VectorType srcVecTy,
DistributeLayoutAttr consumerLayout,
SmallVector< int64_t > reductionDims,
const uArch::uArch * uArch )

Sets up layout for reduction operations by creating a SliceAttr for the result.

This function first attempts to construct a source layout that, when sliced along reduction dimensions, produces a result layout compatible with the consumer's preferred layout. This minimizes data redistribution overhead. The SliceAttr for the result is then created based on the derived source layout and the specified reduction dimensions.

Algorithm Overview: This function attempts to construct a source layout that, when sliced along reduction dimensions, produces a result layout compatible with the consumer layout.

For subgroup layouts, it first tries to align the source layout's subgroup layout and data with the consumer's layout on non-reduction dimensions. Then, it distributes remaining subgroups across reduction dimensions. This avoids subgroup data redistribution overhead between the reduced result and its consumer.

InstData requires {1, ..., min(maxReduceVectorSize, srcShape), subgroupSize}.
Lane layout requires {1, ..., 1, subgroupSize}.
Lane data requires {1, ..., min(maxReduceVectorSize, srcShape), 1}.

Examples:

  1. Subgroup layout - Row reduction on 2D tensor: srcShape=[32, 64], reductionDims=[1], resShape=[32], subgroupSize=16, workgroupSize=32. Consumer layout: #xegpu.slice<#xegpu.layout<sg_layout=[4, 8], sg_data=[8, 8]>, dims = [1]>. Result: srcLayout with sgLayout=[4, 8], sgData=[8, 8] (matches the consumer on the non-reduction dim, minimizing data redistribution on the reduction dim).
  2. Subgroup layout - Same example as above, but the consumer has a different layout: sgLayout=[32], sgData=[1]. Result: srcLayout with sgLayout=[32, 1], sgData=[1, 64] (distributes all subgroups on the non-reduction dim).
  3. InstData layout - Column reduction: srcShape=[32, 64], reductionDims=[0], subgroupSize=16. Result: instData=[1, 16] (maxReduceVectorSize=1, subgroupSize on the innermost dim).
  4. Lane layout - Multi-dimensional reduction: srcShape=[16, 32, 64], reductionDims=[1], subgroupSize=16. Result: laneLayout=[1, 1, 16], laneData=[1, 1, 1] (subgroupSize on the innermost dim, max vector size on the reduction dim).

Definition at line 420 of file XeGPULayoutImpl.cpp.

References mlir::detail::DenseArrayAttrImpl< int32_t >::get(), mlir::detail::DenseArrayAttrImpl< int64_t >::get(), InstData, Lane, and Subgroup.

◆ setupStoreMatrixAnchorLayout()

xegpu::DistributeLayoutAttr mlir::xegpu::setupStoreMatrixAnchorLayout ( xegpu::LayoutKind layoutKind,
VectorType vectorTy,
const uArch::uArch * uArch )

Sets up the anchor layout for a store matrix operation.

Definition at line 840 of file XeGPULayoutImpl.cpp.

References setupGenericStoreAnchorLayout(), and mlir::xegpu::uArch::StoreMatrix.

◆ setupStoreScatterAnchorLayout()

xegpu::DistributeLayoutAttr mlir::xegpu::setupStoreScatterAnchorLayout ( xegpu::LayoutKind layoutKind,
VectorType vectorTy,
int chunkSize,
const uArch::uArch * uArch )

Sets up the anchor layout for a store scatter operation.

Definition at line 821 of file XeGPULayoutImpl.cpp.

References setupGenericStoreAnchorLayout(), and mlir::xegpu::uArch::StoreScatter.