MLIR 23.0.0git
XeGPULayoutImpl.cpp File Reference


Typedefs

using LayoutRepresentation = std::pair<int64_t, int64_t>

Functions

static void setTensorDescLayout (Value val, xegpu::DistributeLayoutAttr layout)
static void walkRegionBackward (Region &region, llvm::function_ref< void(Operation *)> visit)
static xegpu::DistributeLayoutAttr getLayoutFromUsePoints (Value result)
static void propagateResultsToRegularOperands (Operation *op)
static void propagateRegionResultsToYieldOperands (mlir::RegionBranchTerminatorOpInterface yieldOp)
static void propagateRegionArgsToInits (mlir::RegionBranchOpInterface regionOp)
template void xegpu::removeLayoutAttr< mlir::OpResult > (const mlir::OpResult &result)
template void xegpu::removeLayoutAttr< mlir::OpOperand > (const mlir::OpOperand &operand)
static xegpu::DistributeLayoutAttr setupGenericLoadAnchorLayout (xegpu::LayoutKind layoutKind, mlir::MLIRContext *context, xegpu::DistributeLayoutAttr consumerLayout, bool isChunkedLoad, int maxChunkSize, ArrayRef< int64_t > resShape, int subgroupSize)
 Sets up the anchor layout for load gather and load matrix operations.
static xegpu::DistributeLayoutAttr setupGenericStoreAnchorLayout (xegpu::LayoutKind layoutKind, mlir::MLIRContext *context, bool isChunkedStore, int maxChunkSize, ArrayRef< int64_t > srcShape, int subgroupSize)
 Sets up the anchor layout for store scatter and store matrix operations.
template<typename RankedTy>
static xegpu::LayoutAttr getDefaultLaneLayout2DBlockIo (RankedTy ty, const xegpu::uArch::uArch *uArch, std::optional< unsigned > packingSize=std::nullopt, bool vnni=false)
static SmallVector< LayoutRepresentation > getValidLayouts (ArrayRef< int64_t > wgShape, ArrayRef< int64_t > instData, int64_t sgCount)
static std::optional< std::tuple< SmallVector< int64_t >, SmallVector< int64_t >, SmallVector< int64_t > > > getDpasInstDataVectors (VectorType aTy, VectorType bTy, VectorType cdTy, const xegpu::uArch::uArch *uArch, bool isDpasMx=false)
 Helper function to compute inst_data vectors for DPAS operands A, B, and C/D.
static std::optional< std::tuple< xegpu::DistributeLayoutAttr, xegpu::DistributeLayoutAttr, xegpu::DistributeLayoutAttr > > getupDpasSubgroupLayouts (mlir::MLIRContext *context, VectorType aTy, VectorType bTy, VectorType cdTy, xegpu::DistributeLayoutAttr consumerLayout, int numSg, const xegpu::uArch::uArch *uArch)
 Helper function to set up subgroup layouts for DPAS operands A, B, and C/D.
static xegpu::DistributeLayoutAttr createScaleLayout (mlir::MLIRContext *context, VectorType matrixTy, VectorType scaleTy, xegpu::DistributeLayoutAttr matrixLayout, bool isBScale, const xegpu::uArch::uArch *uArch)
 Helper to create a scale layout derived from a matrix operand layout.

Typedef Documentation

◆ LayoutRepresentation

using LayoutRepresentation = std::pair<int64_t, int64_t>

Definition at line 1390 of file XeGPULayoutImpl.cpp.

Function Documentation

◆ createScaleLayout()

xegpu::DistributeLayoutAttr createScaleLayout ( mlir::MLIRContext * context,
VectorType matrixTy,
VectorType scaleTy,
xegpu::DistributeLayoutAttr matrixLayout,
bool isBScale,
const xegpu::uArch::uArch * uArch )
static

Helper to create a scale layout derived from a matrix operand layout.

The scale layout is computed by mapping each dimension of the matrix layout to the corresponding scale tensor dimension using the ratio between the matrix and scale shapes.

Definition at line 1610 of file XeGPULayoutImpl.cpp.

References mlir::detail::DenseArrayAttrImpl< int32_t >::get(), mlir::xegpu::uArch::uArch::getInstruction(), and mlir::xegpu::uArch::SubgroupScaledMatrixMultiplyAcc.
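
As an illustration of the ratio-based mapping described above, here is a minimal standalone sketch. It is not the actual implementation: the helper name and the plain-vector types are hypothetical, and only the per-dimension ratio idea is taken from the description.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical helper: map each matrix-layout dimension onto the scale
// tensor using the per-dimension ratio between matrix and scale shapes.
// E.g. matrixShape {128, 64}, scaleShape {128, 2}, matrixLayoutDims {8, 4}
// gives ratios {1, 32} and scale dims {8, 1} (clamped to at least 1).
static std::vector<int64_t>
deriveScaleLayoutDims(const std::vector<int64_t> &matrixShape,
                      const std::vector<int64_t> &scaleShape,
                      const std::vector<int64_t> &matrixLayoutDims) {
  assert(matrixShape.size() == scaleShape.size() &&
         matrixShape.size() == matrixLayoutDims.size());
  std::vector<int64_t> scaleDims;
  for (size_t i = 0; i < matrixShape.size(); ++i) {
    int64_t ratio = matrixShape[i] / scaleShape[i]; // matrix-to-scale factor
    scaleDims.push_back(std::max<int64_t>(1, matrixLayoutDims[i] / ratio));
  }
  return scaleDims;
}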

◆ getDefaultLaneLayout2DBlockIo()

template<typename RankedTy>
xegpu::LayoutAttr getDefaultLaneLayout2DBlockIo ( RankedTy ty,
const xegpu::uArch::uArch * uArch,
std::optional< unsigned > packingSize = std::nullopt,
bool vnni = false )
static

◆ getDpasInstDataVectors()

std::optional< std::tuple< SmallVector< int64_t >, SmallVector< int64_t >, SmallVector< int64_t > > > getDpasInstDataVectors ( VectorType aTy,
VectorType bTy,
VectorType cdTy,
const xegpu::uArch::uArch * uArch,
bool isDpasMx = false )
static

Helper function to compute inst_data vectors for DPAS operands A, B, and C/D.

Referenced by getupDpasSubgroupLayouts().

◆ getLayoutFromUsePoints()

xegpu::DistributeLayoutAttr getLayoutFromUsePoints ( Value result)
static

◆ getupDpasSubgroupLayouts()

std::optional< std::tuple< xegpu::DistributeLayoutAttr, xegpu::DistributeLayoutAttr, xegpu::DistributeLayoutAttr > > getupDpasSubgroupLayouts ( mlir::MLIRContext * context,
VectorType aTy,
VectorType bTy,
VectorType cdTy,
xegpu::DistributeLayoutAttr consumerLayout,
int numSg,
const xegpu::uArch::uArch * uArch )
static

Helper function to set up subgroup layouts for DPAS operands A, B, and C/D.

Returns the three layouts if successful, nullopt otherwise.

Definition at line 1480 of file XeGPULayoutImpl.cpp.

References mlir::detail::DenseArrayAttrImpl< int32_t >::get(), getDpasInstDataVectors(), and getValidLayouts().
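
A caller-side usage sketch of the optional-tuple return value. This is not code from the file; the surrounding variables (context, aTy, bTy, cdTy, consumerLayout, numSg, uArch) are assumed to be in scope.

// Hypothetical caller inside the same file. Structured bindings unpack
// the three subgroup layouts when layout setup succeeds.
if (auto layouts = getupDpasSubgroupLayouts(context, aTy, bTy, cdTy,
                                            consumerLayout, numSg, uArch)) {
  auto [aLayout, bLayout, cdLayout] = *layouts;
  // ... attach aLayout/bLayout to the DPAS operands and cdLayout to the
  // accumulator/result ...
} else {
  // No valid subgroup decomposition was found; skip layout assignment.
}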

◆ getValidLayouts()

SmallVector< LayoutRepresentation > getValidLayouts ( ArrayRef< int64_t > wgShape,
ArrayRef< int64_t > instData,
int64_t sgCount )
static

Definition at line 1392 of file XeGPULayoutImpl.cpp.

References lhs, and rhs.

Referenced by getupDpasSubgroupLayouts().

◆ propagateRegionArgsToInits()

void propagateRegionArgsToInits ( mlir::RegionBranchOpInterface regionOp)
static

◆ propagateRegionResultsToYieldOperands()

void propagateRegionResultsToYieldOperands ( mlir::RegionBranchTerminatorOpInterface yieldOp)
static

◆ propagateResultsToRegularOperands()

void propagateResultsToRegularOperands ( Operation * op)
static

◆ setTensorDescLayout()

void setTensorDescLayout ( Value val,
xegpu::DistributeLayoutAttr layout )
static

◆ setupGenericLoadAnchorLayout()

xegpu::DistributeLayoutAttr setupGenericLoadAnchorLayout ( xegpu::LayoutKind layoutKind,
mlir::MLIRContext * context,
xegpu::DistributeLayoutAttr consumerLayout,
bool isChunkedLoad,
int maxChunkSize,
ArrayRef< int64_t > resShape,
int subgroupSize )
static

Sets up the anchor layout for load gather and load matrix operations.

Load matrix lowers to load gather and 1-D block load; all of them share the same layout setup logic. For the Subgroup layout kind, the consumer layout is used directly.

Non-chunked loads:
  InstData   = {1, ..., min(consumer, maxLaneLoadSize * subgroupSize)}
  LaneLayout = {1, ..., subgroupSize}
  LaneData   = {1, ..., min(consumer, maxLaneLoadSize)}

Chunked loads:
  InstData   = {subgroupSize, min(consumer, maxLaneLoadSize)}
  LaneLayout = {subgroupSize, 1}
  LaneData   = {1, min(consumer, maxLaneLoadSize)}

Definition at line 1174 of file XeGPULayoutImpl.cpp.

References mlir::xegpu::InstData, mlir::xegpu::Lane, and mlir::xegpu::Subgroup.
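
To make the chunked-load formulas concrete, here is a minimal standalone sketch. The struct and function names are hypothetical; the formulas are taken directly from the description above.

#include <algorithm>
#include <array>
#include <cstdint>

// Hypothetical encoding of the chunked-load anchor layout formulas.
struct LoadAnchor {
  std::array<int64_t, 2> instData, laneLayout, laneData;
};

static LoadAnchor chunkedLoadAnchor(int64_t consumerChunk,
                                    int64_t maxLaneLoadSize,
                                    int64_t subgroupSize) {
  // Each lane loads at most maxLaneLoadSize elements of the consumer chunk.
  int64_t perLane = std::min(consumerChunk, maxLaneLoadSize);
  LoadAnchor anchor;
  anchor.instData = {subgroupSize, perLane};
  anchor.laneLayout = {subgroupSize, 1};
  anchor.laneData = {1, perLane};
  return anchor;
}

// chunkedLoadAnchor(8, 4, 16) yields InstData {16, 4},
// LaneLayout {16, 1}, LaneData {1, 4}.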

◆ setupGenericStoreAnchorLayout()

xegpu::DistributeLayoutAttr setupGenericStoreAnchorLayout ( xegpu::LayoutKind layoutKind,
mlir::MLIRContext * context,
bool isChunkedStore,
int maxChunkSize,
ArrayRef< int64_t > srcShape,
int subgroupSize )
static

Sets up the anchor layout for store scatter and store matrix operations.

Store matrix lowers to store scatter and 1-D block store; all of them share the same layout setup logic. The Subgroup layout kind is not supported yet.

Non-chunked stores:
  InstData   = {1, ..., subgroupSize}
  LaneLayout = {1, ..., subgroupSize}
  LaneData   = {1, ..., 1}

Chunked stores:
  InstData   = {subgroupSize, min(srcVec, maxLaneStoreSize)}
  LaneLayout = {subgroupSize, 1}
  LaneData   = {1, min(srcVec, maxLaneStoreSize)}

Definition at line 1274 of file XeGPULayoutImpl.cpp.

References mlir::xegpu::InstData, mlir::xegpu::Lane, and mlir::xegpu::Subgroup.

Referenced by mlir::xegpu::setupStoreMatrixAnchorLayout(), and mlir::xegpu::setupStoreScatterAnchorLayout().
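
Likewise, a minimal sketch of the chunked-store formulas above (struct and function names are hypothetical; the formulas come from the description):

#include <algorithm>
#include <array>
#include <cstdint>

// Hypothetical encoding of the chunked-store anchor layout formulas.
struct StoreAnchor {
  std::array<int64_t, 2> instData, laneLayout, laneData;
};

static StoreAnchor chunkedStoreAnchor(int64_t srcVec, int64_t maxLaneStoreSize,
                                      int64_t subgroupSize) {
  // Each lane stores at most maxLaneStoreSize elements of the source vector.
  int64_t perLane = std::min(srcVec, maxLaneStoreSize);
  StoreAnchor anchor;
  anchor.instData = {subgroupSize, perLane};
  anchor.laneLayout = {subgroupSize, 1};
  anchor.laneData = {1, perLane};
  return anchor;
}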

◆ walkRegionBackward()

void walkRegionBackward ( Region & region,
llvm::function_ref< void(Operation *)> visit )
static

◆ xegpu::removeLayoutAttr< mlir::OpOperand >()

template void xegpu::removeLayoutAttr< mlir::OpOperand > ( const mlir::OpOperand & operand)

◆ xegpu::removeLayoutAttr< mlir::OpResult >()

template void xegpu::removeLayoutAttr< mlir::OpResult > ( const mlir::OpResult & result)