|
MLIR 23.0.0git
|
#include "mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/ValueRange.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
#include <numeric>
Go to the source code of this file.
Typedefs | |
| using | LayoutRepresentation = std::pair<int64_t, int64_t> |
Functions | |
| static void | setTensorDescLayout (Value val, xegpu::DistributeLayoutAttr layout) |
| static void | walkRegionBackward (Region &region, llvm::function_ref< void(Operation *)> visit) |
| static xegpu::DistributeLayoutAttr | getLayoutFromUsePoints (Value result) |
| static void | propagateResultsToRegularOperands (Operation *op) |
| static void | propagateRegionResultsToYieldOperands (mlir::RegionBranchTerminatorOpInterface yieldOp) |
| static void | propagateRegionArgsToInits (mlir::RegionBranchOpInterface regionOp) |
| template void | xegpu::removeLayoutAttr< mlir::OpResult > (const mlir::OpResult &result) |
| template void | xegpu::removeLayoutAttr< mlir::OpOperand > (const mlir::OpOperand &operand) |
| static xegpu::DistributeLayoutAttr | setupGenericLoadAnchorLayout (xegpu::LayoutKind layoutKind, mlir::MLIRContext *context, xegpu::DistributeLayoutAttr consumerLayout, bool isChunkedLoad, int maxChunkSize, ArrayRef< int64_t > resShape, int subgroupSize) |
| Sets up the anchor layout for load gather and load matrix operation. | |
| static xegpu::DistributeLayoutAttr | setupGenericStoreAnchorLayout (xegpu::LayoutKind layoutKind, mlir::MLIRContext *context, bool isChunkedStore, int maxChunkSize, ArrayRef< int64_t > srcShape, int subgroupSize) |
| Sets up the anchor layout for store scatter and store matrix operation. | |
| template<typename RankedTy> | |
| static xegpu::LayoutAttr | getDefaultLaneLayout2DBlockIo (RankedTy ty, const xegpu::uArch::uArch *uArch, std::optional< unsigned > packingSize=std::nullopt, bool vnni=false) |
| static SmallVector< LayoutRepresentation > | getValidLayouts (ArrayRef< int64_t > wgShape, ArrayRef< int64_t > instData, int64_t sgCount) |
| static std::optional< std::tuple< SmallVector< int64_t >, SmallVector< int64_t >, SmallVector< int64_t > > > | getDpasInstDataVectors (VectorType aTy, VectorType bTy, VectorType cdTy, const xegpu::uArch::uArch *uArch, bool isDpasMx=false) |
| Helper function to compute inst_data vectors for DPAS operands A, B, and C/D. | |
| static std::optional< std::tuple< xegpu::DistributeLayoutAttr, xegpu::DistributeLayoutAttr, xegpu::DistributeLayoutAttr > > | getupDpasSubgroupLayouts (mlir::MLIRContext *context, VectorType aTy, VectorType bTy, VectorType cdTy, xegpu::DistributeLayoutAttr consumerLayout, int numSg, const xegpu::uArch::uArch *uArch) |
| Helper function to set up subgroup layouts for DPAS operands A, B, and C/D. | |
| static xegpu::DistributeLayoutAttr | createScaleLayout (mlir::MLIRContext *context, VectorType matrixTy, VectorType scaleTy, xegpu::DistributeLayoutAttr matrixLayout, bool isBScale, const xegpu::uArch::uArch *uArch) |
| Helper to create a scale layout derived from a matrix operand layout. | |
| using LayoutRepresentation = std::pair<int64_t, int64_t> |
Definition at line 1390 of file XeGPULayoutImpl.cpp.
|
static |
Helper to create a scale layout derived from a matrix operand layout.
The scale layout is computed by mapping each dimension of the matrix layout to the corresponding scale tensor dimension using the ratio between the matrix and scale shapes.
Definition at line 1610 of file XeGPULayoutImpl.cpp.
References mlir::detail::DenseArrayAttrImpl< int32_t >::get(), mlir::xegpu::uArch::uArch::getInstruction(), and mlir::xegpu::uArch::SubgroupScaledMatrixMultiplyAcc.
|
static |
Definition at line 1360 of file XeGPULayoutImpl.cpp.
References mlir::xegpu::uArch::uArch::getSubgroupSize().
|
static |
Helper function to compute inst_data vectors for DPAS operands A, B, and C/D.
Definition at line 1424 of file XeGPULayoutImpl.cpp.
References mlir::xegpu::uArch::uArch::getInstruction(), mlir::xegpu::getLargestDivisor(), mlir::xegpu::uArch::uArch::getSubgroupSize(), mlir::xegpu::uArch::MMAInstructionInterface::getSupportedK(), mlir::xegpu::uArch::MMAInstructionInterface::getSupportedM(), mlir::xegpu::uArch::MMAInstructionInterface::getSupportedN(), mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc, and mlir::xegpu::uArch::SubgroupScaledMatrixMultiplyAcc.
Referenced by getupDpasSubgroupLayouts().
|
static |
Definition at line 115 of file XeGPULayoutImpl.cpp.
References mlir::xegpu::getDistributeLayoutAttr(), and result.
Referenced by propagateRegionArgsToInits(), propagateRegionResultsToYieldOperands(), and propagateResultsToRegularOperands().
|
static |
Helper function to set up subgroup layouts for DPAS operands A, B, and C/D.
Returns the three layouts if successful, nullopt otherwise.
Definition at line 1480 of file XeGPULayoutImpl.cpp.
References mlir::detail::DenseArrayAttrImpl< int32_t >::get(), getDpasInstDataVectors(), and getValidLayouts().
|
static |
Definition at line 1392 of file XeGPULayoutImpl.cpp.
Referenced by getupDpasSubgroupLayouts().
|
static |
Definition at line 220 of file XeGPULayoutImpl.cpp.
References getLayoutFromUsePoints(), mlir::xegpu::setTemporaryLayout(), and setTensorDescLayout().
Referenced by mlir::xegpu::recoverTemporaryLayouts().
|
static |
Definition at line 169 of file XeGPULayoutImpl.cpp.
References mlir::OperandRange::getBeginOperandIndex(), getLayoutFromUsePoints(), mlir::OperandRange::getType(), mlir::xegpu::setTemporaryLayout(), and setTensorDescLayout().
Referenced by mlir::xegpu::recoverTemporaryLayouts().
Definition at line 129 of file XeGPULayoutImpl.cpp.
References getLayoutFromUsePoints(), mlir::Operation::getNumResults(), mlir::Operation::getOpOperands(), mlir::Operation::getResult(), mlir::xegpu::inferSourceLayoutFromResult(), result, mlir::xegpu::setTemporaryLayout(), and setTensorDescLayout().
Referenced by mlir::xegpu::recoverTemporaryLayouts().
Definition at line 74 of file XeGPULayoutImpl.cpp.
References mlir::Value::getType(), and mlir::Value::setType().
Referenced by propagateRegionArgsToInits(), propagateRegionResultsToYieldOperands(), and propagateResultsToRegularOperands().
|
static |
Sets up the anchor layout for load gather and load matrix operation.
load matrix lowers to load gather and 1d block load. All of them share the same layout setup logic. For Subgroup layout, uses the consumer layout directly. non-chunked loads: InstData = {1, ..., min(consumer, maxLaneLoadSize * subgroupSize)} LaneLayout = {1, ..., subgroupSize} lane_data = {1, ..., min(consumer, maxLaneLoadSize)} chunked loads: InstData = {subgroupSize, min(consumer, maxLaneLoadSize)} LaneLayout = {subgroupSize, 1} lane_data={1,min(consumer, maxLaneLoadSize)}
Definition at line 1174 of file XeGPULayoutImpl.cpp.
References mlir::xegpu::InstData, mlir::xegpu::Lane, and mlir::xegpu::Subgroup.
|
static |
Sets up the anchor layout for store scatter and store matrix operation.
store matrix lowers to store scatter and 1d block store. All of them share the same layout setup logic. For Subgroup layout, not support yet. non-chunked stores: InstData = {1, ..., subgroupSize} LaneLayout = {1, ..., subgroupSize} lane_data = {1, ..., 1} chunked stores: InstData = {subgroupSize, min(srcVec, maxLaneStoreSize)} LaneLayout = {subgroupSize, 1} lane_data={1,min(srcVec, maxLaneStoreSize)}
Definition at line 1274 of file XeGPULayoutImpl.cpp.
References mlir::xegpu::InstData, mlir::xegpu::Lane, and mlir::xegpu::Subgroup.
Referenced by mlir::xegpu::setupStoreMatrixAnchorLayout(), and mlir::xegpu::setupStoreScatterAnchorLayout().
|
static |
Definition at line 87 of file XeGPULayoutImpl.cpp.
References mlir::Region::empty(), visit(), and walkRegionBackward().
Referenced by mlir::xegpu::recoverTemporaryLayouts(), and walkRegionBackward().
| template void xegpu::removeLayoutAttr< mlir::OpOperand > | ( | const mlir::OpOperand & | operand | ) |
References mlir::xegpu::removeLayoutAttr().
| template void xegpu::removeLayoutAttr< mlir::OpResult > | ( | const mlir::OpResult & | result | ) |
References mlir::xegpu::removeLayoutAttr(), and result.