|
MLIR 23.0.0git
|
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"#include "mlir/Analysis/SliceAnalysis.h"#include "mlir/Analysis/TopologicalSortUtils.h"#include "mlir/Dialect/Affine/IR/AffineOps.h"#include "mlir/Dialect/Arith/IR/Arith.h"#include "mlir/Dialect/GPU/IR/GPUDialect.h"#include "mlir/Dialect/MemRef/IR/MemRef.h"#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"#include "mlir/Dialect/SCF/IR/SCF.h"#include "mlir/Dialect/Utils/StructuredOpsUtils.h"#include "mlir/Dialect/Vector/IR/VectorOps.h"#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"#include "mlir/Dialect/Vector/Utils/VectorUtils.h"#include "mlir/IR/Builders.h"#include "mlir/IR/Region.h"#include "mlir/Pass/Pass.h"#include "mlir/Transforms/GreedyPatternRewriteDriver.h"#include "llvm/ADT/STLExtras.h"#include "llvm/ADT/TypeSwitch.h"#include "llvm/Support/DebugLog.h"#include "mlir/Conversion/Passes.h.inc"Go to the source code of this file.
Namespaces | |
| namespace | mlir |
| Include the generated interface declarations. | |
Macros | |
| #define | DEBUG_TYPE "vector-to-gpu" |
| #define | GEN_PASS_DEF_CONVERTVECTORTOGPU |
Functions | |
| template<typename TransferOpType> | |
| static void | getXferIndices (RewriterBase &rewriter, TransferOpType xferOp, AffineMap offsetMap, ArrayRef< Value > dimValues, SmallVector< Value, 4 > &indices) |
| For a vector TransferOpType xferOp, an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets. | |
| static bool | contractSupportsMMAMatrixType (vector::ContractionOp contract, bool useNvGpu) |
| static bool | isFirstResultLastMapDimension (AffineMap permutationMap) |
| static std::optional< int64_t > | getStaticallyKnownRowStride (ShapedType type, AffineMap permutationMap) |
| static bool | transferReadSupportsMMAMatrixType (vector::TransferReadOp readOp) |
| static bool | transferWriteSupportsMMAMatrixType (vector::TransferWriteOp writeOp) |
| static bool | constantSupportsMMAMatrixType (arith::ConstantOp constantOp) |
| Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op. | |
| static bool | broadcastSupportsMMAMatrixType (vector::BroadcastOp broadcastOp) |
| Return true if this is a broadcast from scalar to a 2D vector. | |
| template<typename ExtOpTy> | |
| static bool | integerExtendSupportsMMAMatrixType (ExtOpTy extOp) |
| Return true if this integer extend op can be folded into a contract op. | |
| static bool | fpExtendSupportsMMAMatrixType (arith::ExtFOp extOp) |
| static std::optional< gpu::MMAElementwiseOp > | convertElementwiseOpToMMA (Operation *op) |
| Return the MMA elementwise enum associated with op if it is supported. | |
| static bool | elementwiseSupportsMMAMatrixType (Operation *op) |
| Return true if the op is supported as elementwise op on MMAMatrix type. | |
| static bool | extractStridedSliceSupportsMMAMatrixType (vector::ExtractStridedSliceOp op) |
| Returns true if the extract strided slice op is supported with mma.sync path. | |
| static bool | supportsMMaMatrixType (Operation *op, bool useNvGpu) |
| static SetVector< Operation * > | getSliceContract (Operation *op, const BackwardSliceOptions &backwardSliceOptions, const ForwardSliceOptions &forwardSliceOptions) |
| Return an unsorted slice handling scf.for region differently than getSlice. | |
| static SetVector< Operation * > | getOpToConvert (mlir::Operation *op, bool useNvGpu) |
| static const char * | inferFragType (Operation *op) |
| static LogicalResult | convertTransferReadOp (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | convertTransferWriteOp (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static VectorType | getMmaSyncVectorOperandType (const nvgpu::FragmentElementInfo ®Info) |
| Returns the vector type which represents a matrix fragment. | |
| static LogicalResult | convertConstantOpMmaSync (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. | |
| static FailureOr< bool > | isTransposed (vector::TransferReadOp op) |
| Check if the loaded matrix operand requires transposed. | |
| static LogicalResult | creatLdMatrixCompatibleLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | createNonLdMatrixLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static bool | isSharedMemory (MemRefType type) |
| Return true if this is a shared memory memref type. | |
| static LogicalResult | convertTransferReadToLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation. | |
| static LogicalResult | convertTransferWriteToStores (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static void | populateFromInt64AttrArray (ArrayAttr arrayAttr, SmallVectorImpl< int64_t > &results) |
| static LogicalResult | convertExtractStridedSlice (RewriterBase &rewriter, vector::ExtractStridedSliceOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | convertContractOp (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | convertContractOpToMmaSync (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | convertConstantOp (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. | |
| static LogicalResult | convertBroadcastOp (RewriterBase &rewriter, vector::BroadcastOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. | |
| static scf::ForOp | replaceForOpWithNewSignature (RewriterBase &rewriter, scf::ForOp loop, ValueRange newInitArgs) |
| static LogicalResult | convertForOp (RewriterBase &rewriter, scf::ForOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | convertYieldOp (RewriterBase &rewriter, scf::YieldOp op, llvm::DenseMap< Value, Value > &valueMapping) |
| static LogicalResult | convertElementwiseOp (RewriterBase &rewriter, Operation *op, gpu::MMAElementwiseOp opType, llvm::DenseMap< Value, Value > &valueMapping) |
| Convert an elementwise op to the equivalent elementwise op on MMA matrix. | |
| #define DEBUG_TYPE "vector-to-gpu" |
Definition at line 36 of file VectorToGPU.cpp.
| #define GEN_PASS_DEF_CONVERTVECTORTOGPU |
Definition at line 39 of file VectorToGPU.cpp.
|
static |
Return true if this is a broadcast from scalar to a 2D vector.
Definition at line 227 of file VectorToGPU.cpp.
Referenced by convertBroadcastOp(), and supportsMMaMatrixType().
|
static |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op.
Definition at line 219 of file VectorToGPU.cpp.
Referenced by convertConstantOp(), and supportsMMaMatrixType().
Definition at line 72 of file VectorToGPU.cpp.
References mlir::bindDims(), contract(), mlir::AffineMap::inferFromExprList(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().
Referenced by supportsMMaMatrixType().
|
static |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op.
Definition at line 1126 of file VectorToGPU.cpp.
References broadcastSupportsMMAMatrixType(), mlir::gpu::MMAMatrixType::get(), inferFragType(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 1103 of file VectorToGPU.cpp.
References constantSupportsMMAMatrixType(), mlir::gpu::MMAMatrixType::get(), inferFragType(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 672 of file VectorToGPU.cpp.
References mlir::DenseElementsAttr::get(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), result, mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1059 of file VectorToGPU.cpp.
References mlir::Builder::getType(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 1080 of file VectorToGPU.cpp.
References mlir::Builder::getI64ArrayAttr(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Convert an elementwise op to the equivalent elementwise op on MMA matrix.
Definition at line 1235 of file VectorToGPU.cpp.
References mlir::gpu::MMAMatrixType::get(), mlir::Operation::getLoc(), mlir::Operation::getOperands(), mlir::Operation::getResult(), mlir::Operation::getResultTypes(), mlir::getType(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Return the MMA elementwise enum associated with op if it is supported.
Return std::nullopt otherwise.
Definition at line 246 of file VectorToGPU.cpp.
Referenced by mlir::convertVectorToMMAOps(), and elementwiseSupportsMMAMatrixType().
|
static |
Definition at line 983 of file VectorToGPU.cpp.
References mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), populateFromInt64AttrArray(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1177 of file VectorToGPU.cpp.
References mlir::Block::getArgument(), replaceForOpWithNewSignature(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 577 of file VectorToGPU.cpp.
References mlir::gpu::MMAMatrixType::get(), mlir::Builder::getIndexAttr(), mlir::AffineMap::getPermutationMap(), getStaticallyKnownRowStride(), mlir::Builder::getUnitAttr(), inferFragType(), isFirstResultLastMapDimension(), load, mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), success(), and transferReadSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation.
This function should only be used when converting to nvgpu.mma.sync operations.
Definition at line 899 of file VectorToGPU.cpp.
References createNonLdMatrixLoads(), creatLdMatrixCompatibleLoads(), mlir::nvgpu::getWarpMatrixInfo(), isSharedMemory(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 627 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseOp(), mlir::Builder::getIndexAttr(), getStaticallyKnownRowStride(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), success(), and transferWriteSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 931 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseOp(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), getXferIndices(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1210 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseOp(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 800 of file VectorToGPU.cpp.
References mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), getXferIndices(), mlir::Builder::getZeroAttr(), mlir::RewriterBase::notifyMatchFailure(), result, mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by convertTransferReadToLoads().
|
static |
Definition at line 739 of file VectorToGPU.cpp.
References getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), getXferIndices(), indices, isTransposed(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and success().
Referenced by convertTransferReadToLoads().
Return true if the op is supported as elementwise op on MMAMatrix type.
Definition at line 277 of file VectorToGPU.cpp.
References convertElementwiseOpToMMA().
Referenced by getOpToConvert(), and supportsMMaMatrixType().
|
static |
Returns true if the extract strided slice op is supported with mma.sync path.
Definition at line 284 of file VectorToGPU.cpp.
References mlir::nvgpu::B, mlir::nvgpu::C, mlir::nvgpu::getUserContract(), and mlir::nvgpu::getWarpMatrixInfo().
Referenced by supportsMMaMatrixType().
|
static |
Definition at line 241 of file VectorToGPU.cpp.
Referenced by supportsMMaMatrixType().
|
static |
Returns the vector type which represents a matrix fragment.
Definition at line 661 of file VectorToGPU.cpp.
Referenced by convertConstantOpMmaSync(), convertTransferWriteToStores(), createNonLdMatrixLoads(), and creatLdMatrixCompatibleLoads().
|
static |
Definition at line 379 of file VectorToGPU.cpp.
References elementwiseSupportsMMAMatrixType(), mlir::SliceOptions::filter, mlir::Operation::getOperandTypes(), mlir::Operation::getResultTypes(), getSliceContract(), supportsMMaMatrixType(), mlir::topologicalSort(), and mlir::Operation::walk().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Return an unsorted slice handling scf.for region differently than getSlice.
In scf.for we only want to include as part of the slice elements that are part of the use/def chain.
Definition at line 339 of file VectorToGPU.cpp.
References mlir::getBackwardSlice(), mlir::getForwardSlice(), and result.
Referenced by getOpToConvert().
|
static |
Definition at line 127 of file VectorToGPU.cpp.
References mlir::AffineMap::getNumResults(), mlir::AffineMap::getResults(), and result.
Referenced by convertTransferReadOp(), convertTransferWriteOp(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
|
static |
For a vector TransferOpType xferOp, an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets.
The offsets are applied by taking into account the permutation map of the transfer op. If the offsetMap has dimension placeholders, those should be provided in dimValues.
Definition at line 52 of file VectorToGPU.cpp.
References mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), indices, and mlir::affine::makeComposedAffineApply().
Referenced by convertTransferWriteToStores(), createNonLdMatrixLoads(), and creatLdMatrixCompatibleLoads().
|
static |
Definition at line 554 of file VectorToGPU.cpp.
References contract(), mlir::Operation::getNumResults(), mlir::Operation::getResult(), mlir::Operation::getUsers(), mlir::Operation::hasOneUse(), mlir::Operation::hasTrait(), inferFragType(), and mlir::Operation::user_begin().
Referenced by convertBroadcastOp(), convertConstantOp(), convertTransferReadOp(), and inferFragType().
|
static |
Return true if this integer extend op can be folded into a contract op.
Definition at line 233 of file VectorToGPU.cpp.
Referenced by supportsMMaMatrixType().
Definition at line 104 of file VectorToGPU.cpp.
References mlir::getAffineDimExpr(), mlir::AffineMap::getContext(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), and mlir::AffineMap::getResults().
Referenced by convertTransferReadOp().
|
static |
Return true if this is a shared memory memref type.
Definition at line 888 of file VectorToGPU.cpp.
Referenced by convertTransferReadToLoads(), and getStBulkIntrinsicId().
|
static |
Check if the loaded matrix operand requires transposed.
Transposed Map Example: Example 1 : (..., d0, d1) -> (d1 * 1, d0 * 2) Example 2 : (d0, d1, d2, d3) -> (d3, d2) The code below checks if the output 2D is transposed using a generalized version : (d0, d1, dn, ..., dm, ...) -> (dm, dn) Returns : true; if m > n, false o.w.
Definition at line 712 of file VectorToGPU.cpp.
References mlir::AffineMap::getNumResults(), mlir::AffineMap::getPermutationMap(), and mlir::AffineMap::getResult().
Referenced by creatLdMatrixCompatibleLoads().
|
static |
Definition at line 976 of file VectorToGPU.cpp.
References ArrayAttr().
Referenced by convertExtractStridedSlice().
|
static |
Definition at line 1145 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseBlock(), mlir::RewriterBase::eraseOp(), mlir::RewriterBase::replaceAllUsesWith(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertForOp().
Definition at line 308 of file VectorToGPU.cpp.
References broadcast(), broadcastSupportsMMAMatrixType(), mlir::nvgpu::canLowerToWarpMatrixOperation(), constantSupportsMMAMatrixType(), contract(), contractSupportsMMAMatrixType(), elementwiseSupportsMMAMatrixType(), extractStridedSliceSupportsMMAMatrixType(), fpExtendSupportsMMAMatrixType(), integerExtendSupportsMMAMatrixType(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
Referenced by getOpToConvert().
|
static |
Definition at line 173 of file VectorToGPU.cpp.
References mlir::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::getResults(), and getStaticallyKnownRowStride().
Referenced by convertTransferReadOp(), and supportsMMaMatrixType().
|
static |
Definition at line 195 of file VectorToGPU.cpp.
References mlir::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::getResult(), and getStaticallyKnownRowStride().
Referenced by convertTransferWriteOp(), and supportsMMaMatrixType().