MLIR
22.0.0git
|
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/DebugLog.h"
#include "mlir/Conversion/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
Include the generated interface declarations. | |
Macros | |
#define | DEBUG_TYPE "vector-to-gpu" |
#define | GEN_PASS_DEF_CONVERTVECTORTOGPU |
Functions | |
template<typename TransferOpType > | |
static void | getXferIndices (RewriterBase &rewriter, TransferOpType xferOp, AffineMap offsetMap, ArrayRef< Value > dimValues, SmallVector< Value, 4 > &indices) |
For a vector TransferOpType xferOp , an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets. More... | |
static bool | contractSupportsMMAMatrixType (vector::ContractionOp contract, bool useNvGpu) |
static bool | isTransposeMatrixLoadMap (AffineMap permutationMap) |
static std::optional< int64_t > | getStaticallyKnownRowStride (ShapedType type) |
static bool | transferReadSupportsMMAMatrixType (vector::TransferReadOp readOp) |
static bool | transferWriteSupportsMMAMatrixType (vector::TransferWriteOp writeOp) |
static bool | constantSupportsMMAMatrixType (arith::ConstantOp constantOp) |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op. More... | |
static bool | broadcastSupportsMMAMatrixType (vector::BroadcastOp broadcastOp) |
Return true if this is a broadcast from scalar to a 2D vector. More... | |
template<typename ExtOpTy > | |
static bool | integerExtendSupportsMMAMatrixType (ExtOpTy extOp) |
Return true if this integer extend op can be folded into a contract op. More... | |
static bool | fpExtendSupportsMMAMatrixType (arith::ExtFOp extOp) |
static std::optional< gpu::MMAElementwiseOp > | convertElementwiseOpToMMA (Operation *op) |
Return the MMA elementwise enum associated with op if it is supported. More... | |
static bool | elementwiseSupportsMMAMatrixType (Operation *op) |
Return true if the op is supported as elementwise op on MMAMatrix type. More... | |
static bool | extractStridedSliceSupportsMMAMatrixType (vector::ExtractStridedSliceOp op) |
Returns true if the extract strided slice op is supported with mma.sync path. More... | |
static bool | supportsMMaMatrixType (Operation *op, bool useNvGpu) |
static SetVector< Operation * > | getSliceContract (Operation *op, const BackwardSliceOptions &backwardSliceOptions, const ForwardSliceOptions &forwardSliceOptions) |
Return an unsorted slice handling scf.for region differently than getSlice . More... | |
static SetVector< Operation * > | getOpToConvert (mlir::Operation *op, bool useNvGpu) |
static const char * | inferFragType (Operation *op) |
static LogicalResult | convertTransferReadOp (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertTransferWriteOp (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static VectorType | getMmaSyncVectorOperandType (const nvgpu::FragmentElementInfo ®Info) |
Returns the vector type which represents a matrix fragment. More... | |
static LogicalResult | convertConstantOpMmaSync (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static FailureOr< bool > | isTransposed (vector::TransferReadOp op) |
Check if the loaded matrix operand requires transposed. More... | |
static LogicalResult | creatLdMatrixCompatibleLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | createNonLdMatrixLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static bool | isSharedMemory (MemRefType type) |
Return true if this is a shared memory memref type. More... | |
static LogicalResult | convertTransferReadToLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation. More... | |
static LogicalResult | convertTransferWriteToStores (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static void | populateFromInt64AttrArray (ArrayAttr arrayAttr, SmallVectorImpl< int64_t > &results) |
static LogicalResult | convertExtractStridedSlice (RewriterBase &rewriter, vector::ExtractStridedSliceOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOp (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOpToMmaSync (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertConstantOp (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static LogicalResult | convertBroadcastOp (RewriterBase &rewriter, vector::BroadcastOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. More... | |
static scf::ForOp | replaceForOpWithNewSignature (RewriterBase &rewriter, scf::ForOp loop, ValueRange newInitArgs) |
static LogicalResult | convertForOp (RewriterBase &rewriter, scf::ForOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertYieldOp (RewriterBase &rewriter, scf::YieldOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertElementwiseOp (RewriterBase &rewriter, Operation *op, gpu::MMAElementwiseOp opType, llvm::DenseMap< Value, Value > &valueMapping) |
Convert an elementwise op to the equivalent elementwise op on MMA matrix. More... | |
#define DEBUG_TYPE "vector-to-gpu" |
Definition at line 36 of file VectorToGPU.cpp.
#define GEN_PASS_DEF_CONVERTVECTORTOGPU |
Definition at line 39 of file VectorToGPU.cpp.
|
static |
Return true if this is a broadcast from scalar to a 2D vector.
Definition at line 191 of file VectorToGPU.cpp.
Referenced by convertBroadcastOp(), and supportsMMaMatrixType().
|
static |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op.
Definition at line 183 of file VectorToGPU.cpp.
Referenced by convertConstantOp(), and supportsMMaMatrixType().
|
static |
Definition at line 72 of file VectorToGPU.cpp.
References mlir::bindDims(), contract(), mlir::AffineMap::inferFromExprList(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().
Referenced by supportsMMaMatrixType().
|
static |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op.
Definition at line 1089 of file VectorToGPU.cpp.
References broadcastSupportsMMAMatrixType(), mlir::gpu::MMAMatrixType::get(), inferFragType(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 1066 of file VectorToGPU.cpp.
References constantSupportsMMAMatrixType(), mlir::gpu::MMAMatrixType::get(), mlir::Builder::getType(), inferFragType(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 635 of file VectorToGPU.cpp.
References mlir::remark::failed(), mlir::DenseElementsAttr::get(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1022 of file VectorToGPU.cpp.
References mlir::Builder::getType(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 1043 of file VectorToGPU.cpp.
References mlir::Builder::getI64ArrayAttr(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Convert an elementwise op to the equivalent elementwise op on MMA matrix.
Definition at line 1198 of file VectorToGPU.cpp.
References mlir::gpu::MMAMatrixType::get(), mlir::Operation::getLoc(), mlir::Operation::getOperands(), mlir::Operation::getResult(), mlir::Operation::getResultTypes(), mlir::getType(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Return the MMA elementwise enum associated with op
if it is supported.
Return std::nullopt
otherwise.
Definition at line 210 of file VectorToGPU.cpp.
References ADDI, MULI, and SUBI.
Referenced by mlir::convertVectorToMMAOps(), and elementwiseSupportsMMAMatrixType().
|
static |
Definition at line 946 of file VectorToGPU.cpp.
References mlir::remark::failed(), mlir::nvgpu::getMmaSyncRegisterType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), populateFromInt64AttrArray(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1140 of file VectorToGPU.cpp.
References mlir::detail::enumerate(), mlir::Block::getArgument(), replaceForOpWithNewSignature(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 538 of file VectorToGPU.cpp.
References mlir::gpu::MMAMatrixType::get(), mlir::get(), mlir::Builder::getIndexAttr(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::getResult(), getStaticallyKnownRowStride(), mlir::Builder::getUnitAttr(), inferFragType(), isTransposeMatrixLoadMap(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::quant::QuantizationFlags::Signed, and transferReadSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Converts a vector.transfer_read
operation directly to either a vector.load
or a nvgpu.ldmatrix
operation.
This function should only be used when converting to nvgpu.mma.sync
operations.
Definition at line 862 of file VectorToGPU.cpp.
References createNonLdMatrixLoads(), creatLdMatrixCompatibleLoads(), mlir::remark::failed(), mlir::nvgpu::getWarpMatrixInfo(), mlir::nvgpu::inferTileWidthInBits(), isSharedMemory(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 590 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseOp(), mlir::Builder::getIndexAttr(), getStaticallyKnownRowStride(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and transferWriteSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 894 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseOp(), mlir::remark::failed(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), mlir::nvgpu::getLaneIdAndValueIdToOperandCoord(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1173 of file VectorToGPU.cpp.
References mlir::detail::enumerate(), mlir::RewriterBase::eraseOp(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 763 of file VectorToGPU.cpp.
References mlir::remark::failed(), mlir::get(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), mlir::nvgpu::getLaneIdAndValueIdToOperandCoord(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::Builder::getZeroAttr(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertTransferReadToLoads().
|
static |
Definition at line 702 of file VectorToGPU.cpp.
References mlir::remark::failed(), mlir::nvgpu::getLaneIdToLdMatrixMatrixCoord(), mlir::nvgpu::getLdMatrixParams(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), isTransposed(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertTransferReadToLoads().
|
static |
Return true if the op is supported as elementwise op on MMAMatrix type.
Definition at line 241 of file VectorToGPU.cpp.
References convertElementwiseOpToMMA().
Referenced by supportsMMaMatrixType().
|
static |
Returns true if the extract strided slice op is supported with mma.sync
path.
Definition at line 248 of file VectorToGPU.cpp.
References mlir::nvgpu::B, mlir::nvgpu::C, mlir::remark::failed(), mlir::nvgpu::getUserContract(), and mlir::nvgpu::getWarpMatrixInfo().
Referenced by supportsMMaMatrixType().
|
static |
Definition at line 205 of file VectorToGPU.cpp.
Referenced by supportsMMaMatrixType().
|
static |
Returns the vector type which represents a matrix fragment.
Definition at line 624 of file VectorToGPU.cpp.
References mlir::nvgpu::FragmentElementInfo::elementsPerRegister, mlir::get(), mlir::nvgpu::FragmentElementInfo::numRegistersPerFragment, and mlir::nvgpu::FragmentElementInfo::registerLLVMType.
Referenced by convertConstantOpMmaSync(), convertTransferWriteToStores(), createNonLdMatrixLoads(), and creatLdMatrixCompatibleLoads().
|
static |
Definition at line 343 of file VectorToGPU.cpp.
References contract(), mlir::SliceOptions::filter, mlir::Operation::getOperandTypes(), mlir::Operation::getResultTypes(), getSliceContract(), supportsMMaMatrixType(), mlir::topologicalSort(), and mlir::Operation::walk().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Return an unsorted slice handling scf.for region differently than getSlice
.
In scf.for we only want to include as part of the slice elements that are part of the use/def chain.
Definition at line 303 of file VectorToGPU.cpp.
References mlir::getBackwardSlice(), and mlir::getForwardSlice().
Referenced by getOpToConvert().
|
static |
Definition at line 121 of file VectorToGPU.cpp.
References mlir::remark::failed().
Referenced by convertTransferReadOp(), convertTransferWriteOp(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
|
static |
For a vector TransferOpType xferOp
, an empty indices
vector, and an AffineMap representing offsets to apply to indices, the function fills indices
with the original indices plus the offsets.
The offsets are applied by taking into account the permutation map of the transfer op. If the offsetMap
has dimension placeholders, those should be provided in dimValues
.
Definition at line 52 of file VectorToGPU.cpp.
References mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), and mlir::affine::makeComposedAffineApply().
|
static |
Definition at line 515 of file VectorToGPU.cpp.
References contract(), mlir::Operation::getNumResults(), mlir::Operation::getResult(), mlir::Operation::getUsers(), mlir::Operation::hasOneUse(), mlir::Operation::hasTrait(), and mlir::Operation::user_begin().
Referenced by convertBroadcastOp(), convertConstantOp(), and convertTransferReadOp().
|
static |
Return true if this integer extend op can be folded into a contract op.
Definition at line 197 of file VectorToGPU.cpp.
|
static |
Return true if this is a shared memory memref type.
Definition at line 851 of file VectorToGPU.cpp.
Referenced by convertTransferReadToLoads(), and getStBulkIntrinsicId().
|
static |
Check if the loaded matrix operand requires transposed.
Transposed Map Example: Example 1 : (..., d0, d1) -> (d1 * 1, d0 * 2) Example 2 : (d0, d1, d2, d3) -> (d3, d2) The code below checks if the output 2D is transposed using a generalized version : (d0, d1, dn, ..., dm, ...) -> (dm, dn) Returns : true; if m > n, false o.w.
Definition at line 675 of file VectorToGPU.cpp.
References mlir::AffineMap::getNumResults(), mlir::AffineMap::getPermutationMap(), and mlir::AffineMap::getResult().
Referenced by creatLdMatrixCompatibleLoads().
|
static |
Definition at line 100 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::Builder::getAffineConstantExpr(), mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getContext(), and mlir::AffineMap::getNumDims().
Referenced by convertTransferReadOp(), and transferReadSupportsMMAMatrixType().
|
static |
Definition at line 939 of file VectorToGPU.cpp.
Referenced by convertExtractStridedSlice().
|
static |
Definition at line 1108 of file VectorToGPU.cpp.
References mlir::RewriterBase::eraseBlock(), mlir::RewriterBase::eraseOp(), mlir::RewriterBase::replaceAllUsesWith(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertForOp().
|
static |
Definition at line 272 of file VectorToGPU.cpp.
References broadcast(), broadcastSupportsMMAMatrixType(), mlir::nvgpu::canLowerToWarpMatrixOperation(), constantSupportsMMAMatrixType(), contract(), contractSupportsMMAMatrixType(), elementwiseSupportsMMAMatrixType(), extractStridedSliceSupportsMMAMatrixType(), fpExtendSupportsMMAMatrixType(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
Referenced by getOpToConvert().
|
static |
Definition at line 140 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::getAffineConstantExpr(), mlir::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getPermutationMap(), getStaticallyKnownRowStride(), mlir::AffineMap::isMinorIdentity(), and isTransposeMatrixLoadMap().
Referenced by convertTransferReadOp(), and supportsMMaMatrixType().
|
static |
Definition at line 165 of file VectorToGPU.cpp.
References getStaticallyKnownRowStride().
Referenced by convertTransferWriteOp(), and supportsMMaMatrixType().