MLIR
20.0.0git
|
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include <type_traits>
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Conversion/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
Include the generated interface declarations. | |
Macros | |
#define | DEBUG_TYPE "vector-to-gpu" |
#define | DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
#define | DBGSNL() (llvm::dbgs() << "\n") |
#define | GEN_PASS_DEF_CONVERTVECTORTOGPU |
Functions | |
template<typename TransferOpType > | |
static void | getXferIndices (RewriterBase &rewriter, TransferOpType xferOp, AffineMap offsetMap, ArrayRef< Value > dimValues, SmallVector< Value, 4 > &indices) |
For a vector TransferOpType xferOp , an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets. More... | |
static bool | contractSupportsMMAMatrixType (vector::ContractionOp contract, bool useNvGpu) |
static bool | isTransposeMatrixLoadMap (AffineMap permutationMap) |
static std::optional< int64_t > | getStaticallyKnownRowStride (ShapedType type) |
static bool | transferReadSupportsMMAMatrixType (vector::TransferReadOp readOp) |
static bool | transferWriteSupportsMMAMatrixType (vector::TransferWriteOp writeOp) |
static bool | constantSupportsMMAMatrixType (arith::ConstantOp constantOp) |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op. More... | |
static bool | broadcastSupportsMMAMatrixType (vector::BroadcastOp broadcastOp) |
Return true if this is a broadcast from scalar to a 2D vector. More... | |
template<typename ExtOpTy > | |
static bool | integerExtendSupportsMMAMatrixType (ExtOpTy extOp) |
Return true if this integer extend op can be folded into a contract op. More... | |
static bool | fpExtendSupportsMMAMatrixType (arith::ExtFOp extOp) |
static std::optional< gpu::MMAElementwiseOp > | convertElementwiseOpToMMA (Operation *op) |
Return the MMA elementwise enum associated with op if it is supported. More... | |
static bool | elementwiseSupportsMMAMatrixType (Operation *op) |
Return true if the op is supported as elementwise op on MMAMatrix type. More... | |
static bool | extractStridedSliceSupportsMMAMatrixType (vector::ExtractStridedSliceOp op) |
Returns true if the extract strided slice op is supported with mma.sync path. More... | |
static bool | supportsMMaMatrixType (Operation *op, bool useNvGpu) |
static SetVector< Operation * > | getSliceContract (Operation *op, const BackwardSliceOptions &backwardSliceOptions, const ForwardSliceOptions &forwardSliceOptions) |
Return an unsorted slice handling scf.for region differently than getSlice . More... | |
static SetVector< Operation * > | getOpToConvert (mlir::Operation *op, bool useNvGpu) |
static const char * | inferFragType (Operation *op) |
static LogicalResult | convertTransferReadOp (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertTransferWriteOp (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static VectorType | getMmaSyncVectorOperandType (const nvgpu::FragmentElementInfo ®Info) |
Returns the vector type which represents a matrix fragment. More... | |
static LogicalResult | convertConstantOpMmaSync (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static FailureOr< bool > | isTransposed (vector::TransferReadOp op) |
Check if the loaded matrix operand requires transposed. More... | |
static LogicalResult | creatLdMatrixCompatibleLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | createNonLdMatrixLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static bool | isSharedMemory (MemRefType type) |
Return true if this is a shared memory memref type. More... | |
static LogicalResult | convertTransferReadToLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation. More... | |
static LogicalResult | convertTransferWriteToStores (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static void | populateFromInt64AttrArray (ArrayAttr arrayAttr, SmallVectorImpl< int64_t > &results) |
static LogicalResult | convertExtractStridedSlice (RewriterBase &rewriter, vector::ExtractStridedSliceOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOp (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOpToMmaSync (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertConstantOp (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static LogicalResult | convertBroadcastOp (RewriterBase &rewriter, vector::BroadcastOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. More... | |
static scf::ForOp | replaceForOpWithNewSignature (RewriterBase &rewriter, scf::ForOp loop, ValueRange newInitArgs) |
static LogicalResult | convertForOp (RewriterBase &rewriter, scf::ForOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertYieldOp (RewriterBase &rewriter, scf::YieldOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertElementwiseOp (RewriterBase &rewriter, Operation *op, gpu::MMAElementwiseOp opType, llvm::DenseMap< Value, Value > &valueMapping) |
Convert an elementwise op to the equivalent elementwise op on MMA matrix. More... | |
#define DBGS | ( | ) | (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
Definition at line 40 of file VectorToGPU.cpp.
#define DBGSNL | ( | ) | (llvm::dbgs() << "\n") |
Definition at line 41 of file VectorToGPU.cpp.
#define DEBUG_TYPE "vector-to-gpu" |
Definition at line 39 of file VectorToGPU.cpp.
#define GEN_PASS_DEF_CONVERTVECTORTOGPU |
Definition at line 44 of file VectorToGPU.cpp.
|
static |
Return true if this is a broadcast from scalar to a 2D vector.
Definition at line 196 of file VectorToGPU.cpp.
Referenced by convertBroadcastOp(), and supportsMMaMatrixType().
|
static |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op.
Definition at line 188 of file VectorToGPU.cpp.
Referenced by convertConstantOp(), and supportsMMaMatrixType().
|
static |
Definition at line 77 of file VectorToGPU.cpp.
Referenced by supportsMMaMatrixType().
|
static |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op.
Definition at line 1094 of file VectorToGPU.cpp.
References broadcastSupportsMMAMatrixType(), mlir::OpBuilder::create(), mlir::gpu::MMAMatrixType::get(), inferFragType(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 1071 of file VectorToGPU.cpp.
References constantSupportsMMAMatrixType(), mlir::OpBuilder::create(), mlir::gpu::MMAMatrixType::get(), inferFragType(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 639 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::DenseElementsAttr::get(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1028 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 1048 of file VectorToGPU.cpp.
References mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Convert an elementwise op to the equivalent elementwise op on MMA matrix.
Definition at line 1203 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::gpu::MMAMatrixType::get(), mlir::Operation::getLoc(), mlir::Operation::getOperands(), mlir::Operation::getResult(), mlir::Operation::getResultTypes(), mlir::getType(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Return the MMA elementwise enum associated with op
if it is supported.
Return std::nullopt
otherwise.
Definition at line 215 of file VectorToGPU.cpp.
References ADDI, MULI, and SUBI.
Referenced by mlir::convertVectorToMMAOps(), and elementwiseSupportsMMAMatrixType().
|
static |
Definition at line 952 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::nvgpu::getMmaSyncRegisterType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), populateFromInt64AttrArray(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1145 of file VectorToGPU.cpp.
References DBGS, mlir::detail::enumerate(), mlir::Block::getArgument(), replaceForOpWithNewSignature(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 542 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::gpu::MMAMatrixType::get(), mlir::get(), mlir::Builder::getIndexAttr(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::getResult(), getStaticallyKnownRowStride(), mlir::Builder::getUnitAttr(), inferFragType(), isTransposeMatrixLoadMap(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::quant::QuantizationFlags::Signed, and transferReadSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Converts a vector.transfer_read
operation directly to either a vector.load
or a nvgpu.ldmatrix
operation.
This function should only be used when converting to nvgpu.mma.sync
operations.
Definition at line 868 of file VectorToGPU.cpp.
References createNonLdMatrixLoads(), creatLdMatrixCompatibleLoads(), mlir::nvgpu::getWarpMatrixInfo(), mlir::nvgpu::inferTileWidthInBits(), isSharedMemory(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 594 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::RewriterBase::eraseOp(), mlir::Builder::getIndexAttr(), getStaticallyKnownRowStride(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and transferWriteSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 900 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::RewriterBase::eraseOp(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), mlir::nvgpu::getLaneIdAndValueIdToOperandCoord(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1178 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::detail::enumerate(), mlir::RewriterBase::eraseOp(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 768 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::get(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), mlir::nvgpu::getLaneIdAndValueIdToOperandCoord(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::Builder::getZeroAttr(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertTransferReadToLoads().
|
static |
Definition at line 706 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::nvgpu::getLaneIdToLdMatrixMatrixCoord(), mlir::nvgpu::getLdMatrixParams(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), isTransposed(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::xegpu::transpose().
Referenced by convertTransferReadToLoads().
|
static |
Return true if the op is supported as elementwise op on MMAMatrix type.
Definition at line 246 of file VectorToGPU.cpp.
References convertElementwiseOpToMMA().
Referenced by supportsMMaMatrixType().
|
static |
Returns true if the extract strided slice op is supported with mma.sync
path.
Definition at line 253 of file VectorToGPU.cpp.
References mlir::nvgpu::B, mlir::nvgpu::C, mlir::nvgpu::getUserContract(), and mlir::nvgpu::getWarpMatrixInfo().
Referenced by supportsMMaMatrixType().
|
static |
Definition at line 210 of file VectorToGPU.cpp.
Referenced by supportsMMaMatrixType().
|
static |
Returns the vector type which represents a matrix fragment.
Definition at line 628 of file VectorToGPU.cpp.
References mlir::nvgpu::FragmentElementInfo::elementsPerRegister, mlir::get(), mlir::nvgpu::FragmentElementInfo::numRegistersPerFragment, and mlir::nvgpu::FragmentElementInfo::registerLLVMType.
Referenced by convertConstantOpMmaSync(), convertTransferWriteToStores(), createNonLdMatrixLoads(), and creatLdMatrixCompatibleLoads().
|
static |
Definition at line 345 of file VectorToGPU.cpp.
References contract(), DBGS, mlir::SliceOptions::filter, mlir::Operation::getOperandTypes(), mlir::Operation::getResultTypes(), getSliceContract(), supportsMMaMatrixType(), mlir::topologicalSort(), and mlir::Operation::walk().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Return an unsorted slice handling scf.for region differently than getSlice
.
In scf.for we only want to include as part of the slice elements that are part of the use/def chain.
Definition at line 308 of file VectorToGPU.cpp.
References mlir::getBackwardSlice(), and mlir::getForwardSlice().
Referenced by getOpToConvert().
|
static |
Definition at line 126 of file VectorToGPU.cpp.
References mlir::getStridesAndOffset().
Referenced by convertTransferReadOp(), convertTransferWriteOp(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
|
static |
For a vector TransferOpType xferOp
, an empty indices
vector, and an AffineMap representing offsets to apply to indices, the function fills indices
with the original indices plus the offsets.
The offsets are applied by taking into account the permutation map of the transfer op. If the offsetMap
has dimension placeholders, those should be provided in dimValues
.
Definition at line 57 of file VectorToGPU.cpp.
References mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), and mlir::affine::makeComposedAffineApply().
|
static |
Definition at line 519 of file VectorToGPU.cpp.
References contract(), mlir::Operation::getNumResults(), mlir::Operation::getResult(), mlir::Operation::getUsers(), mlir::Operation::hasOneUse(), mlir::Operation::hasTrait(), and mlir::Operation::user_begin().
Referenced by convertBroadcastOp(), convertConstantOp(), and convertTransferReadOp().
|
static |
Return true if this integer extend op can be folded into a contract op.
Definition at line 202 of file VectorToGPU.cpp.
|
static |
Return true if this is a shared memory memref type.
Definition at line 857 of file VectorToGPU.cpp.
Referenced by convertTransferReadToLoads().
|
static |
Check if the loaded matrix operand requires transposed.
Transposed Map Example: Example 1 : (..., d0, d1) -> (d1 * 1, d0 * 2) Example 2 : (d0, d1, d2, d3) -> (d3, d2) The code below checks if the output 2D is transposed using a generalized version : (d0, d1, dn, ..., dm, ...) -> (dm, dn) Returns : true; if m > n, false o.w.
Definition at line 679 of file VectorToGPU.cpp.
References DBGS, mlir::AffineMap::getNumResults(), mlir::AffineMap::getPermutationMap(), and mlir::AffineMap::getResult().
Referenced by creatLdMatrixCompatibleLoads().
|
static |
Definition at line 105 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::Builder::getAffineConstantExpr(), mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getContext(), and mlir::AffineMap::getNumDims().
Referenced by convertTransferReadOp(), and transferReadSupportsMMAMatrixType().
|
static |
Definition at line 945 of file VectorToGPU.cpp.
Referenced by convertExtractStridedSlice().
|
static |
Definition at line 1113 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::RewriterBase::eraseBlock(), mlir::RewriterBase::eraseOp(), mlir::RewriterBase::replaceAllUsesWith(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertForOp().
|
static |
Definition at line 277 of file VectorToGPU.cpp.
References broadcast(), broadcastSupportsMMAMatrixType(), mlir::nvgpu::canLowerToWarpMatrixOperation(), constantSupportsMMAMatrixType(), contract(), contractSupportsMMAMatrixType(), elementwiseSupportsMMAMatrixType(), extractStridedSliceSupportsMMAMatrixType(), fpExtendSupportsMMAMatrixType(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
Referenced by getOpToConvert().
|
static |
Definition at line 145 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::getAffineConstantExpr(), mlir::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getPermutationMap(), getStaticallyKnownRowStride(), mlir::AffineMap::isMinorIdentity(), and isTransposeMatrixLoadMap().
Referenced by convertTransferReadOp(), and supportsMMaMatrixType().
|
static |
Definition at line 170 of file VectorToGPU.cpp.
References getStaticallyKnownRowStride().
Referenced by convertTransferWriteOp(), and supportsMMaMatrixType().