MLIR
17.0.0git
|
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include <type_traits>
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Conversion/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
This header declares functions that assit transformations in the MemRef dialect. | |
Macros | |
#define | DEBUG_TYPE "vector-to-gpu" |
#define | DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
#define | DBGSNL() (llvm::dbgs() << "\n") |
#define | GEN_PASS_DEF_CONVERTVECTORTOGPU |
Functions | |
template<typename TransferOpType > | |
static void | getXferIndices (RewriterBase &rewriter, TransferOpType xferOp, AffineMap offsetMap, ArrayRef< Value > dimValues, SmallVector< Value, 4 > &indices) |
For a vector TransferOpType xferOp , an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets. More... | |
static bool | contractSupportsMMAMatrixType (vector::ContractionOp contract, bool useNvGpu) |
static bool | isTransposeMatrixLoadMap (AffineMap permutationMap) |
static std::optional< int64_t > | getMemrefConstantHorizontalStride (ShapedType type) |
static bool | transferReadSupportsMMAMatrixType (vector::TransferReadOp readOp, bool useNvGpu) |
static bool | transferWriteSupportsMMAMatrixType (vector::TransferWriteOp writeOp) |
static bool | constantSupportsMMAMatrixType (arith::ConstantOp constantOp) |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op. More... | |
static bool | broadcastSupportsMMAMatrixType (vector::BroadcastOp broadcastOp) |
Return true if this is a broadcast from scalar to a 2D vector. More... | |
template<typename ExtOpTy > | |
static bool | integerExtendSupportsMMAMatrixType (ExtOpTy extOp) |
Return true if this integer extend op can be folded into a contract op. More... | |
static std::optional< gpu::MMAElementwiseOp > | convertElementwiseOpToMMA (Operation *op) |
Return the MMA elementwise enum associated with op if it is supported. More... | |
static bool | elementwiseSupportsMMAMatrixType (Operation *op) |
Return true if the op is supported as elementwise op on MMAMatrix type. More... | |
static bool | extractStridedSliceSupportsMMAMatrixType (vector::ExtractStridedSliceOp op) |
Returns true if the extract strided slice op is supported with mma.sync path. More... | |
static bool | supportsMMaMatrixType (Operation *op, bool useNvGpu) |
static SetVector< Operation * > | getSliceContract (Operation *op, TransitiveFilter backwardFilter, TransitiveFilter forwardFilter) |
Return an unsorted slice handling scf.for region differently than getSlice . More... | |
static SetVector< Operation * > | getOpToConvert (mlir::Operation *op, bool useNvGpu) |
static const char * | inferFragType (Operation *op) |
static LogicalResult | convertTransferReadOp (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertTransferWriteOp (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static VectorType | getMmaSyncVectorOperandType (const nvgpu::FragmentElementInfo ®Info) |
Returns the vector type which represents a matrix fragment. More... | |
static LogicalResult | convertConstantOpMmaSync (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static LogicalResult | creatLdMatrixCompatibleLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | createNonLdMatrixLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static bool | isSharedMemory (MemRefType type) |
Return true if this is a shared memory memref type. More... | |
static LogicalResult | convertTransferReadToLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation. More... | |
static LogicalResult | convertTransferWriteToStores (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static void | populateFromInt64AttrArray (ArrayAttr arrayAttr, SmallVectorImpl< int64_t > &results) |
static LogicalResult | convertExtractStridedSlice (RewriterBase &rewriter, vector::ExtractStridedSliceOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOp (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOpToMmaSync (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertConstantOp (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static LogicalResult | convertBroadcastOp (RewriterBase &rewriter, vector::BroadcastOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. More... | |
static scf::ForOp | replaceForOpWithNewSignature (RewriterBase &rewriter, scf::ForOp loop, ValueRange newIterOperands) |
static LogicalResult | convertForOp (RewriterBase &rewriter, scf::ForOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertYieldOp (RewriterBase &rewriter, scf::YieldOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertElementwiseOp (RewriterBase &rewriter, Operation *op, gpu::MMAElementwiseOp opType, llvm::DenseMap< Value, Value > &valueMapping) |
Convert an elementwise op to the equivalent elementwise op on MMA matrix. More... | |
#define DBGS | ( | ) | (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
Definition at line 40 of file VectorToGPU.cpp.
#define DBGSNL | ( | ) | (llvm::dbgs() << "\n") |
Definition at line 41 of file VectorToGPU.cpp.
#define DEBUG_TYPE "vector-to-gpu" |
Definition at line 39 of file VectorToGPU.cpp.
#define GEN_PASS_DEF_CONVERTVECTORTOGPU |
Definition at line 44 of file VectorToGPU.cpp.
|
static |
Return true if this is a broadcast from scalar to a 2D vector.
Definition at line 203 of file VectorToGPU.cpp.
Referenced by convertBroadcastOp(), and supportsMMaMatrixType().
|
static |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op.
Definition at line 195 of file VectorToGPU.cpp.
Referenced by convertConstantOp(), and supportsMMaMatrixType().
|
static |
Definition at line 77 of file VectorToGPU.cpp.
References mlir::bindDims(), contract(), mlir::AffineMap::inferFromExprList(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().
Referenced by supportsMMaMatrixType().
|
static |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op.
Definition at line 1038 of file VectorToGPU.cpp.
References broadcastSupportsMMAMatrixType(), mlir::OpBuilder::create(), mlir::gpu::MMAMatrixType::get(), inferFragType(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 1015 of file VectorToGPU.cpp.
References constantSupportsMMAMatrixType(), mlir::OpBuilder::create(), mlir::gpu::MMAMatrixType::get(), inferFragType(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 620 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::failed(), mlir::DenseElementsAttr::get(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 972 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 992 of file VectorToGPU.cpp.
References mlir::AffineExpr::cast(), mlir::OpBuilder::create(), mlir::Builder::getI64ArrayAttr(), getShape(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Convert an elementwise op to the equivalent elementwise op on MMA matrix.
Definition at line 1148 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::Operation::getLoc(), mlir::Operation::getOperands(), mlir::Operation::getResult(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Return the MMA elementwise enum associated with op
if it is supported.
Return std::nullopt
otherwise.
Definition at line 220 of file VectorToGPU.cpp.
Referenced by mlir::convertVectorToMMAOps(), and elementwiseSupportsMMAMatrixType().
|
static |
Definition at line 896 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::failed(), mlir::nvgpu::getMmaSyncRegisterType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), populateFromInt64AttrArray(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1090 of file VectorToGPU.cpp.
References DBGS, mlir::detail::enumerate(), mlir::Block::getArgument(), replaceForOpWithNewSignature(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 522 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::AffineExpr::dyn_cast(), mlir::gpu::MMAMatrixType::get(), mlir::Builder::getIndexAttr(), getMemrefConstantHorizontalStride(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::getResult(), mlir::Builder::getUnitAttr(), inferFragType(), isTransposeMatrixLoadMap(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::quant::QuantizationFlags::Signed, mlir::success(), and transferReadSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Converts a vector.transfer_read
operation directly to either a vector.load
or a nvgpu.ldmatrix
operation.
This function should only be used when converting to nvgpu.mma.sync
operations.
Definition at line 812 of file VectorToGPU.cpp.
References createNonLdMatrixLoads(), creatLdMatrixCompatibleLoads(), mlir::failed(), mlir::nvgpu::getWarpMatrixInfo(), mlir::nvgpu::inferTileWidthInBits(), isSharedMemory(), mlir::RewriterBase::notifyMatchFailure(), and mlir::OpBuilder::setInsertionPoint().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 575 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::RewriterBase::eraseOp(), mlir::Builder::getIndexAttr(), getMemrefConstantHorizontalStride(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::success(), and transferWriteSupportsMMAMatrixType().
Referenced by mlir::convertVectorToMMAOps().
|
static |
Definition at line 844 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::RewriterBase::eraseOp(), mlir::failed(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), mlir::nvgpu::getLaneIdAndValueIdToOperandCoord(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 1123 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::detail::enumerate(), mlir::RewriterBase::eraseOp(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Definition at line 709 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), mlir::Type::dyn_cast(), mlir::failed(), mlir::Builder::getI64ArrayAttr(), mlir::Builder::getIndexAttr(), mlir::Builder::getIndexType(), mlir::nvgpu::getLaneIdAndValueIdToOperandCoord(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::Builder::getZeroAttr(), mlir::Type::isa(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by convertTransferReadToLoads().
|
static |
Definition at line 654 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::failed(), mlir::nvgpu::getLaneIdToLdMatrixMatrixCoord(), mlir::nvgpu::getLdMatrixParams(), mlir::nvgpu::getMmaSyncRegisterType(), getMmaSyncVectorOperandType(), mlir::nvgpu::getWarpMatrixInfo(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), and mlir::success().
Referenced by convertTransferReadToLoads().
|
static |
Return true if the op is supported as elementwise op on MMAMatrix type.
Definition at line 249 of file VectorToGPU.cpp.
References convertElementwiseOpToMMA().
Referenced by supportsMMaMatrixType().
|
static |
Returns true if the extract strided slice op is supported with mma.sync
path.
Definition at line 256 of file VectorToGPU.cpp.
References mlir::nvgpu::B, mlir::nvgpu::C, mlir::failed(), mlir::nvgpu::getUserContract(), and mlir::nvgpu::getWarpMatrixInfo().
Referenced by supportsMMaMatrixType().
|
static |
Definition at line 125 of file VectorToGPU.cpp.
References mlir::failed(), and mlir::getStridesAndOffset().
Referenced by convertTransferReadOp(), convertTransferWriteOp(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
|
static |
Returns the vector type which represents a matrix fragment.
Definition at line 609 of file VectorToGPU.cpp.
References mlir::Type::dyn_cast(), mlir::nvgpu::FragmentElementInfo::elementsPerRegister, mlir::nvgpu::FragmentElementInfo::numRegistersPerFragment, and mlir::nvgpu::FragmentElementInfo::registerLLVMType.
Referenced by convertConstantOpMmaSync(), convertTransferWriteToStores(), createNonLdMatrixLoads(), and creatLdMatrixCompatibleLoads().
|
static |
Definition at line 343 of file VectorToGPU.cpp.
References contract(), mlir::Operation::getOperandTypes(), mlir::Operation::getResultTypes(), getSliceContract(), supportsMMaMatrixType(), mlir::topologicalSort(), and mlir::Operation::walk().
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Return an unsorted slice handling scf.for region differently than getSlice
.
In scf.for we only want to include as part of the slice elements that are part of the use/def chain.
Definition at line 306 of file VectorToGPU.cpp.
References mlir::getBackwardSlice(), and mlir::getForwardSlice().
Referenced by getOpToConvert().
|
static |
For a vector TransferOpType xferOp
, an empty indices
vector, and an AffineMap representing offsets to apply to indices, the function fills indices
with the original indices plus the offsets.
The offsets are applied by taking into account the permutation map of the transfer op. If the offsetMap
has dimension placeholders, those should be provided in dimValues
.
Definition at line 57 of file VectorToGPU.cpp.
References mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), and mlir::makeComposedAffineApply().
|
static |
Definition at line 507 of file VectorToGPU.cpp.
References contract(), mlir::Operation::getNumResults(), mlir::Operation::getResult(), and mlir::Operation::getUsers().
Referenced by convertBroadcastOp(), convertConstantOp(), and convertTransferReadOp().
|
static |
Return true if this integer extend op can be folded into a contract op.
Definition at line 209 of file VectorToGPU.cpp.
|
static |
Return true if this is a shared memory memref type.
Definition at line 799 of file VectorToGPU.cpp.
Referenced by convertTransferReadToLoads().
|
static |
Definition at line 103 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::Builder::getAffineConstantExpr(), mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getContext(), and mlir::AffineMap::getNumDims().
Referenced by convertTransferReadOp(), and transferReadSupportsMMAMatrixType().
|
static |
Definition at line 889 of file VectorToGPU.cpp.
Referenced by convertExtractStridedSlice().
|
static |
Definition at line 1057 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::Operation::erase(), mlir::RewriterBase::eraseOp(), mlir::RewriterBase::replaceAllUsesWith(), and mlir::OpBuilder::setInsertionPoint().
Referenced by convertForOp().
|
static |
Definition at line 280 of file VectorToGPU.cpp.
References broadcast(), broadcastSupportsMMAMatrixType(), constantSupportsMMAMatrixType(), contract(), contractSupportsMMAMatrixType(), elementwiseSupportsMMAMatrixType(), extractStridedSliceSupportsMMAMatrixType(), transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
Referenced by getOpToConvert().
|
static |
Definition at line 144 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::getAffineConstantExpr(), mlir::getAffineDimExpr(), getMemrefConstantHorizontalStride(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::isMinorIdentity(), and isTransposeMatrixLoadMap().
Referenced by convertTransferReadOp(), and supportsMMaMatrixType().
|
static |
Definition at line 177 of file VectorToGPU.cpp.
References getMemrefConstantHorizontalStride().
Referenced by convertTransferWriteOp(), and supportsMMaMatrixType().