MLIR
20.0.0git
|
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include <type_traits>
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Conversion/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
Include the generated interface declarations. | |
Macros | |
#define | DEBUG_TYPE "vector-to-gpu" |
#define | DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
#define | DBGSNL() (llvm::dbgs() << "\n") |
#define | GEN_PASS_DEF_CONVERTVECTORTOGPU |
Functions | |
template<typename TransferOpType > | |
static void | getXferIndices (RewriterBase &rewriter, TransferOpType xferOp, AffineMap offsetMap, ArrayRef< Value > dimValues, SmallVector< Value, 4 > &indices) |
For a vector TransferOpType xferOp , an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets. More... | |
static bool | contractSupportsMMAMatrixType (vector::ContractionOp contract, bool useNvGpu) |
static bool | isTransposeMatrixLoadMap (AffineMap permutationMap) |
static std::optional< int64_t > | getStaticallyKnownRowStride (ShapedType type) |
static bool | transferReadSupportsMMAMatrixType (vector::TransferReadOp readOp) |
static bool | transferWriteSupportsMMAMatrixType (vector::TransferWriteOp writeOp) |
static bool | constantSupportsMMAMatrixType (arith::ConstantOp constantOp) |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op. More... | |
static bool | broadcastSupportsMMAMatrixType (vector::BroadcastOp broadcastOp) |
Return true if this is a broadcast from scalar to a 2D vector. More... | |
template<typename ExtOpTy > | |
static bool | integerExtendSupportsMMAMatrixType (ExtOpTy extOp) |
Return true if this integer extend op can be folded into a contract op. More... | |
static bool | fpExtendSupportsMMAMatrixType (arith::ExtFOp extOp) |
static std::optional< gpu::MMAElementwiseOp > | convertElementwiseOpToMMA (Operation *op) |
Return the MMA elementwise enum associated with op if it is supported. More... | |
static bool | elementwiseSupportsMMAMatrixType (Operation *op) |
Return true if the op is supported as elementwise op on MMAMatrix type. More... | |
static bool | extractStridedSliceSupportsMMAMatrixType (vector::ExtractStridedSliceOp op) |
Returns true if the extract strided slice op is supported with mma.sync path. More... | |
static bool | supportsMMaMatrixType (Operation *op, bool useNvGpu) |
static SetVector< Operation * > | getSliceContract (Operation *op, const BackwardSliceOptions &backwardSliceOptions, const ForwardSliceOptions &forwardSliceOptions) |
Return an unsorted slice handling scf.for region differently than getSlice . More... | |
static SetVector< Operation * > | getOpToConvert (mlir::Operation *op, bool useNvGpu) |
static const char * | inferFragType (Operation *op) |
static LogicalResult | convertTransferReadOp (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertTransferWriteOp (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static VectorType | getMmaSyncVectorOperandType (const nvgpu::FragmentElementInfo ®Info) |
Returns the vector type which represents a matrix fragment. More... | |
static LogicalResult | convertConstantOpMmaSync (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static FailureOr< bool > | isTransposed (vector::TransferReadOp op) |
Check if the loaded matrix operand requires transposed. More... | |
static LogicalResult | creatLdMatrixCompatibleLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | createNonLdMatrixLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static bool | isSharedMemory (MemRefType type) |
Return true if this is a shared memory memref type. More... | |
static LogicalResult | convertTransferReadToLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation. More... | |
static LogicalResult | convertTransferWriteToStores (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static void | populateFromInt64AttrArray (ArrayAttr arrayAttr, SmallVectorImpl< int64_t > &results) |
static LogicalResult | convertExtractStridedSlice (RewriterBase &rewriter, vector::ExtractStridedSliceOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOp (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertContractOpToMmaSync (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertConstantOp (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More... | |
static LogicalResult | convertBroadcastOp (RewriterBase &rewriter, vector::BroadcastOp op, llvm::DenseMap< Value, Value > &valueMapping) |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. More... | |
static scf::ForOp | replaceForOpWithNewSignature (RewriterBase &rewriter, scf::ForOp loop, ValueRange newInitArgs) |
static LogicalResult | convertForOp (RewriterBase &rewriter, scf::ForOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertYieldOp (RewriterBase &rewriter, scf::YieldOp op, llvm::DenseMap< Value, Value > &valueMapping) |
static LogicalResult | convertElementwiseOp (RewriterBase &rewriter, Operation *op, gpu::MMAElementwiseOp opType, llvm::DenseMap< Value, Value > &valueMapping) |
Convert an elementwise op to the equivalent elementwise op on MMA matrix. More... | |
#define DBGS | ( | ) | (llvm::dbgs() << "[" DEBUG_TYPE "]: ") |
Definition at line 40 of file VectorToGPU.cpp.
#define DBGSNL | ( | ) | (llvm::dbgs() << "\n") |
Definition at line 41 of file VectorToGPU.cpp.
#define DEBUG_TYPE "vector-to-gpu" |
Definition at line 39 of file VectorToGPU.cpp.
#define GEN_PASS_DEF_CONVERTVECTORTOGPU |
Definition at line 44 of file VectorToGPU.cpp.
|
static |
Return true if this is a broadcast from scalar to a 2D vector.
Definition at line 196 of file VectorToGPU.cpp.
|
static |
Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op.
Definition at line 188 of file VectorToGPU.cpp.
|
static |
Definition at line 77 of file VectorToGPU.cpp.
|
static |
Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op.
Definition at line 1092 of file VectorToGPU.cpp.
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 1069 of file VectorToGPU.cpp.
|
static |
Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.
Definition at line 637 of file VectorToGPU.cpp.
|
static |
Definition at line 1026 of file VectorToGPU.cpp.
|
static |
Definition at line 1046 of file VectorToGPU.cpp.
|
static |
Convert an elementwise op to the equivalent elementwise op on MMA matrix.
Definition at line 1201 of file VectorToGPU.cpp.
|
static |
Return the MMA elementwise enum associated with op
if it is supported.
Return std::nullopt
otherwise.
Definition at line 213 of file VectorToGPU.cpp.
|
static |
Definition at line 950 of file VectorToGPU.cpp.
|
static |
Definition at line 1143 of file VectorToGPU.cpp.
|
static |
Definition at line 540 of file VectorToGPU.cpp.
|
static |
Converts a vector.transfer_read
operation directly to either a vector.load
or a nvgpu.ldmatrix
operation.
This function should only be used when converting to nvgpu.mma.sync
operations.
Definition at line 866 of file VectorToGPU.cpp.
|
static |
Definition at line 592 of file VectorToGPU.cpp.
|
static |
Definition at line 898 of file VectorToGPU.cpp.
|
static |
Definition at line 1176 of file VectorToGPU.cpp.
|
static |
Definition at line 766 of file VectorToGPU.cpp.
|
static |
Definition at line 704 of file VectorToGPU.cpp.
|
static |
Return true if the op is supported as elementwise op on MMAMatrix type.
Definition at line 244 of file VectorToGPU.cpp.
|
static |
Returns true if the extract strided slice op is supported with mma.sync
path.
Definition at line 251 of file VectorToGPU.cpp.
|
static |
Definition at line 208 of file VectorToGPU.cpp.
|
static |
Returns the vector type which represents a matrix fragment.
Definition at line 626 of file VectorToGPU.cpp.
References mlir::nvgpu::FragmentElementInfo::elementsPerRegister, mlir::get(), mlir::nvgpu::FragmentElementInfo::numRegistersPerFragment, and mlir::nvgpu::FragmentElementInfo::registerLLVMType.
|
static |
Definition at line 343 of file VectorToGPU.cpp.
Referenced by mlir::convertVectorToMMAOps(), and mlir::convertVectorToNVVMCompatibleMMASync().
|
static |
Return an unsorted slice handling scf.for region differently than getSlice
.
In scf.for we only want to include as part of the slice elements that are part of the use/def chain.
Definition at line 306 of file VectorToGPU.cpp.
|
static |
Definition at line 126 of file VectorToGPU.cpp.
References mlir::getStridesAndOffset().
Referenced by transferReadSupportsMMAMatrixType(), and transferWriteSupportsMMAMatrixType().
|
static |
For a vector TransferOpType xferOp
, an empty indices
vector, and an AffineMap representing offsets to apply to indices, the function fills indices
with the original indices plus the offsets.
The offsets are applied by taking into account the permutation map of the transfer op. If the offsetMap
has dimension placeholders, those should be provided in dimValues
.
Definition at line 57 of file VectorToGPU.cpp.
References mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), and mlir::affine::makeComposedAffineApply().
|
static |
Definition at line 517 of file VectorToGPU.cpp.
|
static |
Return true if this integer extend op can be folded into a contract op.
Definition at line 202 of file VectorToGPU.cpp.
|
static |
Return true if this is a shared memory memref type.
Definition at line 855 of file VectorToGPU.cpp.
|
static |
Check if the loaded matrix operand requires transposed.
Transposed Map Example: Example 1 : (..., d0, d1) -> (d1 * 1, d0 * 2) Example 2 : (d0, d1, d2, d3) -> (d3, d2) The code below checks if the output 2D is transposed using a generalized version : (d0, d1, dn, ..., dm, ...) -> (dm, dn) Returns : true; if m > n, false o.w.
Definition at line 677 of file VectorToGPU.cpp.
|
static |
Definition at line 105 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::Builder::getAffineConstantExpr(), mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getContext(), and mlir::AffineMap::getNumDims().
Referenced by transferReadSupportsMMAMatrixType().
|
static |
Definition at line 943 of file VectorToGPU.cpp.
|
static |
Definition at line 1111 of file VectorToGPU.cpp.
References mlir::OpBuilder::create(), DBGS, mlir::RewriterBase::eraseBlock(), mlir::RewriterBase::eraseOp(), mlir::RewriterBase::replaceAllUsesWith(), and mlir::OpBuilder::setInsertionPoint().
|
static |
Definition at line 275 of file VectorToGPU.cpp.
|
static |
Definition at line 145 of file VectorToGPU.cpp.
References mlir::AffineMap::get(), mlir::getAffineConstantExpr(), mlir::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getPermutationMap(), getStaticallyKnownRowStride(), mlir::AffineMap::isMinorIdentity(), and isTransposeMatrixLoadMap().
|
static |
Definition at line 170 of file VectorToGPU.cpp.
References getStaticallyKnownRowStride().