MLIR  19.0.0git
Namespaces | Macros | Functions
VectorToGPU.cpp File Reference
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include <type_traits>
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Conversion/Passes.h.inc"

Go to the source code of this file.

Namespaces

 mlir
 Include the generated interface declarations.
 

Macros

#define DEBUG_TYPE   "vector-to-gpu"
 
#define DBGS()   (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
 
#define DBGSNL()   (llvm::dbgs() << "\n")
 
#define GEN_PASS_DEF_CONVERTVECTORTOGPU
 

Functions

template<typename TransferOpType >
static void getXferIndices (RewriterBase &rewriter, TransferOpType xferOp, AffineMap offsetMap, ArrayRef< Value > dimValues, SmallVector< Value, 4 > &indices)
 For a vector TransferOpType xferOp, an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets. More...
 
static bool contractSupportsMMAMatrixType (vector::ContractionOp contract, bool useNvGpu)
 
static bool isTransposeMatrixLoadMap (AffineMap permutationMap)
 
static std::optional< int64_t > getStaticallyKnownRowStride (ShapedType type)
 
static bool transferReadSupportsMMAMatrixType (vector::TransferReadOp readOp)
 
static bool transferWriteSupportsMMAMatrixType (vector::TransferWriteOp writeOp)
 
static bool constantSupportsMMAMatrixType (arith::ConstantOp constantOp)
 Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op. More...
 
static bool broadcastSupportsMMAMatrixType (vector::BroadcastOp broadcastOp)
 Return true if this is a broadcast from scalar to a 2D vector. More...
 
template<typename ExtOpTy >
static bool integerExtendSupportsMMAMatrixType (ExtOpTy extOp)
 Return true if this integer extend op can be folded into a contract op. More...
 
static bool fpExtendSupportsMMAMatrixType (arith::ExtFOp extOp)
 
static std::optional< gpu::MMAElementwiseOp > convertElementwiseOpToMMA (Operation *op)
 Return the MMA elementwise enum associated with op if it is supported. More...
 
static bool elementwiseSupportsMMAMatrixType (Operation *op)
 Return true if the op is supported as elementwise op on MMAMatrix type. More...
 
static bool extractStridedSliceSupportsMMAMatrixType (vector::ExtractStridedSliceOp op)
 Returns true if the extract strided slice op is supported with mma.sync path. More...
 
static bool supportsMMaMatrixType (Operation *op, bool useNvGpu)
 
static SetVector< Operation * > getSliceContract (Operation *op, const BackwardSliceOptions &backwardSliceOptions, const ForwardSliceOptions &forwardSliceOptions)
 Return an unsorted slice handling scf.for region differently than getSlice. More...
 
static SetVector< Operation * > getOpToConvert (mlir::Operation *op, bool useNvGpu)
 
static const char * inferFragType (Operation *op)
 
static LogicalResult convertTransferReadOp (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult convertTransferWriteOp (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static VectorType getMmaSyncVectorOperandType (const nvgpu::FragmentElementInfo &regInfo)
 Returns the vector type which represents a matrix fragment. More...
 
static LogicalResult convertConstantOpMmaSync (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping)
 Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More...
 
static FailureOr< bool > isTransposed (vector::TransferReadOp op)
 Check if the loaded matrix operand requires transposed. More...
 
static LogicalResult creatLdMatrixCompatibleLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult createNonLdMatrixLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static bool isSharedMemory (MemRefType type)
 Return true if this is a shared memory memref type. More...
 
static LogicalResult convertTransferReadToLoads (RewriterBase &rewriter, vector::TransferReadOp op, llvm::DenseMap< Value, Value > &valueMapping)
 Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation. More...
 
static LogicalResult convertTransferWriteToStores (RewriterBase &rewriter, vector::TransferWriteOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static void populateFromInt64AttrArray (ArrayAttr arrayAttr, SmallVectorImpl< int64_t > &results)
 
static LogicalResult convertExtractStridedSlice (RewriterBase &rewriter, vector::ExtractStridedSliceOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult convertContractOp (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult convertContractOpToMmaSync (RewriterBase &rewriter, vector::ContractionOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult convertConstantOp (RewriterBase &rewriter, arith::ConstantOp op, llvm::DenseMap< Value, Value > &valueMapping)
 Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op. More...
 
static LogicalResult convertBroadcastOp (RewriterBase &rewriter, vector::BroadcastOp op, llvm::DenseMap< Value, Value > &valueMapping)
 Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. More...
 
static scf::ForOp replaceForOpWithNewSignature (RewriterBase &rewriter, scf::ForOp loop, ValueRange newInitArgs)
 
static LogicalResult convertForOp (RewriterBase &rewriter, scf::ForOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult convertYieldOp (RewriterBase &rewriter, scf::YieldOp op, llvm::DenseMap< Value, Value > &valueMapping)
 
static LogicalResult convertElementwiseOp (RewriterBase &rewriter, Operation *op, gpu::MMAElementwiseOp opType, llvm::DenseMap< Value, Value > &valueMapping)
 Convert an elementwise op to the equivalent elementwise op on MMA matrix. More...
 

Macro Definition Documentation

◆ DBGS

#define DBGS ( )    (llvm::dbgs() << "[" DEBUG_TYPE "]: ")

Definition at line 40 of file VectorToGPU.cpp.

◆ DBGSNL

#define DBGSNL ( )    (llvm::dbgs() << "\n")

Definition at line 41 of file VectorToGPU.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE   "vector-to-gpu"

Definition at line 39 of file VectorToGPU.cpp.

◆ GEN_PASS_DEF_CONVERTVECTORTOGPU

#define GEN_PASS_DEF_CONVERTVECTORTOGPU

Definition at line 44 of file VectorToGPU.cpp.

Function Documentation

◆ broadcastSupportsMMAMatrixType()

static bool broadcastSupportsMMAMatrixType ( vector::BroadcastOp  broadcastOp)
static

Return true if this is a broadcast from scalar to a 2D vector.

Definition at line 196 of file VectorToGPU.cpp.

◆ constantSupportsMMAMatrixType()

static bool constantSupportsMMAMatrixType ( arith::ConstantOp  constantOp)
static

Return true if the constant is a splat to a 2D vector so that it can be converted to a MMA constant matrix op.

Definition at line 188 of file VectorToGPU.cpp.

◆ contractSupportsMMAMatrixType()

static bool contractSupportsMMAMatrixType ( vector::ContractionOp  contract,
bool  useNvGpu 
)
static

◆ convertBroadcastOp()

static LogicalResult convertBroadcastOp ( RewriterBase rewriter,
vector::BroadcastOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op.

Definition at line 1085 of file VectorToGPU.cpp.

◆ convertConstantOp()

static LogicalResult convertConstantOp ( RewriterBase rewriter,
arith::ConstantOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.

Definition at line 1062 of file VectorToGPU.cpp.

◆ convertConstantOpMmaSync()

static LogicalResult convertConstantOpMmaSync ( RewriterBase rewriter,
arith::ConstantOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Convert a 2D splat ConstantOp to a SubgroupMmaConstantMatrix op.

Definition at line 630 of file VectorToGPU.cpp.

◆ convertContractOp()

static LogicalResult convertContractOp ( RewriterBase rewriter,
vector::ContractionOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 1019 of file VectorToGPU.cpp.

◆ convertContractOpToMmaSync()

static LogicalResult convertContractOpToMmaSync ( RewriterBase rewriter,
vector::ContractionOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 1039 of file VectorToGPU.cpp.

◆ convertElementwiseOp()

static LogicalResult convertElementwiseOp ( RewriterBase rewriter,
Operation op,
gpu::MMAElementwiseOp  opType,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Convert an elementwise op to the equivalent elementwise op on MMA matrix.

Definition at line 1194 of file VectorToGPU.cpp.

◆ convertElementwiseOpToMMA()

static std::optional<gpu::MMAElementwiseOp> convertElementwiseOpToMMA ( Operation op)
static

Return the MMA elementwise enum associated with op if it is supported.

Return std::nullopt otherwise.

Definition at line 213 of file VectorToGPU.cpp.

◆ convertExtractStridedSlice()

static LogicalResult convertExtractStridedSlice ( RewriterBase rewriter,
vector::ExtractStridedSliceOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 943 of file VectorToGPU.cpp.

◆ convertForOp()

static LogicalResult convertForOp ( RewriterBase rewriter,
scf::ForOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 1136 of file VectorToGPU.cpp.

◆ convertTransferReadOp()

static LogicalResult convertTransferReadOp ( RewriterBase rewriter,
vector::TransferReadOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 532 of file VectorToGPU.cpp.

◆ convertTransferReadToLoads()

static LogicalResult convertTransferReadToLoads ( RewriterBase rewriter,
vector::TransferReadOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Converts a vector.transfer_read operation directly to either a vector.load or a nvgpu.ldmatrix operation.

This function should only be used when converting to nvgpu.mma.sync operations.

Definition at line 859 of file VectorToGPU.cpp.

◆ convertTransferWriteOp()

static LogicalResult convertTransferWriteOp ( RewriterBase rewriter,
vector::TransferWriteOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 585 of file VectorToGPU.cpp.

◆ convertTransferWriteToStores()

static LogicalResult convertTransferWriteToStores ( RewriterBase rewriter,
vector::TransferWriteOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 891 of file VectorToGPU.cpp.

◆ convertYieldOp()

static LogicalResult convertYieldOp ( RewriterBase rewriter,
scf::YieldOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 1169 of file VectorToGPU.cpp.

◆ createNonLdMatrixLoads()

static LogicalResult createNonLdMatrixLoads ( RewriterBase rewriter,
vector::TransferReadOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 759 of file VectorToGPU.cpp.

◆ creatLdMatrixCompatibleLoads()

static LogicalResult creatLdMatrixCompatibleLoads ( RewriterBase rewriter,
vector::TransferReadOp  op,
llvm::DenseMap< Value, Value > &  valueMapping 
)
static

Definition at line 697 of file VectorToGPU.cpp.

◆ elementwiseSupportsMMAMatrixType()

static bool elementwiseSupportsMMAMatrixType ( Operation op)
static

Return true if the op is supported as elementwise op on MMAMatrix type.

Definition at line 244 of file VectorToGPU.cpp.

◆ extractStridedSliceSupportsMMAMatrixType()

static bool extractStridedSliceSupportsMMAMatrixType ( vector::ExtractStridedSliceOp  op)
static

Returns true if the extract strided slice op is supported with mma.sync path.

Definition at line 251 of file VectorToGPU.cpp.

◆ fpExtendSupportsMMAMatrixType()

static bool fpExtendSupportsMMAMatrixType ( arith::ExtFOp  extOp)
static

Definition at line 208 of file VectorToGPU.cpp.

◆ getMmaSyncVectorOperandType()

static VectorType getMmaSyncVectorOperandType ( const nvgpu::FragmentElementInfo regInfo)
static

◆ getOpToConvert()

static SetVector<Operation *> getOpToConvert ( mlir::Operation op,
bool  useNvGpu 
)
static

◆ getSliceContract()

static SetVector<Operation *> getSliceContract ( Operation op,
const BackwardSliceOptions backwardSliceOptions,
const ForwardSliceOptions forwardSliceOptions 
)
static

Return an unsorted slice handling scf.for region differently than getSlice.

In scf.for we only want to include as part of the slice elements that are part of the use/def chain.

Definition at line 306 of file VectorToGPU.cpp.

◆ getStaticallyKnownRowStride()

static std::optional<int64_t> getStaticallyKnownRowStride ( ShapedType  type)
static

◆ getXferIndices()

template<typename TransferOpType >
static void getXferIndices ( RewriterBase rewriter,
TransferOpType  xferOp,
AffineMap  offsetMap,
ArrayRef< Value dimValues,
SmallVector< Value, 4 > &  indices 
)
static

For a vector TransferOpType xferOp, an empty indices vector, and an AffineMap representing offsets to apply to indices, the function fills indices with the original indices plus the offsets.

The offsets are applied by taking into account the permutation map of the transfer op. If the offsetMap has dimension placeholders, those should be provided in dimValues.

Definition at line 57 of file VectorToGPU.cpp.

References mlir::Builder::getAffineDimExpr(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getResult(), and mlir::affine::makeComposedAffineApply().

◆ inferFragType()

static const char* inferFragType ( Operation op)
static

Definition at line 517 of file VectorToGPU.cpp.

◆ integerExtendSupportsMMAMatrixType()

template<typename ExtOpTy >
static bool integerExtendSupportsMMAMatrixType ( ExtOpTy  extOp)
static

Return true if this integer extend op can be folded into a contract op.

Definition at line 202 of file VectorToGPU.cpp.

◆ isSharedMemory()

static bool isSharedMemory ( MemRefType  type)
static

Return true if this is a shared memory memref type.

Definition at line 848 of file VectorToGPU.cpp.

◆ isTransposed()

static FailureOr<bool> isTransposed ( vector::TransferReadOp  op)
static

Check if the loaded matrix operand requires transposed.

Transposed Map Example: Example 1 : (..., d0, d1) -> (d1 * 1, d0 * 2) Example 2 : (d0, d1, d2, d3) -> (d3, d2) The code below checks if the output 2D is transposed using a generalized version : (d0, d1, dn, ..., dm, ...) -> (dm, dn) Returns : true; if m > n, false o.w.

Definition at line 670 of file VectorToGPU.cpp.

◆ isTransposeMatrixLoadMap()

static bool isTransposeMatrixLoadMap ( AffineMap  permutationMap)
static

◆ populateFromInt64AttrArray()

static void populateFromInt64AttrArray ( ArrayAttr  arrayAttr,
SmallVectorImpl< int64_t > &  results 
)
static

Definition at line 936 of file VectorToGPU.cpp.

◆ replaceForOpWithNewSignature()

static scf::ForOp replaceForOpWithNewSignature ( RewriterBase rewriter,
scf::ForOp  loop,
ValueRange  newInitArgs 
)
static

◆ supportsMMaMatrixType()

static bool supportsMMaMatrixType ( Operation op,
bool  useNvGpu 
)
static

Definition at line 275 of file VectorToGPU.cpp.

◆ transferReadSupportsMMAMatrixType()

static bool transferReadSupportsMMAMatrixType ( vector::TransferReadOp  readOp)
static

◆ transferWriteSupportsMMAMatrixType()

static bool transferWriteSupportsMMAMatrixType ( vector::TransferWriteOp  writeOp)
static

Definition at line 170 of file VectorToGPU.cpp.

References getStaticallyKnownRowStride().