doxygen/MMAUtils_8h_source.html

 //===-- MMAUtils.h - MLIR NVGPU dialect utilities for MMA operations-------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file provides utilities to assist in the lowering of other dialects

 // (e.g. Vector) to `nvgpu.mma.*` dialect operations.

 //

 //===----------------------------------------------------------------------===//

 #ifndef MLIR_DIALECT_NVGPU_UTILS_MMAUTILS_H

 #define MLIR_DIALECT_NVGPU_UTILS_MMAUTILS_H


 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"

 #include "mlir/Dialect/Vector/IR/VectorOps.h"

 #include "mlir/IR/PatternMatch.h"

 #include "mlir/IR/Types.h"


 namespace mlir {

 namespace nvgpu {


 /// Identifies which operand of an MMA instruction a value stands for, using
 /// the convention `result := matmul(A, B) + C`.
 enum class MatMulOperandRole : int32_t {
   A = 0, ///< First operand of the matmul.
   B = 1, ///< Second operand of the matmul.
   C = 2  ///< Operand added to the matmul product.
 };


 /// Returns the first user of `op` that is a `vector.contract` operation. If no

 /// `vector.contract` user exists, returns failure.

 FailureOr<vector::ContractionOp> getUserContract(Operation *op);


 /// Collects information about a warp-level matrix operand represented by a
 /// VectorType.
 ///
 /// `operandRole` carries a default member initializer so that a
 /// default-constructed `WarpMatrixInfo` never holds an indeterminate role. The
 /// struct remains an aggregate (C++14 and later), so brace-initialization by
 /// existing callers is unaffected.
 struct WarpMatrixInfo {
   /// Vector type representing the operand.
   VectorType vectorType;
   /// Role (A, B, or C) of the operand in the MMA instruction. Value-initialized,
   /// i.e. it holds the zero-valued enumerator until a caller assigns it.
   MatMulOperandRole operandRole{};
 };


 /// If `op` is a `vector.transfer_write`, return the `WarpMatrixInfo` for the

 /// vector operand. If `op` is a `vector.transfer_read`, `vector.contract`, or

 /// `arith.constant`, return the `WarpMatrixInfo` corresponding to the result.

 /// Otherwise, return failure.

 FailureOr<WarpMatrixInfo> getWarpMatrixInfo(Operation *op);


 /// Returns the number of bits in a single tile row. It is either 128, 256, or

 /// 512 bits depending on the data type and whether the operand is an

 /// accumulator/result operand.

 int64_t inferTileWidthInBits(const WarpMatrixInfo &type);


 /// Specifies information about the registers which compose a matrix fragment
 /// according to the PTX documentation.
 ///
 /// The integer members default to 0 so that a default-constructed
 /// `FragmentElementInfo` never exposes indeterminate values. The struct remains
 /// an aggregate (C++14 and later), so existing brace-initialization keeps
 /// working unchanged.
 struct FragmentElementInfo {
   /// Type of a single register of the fragment (presumably an LLVM dialect
   /// type, judging by the name -- confirm against getMmaSyncRegisterType).
   Type registerLLVMType;
   /// Number of elements held by one register (inferred from the name).
   int64_t elementsPerRegister = 0;
   /// Width of one register in bits (inferred from the name).
   int64_t registerWidthBits = 0;
   /// Number of registers that make up one fragment (inferred from the name).
   int64_t numRegistersPerFragment = 0;
 };


 /// Returns a FragmentElementInfo struct describing the register types for the

 /// given matrix fragment type. The result is wrapped in `FailureOr`, so callers

 /// must check for failure before using the value.

 FailureOr<FragmentElementInfo>

 getMmaSyncRegisterType(const WarpMatrixInfo &type);


 /// Returns an AffineMap which maps two dimensions representing (laneId,

 /// logicalValueId) to two results representing offsets within a

 /// matrix operand. The offsets point to the values the thread is responsible

 /// for (AKA the matrix fragment values) during a warp-collective matrix

 /// operation. For a visual reference of this LaneId -> (row, col) mapping,

 /// please see NVIDIA's PTX documentation:

 /// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-mma

 FailureOr<AffineMap>

 getLaneIdAndValueIdToOperandCoord(OpBuilder &builder, Location loc,

                                   const WarpMatrixInfo &fragmentType);


 /// Encapsulates the parameters needed to lower a `nvgpu.ldmatrix` operation to
 /// `nvvm.ldmatrix`.
 ///
 /// The scalar and enum members carry default member initializers so that a
 /// default-constructed `LdMatrixParams` never exposes indeterminate values. The
 /// struct remains an aggregate (C++14 and later), so existing
 /// brace-initialization keeps working unchanged.
 struct LdMatrixParams {
   /// Vector type of the fragment.
   VectorType fragmentType;
   /// Presumably whether the operand is an accumulator (inferred from the name);
   /// defaults to false.
   bool isAccum = false;
   /// Number of tiles; defaults to 0 until a caller sets it.
   int64_t numTiles = 0;
   /// Iterator type of the contiguous dimension. Value-initialized (zero); the
   /// enumerator that corresponds to is defined by the Vector dialect.
   vector::IteratorType contiguousDimType{};
   /// Target layout. Value-initialized (zero); the enumerator that corresponds
   /// to is defined by the NVVM dialect.
   NVVM::MMALayout targetLayout{};
 };


 /// Given `type`, which contains info for a warp-matrix operand, and

 /// `transpose`, which says whether the load is a transposed load, return the

 /// corresponding LdMatrixParams.

 FailureOr<LdMatrixParams> getLdMatrixParams(const WarpMatrixInfo &type,

                                             bool transpose);

 /// Returns an AffineMap which maps a single dimension representing the laneId

 /// to two results representing offsets within the matrix operand. Those are

 /// the pointer locations a thread should pass to the ldmatrix instruction.

 FailureOr<AffineMap>

 getLaneIdToLdMatrixMatrixCoord(OpBuilder &builder, Location loc,

                                const LdMatrixParams &params);


 /// Returns whether the `vector.transfer_read` operation `op` can be interpreted

 /// as a warp-level cooperative matrix load operation. This function is meant to

 /// be used to establish whether `op` is part of a chain of such warp-level

 /// operations.

 bool canLowerToWarpMatrixOperation(vector::TransferReadOp op);


 /// Returns whether the `vector.transfer_write` operation `op` can be interpreted

 /// as a warp-level cooperative matrix store operation. This function is meant

 /// to be used to establish whether `op` is part of a chain of such warp-level

 /// operations.

 bool canLowerToWarpMatrixOperation(vector::TransferWriteOp op);


 } // namespace nvgpu

 } // namespace mlir


 #endif // MLIR_DIALECT_NVGPU_UTILS_MMAUTILS_H

Types.h

NVVMDialect.h

PatternMatch.h

VectorOps.h

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:204

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::nvgpu::inferTileWidthInBits
int64_t inferTileWidthInBits(const WarpMatrixInfo &type)
Returns the number of bits in a single tile row.
Definition: MMAUtils.cpp:87

mlir::nvgpu::getUserContract
FailureOr< vector::ContractionOp > getUserContract(Operation *op)
Returns the first user of the op that is vector.contract.
Definition: MMAUtils.cpp:50

mlir::nvgpu::getLaneIdAndValueIdToOperandCoord
FailureOr< AffineMap > getLaneIdAndValueIdToOperandCoord(OpBuilder &builder, Location loc, const WarpMatrixInfo &fragmentType)
Returns an AffineMap which maps two dimensions representing (laneId, logicalValueId) and returns tw...
Definition: MMAUtils.cpp:169

mlir::nvgpu::getWarpMatrixInfo
FailureOr< WarpMatrixInfo > getWarpMatrixInfo(Operation *op)
If op is a vector.transfer_write, return the WarpMatrixInfo for the vector operand.
Definition: MMAUtils.cpp:58

mlir::nvgpu::getLaneIdToLdMatrixMatrixCoord
FailureOr< AffineMap > getLaneIdToLdMatrixMatrixCoord(OpBuilder &builder, Location loc, const LdMatrixParams &params)
Returns an AffineMap which maps a single dimension representing the laneId to two results representin...
Definition: MMAUtils.cpp:234

mlir::nvgpu::MatMulOperandRole
MatMulOperandRole
Represents the role of an operand in an MMA instruction: result := matmul(A, B) + C
Definition: MMAUtils.h:26

mlir::nvgpu::MatMulOperandRole::C
@ C

mlir::nvgpu::MatMulOperandRole::A
@ A

mlir::nvgpu::MatMulOperandRole::B
@ B

mlir::nvgpu::getLdMatrixParams
FailureOr< LdMatrixParams > getLdMatrixParams(const WarpMatrixInfo &type, bool transpose)
Given type that contains info for a warp-matrix operand and whether or not the load is a transposed l...
Definition: MMAUtils.cpp:205

mlir::nvgpu::getMmaSyncRegisterType
FailureOr< FragmentElementInfo > getMmaSyncRegisterType(const WarpMatrixInfo &type)
Returns a FragmentElementInfo struct describing the register types for the given matrix fragment type...
Definition: MMAUtils.cpp:100

mlir::nvgpu::canLowerToWarpMatrixOperation
bool canLowerToWarpMatrixOperation(vector::TransferReadOp op)
Returns whether the vector.transfer_read instruction can be interpreted as a warp-level cooperative m...
Definition: MMAUtils.cpp:272

mlir::xegpu::transpose
static void transpose(llvm::ArrayRef< int64_t > trans, SmallVector< int64_t > &shape)
Definition: XeGPUOps.cpp:23

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::nvgpu::FragmentElementInfo
Specifies information about the registers which compose a matrix fragment according to the PTX docume...
Definition: MMAUtils.h:52

mlir::nvgpu::FragmentElementInfo::elementsPerRegister
int64_t elementsPerRegister
Definition: MMAUtils.h:54

mlir::nvgpu::FragmentElementInfo::registerLLVMType
Type registerLLVMType
Definition: MMAUtils.h:53

mlir::nvgpu::FragmentElementInfo::numRegistersPerFragment
int64_t numRegistersPerFragment
Definition: MMAUtils.h:56

mlir::nvgpu::FragmentElementInfo::registerWidthBits
int64_t registerWidthBits
Definition: MMAUtils.h:55

mlir::nvgpu::LdMatrixParams
Encapsulates the parameters needed to lower a nvgpu.ldmatrix operation to nvvm.ldmatrix.
Definition: MMAUtils.h:77

mlir::nvgpu::LdMatrixParams::numTiles
int64_t numTiles
Definition: MMAUtils.h:80

mlir::nvgpu::LdMatrixParams::isAccum
bool isAccum
Definition: MMAUtils.h:79

mlir::nvgpu::LdMatrixParams::targetLayout
NVVM::MMALayout targetLayout
Definition: MMAUtils.h:82

mlir::nvgpu::LdMatrixParams::fragmentType
VectorType fragmentType
Definition: MMAUtils.h:78

mlir::nvgpu::LdMatrixParams::contiguousDimType
vector::IteratorType contiguousDimType
Definition: MMAUtils.h:81

mlir::nvgpu::WarpMatrixInfo
Collects information about a warp-level matrix operand represented by a VectorType.
Definition: MMAUtils.h:34

mlir::nvgpu::WarpMatrixInfo::operandRole
MatMulOperandRole operandRole
Definition: MMAUtils.h:36

mlir::nvgpu::WarpMatrixInfo::vectorType
VectorType vectorType
Definition: MMAUtils.h:35