#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/Types.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <optional>
#include <string>
#include "mlir/Dialect/LLVMIR/NVVMOpsDialect.cpp.inc"
#include "mlir/Dialect/LLVMIR/NVVMOpsEnums.cpp.inc"
#include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
#include "mlir/Dialect/LLVMIR/NVVMOpsAttributes.cpp.inc"
Go to the source code of this file.
|
#define | CP_ASYNC_ID_IMPL(mod, size, suffix) llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix |
|
#define | GET_CP_ASYNC_ID(mod, size, has_cpsize) has_cpsize ? CP_ASYNC_ID_IMPL(mod, size, _s) : CP_ASYNC_ID_IMPL(mod, size, ) |
|
#define | CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, mode) llvm::Intrinsic::nvvm_cp_async_bulk_tensor_##op##_##mode##_##dim##d |
|
#define | CP_ASYNC_BULK_TENSOR_REDUCE(op, dim, is_im2col) |
|
#define | GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) |
|
#define | _none |
|
#define | CVT_F2TF32_ID_IMPL(rnd, relu, sf) |
|
#define | GET_CVT_F2TF32_ID(rnd, relu, sf) |
|
#define | GET_F32x2_TO_F6x2_ID(type, has_relu) |
|
#define | GET_F32x2_TO_F8X2_US_ID(rnd, has_satf) |
|
#define | GET_F32x2_TO_F8X2_S_ID(type, has_relu) |
|
#define | GET_F16x2_TO_F8X2_ID(type, has_relu) |
|
#define | GET_BF16X2_TO_F8X2_ID(rnd, has_satf) |
|
#define | TCGEN05_COMMIT_IMPL(cg, is_shared, mc) |
|
#define | GET_TCGEN05_COMMIT_ID(cta_group, is_shared, has_mc) |
|
#define | TCGEN05_CP_IMPL(shape_mc, src_fmt, cg) llvm::Intrinsic::nvvm_tcgen05_cp##shape_mc##src_fmt##cg |
|
#define | TCGEN05_CP_2CTA(shape_mc, src_fmt, is_2cta) |
|
#define | GET_TCGEN05_CP_ID(shape_mc, src_fmt, is_2cta) |
|
#define | GET_OP_LIST |
|
#define | GET_ATTRDEF_LIST |
|
#define | GET_OP_CLASSES |
|
#define | GET_ATTRDEF_CLASSES |
|
|
static LogicalResult | cpAsyncBulkTensorCommonVerifier (size_t tensorDims, bool isIm2Col, size_t numIm2ColOffsets, Location loc) |
|
static bool | isInt4PtxType (MMATypes type) |
|
static bool | isInt8PtxType (MMATypes type) |
|
static bool | isIntegerPtxType (MMATypes type) |
|
static std::pair< mlir::Type, unsigned > | inferMMATypeFromMNK (NVVM::MMATypes type, NVVM::MMAFrag frag, int m, int n, int k, MLIRContext *context) |
|
FailureOr< int > | getAllowedSizeK (NVVM::WGMMATypes typeA) |
|
LogicalResult | isAllowedWGMMADataType (NVVM::WGMMATypes typeD, NVVM::WGMMATypes typeA, NVVM::WGMMATypes typeB) |
|
LogicalResult | isAllowedSizeN (int sizeN, NVVM::WGMMATypes typeA) |
|
static unsigned | isValidVectorLength (NVVM::Tcgen05LdStShape shape, unsigned vecLen) |
|
static void | nvvmInferResultRanges (Operation *op, Value result, ArrayRef<::mlir::ConstantIntRanges > argRanges, SetIntRangeFn setResultRanges) |
| Infer the result ranges for the NVVM SpecialRangeableRegisterOp that might have ConstantRangeAttr. More...
|
|
◆ _none
◆ CP_ASYNC_BULK_TENSOR_REDUCE
#define CP_ASYNC_BULK_TENSOR_REDUCE |
( |
|
op, |
|
|
|
dim, |
|
|
|
is_im2col |
|
) |
| |
Value:
#define CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, mode)
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition at line 1275 of file NVVMDialect.cpp.
◆ CP_ASYNC_BULK_TENSOR_REDUCE_MODE
#define CP_ASYNC_BULK_TENSOR_REDUCE_MODE |
( |
|
op, |
|
|
|
dim, |
|
|
|
mode |
|
) |
| llvm::Intrinsic::nvvm_cp_async_bulk_tensor_##op##_##mode##_##dim##d |
◆ CP_ASYNC_ID_IMPL
#define CP_ASYNC_ID_IMPL |
( |
|
mod, |
|
|
|
size, |
|
|
|
suffix |
|
) |
| llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix |
◆ CVT_F2TF32_ID_IMPL
#define CVT_F2TF32_ID_IMPL |
( |
|
rnd, |
|
|
|
relu, |
|
|
|
sf |
|
) |
| |
Value: hasRelu ? llvm::Intrinsic::nvvm_f2tf32_##rnd##relu##sf \
: llvm::Intrinsic::nvvm_f2tf32_##rnd##sf
Definition at line 1323 of file NVVMDialect.cpp.
◆ GET_ATTRDEF_CLASSES
#define GET_ATTRDEF_CLASSES |
◆ GET_ATTRDEF_LIST
◆ GET_BF16X2_TO_F8X2_ID
#define GET_BF16X2_TO_F8X2_ID |
( |
|
rnd, |
|
|
|
has_satf |
|
) |
| |
Value: has_satf ? llvm::Intrinsic::nvvm_bf16x2_to_ue8m0x2_##rnd##_satfinite \
: llvm::Intrinsic::nvvm_bf16x2_to_ue8m0x2_##rnd
Definition at line 1408 of file NVVMDialect.cpp.
◆ GET_CP_ASYNC_BULK_TENSOR_ID
#define GET_CP_ASYNC_BULK_TENSOR_ID |
( |
|
op, |
|
|
|
dims, |
|
|
|
is_im2col |
|
) |
| |
Value: [&]() -> auto { \
switch (dims) { \
case 1: \
case 2: \
case 3: \
case 4: \
case 5: \
default: \
llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \
} \
}()
#define CP_ASYNC_BULK_TENSOR_REDUCE(op, dim, is_im2col)
Definition at line 1279 of file NVVMDialect.cpp.
◆ GET_CP_ASYNC_ID
◆ GET_CVT_F2TF32_ID
#define GET_CVT_F2TF32_ID |
( |
|
rnd, |
|
|
|
relu, |
|
|
|
sf |
|
) |
| |
Value:
#define CVT_F2TF32_ID_IMPL(rnd, relu, sf)
Definition at line 1327 of file NVVMDialect.cpp.
◆ GET_F16x2_TO_F8X2_ID
#define GET_F16x2_TO_F8X2_ID |
( |
|
type, |
|
|
|
has_relu |
|
) |
| |
Value: has_relu ? llvm::Intrinsic::nvvm_f16x2_to_##type##_rn_relu \
: llvm::Intrinsic::nvvm_f16x2_to_##type##_rn
Definition at line 1392 of file NVVMDialect.cpp.
◆ GET_F32x2_TO_F6x2_ID
#define GET_F32x2_TO_F6x2_ID |
( |
|
type, |
|
|
|
has_relu |
|
) |
| |
Value: has_relu ? llvm::Intrinsic::nvvm_ff_to_##type##_rn_relu_satfinite \
: llvm::Intrinsic::nvvm_ff_to_##type##_rn_satfinite
Definition at line 1348 of file NVVMDialect.cpp.
◆ GET_F32x2_TO_F8X2_S_ID
#define GET_F32x2_TO_F8X2_S_ID |
( |
|
type, |
|
|
|
has_relu |
|
) |
| |
Value: has_relu ? llvm::Intrinsic::nvvm_ff_to_##type##_rn_relu \
: llvm::Intrinsic::nvvm_ff_to_##type##_rn
Definition at line 1366 of file NVVMDialect.cpp.
◆ GET_F32x2_TO_F8X2_US_ID
#define GET_F32x2_TO_F8X2_US_ID |
( |
|
rnd, |
|
|
|
has_satf |
|
) |
| |
Value: has_satf ? llvm::Intrinsic::nvvm_ff_to_ue8m0x2_##rnd##_satfinite \
: llvm::Intrinsic::nvvm_ff_to_ue8m0x2_##rnd
Definition at line 1362 of file NVVMDialect.cpp.
◆ GET_OP_CLASSES
◆ GET_OP_LIST
◆ GET_TCGEN05_COMMIT_ID
#define GET_TCGEN05_COMMIT_ID |
( |
|
cta_group, |
|
|
|
is_shared, |
|
|
|
has_mc |
|
) |
| |
Value:
#define TCGEN05_COMMIT_IMPL(cg, is_shared, mc)
Definition at line 1471 of file NVVMDialect.cpp.
◆ GET_TCGEN05_CP_ID
#define GET_TCGEN05_CP_ID |
( |
|
shape_mc, |
|
|
|
src_fmt, |
|
|
|
is_2cta |
|
) |
| |
Value: [&]() -> auto { \
if ((src_fmt) == Tcgen05CpSrcFormat::B6x16_P32) \
if ((src_fmt) == Tcgen05CpSrcFormat::B4x16_P64) \
}()
#define TCGEN05_CP_2CTA(shape_mc, src_fmt, is_2cta)
Definition at line 1505 of file NVVMDialect.cpp.
◆ TCGEN05_COMMIT_IMPL
#define TCGEN05_COMMIT_IMPL |
( |
|
cg, |
|
|
|
is_shared, |
|
|
|
mc |
|
) |
| |
Value: is_shared ? llvm::Intrinsic::nvvm_tcgen05_commit##mc##_shared##_##cg \
: llvm::Intrinsic::nvvm_tcgen05_commit##mc##_##cg
Definition at line 1467 of file NVVMDialect.cpp.
◆ TCGEN05_CP_2CTA
#define TCGEN05_CP_2CTA |
( |
|
shape_mc, |
|
|
|
src_fmt, |
|
|
|
is_2cta |
|
) |
| |
Value:
#define TCGEN05_CP_IMPL(shape_mc, src_fmt, cg)
Definition at line 1501 of file NVVMDialect.cpp.
◆ TCGEN05_CP_IMPL
#define TCGEN05_CP_IMPL |
( |
|
shape_mc, |
|
|
|
src_fmt, |
|
|
|
cg |
|
) |
| llvm::Intrinsic::nvvm_tcgen05_cp##shape_mc##src_fmt##cg |
◆ cpAsyncBulkTensorCommonVerifier()
static LogicalResult cpAsyncBulkTensorCommonVerifier |
( |
size_t |
tensorDims, |
|
|
bool |
isIm2Col, |
|
|
size_t |
numIm2ColOffsets, |
|
|
Location |
loc |
|
) |
| |
|
static |
◆ getAllowedSizeK()
FailureOr<int> getAllowedSizeK |
( |
NVVM::WGMMATypes |
typeA | ) |
|
◆ inferMMATypeFromMNK()
static std::pair<mlir::Type, unsigned> inferMMATypeFromMNK |
( |
NVVM::MMATypes |
type, |
|
|
NVVM::MMAFrag |
frag, |
|
|
int |
m, |
|
|
int |
n, |
|
|
int |
k, |
|
|
MLIRContext * |
context |
|
) |
| |
|
static |
◆ isAllowedSizeN()
LogicalResult isAllowedSizeN |
( |
int |
sizeN, |
|
|
NVVM::WGMMATypes |
typeA |
|
) |
| |
◆ isAllowedWGMMADataType()
LogicalResult isAllowedWGMMADataType |
( |
NVVM::WGMMATypes |
typeD, |
|
|
NVVM::WGMMATypes |
typeA, |
|
|
NVVM::WGMMATypes |
typeB |
|
) |
| |
◆ isInt4PtxType()
static bool isInt4PtxType |
( |
MMATypes |
type | ) |
|
|
static |
◆ isInt8PtxType()
static bool isInt8PtxType |
( |
MMATypes |
type | ) |
|
|
static |
◆ isIntegerPtxType()
static bool isIntegerPtxType |
( |
MMATypes |
type | ) |
|
|
static |
◆ isValidVectorLength()
static unsigned isValidVectorLength |
( |
NVVM::Tcgen05LdStShape |
shape, |
|
|
unsigned |
vecLen |
|
) |
| |
|
static |
◆ nvvmInferResultRanges()