MLIR
20.0.0git
|
#include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/STLExtras.h"
#include <optional>
#include "mlir/Conversion/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
Include the generated interface declarations. | |
Macros | |
#define | GEN_PASS_DEF_CONVERTAMDGPUTOROCDL |
Functions | |
static Value | createI32Constant (ConversionPatternRewriter &rewriter, Location loc, int32_t value) |
static Value | createI1Constant (ConversionPatternRewriter &rewriter, Location loc, bool value) |
static Value | convertMFMAVectorOperand (ConversionPatternRewriter &rewriter, Location loc, Value input) |
Converts a MFMA vector operand from MLIR AMDGPU dialect convention to ROCDL and LLVM AMDGPU intrinsics convention. More... | |
static void | wmmaPushInputOperand (ConversionPatternRewriter &rewriter, Location loc, const TypeConverter *typeConverter, bool isUnsigned, Value llvmInput, Value mlirInput, SmallVector< Value, 4 > &operands) |
Push an input operand. More... | |
static void | wmmaPushOutputOperand (ConversionPatternRewriter &rewriter, Location loc, const TypeConverter *typeConverter, Value output, int32_t subwordOffset, bool clamp, SmallVector< Value, 4 > &operands) |
Push the output operand. More... | |
static std::optional< StringRef > | mfmaOpToIntrinsic (MFMAOp mfma, Chipset chipset) |
Return the rocdl intrinsic corresponding to a MFMA operation mfma if one exists. More... | |
static std::optional< StringRef > | wmmaOpToIntrinsic (WMMAOp wmma, Chipset chipset) |
Return the rocdl intrinsic corresponding to a WMMA operation wmma if one exists. More... | |
#define GEN_PASS_DEF_CONVERTAMDGPUTOROCDL |
Definition at line 26 of file AMDGPUToROCDL.cpp.
|
static |
Converts a MFMA vector operand from MLIR AMDGPU dialect convention to ROCDL and LLVM AMDGPU intrinsics convention.
Specifically: if input is a vector of N bytes, bitcast it to a (N * 8)-bit integer.
Definition at line 353 of file AMDGPUToROCDL.cpp.
References mlir::OpBuilder::create(), mlir::Builder::getI16Type(), mlir::Builder::getIntegerType(), and mlir::Value::getType().
|
static |
Definition at line 39 of file AMDGPUToROCDL.cpp.
References mlir::OpBuilder::create(), and mlir::Builder::getI1Type().
Referenced by wmmaPushInputOperand(), and wmmaPushOutputOperand().
|
static |
Definition at line 33 of file AMDGPUToROCDL.cpp.
References mlir::OpBuilder::create(), and mlir::Builder::getI32Type().
|
static |
Return the rocdl intrinsic corresponding to a MFMA operation mfma if one exists.
This includes checking to ensure the intrinsic is supported on the architecture you are compiling for.
Definition at line 445 of file AMDGPUToROCDL.cpp.
|
static |
Return the rocdl intrinsic corresponding to a WMMA operation wmma if one exists.
This includes checking to ensure the intrinsic is supported on the architecture you are compiling for.
Definition at line 580 of file AMDGPUToROCDL.cpp.
|
static |
Push an input operand.
If it is a float type, there is nothing to do. If it is an integer type, then we also need to push its signedness (1 for signed, 0 for unsigned) and pack the input 16xi8 vector into a 4xi32 vector. We also need to convert bfloat inputs to i16 to account for the lack of bfloat support in the WMMA intrinsics themselves.
Definition at line 373 of file AMDGPUToROCDL.cpp.
References mlir::TypeConverter::convertType(), mlir::OpBuilder::create(), createI1Constant(), mlir::OpBuilder::createOrFold(), mlir::get(), mlir::Builder::getI16Type(), mlir::Builder::getI32Type(), mlir::Value::getType(), mlir::Type::isBF16(), mlir::Type::isInteger(), mlir::Type::isSignedInteger(), and mlir::Type::isUnsignedInteger().
|
static |
Push the output operand.
In many cases this only pushes the output onto the operand list. But for f16 -> f16 or bf16 -> bf16 intrinsics, since the same number of VGPRs is used, we need to decide whether to store the result in the upper 16 bits of the VGPRs or in the lower part. To store the result in the lower 16 bits, set subwordOffset to 1; otherwise the result will be stored in the upper part.
Definition at line 423 of file AMDGPUToROCDL.cpp.
References clamp(), mlir::OpBuilder::create(), createI1Constant(), mlir::Builder::getI16Type(), mlir::Value::getType(), mlir::Type::isBF16(), mlir::Type::isF16(), and mlir::Type::isInteger().