MLIR 22.0.0git
NVGPUToNVVM.cpp File Reference
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include "mlir/Conversion/Passes.h.inc"

Go to the source code of this file.

Classes

class  mlir::impl::ConvertNVGPUToNVVMPassBase< DerivedT >

Namespaces

namespace  mlir
 Include the generated interface declarations.
namespace  mlir::impl
 Attribute collections provide a dictionary-like interface.

Macros

#define DEBUG_TYPE   "nvgpu-to-nvvm"
#define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS

Functions

std::unique_ptr<::mlir::Passmlir::impl::createConvertNVGPUToNVVMPass ()
std::unique_ptr<::mlir::Passmlir::createConvertNVGPUToNVVMPass ()
static Value truncToI32 (ImplicitLocOpBuilder &b, Value value)
 GPU has 32 bit registers, this function truncates values when larger width is not needed.
static Type inferIntrinsicResultType (Type vectorResultType)
 Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation.
static Value convertIntrinsicResult (Location loc, Type intrinsicResultType, Type resultType, Value intrinsicResult, RewriterBase &rewriter)
 Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation.
static SmallVector< ValueunpackOperandVector (ImplicitLocOpBuilder &b, Value operand, NVVM::MMATypes operandPtxType)
 The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide.
static bool isMbarrierShared (nvgpu::MBarrierGroupType barrierType)
 Returns whether mbarrier object has shared memory address space.

Variables

constexpr int exclude4LSB = 4
 Number of bits that needs to be excluded when building matrix descriptor for wgmma operations.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "nvgpu-to-nvvm"

Definition at line 34 of file NVGPUToNVVM.cpp.

◆ GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS

#define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS

Definition at line 37 of file NVGPUToNVVM.cpp.

Function Documentation

◆ convertIntrinsicResult()

Value convertIntrinsicResult ( Location loc,
Type intrinsicResultType,
Type resultType,
Value intrinsicResult,
RewriterBase & rewriter )
static

Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation.

This involves extracting elements from the struct and inserting them into an LLVM array. These extra data-movement operations should be canonicalized away by the LLVM backend.

Definition at line 98 of file NVGPUToNVVM.cpp.

References mlir::OpBuilder::createOrFold(), mlir::Builder::getContext(), mlir::Builder::getF16Type(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::Builder::getI32IntegerAttr(), mlir::Builder::getI32Type(), mlir::Value::getType(), and result.

◆ inferIntrinsicResultType()

Type inferIntrinsicResultType ( Type vectorResultType)
static

Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation.

Definition at line 59 of file NVGPUToNVVM.cpp.

References mlir::Type::getContext().

◆ isMbarrierShared()

bool isMbarrierShared ( nvgpu::MBarrierGroupType barrierType)
static

Returns whether mbarrier object has shared memory address space.

Definition at line 221 of file NVGPUToNVVM.cpp.

◆ truncToI32()

Value truncToI32 ( ImplicitLocOpBuilder & b,
Value value )
static

GPU has 32 bit registers, this function truncates values when larger width is not needed.

Definition at line 49 of file NVGPUToNVVM.cpp.

References b, mlir::Type::getIntOrFloatBitWidth(), and mlir::Value::getType().

◆ unpackOperandVector()

SmallVector< Value > unpackOperandVector ( ImplicitLocOpBuilder & b,
Value operand,
NVVM::MMATypes operandPtxType )
static

The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide.

The nvvm.mma.sync op expects these argments to be a given in a long list of scalars of certain types. This function helps unpack the vector arguments and cast them to the types expected by nvvm.mma.sync.

Definition at line 174 of file NVGPUToNVVM.cpp.

References b, mlir::Value::getType(), and result.

Variable Documentation

◆ exclude4LSB

int exclude4LSB = 4
constexpr

Number of bits that needs to be excluded when building matrix descriptor for wgmma operations.

Definition at line 45 of file NVGPUToNVVM.cpp.