MLIR  19.0.0git
Namespaces | Macros | Functions | Variables
NVGPUToNVVM.cpp File Reference
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include "mlir/Conversion/Passes.h.inc"

Go to the source code of this file.

Namespaces

 mlir
 Include the generated interface declarations.
 

Macros

#define DEBUG_TYPE   "nvgpu-to-nvvm"
 
#define DBGS()   (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
 
#define DBGSE()   (llvm::dbgs())
 
#define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS
 

Functions

static Value truncToI32 (ImplicitLocOpBuilder &b, Value value)
 GPU has 32 bit registers, this function truncates values when larger width is not needed. More...
 
static Type inferIntrinsicResultType (Type vectorResultType)
 Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation. More...
 
static Value convertIntrinsicResult (Location loc, Type intrinsicResultType, Type resultType, Value intrinsicResult, RewriterBase &rewriter)
 Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation. More...
 
static SmallVector< ValueunpackOperandVector (ImplicitLocOpBuilder &b, Value operand, NVVM::MMATypes operandPtxType)
 The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide. More...
 
static bool isMbarrierShared (nvgpu::MBarrierGroupType barrierType)
 Returns whether mbarrier object has shared memory address space. More...
 

Variables

constexpr int exclude4LSB = 4
 Number of bits that needs to be excluded when building matrix descriptor for wgmma operations. More...
 

Macro Definition Documentation

◆ DBGS

#define DBGS ( )    (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")

Definition at line 34 of file NVGPUToNVVM.cpp.

◆ DBGSE

#define DBGSE ( )    (llvm::dbgs())

Definition at line 35 of file NVGPUToNVVM.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE   "nvgpu-to-nvvm"

Definition at line 33 of file NVGPUToNVVM.cpp.

◆ GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS

#define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS

Definition at line 38 of file NVGPUToNVVM.cpp.

Function Documentation

◆ convertIntrinsicResult()

static Value convertIntrinsicResult ( Location  loc,
Type  intrinsicResultType,
Type  resultType,
Value  intrinsicResult,
RewriterBase rewriter 
)
static

Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation.

This involves extracting elements from the struct and inserting them into an LLVM array. These extra data-movement operations should be canonicalized away by the LLVM backend.

Definition at line 99 of file NVGPUToNVVM.cpp.

References mlir::OpBuilder::create(), mlir::OpBuilder::createOrFold(), mlir::detail::enumerate(), mlir::get(), mlir::Builder::getContext(), mlir::Builder::getF16Type(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::LLVM::getFixedVectorType(), mlir::Builder::getI32IntegerAttr(), mlir::Builder::getI32Type(), and mlir::Value::getType().

◆ inferIntrinsicResultType()

static Type inferIntrinsicResultType ( Type  vectorResultType)
static

Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation.

Definition at line 60 of file NVGPUToNVVM.cpp.

References mlir::get(), mlir::Type::getContext(), mlir::LLVM::getFixedVectorType(), and mlir::LLVM::LLVMStructType::getLiteral().

◆ isMbarrierShared()

static bool isMbarrierShared ( nvgpu::MBarrierGroupType  barrierType)
static

Returns whether mbarrier object has shared memory address space.

Definition at line 222 of file NVGPUToNVVM.cpp.

◆ truncToI32()

static Value truncToI32 ( ImplicitLocOpBuilder b,
Value  value 
)
static

GPU has 32 bit registers, this function truncates values when larger width is not needed.

Definition at line 50 of file NVGPUToNVVM.cpp.

References mlir::ImplicitLocOpBuilder::create(), mlir::Builder::getI32Type(), mlir::Type::getIntOrFloatBitWidth(), and mlir::Value::getType().

◆ unpackOperandVector()

static SmallVector<Value> unpackOperandVector ( ImplicitLocOpBuilder b,
Value  operand,
NVVM::MMATypes  operandPtxType 
)
static

The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide.

The nvvm.mma.sync op expects these argments to be a given in a long list of scalars of certain types. This function helps unpack the vector arguments and cast them to the types expected by nvvm.mma.sync.

Definition at line 175 of file NVGPUToNVVM.cpp.

References mlir::ImplicitLocOpBuilder::create(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::LLVM::getFixedVectorType(), mlir::Builder::getI32Type(), mlir::Builder::getI64IntegerAttr(), mlir::Builder::getI64Type(), mlir::Builder::getI8Type(), mlir::Builder::getIntegerType(), and mlir::Value::getType().

Variable Documentation

◆ exclude4LSB

constexpr int exclude4LSB = 4
constexpr

Number of bits that needs to be excluded when building matrix descriptor for wgmma operations.

Definition at line 46 of file NVGPUToNVVM.cpp.