MLIR
20.0.0git
|
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include "mlir/Conversion/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
Include the generated interface declarations. | |
Macros | |
#define | DEBUG_TYPE "nvgpu-to-nvvm" |
#define | DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") |
#define | DBGSE() (llvm::dbgs()) |
#define | GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS |
Functions | |
static Value | truncToI32 (ImplicitLocOpBuilder &b, Value value) |
GPU has 32 bit registers, this function truncates values when larger width is not needed. More... | |
static Type | inferIntrinsicResultType (Type vectorResultType) |
Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation. More... | |
static Value | convertIntrinsicResult (Location loc, Type intrinsicResultType, Type resultType, Value intrinsicResult, RewriterBase &rewriter) |
Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation. More... | |
static SmallVector< Value > | unpackOperandVector (ImplicitLocOpBuilder &b, Value operand, NVVM::MMATypes operandPtxType) |
The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide. More... | |
static bool | isMbarrierShared (nvgpu::MBarrierGroupType barrierType) |
Returns whether mbarrier object has shared memory address space. More... | |
Variables | |
constexpr int | exclude4LSB = 4 |
Number of bits that needs to be excluded when building matrix descriptor for wgmma operations. More... | |
#define DBGS | ( | ) | (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") |
Definition at line 35 of file NVGPUToNVVM.cpp.
#define DBGSE | ( | ) | (llvm::dbgs()) |
Definition at line 36 of file NVGPUToNVVM.cpp.
#define DEBUG_TYPE "nvgpu-to-nvvm" |
Definition at line 34 of file NVGPUToNVVM.cpp.
#define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS |
Definition at line 39 of file NVGPUToNVVM.cpp.
|
static |
Convert the SSA result of the NVVM intrinsic nvvm.mma.sync
(which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation.
This involves extracting elements from the struct and inserting them into an LLVM array. These extra data-movement operations should be canonicalized away by the LLVM backend.
Definition at line 100 of file NVGPUToNVVM.cpp.
References mlir::OpBuilder::create(), mlir::OpBuilder::createOrFold(), mlir::detail::enumerate(), mlir::get(), mlir::Builder::getContext(), mlir::Builder::getF16Type(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::LLVM::getFixedVectorType(), mlir::Builder::getI32IntegerAttr(), mlir::Builder::getI32Type(), and mlir::Value::getType().
Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync
operation.
Definition at line 61 of file NVGPUToNVVM.cpp.
References mlir::get(), mlir::Type::getContext(), mlir::LLVM::getFixedVectorType(), and mlir::LLVM::LLVMStructType::getLiteral().
|
static |
Returns whether mbarrier object has shared memory address space.
Definition at line 223 of file NVGPUToNVVM.cpp.
|
static |
GPU has 32 bit registers, this function truncates values when larger width is not needed.
Definition at line 51 of file NVGPUToNVVM.cpp.
References mlir::ImplicitLocOpBuilder::create(), mlir::Builder::getI32Type(), mlir::Type::getIntOrFloatBitWidth(), and mlir::Value::getType().
|
static |
The gpu.mma.sync
converter below expects matrix fragment operands to be given as 2D vectors
where the rows are 32b or 64b wide.
The nvvm.mma.sync
op expects these argments to be a given in a long list of scalars of certain types. This function helps unpack the vector
arguments and cast them to the types expected by nvvm.mma.sync
.
Definition at line 176 of file NVGPUToNVVM.cpp.
References mlir::ImplicitLocOpBuilder::create(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::LLVM::getFixedVectorType(), mlir::Builder::getI32Type(), mlir::Builder::getI64IntegerAttr(), mlir::Builder::getI64Type(), mlir::Builder::getI8Type(), mlir::Builder::getIntegerType(), and mlir::Value::getType().
|
constexpr |
Number of bits that needs to be excluded when building matrix descriptor for wgmma operations.
Definition at line 47 of file NVGPUToNVVM.cpp.