|
MLIR
22.0.0git
|
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"#include "mlir/Conversion/LLVMCommon/Pattern.h"#include "mlir/Conversion/LLVMCommon/VectorPattern.h"#include "mlir/Dialect/Arith/IR/Arith.h"#include "mlir/Dialect/GPU/IR/GPUDialect.h"#include "mlir/Dialect/LLVMIR/LLVMDialect.h"#include "mlir/Dialect/LLVMIR/LLVMTypes.h"#include "mlir/Dialect/LLVMIR/NVVMDialect.h"#include "mlir/Dialect/MemRef/IR/MemRef.h"#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"#include "mlir/Dialect/SCF/Transforms/Patterns.h"#include "mlir/IR/BuiltinTypes.h"#include "mlir/IR/PatternMatch.h"#include "mlir/IR/TypeUtilities.h"#include "mlir/IR/Value.h"#include "mlir/Pass/Pass.h"#include "llvm/Support/Debug.h"#include "llvm/Support/DebugLog.h"#include "llvm/Support/ErrorHandling.h"#include "llvm/Support/raw_ostream.h"#include <optional>#include "mlir/Conversion/Passes.h.inc"Go to the source code of this file.
Namespaces | |
| mlir | |
| Include the generated interface declarations. | |
Macros | |
| #define | DEBUG_TYPE "nvgpu-to-nvvm" |
| #define | GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS |
Functions | |
| static Value | truncToI32 (ImplicitLocOpBuilder &b, Value value) |
| GPU has 32 bit registers, this function truncates values when larger width is not needed. More... | |
| static Type | inferIntrinsicResultType (Type vectorResultType) |
Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation. More... | |
| static Value | convertIntrinsicResult (Location loc, Type intrinsicResultType, Type resultType, Value intrinsicResult, RewriterBase &rewriter) |
Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation. More... | |
| static SmallVector< Value > | unpackOperandVector (ImplicitLocOpBuilder &b, Value operand, NVVM::MMATypes operandPtxType) |
The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide. More... | |
| static bool | isMbarrierShared (nvgpu::MBarrierGroupType barrierType) |
| Returns whether mbarrier object has shared memory address space. More... | |
Variables | |
| constexpr int | exclude4LSB = 4 |
| Number of bits that needs to be excluded when building matrix descriptor for wgmma operations. More... | |
| #define DEBUG_TYPE "nvgpu-to-nvvm" |
Definition at line 34 of file NVGPUToNVVM.cpp.
| #define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS |
Definition at line 37 of file NVGPUToNVVM.cpp.
|
static |
Convert the SSA result of the NVVM intrinsic nvvm.mma.sync (which is always an LLVM struct) into a fragment that is compatible with the vector type of this operation.
This involves extracting elements from the struct and inserting them into an LLVM array. These extra data-movement operations should be canonicalized away by the LLVM backend.
Definition at line 98 of file NVGPUToNVVM.cpp.
References mlir::OpBuilder::createOrFold(), mlir::detail::enumerate(), mlir::get(), mlir::Builder::getContext(), mlir::Builder::getF16Type(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::Builder::getI32IntegerAttr(), mlir::Builder::getI32Type(), and mlir::Value::getType().
Returns the type for the intrinsic given the vectorResultType of the gpu.mma.sync operation.
Definition at line 59 of file NVGPUToNVVM.cpp.
References mlir::get(), and mlir::Type::getContext().
|
static |
Returns whether mbarrier object has shared memory address space.
Definition at line 221 of file NVGPUToNVVM.cpp.
|
static |
GPU has 32 bit registers, this function truncates values when larger width is not needed.
Definition at line 49 of file NVGPUToNVVM.cpp.
References mlir::Builder::getI32Type(), mlir::Type::getIntOrFloatBitWidth(), and mlir::Value::getType().
|
static |
The gpu.mma.sync converter below expects matrix fragment operands to be given as 2D vectors where the rows are 32b or 64b wide.
The nvvm.mma.sync op expects these argments to be a given in a long list of scalars of certain types. This function helps unpack the vector arguments and cast them to the types expected by nvvm.mma.sync.
Definition at line 174 of file NVGPUToNVVM.cpp.
References mlir::get(), mlir::Builder::getF32Type(), mlir::Builder::getF64Type(), mlir::Builder::getI32Type(), mlir::Builder::getI64IntegerAttr(), mlir::Builder::getI64Type(), mlir::Builder::getI8Type(), mlir::Builder::getIntegerType(), and mlir::Value::getType().
|
constexpr |
Number of bits that needs to be excluded when building matrix descriptor for wgmma operations.
Definition at line 45 of file NVGPUToNVVM.cpp.