MLIR 22.0.0git
OptimizeSharedMemory.cpp File Reference
#include "mlir/Dialect/NVGPU/Transforms/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Transforms.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
#include "mlir/Dialect/NVGPU/Transforms/Passes.h.inc"

Go to the source code of this file.

Classes

class  mlir::nvgpu::impl::OptimizeSharedMemoryBase< DerivedT >

Namespaces

namespace  mlir
 Include the generated interface declarations.
namespace  mlir::nvgpu
namespace  mlir::nvgpu::impl

Macros

#define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY
#define GEN_PASS_DECL_OPTIMIZESHAREDMEMORY
#define GEN_PASS_REGISTRATION_OPTIMIZESHAREDMEMORY

Functions

void mlir::nvgpu::registerOptimizeSharedMemory ()
void mlir::nvgpu::registerOptimizeSharedMemoryPass ()
void mlir::nvgpu::registerNVGPUPasses ()
static Value permuteVectorOffset (OpBuilder &b, Location loc, ArrayRef< Value > indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim)
 Uses srcIndexValue to permute tgtIndexValue via result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize)) + tgtIdxVal % vectorSize. This is done using an optimized sequence of arith operations.
static void transformIndices (OpBuilder &builder, Location loc, SmallVector< Value, 4 > &indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim)
static LogicalResult getShmReadAndWriteOps (Operation *parentOp, Value shmMemRef, SmallVector< Operation *, 16 > &readOps, SmallVector< Operation *, 16 > &writeOps)
 Return all operations within parentOp that read from or write to shmMemRef.

Variables

constexpr int64_t kSharedMemoryLineSizeBytes = 128
 The size of a shared memory line according to NV documentation.
constexpr int64_t kDefaultVectorSizeBits = 128
 We optimize for 128-bit accesses, but this can be made an argument in the future.

Macro Definition Documentation

◆ GEN_PASS_DECL_OPTIMIZESHAREDMEMORY

#define GEN_PASS_DECL_OPTIMIZESHAREDMEMORY

◆ GEN_PASS_DEF_OPTIMIZESHAREDMEMORY

#define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY

Definition at line 27 of file OptimizeSharedMemory.cpp.

◆ GEN_PASS_REGISTRATION_OPTIMIZESHAREDMEMORY

#define GEN_PASS_REGISTRATION_OPTIMIZESHAREDMEMORY

Function Documentation

◆ getShmReadAndWriteOps()

LogicalResult getShmReadAndWriteOps ( Operation * parentOp,
Value shmMemRef,
SmallVector< Operation *, 16 > & readOps,
SmallVector< Operation *, 16 > & writeOps )
static

Return all operations within parentOp that read from or write to shmMemRef.

Definition at line 113 of file OptimizeSharedMemory.cpp.

References mlir::nvgpu::getIndices(), success(), and mlir::Operation::walk().

Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().

◆ permuteVectorOffset()

Value permuteVectorOffset ( OpBuilder & b,
Location loc,
ArrayRef< Value > indices,
MemRefType memrefTy,
int64_t srcDim,
int64_t tgtDim )
static

Uses srcIndexValue to permute tgtIndexValue via result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize)) + tgtIdxVal % vectorSize. This is done using an optimized sequence of arith operations.

Definition at line 46 of file OptimizeSharedMemory.cpp.

Referenced by mlir::nvgpu::registerNVGPUPasses().

◆ transformIndices()

void transformIndices ( OpBuilder & builder,
Location loc,
SmallVector< Value, 4 > & indices,
MemRefType memrefTy,
int64_t srcDim,
int64_t tgtDim )
static

Variable Documentation

◆ kDefaultVectorSizeBits

int64_t kDefaultVectorSizeBits = 128
constexpr

We optimize for 128-bit accesses, but this can be made an argument in the future.

Definition at line 39 of file OptimizeSharedMemory.cpp.

Referenced by mlir::nvgpu::impl::OptimizeSharedMemoryBase< DerivedT >::getDependentDialects(), and mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().

◆ kSharedMemoryLineSizeBytes

int64_t kSharedMemoryLineSizeBytes = 128
constexpr

The size of a shared memory line according to NV documentation.

Definition at line 36 of file OptimizeSharedMemory.cpp.

Referenced by mlir::nvgpu::impl::OptimizeSharedMemoryBase< DerivedT >::getDependentDialects(), and mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().