MLIR
20.0.0git
|
#include "mlir/Dialect/NVGPU/Transforms/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Transforms.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
#include "mlir/Dialect/NVGPU/Transforms/Passes.h.inc"
Go to the source code of this file.
Namespaces | |
mlir | |
Include the generated interface declarations. | |
mlir::nvgpu | |
Macros | |
#define | GEN_PASS_DEF_OPTIMIZESHAREDMEMORY |
Functions | |
static Value | permuteVectorOffset (OpBuilder &b, Location loc, ArrayRef< Value > indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) |
Uses srcIndexValue to permute tgtIndexValue via `result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize))`. More... | |
static void | transformIndices (OpBuilder &builder, Location loc, SmallVector< Value, 4 > &indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) |
static LogicalResult | getShmReadAndWriteOps (Operation *parentOp, Value shmMemRef, SmallVector< Operation *, 16 > &readOps, SmallVector< Operation *, 16 > &writeOps) |
Return all operations within parentOp that read from or write to shmMemRef. More... | |
Variables | |
constexpr int64_t | kSharedMemoryLineSizeBytes = 128 |
The size of a shared memory line according to NV documentation. More... | |
constexpr int64_t | kDefaultVectorSizeBits = 128 |
We optimize for 128-bit accesses, but this can be made an argument in the future. More... | |
#define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY |
Definition at line 28 of file OptimizeSharedMemory.cpp.
|
static |
Return all operations within parentOp that read from or write to shmMemRef.
Definition at line 113 of file OptimizeSharedMemory.cpp.
References mlir::nvgpu::getIndices(), and mlir::Operation::walk().
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
static |
Uses srcIndexValue to permute tgtIndexValue via
`result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize))`.
This is done using an optimized sequence of `arith` operations.
Definition at line 47 of file OptimizeSharedMemory.cpp.
References kDefaultVectorSizeBits, and kSharedMemoryLineSizeBytes.
Referenced by transformIndices().
|
static |
Definition at line 102 of file OptimizeSharedMemory.cpp.
References permuteVectorOffset().
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
constexpr |
We optimize for 128-bit accesses, but this can be made an argument in the future.
Definition at line 40 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites(), and permuteVectorOffset().
|
constexpr |
The size of a shared memory line according to NV documentation.
Definition at line 37 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites(), and permuteVectorOffset().