|
MLIR
22.0.0git
|
#include "mlir/Dialect/NVGPU/Transforms/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Transforms.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
#include "mlir/Dialect/NVGPU/Transforms/Passes.h.inc"

Go to the source code of this file.
Namespaces | |
| mlir | |
| Include the generated interface declarations. | |
| mlir::nvgpu | |
Macros | |
| #define | GEN_PASS_DEF_OPTIMIZESHAREDMEMORY |
Functions | |
| static Value | permuteVectorOffset (OpBuilder &b, Location loc, ArrayRef< Value > indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) |
Uses srcIndexValue to permute tgtIndexValue via `result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize))`. More... | |
| static void | transformIndices (OpBuilder &builder, Location loc, SmallVector< Value, 4 > &indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) |
| static LogicalResult | getShmReadAndWriteOps (Operation *parentOp, Value shmMemRef, SmallVector< Operation *, 16 > &readOps, SmallVector< Operation *, 16 > &writeOps) |
Return all operations within parentOp that read from or write to shmMemRef. More... | |
Variables | |
| constexpr int64_t | kSharedMemoryLineSizeBytes = 128 |
| The size of a shared memory line according to NV documentation. More... | |
| constexpr int64_t | kDefaultVectorSizeBits = 128 |
| We optimize for 128-bit accesses, but this can be made an argument in the future. More... | |
| #define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY |
Definition at line 27 of file OptimizeSharedMemory.cpp.
|
static |
Return all operations within parentOp that read from or write to shmMemRef.
Definition at line 113 of file OptimizeSharedMemory.cpp.
References mlir::nvgpu::getIndices(), and mlir::Operation::walk().
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
static |
Uses srcIndexValue to permute tgtIndexValue via `result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize))`.
This is done using an optimized sequence of `arith` operations.
Definition at line 46 of file OptimizeSharedMemory.cpp.
References mlir::arith::ConstantIndexOp::create(), mlir::OpBuilder::createOrFold(), kDefaultVectorSizeBits, and kSharedMemoryLineSizeBytes.
Referenced by transformIndices().
|
static |
Definition at line 102 of file OptimizeSharedMemory.cpp.
References permuteVectorOffset().
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
constexpr |
We optimize for 128-bit accesses, but this can be made an argument in the future.
Definition at line 39 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites(), and permuteVectorOffset().
|
constexpr |
The size of a shared memory line according to NV documentation.
Definition at line 36 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites(), and permuteVectorOffset().