|
MLIR 22.0.0git
|
#include "mlir/Dialect/NVGPU/Transforms/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Transforms.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
#include "mlir/Dialect/NVGPU/Transforms/Passes.h.inc"
Go to the source code of this file.
Classes | |
| class | mlir::nvgpu::impl::OptimizeSharedMemoryBase< DerivedT > |
Namespaces | |
| namespace | mlir |
| Include the generated interface declarations. | |
| namespace | mlir::nvgpu |
| namespace | mlir::nvgpu::impl |
Macros | |
| #define | GEN_PASS_DEF_OPTIMIZESHAREDMEMORY |
| #define | GEN_PASS_DECL_OPTIMIZESHAREDMEMORY |
| #define | GEN_PASS_REGISTRATION_OPTIMIZESHAREDMEMORY |
Functions | |
| void | mlir::nvgpu::registerOptimizeSharedMemory () |
| void | mlir::nvgpu::registerOptimizeSharedMemoryPass () |
| void | mlir::nvgpu::registerNVGPUPasses () |
| static Value | permuteVectorOffset (OpBuilder &b, Location loc, ArrayRef< Value > indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) |
| Uses srcIndexValue to permute tgtIndexValue via result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize)) + tgtIdxVal % vectorSize. This is done using an optimized sequence of arith operations. | |
| static void | transformIndices (OpBuilder &builder, Location loc, SmallVector< Value, 4 > &indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) |
| static LogicalResult | getShmReadAndWriteOps (Operation *parentOp, Value shmMemRef, SmallVector< Operation *, 16 > &readOps, SmallVector< Operation *, 16 > &writeOps) |
| Return all operations within parentOp that read from or write to shmMemRef. | |
Variables | |
| constexpr int64_t | kSharedMemoryLineSizeBytes = 128 |
| The size of a shared memory line according to NV documentation. | |
| constexpr int64_t | kDefaultVectorSizeBits = 128 |
| We optimize for 128-bit accesses, but this can be made an argument in the future. | |
| #define GEN_PASS_DECL_OPTIMIZESHAREDMEMORY |
| #define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY |
Definition at line 27 of file OptimizeSharedMemory.cpp.
| #define GEN_PASS_REGISTRATION_OPTIMIZESHAREDMEMORY |
|
static |
Return all operations within parentOp that read from or write to shmMemRef.
Definition at line 113 of file OptimizeSharedMemory.cpp.
References mlir::nvgpu::getIndices(), success(), and mlir::Operation::walk().
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
static |
Uses srcIndexValue to permute tgtIndexValue via result = xor(floordiv(srcIdxVal, permuteEveryN), floordiv(tgtIdxVal, vectorSize)) + tgtIdxVal % vectorSize. This is done using an optimized sequence of arith operations.
Definition at line 46 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::registerNVGPUPasses().
|
static |
Definition at line 102 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
constexpr |
We optimize for 128-bit accesses, but this can be made an argument in the future.
Definition at line 39 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::impl::OptimizeSharedMemoryBase< DerivedT >::getDependentDialects(), and mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().
|
constexpr |
The size of a shared memory line according to NV documentation.
Definition at line 36 of file OptimizeSharedMemory.cpp.
Referenced by mlir::nvgpu::impl::OptimizeSharedMemoryBase< DerivedT >::getDependentDialects(), and mlir::nvgpu::optimizeSharedMemoryReadsAndWrites().