MLIR: include/mlir/Dialect/NVGPU/Transforms/Transforms.h File Reference

#include "mlir/IR/Operation.h"

Go to the source code of this file.

Namespaces
	mlir
	Include the generated interface declarations.

	mlir::nvgpu

Enumerations
enum class	mlir::nvgpu::MmaSyncF32Lowering { mlir::nvgpu::TF32 = 0 , mlir::nvgpu::TF32x3 = 1 , mlir::nvgpu::Unkown = 2 }
	Rewrite patterns. More...

Functions
llvm::LogicalResult	mlir::nvgpu::optimizeSharedMemoryReadsAndWrites (Operation *parentOp, Value memrefValue)
	Passes. More...

void	mlir::nvgpu::populateMmaSyncF32ToTF32Patterns (RewritePatternSet &patterns, nvgpu::MmaSyncF32Lowering precision=nvgpu::MmaSyncF32Lowering::TF32)
	Collect patterns to convert mma.sync on f32 input and rewrite to use tensor cores with a user-provided level of accuracy: (a) tf32 (1 mma.sync per warp-level matrix-multiply-accumulate), or (b) tf32x3 (3 mma.sync per warp-level matrix-multiply-accumulate). Typically, tf32 tensor core acceleration comes at a cost of accuracy from missing precision bits. More...

void	mlir::nvgpu::createAsyncGroups (RewriterBase &rewriter, Operation *op, bool bypassL1)
	Convert global->shared vector transfers to async device copies. More...