#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InterleavedRange.h"
#include <type_traits>
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"

Classes
struct	ForallRewriteResult
	Struct to return the result of the rewrite of a forall operation. More...

Macros
#define	DEBUG_TYPE "gpu-transforms"

#define	DEBUG_TYPE_ALIAS "gpu-transforms-alias"

#define	DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")

#define	LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")

#define	DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")

#define	GET_OP_LIST

#define	GET_OP_CLASSES

Functions
static std::optional< SmallVector< int64_t > >	gpuMmaUnrollOrder (vector::ContractionOp contract)
	Pick an unrolling order that allows tensor core operations to reuse the LHS register. More...

static std::optional< SmallVector< int64_t > >	getSubgroupMmaNativeVectorSize (Operation *op, int64_t m, int64_t n, int64_t k)
	Returns the target vector size for the target operation based on the native vector size specified with `m`, `n`, and `k`. More...

static DiagnosedSilenceableFailure	definiteFailureHelper (std::optional< TransformOpInterface > transformOp, Operation *target, const Twine &message)

template<typename MappingKindType >
static DiagnosedSilenceableFailure	checkMappingAttributeTypes (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp)
	Check if each of the given mapping attributes is one of the desired attributes. More...

template<typename MappingKindType >
static DiagnosedSilenceableFailure	verifyGpuMapping (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp)

template<typename OpTy , typename OperationOrBlock >
static void	replaceUnitMappingIdsHelper (RewriterBase &rewriter, Location loc, OperationOrBlock *parent, Value replacement, ArrayRef< int64_t > availableMappingSizes)
	Helper to replace ids of dimensions known to be 1 with 0 to simplify the IR. More...

static DiagnosedSilenceableFailure	rewriteOneForallCommonImpl (RewriterBase &rewriter, std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > availableMappingSizes, ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder)

static DiagnosedSilenceableFailure	checkMappingSpec (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > numParallelIterations, ArrayRef< int64_t > blockOrGridSizes, int factor, bool useLinearMapping=false)

static DiagnosedSilenceableFailure	getThreadIdBuilder (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > blockSizes, int64_t warpSize, GpuIdBuilder &gpuIdBuilder)

Macro Definition Documentation

◆ DBGS

#define DBGS ( ) (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")

Definition at line 55 of file GPUTransformOps.cpp.

◆ DBGS_ALIAS

#define DBGS_ALIAS ( ) (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")

Definition at line 57 of file GPUTransformOps.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE "gpu-transforms"

Definition at line 52 of file GPUTransformOps.cpp.

◆ DEBUG_TYPE_ALIAS

#define DEBUG_TYPE_ALIAS "gpu-transforms-alias"

Definition at line 53 of file GPUTransformOps.cpp.

◆ GET_OP_CLASSES

#define GET_OP_CLASSES

Definition at line 935 of file GPUTransformOps.cpp.

◆ GET_OP_LIST

#define GET_OP_LIST

◆ LDBG

#define LDBG ( X ) LLVM_DEBUG(DBGS() << X << "\n")

Definition at line 56 of file GPUTransformOps.cpp.

Function Documentation

◆ checkMappingAttributeTypes()

template<typename MappingKindType >

static DiagnosedSilenceableFailure checkMappingAttributeTypes	(	std::optional< TransformOpInterface >	transformOp,
		scf::ForallOp	forallOp
	)

static

Check if each of the given mapping attributes is one of the desired attributes.

Definition at line 301 of file GPUTransformOps.cpp.

References definiteFailureHelper(), and mlir::DiagnosedSilenceableFailure::success().

◆ checkMappingSpec()

static DiagnosedSilenceableFailure checkMappingSpec	(	std::optional< TransformOpInterface >	transformOp,
		scf::ForallOp	forallOp,
		ArrayRef< int64_t >	numParallelIterations,
		ArrayRef< int64_t >	blockOrGridSizes,
		int	factor,
		bool	useLinearMapping = `false`
	)

static

Definition at line 727 of file GPUTransformOps.cpp.

References mlir::computeProduct(), definiteFailureHelper(), diag(), and mlir::DiagnosedSilenceableFailure::success().

Referenced by getThreadIdBuilder().

◆ definiteFailureHelper()

static DiagnosedSilenceableFailure definiteFailureHelper	(	std::optional< TransformOpInterface >	transformOp,
		Operation *	target,
		const Twine &	message
	)

static

Definition at line 291 of file GPUTransformOps.cpp.

References mlir::emitDefiniteFailure().

Referenced by checkMappingAttributeTypes(), checkMappingSpec(), getThreadIdBuilder(), mlir::transform::gpu::mapNestedForallToThreadsImpl(), rewriteOneForallCommonImpl(), and verifyGpuMapping().

◆ getSubgroupMmaNativeVectorSize()

static std::optional<SmallVector<int64_t> > getSubgroupMmaNativeVectorSize	(	Operation *	op,
		int64_t	m,
		int64_t	n,
		int64_t	k
	)

static

Returns the target vector size for the target operation based on the native vector size specified with `m`, `n`, and `k`.

Definition at line 183 of file GPUTransformOps.cpp.

◆ getThreadIdBuilder()

static DiagnosedSilenceableFailure getThreadIdBuilder	(	std::optional< TransformOpInterface >	transformOp,
		scf::ForallOp	forallOp,
		ArrayRef< int64_t >	blockSizes,
		int64_t	warpSize,
		GpuIdBuilder &	gpuIdBuilder
	)

static

Definition at line 753 of file GPUTransformOps.cpp.

References checkMappingSpec(), definiteFailureHelper(), diag(), mlir::getConstantIntValues(), and mlir::DiagnosedSilenceableFailure::success().

Referenced by mlir::transform::gpu::mapOneForallToThreadsImpl().

◆ gpuMmaUnrollOrder()

static std::optional<SmallVector<int64_t> > gpuMmaUnrollOrder ( vector::ContractionOp contract )

static

Pick an unrolling order that allows tensor core operations to reuse the LHS register.

Definition at line 152 of file GPUTransformOps.cpp.

References contract(), mlir::detail::enumerate(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().

◆ replaceUnitMappingIdsHelper()

template<typename OpTy , typename OperationOrBlock >

static void replaceUnitMappingIdsHelper	(	RewriterBase &	rewriter,
		Location	loc,
		OperationOrBlock *	parent,
		Value	replacement,
		ArrayRef< int64_t >	availableMappingSizes
	)

static

Helper to replace ids of dimensions known to be 1 with 0 to simplify the IR.

Definition at line 414 of file GPUTransformOps.cpp.

References mlir::RewriterBase::replaceAllUsesWith().

◆ rewriteOneForallCommonImpl()

static DiagnosedSilenceableFailure rewriteOneForallCommonImpl	(	RewriterBase &	rewriter,
		std::optional< TransformOpInterface >	transformOp,
		scf::ForallOp	forallOp,
		ArrayRef< int64_t >	availableMappingSizes,
		ForallRewriteResult &	result,
		const GpuIdBuilder &	gpuIdBuilder
	)

static

Definition at line 423 of file GPUTransformOps.cpp.

References mlir::transform::gpu::IdBuilderResult::activeIdOps, mlir::transform::gpu::IdBuilderResult::activeMappingSizes, mlir::transform::gpu::IdBuilderResult::availableMappingSizes, mlir::Block::begin(), mlir::OpBuilder::create(), definiteFailureHelper(), mlir::RewriterBase::eraseOp(), mlir::Block::front(), mlir::getConstantIntValues(), mlir::OpBuilder::getInsertionPoint(), mlir::Block::getOperations(), mlir::getValuesSortedByKey(), mlir::transform::gpu::GpuIdBuilder::idBuilder, LDBG, mlir::IRMapping::lookup(), mlir::IRMapping::map(), mlir::transform::gpu::GpuIdBuilder::mappingAttributes, mlir::transform::gpu::IdBuilderResult::mappingIdOps, mlir::RewriterBase::replaceAllUsesWith(), mlir::OpBuilder::setInsertionPoint(), and mlir::DiagnosedSilenceableFailure::success().

Referenced by mlir::transform::gpu::mapForallToBlocksImpl(), and mlir::transform::gpu::mapOneForallToThreadsImpl().

◆ verifyGpuMapping()

template<typename MappingKindType >

static DiagnosedSilenceableFailure verifyGpuMapping	(	std::optional< TransformOpInterface >	transformOp,
		scf::ForallOp	forallOp
	)

static

Definition at line 365 of file GPUTransformOps.cpp.

References definiteFailureHelper(), mlir::getConstantIntValues(), mlir::DiagnosedSilenceableFailure::succeeded(), and mlir::DiagnosedSilenceableFailure::success().

Classes

Macros

Functions

Macro Definition Documentation

◆ DBGS

◆ DBGS_ALIAS

◆ DEBUG_TYPE

◆ DEBUG_TYPE_ALIAS

◆ GET_OP_CLASSES

◆ GET_OP_LIST

◆ LDBG

Function Documentation

◆ checkMappingAttributeTypes()

◆ checkMappingSpec()

◆ definiteFailureHelper()

◆ getSubgroupMmaNativeVectorSize()

◆ getThreadIdBuilder()

◆ gpuMmaUnrollOrder()

◆ replaceUnitMappingIdsHelper()

◆ rewriteOneForallCommonImpl()

◆ verifyGpuMapping()