MLIR 22.0.0git
GPUTransformOps.cpp File Reference
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/LogicalResult.h"
#include <optional>
#include <type_traits>
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"

Go to the source code of this file.

Classes

struct  ForallRewriteResult
 Struct to return the result of the rewrite of a forall operation. More...

Macros

#define DEBUG_TYPE   "gpu-transforms"
#define GET_OP_LIST
#define GET_OP_CLASSES

Functions

static std::optional< SmallVector< int64_t > > gpuMmaUnrollOrder (vector::ContractionOp contract)
 Pick an unrolling order that will allow tensor core operations to reuse the LHS register.
static std::optional< SmallVector< int64_t > > getSubgroupMmaNativeVectorSize (Operation *op, int64_t m, int64_t n, int64_t k)
 Returns the target vector size for the target operation based on the native vector size specified with m, n, and k.
static DiagnosedSilenceableFailure definiteFailureHelper (std::optional< TransformOpInterface > transformOp, Operation *target, const Twine &message)
template<typename MappingKindType>
static DiagnosedSilenceableFailure checkMappingAttributeTypes (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp)
 Check whether each of the given mapping attributes is one of the desired attributes.
template<typename MappingKindType>
static DiagnosedSilenceableFailure verifyGpuMapping (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp)
template<typename OpTy, typename OperationOrBlock>
static void replaceUnitMappingIdsHelper (RewriterBase &rewriter, Location loc, OperationOrBlock *parent, Value replacement, ArrayRef< int64_t > availableMappingSizes)
 Helper to replace the ids of dimensions known to be 1 with 0, to simplify the IR.
static DiagnosedSilenceableFailure rewriteOneForallCommonImpl (RewriterBase &rewriter, std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > availableMappingSizes, ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder)
static DiagnosedSilenceableFailure checkMappingSpec (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > numParallelIterations, ArrayRef< int64_t > blockOrGridSizes, int factor, bool useLinearMapping=false)
static DiagnosedSilenceableFailure getThreadIdBuilder (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > blockSizes, int64_t warpSize, GpuIdBuilder &gpuIdBuilder)

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "gpu-transforms"

Definition at line 55 of file GPUTransformOps.cpp.

◆ GET_OP_CLASSES

#define GET_OP_CLASSES

Definition at line 990 of file GPUTransformOps.cpp.

◆ GET_OP_LIST

#define GET_OP_LIST

Function Documentation

◆ checkMappingAttributeTypes()

template<typename MappingKindType>
DiagnosedSilenceableFailure checkMappingAttributeTypes ( std::optional< TransformOpInterface > transformOp,
scf::ForallOp forallOp )
static

Check whether each of the given mapping attributes is one of the desired attributes.

Definition at line 341 of file GPUTransformOps.cpp.

References definiteFailureHelper(), and mlir::DiagnosedSilenceableFailure::success().

Referenced by verifyGpuMapping().

◆ checkMappingSpec()

DiagnosedSilenceableFailure checkMappingSpec ( std::optional< TransformOpInterface > transformOp,
scf::ForallOp forallOp,
ArrayRef< int64_t > numParallelIterations,
ArrayRef< int64_t > blockOrGridSizes,
int factor,
bool useLinearMapping = false )
static

◆ definiteFailureHelper()

DiagnosedSilenceableFailure definiteFailureHelper ( std::optional< TransformOpInterface > transformOp,
Operation * target,
const Twine & message )
static

◆ getSubgroupMmaNativeVectorSize()

std::optional< SmallVector< int64_t > > getSubgroupMmaNativeVectorSize ( Operation * op,
int64_t m,
int64_t n,
int64_t k )
static

Returns the target vector size for the target operation based on the native vector size specified with m, n, and k.

Definition at line 223 of file GPUTransformOps.cpp.

References contract(), mlir::Operation::getNumResults(), mlir::Operation::getResultTypes(), mlir::Operation::getUsers(), and mlir::OpTrait::hasElementwiseMappableTraits().

◆ getThreadIdBuilder()

DiagnosedSilenceableFailure getThreadIdBuilder ( std::optional< TransformOpInterface > transformOp,
scf::ForallOp forallOp,
ArrayRef< int64_t > blockSizes,
int64_t warpSize,
GpuIdBuilder & gpuIdBuilder )
static

◆ gpuMmaUnrollOrder()

std::optional< SmallVector< int64_t > > gpuMmaUnrollOrder ( vector::ContractionOp contract)
static

Pick an unrolling order that will allow tensor core operations to reuse the LHS register.

Definition at line 192 of file GPUTransformOps.cpp.

References contract(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().

◆ replaceUnitMappingIdsHelper()

template<typename OpTy, typename OperationOrBlock>
void replaceUnitMappingIdsHelper ( RewriterBase & rewriter,
Location loc,
OperationOrBlock * parent,
Value replacement,
ArrayRef< int64_t > availableMappingSizes )
static

Helper to replace the ids of dimensions known to be 1 with 0, to simplify the IR.

Definition at line 465 of file GPUTransformOps.cpp.

References mlir::RewriterBase::replaceAllUsesWith(), and replacement().

Referenced by mlir::transform::gpu::mapForallToBlocksImpl(), and mlir::transform::gpu::mapNestedForallToThreadsImpl().

◆ rewriteOneForallCommonImpl()

◆ verifyGpuMapping()

template<typename MappingKindType>
DiagnosedSilenceableFailure verifyGpuMapping ( std::optional< TransformOpInterface > transformOp,
scf::ForallOp forallOp )
static