|
MLIR
22.0.0git
|
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/LogicalResult.h"
#include <optional>
#include <type_traits>
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"
Go to the source code of this file.
Classes | |
| struct | ForallRewriteResult |
| Struct to return the result of the rewrite of a forall operation. More... | |
Macros | |
| #define | DEBUG_TYPE "gpu-transforms" |
| #define | GET_OP_LIST |
| #define | GET_OP_CLASSES |
Functions | |
| static std::optional< SmallVector< int64_t > > | gpuMmaUnrollOrder (vector::ContractionOp contract) |
| Pick an unrolling order that will allow tensorcore operation to reuse LHS register. More... | |
| static std::optional< SmallVector< int64_t > > | getSubgroupMmaNativeVectorSize (Operation *op, int64_t m, int64_t n, int64_t k) |
| Returns the target vector size for the target operation based on the native vector size specified with m, n, and k. More... | |
| static DiagnosedSilenceableFailure | definiteFailureHelper (std::optional< TransformOpInterface > transformOp, Operation *target, const Twine &message) |
| template<typename MappingKindType > | |
| static DiagnosedSilenceableFailure | checkMappingAttributeTypes (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp) |
| Check if given mapping attributes are one of the desired attributes. More... | |
| template<typename MappingKindType > | |
| static DiagnosedSilenceableFailure | verifyGpuMapping (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp) |
| template<typename OpTy , typename OperationOrBlock > | |
| static void | replaceUnitMappingIdsHelper (RewriterBase &rewriter, Location loc, OperationOrBlock *parent, Value replacement, ArrayRef< int64_t > availableMappingSizes) |
| Helper to replace ids of dimensions known to be 1 by 0 to simplify the IR. More... | |
| static DiagnosedSilenceableFailure | rewriteOneForallCommonImpl (RewriterBase &rewriter, std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > availableMappingSizes, ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder) |
| static DiagnosedSilenceableFailure | checkMappingSpec (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > numParallelIterations, ArrayRef< int64_t > blockOrGridSizes, int factor, bool useLinearMapping=false) |
| static DiagnosedSilenceableFailure | getThreadIdBuilder (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > blockSizes, int64_t warpSize, GpuIdBuilder &gpuIdBuilder) |
| #define DEBUG_TYPE "gpu-transforms" |
Definition at line 55 of file GPUTransformOps.cpp.
| #define GET_OP_CLASSES |
Definition at line 990 of file GPUTransformOps.cpp.
| #define GET_OP_LIST |
|
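template<typename MappingKindType >
DiagnosedSilenceableFailure checkMappingAttributeTypes (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp) | static |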
Check if given mapping attributes are one of the desired attributes.
Definition at line 341 of file GPUTransformOps.cpp.
References definiteFailureHelper(), and mlir::DiagnosedSilenceableFailure::success().
|
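DiagnosedSilenceableFailure checkMappingSpec (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > numParallelIterations, ArrayRef< int64_t > blockOrGridSizes, int factor, bool useLinearMapping=false) | static |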
Definition at line 772 of file GPUTransformOps.cpp.
References mlir::computeProduct(), definiteFailureHelper(), diag(), and mlir::DiagnosedSilenceableFailure::success().
Referenced by getThreadIdBuilder().
|
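DiagnosedSilenceableFailure definiteFailureHelper (std::optional< TransformOpInterface > transformOp, Operation *target, const Twine &message) | static |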
Definition at line 331 of file GPUTransformOps.cpp.
References mlir::emitDefiniteFailure().
Referenced by checkMappingAttributeTypes(), checkMappingSpec(), getThreadIdBuilder(), mlir::transform::gpu::mapNestedForallToThreadsImpl(), rewriteOneForallCommonImpl(), and verifyGpuMapping().
|
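std::optional< SmallVector< int64_t > > getSubgroupMmaNativeVectorSize (Operation *op, int64_t m, int64_t n, int64_t k) | static |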
Returns the target vector size for the target operation based on the native vector size specified with m, n, and k.
Definition at line 223 of file GPUTransformOps.cpp.
References contract(), mlir::Operation::getNumResults(), mlir::Operation::getResultTypes(), mlir::Operation::getUsers(), and mlir::OpTrait::hasElementwiseMappableTraits().
|
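DiagnosedSilenceableFailure getThreadIdBuilder (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > blockSizes, int64_t warpSize, GpuIdBuilder &gpuIdBuilder) | static |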
Definition at line 798 of file GPUTransformOps.cpp.
References checkMappingSpec(), definiteFailureHelper(), diag(), mlir::getConstantIntValues(), and mlir::DiagnosedSilenceableFailure::success().
Referenced by mlir::transform::gpu::mapOneForallToThreadsImpl().
|
std::optional< SmallVector< int64_t > > gpuMmaUnrollOrder (vector::ContractionOp contract) | static |
Pick an unrolling order that will allow tensorcore operation to reuse LHS register.
Definition at line 192 of file GPUTransformOps.cpp.
References contract(), mlir::detail::enumerate(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().
|
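template<typename OpTy , typename OperationOrBlock >
void replaceUnitMappingIdsHelper (RewriterBase &rewriter, Location loc, OperationOrBlock *parent, Value replacement, ArrayRef< int64_t > availableMappingSizes) | static |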
Helper to replace ids of dimensions known to be 1 by 0 to simplify the IR.
Definition at line 465 of file GPUTransformOps.cpp.
References mlir::RewriterBase::replaceAllUsesWith().
|
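DiagnosedSilenceableFailure rewriteOneForallCommonImpl (RewriterBase &rewriter, std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > availableMappingSizes, ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder) | static |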
Definition at line 474 of file GPUTransformOps.cpp.
References mlir::Block::begin(), definiteFailureHelper(), mlir::RewriterBase::eraseOp(), mlir::transform::gpu::IdBuilderResult::errorMsg, mlir::Block::front(), mlir::getConstantIntValues(), mlir::OpBuilder::getInsertionPoint(), mlir::Block::getOperations(), mlir::getValuesSortedByKey(), mlir::transform::gpu::GpuIdBuilder::idBuilder, mlir::IRMapping::lookup(), mlir::IRMapping::map(), mlir::transform::gpu::GpuIdBuilder::mappingAttributes, mlir::transform::gpu::IdBuilderResult::mappingIdOps, mlir::transform::gpu::IdBuilderResult::predicateOps, mlir::RewriterBase::replaceAllUsesWith(), mlir::OpBuilder::setInsertionPoint(), and mlir::DiagnosedSilenceableFailure::success().
Referenced by mlir::transform::gpu::mapForallToBlocksImpl(), and mlir::transform::gpu::mapOneForallToThreadsImpl().
|
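template<typename MappingKindType >
DiagnosedSilenceableFailure verifyGpuMapping (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp) | static |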
Definition at line 418 of file GPUTransformOps.cpp.
References definiteFailureHelper(), mlir::getConstantIntValues(), mlir::DiagnosedSilenceableFailure::succeeded(), and mlir::DiagnosedSilenceableFailure::success().