MLIR
20.0.0git
|
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <type_traits>
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.cpp.inc"
Go to the source code of this file.
Classes | |
struct | ForallRewriteResult |
Struct to return the result of the rewrite of a forall operation. More... | |
Macros | |
#define | DEBUG_TYPE "gpu-transforms" |
#define | DEBUG_TYPE_ALIAS "gpu-transforms-alias" |
#define | DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") |
#define | LDBG(X) LLVM_DEBUG(DBGS() << X << "\n") |
#define | DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ") |
#define | GET_OP_LIST |
#define | GET_OP_CLASSES |
Functions | |
static std::optional< SmallVector< int64_t > > | gpuMmaUnrollOrder (vector::ContractionOp contract) |
Pick an unrolling order that allows tensor core operations to reuse the LHS register. More... | |
static std::optional< SmallVector< int64_t > > | getSubgroupMmaNativeVectorSize (Operation *op, int64_t m, int64_t n, int64_t k) |
Returns the target vector size for the target operation based on the native vector size specified with m, n, and k. More... | |
static DiagnosedSilenceableFailure | definiteFailureHelper (std::optional< TransformOpInterface > transformOp, Operation *target, const Twine &message) |
template<typename MappingKindType > | |
static DiagnosedSilenceableFailure | checkMappingAttributeTypes (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp) |
Check if given mapping attributes are one of the desired attributes. More... | |
template<typename MappingKindType > | |
static DiagnosedSilenceableFailure | verifyGpuMapping (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp) |
template<typename OpTy , typename OperationOrBlock > | |
static void | replaceUnitMappingIdsHelper (RewriterBase &rewriter, Location loc, OperationOrBlock *parent, Value replacement, ArrayRef< int64_t > availableMappingSizes) |
Helper to replace ids of dimensions known to be 1 by 0 to simplify the IR. More... | |
static DiagnosedSilenceableFailure | rewriteOneForallCommonImpl (RewriterBase &rewriter, std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > availableMappingSizes, ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder) |
static DiagnosedSilenceableFailure | checkMappingSpec (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > numParallelIterations, ArrayRef< int64_t > blockOrGridSizes, int factor, bool useLinearMapping=false) |
static DiagnosedSilenceableFailure | getThreadIdBuilder (std::optional< TransformOpInterface > transformOp, scf::ForallOp forallOp, ArrayRef< int64_t > blockSizes, int64_t warpSize, GpuIdBuilder &gpuIdBuilder) |
#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
Definition at line 53 of file GPUTransformOps.cpp.
#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
Definition at line 55 of file GPUTransformOps.cpp.
#define DEBUG_TYPE "gpu-transforms" |
Definition at line 50 of file GPUTransformOps.cpp.
#define DEBUG_TYPE_ALIAS "gpu-transforms-alias" |
Definition at line 51 of file GPUTransformOps.cpp.
#define GET_OP_CLASSES |
Definition at line 941 of file GPUTransformOps.cpp.
#define GET_OP_LIST |
#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
Definition at line 54 of file GPUTransformOps.cpp.
checkMappingAttributeTypes()
|
static |
Check if given mapping attributes are one of the desired attributes.
Definition at line 292 of file GPUTransformOps.cpp.
References definiteFailureHelper(), and mlir::DiagnosedSilenceableFailure::success().
checkMappingSpec()
|
static |
Definition at line 734 of file GPUTransformOps.cpp.
References mlir::computeProduct(), definiteFailureHelper(), diag(), and mlir::DiagnosedSilenceableFailure::success().
Referenced by getThreadIdBuilder().
definiteFailureHelper()
|
static |
Definition at line 282 of file GPUTransformOps.cpp.
References mlir::emitDefiniteFailure().
Referenced by checkMappingAttributeTypes(), checkMappingSpec(), getThreadIdBuilder(), mlir::transform::gpu::mapNestedForallToThreadsImpl(), rewriteOneForallCommonImpl(), and verifyGpuMapping().
getSubgroupMmaNativeVectorSize()
|
static |
Returns the target vector size for the target operation based on the native vector size specified with m, n, and k.
Definition at line 174 of file GPUTransformOps.cpp.
getThreadIdBuilder()
|
static |
Definition at line 760 of file GPUTransformOps.cpp.
References checkMappingSpec(), definiteFailureHelper(), diag(), mlir::getConstantIntValues(), and mlir::DiagnosedSilenceableFailure::success().
Referenced by mlir::transform::gpu::mapOneForallToThreadsImpl().
gpuMmaUnrollOrder()
|
static |
Pick an unrolling order that allows tensor core operations to reuse the LHS register.
Definition at line 143 of file GPUTransformOps.cpp.
References contract(), mlir::detail::enumerate(), mlir::vector::isParallelIterator(), and mlir::vector::isReductionIterator().
replaceUnitMappingIdsHelper()
|
static |
Helper to replace ids of dimensions known to be 1 by 0 to simplify the IR.
Definition at line 405 of file GPUTransformOps.cpp.
References mlir::RewriterBase::replaceAllUsesWith().
rewriteOneForallCommonImpl()
|
static |
Definition at line 414 of file GPUTransformOps.cpp.
References mlir::transform::gpu::IdBuilderResult::activeIdOps, mlir::transform::gpu::IdBuilderResult::activeMappingSizes, mlir::transform::gpu::IdBuilderResult::availableMappingSizes, mlir::Block::begin(), mlir::OpBuilder::create(), DBGS, definiteFailureHelper(), mlir::RewriterBase::eraseOp(), mlir::Block::front(), mlir::getConstantIntValues(), mlir::OpBuilder::getInsertionPoint(), mlir::Block::getOperations(), mlir::getValuesSortedByKey(), mlir::transform::gpu::GpuIdBuilder::idBuilder, LDBG, mlir::IRMapping::lookup(), mlir::IRMapping::map(), mlir::transform::gpu::GpuIdBuilder::mappingAttributes, mlir::transform::gpu::IdBuilderResult::mappingIdOps, mlir::RewriterBase::replaceAllUsesWith(), mlir::OpBuilder::setInsertionPoint(), and mlir::DiagnosedSilenceableFailure::success().
Referenced by mlir::transform::gpu::mapForallToBlocksImpl(), and mlir::transform::gpu::mapOneForallToThreadsImpl().
verifyGpuMapping()
|
static |
Definition at line 356 of file GPUTransformOps.cpp.
References definiteFailureHelper(), mlir::getConstantIntValues(), mlir::DiagnosedSilenceableFailure::succeeded(), and mlir::DiagnosedSilenceableFailure::success().