MLIR
15.0.0git
|
#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/ParallelLoopMapper.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "loops-to-gpu" |
Functions | |
static Value | getDim3Value (const gpu::KernelDim3 &dim3, unsigned pos) |
static Operation::operand_range | getLowerBoundOperands (AffineForOp forOp) |
static Operation::operand_range | getUpperBoundOperands (AffineForOp forOp) |
static Value | getOrCreateStep (AffineForOp forOp, OpBuilder &builder) |
static Value | getOrEmitLowerBound (AffineForOp forOp, OpBuilder &builder) |
static Value | getOrEmitUpperBound (AffineForOp forOp, OpBuilder &builder) |
static LogicalResult | checkAffineLoopNestMappableImpl (AffineForOp forOp, unsigned numDims) |
static LogicalResult | checkAffineLoopNestMappable (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims) |
static bool | isConstantOne (Value value) |
static LogicalResult | convertAffineLoopNestToGPULaunch (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims) |
static Value | deriveStaticUpperBound (Value upperBound, PatternRewriter &rewriter) |
Tries to derive a static upper bound from the defining operation of upperBound. More... | |
static bool | isMappedToProcessor (gpu::Processor processor) |
static unsigned | getLaunchOpArgumentNum (gpu::Processor processor) |
static LogicalResult | processParallelLoop (ParallelOp parallelOp, gpu::LaunchOp launchOp, BlockAndValueMapping &cloningMap, SmallVectorImpl< Operation *> &worklist, DenseMap< gpu::Processor, Value > &bounds, PatternRewriter &rewriter) |
Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted. More... | |
Variables | |
static constexpr StringLiteral | kVisitedAttrName = "SCFToGPU_visited" |
#define DEBUG_TYPE "loops-to-gpu" |
Definition at line 34 of file SCFToGPU.cpp.
|
static |
Definition at line 133 of file SCFToGPU.cpp.
References checkAffineLoopNestMappableImpl(), and mlir::success().
Referenced by convertAffineLoopNestToGPULaunch().
|
static |
Definition at line 107 of file SCFToGPU.cpp.
References mlir::areValuesDefinedAbove(), mlir::Operation::emitError(), getLowerBoundOperands(), getUpperBoundOperands(), and mlir::success().
Referenced by checkAffineLoopNestMappable().
|
static |
Definition at line 283 of file SCFToGPU.cpp.
References checkAffineLoopNestMappable(), mlir::failed(), mlir::failure(), and mlir::success().
Referenced by mlir::convertAffineLoopNestToGPULaunch().
|
static |
Tries to derive a static upper bound from the defining operation of upperBound.
Definition at line 316 of file SCFToGPU.cpp.
References mlir::OpBuilder::create(), and mlir::Value::getDefiningOp().
Referenced by processParallelLoop().
|
static |
Definition at line 58 of file SCFToGPU.cpp.
References mlir::gpu::KernelDim3::x, mlir::gpu::KernelDim3::y, and mlir::gpu::KernelDim3::z.
Referenced by isConstantOne().
|
static |
Definition at line 355 of file SCFToGPU.cpp.
Referenced by processParallelLoop().
|
static |
Definition at line 73 of file SCFToGPU.cpp.
Referenced by checkAffineLoopNestMappableImpl(), mlir::AffineDmaWaitOp::fold(), hasTrivialZeroTripCount(), and printBound().
Definition at line 84 of file SCFToGPU.cpp.
References mlir::OpBuilder::create().
Referenced by isConstantOne().
Definition at line 91 of file SCFToGPU.cpp.
References mlir::lowerAffineLowerBound().
Referenced by isConstantOne().
Definition at line 97 of file SCFToGPU.cpp.
References mlir::lowerAffineUpperBound().
Referenced by isConstantOne().
|
static |
Definition at line 78 of file SCFToGPU.cpp.
Referenced by checkAffineLoopNestMappableImpl(), mlir::AffineDmaWaitOp::fold(), hasTrivialZeroTripCount(), and printBound().
|
static |
Definition at line 171 of file SCFToGPU.cpp.
References mlir::detail::enumerate(), mlir::Value::getDefiningOp(), getDim3Value(), getOrCreateStep(), getOrEmitLowerBound(), getOrEmitUpperBound(), and mlir::Value::replaceAllUsesWith().
|
static |
Definition at line 351 of file SCFToGPU.cpp.
Referenced by processParallelLoop().
|
static |
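Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted.
Specifically, if a dimension of a parallel loop is mapped to a hardware id, this function will
- compute the loop index based on the hardware id and affine map from the mapping and update cloningMap to substitute all uses.
- derive a new upper bound for the hardware id and augment the provided gpu.launch operation accordingly.
- if the upper bound is imprecise, insert a conditional in the gpu.launch and update the rewriter to insert into the conditional's body.
If the dimension is mapped to sequential,
- insert a for loop into the body and update the rewriter to insert into the for loop's body.
- update the cloningMap to replace uses of the index with the index of the new for loop.
In either case,
- append the instructions from the loop's body to the worklist, in reverse order.
To note the end of the current scope in case a loop or conditional was inserted, a sentinel (the gpu.launch operation) is inserted into the worklist. This signals the processor of the worklist to pop the rewriter one scope-level up.
Definition at line 397 of file SCFToGPU.cpp.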
References mlir::ceilDiv(), mlir::OpBuilder::clone(), mlir::sparse_tensor::constantOne(), mlir::OpBuilder::create(), deriveStaticUpperBound(), mlir::Attribute::dyn_cast(), mlir::RewriterBase::eraseOp(), mlir::failed(), mlir::failure(), mlir::AffineMap::get(), mlir::Builder::getAffineDimExpr(), mlir::Builder::getAffineSymbolExpr(), mlir::Value::getDefiningOp(), mlir::OpBuilder::getInsertionPoint(), getLaunchOpArgumentNum(), mlir::gpu::getMappingAttrName(), mlir::Operation::getNumRegions(), mlir::Block::getOperations(), mlir::gpu::getProcessor(), mlir::Operation::getResults(), mlir::Builder::getUnitAttr(), isMappedToProcessor(), kVisitedAttrName, mlir::BlockAndValueMapping::lookupOrDefault(), mlir::BlockAndValueMapping::map(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::OpBuilder::setInsertionPointAfter(), mlir::OpBuilder::setInsertionPointToEnd(), mlir::OpBuilder::setInsertionPointToStart(), mlir::success(), and mlir::Block::without_terminator().
|
static |
Definition at line 55 of file SCFToGPU.cpp.
Referenced by mlir::configureParallelLoopToGPULegality(), mlir::finalizeParallelLoopToGPUConversion(), and processParallelLoop().