MLIR
20.0.0git
|
#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"
#include <optional>
Go to the source code of this file.
Macros | |
#define | DEBUG_TYPE "loops-to-gpu" |
Functions | |
static Value | getDim3Value (const gpu::KernelDim3 &dim3, unsigned pos) |
static Operation::operand_range | getLowerBoundOperands (AffineForOp forOp) |
static Operation::operand_range | getUpperBoundOperands (AffineForOp forOp) |
static Value | getOrCreateStep (AffineForOp forOp, OpBuilder &builder) |
static Value | getOrEmitLowerBound (AffineForOp forOp, OpBuilder &builder) |
static Value | getOrEmitUpperBound (AffineForOp forOp, OpBuilder &builder) |
static LogicalResult | checkAffineLoopNestMappableImpl (AffineForOp forOp, unsigned numDims) |
static LogicalResult | checkAffineLoopNestMappable (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims) |
static LogicalResult | convertAffineLoopNestToGPULaunch (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims) |
static Value | deriveStaticUpperBound (Value upperBound, PatternRewriter &rewriter) |
Tries to derive a static upper bound from the defining operation of upperBound. More... | |
static bool | isMappedToProcessor (gpu::Processor processor) |
static unsigned | getLaunchOpArgumentNum (gpu::Processor processor) |
static LogicalResult | processParallelLoop (ParallelOp parallelOp, gpu::LaunchOp launchOp, IRMapping &cloningMap, SmallVectorImpl< Operation * > &worklist, DenseMap< gpu::Processor, Value > &bounds, PatternRewriter &rewriter) |
Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted. More... | |
Variables | |
static constexpr StringLiteral | kVisitedAttrName = "SCFToGPU_visited" |
#define DEBUG_TYPE "loops-to-gpu" |
Definition at line 36 of file SCFToGPU.cpp.
|
static |
Definition at line 136 of file SCFToGPU.cpp.
References checkAffineLoopNestMappableImpl().
Referenced by convertAffineLoopNestToGPULaunch().
|
static |
Definition at line 110 of file SCFToGPU.cpp.
References mlir::areValuesDefinedAbove(), mlir::Operation::emitError(), getLowerBoundOperands(), and getUpperBoundOperands().
Referenced by checkAffineLoopNestMappable().
|
static |
Definition at line 281 of file SCFToGPU.cpp.
References checkAffineLoopNestMappable().
|
static |
Tries to derive a static upper bound from the defining operation of upperBound.
Definition at line 314 of file SCFToGPU.cpp.
References mlir::OpBuilder::create(), and mlir::Value::getDefiningOp().
Referenced by processParallelLoop().
|
static |
Definition at line 61 of file SCFToGPU.cpp.
References mlir::gpu::KernelDim3::x, mlir::gpu::KernelDim3::y, and mlir::gpu::KernelDim3::z.
|
static |
Definition at line 360 of file SCFToGPU.cpp.
Referenced by processParallelLoop().
|
static |
Definition at line 76 of file SCFToGPU.cpp.
Referenced by checkAffineLoopNestMappableImpl().
getOrCreateStep(): Definition at line 87 of file SCFToGPU.cpp.
References mlir::OpBuilder::create().
getOrEmitLowerBound(): Definition at line 94 of file SCFToGPU.cpp.
References mlir::lowerAffineLowerBound().
getOrEmitUpperBound(): Definition at line 100 of file SCFToGPU.cpp.
References mlir::lowerAffineUpperBound().
|
static |
Definition at line 81 of file SCFToGPU.cpp.
Referenced by checkAffineLoopNestMappableImpl().
|
static |
Definition at line 356 of file SCFToGPU.cpp.
Referenced by processParallelLoop().
|
static |
Modifies the current transformation state to capture the effect of the given scf.parallel
operation on index substitutions and the operations to be inserted.
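Specifically, if a dimension of a parallel loop is mapped to a hardware id, this function will
- compute the loop index based on the hardware id and the affine map from the mapping, and update cloningMap to substitute all uses.
- derive a new upper bound for the hardware id and augment the provided gpu.launch operation accordingly.
- if the upper bound is imprecise, insert a conditional in the gpu.launch and update the rewriter to insert into the conditional's body.
If the dimension is mapped to sequential,
- insert a for loop into the body and update the rewriter to insert into the for loop's body.
- update the cloningMap to replace uses of the index with the index of the new for loop.
In either case,
- append the operations from the loop's body to the worklist, in reverse order.
To note the end of the current scope in case a loop or conditional was inserted, a sentinel (the gpu.launch operation) is inserted into the worklist. This signals the processor of the worklist to pop the rewriter one scope-level up.
Definition at line 402 of file SCFToGPU.cpp.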
References mlir::OpBuilder::create(), deriveStaticUpperBound(), mlir::AffineMap::get(), mlir::Builder::getAffineDimExpr(), mlir::Builder::getAffineSymbolExpr(), mlir::Value::getDefiningOp(), getLaunchOpArgumentNum(), mlir::gpu::getMappingAttrName(), mlir::Block::getOperations(), isMappedToProcessor(), mlir::IRMapping::lookupOrDefault(), mlir::IRMapping::map(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::OpBuilder::setInsertionPointToStart(), and mlir::Block::without_terminator().
|
static constexpr |
Definition at line 58 of file SCFToGPU.cpp.
Referenced by mlir::configureParallelLoopToGPULegality(), and mlir::finalizeParallelLoopToGPUConversion().