MLIR  16.0.0git
Macros | Functions | Variables
SCFToGPU.cpp File Reference
#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"
+ Include dependency graph for SCFToGPU.cpp:

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "loops-to-gpu"
 

Functions

static Value getDim3Value (const gpu::KernelDim3 &dim3, unsigned pos)
 
static Operation::operand_range getLowerBoundOperands (AffineForOp forOp)
 
static Operation::operand_range getUpperBoundOperands (AffineForOp forOp)
 
static Value getOrCreateStep (AffineForOp forOp, OpBuilder &builder)
 
static Value getOrEmitLowerBound (AffineForOp forOp, OpBuilder &builder)
 
static Value getOrEmitUpperBound (AffineForOp forOp, OpBuilder &builder)
 
static LogicalResult checkAffineLoopNestMappableImpl (AffineForOp forOp, unsigned numDims)
 
static LogicalResult checkAffineLoopNestMappable (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)
 
static bool isConstantOne (Value value)
 
static LogicalResult convertAffineLoopNestToGPULaunch (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)
 
static Value deriveStaticUpperBound (Value upperBound, PatternRewriter &rewriter)
 Tries to derive a static upper bound from the defining operation of upperBound. More...
 
static bool isMappedToProcessor (gpu::Processor processor)
 
static unsigned getLaunchOpArgumentNum (gpu::Processor processor)
 
static LogicalResult processParallelLoop (ParallelOp parallelOp, gpu::LaunchOp launchOp, BlockAndValueMapping &cloningMap, SmallVectorImpl< Operation *> &worklist, DenseMap< gpu::Processor, Value > &bounds, PatternRewriter &rewriter)
 Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted. More...
 

Variables

static constexpr StringLiteral kVisitedAttrName = "SCFToGPU_visited"
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "loops-to-gpu"

Definition at line 34 of file SCFToGPU.cpp.

Function Documentation

◆ checkAffineLoopNestMappable()

static LogicalResult checkAffineLoopNestMappable ( AffineForOp  forOp,
unsigned  numBlockDims,
unsigned  numThreadDims 
)
static

Definition at line 133 of file SCFToGPU.cpp.

References checkAffineLoopNestMappableImpl(), and mlir::success().

Referenced by convertAffineLoopNestToGPULaunch().

◆ checkAffineLoopNestMappableImpl()

static LogicalResult checkAffineLoopNestMappableImpl ( AffineForOp  forOp,
unsigned  numDims 
)
static

◆ convertAffineLoopNestToGPULaunch()

static LogicalResult convertAffineLoopNestToGPULaunch ( AffineForOp  forOp,
unsigned  numBlockDims,
unsigned  numThreadDims 
)
static

◆ deriveStaticUpperBound()

static Value deriveStaticUpperBound ( Value  upperBound,
PatternRewriter &  rewriter 
)
static

Tries to derive a static upper bound from the defining operation of upperBound.

Definition at line 316 of file SCFToGPU.cpp.

References mlir::OpBuilder::create(), and mlir::Value::getDefiningOp().

Referenced by processParallelLoop().

◆ getDim3Value()

static Value getDim3Value ( const gpu::KernelDim3 &  dim3,
unsigned  pos 
)
static

Definition at line 58 of file SCFToGPU.cpp.

References mlir::gpu::KernelDim3::x, mlir::gpu::KernelDim3::y, and mlir::gpu::KernelDim3::z.

Referenced by isConstantOne().

◆ getLaunchOpArgumentNum()

static unsigned getLaunchOpArgumentNum ( gpu::Processor  processor)
static

Definition at line 362 of file SCFToGPU.cpp.

Referenced by processParallelLoop().

◆ getLowerBoundOperands()

static Operation::operand_range getLowerBoundOperands ( AffineForOp  forOp)
static

◆ getOrCreateStep()

static Value getOrCreateStep ( AffineForOp  forOp,
OpBuilder &  builder 
)
static

Definition at line 84 of file SCFToGPU.cpp.

References mlir::OpBuilder::create().

Referenced by isConstantOne().

◆ getOrEmitLowerBound()

static Value getOrEmitLowerBound ( AffineForOp  forOp,
OpBuilder &  builder 
)
static

Definition at line 91 of file SCFToGPU.cpp.

References mlir::lowerAffineLowerBound().

Referenced by isConstantOne().

◆ getOrEmitUpperBound()

static Value getOrEmitUpperBound ( AffineForOp  forOp,
OpBuilder &  builder 
)
static

Definition at line 97 of file SCFToGPU.cpp.

References mlir::lowerAffineUpperBound().

Referenced by isConstantOne().

◆ getUpperBoundOperands()

static Operation::operand_range getUpperBoundOperands ( AffineForOp  forOp)
static

◆ isConstantOne()

static bool isConstantOne ( Value  value)
static

◆ isMappedToProcessor()

static bool isMappedToProcessor ( gpu::Processor  processor)
static

Definition at line 358 of file SCFToGPU.cpp.

Referenced by processParallelLoop().

◆ processParallelLoop()

static LogicalResult processParallelLoop ( ParallelOp  parallelOp,
gpu::LaunchOp  launchOp,
BlockAndValueMapping &  cloningMap,
SmallVectorImpl< Operation *> &  worklist,
DenseMap< gpu::Processor, Value > &  bounds,
PatternRewriter &  rewriter 
)
static

Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted.

Specifically, if a dimension of a parallel loop is mapped to a hardware id, this function will

  • compute the loop index based on the hardware id and affine map from the mapping and update cloningMap to substitute all uses.
  • derive a new upper bound for the hardware id and augment the provided gpu.launch operation accordingly.
  • if the upper bound is imprecise, insert a conditional in the gpu.launch and update the rewriter to insert into the conditional's body.

If the dimension is mapped to sequential,

  • insert a for loop into the body and update the rewriter to insert into the for loop's body.
  • update the cloningMap to replace uses of the index with the index of the new for loop.

In either case,

  • append the instructions from the loop's body to worklist, in reverse order.

To note the end of the current scope in case a loop or conditional was inserted, a sentinel (the gpu.launch operation) is inserted into the worklist. This signals the processor of the worklist to pop the rewriter one scope-level up.

Definition at line 404 of file SCFToGPU.cpp.

References mlir::ceilDiv(), mlir::OpBuilder::clone(), mlir::sparse_tensor::constantOne(), mlir::OpBuilder::create(), deriveStaticUpperBound(), mlir::Attribute::dyn_cast(), mlir::RewriterBase::eraseOp(), mlir::failed(), mlir::failure(), mlir::AffineMap::get(), mlir::Builder::getAffineDimExpr(), mlir::Builder::getAffineSymbolExpr(), mlir::Value::getDefiningOp(), mlir::OpBuilder::getInsertionPoint(), getLaunchOpArgumentNum(), mlir::gpu::getMappingAttrName(), mlir::Operation::getNumRegions(), mlir::Block::getOperations(), mlir::Operation::getResults(), mlir::Builder::getUnitAttr(), isMappedToProcessor(), kVisitedAttrName, mlir::BlockAndValueMapping::lookupOrDefault(), mlir::BlockAndValueMapping::map(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::OpBuilder::setInsertionPointAfter(), mlir::OpBuilder::setInsertionPointToEnd(), mlir::OpBuilder::setInsertionPointToStart(), mlir::success(), and mlir::Block::without_terminator().

Variable Documentation

◆ kVisitedAttrName

constexpr StringLiteral kVisitedAttrName = "SCFToGPU_visited"
static