MLIR  20.0.0git
Macros | Functions | Variables
SCFToGPU.cpp File Reference
#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"
#include <optional>

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "loops-to-gpu"
 

Functions

static Value getDim3Value (const gpu::KernelDim3 &dim3, unsigned pos)
 
static Operation::operand_range getLowerBoundOperands (AffineForOp forOp)
 
static Operation::operand_range getUpperBoundOperands (AffineForOp forOp)
 
static Value getOrCreateStep (AffineForOp forOp, OpBuilder &builder)
 
static Value getOrEmitLowerBound (AffineForOp forOp, OpBuilder &builder)
 
static Value getOrEmitUpperBound (AffineForOp forOp, OpBuilder &builder)
 
static LogicalResult checkAffineLoopNestMappableImpl (AffineForOp forOp, unsigned numDims)
 
static LogicalResult checkAffineLoopNestMappable (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)
 
static LogicalResult convertAffineLoopNestToGPULaunch (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)
 
static Value deriveStaticUpperBound (Value upperBound, PatternRewriter &rewriter)
 Tries to derive a static upper bound from the defining operation of upperBound. More...
 
static bool isMappedToProcessor (gpu::Processor processor)
 
static unsigned getLaunchOpArgumentNum (gpu::Processor processor)
 
static LogicalResult processParallelLoop (ParallelOp parallelOp, gpu::LaunchOp launchOp, IRMapping &cloningMap, SmallVectorImpl< Operation * > &worklist, DenseMap< gpu::Processor, Value > &bounds, PatternRewriter &rewriter)
 Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted. More...
 

Variables

static constexpr StringLiteral kVisitedAttrName = "SCFToGPU_visited"
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "loops-to-gpu"

Definition at line 36 of file SCFToGPU.cpp.

Function Documentation

◆ checkAffineLoopNestMappable()

static LogicalResult checkAffineLoopNestMappable ( AffineForOp  forOp,
unsigned  numBlockDims,
unsigned  numThreadDims 
)
static

Definition at line 136 of file SCFToGPU.cpp.

References checkAffineLoopNestMappableImpl().

Referenced by convertAffineLoopNestToGPULaunch().

◆ checkAffineLoopNestMappableImpl()

static LogicalResult checkAffineLoopNestMappableImpl ( AffineForOp  forOp,
unsigned  numDims 
)
static

◆ convertAffineLoopNestToGPULaunch()

static LogicalResult convertAffineLoopNestToGPULaunch ( AffineForOp  forOp,
unsigned  numBlockDims,
unsigned  numThreadDims 
)
static

Definition at line 281 of file SCFToGPU.cpp.

References checkAffineLoopNestMappable().

◆ deriveStaticUpperBound()

static Value deriveStaticUpperBound ( Value  upperBound,
PatternRewriter &  rewriter 
)
static

Tries to derive a static upper bound from the defining operation of upperBound.

Definition at line 314 of file SCFToGPU.cpp.

References mlir::OpBuilder::create(), and mlir::Value::getDefiningOp().

Referenced by processParallelLoop().

◆ getDim3Value()

static Value getDim3Value ( const gpu::KernelDim3 &  dim3,
unsigned  pos 
)
static

◆ getLaunchOpArgumentNum()

static unsigned getLaunchOpArgumentNum ( gpu::Processor  processor)
static

Definition at line 360 of file SCFToGPU.cpp.

Referenced by processParallelLoop().

◆ getLowerBoundOperands()

static Operation::operand_range getLowerBoundOperands ( AffineForOp  forOp)
static

Definition at line 76 of file SCFToGPU.cpp.

Referenced by checkAffineLoopNestMappableImpl().

◆ getOrCreateStep()

static Value getOrCreateStep ( AffineForOp  forOp,
OpBuilder &  builder 
)
static

Definition at line 87 of file SCFToGPU.cpp.

References mlir::OpBuilder::create().

◆ getOrEmitLowerBound()

static Value getOrEmitLowerBound ( AffineForOp  forOp,
OpBuilder &  builder 
)
static

Definition at line 94 of file SCFToGPU.cpp.

References mlir::lowerAffineLowerBound().

◆ getOrEmitUpperBound()

static Value getOrEmitUpperBound ( AffineForOp  forOp,
OpBuilder &  builder 
)
static

Definition at line 100 of file SCFToGPU.cpp.

References mlir::lowerAffineUpperBound().

◆ getUpperBoundOperands()

static Operation::operand_range getUpperBoundOperands ( AffineForOp  forOp)
static

Definition at line 81 of file SCFToGPU.cpp.

Referenced by checkAffineLoopNestMappableImpl().

◆ isMappedToProcessor()

static bool isMappedToProcessor ( gpu::Processor  processor)
static

Definition at line 356 of file SCFToGPU.cpp.

Referenced by processParallelLoop().

◆ processParallelLoop()

static LogicalResult processParallelLoop ( ParallelOp  parallelOp,
gpu::LaunchOp  launchOp,
IRMapping &  cloningMap,
SmallVectorImpl< Operation * > &  worklist,
DenseMap< gpu::Processor, Value > &  bounds,
PatternRewriter &  rewriter 
)
static

Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted.

Specifically, if a dimension of a parallel loop is mapped to a hardware id, this function will

  • compute the loop index based on the hardware id and affine map from the mapping and update cloningMap to substitute all uses.
  • derive a new upper bound for the hardware id and augment the provided gpu.launch operation accordingly.
  • if the upper bound is imprecise, insert a conditional in the gpu.launch and update the rewriter to insert into the conditional's body.

If the dimension is mapped to sequential,

  • insert a for loop into the body and update the rewriter to insert into the for loop's body.
  • update the cloningMap to replace uses of the index with the index of the new for loop.

In either case,

  • append the instructions from the loop's body to worklist, in reverse order.

To note the end of the current scope in case a loop or conditional was inserted, a sentinel (the gpu.launch operation) is inserted into the worklist. This signals the processor of the worklist to pop the rewriter one scope-level up.

Definition at line 402 of file SCFToGPU.cpp.

References mlir::OpBuilder::create(), deriveStaticUpperBound(), mlir::AffineMap::get(), mlir::Builder::getAffineDimExpr(), mlir::Builder::getAffineSymbolExpr(), mlir::Value::getDefiningOp(), getLaunchOpArgumentNum(), mlir::gpu::getMappingAttrName(), mlir::Block::getOperations(), isMappedToProcessor(), mlir::IRMapping::lookupOrDefault(), mlir::IRMapping::map(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::OpBuilder::setInsertionPointToStart(), and mlir::Block::without_terminator().

Variable Documentation

◆ kVisitedAttrName

constexpr StringLiteral kVisitedAttrName = "SCFToGPU_visited"
static constexpr