#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/Support/Debug.h"
#include <optional>

Macros
#define	DEBUG_TYPE "loops-to-gpu"

Functions
static Value	getDim3Value (const gpu::KernelDim3 &dim3, unsigned pos)

static Operation::operand_range	getLowerBoundOperands (AffineForOp forOp)

static Operation::operand_range	getUpperBoundOperands (AffineForOp forOp)

static Value	getOrCreateStep (AffineForOp forOp, OpBuilder &builder)

static Value	getOrEmitLowerBound (AffineForOp forOp, OpBuilder &builder)

static Value	getOrEmitUpperBound (AffineForOp forOp, OpBuilder &builder)

static LogicalResult	checkAffineLoopNestMappableImpl (AffineForOp forOp, unsigned numDims)

static LogicalResult	checkAffineLoopNestMappable (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)

static LogicalResult	convertAffineLoopNestToGPULaunch (AffineForOp forOp, unsigned numBlockDims, unsigned numThreadDims)

static Value	deriveStaticUpperBound (Value upperBound, PatternRewriter &rewriter)
	Tries to derive a static upper bound from the defining operation of `upperBound`. More...

static bool	isMappedToProcessor (gpu::Processor processor)

static unsigned	getLaunchOpArgumentNum (gpu::Processor processor)

static LogicalResult	processParallelLoop (ParallelOp parallelOp, gpu::LaunchOp launchOp, IRMapping &cloningMap, SmallVectorImpl< Operation * > &worklist, DenseMap< gpu::Processor, Value > &bounds, PatternRewriter &rewriter)
	Modifies the current transformation state to capture the effect of the given `scf.parallel` operation on index substitutions and the operations to be inserted. More...

Variables
static constexpr StringLiteral	kVisitedAttrName = "SCFToGPU_visited"

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "loops-to-gpu"

Definition at line 35 of file SCFToGPU.cpp.

Function Documentation

◆ checkAffineLoopNestMappable()

static LogicalResult checkAffineLoopNestMappable	(	AffineForOp	forOp,
		unsigned	numBlockDims,
		unsigned	numThreadDims
	)

static

Definition at line 135 of file SCFToGPU.cpp.

References checkAffineLoopNestMappableImpl().

Referenced by convertAffineLoopNestToGPULaunch().

◆ checkAffineLoopNestMappableImpl()

static LogicalResult checkAffineLoopNestMappableImpl	(	AffineForOp	forOp,
		unsigned	numDims
	)

static

Definition at line 109 of file SCFToGPU.cpp.

References mlir::areValuesDefinedAbove(), mlir::Operation::emitError(), getLowerBoundOperands(), and getUpperBoundOperands().

Referenced by checkAffineLoopNestMappable().

◆ convertAffineLoopNestToGPULaunch()

static LogicalResult convertAffineLoopNestToGPULaunch	(	AffineForOp	forOp,
		unsigned	numBlockDims,
		unsigned	numThreadDims
	)

static

Definition at line 280 of file SCFToGPU.cpp.

References checkAffineLoopNestMappable().

◆ deriveStaticUpperBound()

static Value deriveStaticUpperBound	(	Value	upperBound,
		PatternRewriter &	rewriter
	)

static

Tries to derive a static upper bound from the defining operation of upperBound.

Definition at line 313 of file SCFToGPU.cpp.

References mlir::arith::ConstantIndexOp::create(), and mlir::Value::getDefiningOp().

Referenced by processParallelLoop().

◆ getDim3Value()

static Value getDim3Value	(	const gpu::KernelDim3 &	dim3,
		unsigned	pos
	)

static

Definition at line 60 of file SCFToGPU.cpp.

References mlir::gpu::KernelDim3::x, mlir::gpu::KernelDim3::y, and mlir::gpu::KernelDim3::z.

◆ getLaunchOpArgumentNum()

static unsigned getLaunchOpArgumentNum ( gpu::Processor processor )

static

Definition at line 359 of file SCFToGPU.cpp.

Referenced by processParallelLoop().

◆ getLowerBoundOperands()

static Operation::operand_range getLowerBoundOperands ( AffineForOp forOp )

static

Definition at line 75 of file SCFToGPU.cpp.

Referenced by checkAffineLoopNestMappableImpl().

◆ getOrCreateStep()

static Value getOrCreateStep	(	AffineForOp	forOp,
		OpBuilder &	builder
	)

static

Definition at line 86 of file SCFToGPU.cpp.

References mlir::arith::ConstantIndexOp::create().

◆ getOrEmitLowerBound()

static Value getOrEmitLowerBound	(	AffineForOp	forOp,
		OpBuilder &	builder
	)

static

Definition at line 93 of file SCFToGPU.cpp.

References mlir::lowerAffineLowerBound().

◆ getOrEmitUpperBound()

static Value getOrEmitUpperBound	(	AffineForOp	forOp,
		OpBuilder &	builder
	)

static

Definition at line 99 of file SCFToGPU.cpp.

References mlir::lowerAffineUpperBound().

◆ getUpperBoundOperands()

static Operation::operand_range getUpperBoundOperands ( AffineForOp forOp )

static

Definition at line 80 of file SCFToGPU.cpp.

Referenced by checkAffineLoopNestMappableImpl().

◆ isMappedToProcessor()

static bool isMappedToProcessor ( gpu::Processor processor )

static

Definition at line 355 of file SCFToGPU.cpp.

Referenced by processParallelLoop().

◆ processParallelLoop()

static LogicalResult processParallelLoop	(	ParallelOp	parallelOp,
		gpu::LaunchOp	launchOp,
		IRMapping &	cloningMap,
		SmallVectorImpl< Operation * > &	worklist,
		DenseMap< gpu::Processor, Value > &	bounds,
		PatternRewriter &	rewriter
	)

static

Modifies the current transformation state to capture the effect of the given scf.parallel operation on index substitutions and the operations to be inserted.

Specifically, if a dimension of a parallel loop is mapped to a hardware id, this function will:

- compute the loop index based on the hardware id and affine map from the mapping and update cloningMap to substitute all uses;
- derive a new upper bound for the hardware id and augment the provided gpu.launch operation accordingly;
- if the upper bound is imprecise, insert a conditional in the gpu.launch and update the rewriter to insert into the conditional's body.

If the dimension is mapped to sequential, this function will:

- insert a for loop into the body and update the rewriter to insert into the for loop's body;
- update the cloningMap to replace uses of the index with the index of the new for loop.

In either case, this function will:

- append the instructions from the loop's body to worklist, in reverse order.

To note the end of the current scope in case a loop or conditional was inserted, a sentinel (the gpu.launch operation) is inserted into the worklist. This signals the processor of the worklist to pop the rewriter one scope-level up.

Definition at line 401 of file SCFToGPU.cpp.

References mlir::config, deriveStaticUpperBound(), mlir::AffineMap::get(), mlir::Builder::getAffineDimExpr(), mlir::Builder::getAffineSymbolExpr(), mlir::Value::getDefiningOp(), getLaunchOpArgumentNum(), mlir::gpu::getMappingAttrName(), mlir::Operation::getOperands(), mlir::Block::getOperations(), mlir::Block::getTerminator(), isMappedToProcessor(), mlir::IRMapping::lookupOrDefault(), mlir::IRMapping::map(), mlir::RewriterBase::notifyMatchFailure(), mlir::OpBuilder::setInsertionPoint(), mlir::OpBuilder::setInsertionPointToStart(), and mlir::Block::without_terminator().

Variable Documentation

◆ kVisitedAttrName

constexpr StringLiteral kVisitedAttrName = "SCFToGPU_visited"

static constexpr

Definition at line 57 of file SCFToGPU.cpp.

Referenced by mlir::configureParallelLoopToGPULegality(), and mlir::finalizeParallelLoopToGPUConversion().

Macros

Functions

Variables

Macro Definition Documentation

◆ DEBUG_TYPE

Function Documentation

◆ checkAffineLoopNestMappable()

◆ checkAffineLoopNestMappableImpl()

◆ convertAffineLoopNestToGPULaunch()

◆ deriveStaticUpperBound()

◆ getDim3Value()

◆ getLaunchOpArgumentNum()

◆ getLowerBoundOperands()

◆ getOrCreateStep()

◆ getOrEmitLowerBound()

◆ getOrEmitUpperBound()

◆ getUpperBoundOperands()

◆ isMappedToProcessor()

◆ processParallelLoop()

Variable Documentation

◆ kVisitedAttrName