MLIR 23.0.0git
ParallelLoopFusion.cpp File Reference
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include <optional>
#include <tuple>
#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"

Go to the source code of this file.

Namespaces

namespace  mlir
 Include the generated interface declarations.

Macros

#define GEN_PASS_DEF_SCFPARALLELLOOPFUSION

Functions

static bool hasNestedParallelOp (ParallelOp ploop)
 Verify there are no nested ParallelOps.
static bool equalIterationSpaces (ParallelOp firstPloop, ParallelOp secondPloop)
 Verify equal iteration spaces.
static bool opsWriteSameMemLocation (Operation *op1, Operation *op2)
 Check if both operations are the same type of memory write op and write to the same memory location (same buffer and same indices).
static bool valsAreEquivalent (Value val1, Value val2, const IRMapping &loopsIVsMap)
 Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent, considering the mapping of induction variables from the first to the second parallel loop.
static std::optional< int64_t > getAddConstant (Value expr, Value base, const IRMapping &loopsIVsMap)
 If the expr value is the result of an integer addition of base and a constant, return the constant.
static bool loadIndexWithinWriteRange (Value loadIndex, OpFoldResult offset, Value writeIndex, int64_t extent, const IRMapping &loopsIVsMap)
static Value getBaseMemref (Operation *op)
 Return the base memref value used by the given memory op.
static bool isLoadOnWrittenVector (memref::LoadOp loadOp, Value writeBase, ValueRange writeIndices, VectorType vecTy, ArrayRef< int64_t > vectorDimForWriteDim, const IRMapping &ivsMap)
 Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vector.store, optionally through a rank-reducing unit-stride subview) of the same buffer.
static bool loadMatchesVectorWrite (memref::LoadOp loadOp, vector::TransferWriteOp writeOp, const IRMapping &ivsMap)
 Recognize scalar memref.load of an element produced by a vector.transfer_write.
static bool loadMatchesVectorStore (memref::LoadOp loadOp, vector::StoreOp storeOp, const IRMapping &ivsMap)
 Recognize scalar memref.load of an element produced by a vector.store.
template<typename OpTy1, typename OpTy2>
static bool opsAccessSameIndicesViaRankReducingSubview (OpTy1 op1, OpTy2 op2, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b)
 Check if both operations access the same positions of the same buffer, but one of the two does it through a rank-reducing full subview of the buffer (the other's base).
template<typename OpTy1, typename OpTy2>
static bool opsAccessSameIndices (OpTy1 op1, OpTy2 op2, const IRMapping &loopsIVsMap, OpBuilder &b)
 Check if both memory read/write operations access the same indices (considering also the mapping of induction variables from the first to the second parallel loop).
static bool loadsFromSameMemoryLocationWrittenBy (Operation *loadOp, Operation *storeOp, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b)
 Check if the loadOp reads from the same memory location (same buffer, same indices and same properties) as written by the storeOp.
static Value getStoreOpTargetBuffer (Operation *op)
static bool canResolveAlias (Operation *loadOp, Operation *storeOp, const IRMapping &loopsIVsMap)
 To be called when mayAlias(val1, val2) is true.
static bool haveNoDataDependenciesExceptSameIndex (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
 Check that the parallel loops have no mixed access to the same buffers.
static bool noIncompatibleDataDependencies (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
 Check that in each loop there are no read ops on the buffers written by the other loop, except when reading from the same exact memory location (same indices) as written in the other loop.
static bool isFusionLegal (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
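 Check if fusion of the two parallel loops is legal, i.e., no nested parallel loops, equal iteration spaces, and no incompatible data dependencies between the loops.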
static void fuseIfLegal (ParallelOp firstPloop, ParallelOp &secondPloop, OpBuilder builder, llvm::function_ref< bool(Value, Value)> mayAlias)
 Prepend operations of firstPloop's body into secondPloop's body.

Macro Definition Documentation

◆ GEN_PASS_DEF_SCFPARALLELLOOPFUSION

#define GEN_PASS_DEF_SCFPARALLELLOOPFUSION

Definition at line 42 of file ParallelLoopFusion.cpp.

Function Documentation

◆ canResolveAlias()

bool canResolveAlias ( Operation * loadOp,
Operation * storeOp,
const IRMapping & loopsIVsMap )
static

To be called when mayAlias(val1, val2) is true.

Check if the potential aliasing between the loadOp and storeOp can be resolved by analyzing their access patterns.

Definition at line 572 of file ParallelLoopFusion.cpp.

References loadMatchesVectorStore(), and loadMatchesVectorWrite().

Referenced by haveNoDataDependenciesExceptSameIndex().

◆ equalIterationSpaces()

bool equalIterationSpaces ( ParallelOp firstPloop,
ParallelOp secondPloop )
static

Verify equal iteration spaces.

Definition at line 57 of file ParallelLoopFusion.cpp.

References lhs, and rhs.

Referenced by isFusionLegal().

◆ fuseIfLegal()

void fuseIfLegal ( ParallelOp firstPloop,
ParallelOp & secondPloop,
OpBuilder builder,
llvm::function_ref< bool(Value, Value)> mayAlias )
static

Prepend operations of firstPloop's body into secondPloop's body.

Update secondPloop with new loop.

Definition at line 738 of file ParallelLoopFusion.cpp.

References b, mlir::Block::begin(), mlir::Block::front(), mlir::Block::getArguments(), mlir::Block::getTerminator(), mlir::Operation::getUsers(), isFusionLegal(), mlir::IRMapping::map(), mayAlias(), and mlir::DominanceInfo::properlyDominates().

Referenced by mlir::scf::naivelyFuseParallelOps().

◆ getAddConstant()

std::optional< int64_t > getAddConstant ( Value expr,
Value base,
const IRMapping & loopsIVsMap )
static

If the expr value is the result of an integer addition of base and a constant, return the constant.

Definition at line 139 of file ParallelLoopFusion.cpp.

References mlir::Add, mlir::getConstantIntValue(), mlir::Value::getDefiningOp(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getNumResults(), mlir::AffineMap::getNumSymbols(), mlir::AffineMap::getResult(), result, and valsAreEquivalent().

Referenced by loadIndexWithinWriteRange().

◆ getBaseMemref()

Value getBaseMemref ( Operation * op)
static

Return the base memref value used by the given memory op.

Definition at line 300 of file ParallelLoopFusion.cpp.

References load.

Referenced by opsAccessSameIndicesViaRankReducingSubview().

◆ getStoreOpTargetBuffer()

Value getStoreOpTargetBuffer ( Operation * op)
static

Definition at line 561 of file ParallelLoopFusion.cpp.

Referenced by haveNoDataDependenciesExceptSameIndex().

◆ hasNestedParallelOp()

bool hasNestedParallelOp ( ParallelOp ploop)
static

Verify there are no nested ParallelOps.

Definition at line 50 of file ParallelLoopFusion.cpp.

References mlir::WalkResult::interrupt().

Referenced by isFusionLegal().

◆ haveNoDataDependenciesExceptSameIndex()

bool haveNoDataDependenciesExceptSameIndex ( ParallelOp firstPloop,
ParallelOp secondPloop,
const IRMapping & firstToSecondPloopIndices,
llvm::function_ref< bool(Value, Value)> mayAlias,
OpBuilder & b )
static

Check that the parallel loops have no mixed access to the same buffers.

Return true if the second parallel loop does not read or write the buffers written by the first loop using different indices.

Definition at line 588 of file ParallelLoopFusion.cpp.

References mlir::WalkResult::advance(), b, canResolveAlias(), getStoreOpTargetBuffer(), mlir::WalkResult::interrupt(), loadsFromSameMemoryLocationWrittenBy(), mayAlias(), opsWriteSameMemLocation(), and mlir::memref::skipFullyAliasingOperations().

Referenced by noIncompatibleDataDependencies().

◆ isFusionLegal()

bool isFusionLegal ( ParallelOp firstPloop,
ParallelOp secondPloop,
const IRMapping & firstToSecondPloopIndices,
llvm::function_ref< bool(Value, Value)> mayAlias,
OpBuilder & b )
static

Check if fusion of the two parallel loops is legal, i.e., no nested parallel loops, equal iteration spaces, and no incompatible data dependencies between the loops.

Definition at line 725 of file ParallelLoopFusion.cpp.

References b, equalIterationSpaces(), hasNestedParallelOp(), mayAlias(), and noIncompatibleDataDependencies().

Referenced by fuseIfLegal().

◆ isLoadOnWrittenVector()

bool isLoadOnWrittenVector ( memref::LoadOp loadOp,
Value writeBase,
ValueRange writeIndices,
VectorType vecTy,
ArrayRef< int64_t > vectorDimForWriteDim,
const IRMapping & ivsMap )
static

Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vector.store, optionally through a rank-reducing unit-stride subview) of the same buffer.

This covers the pattern where a vector write stores a full lane pack and a subsequent scalar load reads an element from that lane pack.

EXAMPLE:

  vector.transfer_write V, arg[x, y, ..., 0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32, strided<[1], offset: ?>>
  scf.for iter = c0 to c4 step c1 iter_args(...) -> (f32) {
    %0 = memref.load arg[x, y, ..., iter] : memref<1x128x16x4xf32>
    ...
  }

Definition at line 324 of file ParallelLoopFusion.cpp.

References mlir::Value::getDefiningOp(), and loadIndexWithinWriteRange().

Referenced by loadMatchesVectorStore(), and loadMatchesVectorWrite().

◆ loadIndexWithinWriteRange()

bool loadIndexWithinWriteRange ( Value loadIndex,
OpFoldResult offset,
Value writeIndex,
int64_t extent,
const IRMapping & loopsIVsMap )
static

◆ loadMatchesVectorStore()

bool loadMatchesVectorStore ( memref::LoadOp loadOp,
vector::StoreOp storeOp,
const IRMapping & ivsMap )
static

Recognize scalar memref.load of an element produced by a vector.store.

Definition at line 419 of file ParallelLoopFusion.cpp.

References isLoadOnWrittenVector().

Referenced by canResolveAlias(), and loadsFromSameMemoryLocationWrittenBy().

◆ loadMatchesVectorWrite()

bool loadMatchesVectorWrite ( memref::LoadOp loadOp,
vector::TransferWriteOp writeOp,
const IRMapping & ivsMap )
static

◆ loadsFromSameMemoryLocationWrittenBy()

bool loadsFromSameMemoryLocationWrittenBy ( Operation * loadOp,
Operation * storeOp,
const IRMapping & firstToSecondPloopIVsMap,
OpBuilder & b )
static

Check if the loadOp reads from the same memory location (same buffer, same indices and same properties) as written by the storeOp.

Definition at line 518 of file ParallelLoopFusion.cpp.

References b, loadMatchesVectorStore(), loadMatchesVectorWrite(), and opsAccessSameIndices().

Referenced by haveNoDataDependenciesExceptSameIndex().

◆ noIncompatibleDataDependencies()

bool noIncompatibleDataDependencies ( ParallelOp firstPloop,
ParallelOp secondPloop,
const IRMapping & firstToSecondPloopIndices,
llvm::function_ref< bool(Value, Value)> mayAlias,
OpBuilder & b )
static

Check that in each loop there are no read ops on the buffers written by the other loop, except when reading from the same exact memory location (same indices) as written in the other loop.

Definition at line 707 of file ParallelLoopFusion.cpp.

References b, haveNoDataDependenciesExceptSameIndex(), mlir::IRMapping::map(), and mayAlias().

Referenced by isFusionLegal().

◆ opsAccessSameIndices()

template<typename OpTy1, typename OpTy2>
bool opsAccessSameIndices ( OpTy1 op1,
OpTy2 op2,
const IRMapping & loopsIVsMap,
OpBuilder & b )
static

Check if both memory read/write operations access the same indices (considering also the mapping of induction variables from the first to the second parallel loop).

Definition at line 502 of file ParallelLoopFusion.cpp.

References b, opsAccessSameIndicesViaRankReducingSubview(), and valsAreEquivalent().

Referenced by loadsFromSameMemoryLocationWrittenBy().

◆ opsAccessSameIndicesViaRankReducingSubview()

template<typename OpTy1, typename OpTy2>
bool opsAccessSameIndicesViaRankReducingSubview ( OpTy1 op1,
OpTy2 op2,
const IRMapping & firstToSecondPloopIVsMap,
OpBuilder & b )
static

Check if both operations access the same positions of the same buffer, but one of the two does it through a rank-reducing full subview of the buffer (the other's base).

EXAMPLE:

  memref.store a, buf[c0, i, j] : memref<1x2x2xf32>
  alias = memref.subview buf[0, 0, 0][1, 2, 2][1, 1, 1]: memref<1x2x2xf32> to memref<2x2xf32>
  val = memref.load alias[i, j] : memref<2x2xf32>

Definition at line 449 of file ParallelLoopFusion.cpp.

References b, getBaseMemref(), mlir::memref::isSameViewOrTrivialAlias(), mlir::m_Zero(), mlir::matchPattern(), and valsAreEquivalent().

Referenced by opsAccessSameIndices().

◆ opsWriteSameMemLocation()

bool opsWriteSameMemLocation ( Operation * op1,
Operation * op2 )
static

Check if both operations are the same type of memory write op and write to the same memory location (same buffer and same indices).

Definition at line 76 of file ParallelLoopFusion.cpp.

References mlir::Operation::getName(), and mlir::getType().

Referenced by haveNoDataDependenciesExceptSameIndex().

◆ valsAreEquivalent()

bool valsAreEquivalent ( Value val1,
Value val2,
const IRMapping & loopsIVsMap )
static

Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent, considering the mapping of induction variables from the first to the second parallel loop.

Definition at line 117 of file ParallelLoopFusion.cpp.

References mlir::Value::getDefiningOp(), mlir::OperationEquivalence::IgnoreLocations, mlir::OperationEquivalence::isEquivalentTo(), mlir::isMemoryEffectFree(), mlir::IRMapping::lookupOrDefault(), and success().

Referenced by getAddConstant(), loadIndexWithinWriteRange(), opsAccessSameIndices(), and opsAccessSameIndicesViaRankReducingSubview().