MLIR 23.0.0git
ParallelLoopFusion.cpp File Reference
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/DebugLog.h"
#include <numeric>
#include <optional>
#include <tuple>
#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"

Go to the source code of this file.

Classes

class  mlir::impl::SCFParallelLoopFusionBase< DerivedT >
struct  LoopIV
struct  llvm::DenseMapInfo< LoopIV >

Namespaces

namespace  mlir
 Include the generated interface declarations.
namespace  mlir::impl
 Attribute collections provide a dictionary-like interface.

Macros

#define DEBUG_TYPE   "parallel-loop-fusion"
#define GEN_PASS_DEF_SCFPARALLELLOOPFUSION

Functions

static bool hasNestedParallelOp (ParallelOp ploop)
 Verify there are no nested ParallelOps.
static bool equalIterationSpaces (ParallelOp firstPloop, ParallelOp secondPloop)
 Verify equal iteration spaces.
static bool opsWriteSameMemLocation (Operation *op1, Operation *op2)
 Check if both operations are the same type of memory write op and write to the same memory location (same buffer and same indices).
static bool valsAreEquivalent (Value val1, Value val2, const IRMapping &loopsIVsMap)
 Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent, considering the mapping of induction variables from the first to the second parallel loop.
static std::optional< int64_t > getAddConstant (Value expr, Value base, const IRMapping &loopsIVsMap)
 If the expr value is the result of an integer addition of base and a constant, return the constant.
static bool loadIndexWithinWriteRange (Value loadIndex, OpFoldResult offset, Value writeIndex, int64_t extent, const IRMapping &loopsIVsMap)
static Value getBaseMemref (Operation *op)
 Return the base memref value used by the given memory op.
static bool isLoadOnWrittenVector (memref::LoadOp loadOp, Value writeBase, ValueRange writeIndices, VectorType vecTy, ArrayRef< int64_t > vectorDimForWriteDim, const IRMapping &ivsMap)
 Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vector.store, optionally through a rank-reducing unit-stride subview) of the same buffer.
static bool loadMatchesVectorWrite (memref::LoadOp loadOp, vector::TransferWriteOp writeOp, const IRMapping &ivsMap)
 Recognize scalar memref.load of an element produced by a vector.transfer_write.
static bool loadMatchesVectorStore (memref::LoadOp loadOp, vector::StoreOp storeOp, const IRMapping &ivsMap)
 Recognize scalar memref.load of an element produced by a vector.store.
template<typename OpTy1, typename OpTy2>
static bool opsAccessSameIndicesViaRankReducingSubview (OpTy1 op1, OpTy2 op2, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b)
 Check if both operations access the same positions of the same buffer, but one of the two does it through a rank-reducing full subview of the buffer (the other's base).
template<typename OpTy1, typename OpTy2>
static bool opsAccessSameIndices (OpTy1 op1, OpTy2 op2, const IRMapping &loopsIVsMap, OpBuilder &b)
 Check if both memory read/write operations access the same indices (considering also the mapping of induction variables from the first to the second parallel loop).
static bool loadsFromSameMemoryLocationWrittenBy (Operation *loadOp, Operation *storeOp, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b)
 Check if the loadOp reads from the same memory location (same buffer, same indices and same properties) as written by the storeOp.
static Value getStoreOpTargetBuffer (Operation *op)
static bool canResolveAlias (Operation *loadOp, Operation *storeOp, const IRMapping &loopsIVsMap)
 To be called when mayAlias(val1, val2) is true.
static bool haveNoDataDependenciesExceptSameIndex (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
 Check that the parallel loops have no mixed access to the same buffers.
static bool noIncompatibleDataDependencies (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
 Check that in each loop there are no read ops on the buffers written by the other loop, except when reading from the same exact memory location (same indices) as written in the other loop.
static bool isFusionLegal (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
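 Check if fusion of the two parallel loops is legal, i.e. no nested parallel loops, equal iteration spaces, and no incompatible data dependencies between the loops.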
static std::optional< ParallelOp > interchangeLoops (OpBuilder &builder, ParallelOp &loop, const ArrayRef< int64_t > &indices)
static SmallVector< SmallVector< int64_t > > computeCandidateInterchangePermutations (ParallelOp &firstPloop, ParallelOp &secondPloop, int permBudget=120)
static void applyLoopFusion (ParallelOp &firstPloop, ParallelOp &secondPloop, OpBuilder &builder)
 Prepend operations of firstPloop's body into secondPloop's body.
static void fuseIfLegal (ParallelOp firstPloop, ParallelOp &secondPloop, OpBuilder builder, llvm::function_ref< bool(Value, Value)> mayAlias)
 Check fusion pre-conditions and call fusion if it is possible.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "parallel-loop-fusion"

Definition at line 45 of file ParallelLoopFusion.cpp.

◆ GEN_PASS_DEF_SCFPARALLELLOOPFUSION

#define GEN_PASS_DEF_SCFPARALLELLOOPFUSION

Definition at line 48 of file ParallelLoopFusion.cpp.

Function Documentation

◆ applyLoopFusion()

void applyLoopFusion ( ParallelOp & firstPloop,
ParallelOp & secondPloop,
OpBuilder & builder )
static

Prepend operations of firstPloop's body into secondPloop's body.

Update secondPloop with new loop.

Definition at line 941 of file ParallelLoopFusion.cpp.

References b, mlir::Block::begin(), mlir::Block::front(), mlir::Block::getArguments(), and mlir::Block::getTerminator().

Referenced by fuseIfLegal().

◆ canResolveAlias()

bool canResolveAlias ( Operation * loadOp,
Operation * storeOp,
const IRMapping & loopsIVsMap )
static

To be called when mayAlias(val1, val2) is true.

Check if the potential aliasing between the loadOp and storeOp can be resolved by analyzing their access patterns.

Definition at line 578 of file ParallelLoopFusion.cpp.

◆ computeCandidateInterchangePermutations()

SmallVector< SmallVector< int64_t > > computeCandidateInterchangePermutations ( ParallelOp & firstPloop,
ParallelOp & secondPloop,
int permBudget = 120 )
static

Definition at line 816 of file ParallelLoopFusion.cpp.

References indices.

Referenced by fuseIfLegal().

◆ equalIterationSpaces()

bool equalIterationSpaces ( ParallelOp firstPloop,
ParallelOp secondPloop )
static

Verify equal iteration spaces.

Definition at line 63 of file ParallelLoopFusion.cpp.

References lhs, and rhs.

Referenced by isFusionLegal().

◆ fuseIfLegal()

void fuseIfLegal ( ParallelOp firstPloop,
ParallelOp & secondPloop,
OpBuilder builder,
llvm::function_ref< bool(Value, Value)> mayAlias )
static

◆ getAddConstant()

std::optional< int64_t > getAddConstant ( Value expr,
Value base,
const IRMapping & loopsIVsMap )
static

If the expr value is the result of an integer addition of base and a constant, return the constant.

Definition at line 145 of file ParallelLoopFusion.cpp.

References mlir::Add, mlir::getConstantIntValue(), mlir::Value::getDefiningOp(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getNumResults(), mlir::AffineMap::getNumSymbols(), mlir::AffineMap::getResult(), result, and valsAreEquivalent().

Referenced by loadIndexWithinWriteRange().

◆ getBaseMemref()

Value getBaseMemref ( Operation * op)
static

Return the base memref value used by the given memory op.

Definition at line 306 of file ParallelLoopFusion.cpp.

References load.

Referenced by opsAccessSameIndicesViaRankReducingSubview().

◆ getStoreOpTargetBuffer()

Value getStoreOpTargetBuffer ( Operation * op)
static

◆ hasNestedParallelOp()

bool hasNestedParallelOp ( ParallelOp ploop)
static

Verify there are no nested ParallelOps.

Definition at line 56 of file ParallelLoopFusion.cpp.

References mlir::WalkResult::interrupt().

Referenced by isFusionLegal().

◆ haveNoDataDependenciesExceptSameIndex()

bool haveNoDataDependenciesExceptSameIndex ( ParallelOp firstPloop,
ParallelOp secondPloop,
const IRMapping & firstToSecondPloopIndices,
llvm::function_ref< bool(Value, Value)> mayAlias,
OpBuilder & b )
static

Check that the parallel loops have no mixed access to the same buffers.

Return true if the second parallel loop does not read or write the buffers written by the first loop using different indices.

Definition at line 594 of file ParallelLoopFusion.cpp.

Referenced by noIncompatibleDataDependencies().

◆ interchangeLoops()

std::optional< ParallelOp > interchangeLoops ( OpBuilder & builder,
ParallelOp & loop,
const ArrayRef< int64_t > & indices )
static

◆ isFusionLegal()

bool isFusionLegal ( ParallelOp firstPloop,
ParallelOp secondPloop,
const IRMapping & firstToSecondPloopIndices,
llvm::function_ref< bool(Value, Value)> mayAlias,
OpBuilder & b )
static

Check if fusion of the two parallel loops is legal, i.e. no nested parallel loops, equal iteration spaces, and no incompatible data dependencies between the loops.

Definition at line 731 of file ParallelLoopFusion.cpp.

References b, equalIterationSpaces(), mlir::Operation::getUsers(), hasNestedParallelOp(), mayAlias(), noIncompatibleDataDependencies(), and mlir::DominanceInfo::properlyDominates().

Referenced by fuseIfLegal().

◆ isLoadOnWrittenVector()

bool isLoadOnWrittenVector ( memref::LoadOp loadOp,
Value writeBase,
ValueRange writeIndices,
VectorType vecTy,
ArrayRef< int64_t > vectorDimForWriteDim,
const IRMapping & ivsMap )
static

Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vector.store, optionally through a rank-reducing unit-stride subview) of the same buffer.

This covers the pattern where a vector write stores a full lane pack and a subsequent scalar load reads an element from that lane pack.

EXAMPLE:
  vector.transfer_write V, arg[x, y, ..., 0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32, strided<[1], offset: ?>>
  scf.for iter = c0 to c4 step c1 iter_args(...) -> (f32) {
    %0 = memref.load arg[x, y, ..., iter] : memref<1x128x16x4xf32>
    ...
  }

Definition at line 330 of file ParallelLoopFusion.cpp.

References mlir::Value::getDefiningOp(), and loadIndexWithinWriteRange().

Referenced by loadMatchesVectorStore(), and loadMatchesVectorWrite().

◆ loadIndexWithinWriteRange()

bool loadIndexWithinWriteRange ( Value loadIndex,
OpFoldResult offset,
Value writeIndex,
int64_t extent,
const IRMapping & loopsIVsMap )
static

◆ loadMatchesVectorStore()

bool loadMatchesVectorStore ( memref::LoadOp loadOp,
vector::StoreOp storeOp,
const IRMapping & ivsMap )
static

Recognize scalar memref.load of an element produced by a vector.store.

Definition at line 425 of file ParallelLoopFusion.cpp.

References isLoadOnWrittenVector().

Referenced by loadsFromSameMemoryLocationWrittenBy().

◆ loadMatchesVectorWrite()

bool loadMatchesVectorWrite ( memref::LoadOp loadOp,
vector::TransferWriteOp writeOp,
const IRMapping & ivsMap )
static

◆ loadsFromSameMemoryLocationWrittenBy()

bool loadsFromSameMemoryLocationWrittenBy ( Operation * loadOp,
Operation * storeOp,
const IRMapping & firstToSecondPloopIVsMap,
OpBuilder & b )
static

Check if the loadOp reads from the same memory location (same buffer, same indices and same properties) as written by the storeOp.

Definition at line 524 of file ParallelLoopFusion.cpp.

References b, loadMatchesVectorStore(), loadMatchesVectorWrite(), and opsAccessSameIndices().

◆ noIncompatibleDataDependencies()

bool noIncompatibleDataDependencies ( ParallelOp firstPloop,
ParallelOp secondPloop,
const IRMapping & firstToSecondPloopIndices,
llvm::function_ref< bool(Value, Value)> mayAlias,
OpBuilder & b )
static

Check that in each loop there are no read ops on the buffers written by the other loop, except when reading from the same exact memory location (same indices) as written in the other loop.

Definition at line 713 of file ParallelLoopFusion.cpp.

References b, haveNoDataDependenciesExceptSameIndex(), mlir::IRMapping::map(), and mayAlias().

Referenced by isFusionLegal().

◆ opsAccessSameIndices()

template<typename OpTy1, typename OpTy2>
bool opsAccessSameIndices ( OpTy1 op1,
OpTy2 op2,
const IRMapping & loopsIVsMap,
OpBuilder & b )
static

Check if both memory read/write operations access the same indices (considering also the mapping of induction variables from the first to the second parallel loop).

Definition at line 508 of file ParallelLoopFusion.cpp.

References b, opsAccessSameIndicesViaRankReducingSubview(), and valsAreEquivalent().

Referenced by loadsFromSameMemoryLocationWrittenBy().

◆ opsAccessSameIndicesViaRankReducingSubview()

template<typename OpTy1, typename OpTy2>
bool opsAccessSameIndicesViaRankReducingSubview ( OpTy1 op1,
OpTy2 op2,
const IRMapping & firstToSecondPloopIVsMap,
OpBuilder & b )
static

Check if both operations access the same positions of the same buffer, but one of the two does it through a rank-reducing full subview of the buffer (the other's base).

EXAMPLE:
  memref.store a, buf[c0, i, j] : memref<1x2x2xf32>
  alias = memref.subview buf[0, 0, 0][1, 2, 2][1, 1, 1]: memref<1x2x2xf32> to memref<2x2xf32>
  val = memref.load alias[i, j] : memref<2x2xf32>

Definition at line 455 of file ParallelLoopFusion.cpp.

References b, getBaseMemref(), mlir::memref::isSameViewOrTrivialAlias(), mlir::m_Zero(), mlir::matchPattern(), and valsAreEquivalent().

Referenced by opsAccessSameIndices().

◆ opsWriteSameMemLocation()

bool opsWriteSameMemLocation ( Operation * op1,
Operation * op2 )
static

Check if both operations are the same type of memory write op and write to the same memory location (same buffer and same indices).

Definition at line 82 of file ParallelLoopFusion.cpp.

References mlir::Operation::getName(), and mlir::getType().

◆ valsAreEquivalent()

bool valsAreEquivalent ( Value val1,
Value val2,
const IRMapping & loopsIVsMap )
static

Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent, considering the mapping of induction variables from the first to the second parallel loop.

Definition at line 123 of file ParallelLoopFusion.cpp.

References mlir::Value::getDefiningOp(), mlir::OperationEquivalence::IgnoreLocations, mlir::OperationEquivalence::isEquivalentTo(), mlir::isMemoryEffectFree(), mlir::IRMapping::lookupOrDefault(), and success().

Referenced by getAddConstant(), loadIndexWithinWriteRange(), opsAccessSameIndices(), and opsAccessSameIndicesViaRankReducingSubview().