|
MLIR 23.0.0git
|
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/DebugLog.h"
#include <numeric>
#include <optional>
#include <tuple>
#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
Go to the source code of this file.
Classes | |
| class | mlir::impl::SCFParallelLoopFusionBase< DerivedT > |
| struct | LoopIV |
| struct | llvm::DenseMapInfo< LoopIV > |
Namespaces | |
| namespace | mlir |
| Include the generated interface declarations. | |
| namespace | mlir::impl |
| Attribute collections provide a dictionary-like interface. | |
Macros | |
| #define | DEBUG_TYPE "parallel-loop-fusion" |
| #define | GEN_PASS_DEF_SCFPARALLELLOOPFUSION |
Functions | |
| static bool | hasNestedParallelOp (ParallelOp ploop) |
| Verify there are no nested ParallelOps. | |
| static bool | equalIterationSpaces (ParallelOp firstPloop, ParallelOp secondPloop) |
| Verify equal iteration spaces. | |
| static bool | opsWriteSameMemLocation (Operation *op1, Operation *op2) |
| Check if both operations are the same type of memory write op and write to the same memory location (same buffer and same indices). | |
| static bool | valsAreEquivalent (Value val1, Value val2, const IRMapping &loopsIVsMap) |
| Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent, considering the mapping of induction variables from the first to the second parallel loop. | |
| static std::optional< int64_t > | getAddConstant (Value expr, Value base, const IRMapping &loopsIVsMap) |
| If the expr value is the result of an integer addition of base and a constant, return the constant. | |
| static bool | loadIndexWithinWriteRange (Value loadIndex, OpFoldResult offset, Value writeIndex, int64_t extent, const IRMapping &loopsIVsMap) |
| static Value | getBaseMemref (Operation *op) |
| Return the base memref value used by the given memory op. | |
| static bool | isLoadOnWrittenVector (memref::LoadOp loadOp, Value writeBase, ValueRange writeIndices, VectorType vecTy, ArrayRef< int64_t > vectorDimForWriteDim, const IRMapping &ivsMap) |
| Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vector.store, optionally through a rank-reducing unit-stride subview) of the same buffer. | |
| static bool | loadMatchesVectorWrite (memref::LoadOp loadOp, vector::TransferWriteOp writeOp, const IRMapping &ivsMap) |
| Recognize scalar memref.load of an element produced by a vector.transfer_write. | |
| static bool | loadMatchesVectorStore (memref::LoadOp loadOp, vector::StoreOp storeOp, const IRMapping &ivsMap) |
| Recognize scalar memref.load of an element produced by a vector.store. | |
| template<typename OpTy1, typename OpTy2> | |
| static bool | opsAccessSameIndicesViaRankReducingSubview (OpTy1 op1, OpTy2 op2, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b) |
| Check if both operations access the same positions of the same buffer, but one of the two does it through a rank-reducing full subview of the buffer (the other's base). | |
| template<typename OpTy1, typename OpTy2> | |
| static bool | opsAccessSameIndices (OpTy1 op1, OpTy2 op2, const IRMapping &loopsIVsMap, OpBuilder &b) |
| Check if both memory read/write operations access the same indices (considering also the mapping of induction variables from the first to the second parallel loop). | |
| static bool | loadsFromSameMemoryLocationWrittenBy (Operation *loadOp, Operation *storeOp, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b) |
| Check if the loadOp reads from the same memory location (same buffer, same indices and same properties) as written by the storeOp. | |
| static Value | getStoreOpTargetBuffer (Operation *op) |
| static bool | canResolveAlias (Operation *loadOp, Operation *storeOp, const IRMapping &loopsIVsMap) |
| To be called when mayAlias(val1, val2) is true. | |
| static bool | haveNoDataDependenciesExceptSameIndex (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b) |
| Check that the parallel loops have no mixed access to the same buffers. | |
| static bool | noIncompatibleDataDependencies (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b) |
| Check that in each loop there are no read ops on the buffers written by the other loop, except when reading from the same exact memory location (same indices) as written in the other loop. | |
| static bool | isFusionLegal (ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b) |
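| Check if fusion of the two parallel loops is legal, i.e., no nested parallel loops, equal iteration spaces, and no incompatible data dependencies between the loops. |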
| static std::optional< ParallelOp > | interchangeLoops (OpBuilder &builder, ParallelOp &loop, const ArrayRef< int64_t > &indices) |
| static SmallVector< SmallVector< int64_t > > | computeCandidateInterchangePermutations (ParallelOp &firstPloop, ParallelOp &secondPloop, int permBudget=120) |
| static void | applyLoopFusion (ParallelOp &firstPloop, ParallelOp &secondPloop, OpBuilder &builder) |
| Prepend operations of firstPloop's body into secondPloop's body. | |
| static void | fuseIfLegal (ParallelOp firstPloop, ParallelOp &secondPloop, OpBuilder builder, llvm::function_ref< bool(Value, Value)> mayAlias) |
| Check fusion pre-conditions and call fusion if it is possible. | |
| #define DEBUG_TYPE "parallel-loop-fusion" |
Definition at line 45 of file ParallelLoopFusion.cpp.
| #define GEN_PASS_DEF_SCFPARALLELLOOPFUSION |
Definition at line 48 of file ParallelLoopFusion.cpp.
|
static |
Prepend operations of firstPloop's body into secondPloop's body.
Update secondPloop with the new loop.
Definition at line 941 of file ParallelLoopFusion.cpp.
References b, mlir::Block::begin(), mlir::Block::front(), mlir::Block::getArguments(), and mlir::Block::getTerminator().
Referenced by fuseIfLegal().
|
static |
To be called when mayAlias(val1, val2) is true.
Check if the potential aliasing between the loadOp and storeOp can be resolved by analyzing their access patterns.
Definition at line 578 of file ParallelLoopFusion.cpp.
|
static |
Definition at line 816 of file ParallelLoopFusion.cpp.
References indices.
Referenced by fuseIfLegal().
|
static |
Verify equal iteration spaces.
Definition at line 63 of file ParallelLoopFusion.cpp.
Referenced by isFusionLegal().
|
static |
Check fusion pre-conditions and call fusion if it is possible.
Definition at line 996 of file ParallelLoopFusion.cpp.
References applyLoopFusion(), mlir::IRMapping::clear(), computeCandidateInterchangePermutations(), mlir::Block::getArguments(), interchangeLoops(), isFusionLegal(), mlir::IRMapping::map(), and mayAlias().
Referenced by mlir::scf::naivelyFuseParallelOps().
|
static |
If the expr value is the result of an integer addition of base and a constant, return the constant.
Definition at line 145 of file ParallelLoopFusion.cpp.
References mlir::Add, mlir::getConstantIntValue(), mlir::Value::getDefiningOp(), mlir::AffineMap::getNumDims(), mlir::AffineMap::getNumResults(), mlir::AffineMap::getNumSymbols(), mlir::AffineMap::getResult(), result, and valsAreEquivalent().
Referenced by loadIndexWithinWriteRange().
Return the base memref value used by the given memory op.
Definition at line 306 of file ParallelLoopFusion.cpp.
References load.
Referenced by opsAccessSameIndicesViaRankReducingSubview().
Definition at line 567 of file ParallelLoopFusion.cpp.
Referenced by mlir::impl::SCFParallelLoopFusionBase< DerivedT >::getDependentDialects().
|
static |
Verify there are no nested ParallelOps.
Definition at line 56 of file ParallelLoopFusion.cpp.
References mlir::WalkResult::interrupt().
Referenced by isFusionLegal().
|
static |
Check that the parallel loops have no mixed access to the same buffers.
Return true if the second parallel loop does not read or write the buffers written by the first loop using different indices.
Definition at line 594 of file ParallelLoopFusion.cpp.
Referenced by noIncompatibleDataDependencies().
|
static |
Definition at line 754 of file ParallelLoopFusion.cpp.
References mlir::applyPermutation(), mlir::OpBuilder::atBlockBegin(), b, indices, mlir::invertPermutationVector(), mlir::IRMapping::map(), and mlir::OpBuilder::setInsertionPoint().
Referenced by fuseIfLegal().
|
static |
Check if fusion of the two parallel loops is legal, i.e., no nested parallel loops, equal iteration spaces, and no incompatible data dependencies between the loops.
Definition at line 731 of file ParallelLoopFusion.cpp.
References b, equalIterationSpaces(), mlir::Operation::getUsers(), hasNestedParallelOp(), mayAlias(), noIncompatibleDataDependencies(), and mlir::DominanceInfo::properlyDominates().
Referenced by fuseIfLegal().
|
static |
Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vector.store, optionally through a rank-reducing unit-stride subview) of the same buffer.
This covers the pattern where a vector write stores a full lane pack and a subsequent scalar load reads an element from that lane pack.
EXAMPLE:
  vector.transfer_write V, arg[x, y, ..., 0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32, strided<[1], offset: ?>>
  scf.for iter = c0 to c4 step c1 iter_args(...) -> (f32) {
    %0 = memref.load arg[x, y, ..., iter] : memref<1x128x16x4xf32>
    ...
  }
Definition at line 330 of file ParallelLoopFusion.cpp.
References mlir::Value::getDefiningOp(), and loadIndexWithinWriteRange().
Referenced by loadMatchesVectorStore(), and loadMatchesVectorWrite().
|
static |
Definition at line 214 of file ParallelLoopFusion.cpp.
References getAddConstant(), mlir::getConstantIntValue(), mlir::getConstLoopBounds(), and valsAreEquivalent().
Referenced by isLoadOnWrittenVector().
|
static |
Recognize scalar memref.load of an element produced by a vector.store.
Definition at line 425 of file ParallelLoopFusion.cpp.
References isLoadOnWrittenVector().
Referenced by loadsFromSameMemoryLocationWrittenBy().
|
static |
Recognize scalar memref.load of an element produced by a vector.transfer_write.
Definition at line 395 of file ParallelLoopFusion.cpp.
References mlir::AffineMap::getNumDims(), mlir::AffineMap::getNumResults(), mlir::AffineMap::getPermutationMap(), mlir::AffineMap::getResult(), isLoadOnWrittenVector(), and mlir::AffineMap::isProjectedPermutation().
Referenced by loadsFromSameMemoryLocationWrittenBy().
|
static |
Check if the loadOp reads from the same memory location (same buffer, same indices and same properties) as written by the storeOp.
Definition at line 524 of file ParallelLoopFusion.cpp.
References b, loadMatchesVectorStore(), loadMatchesVectorWrite(), and opsAccessSameIndices().
|
static |
Check that in each loop there are no read ops on the buffers written by the other loop, except when reading from the same exact memory location (same indices) as written in the other loop.
Definition at line 713 of file ParallelLoopFusion.cpp.
References b, haveNoDataDependenciesExceptSameIndex(), mlir::IRMapping::map(), and mayAlias().
Referenced by isFusionLegal().
|
static |
Check if both memory read/write operations access the same indices (considering also the mapping of induction variables from the first to the second parallel loop).
Definition at line 508 of file ParallelLoopFusion.cpp.
References b, opsAccessSameIndicesViaRankReducingSubview(), and valsAreEquivalent().
Referenced by loadsFromSameMemoryLocationWrittenBy().
|
static |
Check if both operations access the same positions of the same buffer, but one of the two does it through a rank-reducing full subview of the buffer (the other's base).
EXAMPLE:
  memref.store a, buf[c0, i, j] : memref<1x2x2xf32>
  alias = memref.subview buf[0, 0, 0][1, 2, 2][1, 1, 1]: memref<1x2x2xf32> to memref<2x2xf32>
  val = memref.load alias[i, j] : memref<2x2xf32>
Definition at line 455 of file ParallelLoopFusion.cpp.
References b, getBaseMemref(), mlir::memref::isSameViewOrTrivialAlias(), mlir::m_Zero(), mlir::matchPattern(), and valsAreEquivalent().
Referenced by opsAccessSameIndices().
Check if both operations are the same type of memory write op and write to the same memory location (same buffer and same indices).
Definition at line 82 of file ParallelLoopFusion.cpp.
References mlir::Operation::getName(), and mlir::getType().
Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent, considering the mapping of induction variables from the first to the second parallel loop.
Definition at line 123 of file ParallelLoopFusion.cpp.
References mlir::Value::getDefiningOp(), mlir::OperationEquivalence::IgnoreLocations, mlir::OperationEquivalence::isEquivalentTo(), mlir::isMemoryEffectFree(), mlir::IRMapping::lookupOrDefault(), and success().
Referenced by getAddConstant(), loadIndexWithinWriteRange(), opsAccessSameIndices(), and opsAccessSameIndicesViaRankReducingSubview().