35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/SetVector.h"
37#include "llvm/ADT/SmallBitVector.h"
38#include "llvm/ADT/TypeSwitch.h"
39#include "llvm/Support/InterleavedRange.h"
41#include "llvm/Support/DebugLog.h"
45#define DEBUG_TYPE "parallel-loop-fusion"
48#define GEN_PASS_DEF_SCFPARALLELLOOPFUSION
49#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
59 return walkResult.wasInterrupted();
64 ParallelOp secondPloop) {
65 if (firstPloop.getNumLoops() != secondPloop.getNumLoops())
71 return std::equal(
lhs.begin(),
lhs.end(),
rhs.begin());
73 return matchOperands(firstPloop.getLowerBound(),
74 secondPloop.getLowerBound()) &&
75 matchOperands(firstPloop.getUpperBound(),
76 secondPloop.getUpperBound()) &&
77 matchOperands(firstPloop.getStep(), secondPloop.getStep());
88 if (!isa<memref::StoreOp, vector::TransferWriteOp, vector::StoreOp>(op1))
90 bool opsAreIdentical =
92 .Case([&](memref::StoreOp storeOp1) {
93 auto storeOp2 = cast<memref::StoreOp>(op2);
94 return (storeOp1.getMemRef() == storeOp2.getMemRef()) &&
95 (storeOp1.getIndices() == storeOp2.getIndices());
97 .Case([&](vector::TransferWriteOp writeOp1) {
98 auto writeOp2 = cast<vector::TransferWriteOp>(op2);
99 return (writeOp1.getBase() == writeOp2.getBase()) &&
100 (writeOp1.getIndices() == writeOp2.getIndices()) &&
101 (writeOp1.getMask() == writeOp2.getMask()) &&
102 (writeOp1.getValueToStore().
getType() ==
103 writeOp2.getValueToStore().getType()) &&
104 (writeOp1.getInBounds() == writeOp2.getInBounds());
106 .Case([&](vector::StoreOp vecStoreOp1) {
107 auto vecStoreOp2 = cast<vector::StoreOp>(op2);
108 return (vecStoreOp1.getBase() == vecStoreOp2.getBase()) &&
109 (vecStoreOp1.getIndices() == vecStoreOp2.getIndices()) &&
110 (vecStoreOp1.getValueToStore().
getType() ==
111 vecStoreOp2.getValueToStore().getType()) &&
112 (vecStoreOp1.getAlignment() == vecStoreOp2.getAlignment()) &&
113 (vecStoreOp1.getNontemporal() ==
114 vecStoreOp2.getNontemporal());
116 .Default([](
Operation *) {
return false; });
117 return opsAreIdentical;
130 if (!val1DefOp || !val2DefOp)
135 val1DefOp, val2DefOp,
150 return constOp.value();
153 return constOp.value();
160 return constOp.value();
163 return constOp.value();
167 if (
auto applyOp = expr.
getDefiningOp<affine::AffineApplyOp>()) {
175 auto bin = dyn_cast<AffineBinaryOpExpr>(
result);
178 auto lhsDim = dyn_cast<AffineDimExpr>(bin.getLHS());
179 auto rhsDim = dyn_cast<AffineDimExpr>(bin.getRHS());
180 auto lhsConst = dyn_cast<AffineConstantExpr>(bin.getLHS());
181 auto rhsConst = dyn_cast<AffineConstantExpr>(bin.getRHS());
182 if (lhsConst && rhsDim)
183 return lhsConst.getValue();
184 if (rhsConst && lhsDim)
185 return rhsConst.getValue();
221 auto getConstLoopBoundsForIV =
222 [](
Value index) -> std::optional<std::tuple<int64_t, int64_t, int64_t>> {
223 auto blockArg = dyn_cast<BlockArgument>(
index);
226 auto *parentOp = blockArg.getOwner()->getParentOp();
227 auto loopLike = dyn_cast<LoopLikeOpInterface>(parentOp);
234 auto ivs = loopLike.getLoopInductionVars();
237 auto it = llvm::find(*ivs, blockArg);
238 if (it == ivs->end())
240 unsigned pos = std::distance(ivs->begin(), it);
241 if (pos >= ranges.size())
243 auto [lb,
ub, step] = ranges[pos];
244 return std::make_tuple(lb,
ub, step);
248 std::optional<int64_t> writeConst =
250 if (!writeConst && writeIndex) {
252 if (
auto bounds = getConstLoopBoundsForIV(writeIndex)) {
253 auto [lb,
ub, step] = *bounds;
254 if (step > 0 &&
ub == lb + step)
262 if (rangeExtent <= 0 || step <= 0)
266 int64_t rangeEnd = rangeStart + rangeExtent;
267 return lb >= rangeStart &&
ub <= rangeEnd;
270 if (offsetConst && writeConst) {
272 int64_t start = *offsetConst + *writeConst;
274 return (*loadConst >= start && *loadConst < start + extent);
275 if (
auto bounds = getConstLoopBoundsForIV(loadIndex)) {
276 auto [lb,
ub, step] = *bounds;
277 return loopIVWithinRange(lb,
ub, step, start, extent);
283 if (offsetConst && *offsetConst == 0 &&
286 if (
auto addConst =
getAddConstant(loadIndex, writeIndex, loopsIVsMap)) {
290 return (*addConst >= start && *addConst < start + extent);
296 if (
auto offsetVal = dyn_cast<Value>(offset)) {
309 .Case([&](memref::LoadOp
load) {
return load.getMemRef(); })
310 .Case([&](memref::StoreOp store) {
return store.getMemRef(); })
311 .Case([&](vector::TransferReadOp read) {
return read.getBase(); })
312 .Case([&](vector::TransferWriteOp write) {
return write.getBase(); })
313 .Case([&](vector::LoadOp
load) {
return load.getBase(); })
314 .Case([&](vector::StoreOp store) {
return store.getBase(); })
337 Value base = writeBase;
341 llvm::SmallBitVector droppedDims;
342 bool hasSubview =
false;
343 auto *ctx = loadOp.getContext();
344 if (
auto subView = base.
getDefiningOp<memref::SubViewOp>()) {
345 if (!subView.hasUnitStride())
347 baseMemref = cast<MemrefValue>(subView.getSource());
348 offsets = llvm::to_vector(subView.getMixedOffsets());
349 droppedDims = subView.getDroppedDims();
352 baseMemref = dyn_cast<MemrefValue>(base);
357 auto loadIndices = loadOp.getIndices();
358 unsigned baseRank = baseMemref.getType().getRank();
359 if ((loadOp.getMemref() != baseMemref) || (loadIndices.size() != baseRank))
362 unsigned writeRank = writeIndices.size();
363 if ((!hasSubview && writeRank != baseRank) ||
364 (hasSubview && offsets.size() != baseRank) ||
365 (vectorDimForWriteDim.size() != writeRank))
368 auto zeroAttr = IntegerAttr::get(IndexType::get(ctx), 0);
369 unsigned writeMemrefDim = 0;
370 for (
unsigned baseDim : llvm::seq(baseRank)) {
371 bool wasDropped = (hasSubview && droppedDims.test(baseDim));
372 int64_t vectorDim = !wasDropped ? vectorDimForWriteDim[writeMemrefDim] : -1;
374 if (vectorDim >= 0) {
375 int64_t dimSize = vecTy.getDimSize(vectorDim);
376 if (dimSize == ShapedType::kDynamic)
380 Value writeIndex = !wasDropped ? writeIndices[writeMemrefDim] :
Value();
396 vector::TransferWriteOp writeOp,
398 auto vecTy = dyn_cast<VectorType>(writeOp.getVector().getType());
402 unsigned writeRank = writeOp.getIndices().size();
410 for (
unsigned vecDim = 0; vecDim < permutationMap.
getNumResults(); ++vecDim) {
411 auto dimExpr = dyn_cast<AffineDimExpr>(permutationMap.
getResult(vecDim));
414 unsigned writeDim = dimExpr.getPosition();
415 if (writeDim >= writeRank || vectorDimForWriteDim[writeDim] != -1)
417 vectorDimForWriteDim[writeDim] = vecDim;
421 vecTy, vectorDimForWriteDim, ivsMap);
426 vector::StoreOp storeOp,
428 auto vecTy = dyn_cast<VectorType>(storeOp.getValueToStore().getType());
432 unsigned writeRank = storeOp.getIndices().size();
433 if (vecTy.getRank() > writeRank)
437 unsigned vecRank = vecTy.getRank();
438 for (
unsigned i = 0; i < vecRank; ++i) {
439 unsigned writeDim = writeRank - vecRank + i;
440 vectorDimForWriteDim[writeDim] = i;
444 vecTy, vectorDimForWriteDim, ivsMap);
454template <
typename OpTy1,
typename OpTy2>
456 OpTy1 op1, OpTy2 op2,
const IRMapping &firstToSecondPloopIVsMap,
460 if (!base1 || !base2)
463 auto accessThroughTrivialSubviewIsSame =
464 [&
b](memref::SubViewOp subView,
ValueRange subViewAccess,
467 LogicalResult resolved = resolveSourceIndicesRankReducingSubview(
468 subView.getLoc(),
b, subView, subViewAccess, resolvedSubviewAccess);
469 if (failed(resolved) ||
470 (resolvedSubviewAccess.size() != sourceAccess.size()))
472 for (
auto [dimIdx, resolvedIndex] :
473 llvm::enumerate(resolvedSubviewAccess)) {
482 if (
auto subView = base1.template getDefiningOp<memref::SubViewOp>();
485 base2, cast<MemrefValue>(subView.getSource())) &&
486 accessThroughTrivialSubviewIsSame(subView, op1.getIndices(),
488 firstToSecondPloopIVsMap))
492 if (
auto subView = base2.template getDefiningOp<memref::SubViewOp>();
495 base1, cast<MemrefValue>(subView.getSource())) &&
496 accessThroughTrivialSubviewIsSame(subView, op2.getIndices(),
498 firstToSecondPloopIVsMap))
507template <
typename OpTy1,
typename OpTy2>
510 auto indices1 = op1.getIndices();
511 auto indices2 = op2.getIndices();
512 if (indices1.size() != indices2.size())
514 for (
auto [idx1, idx2] : llvm::zip(indices1, indices2)) {
525 const IRMapping &firstToSecondPloopIVsMap,
527 if (!loadOp || !storeOp)
530 if (!isa<memref::LoadOp, vector::TransferReadOp, vector::LoadOp>(loadOp))
532 bool accessSameMemory =
534 .Case([&](memref::LoadOp memLoadOp) {
535 if (
auto memStoreOp = dyn_cast<memref::StoreOp>(storeOp))
537 firstToSecondPloopIVsMap,
b);
538 if (
auto vecWriteOp = dyn_cast<vector::TransferWriteOp>(storeOp))
540 firstToSecondPloopIVsMap);
541 if (
auto vecStoreOp = dyn_cast<vector::StoreOp>(storeOp))
543 firstToSecondPloopIVsMap);
546 .Case([&](vector::TransferReadOp vecReadOp) {
547 auto vecWriteOp = dyn_cast<vector::TransferWriteOp>(storeOp);
551 firstToSecondPloopIVsMap,
b) &&
552 (vecReadOp.getMask() == vecWriteOp.getMask()) &&
553 (vecReadOp.getInBounds() == vecWriteOp.getInBounds());
555 .Case([&](vector::LoadOp vecLoadOp) {
556 auto vecStoreOp = dyn_cast<vector::StoreOp>(storeOp);
560 firstToSecondPloopIVsMap,
b) &&
561 (vecLoadOp.getAlignment() == vecStoreOp.getAlignment());
563 .Default([](
Operation *) {
return false; });
564 return accessSameMemory;
569 .Case([&](memref::StoreOp storeOp) {
return storeOp.getMemRef(); })
570 .Case([&](vector::TransferWriteOp writeOp) {
return writeOp.getBase(); })
571 .Case([&](vector::StoreOp vecStoreOp) {
return vecStoreOp.getBase(); })
580 if (
auto transfWriteOp = dyn_cast<vector::TransferWriteOp>(storeOp);
581 transfWriteOp && isa<memref::LoadOp>(loadOp))
584 if (
auto vecStoreOp = dyn_cast<vector::StoreOp>(storeOp);
585 vecStoreOp && isa<memref::LoadOp>(loadOp))
595 ParallelOp firstPloop, ParallelOp secondPloop,
601 llvm::SmallSetVector<Value, 4> buffersWrittenInFirstPloop;
603 auto collectStoreOpsInWalk = [&](
Operation *op) {
604 auto memOpInterf = dyn_cast_if_present<MemoryEffectOpInterface>(op);
617 MemrefValue storeOpBaseMemref = dyn_cast<MemrefValue>(storeOpBase);
618 if (!storeOpBaseMemref)
622 bufferStoresInFirstPloop[buffer].push_back(op);
623 buffersWrittenInFirstPloop.insert(buffer);
629 if (firstPloop.getBody()->walk(collectStoreOpsInWalk).wasInterrupted())
638 auto checkLoadInWalkHasNoIncompatibleDataDeps = [&](
Operation *loadOp) {
639 auto memOpInterf = dyn_cast_if_present<MemoryEffectOpInterface>(loadOp);
655 if (!isa<memref::LoadOp, vector::TransferReadOp, vector::LoadOp>(loadOp) ||
662 for (
Value storedMem : buffersWrittenInFirstPloop)
663 if ((storedMem != loadedOrigBuf) &&
mayAlias(storedMem, loadedOrigBuf) &&
664 !llvm::all_of(bufferStoresInFirstPloop[storedMem],
667 firstToSecondPloopIndices);
672 auto writeOpsIt = bufferStoresInFirstPloop.find(loadedOrigBuf);
673 if (writeOpsIt == bufferStoresInFirstPloop.end())
679 if (writeOps.empty())
686 if (!llvm::all_of(writeOps, [&](
Operation *otherWriteOp) {
695 firstToSecondPloopIndices,
b)) {
704 return !secondPloop.getBody()
705 ->walk(checkLoadInWalkHasNoIncompatibleDataDeps)
714 const IRMapping &firstToSecondPloopIndices,
718 firstPloop, secondPloop, firstToSecondPloopIndices,
mayAlias,
b))
722 secondToFirstPloopIndices.
map(secondPloop.getBody()->getArguments(),
723 firstPloop.getBody()->getArguments());
725 secondPloop, firstPloop, secondToFirstPloopIndices,
mayAlias,
b);
732 const IRMapping &firstToSecondPloopIndices,
753static std::optional<ParallelOp>
756 assert(loop.getNumLoops() ==
indices.size());
757 if (loop.getNumLoops() < 2)
768 auto newOp = ParallelOp::create(builder, loop.getLoc(), newLB, newUB, newStep,
769 loop.getInitVals(),
nullptr);
770 auto ivs = loop.getInductionVars();
774 for (
auto [iv, riv] : llvm::zip(ivs, newIvs)) {
775 mapping.
map(iv, riv);
780 for (
auto &o : loop.getNumReductions()
781 ? loop.getBodyRegion().front()
782 : loop.getBodyRegion().front().without_terminator()) {
804 return llvm::hash_combine(
817 ParallelOp &secondPloop,
818 int permBudget = 120) {
820 if (firstPloop.getNumLoops() < 2 ||
821 firstPloop.getNumLoops() != secondPloop.getNumLoops())
826 llvm::SmallSetVector<LoopIV, 6> unique;
827 for (
unsigned index : llvm::seq(firstPloop.getNumLoops())) {
828 firstIVs[
index].lBound = firstPloop.getLowerBound()[
index];
829 firstIVs[
index].uBound = firstPloop.getUpperBound()[
index];
830 firstIVs[
index].step = firstPloop.getStep()[
index];
831 secondIVs[
index].lBound = secondPloop.getLowerBound()[
index];
832 secondIVs[
index].uBound = secondPloop.getUpperBound()[
index];
833 secondIVs[
index].step = secondPloop.getStep()[
index];
834 unique.insert(firstIVs[
index]);
839 llvm::zip(firstIVs, secondIVs), diffIVs.begin(),
840 [](
auto const &pair) { return std::get<0>(pair) != std::get<1>(pair); });
843 for (
auto [idx, val] : enumerate(diffIVs))
853 std::iota(basic.begin(), basic.end(), 0);
855 if (
indices.empty() && unique.size() == firstIVs.size())
865 if (fIdx != sIdx && firstIVs[fIdx] == secondIVs[sIdx] &&
866 remaps.end() == std::find(remaps.begin(), remaps.end(), sIdx)) {
867 remaps.push_back(sIdx);
874 if (
indices.size() != remaps.size())
878 for (
auto [from, to] : zip(
indices, remaps)) {
882 LDBG() <<
"Collected basic permutations: "
883 << llvm::interleaved_array(basic);
886 if (unique.size() == firstIVs.size()) {
893 assert(unique.size() != firstIVs.size() &&
894 "Expected at least two equal axes");
899 for (
auto iv : unique) {
901 for (
unsigned index : llvm::seq(firstIVs.size())) {
902 if (firstIVs[
index] == iv)
903 group.push_back(
index);
905 if (group.size() > 1)
906 groups.push_back(std::move(group));
912 while (repeat && permBudget) {
914 for (
auto const &[group, groupRemaps] : zip(groups, rmpdGroups)) {
915 repeat |= std::next_permutation(groupRemaps.begin(), groupRemaps.end());
922 for (
auto const &[group, groupRemaps] : zip(groups, rmpdGroups)) {
923 for (
auto [from, to] : zip(group, groupRemaps))
924 extra[from] = basic[to];
926 if (basic != extra) {
927 LDBG() <<
"Collected extra permutations: "
928 << llvm::interleaved_array(extra);
930 extraResults.push_back(std::move(extra));
943 Block *block1 = firstPloop.getBody();
944 Block *block2 = secondPloop.getBody();
946 ValueRange inits2 = secondPloop.getInitVals();
949 newInitVars.append(inits2.begin(), inits2.end());
952 b.setInsertionPoint(secondPloop);
953 auto newSecondPloop = ParallelOp::create(
954 b, secondPloop.getLoc(), secondPloop.getLowerBound(),
955 secondPloop.getUpperBound(), secondPloop.getStep(), newInitVars);
957 Block *newBlock = newSecondPloop.getBody();
961 b.inlineBlockBefore(block2, newBlock, newBlock->
begin(),
963 b.inlineBlockBefore(block1, newBlock, newBlock->
begin(),
966 ValueRange results = newSecondPloop.getResults();
967 if (!results.empty()) {
968 b.setInsertionPointToEnd(newBlock);
973 newReduceArgs.append(reduceArgs2.begin(), reduceArgs2.end());
975 auto newReduceOp = scf::ReduceOp::create(
b, term2.getLoc(), newReduceArgs);
977 for (
auto &&[i, reg] : llvm::enumerate(llvm::concat<Region>(
978 term1.getReductions(), term2.getReductions()))) {
980 Block &newRedBlock = newReduceOp.getReductions()[i].
front();
981 b.inlineBlockBefore(&oldRedBlock, &newRedBlock, newRedBlock.
begin(),
985 firstPloop.replaceAllUsesWith(results.take_front(inits1.size()));
986 secondPloop.replaceAllUsesWith(results.take_back(inits2.size()));
992 secondPloop = newSecondPloop;
996static void fuseIfLegal(ParallelOp firstPloop, ParallelOp &secondPloop,
999 Block *block1 = firstPloop.getBody();
1000 Block *block2 = secondPloop.getBody();
1004 if (
isFusionLegal(firstPloop, secondPloop, firstToSecondPloopIndices,
1016 LDBG() <<
"Applied permutation: " << llvm::interleaved_array(perms);
1019 firstToSecondPloopIndices.
clear();
1021 newLoop->getBody()->getArguments());
1022 if (!
isFusionLegal(firstPloop, *newLoop, firstToSecondPloopIndices,
1024 LDBG() <<
"Rejected: " << newLoop;
1030 secondPloop.replaceAllUsesWith(newLoop->getResults());
1031 secondPloop->erase();
1032 secondPloop = *newLoop;
1043 for (
auto &block : region) {
1044 ploopChains.clear();
1045 ploopChains.push_back({});
1050 bool noSideEffects =
true;
1051 for (
auto &op : block) {
1052 if (
auto ploop = dyn_cast<ParallelOp>(op)) {
1053 if (noSideEffects) {
1054 ploopChains.back().push_back(ploop);
1056 ploopChains.push_back({ploop});
1057 noSideEffects =
true;
1065 for (
int i = 0, e = ploops.size(); i + 1 < e; ++i)
1072struct ParallelLoopFusion
1074 void runOnOperation()
override {
1075 auto &aa = getAnalysis<AliasAnalysis>();
1082 auto val2Def = val2.getDefiningOp();
1086 val2Def ? val2Def->getParentOfType<ParallelOp>() :
nullptr;
1087 if (val1Loop != val2Loop)
1090 return !aa.alias(val1, val2).isNo();
1093 getOperation()->walk([&](
Operation *child) {
1102 return std::make_unique<ParallelLoopFusion>();
static bool mayAlias(Value first, Value second)
Returns true if two values may be referencing aliasing memory.
static bool canResolveAlias(Operation *loadOp, Operation *storeOp, const IRMapping &loopsIVsMap)
To be called when mayAlias(val1, val2) is true.
static std::optional< ParallelOp > interchangeLoops(OpBuilder &builder, ParallelOp &loop, const ArrayRef< int64_t > &indices)
static bool equalIterationSpaces(ParallelOp firstPloop, ParallelOp secondPloop)
Verify equal iteration spaces.
static bool isLoadOnWrittenVector(memref::LoadOp loadOp, Value writeBase, ValueRange writeIndices, VectorType vecTy, ArrayRef< int64_t > vectorDimForWriteDim, const IRMapping &ivsMap)
Recognize scalar memref.load of an element produced by a vector write (vector.transfer_write or vecto...
static bool loadMatchesVectorWrite(memref::LoadOp loadOp, vector::TransferWriteOp writeOp, const IRMapping &ivsMap)
Recognize scalar memref.load of an element produced by a vector.transfer_write.
static std::optional< int64_t > getAddConstant(Value expr, Value base, const IRMapping &loopsIVsMap)
If the expr value is the result of an integer addition of base and a constant, return the constant.
static bool opsAccessSameIndices(OpTy1 op1, OpTy2 op2, const IRMapping &loopsIVsMap, OpBuilder &b)
Check if both memory read/write operations access the same indices (considering also the mapping of i...
static Value getStoreOpTargetBuffer(Operation *op)
static void applyLoopFusion(ParallelOp &firstPloop, ParallelOp &secondPloop, OpBuilder &builder)
Prepend operations of firstPloop's body into secondPloop's body.
static bool haveNoDataDependenciesExceptSameIndex(ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
Check that the parallel loops have no mixed access to the same buffers.
static Value getBaseMemref(Operation *op)
Return the base memref value used by the given memory op.
static bool loadsFromSameMemoryLocationWrittenBy(Operation *loadOp, Operation *storeOp, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b)
Check if the loadOp reads from the same memory location (same buffer, same indices and same propertie...
static SmallVector< SmallVector< int64_t > > computeCandidateInterchangePermutations(ParallelOp &firstPloop, ParallelOp &secondPloop, int permBudget=120)
static bool loadIndexWithinWriteRange(Value loadIndex, OpFoldResult offset, Value writeIndex, int64_t extent, const IRMapping &loopsIVsMap)
static bool opsWriteSameMemLocation(Operation *op1, Operation *op2)
Check if both operations are the same type of memory write op and write to the same memory location (...
static bool noIncompatibleDataDependencies(ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
Check that in each loop there are no read ops on the buffers written by the other loop,...
static bool valsAreEquivalent(Value val1, Value val2, const IRMapping &loopsIVsMap)
Check if val1 (from the first parallel loop) and val2 (from the second) are equivalent,...
static bool isFusionLegal(ParallelOp firstPloop, ParallelOp secondPloop, const IRMapping &firstToSecondPloopIndices, llvm::function_ref< bool(Value, Value)> mayAlias, OpBuilder &b)
Check if fusion of the two parallel loops is legal: i.e.
static bool opsAccessSameIndicesViaRankReducingSubview(OpTy1 op1, OpTy2 op2, const IRMapping &firstToSecondPloopIVsMap, OpBuilder &b)
Check if both operations access the same positions of the same buffer, but one of the two does it thr...
static bool loadMatchesVectorStore(memref::LoadOp loadOp, vector::StoreOp storeOp, const IRMapping &ivsMap)
Recognize scalar memref.load of an element produced by a vector.store.
static bool hasNestedParallelOp(ParallelOp ploop)
Verify there are no nested ParallelOps.
static void fuseIfLegal(ParallelOp firstPloop, ParallelOp &secondPloop, OpBuilder builder, llvm::function_ref< bool(Value, Value)> mayAlias)
Check fusion pre-conditions and call fusion if it is possible.
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
bool isProjectedPermutation(bool allowZeroInResults=false) const
Returns true if the AffineMap represents a subset (i.e.
unsigned getNumSymbols() const
unsigned getNumDims() const
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
Block represents an ordered list of Operations.
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
A class for computing basic dominance information.
bool properlyDominates(Operation *a, Operation *b, bool enclosingOpOk=true) const
Return true if operation A properly dominates operation B, i.e.
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void clear()
Clears all mappings held by the mapper.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
static OpBuilder atBlockBegin(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still ins...
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
This class represents a single result from folding an operation.
This trait indicates that the memory effects of an operation includes the effects of operations neste...
This class implements the operand iterators for the Operation class.
Operation is the basic unit of execution within MLIR.
Value getOperand(unsigned idx)
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
OperationName getName()
The name of an operation is the key identifier for it.
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
user_range getUsers()
Returns a range of all users.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
MemrefValue skipFullyAliasingOperations(MemrefValue source)
Walk up the source chain until an operation that changes/defines the view of memory is found (i....
bool isSameViewOrTrivialAlias(MemrefValue a, MemrefValue b)
Checks if two (memref) values are the same or statically known to alias the same region of memory.
void naivelyFuseParallelOps(Region &region, llvm::function_ref< bool(Value, Value)> mayAlias)
Fuses all adjacent scf.parallel operations with identical bounds and step into one scf....
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
SmallVector< T > applyPermutation(ArrayRef< T > input, ArrayRef< int64_t > permutation)
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
llvm::SmallVector< std::tuple< int64_t, int64_t, int64_t > > getConstLoopBounds(mlir::LoopLikeOpInterface loopOp)
Get constant loop bounds and steps for each of the induction variables of the given loop operation,...
detail::constant_int_predicate_matcher m_Zero()
Matches a constant scalar / vector splat / tensor splat integer zero.
TypedValue< BaseMemRefType > MemrefValue
A value with a memref type.
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
std::unique_ptr< Pass > createParallelLoopFusionPass()
Creates a loop fusion pass which fuses parallel loops.
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to invert a permutation.
bool operator==(LoopIV const &other) const
bool operator!=(LoopIV const &other) const
static bool isEqual(const LoopIV &lhs, const LoopIV &rhs)
static unsigned getHashValue(const LoopIV &val)
The following effect indicates that the operation frees some resource that has been allocated.
The following effect indicates that the operation reads from some resource.
The following effect indicates that the operation writes to some resource.
static bool isEquivalentTo(Operation *lhs, Operation *rhs, function_ref< LogicalResult(Value, Value)> checkEquivalent, function_ref< void(Value, Value)> markEquivalent=nullptr, Flags flags=Flags::None, function_ref< LogicalResult(ValueRange, ValueRange)> checkCommutativeEquivalent=nullptr)
Compare two operations (including their regions) and return if they are equivalent.