29#include "llvm/ADT/STLExtras.h"
30#include "llvm/Support/Debug.h"
35#define GEN_PASS_DEF_AFFINEVECTORIZE
36#include "mlir/Dialect/Affine/Transforms/Passes.h.inc"
575#define DEBUG_TYPE "early-vect"
582 int fastestVaryingMemRefDimension);
588static std::optional<NestedPattern>
592 int64_t d0 = fastestVaryingPattern.empty() ? -1 : fastestVaryingPattern[0];
593 int64_t d1 = fastestVaryingPattern.size() < 2 ? -1 : fastestVaryingPattern[1];
594 int64_t d2 = fastestVaryingPattern.size() < 3 ? -1 : fastestVaryingPattern[2];
595 switch (vectorRank) {
613 llvm::IsaPred<vector::TransferReadOp, vector::TransferWriteOp>);
624 void runOnOperation()
override;
630 unsigned patternDepth,
631 VectorizationStrategy *strategy) {
632 assert(patternDepth > depthInPattern &&
633 "patternDepth is greater than depthInPattern");
634 if (patternDepth - depthInPattern > strategy->vectorSizes.size()) {
638 strategy->loopToVectorDim[loop] =
639 strategy->vectorSizes.size() - (patternDepth - depthInPattern);
658 unsigned depthInPattern,
659 unsigned patternDepth,
660 VectorizationStrategy *strategy) {
661 for (
auto m : matches) {
663 patternDepth, strategy))) {
667 patternDepth, strategy);
676struct VectorizationState {
689 void registerOpVectorReplacement(Operation *replaced, Operation *
replacement);
702 void registerValueVectorReplacement(Value replaced, Operation *
replacement);
709 void registerBlockArgVectorReplacement(BlockArgument replaced,
722 void registerValueScalarReplacement(Value replaced, Value
replacement);
734 void registerLoopResultScalarReplacement(Value replaced, Value
replacement);
738 void getScalarValueReplacementsFor(
ValueRange inputVals,
739 SmallVectorImpl<Value> &replacedVals);
742 void finishVectorizationPattern(AffineForOp rootLoop);
751 IRMapping valueVectorReplacement;
754 IRMapping valueScalarReplacement;
756 DenseMap<Value, Value> loopResultScalarReplacement;
766 const VectorizationStrategy *strategy =
nullptr;
771 void registerValueVectorReplacementImpl(Value replaced, Value
replacement);
785void VectorizationState::registerOpVectorReplacement(
Operation *replaced,
787 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ commit vectorized op:\n");
788 LLVM_DEBUG(dbgs() << *replaced <<
"\n");
789 LLVM_DEBUG(dbgs() <<
"into\n");
793 "Unexpected replaced and replacement results");
794 assert(opVectorReplacement.count(replaced) == 0 &&
"already registered");
797 for (
auto resultTuple :
799 registerValueVectorReplacementImpl(std::get<0>(resultTuple),
800 std::get<1>(resultTuple));
813void VectorizationState::registerValueVectorReplacement(
816 "Expected single-result replacement");
820 registerValueVectorReplacementImpl(replaced,
replacement->getResult(0));
828void VectorizationState::registerBlockArgVectorReplacement(
829 BlockArgument replaced, BlockArgument
replacement) {
830 registerValueVectorReplacementImpl(replaced,
replacement);
833void VectorizationState::registerValueVectorReplacementImpl(Value replaced,
835 assert(!valueVectorReplacement.
contains(replaced) &&
836 "Vector replacement already registered");
838 "Expected vector type in vector replacement");
852void VectorizationState::registerValueScalarReplacement(Value replaced,
854 assert(!valueScalarReplacement.
contains(replaced) &&
855 "Scalar value replacement already registered");
857 "Expected scalar type in scalar replacement");
870void VectorizationState::registerLoopResultScalarReplacement(
873 assert(loopResultScalarReplacement.count(replaced) == 0 &&
874 "already registered");
875 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ will replace a result of the loop "
878 loopResultScalarReplacement[replaced] =
replacement;
882void VectorizationState::getScalarValueReplacementsFor(
883 ValueRange inputVals, SmallVectorImpl<Value> &replacedVals) {
884 for (Value inputVal : inputVals)
885 replacedVals.push_back(valueScalarReplacement.
lookupOrDefault(inputVal));
890 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ erasing:\n" << forOp <<
"\n");
895void VectorizationState::finishVectorizationPattern(AffineForOp rootLoop) {
896 LLVM_DEBUG(dbgs() <<
"\n[early-vect] Finalizing vectorization\n");
903 VectorizationState &state,
908 auto afOp = AffineApplyOp::create(state.builder, op->
getLoc(), singleResMap,
910 results.push_back(afOp);
919 int fastestVaryingMemRefDimension) {
920 return [¶llelLoops, fastestVaryingMemRefDimension](
Operation &forOp) {
921 auto loop = cast<AffineForOp>(forOp);
922 if (!parallelLoops.contains(loop))
925 auto vectorizableBody =
927 if (!vectorizableBody)
929 return memRefDim == -1 || fastestVaryingMemRefDimension == -1 ||
930 memRefDim == fastestVaryingMemRefDimension;
937 const VectorizationStrategy *strategy) {
938 assert(!isa<VectorType>(scalarTy) &&
"Expected scalar type");
939 return VectorType::get(strategy->vectorSizes, scalarTy);
946 VectorizationState &state) {
947 Type scalarTy = constOp.getType();
948 if (!VectorType::isValidElementType(scalarTy))
957 while (parentOp && !state.vecLoopToVecDim.count(parentOp))
959 assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
960 isa<AffineForOp>(parentOp) &&
"Expected a vectorized for op");
961 auto vecForOp = cast<AffineForOp>(parentOp);
964 arith::ConstantOp::create(state.builder, constOp.getLoc(), vecAttr);
967 state.registerOpVectorReplacement(constOp, newConstOp);
974 if (isa<IndexType>(scalarTy)) {
975 auto scalarConstOp = arith::ConstantOp::create(
976 state.builder, constOp.getLoc(), constOp.getValue());
977 state.registerValueScalarReplacement(constOp.getResult(),
978 scalarConstOp.getResult());
987 VectorizationState &state) {
989 for (
Value operand : applyOp.getOperands()) {
990 if (state.valueVectorReplacement.
contains(operand)) {
992 dbgs() <<
"\n[early-vect]+++++ affine.apply on vector operand\n");
997 updatedOperand = operand;
998 updatedOperands.push_back(updatedOperand);
1001 auto newApplyOp = AffineApplyOp::create(
1002 state.builder, applyOp.getLoc(), applyOp.getAffineMap(), updatedOperands);
1005 state.registerValueScalarReplacement(applyOp.getResult(),
1006 newApplyOp.getResult());
1017 if (!VectorType::isValidElementType(scalarTy))
1020 Attribute valueAttr = getIdentityValueAttr(
1021 reductionKind, scalarTy, state.builder, oldOperand.
getLoc());
1025 arith::ConstantOp::create(state.builder, oldOperand.
getLoc(), vecAttr);
1038 assert(state.strategy->vectorSizes.size() == 1 &&
1039 "Creating a mask non-1-D vectors is not supported.");
1040 assert(vecForOp.getStep() == state.strategy->vectorSizes[0] &&
1041 "Creating a mask for loops with non-unit original step size is not "
1045 if (
Value mask = state.vecLoopToMask.lookup(vecForOp))
1050 if (vecForOp.hasConstantBounds()) {
1052 vecForOp.getConstantUpperBound() - vecForOp.getConstantLowerBound();
1053 if (originalTripCount % vecForOp.getStepAsInt() == 0)
1074 AffineMap ubMap = vecForOp.getUpperBoundMap();
1077 ub = AffineApplyOp::create(state.builder, loc, vecForOp.getUpperBoundMap(),
1078 vecForOp.getUpperBoundOperands());
1080 ub = AffineMinOp::create(state.builder, loc, vecForOp.getUpperBoundMap(),
1081 vecForOp.getUpperBoundOperands());
1087 {ub, vecForOp.getInductionVar()});
1090 ub.getDefiningOp()->erase();
1092 Type maskTy = VectorType::get(state.strategy->vectorSizes,
1095 vector::CreateMaskOp::create(state.builder, loc, maskTy, itersLeft);
1097 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a mask:\n"
1098 << itersLeft <<
"\n"
1101 state.vecLoopToMask[vecForOp] = mask;
1111 const VectorizationStrategy *strategy) {
1113 if (forOp && strategy->loopToVectorDim.count(forOp) == 0)
1116 for (
auto loopToDim : strategy->loopToVectorDim) {
1117 auto loop = cast<AffineForOp>(loopToDim.first);
1118 if (!loop.isDefinedOutsideOfLoop(value))
1128 VectorizationState &state) {
1130 Value uniformScalarRepl =
1135 auto bcastOp = BroadcastOp::create(state.builder, uniformVal.
getLoc(),
1136 vectorTy, uniformScalarRepl);
1137 state.registerValueVectorReplacement(uniformVal, bcastOp);
1159 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorize operand: " << operand);
1162 LLVM_DEBUG(dbgs() <<
" -> already vectorized: " << vecRepl);
1169 assert(!isa<VectorType>(operand.
getType()) &&
1170 "Vector op not found in replacement map");
1173 if (
auto constOp = operand.
getDefiningOp<arith::ConstantOp>()) {
1175 LLVM_DEBUG(dbgs() <<
"-> constant: " << vecConstant);
1176 return vecConstant.getResult();
1182 LLVM_DEBUG(dbgs() <<
"-> uniform: " << *vecUniform);
1189 LLVM_DEBUG(dbgs() <<
"-> unsupported block argument\n");
1192 LLVM_DEBUG(dbgs() <<
"-> non-vectorizable\n");
1201 for (
auto &kvp : loopToVectorDim) {
1202 AffineForOp forOp = cast<AffineForOp>(kvp.first);
1207 unsigned nonInvariant = 0;
1209 if (invariants.count(idx))
1212 if (++nonInvariant > 1) {
1213 LLVM_DEBUG(dbgs() <<
"[early‑vect] Bail out: IV "
1214 << forOp.getInductionVar() <<
" drives "
1215 << nonInvariant <<
" indices\n");
1230 VectorizationState &state) {
1231 MemRefType memRefType = loadOp.getMemRefType();
1232 Type elementType = memRefType.getElementType();
1233 auto vectorType = VectorType::get(state.strategy->vectorSizes, elementType);
1237 state.getScalarValueReplacementsFor(loadOp.getMapOperands(), mapOperands);
1241 indices.reserve(memRefType.getRank());
1242 if (loadOp.getAffineMap() !=
1245 for (
auto op : mapOperands) {
1246 if (op.getDefiningOp<AffineApplyOp>())
1252 indices.append(mapOperands.begin(), mapOperands.end());
1260 indices, state.vecLoopToVecDim);
1261 if (!permutationMap) {
1262 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ can't compute permutationMap\n");
1265 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1266 LLVM_DEBUG(permutationMap.print(dbgs()));
1269 state.builder, loadOp.getLoc(), loadOp.getMemRef(), vectorType,
1275 state.registerOpVectorReplacement(loadOp, transfer);
1286 VectorizationState &state) {
1287 MemRefType memRefType = storeOp.getMemRefType();
1294 state.getScalarValueReplacementsFor(storeOp.getMapOperands(), mapOperands);
1298 indices.reserve(memRefType.getRank());
1299 if (storeOp.getAffineMap() !=
1304 indices.append(mapOperands.begin(), mapOperands.end());
1311 indices, state.vecLoopToVecDim);
1312 if (!permutationMap)
1314 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1315 LLVM_DEBUG(permutationMap.print(dbgs()));
1319 if (llvm::any_of(permutationMap.getResults(),
1320 llvm::IsaPred<AffineConstantExpr>)) {
1321 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ store permutation map has "
1322 "broadcast dims, bailing out\n");
1327 state.builder, storeOp.getLoc(), vectorValue, storeOp.getMemRef(),
1329 true, permutationMap);
1330 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorized store: " << *transfer);
1333 state.registerOpVectorReplacement(storeOp, transfer);
1340 Value value, VectorizationState &state) {
1342 if (!VectorType::isValidElementType(scalarTy))
1344 Attribute valueAttr = getIdentityValueAttr(reductionKind, scalarTy,
1345 state.builder, value.
getLoc());
1346 if (
auto constOp = value.
getDefiningOp<arith::ConstantOp>())
1347 return constOp.getValue() == valueAttr;
1358 VectorizationState &state) {
1359 const VectorizationStrategy &strategy = *state.strategy;
1360 auto loopToVecDimIt = strategy.loopToVectorDim.find(forOp);
1361 bool isLoopVecDim = loopToVecDimIt != strategy.loopToVectorDim.end();
1364 if (isLoopVecDim && forOp.getNumIterOperands() > 0 && forOp.getStep() != 1) {
1367 <<
"\n[early-vect]+++++ unsupported step size for reduction loop: "
1368 << forOp.getStep() <<
"\n");
1377 unsigned vectorDim = loopToVecDimIt->second;
1378 assert(vectorDim < strategy.vectorSizes.size() &&
"vector dim overflow");
1379 int64_t forOpVecFactor = strategy.vectorSizes[vectorDim];
1380 newStep = forOp.getStepAsInt() * forOpVecFactor;
1382 newStep = forOp.getStepAsInt();
1387 if (isLoopVecDim && forOp.getNumIterOperands() > 0) {
1388 auto it = strategy.reductionLoops.find(forOp);
1389 assert(it != strategy.reductionLoops.end() &&
1390 "Reduction descriptors not found when vectorizing a reduction loop");
1391 reductions = it->second;
1392 assert(reductions.size() == forOp.getNumIterOperands() &&
1393 "The size of reductions array must match the number of iter_args");
1398 if (!isLoopVecDim) {
1399 for (
auto operand : forOp.getInits())
1405 for (
auto redAndOperand : llvm::zip(reductions, forOp.getInits())) {
1407 std::get<0>(redAndOperand).kind, std::get<1>(redAndOperand), state));
1415 state.getScalarValueReplacementsFor(forOp.getLowerBoundOperands(),
1417 state.getScalarValueReplacementsFor(forOp.getUpperBoundOperands(),
1419 auto vecForOp = AffineForOp::create(
1420 state.builder, forOp.getLoc(), lbOperands, forOp.getLowerBoundMap(),
1421 ubOperands, forOp.getUpperBoundMap(), newStep, vecIterOperands,
1440 state.registerOpVectorReplacement(forOp, vecForOp);
1441 state.registerValueScalarReplacement(forOp.getInductionVar(),
1442 vecForOp.getInductionVar());
1443 for (
auto iterTuple :
1444 llvm ::zip(forOp.getRegionIterArgs(), vecForOp.getRegionIterArgs()))
1445 state.registerBlockArgVectorReplacement(std::get<0>(iterTuple),
1446 std::get<1>(iterTuple));
1449 for (
unsigned i = 0; i < vecForOp.getNumIterOperands(); ++i) {
1453 vecForOp.getLoc(), vecForOp.getResult(i));
1454 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a vector reduction: "
1458 Value origInit = forOp.getOperand(forOp.getNumControlOperands() + i);
1459 Value finalRes = reducedRes;
1463 reducedRes.
getLoc(), reducedRes, origInit);
1464 state.registerLoopResultScalarReplacement(forOp.getResult(i), finalRes);
1469 state.vecLoopToVecDim[vecForOp] = loopToVecDimIt->second;
1477 if (isLoopVecDim && forOp.getNumIterOperands() > 0)
1489 vectorTypes.push_back(
1490 VectorType::get(state.strategy->vectorSizes,
result.getType()));
1496 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ an operand failed vectorize\n");
1499 vectorOperands.push_back(vecOperand);
1509 vectorOperands, vectorTypes, op->
getAttrs());
1510 state.registerOpVectorReplacement(op, vecOp);
1519 VectorizationState &state) {
1532 if (
Value mask = state.vecLoopToMask.lookup(newParentOp)) {
1537 cast<AffineForOp>(newParentOp).getRegionIterArgs(), i, combinerOps);
1538 assert(reducedVal &&
"expect non-null value for parallel reduction loop");
1539 assert(combinerOps.size() == 1 &&
"expect only one combiner op");
1541 Value neutralVal = cast<AffineForOp>(newParentOp).getInits()[i];
1543 Value maskedReducedVal = arith::SelectOp::create(
1544 state.builder, reducedVal.
getLoc(), mask, reducedVal, neutralVal);
1546 dbgs() <<
"\n[early-vect]+++++ masking an input to a binary op that"
1547 "produces value for a yield Op: "
1548 << maskedReducedVal);
1549 combinerOps.back()->replaceUsesOfWith(reducedVal, maskedReducedVal);
1567 VectorizationState &state) {
1569 assert(!isa<vector::TransferReadOp>(op) &&
1570 "vector.transfer_read cannot be further vectorized");
1571 assert(!isa<vector::TransferWriteOp>(op) &&
1572 "vector.transfer_write cannot be further vectorized");
1574 if (
auto loadOp = dyn_cast<AffineLoadOp>(op))
1576 if (
auto storeOp = dyn_cast<AffineStoreOp>(op))
1578 if (
auto forOp = dyn_cast<AffineForOp>(op))
1580 if (
auto yieldOp = dyn_cast<AffineYieldOp>(op))
1582 if (
auto constant = dyn_cast<arith::ConstantOp>(op))
1584 if (
auto applyOp = dyn_cast<AffineApplyOp>(op))
1602 assert(currentLevel <= loops.size() &&
"Unexpected currentLevel");
1603 if (currentLevel == loops.size())
1604 loops.emplace_back();
1628 const VectorizationStrategy &strategy) {
1629 assert(loops[0].size() == 1 &&
"Expected single root loop");
1630 AffineForOp rootLoop = loops[0][0];
1631 VectorizationState state(rootLoop.getContext());
1633 state.strategy = &strategy;
1643 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ loop is not vectorizable");
1656 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ Vectorizing: " << *op);
1660 dbgs() <<
"[early-vect]+++++ failed vectorizing the operation: "
1668 if (opVecResult.wasInterrupted()) {
1669 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ failed vectorization for: "
1670 << rootLoop <<
"\n");
1672 auto vecRootLoopIt = state.opVectorReplacement.find(rootLoop);
1673 if (vecRootLoopIt != state.opVectorReplacement.end())
1681 for (
auto resPair : state.loopResultScalarReplacement)
1682 resPair.first.replaceAllUsesWith(resPair.second);
1684 assert(state.opVectorReplacement.count(rootLoop) == 1 &&
1685 "Expected vector replacement for loop nest");
1686 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ success vectorizing pattern");
1687 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorization result:\n"
1688 << *state.opVectorReplacement[rootLoop]);
1691 state.finishVectorizationPattern(rootLoop);
1699 const VectorizationStrategy &strategy) {
1700 std::vector<SmallVector<AffineForOp, 2>> loopsToVectorize;
1712 assert(intersectionBuckets.empty() &&
"Expected empty output");
1717 AffineForOp matchRoot = cast<AffineForOp>(match.getMatchedOperation());
1718 bool intersects =
false;
1719 for (
int i = 0, end = intersectionBuckets.size(); i < end; ++i) {
1720 AffineForOp bucketRoot = bucketRoots[i];
1722 if (bucketRoot->isAncestor(matchRoot)) {
1723 intersectionBuckets[i].push_back(match);
1729 if (matchRoot->isAncestor(bucketRoot)) {
1730 bucketRoots[i] = matchRoot;
1731 intersectionBuckets[i].push_back(match);
1740 bucketRoots.push_back(matchRoot);
1741 intersectionBuckets.emplace_back();
1742 intersectionBuckets.back().push_back(match);
1757 assert((reductionLoops.empty() || vectorSizes.size() == 1) &&
1758 "Vectorizing reductions is supported only for 1-D vectors");
1761 std::optional<NestedPattern> pattern =
1762 makePattern(loops, vectorSizes.size(), fastestVaryingPattern);
1764 LLVM_DEBUG(dbgs() <<
"\n[early-vect] pattern couldn't be computed\n");
1768 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1769 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1770 LLVM_DEBUG(dbgs() <<
"\n[early-vect] new pattern on parent op\n");
1771 LLVM_DEBUG(dbgs() << *parentOp <<
"\n");
1773 unsigned patternDepth = pattern->getDepth();
1778 pattern->match(parentOp, &allMatches);
1779 std::vector<SmallVector<NestedMatch, 8>> intersectionBuckets;
1785 for (
auto &intersectingMatches : intersectionBuckets) {
1787 VectorizationStrategy strategy;
1789 strategy.vectorSizes.assign(vectorSizes.begin(), vectorSizes.end());
1790 strategy.reductionLoops = reductionLoops;
1792 patternDepth, &strategy))) {
1806 LLVM_DEBUG(dbgs() <<
"\n");
1809void affine::vectorizeChildAffineLoops(
1810 Operation *parentOp,
bool vectorizeReductions,
1811 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern) {
1817 if (vectorizeReductions) {
1818 parentOp->
walk([¶llelLoops, &reductionLoops](AffineForOp loop) {
1819 SmallVector<LoopReduction, 2> reductions;
1820 if (isLoopParallel(loop, &reductions)) {
1821 parallelLoops.insert(loop);
1823 if (!reductions.empty())
1824 reductionLoops[loop] = reductions;
1828 parentOp->
walk([¶llelLoops](AffineForOp loop) {
1829 if (isLoopParallel(loop))
1830 parallelLoops.insert(loop);
1835 NestedPatternContext mlContext;
1836 vectorizeLoops(parentOp, parallelLoops, vectorSizes, fastestVaryingPattern,
1842void Vectorize::runOnOperation() {
1843 func::FuncOp f = getOperation();
1844 if (!fastestVaryingPattern.empty() &&
1845 fastestVaryingPattern.size() != vectorSizes.size()) {
1846 f.emitRemark(
"Fastest varying pattern specified with different size than "
1847 "the vector size.");
1848 return signalPassFailure();
1851 if (vectorizeReductions && vectorSizes.size() != 1) {
1852 f.emitError(
"Vectorizing reductions is supported only for 1-D vectors.");
1853 return signalPassFailure();
1856 if (llvm::any_of(vectorSizes, [](int64_t size) {
return size <= 0; })) {
1857 f.emitError(
"Vectorization factor must be greater than zero.");
1858 return signalPassFailure();
1861 vectorizeChildAffineLoops(f, vectorizeReductions, vectorSizes,
1862 fastestVaryingPattern);
1878 if (loops[0].size() != 1)
1882 for (
int i = 1, end = loops.size(); i < end; ++i) {
1883 for (AffineForOp loop : loops[i]) {
1886 if (none_of(loops[i - 1], [&](AffineForOp maybeParent) {
1887 return maybeParent->isProperAncestor(loop);
1893 for (AffineForOp sibling : loops[i]) {
1894 if (sibling->isProperAncestor(loop))
1911void mlir::affine::vectorizeAffineLoops(
1913 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
1916 NestedPatternContext mlContext;
1917 vectorizeLoops(parentOp, loops, vectorSizes, fastestVaryingPattern,
1956LogicalResult mlir::affine::vectorizeAffineLoopNest(
1957 std::vector<SmallVector<AffineForOp, 2>> &loops,
1958 const VectorizationStrategy &strategy) {
1960 NestedPatternContext mlContext;
… if copies could not be generated due to yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` in `copyPlacementBlock` specify the insertion points where the incoming copies and outgoing copies should be … the output argument `nBegin` is set to its replacement (set to `begin` if no invalidation happens). Since outgoing copies could have been inserted at `end`, …
static Operation * vectorizeUniform(Value uniformVal, VectorizationState &state)
Generates a broadcast op for the provided uniform value using the vectorization strategy in 'state'.
static std::optional< NestedPattern > makePattern(const DenseSet< Operation * > &parallelLoops, int vectorRank, ArrayRef< int64_t > fastestVaryingPattern)
Creates a vectorization pattern from the command line arguments.
static LogicalResult vectorizeRootMatch(NestedMatch m, const VectorizationStrategy &strategy)
Extracts the matched loops and vectorizes them following a topological order.
static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
static LogicalResult verifyLoopNesting(const std::vector< SmallVector< AffineForOp, 2 > > &loops)
Verify that affine loops in 'loops' meet the nesting criteria expected by SuperVectorizer:
static Operation * vectorizeOneOperation(Operation *op, VectorizationState &state)
Encodes Operation-specific behavior for vectorization.
static bool isNeutralElementConst(arith::AtomicRMWKind reductionKind, Value value, VectorizationState &state)
Returns true if value is a constant equal to the neutral element of the given vectorizable reduction.
static LogicalResult vectorizeLoopNest(std::vector< SmallVector< AffineForOp, 2 > > &loops, const VectorizationStrategy &strategy)
Internal implementation to vectorize affine loops from a single loop nest using an n-D vectorization ...
static Operation * vectorizeAffineLoad(AffineLoadOp loadOp, VectorizationState &state)
Vectorizes an affine load with the vectorization strategy in 'state' by generating a 'vector....
static Operation * vectorizeAffineForOp(AffineForOp forOp, VectorizationState &state)
Vectorizes a loop with the vectorization strategy in 'state'.
static Operation * vectorizeAffineApplyOp(AffineApplyOp applyOp, VectorizationState &state)
We have no need to vectorize affine.apply.
static LogicalResult analyzeProfitability(ArrayRef< NestedMatch > matches, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
Implements a simple strawman strategy for vectorization.
static FilterFunctionType isVectorizableLoopPtrFactory(const DenseSet< Operation * > &parallelLoops, int fastestVaryingMemRefDimension)
Forward declaration.
static bool isIVMappedToMultipleIndices(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &loopToVectorDim)
Returns true if any vectorized loop IV drives more than one index.
static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp, VectorizationState &state)
Tries to transform a scalar constant into a vector constant.
static bool isUniformDefinition(Value value, const VectorizationStrategy *strategy)
Returns true if the provided value is vector uniform given the vectorization strategy.
static void eraseLoopNest(AffineForOp forOp)
Erases a loop nest, including all its nested operations.
static VectorType getVectorType(Type scalarTy, const VectorizationStrategy *strategy)
Returns the vector type resulting from applying the provided vectorization strategy on the scalar typ...
static void getMatchedAffineLoops(NestedMatch match, std::vector< SmallVector< AffineForOp, 2 > > &loops)
Converts all the nested loops in 'match' to a 2D vector container that preserves the relative nesting...
static Value vectorizeOperand(Value operand, VectorizationState &state)
Tries to vectorize a given operand by applying the following logic:
static void getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel, std::vector< SmallVector< AffineForOp, 2 > > &loops)
Recursive implementation to convert all the nested loops in 'match' to a 2D vector container that pre...
static Operation * vectorizeAffineYieldOp(AffineYieldOp yieldOp, VectorizationState &state)
Vectorizes a yield operation by widening its types.
static arith::ConstantOp createInitialVector(arith::AtomicRMWKind reductionKind, Value oldOperand, VectorizationState &state)
Creates a constant vector filled with the neutral elements of the given reduction.
static Operation * widenOp(Operation *op, VectorizationState &state)
Vectorizes arbitrary operation by plain widening.
static Operation * vectorizeAffineStore(AffineStoreOp storeOp, VectorizationState &state)
Vectorizes an affine store with the vectorization strategy in 'state' by generating a 'vector....
static NestedPattern & vectorTransferPattern()
static void vectorizeLoops(Operation *parentOp, DenseSet< Operation * > &loops, ArrayRef< int64_t > vectorSizes, ArrayRef< int64_t > fastestVaryingPattern, const ReductionLoopMap &reductionLoops)
Internal implementation to vectorize affine loops in 'loops' using the n-D vectorization factors in '...
static void computeMemoryOpIndices(Operation *op, AffineMap map, ValueRange mapOperands, VectorizationState &state, SmallVectorImpl< Value > &results)
static void computeIntersectionBuckets(ArrayRef< NestedMatch > matches, std::vector< SmallVector< NestedMatch, 8 > > &intersectionBuckets)
Traverses all the loop matches and classifies them into intersection buckets.
static Value createMask(AffineForOp vecForOp, VectorizationState &state)
Creates a mask used to filter out garbage elements in the last iteration of unaligned loops.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
Attributes are known-constant values of operations.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
AffineMap getMultiDimIdentityMap(unsigned rank)
IntegerType getIntegerType(unsigned width)
AffineExpr getAffineDimExpr(unsigned position)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
auto lookupOrNull(T from) const
Lookup a mapped value within the map.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
void setInsertionPointAfterValue(Value val)
Sets the insertion point to the node after the specified value.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
Location getLoc()
The source location the operation was defined or derived from.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
unsigned getNumOperands()
OperationName getName()
The name of an operation is the key identifier for it.
operand_range getOperands()
Returns an iterator on the underlying Value's.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getResults()
unsigned getNumResults()
Return the number of results held by this operation.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndexOrFloat() const
Return true if this is an integer (of any signedness), index, or float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
A NestedPattern captures nested patterns in the IR.
ArrayRef< NestedMatch > getMatchedChildren()
Operation * getMatchedOperation() const
NestedPattern For(const NestedPattern &child)
NestedPattern Op(FilterFunctionType filter=defaultFilterFunction)
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
DenseMap< Operation *, SmallVector< LoopReduction, 2 > > ReductionLoopMap
bool isVectorizableLoopBody(AffineForOp loop, NestedPattern &vectorTransferMatcher)
Checks whether the loop is structurally vectorizable; i.e.:
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
std::function< bool(Operation &)> FilterFunctionType
A NestedPattern is a nested operation walker that:
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRM...
Operation * createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vecToStore, Value dest, SmallVector< Value > writeIndices={}, bool useInBoundsInsteadOfMasking=false, AffineMap permutationMap=AffineMap())
Create a TransferWriteOp of vecToStore into dest.
Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, const VectorType &vecToReadTy, std::optional< Value > padValue=std::nullopt, bool useInBoundsInsteadOfMasking=false, ArrayRef< Value > indices={}, AffineMap permutationMap=AffineMap())
Creates a TransferReadOp from source.
Value getVectorReductionOp(arith::AtomicRMWKind op, OpBuilder &builder, Location loc, Value vector)
Returns the value obtained by reducing the vector into a scalar using the operation kind associated w...
Include the generated interface declarations.
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Contains the vectorization state and related methods used across the vectorization process of a given...
VectorizationState(RewriterBase &rewriter)