29#include "llvm/ADT/STLExtras.h"
30#include "llvm/Support/Debug.h"
35#define GEN_PASS_DEF_AFFINEVECTORIZE
36#include "mlir/Dialect/Affine/Transforms/Passes.h.inc"
575#define DEBUG_TYPE "early-vect"
582 int fastestVaryingMemRefDimension);
588static std::optional<NestedPattern>
592 int64_t d0 = fastestVaryingPattern.empty() ? -1 : fastestVaryingPattern[0];
593 int64_t d1 = fastestVaryingPattern.size() < 2 ? -1 : fastestVaryingPattern[1];
594 int64_t d2 = fastestVaryingPattern.size() < 3 ? -1 : fastestVaryingPattern[2];
595 switch (vectorRank) {
613 llvm::IsaPred<vector::TransferReadOp, vector::TransferWriteOp>);
621struct Vectorize :
public affine::impl::AffineVectorizeBase<Vectorize> {
624 void runOnOperation()
override;
630 unsigned patternDepth,
631 VectorizationStrategy *strategy) {
632 assert(patternDepth > depthInPattern &&
633 "patternDepth is greater than depthInPattern");
634 if (patternDepth - depthInPattern > strategy->vectorSizes.size()) {
638 strategy->loopToVectorDim[loop] =
639 strategy->vectorSizes.size() - (patternDepth - depthInPattern);
658 unsigned depthInPattern,
659 unsigned patternDepth,
660 VectorizationStrategy *strategy) {
661 for (
auto m : matches) {
663 patternDepth, strategy))) {
667 patternDepth, strategy);
676struct VectorizationState {
689 void registerOpVectorReplacement(Operation *replaced, Operation *
replacement);
702 void registerValueVectorReplacement(Value replaced, Operation *
replacement);
709 void registerBlockArgVectorReplacement(BlockArgument replaced,
722 void registerValueScalarReplacement(Value replaced, Value
replacement);
734 void registerLoopResultScalarReplacement(Value replaced, Value
replacement);
738 void getScalarValueReplacementsFor(
ValueRange inputVals,
739 SmallVectorImpl<Value> &replacedVals);
742 void finishVectorizationPattern(AffineForOp rootLoop);
751 IRMapping valueVectorReplacement;
754 IRMapping valueScalarReplacement;
756 DenseMap<Value, Value> loopResultScalarReplacement;
766 const VectorizationStrategy *strategy =
nullptr;
771 void registerValueVectorReplacementImpl(Value replaced, Value
replacement);
785void VectorizationState::registerOpVectorReplacement(
Operation *replaced,
787 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ commit vectorized op:\n");
788 LLVM_DEBUG(dbgs() << *replaced <<
"\n");
789 LLVM_DEBUG(dbgs() <<
"into\n");
793 "Unexpected replaced and replacement results");
794 assert(opVectorReplacement.count(replaced) == 0 &&
"already registered");
797 for (
auto resultTuple :
799 registerValueVectorReplacementImpl(std::get<0>(resultTuple),
800 std::get<1>(resultTuple));
813void VectorizationState::registerValueVectorReplacement(
816 "Expected single-result replacement");
820 registerValueVectorReplacementImpl(replaced,
replacement->getResult(0));
828void VectorizationState::registerBlockArgVectorReplacement(
829 BlockArgument replaced, BlockArgument
replacement) {
830 registerValueVectorReplacementImpl(replaced,
replacement);
833void VectorizationState::registerValueVectorReplacementImpl(Value replaced,
835 assert(!valueVectorReplacement.
contains(replaced) &&
836 "Vector replacement already registered");
838 "Expected vector type in vector replacement");
852void VectorizationState::registerValueScalarReplacement(Value replaced,
854 assert(!valueScalarReplacement.
contains(replaced) &&
855 "Scalar value replacement already registered");
857 "Expected scalar type in scalar replacement");
870void VectorizationState::registerLoopResultScalarReplacement(
873 assert(loopResultScalarReplacement.count(replaced) == 0 &&
874 "already registered");
875 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ will replace a result of the loop "
878 loopResultScalarReplacement[replaced] =
replacement;
882void VectorizationState::getScalarValueReplacementsFor(
883 ValueRange inputVals, SmallVectorImpl<Value> &replacedVals) {
884 for (Value inputVal : inputVals)
885 replacedVals.push_back(valueScalarReplacement.
lookupOrDefault(inputVal));
890 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ erasing:\n" << forOp <<
"\n");
895void VectorizationState::finishVectorizationPattern(AffineForOp rootLoop) {
896 LLVM_DEBUG(dbgs() <<
"\n[early-vect] Finalizing vectorization\n");
903 VectorizationState &state,
908 auto afOp = AffineApplyOp::create(state.builder, op->
getLoc(), singleResMap,
910 results.push_back(afOp);
919 int fastestVaryingMemRefDimension) {
920 return [¶llelLoops, fastestVaryingMemRefDimension](
Operation &forOp) {
921 auto loop = cast<AffineForOp>(forOp);
922 if (!parallelLoops.contains(loop))
925 auto vectorizableBody =
927 if (!vectorizableBody)
929 return memRefDim == -1 || fastestVaryingMemRefDimension == -1 ||
930 memRefDim == fastestVaryingMemRefDimension;
937 const VectorizationStrategy *strategy) {
938 assert(!isa<VectorType>(scalarTy) &&
"Expected scalar type");
939 return VectorType::get(strategy->vectorSizes, scalarTy);
946 VectorizationState &state) {
947 Type scalarTy = constOp.getType();
948 if (!VectorType::isValidElementType(scalarTy))
957 while (parentOp && !state.vecLoopToVecDim.count(parentOp))
959 assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
960 isa<AffineForOp>(parentOp) &&
"Expected a vectorized for op");
961 auto vecForOp = cast<AffineForOp>(parentOp);
964 arith::ConstantOp::create(state.builder, constOp.getLoc(), vecAttr);
967 state.registerOpVectorReplacement(constOp, newConstOp);
974 if (isa<IndexType>(scalarTy)) {
975 auto scalarConstOp = arith::ConstantOp::create(
976 state.builder, constOp.getLoc(), constOp.getValue());
977 state.registerValueScalarReplacement(constOp.getResult(),
978 scalarConstOp.getResult());
987 VectorizationState &state) {
989 for (
Value operand : applyOp.getOperands()) {
990 if (state.valueVectorReplacement.
contains(operand)) {
992 dbgs() <<
"\n[early-vect]+++++ affine.apply on vector operand\n");
997 updatedOperand = operand;
998 updatedOperands.push_back(updatedOperand);
1001 auto newApplyOp = AffineApplyOp::create(
1002 state.builder, applyOp.getLoc(), applyOp.getAffineMap(), updatedOperands);
1005 state.registerValueScalarReplacement(applyOp.getResult(),
1006 newApplyOp.getResult());
1015 VectorizationState &state) {
1017 if (!VectorType::isValidElementType(scalarTy))
1020 Attribute valueAttr = getIdentityValueAttr(
1021 reductionKind, scalarTy, state.builder, oldOperand.
getLoc());
1025 arith::ConstantOp::create(state.builder, oldOperand.
getLoc(), vecAttr);
1038 assert(state.strategy->vectorSizes.size() == 1 &&
1039 "Creating a mask non-1-D vectors is not supported.");
1040 assert(vecForOp.getStep() == state.strategy->vectorSizes[0] &&
1041 "Creating a mask for loops with non-unit original step size is not "
1045 if (
Value mask = state.vecLoopToMask.lookup(vecForOp))
1050 if (vecForOp.hasConstantBounds()) {
1052 vecForOp.getConstantUpperBound() - vecForOp.getConstantLowerBound();
1053 if (originalTripCount % vecForOp.getStepAsInt() == 0)
1074 AffineMap ubMap = vecForOp.getUpperBoundMap();
1077 ub = AffineApplyOp::create(state.builder, loc, vecForOp.getUpperBoundMap(),
1078 vecForOp.getUpperBoundOperands());
1080 ub = AffineMinOp::create(state.builder, loc, vecForOp.getUpperBoundMap(),
1081 vecForOp.getUpperBoundOperands());
1087 {ub, vecForOp.getInductionVar()});
1090 ub.getDefiningOp()->erase();
1092 Type maskTy = VectorType::get(state.strategy->vectorSizes,
1095 vector::CreateMaskOp::create(state.builder, loc, maskTy, itersLeft);
1097 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a mask:\n"
1098 << itersLeft <<
"\n"
1101 state.vecLoopToMask[vecForOp] = mask;
1111 const VectorizationStrategy *strategy) {
1113 if (forOp && strategy->loopToVectorDim.count(forOp) == 0)
1116 for (
auto loopToDim : strategy->loopToVectorDim) {
1117 auto loop = cast<AffineForOp>(loopToDim.first);
1118 if (!loop.isDefinedOutsideOfLoop(value))
1128 VectorizationState &state) {
1130 Value uniformScalarRepl =
1135 auto bcastOp = BroadcastOp::create(state.builder, uniformVal.
getLoc(),
1136 vectorTy, uniformScalarRepl);
1137 state.registerValueVectorReplacement(uniformVal, bcastOp);
1159 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorize operand: " << operand);
1162 LLVM_DEBUG(dbgs() <<
" -> already vectorized: " << vecRepl);
1169 assert(!isa<VectorType>(operand.
getType()) &&
1170 "Vector op not found in replacement map");
1173 if (
auto constOp = operand.
getDefiningOp<arith::ConstantOp>()) {
1175 LLVM_DEBUG(dbgs() <<
"-> constant: " << vecConstant);
1176 return vecConstant.getResult();
1182 LLVM_DEBUG(dbgs() <<
"-> uniform: " << *vecUniform);
1189 LLVM_DEBUG(dbgs() <<
"-> unsupported block argument\n");
1192 LLVM_DEBUG(dbgs() <<
"-> non-vectorizable\n");
1201 for (
auto &kvp : loopToVectorDim) {
1202 AffineForOp forOp = cast<AffineForOp>(kvp.first);
1207 unsigned nonInvariant = 0;
1209 if (invariants.count(idx))
1212 if (++nonInvariant > 1) {
1213 LLVM_DEBUG(dbgs() <<
"[early‑vect] Bail out: IV "
1214 << forOp.getInductionVar() <<
" drives "
1215 << nonInvariant <<
" indices\n");
1230 VectorizationState &state) {
1231 MemRefType memRefType = loadOp.getMemRefType();
1232 Type elementType = memRefType.getElementType();
1233 auto vectorType = VectorType::get(state.strategy->vectorSizes, elementType);
1237 state.getScalarValueReplacementsFor(loadOp.getMapOperands(), mapOperands);
1241 indices.reserve(memRefType.getRank());
1242 if (loadOp.getAffineMap() !=
1245 for (
auto op : mapOperands) {
1246 if (op.getDefiningOp<AffineApplyOp>())
1252 indices.append(mapOperands.begin(), mapOperands.end());
1260 indices, state.vecLoopToVecDim);
1261 if (!permutationMap) {
1262 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ can't compute permutationMap\n");
1265 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1266 LLVM_DEBUG(permutationMap.print(dbgs()));
1268 auto transfer = vector::TransferReadOp::create(
1269 state.builder, loadOp.getLoc(), vectorType, loadOp.getMemRef(),
indices,
1270 std::nullopt, permutationMap);
1273 state.registerOpVectorReplacement(loadOp, transfer);
1284 VectorizationState &state) {
1285 MemRefType memRefType = storeOp.getMemRefType();
1292 state.getScalarValueReplacementsFor(storeOp.getMapOperands(), mapOperands);
1296 indices.reserve(memRefType.getRank());
1297 if (storeOp.getAffineMap() !=
1302 indices.append(mapOperands.begin(), mapOperands.end());
1309 indices, state.vecLoopToVecDim);
1310 if (!permutationMap)
1312 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1313 LLVM_DEBUG(permutationMap.print(dbgs()));
1317 if (llvm::any_of(permutationMap.getResults(),
1318 llvm::IsaPred<AffineConstantExpr>)) {
1319 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ store permutation map has "
1320 "broadcast dims, bailing out\n");
1324 auto transfer = vector::TransferWriteOp::create(
1325 state.builder, storeOp.getLoc(), vectorValue, storeOp.getMemRef(),
1327 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorized store: " << transfer);
1330 state.registerOpVectorReplacement(storeOp, transfer);
1337 Value value, VectorizationState &state) {
1339 if (!VectorType::isValidElementType(scalarTy))
1341 Attribute valueAttr = getIdentityValueAttr(reductionKind, scalarTy,
1342 state.builder, value.
getLoc());
1343 if (
auto constOp = value.
getDefiningOp<arith::ConstantOp>())
1344 return constOp.getValue() == valueAttr;
1355 VectorizationState &state) {
1356 const VectorizationStrategy &strategy = *state.strategy;
1357 auto loopToVecDimIt = strategy.loopToVectorDim.find(forOp);
1358 bool isLoopVecDim = loopToVecDimIt != strategy.loopToVectorDim.end();
1361 if (isLoopVecDim && forOp.getNumIterOperands() > 0 && forOp.getStep() != 1) {
1364 <<
"\n[early-vect]+++++ unsupported step size for reduction loop: "
1365 << forOp.getStep() <<
"\n");
1374 unsigned vectorDim = loopToVecDimIt->second;
1375 assert(vectorDim < strategy.vectorSizes.size() &&
"vector dim overflow");
1376 int64_t forOpVecFactor = strategy.vectorSizes[vectorDim];
1377 newStep = forOp.getStepAsInt() * forOpVecFactor;
1379 newStep = forOp.getStepAsInt();
1384 if (isLoopVecDim && forOp.getNumIterOperands() > 0) {
1385 auto it = strategy.reductionLoops.find(forOp);
1386 assert(it != strategy.reductionLoops.end() &&
1387 "Reduction descriptors not found when vectorizing a reduction loop");
1388 reductions = it->second;
1389 assert(reductions.size() == forOp.getNumIterOperands() &&
1390 "The size of reductions array must match the number of iter_args");
1395 if (!isLoopVecDim) {
1396 for (
auto operand : forOp.getInits())
1402 for (
auto redAndOperand : llvm::zip(reductions, forOp.getInits())) {
1404 std::get<0>(redAndOperand).kind, std::get<1>(redAndOperand), state));
1412 state.getScalarValueReplacementsFor(forOp.getLowerBoundOperands(),
1414 state.getScalarValueReplacementsFor(forOp.getUpperBoundOperands(),
1416 auto vecForOp = AffineForOp::create(
1417 state.builder, forOp.getLoc(), lbOperands, forOp.getLowerBoundMap(),
1418 ubOperands, forOp.getUpperBoundMap(), newStep, vecIterOperands,
1437 state.registerOpVectorReplacement(forOp, vecForOp);
1438 state.registerValueScalarReplacement(forOp.getInductionVar(),
1439 vecForOp.getInductionVar());
1440 for (
auto iterTuple :
1441 llvm ::zip(forOp.getRegionIterArgs(), vecForOp.getRegionIterArgs()))
1442 state.registerBlockArgVectorReplacement(std::get<0>(iterTuple),
1443 std::get<1>(iterTuple));
1446 for (
unsigned i = 0; i < vecForOp.getNumIterOperands(); ++i) {
1450 vecForOp.getLoc(), vecForOp.getResult(i));
1451 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a vector reduction: "
1455 Value origInit = forOp.getOperand(forOp.getNumControlOperands() + i);
1456 Value finalRes = reducedRes;
1460 reducedRes.
getLoc(), reducedRes, origInit);
1461 state.registerLoopResultScalarReplacement(forOp.getResult(i), finalRes);
1466 state.vecLoopToVecDim[vecForOp] = loopToVecDimIt->second;
1474 if (isLoopVecDim && forOp.getNumIterOperands() > 0)
1486 vectorTypes.push_back(
1487 VectorType::get(state.strategy->vectorSizes,
result.getType()));
1493 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ an operand failed vectorize\n");
1496 vectorOperands.push_back(vecOperand);
1506 vectorOperands, vectorTypes, op->
getAttrs());
1507 state.registerOpVectorReplacement(op, vecOp);
1516 VectorizationState &state) {
1529 if (
Value mask = state.vecLoopToMask.lookup(newParentOp)) {
1534 cast<AffineForOp>(newParentOp).getRegionIterArgs(), i, combinerOps);
1535 assert(reducedVal &&
"expect non-null value for parallel reduction loop");
1536 assert(combinerOps.size() == 1 &&
"expect only one combiner op");
1538 Value neutralVal = cast<AffineForOp>(newParentOp).getInits()[i];
1540 Value maskedReducedVal = arith::SelectOp::create(
1541 state.builder, reducedVal.
getLoc(), mask, reducedVal, neutralVal);
1543 dbgs() <<
"\n[early-vect]+++++ masking an input to a binary op that"
1544 "produces value for a yield Op: "
1545 << maskedReducedVal);
1546 combinerOps.back()->replaceUsesOfWith(reducedVal, maskedReducedVal);
1564 VectorizationState &state) {
1566 assert(!isa<vector::TransferReadOp>(op) &&
1567 "vector.transfer_read cannot be further vectorized");
1568 assert(!isa<vector::TransferWriteOp>(op) &&
1569 "vector.transfer_write cannot be further vectorized");
1571 if (
auto loadOp = dyn_cast<AffineLoadOp>(op))
1573 if (
auto storeOp = dyn_cast<AffineStoreOp>(op))
1575 if (
auto forOp = dyn_cast<AffineForOp>(op))
1577 if (
auto yieldOp = dyn_cast<AffineYieldOp>(op))
1579 if (
auto constant = dyn_cast<arith::ConstantOp>(op))
1581 if (
auto applyOp = dyn_cast<AffineApplyOp>(op))
1599 assert(currentLevel <= loops.size() &&
"Unexpected currentLevel");
1600 if (currentLevel == loops.size())
1601 loops.emplace_back();
1625 const VectorizationStrategy &strategy) {
1626 assert(loops[0].size() == 1 &&
"Expected single root loop");
1627 AffineForOp rootLoop = loops[0][0];
1628 VectorizationState state(rootLoop.getContext());
1630 state.strategy = &strategy;
1640 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ loop is not vectorizable");
1653 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ Vectorizing: " << *op);
1657 dbgs() <<
"[early-vect]+++++ failed vectorizing the operation: "
1665 if (opVecResult.wasInterrupted()) {
1666 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ failed vectorization for: "
1667 << rootLoop <<
"\n");
1669 auto vecRootLoopIt = state.opVectorReplacement.find(rootLoop);
1670 if (vecRootLoopIt != state.opVectorReplacement.end())
1678 for (
auto resPair : state.loopResultScalarReplacement)
1679 resPair.first.replaceAllUsesWith(resPair.second);
1681 assert(state.opVectorReplacement.count(rootLoop) == 1 &&
1682 "Expected vector replacement for loop nest");
1683 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ success vectorizing pattern");
1684 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorization result:\n"
1685 << *state.opVectorReplacement[rootLoop]);
1688 state.finishVectorizationPattern(rootLoop);
1696 const VectorizationStrategy &strategy) {
1697 std::vector<SmallVector<AffineForOp, 2>> loopsToVectorize;
1709 assert(intersectionBuckets.empty() &&
"Expected empty output");
1714 AffineForOp matchRoot = cast<AffineForOp>(match.getMatchedOperation());
1715 bool intersects =
false;
1716 for (
int i = 0, end = intersectionBuckets.size(); i < end; ++i) {
1717 AffineForOp bucketRoot = bucketRoots[i];
1719 if (bucketRoot->isAncestor(matchRoot)) {
1720 intersectionBuckets[i].push_back(match);
1726 if (matchRoot->isAncestor(bucketRoot)) {
1727 bucketRoots[i] = matchRoot;
1728 intersectionBuckets[i].push_back(match);
1737 bucketRoots.push_back(matchRoot);
1738 intersectionBuckets.emplace_back();
1739 intersectionBuckets.back().push_back(match);
1754 assert((reductionLoops.empty() || vectorSizes.size() == 1) &&
1755 "Vectorizing reductions is supported only for 1-D vectors");
1758 std::optional<NestedPattern> pattern =
1759 makePattern(loops, vectorSizes.size(), fastestVaryingPattern);
1761 LLVM_DEBUG(dbgs() <<
"\n[early-vect] pattern couldn't be computed\n");
1765 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1766 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1767 LLVM_DEBUG(dbgs() <<
"\n[early-vect] new pattern on parent op\n");
1768 LLVM_DEBUG(dbgs() << *parentOp <<
"\n");
1770 unsigned patternDepth = pattern->getDepth();
1775 pattern->match(parentOp, &allMatches);
1776 std::vector<SmallVector<NestedMatch, 8>> intersectionBuckets;
1782 for (
auto &intersectingMatches : intersectionBuckets) {
1784 VectorizationStrategy strategy;
1786 strategy.vectorSizes.assign(vectorSizes.begin(), vectorSizes.end());
1787 strategy.reductionLoops = reductionLoops;
1789 patternDepth, &strategy))) {
1803 LLVM_DEBUG(dbgs() <<
"\n");
1806void affine::vectorizeChildAffineLoops(
1807 Operation *parentOp,
bool vectorizeReductions,
1808 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern) {
1814 if (vectorizeReductions) {
1815 parentOp->
walk([¶llelLoops, &reductionLoops](AffineForOp loop) {
1816 SmallVector<LoopReduction, 2> reductions;
1817 if (isLoopParallel(loop, &reductions)) {
1818 parallelLoops.insert(loop);
1820 if (!reductions.empty())
1821 reductionLoops[loop] = reductions;
1825 parentOp->
walk([¶llelLoops](AffineForOp loop) {
1826 if (isLoopParallel(loop))
1827 parallelLoops.insert(loop);
1832 NestedPatternContext mlContext;
1833 vectorizeLoops(parentOp, parallelLoops, vectorSizes, fastestVaryingPattern,
1839void Vectorize::runOnOperation() {
1840 func::FuncOp f = getOperation();
1841 if (!fastestVaryingPattern.empty() &&
1842 fastestVaryingPattern.size() != vectorSizes.size()) {
1843 f.emitRemark(
"Fastest varying pattern specified with different size than "
1844 "the vector size.");
1845 return signalPassFailure();
1848 if (vectorizeReductions && vectorSizes.size() != 1) {
1849 f.emitError(
"Vectorizing reductions is supported only for 1-D vectors.");
1850 return signalPassFailure();
1853 if (llvm::any_of(vectorSizes, [](int64_t size) {
return size <= 0; })) {
1854 f.emitError(
"Vectorization factor must be greater than zero.");
1855 return signalPassFailure();
1858 vectorizeChildAffineLoops(f, vectorizeReductions, vectorSizes,
1859 fastestVaryingPattern);
1875 if (loops[0].size() != 1)
1879 for (
int i = 1, end = loops.size(); i < end; ++i) {
1880 for (AffineForOp loop : loops[i]) {
1883 if (none_of(loops[i - 1], [&](AffineForOp maybeParent) {
1884 return maybeParent->isProperAncestor(loop);
1890 for (AffineForOp sibling : loops[i]) {
1891 if (sibling->isProperAncestor(loop))
1908void mlir::affine::vectorizeAffineLoops(
1910 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
1913 NestedPatternContext mlContext;
1914 vectorizeLoops(parentOp, loops, vectorSizes, fastestVaryingPattern,
1953LogicalResult mlir::affine::vectorizeAffineLoopNest(
1954 std::vector<SmallVector<AffineForOp, 2>> &loops,
1955 const VectorizationStrategy &strategy) {
1957 NestedPatternContext mlContext;
Returns failure if copies could not be generated due to yet-unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` in `copyPlacementBlock` specify the insertion points where the incoming and outgoing copies, respectively, should be inserted. The output argument `nBegin` is set to the replacement of `begin` (set to `begin` itself if no invalidation happens). Since outgoing copies could have been inserted at `end`, the corresponding end insertion point is updated analogously.
static Operation * vectorizeUniform(Value uniformVal, VectorizationState &state)
Generates a broadcast op for the provided uniform value using the vectorization strategy in 'state'.
static std::optional< NestedPattern > makePattern(const DenseSet< Operation * > ¶llelLoops, int vectorRank, ArrayRef< int64_t > fastestVaryingPattern)
Creates a vectorization pattern from the command line arguments.
static LogicalResult vectorizeRootMatch(NestedMatch m, const VectorizationStrategy &strategy)
Extracts the matched loops and vectorizes them following a topological order.
static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
static LogicalResult verifyLoopNesting(const std::vector< SmallVector< AffineForOp, 2 > > &loops)
Verify that affine loops in 'loops' meet the nesting criteria expected by SuperVectorizer:
static Operation * vectorizeOneOperation(Operation *op, VectorizationState &state)
Encodes Operation-specific behavior for vectorization.
static bool isNeutralElementConst(arith::AtomicRMWKind reductionKind, Value value, VectorizationState &state)
Returns true if value is a constant equal to the neutral element of the given vectorizable reduction.
static LogicalResult vectorizeLoopNest(std::vector< SmallVector< AffineForOp, 2 > > &loops, const VectorizationStrategy &strategy)
Internal implementation to vectorize affine loops from a single loop nest using an n-D vectorization strategy.
static Operation * vectorizeAffineLoad(AffineLoadOp loadOp, VectorizationState &state)
Vectorizes an affine load with the vectorization strategy in 'state' by generating a 'vector.transfer_read' operation.
static Operation * vectorizeAffineForOp(AffineForOp forOp, VectorizationState &state)
Vectorizes a loop with the vectorization strategy in 'state'.
static Operation * vectorizeAffineApplyOp(AffineApplyOp applyOp, VectorizationState &state)
We have no need to vectorize affine.apply.
static LogicalResult analyzeProfitability(ArrayRef< NestedMatch > matches, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
Implements a simple strawman strategy for vectorization.
static FilterFunctionType isVectorizableLoopPtrFactory(const DenseSet< Operation * > ¶llelLoops, int fastestVaryingMemRefDimension)
Forward declaration.
static bool isIVMappedToMultipleIndices(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &loopToVectorDim)
Returns true if any vectorized loop IV drives more than one index.
static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp, VectorizationState &state)
Tries to transform a scalar constant into a vector constant.
static bool isUniformDefinition(Value value, const VectorizationStrategy *strategy)
Returns true if the provided value is vector uniform given the vectorization strategy.
static void eraseLoopNest(AffineForOp forOp)
Erases a loop nest, including all its nested operations.
static VectorType getVectorType(Type scalarTy, const VectorizationStrategy *strategy)
Returns the vector type resulting from applying the provided vectorization strategy on the scalar type.
static void getMatchedAffineLoops(NestedMatch match, std::vector< SmallVector< AffineForOp, 2 > > &loops)
Converts all the nested loops in 'match' to a 2D vector container that preserves the relative nesting...
static Value vectorizeOperand(Value operand, VectorizationState &state)
Tries to vectorize a given operand by applying the following logic:
static void getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel, std::vector< SmallVector< AffineForOp, 2 > > &loops)
Recursive implementation to convert all the nested loops in 'match' to a 2D vector container that preserves the relative nesting level of each loop.
static Operation * vectorizeAffineYieldOp(AffineYieldOp yieldOp, VectorizationState &state)
Vectorizes a yield operation by widening its types.
static arith::ConstantOp createInitialVector(arith::AtomicRMWKind reductionKind, Value oldOperand, VectorizationState &state)
Creates a constant vector filled with the neutral elements of the given reduction.
static Operation * widenOp(Operation *op, VectorizationState &state)
Vectorizes arbitrary operation by plain widening.
static Operation * vectorizeAffineStore(AffineStoreOp storeOp, VectorizationState &state)
Vectorizes an affine store with the vectorization strategy in 'state' by generating a 'vector.transfer_write' operation.
static NestedPattern & vectorTransferPattern()
static void vectorizeLoops(Operation *parentOp, DenseSet< Operation * > &loops, ArrayRef< int64_t > vectorSizes, ArrayRef< int64_t > fastestVaryingPattern, const ReductionLoopMap &reductionLoops)
Internal implementation to vectorize affine loops in 'loops' using the n-D vectorization factors in 'vectorSizes'.
static void computeMemoryOpIndices(Operation *op, AffineMap map, ValueRange mapOperands, VectorizationState &state, SmallVectorImpl< Value > &results)
static void computeIntersectionBuckets(ArrayRef< NestedMatch > matches, std::vector< SmallVector< NestedMatch, 8 > > &intersectionBuckets)
Traverses all the loop matches and classifies them into intersection buckets.
static Value createMask(AffineForOp vecForOp, VectorizationState &state)
Creates a mask used to filter out garbage elements in the last iteration of unaligned loops.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
Base type for affine expression.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
Attributes are known-constant values of operations.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
AffineMap getMultiDimIdentityMap(unsigned rank)
IntegerType getIntegerType(unsigned width)
AffineExpr getAffineDimExpr(unsigned position)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
auto lookupOrNull(T from) const
Lookup a mapped value within the map.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
void setInsertionPointAfterValue(Value val)
Sets the insertion point to the node after the specified value.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
Location getLoc()
The source location the operation was defined or derived from.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
unsigned getNumOperands()
OperationName getName()
The name of an operation is the key identifier for it.
operand_range getOperands()
Returns an iterator on the underlying Value's.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getResults()
unsigned getNumResults()
Return the number of results held by this operation.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndexOrFloat() const
Return true if this is an integer (of any signedness), index, or float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
An NestedPattern captures nested patterns in the IR.
ArrayRef< NestedMatch > getMatchedChildren()
Operation * getMatchedOperation() const
NestedPattern For(const NestedPattern &child)
NestedPattern Op(FilterFunctionType filter=defaultFilterFunction)
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
DenseMap< Operation *, SmallVector< LoopReduction, 2 > > ReductionLoopMap
bool isVectorizableLoopBody(AffineForOp loop, NestedPattern &vectorTransferMatcher)
Checks whether the loop is structurally vectorizable; i.e.:
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
std::function< bool(Operation &)> FilterFunctionType
A NestedPattern is a nested operation walker that:
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRM...
Value getVectorReductionOp(arith::AtomicRMWKind op, OpBuilder &builder, Location loc, Value vector)
Returns the value obtained by reducing the vector into a scalar using the operation kind associated w...
Include the generated interface declarations.
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
VectorizationState(RewriterBase &rewriter)