29#include "llvm/ADT/STLExtras.h"
30#include "llvm/Support/Debug.h"
35#define GEN_PASS_DEF_AFFINEVECTORIZE
36#include "mlir/Dialect/Affine/Transforms/Passes.h.inc"
575#define DEBUG_TYPE "early-vect"
582 int fastestVaryingMemRefDimension);
588static std::optional<NestedPattern>
592 int64_t d0 = fastestVaryingPattern.empty() ? -1 : fastestVaryingPattern[0];
593 int64_t d1 = fastestVaryingPattern.size() < 2 ? -1 : fastestVaryingPattern[1];
594 int64_t d2 = fastestVaryingPattern.size() < 3 ? -1 : fastestVaryingPattern[2];
595 switch (vectorRank) {
613 llvm::IsaPred<vector::TransferReadOp, vector::TransferWriteOp>);
621struct Vectorize :
public affine::impl::AffineVectorizeBase<Vectorize> {
624 void runOnOperation()
override;
630 unsigned patternDepth,
631 VectorizationStrategy *strategy) {
632 assert(patternDepth > depthInPattern &&
633 "patternDepth is greater than depthInPattern");
634 if (patternDepth - depthInPattern > strategy->vectorSizes.size()) {
638 strategy->loopToVectorDim[loop] =
639 strategy->vectorSizes.size() - (patternDepth - depthInPattern);
658 unsigned depthInPattern,
659 unsigned patternDepth,
660 VectorizationStrategy *strategy) {
661 for (
auto m : matches) {
663 patternDepth, strategy))) {
667 patternDepth, strategy);
676struct VectorizationState {
689 void registerOpVectorReplacement(Operation *replaced, Operation *
replacement);
702 void registerValueVectorReplacement(Value replaced, Operation *
replacement);
709 void registerBlockArgVectorReplacement(BlockArgument replaced,
722 void registerValueScalarReplacement(Value replaced, Value
replacement);
734 void registerLoopResultScalarReplacement(Value replaced, Value
replacement);
738 void getScalarValueReplacementsFor(
ValueRange inputVals,
739 SmallVectorImpl<Value> &replacedVals);
742 void finishVectorizationPattern(AffineForOp rootLoop);
751 IRMapping valueVectorReplacement;
754 IRMapping valueScalarReplacement;
756 DenseMap<Value, Value> loopResultScalarReplacement;
766 const VectorizationStrategy *strategy =
nullptr;
771 void registerValueVectorReplacementImpl(Value replaced, Value
replacement);
785void VectorizationState::registerOpVectorReplacement(
Operation *replaced,
787 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ commit vectorized op:\n");
788 LLVM_DEBUG(dbgs() << *replaced <<
"\n");
789 LLVM_DEBUG(dbgs() <<
"into\n");
793 "Unexpected replaced and replacement results");
794 assert(opVectorReplacement.count(replaced) == 0 &&
"already registered");
797 for (
auto resultTuple :
799 registerValueVectorReplacementImpl(std::get<0>(resultTuple),
800 std::get<1>(resultTuple));
813void VectorizationState::registerValueVectorReplacement(
816 "Expected single-result replacement");
820 registerValueVectorReplacementImpl(replaced,
replacement->getResult(0));
828void VectorizationState::registerBlockArgVectorReplacement(
829 BlockArgument replaced, BlockArgument
replacement) {
830 registerValueVectorReplacementImpl(replaced,
replacement);
833void VectorizationState::registerValueVectorReplacementImpl(Value replaced,
835 assert(!valueVectorReplacement.
contains(replaced) &&
836 "Vector replacement already registered");
838 "Expected vector type in vector replacement");
852void VectorizationState::registerValueScalarReplacement(Value replaced,
854 assert(!valueScalarReplacement.
contains(replaced) &&
855 "Scalar value replacement already registered");
857 "Expected scalar type in scalar replacement");
870void VectorizationState::registerLoopResultScalarReplacement(
873 assert(loopResultScalarReplacement.count(replaced) == 0 &&
874 "already registered");
875 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ will replace a result of the loop "
878 loopResultScalarReplacement[replaced] =
replacement;
882void VectorizationState::getScalarValueReplacementsFor(
883 ValueRange inputVals, SmallVectorImpl<Value> &replacedVals) {
884 for (Value inputVal : inputVals)
885 replacedVals.push_back(valueScalarReplacement.
lookupOrDefault(inputVal));
890 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ erasing:\n" << forOp <<
"\n");
895void VectorizationState::finishVectorizationPattern(AffineForOp rootLoop) {
896 LLVM_DEBUG(dbgs() <<
"\n[early-vect] Finalizing vectorization\n");
903 VectorizationState &state,
908 auto afOp = AffineApplyOp::create(state.builder, op->
getLoc(), singleResMap,
910 results.push_back(afOp);
919 int fastestVaryingMemRefDimension) {
920 return [¶llelLoops, fastestVaryingMemRefDimension](
Operation &forOp) {
921 auto loop = cast<AffineForOp>(forOp);
922 if (!parallelLoops.contains(loop))
925 auto vectorizableBody =
927 if (!vectorizableBody)
929 return memRefDim == -1 || fastestVaryingMemRefDimension == -1 ||
930 memRefDim == fastestVaryingMemRefDimension;
937 const VectorizationStrategy *strategy) {
938 assert(!isa<VectorType>(scalarTy) &&
"Expected scalar type");
939 return VectorType::get(strategy->vectorSizes, scalarTy);
946 VectorizationState &state) {
947 Type scalarTy = constOp.getType();
948 if (!VectorType::isValidElementType(scalarTy))
957 while (parentOp && !state.vecLoopToVecDim.count(parentOp))
959 assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
960 isa<AffineForOp>(parentOp) &&
"Expected a vectorized for op");
961 auto vecForOp = cast<AffineForOp>(parentOp);
964 arith::ConstantOp::create(state.builder, constOp.getLoc(), vecAttr);
967 state.registerOpVectorReplacement(constOp, newConstOp);
974 VectorizationState &state) {
976 for (
Value operand : applyOp.getOperands()) {
977 if (state.valueVectorReplacement.
contains(operand)) {
979 dbgs() <<
"\n[early-vect]+++++ affine.apply on vector operand\n");
984 updatedOperand = operand;
985 updatedOperands.push_back(updatedOperand);
988 auto newApplyOp = AffineApplyOp::create(
989 state.builder, applyOp.getLoc(), applyOp.getAffineMap(), updatedOperands);
992 state.registerValueScalarReplacement(applyOp.getResult(),
993 newApplyOp.getResult());
1002 VectorizationState &state) {
1004 if (!VectorType::isValidElementType(scalarTy))
1007 Attribute valueAttr = getIdentityValueAttr(
1008 reductionKind, scalarTy, state.builder, oldOperand.
getLoc());
1012 arith::ConstantOp::create(state.builder, oldOperand.
getLoc(), vecAttr);
1025 assert(state.strategy->vectorSizes.size() == 1 &&
1026 "Creating a mask non-1-D vectors is not supported.");
1027 assert(vecForOp.getStep() == state.strategy->vectorSizes[0] &&
1028 "Creating a mask for loops with non-unit original step size is not "
1032 if (
Value mask = state.vecLoopToMask.lookup(vecForOp))
1037 if (vecForOp.hasConstantBounds()) {
1039 vecForOp.getConstantUpperBound() - vecForOp.getConstantLowerBound();
1040 if (originalTripCount % vecForOp.getStepAsInt() == 0)
1061 AffineMap ubMap = vecForOp.getUpperBoundMap();
1064 ub = AffineApplyOp::create(state.builder, loc, vecForOp.getUpperBoundMap(),
1065 vecForOp.getUpperBoundOperands());
1067 ub = AffineMinOp::create(state.builder, loc, vecForOp.getUpperBoundMap(),
1068 vecForOp.getUpperBoundOperands());
1074 {ub, vecForOp.getInductionVar()});
1077 ub.getDefiningOp()->erase();
1079 Type maskTy = VectorType::get(state.strategy->vectorSizes,
1082 vector::CreateMaskOp::create(state.builder, loc, maskTy, itersLeft);
1084 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a mask:\n"
1085 << itersLeft <<
"\n"
1088 state.vecLoopToMask[vecForOp] = mask;
1098 const VectorizationStrategy *strategy) {
1100 if (forOp && strategy->loopToVectorDim.count(forOp) == 0)
1103 for (
auto loopToDim : strategy->loopToVectorDim) {
1104 auto loop = cast<AffineForOp>(loopToDim.first);
1105 if (!loop.isDefinedOutsideOfLoop(value))
1115 VectorizationState &state) {
1117 Value uniformScalarRepl =
1122 auto bcastOp = BroadcastOp::create(state.builder, uniformVal.
getLoc(),
1123 vectorTy, uniformScalarRepl);
1124 state.registerValueVectorReplacement(uniformVal, bcastOp);
1146 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorize operand: " << operand);
1149 LLVM_DEBUG(dbgs() <<
" -> already vectorized: " << vecRepl);
1156 assert(!isa<VectorType>(operand.
getType()) &&
1157 "Vector op not found in replacement map");
1160 if (
auto constOp = operand.
getDefiningOp<arith::ConstantOp>()) {
1162 LLVM_DEBUG(dbgs() <<
"-> constant: " << vecConstant);
1163 return vecConstant.getResult();
1169 LLVM_DEBUG(dbgs() <<
"-> uniform: " << *vecUniform);
1176 LLVM_DEBUG(dbgs() <<
"-> unsupported block argument\n");
1179 LLVM_DEBUG(dbgs() <<
"-> non-vectorizable\n");
1188 for (
auto &kvp : loopToVectorDim) {
1189 AffineForOp forOp = cast<AffineForOp>(kvp.first);
1194 unsigned nonInvariant = 0;
1196 if (invariants.count(idx))
1199 if (++nonInvariant > 1) {
1200 LLVM_DEBUG(dbgs() <<
"[early‑vect] Bail out: IV "
1201 << forOp.getInductionVar() <<
" drives "
1202 << nonInvariant <<
" indices\n");
1217 VectorizationState &state) {
1218 MemRefType memRefType = loadOp.getMemRefType();
1219 Type elementType = memRefType.getElementType();
1220 auto vectorType = VectorType::get(state.strategy->vectorSizes, elementType);
1224 state.getScalarValueReplacementsFor(loadOp.getMapOperands(), mapOperands);
1228 indices.reserve(memRefType.getRank());
1229 if (loadOp.getAffineMap() !=
1232 for (
auto op : mapOperands) {
1233 if (op.getDefiningOp<AffineApplyOp>())
1239 indices.append(mapOperands.begin(), mapOperands.end());
1247 indices, state.vecLoopToVecDim);
1248 if (!permutationMap) {
1249 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ can't compute permutationMap\n");
1252 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1253 LLVM_DEBUG(permutationMap.print(dbgs()));
1255 auto transfer = vector::TransferReadOp::create(
1256 state.builder, loadOp.getLoc(), vectorType, loadOp.getMemRef(),
indices,
1257 std::nullopt, permutationMap);
1260 state.registerOpVectorReplacement(loadOp, transfer);
1271 VectorizationState &state) {
1272 MemRefType memRefType = storeOp.getMemRefType();
1279 state.getScalarValueReplacementsFor(storeOp.getMapOperands(), mapOperands);
1283 indices.reserve(memRefType.getRank());
1284 if (storeOp.getAffineMap() !=
1289 indices.append(mapOperands.begin(), mapOperands.end());
1296 indices, state.vecLoopToVecDim);
1297 if (!permutationMap)
1299 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1300 LLVM_DEBUG(permutationMap.print(dbgs()));
1302 auto transfer = vector::TransferWriteOp::create(
1303 state.builder, storeOp.getLoc(), vectorValue, storeOp.getMemRef(),
1305 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorized store: " << transfer);
1308 state.registerOpVectorReplacement(storeOp, transfer);
1315 Value value, VectorizationState &state) {
1317 if (!VectorType::isValidElementType(scalarTy))
1319 Attribute valueAttr = getIdentityValueAttr(reductionKind, scalarTy,
1320 state.builder, value.
getLoc());
1321 if (
auto constOp = value.
getDefiningOp<arith::ConstantOp>())
1322 return constOp.getValue() == valueAttr;
1333 VectorizationState &state) {
1334 const VectorizationStrategy &strategy = *state.strategy;
1335 auto loopToVecDimIt = strategy.loopToVectorDim.find(forOp);
1336 bool isLoopVecDim = loopToVecDimIt != strategy.loopToVectorDim.end();
1339 if (isLoopVecDim && forOp.getNumIterOperands() > 0 && forOp.getStep() != 1) {
1342 <<
"\n[early-vect]+++++ unsupported step size for reduction loop: "
1343 << forOp.getStep() <<
"\n");
1352 unsigned vectorDim = loopToVecDimIt->second;
1353 assert(vectorDim < strategy.vectorSizes.size() &&
"vector dim overflow");
1354 int64_t forOpVecFactor = strategy.vectorSizes[vectorDim];
1355 newStep = forOp.getStepAsInt() * forOpVecFactor;
1357 newStep = forOp.getStepAsInt();
1362 if (isLoopVecDim && forOp.getNumIterOperands() > 0) {
1363 auto it = strategy.reductionLoops.find(forOp);
1364 assert(it != strategy.reductionLoops.end() &&
1365 "Reduction descriptors not found when vectorizing a reduction loop");
1366 reductions = it->second;
1367 assert(reductions.size() == forOp.getNumIterOperands() &&
1368 "The size of reductions array must match the number of iter_args");
1373 if (!isLoopVecDim) {
1374 for (
auto operand : forOp.getInits())
1380 for (
auto redAndOperand : llvm::zip(reductions, forOp.getInits())) {
1382 std::get<0>(redAndOperand).kind, std::get<1>(redAndOperand), state));
1390 state.getScalarValueReplacementsFor(forOp.getLowerBoundOperands(),
1392 state.getScalarValueReplacementsFor(forOp.getUpperBoundOperands(),
1394 auto vecForOp = AffineForOp::create(
1395 state.builder, forOp.getLoc(), lbOperands, forOp.getLowerBoundMap(),
1396 ubOperands, forOp.getUpperBoundMap(), newStep, vecIterOperands,
1415 state.registerOpVectorReplacement(forOp, vecForOp);
1416 state.registerValueScalarReplacement(forOp.getInductionVar(),
1417 vecForOp.getInductionVar());
1418 for (
auto iterTuple :
1419 llvm ::zip(forOp.getRegionIterArgs(), vecForOp.getRegionIterArgs()))
1420 state.registerBlockArgVectorReplacement(std::get<0>(iterTuple),
1421 std::get<1>(iterTuple));
1424 for (
unsigned i = 0; i < vecForOp.getNumIterOperands(); ++i) {
1428 vecForOp.getLoc(), vecForOp.getResult(i));
1429 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a vector reduction: "
1433 Value origInit = forOp.getOperand(forOp.getNumControlOperands() + i);
1434 Value finalRes = reducedRes;
1438 reducedRes.
getLoc(), reducedRes, origInit);
1439 state.registerLoopResultScalarReplacement(forOp.getResult(i), finalRes);
1444 state.vecLoopToVecDim[vecForOp] = loopToVecDimIt->second;
1452 if (isLoopVecDim && forOp.getNumIterOperands() > 0)
1464 vectorTypes.push_back(
1465 VectorType::get(state.strategy->vectorSizes,
result.getType()));
1471 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ an operand failed vectorize\n");
1474 vectorOperands.push_back(vecOperand);
1484 vectorOperands, vectorTypes, op->
getAttrs());
1485 state.registerOpVectorReplacement(op, vecOp);
1494 VectorizationState &state) {
1507 if (
Value mask = state.vecLoopToMask.lookup(newParentOp)) {
1512 cast<AffineForOp>(newParentOp).getRegionIterArgs(), i, combinerOps);
1513 assert(reducedVal &&
"expect non-null value for parallel reduction loop");
1514 assert(combinerOps.size() == 1 &&
"expect only one combiner op");
1516 Value neutralVal = cast<AffineForOp>(newParentOp).getInits()[i];
1518 Value maskedReducedVal = arith::SelectOp::create(
1519 state.builder, reducedVal.
getLoc(), mask, reducedVal, neutralVal);
1521 dbgs() <<
"\n[early-vect]+++++ masking an input to a binary op that"
1522 "produces value for a yield Op: "
1523 << maskedReducedVal);
1524 combinerOps.back()->replaceUsesOfWith(reducedVal, maskedReducedVal);
1542 VectorizationState &state) {
1544 assert(!isa<vector::TransferReadOp>(op) &&
1545 "vector.transfer_read cannot be further vectorized");
1546 assert(!isa<vector::TransferWriteOp>(op) &&
1547 "vector.transfer_write cannot be further vectorized");
1549 if (
auto loadOp = dyn_cast<AffineLoadOp>(op))
1551 if (
auto storeOp = dyn_cast<AffineStoreOp>(op))
1553 if (
auto forOp = dyn_cast<AffineForOp>(op))
1555 if (
auto yieldOp = dyn_cast<AffineYieldOp>(op))
1557 if (
auto constant = dyn_cast<arith::ConstantOp>(op))
1559 if (
auto applyOp = dyn_cast<AffineApplyOp>(op))
1577 assert(currentLevel <= loops.size() &&
"Unexpected currentLevel");
1578 if (currentLevel == loops.size())
1579 loops.emplace_back();
1603 const VectorizationStrategy &strategy) {
1604 assert(loops[0].size() == 1 &&
"Expected single root loop");
1605 AffineForOp rootLoop = loops[0][0];
1606 VectorizationState state(rootLoop.getContext());
1608 state.strategy = &strategy;
1618 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ loop is not vectorizable");
1631 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ Vectorizing: " << *op);
1635 dbgs() <<
"[early-vect]+++++ failed vectorizing the operation: "
1643 if (opVecResult.wasInterrupted()) {
1644 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ failed vectorization for: "
1645 << rootLoop <<
"\n");
1647 auto vecRootLoopIt = state.opVectorReplacement.find(rootLoop);
1648 if (vecRootLoopIt != state.opVectorReplacement.end())
1656 for (
auto resPair : state.loopResultScalarReplacement)
1657 resPair.first.replaceAllUsesWith(resPair.second);
1659 assert(state.opVectorReplacement.count(rootLoop) == 1 &&
1660 "Expected vector replacement for loop nest");
1661 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ success vectorizing pattern");
1662 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorization result:\n"
1663 << *state.opVectorReplacement[rootLoop]);
1666 state.finishVectorizationPattern(rootLoop);
1674 const VectorizationStrategy &strategy) {
1675 std::vector<SmallVector<AffineForOp, 2>> loopsToVectorize;
1687 assert(intersectionBuckets.empty() &&
"Expected empty output");
1692 AffineForOp matchRoot = cast<AffineForOp>(match.getMatchedOperation());
1693 bool intersects =
false;
1694 for (
int i = 0, end = intersectionBuckets.size(); i < end; ++i) {
1695 AffineForOp bucketRoot = bucketRoots[i];
1697 if (bucketRoot->isAncestor(matchRoot)) {
1698 intersectionBuckets[i].push_back(match);
1704 if (matchRoot->isAncestor(bucketRoot)) {
1705 bucketRoots[i] = matchRoot;
1706 intersectionBuckets[i].push_back(match);
1715 bucketRoots.push_back(matchRoot);
1716 intersectionBuckets.emplace_back();
1717 intersectionBuckets.back().push_back(match);
1732 assert((reductionLoops.empty() || vectorSizes.size() == 1) &&
1733 "Vectorizing reductions is supported only for 1-D vectors");
1736 std::optional<NestedPattern> pattern =
1737 makePattern(loops, vectorSizes.size(), fastestVaryingPattern);
1739 LLVM_DEBUG(dbgs() <<
"\n[early-vect] pattern couldn't be computed\n");
1743 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1744 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1745 LLVM_DEBUG(dbgs() <<
"\n[early-vect] new pattern on parent op\n");
1746 LLVM_DEBUG(dbgs() << *parentOp <<
"\n");
1748 unsigned patternDepth = pattern->getDepth();
1753 pattern->match(parentOp, &allMatches);
1754 std::vector<SmallVector<NestedMatch, 8>> intersectionBuckets;
1760 for (
auto &intersectingMatches : intersectionBuckets) {
1762 VectorizationStrategy strategy;
1764 strategy.vectorSizes.assign(vectorSizes.begin(), vectorSizes.end());
1765 strategy.reductionLoops = reductionLoops;
1767 patternDepth, &strategy))) {
1781 LLVM_DEBUG(dbgs() <<
"\n");
1784void affine::vectorizeChildAffineLoops(
1785 Operation *parentOp,
bool vectorizeReductions,
1786 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern) {
1792 if (vectorizeReductions) {
1793 parentOp->
walk([¶llelLoops, &reductionLoops](AffineForOp loop) {
1794 SmallVector<LoopReduction, 2> reductions;
1795 if (isLoopParallel(loop, &reductions)) {
1796 parallelLoops.insert(loop);
1798 if (!reductions.empty())
1799 reductionLoops[loop] = reductions;
1803 parentOp->
walk([¶llelLoops](AffineForOp loop) {
1804 if (isLoopParallel(loop))
1805 parallelLoops.insert(loop);
1810 NestedPatternContext mlContext;
1811 vectorizeLoops(parentOp, parallelLoops, vectorSizes, fastestVaryingPattern,
1817void Vectorize::runOnOperation() {
1818 func::FuncOp f = getOperation();
1819 if (!fastestVaryingPattern.empty() &&
1820 fastestVaryingPattern.size() != vectorSizes.size()) {
1821 f.emitRemark(
"Fastest varying pattern specified with different size than "
1822 "the vector size.");
1823 return signalPassFailure();
1826 if (vectorizeReductions && vectorSizes.size() != 1) {
1827 f.emitError(
"Vectorizing reductions is supported only for 1-D vectors.");
1828 return signalPassFailure();
1831 if (llvm::any_of(vectorSizes, [](int64_t size) {
return size <= 0; })) {
1832 f.emitError(
"Vectorization factor must be greater than zero.");
1833 return signalPassFailure();
1836 vectorizeChildAffineLoops(f, vectorizeReductions, vectorSizes,
1837 fastestVaryingPattern);
1853 if (loops[0].size() != 1)
1857 for (
int i = 1, end = loops.size(); i < end; ++i) {
1858 for (AffineForOp loop : loops[i]) {
1861 if (none_of(loops[i - 1], [&](AffineForOp maybeParent) {
1862 return maybeParent->isProperAncestor(loop);
1868 for (AffineForOp sibling : loops[i]) {
1869 if (sibling->isProperAncestor(loop))
1886void mlir::affine::vectorizeAffineLoops(
1888 ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
1891 NestedPatternContext mlContext;
1892 vectorizeLoops(parentOp, loops, vectorSizes, fastestVaryingPattern,
1931LogicalResult mlir::affine::vectorizeAffineLoopNest(
1932 std::vector<SmallVector<AffineForOp, 2>> &loops,
1933 const VectorizationStrategy &strategy) {
1935 NestedPatternContext mlContext;
… if copies could not be generated due to yet unimplemented cases. copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock specify the insertion points where the incoming copies and outgoing copies should be inserted. The output argument nBegin is set to its replacement (set to `begin` if no invalidation happens). Since outgoing copies could have been inserted at `end`, …
static Operation * vectorizeUniform(Value uniformVal, VectorizationState &state)
Generates a broadcast op for the provided uniform value using the vectorization strategy in 'state'.
static std::optional< NestedPattern > makePattern(const DenseSet< Operation * > &parallelLoops, int vectorRank, ArrayRef< int64_t > fastestVaryingPattern)
Creates a vectorization pattern from the command line arguments.
static LogicalResult vectorizeRootMatch(NestedMatch m, const VectorizationStrategy &strategy)
Extracts the matched loops and vectorizes them following a topological order.
static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
static LogicalResult verifyLoopNesting(const std::vector< SmallVector< AffineForOp, 2 > > &loops)
Verify that affine loops in 'loops' meet the nesting criteria expected by SuperVectorizer:
static Operation * vectorizeOneOperation(Operation *op, VectorizationState &state)
Encodes Operation-specific behavior for vectorization.
static bool isNeutralElementConst(arith::AtomicRMWKind reductionKind, Value value, VectorizationState &state)
Returns true if value is a constant equal to the neutral element of the given vectorizable reduction.
static LogicalResult vectorizeLoopNest(std::vector< SmallVector< AffineForOp, 2 > > &loops, const VectorizationStrategy &strategy)
Internal implementation to vectorize affine loops from a single loop nest using an n-D vectorization ...
static Operation * vectorizeAffineLoad(AffineLoadOp loadOp, VectorizationState &state)
Vectorizes an affine load with the vectorization strategy in 'state' by generating a 'vector....
static Operation * vectorizeAffineForOp(AffineForOp forOp, VectorizationState &state)
Vectorizes a loop with the vectorization strategy in 'state'.
static Operation * vectorizeAffineApplyOp(AffineApplyOp applyOp, VectorizationState &state)
We have no need to vectorize affine.apply.
static LogicalResult analyzeProfitability(ArrayRef< NestedMatch > matches, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
Implements a simple strawman strategy for vectorization.
static FilterFunctionType isVectorizableLoopPtrFactory(const DenseSet< Operation * > &parallelLoops, int fastestVaryingMemRefDimension)
Forward declaration.
static bool isIVMappedToMultipleIndices(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &loopToVectorDim)
Returns true if any vectorized loop IV drives more than one index.
static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp, VectorizationState &state)
Tries to transform a scalar constant into a vector constant.
static bool isUniformDefinition(Value value, const VectorizationStrategy *strategy)
Returns true if the provided value is vector uniform given the vectorization strategy.
static void eraseLoopNest(AffineForOp forOp)
Erases a loop nest, including all its nested operations.
static VectorType getVectorType(Type scalarTy, const VectorizationStrategy *strategy)
Returns the vector type resulting from applying the provided vectorization strategy on the scalar typ...
static void getMatchedAffineLoops(NestedMatch match, std::vector< SmallVector< AffineForOp, 2 > > &loops)
Converts all the nested loops in 'match' to a 2D vector container that preserves the relative nesting...
static Value vectorizeOperand(Value operand, VectorizationState &state)
Tries to vectorize a given operand by applying the following logic:
static void getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel, std::vector< SmallVector< AffineForOp, 2 > > &loops)
Recursive implementation to convert all the nested loops in 'match' to a 2D vector container that pre...
static Operation * vectorizeAffineYieldOp(AffineYieldOp yieldOp, VectorizationState &state)
Vectorizes a yield operation by widening its types.
static arith::ConstantOp createInitialVector(arith::AtomicRMWKind reductionKind, Value oldOperand, VectorizationState &state)
Creates a constant vector filled with the neutral elements of the given reduction.
static Operation * widenOp(Operation *op, VectorizationState &state)
Vectorizes arbitrary operation by plain widening.
static Operation * vectorizeAffineStore(AffineStoreOp storeOp, VectorizationState &state)
Vectorizes an affine store with the vectorization strategy in 'state' by generating a 'vector....
static NestedPattern & vectorTransferPattern()
static void vectorizeLoops(Operation *parentOp, DenseSet< Operation * > &loops, ArrayRef< int64_t > vectorSizes, ArrayRef< int64_t > fastestVaryingPattern, const ReductionLoopMap &reductionLoops)
Internal implementation to vectorize affine loops in 'loops' using the n-D vectorization factors in '...
static void computeMemoryOpIndices(Operation *op, AffineMap map, ValueRange mapOperands, VectorizationState &state, SmallVectorImpl< Value > &results)
static void computeIntersectionBuckets(ArrayRef< NestedMatch > matches, std::vector< SmallVector< NestedMatch, 8 > > &intersectionBuckets)
Traverses all the loop matches and classifies them into intersection buckets.
static Value createMask(AffineForOp vecForOp, VectorizationState &state)
Creates a mask used to filter out garbage elements in the last iteration of unaligned loops.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
Attributes are known-constant values of operations.
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
AffineMap getMultiDimIdentityMap(unsigned rank)
IntegerType getIntegerType(unsigned width)
AffineExpr getAffineDimExpr(unsigned position)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
bool contains(T from) const
Checks to see if a mapping for 'from' exists.
auto lookupOrNull(T from) const
Lookup a mapped value within the map.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
void setInsertionPointAfterValue(Value val)
Sets the insertion point to the node after the specified value.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
Location getLoc()
The source location the operation was defined or derived from.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
unsigned getNumOperands()
OperationName getName()
The name of an operation is the key identifier for it.
operand_range getOperands()
Returns an iterator on the underlying Value's.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getResults()
unsigned getNumResults()
Return the number of results held by this operation.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndexOrFloat() const
Return true if this is an integer (of any signedness), index, or float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
A NestedPattern captures nested patterns in the IR.
ArrayRef< NestedMatch > getMatchedChildren()
Operation * getMatchedOperation() const
NestedPattern For(const NestedPattern &child)
NestedPattern Op(FilterFunctionType filter=defaultFilterFunction)
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
DenseMap< Operation *, SmallVector< LoopReduction, 2 > > ReductionLoopMap
bool isVectorizableLoopBody(AffineForOp loop, NestedPattern &vectorTransferMatcher)
Checks whether the loop is structurally vectorizable; i.e.:
DenseSet< Value, DenseMapInfo< Value > > getInvariantAccesses(Value iv, ArrayRef< Value > indices)
Given an induction variable iv of type AffineForOp and indices of type IndexType, returns the set of ...
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
std::function< bool(Operation &)> FilterFunctionType
A NestedPattern is a nested operation walker that:
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRM...
Value getVectorReductionOp(arith::AtomicRMWKind op, OpBuilder &builder, Location loc, Value vector)
Returns the value obtained by reducing the vector into a scalar using the operation kind associated w...
Include the generated interface declarations.
llvm::DenseSet< ValueT, ValueInfoT > DenseSet
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
VectorizationState(RewriterBase &rewriter)