#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"

namespace mlir {
namespace affine {
#define GEN_PASS_DEF_AFFINEVECTORIZE
#include "mlir/Dialect/Affine/Passes.h.inc"
} // namespace affine
} // namespace mlir

using namespace mlir;
using namespace affine;
using namespace vector;
#define DEBUG_TYPE "early-vect"

using llvm::dbgs;

/// Forward declaration.
static FilterFunctionType
isVectorizableLoopPtrFactory(const DenseSet<Operation *> &parallelLoops,
                             int fastestVaryingMemRefDimension);
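/// Creates a vectorization pattern from the command line arguments. Up to 3-D
/// patterns are supported. If the command line argument requests a pattern of
/// higher order, returns an empty pattern list which will conservatively
/// result in no vectorization.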
static std::optional<NestedPattern>
makePattern(const DenseSet<Operation *> &parallelLoops, int vectorRank,
            ArrayRef<int64_t> fastestVaryingPattern) {
  using affine::matcher::For;
  int64_t d0 = fastestVaryingPattern.empty() ? -1 : fastestVaryingPattern[0];
  int64_t d1 = fastestVaryingPattern.size() < 2 ? -1 : fastestVaryingPattern[1];
  int64_t d2 = fastestVaryingPattern.size() < 3 ? -1 : fastestVaryingPattern[2];
  switch (vectorRank) {
  // Cases 1-3 return nested `For` patterns built from
  // isVectorizableLoopPtrFactory(parallelLoops, d0/d1/d2); any other rank
  // returns std::nullopt.
  // ...
  }
}

static NestedPattern &vectorTransferPattern() {
  static auto pattern = affine::matcher::Op(
      llvm::IsaPred<vector::TransferReadOp, vector::TransferWriteOp>);
  return pattern;
}
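/// The vectorize pass. The command-line options (vectorSizes,
/// fastestVaryingPattern, vectorizeReductions) are inherited from the
/// tablegen'erated AffineVectorizeBase.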
struct Vectorize : public affine::impl::AffineVectorizeBase<Vectorize> {
  void runOnOperation() override;
};
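/// Marks `loop` as a candidate for vectorization in `strategy`, mapping it to
/// the vector dimension implied by its depth within the matched pattern.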
static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern,
                                      unsigned patternDepth,
                                      VectorizationStrategy *strategy) {
  assert(patternDepth > depthInPattern &&
         "patternDepth is greater than depthInPattern");
  if (patternDepth - depthInPattern > strategy->vectorSizes.size()) {
    // Don't vectorize this loop.
    return;
  }
  strategy->loopToVectorDim[loop] =
      strategy->vectorSizes.size() - (patternDepth - depthInPattern);
}
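/// Implements a simple strawman strategy for vectorization: recursively
/// analyzes the matched children first, then marks each matched loop as
/// profitable to vectorize at its pattern depth.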
static LogicalResult analyzeProfitability(ArrayRef<NestedMatch> matches,
                                          unsigned depthInPattern,
                                          unsigned patternDepth,
                                          VectorizationStrategy *strategy) {
  for (auto m : matches) {
    if (failed(analyzeProfitability(m.getMatchedChildren(), depthInPattern + 1,
                                    patternDepth, strategy))) {
      return failure();
    }
    vectorizeLoopIfProfitable(m.getMatchedOperation(), depthInPattern,
                              patternDepth, strategy);
  }
  return success();
}
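/// Contains the vectorization state and related methods used across the
/// vectorization process of a given function.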
struct VectorizationState {
  // ...

  void registerValueVectorReplacement(Value replaced, Operation *replacement);
  void registerBlockArgVectorReplacement(BlockArgument replaced,
                                         BlockArgument replacement);
  void registerValueScalarReplacement(Value replaced, Value replacement);
  void registerLoopResultScalarReplacement(Value replaced, Value replacement);
  void getScalarValueReplacementsFor(ValueRange inputVals,
                                     SmallVectorImpl<Value> &replacedVals);
  void finishVectorizationPattern(AffineForOp rootLoop);

  // ...

private:
  void registerValueVectorReplacementImpl(Value replaced, Value replacement);
};
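/// Registers the vector replacement of a scalar operation and its result
/// values. Both operations must have the same number of results.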
void VectorizationState::registerOpVectorReplacement(Operation *replaced,
                                                     Operation *replacement) {
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ commit vectorized op:\n");
  LLVM_DEBUG(dbgs() << *replaced << "\n");
  LLVM_DEBUG(dbgs() << "into\n");
  LLVM_DEBUG(dbgs() << *replacement << "\n");

  assert(replaced->getNumResults() == replacement->getNumResults() &&
         "Unexpected replaced and replacement results");
  assert(opVectorReplacement.count(replaced) == 0 && "already registered");
  opVectorReplacement[replaced] = replacement;

  for (auto resultTuple :
       llvm::zip(replaced->getResults(), replacement->getResults()))
    registerValueVectorReplacementImpl(std::get<0>(resultTuple),
                                       std::get<1>(resultTuple));
}
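/// Registers the vector replacement of a scalar value. The replacement
/// operation must have a single result. If the value to replace has a
/// defining op, the full op replacement is registered instead.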
void VectorizationState::registerValueVectorReplacement(
    Value replaced, Operation *replacement) {
  assert(replacement->getNumResults() == 1 &&
         "Expected single-result replacement");
  if (Operation *defOp = replaced.getDefiningOp())
    registerOpVectorReplacement(defOp, replacement);
  else
    registerValueVectorReplacementImpl(replaced, replacement->getResult(0));
}
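/// Registers the vector replacement of a block argument (e.g., an iter_arg of
/// a vectorized loop).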
void VectorizationState::registerBlockArgVectorReplacement(
    BlockArgument replaced, BlockArgument replacement) {
  registerValueVectorReplacementImpl(replaced, replacement);
}
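/// Utility shared by the public registration methods: maps a scalar value to
/// its vector replacement, which must have a vector type.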
void VectorizationState::registerValueVectorReplacementImpl(Value replaced,
                                                            Value replacement) {
  assert(!valueVectorReplacement.contains(replaced) &&
         "Vector replacement already registered");
  assert(isa<VectorType>(replacement.getType()) &&
         "Expected vector type in vector replacement");
  valueVectorReplacement.map(replaced, replacement);
}
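/// Registers the scalar replacement of a scalar value. 'replacement' must be
/// scalar (e.g., the new induction variable of a vectorized loop).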
void VectorizationState::registerValueScalarReplacement(Value replaced,
                                                        Value replacement) {
  assert(!valueScalarReplacement.contains(replaced) &&
         "Scalar value replacement already registered");
  assert(!isa<VectorType>(replacement.getType()) &&
         "Expected scalar type in scalar replacement");
  valueScalarReplacement.map(replaced, replacement);
}
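/// Registers the scalar value that will replace a result of a vectorized
/// reduction loop; the actual replacement happens only if the vectorization
/// of the whole pattern succeeds.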
void VectorizationState::registerLoopResultScalarReplacement(
    Value replaced, Value replacement) {
  assert(loopResultScalarReplacement.count(replaced) == 0 &&
         "already registered");
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ will replace a result of the loop "
                       "with scalar: "
                    << replacement);
  loopResultScalarReplacement[replaced] = replacement;
}
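/// Returns, in 'replacedVals', the scalar replacement of each value in
/// 'inputVals', or the value itself if no replacement was registered.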
void VectorizationState::getScalarValueReplacementsFor(
    ValueRange inputVals, SmallVectorImpl<Value> &replacedVals) {
  for (Value inputVal : inputVals)
    replacedVals.push_back(valueScalarReplacement.lookupOrDefault(inputVal));
}
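/// Erases a loop nest, including all its nested operations.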
static void eraseLoopNest(AffineForOp forOp) {
  LLVM_DEBUG(dbgs() << "[early-vect]+++++ erasing:\n" << forOp << "\n");
  forOp.erase();
}
void VectorizationState::finishVectorizationPattern(AffineForOp rootLoop) {
  LLVM_DEBUG(dbgs() << "\n[early-vect] Finalizing vectorization\n");
  eraseLoopNest(rootLoop);
}
static void computeMemoryOpIndices(Operation *op, AffineMap map,
                                   ValueRange mapOperands,
                                   VectorizationState &state,
                                   SmallVectorImpl<Value> &results) {
  for (auto resultExpr : map.getResults()) {
    auto singleResMap =
        AffineMap::get(map.getNumDims(), map.getNumSymbols(), resultExpr);
    auto afOp = state.builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
                                                    mapOperands);
    results.push_back(afOp);
  }
}
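/// Returns a FilterFunctionType that matches loops in 'parallelLoops' whose
/// body is vectorizable and whose fastest-varying memref dimension, if any,
/// matches 'fastestVaryingMemRefDimension' (-1 means any dimension).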
static FilterFunctionType
isVectorizableLoopPtrFactory(const DenseSet<Operation *> &parallelLoops,
                             int fastestVaryingMemRefDimension) {
  return [&parallelLoops, fastestVaryingMemRefDimension](Operation &forOp) {
    auto loop = cast<AffineForOp>(forOp);
    if (!parallelLoops.contains(loop))
      return false;
    int memRefDim = -1;
    auto vectorizableBody =
        isVectorizableLoopBody(loop, &memRefDim, vectorTransferPattern());
    if (!vectorizableBody)
      return false;
    return memRefDim == -1 || fastestVaryingMemRefDimension == -1 ||
           memRefDim == fastestVaryingMemRefDimension;
  };
}
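/// Returns the vector type resulting from applying the provided vectorization
/// strategy on the scalar type.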
static VectorType getVectorType(Type scalarTy,
                                const VectorizationStrategy *strategy) {
  assert(!isa<VectorType>(scalarTy) && "Expected scalar type");
  return VectorType::get(strategy->vectorSizes, scalarTy);
}
static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp,
                                           VectorizationState &state) {
  Type scalarTy = constOp.getType();
  if (!VectorType::isValidElementType(scalarTy))
    return nullptr;

  auto vecTy = getVectorType(scalarTy, state.strategy);
  auto vecAttr = DenseElementsAttr::get(vecTy, constOp.getValue());

  OpBuilder::InsertionGuard guard(state.builder);
  Operation *parentOp = state.builder.getInsertionBlock()->getParentOp();
  // Find the innermost vectorized ancestor loop so the vector constant is
  // created in its prologue.
  while (parentOp && !state.vecLoopToVecDim.count(parentOp))
    parentOp = parentOp->getParentOp();
  assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
         isa<AffineForOp>(parentOp) && "Expected a vectorized for op");
  auto vecForOp = cast<AffineForOp>(parentOp);
  state.builder.setInsertionPointToStart(vecForOp.getBody());
  auto newConstOp =
      state.builder.create<arith::ConstantOp>(constOp.getLoc(), vecAttr);

  // Register vector replacement for future uses in the scope.
  state.registerOpVectorReplacement(constOp, newConstOp);
  return newConstOp;
}
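/// We have no need to vectorize affine.apply. However, we still need to
/// generate it and replace the operands with values in the scalar replacement
/// map; an affine.apply on a vector operand is a vectorization failure.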
static Operation *vectorizeAffineApplyOp(AffineApplyOp applyOp,
                                         VectorizationState &state) {
  SmallVector<Value, 8> updatedOperands;
  for (Value operand : applyOp.getOperands()) {
    if (state.valueVectorReplacement.contains(operand)) {
      LLVM_DEBUG(
          dbgs() << "\n[early-vect]+++++ affine.apply on vector operand\n");
      return nullptr;
    }
    Value updatedOperand = state.valueScalarReplacement.lookupOrNull(operand);
    if (!updatedOperand)
      updatedOperand = operand;
    updatedOperands.push_back(updatedOperand);
  }

  auto newApplyOp = state.builder.create<AffineApplyOp>(
      applyOp.getLoc(), applyOp.getAffineMap(), updatedOperands);

  // Register the new affine.apply result.
  state.registerValueScalarReplacement(applyOp.getResult(),
                                       newApplyOp.getResult());
  return newApplyOp;
}
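/// Creates a constant vector filled with the neutral elements of the given
/// reduction. The scalar type of the vector elements is taken from
/// 'oldOperand'.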
static arith::ConstantOp createInitialVector(arith::AtomicRMWKind reductionKind,
                                             Value oldOperand,
                                             VectorizationState &state) {
  Type scalarTy = oldOperand.getType();
  if (!VectorType::isValidElementType(scalarTy))
    return nullptr;

  Attribute valueAttr = getIdentityValueAttr(
      reductionKind, scalarTy, state.builder, oldOperand.getLoc());
  auto vecTy = getVectorType(scalarTy, state.strategy);
  auto vecAttr = DenseElementsAttr::get(vecTy, valueAttr);
  auto newConstOp =
      state.builder.create<arith::ConstantOp>(oldOperand.getLoc(), vecAttr);

  return newConstOp;
}
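/// Creates a mask used to filter out garbage elements in the last iteration
/// of unaligned loops. If a mask is not required (e.g., the constant trip
/// count is a multiple of the vector size), returns nullptr. Sketch of the
/// generated IR for vector size 128 (names are illustrative):
///   %itersLeft = affine.apply affine_map<(d0, d1) -> (d0 - d1)>(%ub, %iv)
///   %mask = vector.create_mask %itersLeft : vector<128xi1>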
static Value createMask(AffineForOp vecForOp, VectorizationState &state) {
  assert(state.strategy->vectorSizes.size() == 1 &&
         "Creating a mask for non-1-D vectors is not supported.");
  assert(vecForOp.getStep() == state.strategy->vectorSizes[0] &&
         "Creating a mask for loops with non-unit original step size is not "
         "supported.");

  // Check if we have already created the mask for this loop.
  if (Value mask = state.vecLoopToMask.lookup(vecForOp))
    return mask;

  // If the loop has constant bounds and the trip count is a multiple of the
  // (vectorized) step, no mask is needed.
  if (vecForOp.hasConstantBounds()) {
    int64_t originalTripCount =
        vecForOp.getConstantUpperBound() - vecForOp.getConstantLowerBound();
    if (originalTripCount % vecForOp.getStepAsInt() == 0)
      return nullptr;
  }

  OpBuilder::InsertionGuard guard(state.builder);
  state.builder.setInsertionPointToStart(vecForOp.getBody());
  Location loc = vecForOp.getLoc();

  // Materialize the upper bound; use affine.min when the bound map has
  // multiple results.
  AffineMap ubMap = vecForOp.getUpperBoundMap();
  Value ub;
  if (ubMap.getNumResults() == 1)
    ub = state.builder.create<AffineApplyOp>(loc, vecForOp.getUpperBoundMap(),
                                             vecForOp.getUpperBoundOperands());
  else
    ub = state.builder.create<AffineMinOp>(loc, vecForOp.getUpperBoundMap(),
                                           vecForOp.getUpperBoundOperands());
  // Compute the number of original iterations left in the loop: ub - iv.
  AffineExpr subExpr =
      state.builder.getAffineDimExpr(0) - state.builder.getAffineDimExpr(1);
  Value itersLeft = makeComposedAffineApply(state.builder, loc,
                                            AffineMap::get(2, 0, subExpr),
                                            {ub, vecForOp.getInductionVar()});
  // Create the mask; it is active for the first `itersLeft` lanes.
  auto maskTy = VectorType::get(state.strategy->vectorSizes,
                                state.builder.getIntegerType(1));
  Value mask =
      state.builder.create<vector::CreateMaskOp>(loc, maskTy, itersLeft);

  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ creating a mask:\n"
                    << itersLeft << "\n"
                    << mask << "\n");

  state.vecLoopToMask[vecForOp] = mask;
  return mask;
}
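/// Returns true if the provided value is vector uniform given the
/// vectorization strategy, i.e., it is defined outside of every loop mapped
/// to a vector dimension.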
static bool isUniformDefinition(Value value,
                                const VectorizationStrategy *strategy) {
  for (auto &loopToDim : strategy->loopToVectorDim) {
    auto loop = cast<AffineForOp>(loopToDim.first);
    if (!loop.isDefinedOutsideOfLoop(value))
      return false;
  }
  return value.getType().isIntOrIndexOrFloat();
}
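/// Generates a broadcast op for the provided uniform value using the
/// vectorization strategy in 'state'.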
static Operation *vectorizeUniform(Value uniformVal,
                                   VectorizationState &state) {
  OpBuilder::InsertionGuard guard(state.builder);
  Value uniformScalarRepl =
      state.valueScalarReplacement.lookupOrDefault(uniformVal);
  state.builder.setInsertionPointAfterValue(uniformScalarRepl);

  auto vectorTy = getVectorType(uniformVal.getType(), state.strategy);
  auto bcastOp = state.builder.create<BroadcastOp>(uniformVal.getLoc(),
                                                   vectorTy, uniformScalarRepl);
  state.registerValueVectorReplacement(uniformVal, bcastOp);
  return bcastOp;
}
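/// Tries to vectorize a given operand by applying the following logic:
///   1. if the operand was already vectorized, reuse its vector replacement;
///   2. if it is a scalar constant, vectorize it with vectorizeConstant;
///   3. if it is uniform, broadcast it with vectorizeUniform;
///   4. otherwise, give up.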
static Value vectorizeOperand(Value operand, VectorizationState &state) {
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ vectorize operand: " << operand);
  // If the operand was already vectorized, return its vector replacement.
  if (Value vecRepl = state.valueVectorReplacement.lookupOrNull(operand)) {
    LLVM_DEBUG(dbgs() << " -> already vectorized: " << vecRepl);
    return vecRepl;
  }

  // An operand of vector type must already be in the replacement map.
  assert(!isa<VectorType>(operand.getType()) &&
         "Vector op not found in replacement map");

  // Vectorize constant.
  if (auto constOp = operand.getDefiningOp<arith::ConstantOp>()) {
    auto vecConstant = vectorizeConstant(constOp, state);
    LLVM_DEBUG(dbgs() << "-> constant: " << vecConstant);
    return vecConstant.getResult();
  }

  // Vectorize uniform values.
  if (isUniformDefinition(operand, state.strategy)) {
    Operation *vecUniform = vectorizeUniform(operand, state);
    LLVM_DEBUG(dbgs() << "-> uniform: " << *vecUniform);
    return vecUniform->getResult(0);
  }

  // Give up on block arguments and any other scenario not handled above.
  if (isa<BlockArgument>(operand))
    LLVM_DEBUG(dbgs() << "-> unsupported block argument\n");
  else
    LLVM_DEBUG(dbgs() << "-> non-vectorizable\n");
  return nullptr;
}
static bool isIVMappedToMultipleIndices(
    ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  for (auto &kvp : loopToVectorDim) {
    AffineForOp forOp = cast<AffineForOp>(kvp.first);
    // Find which indices are invariant w.r.t. this loop IV.
    auto invariants =
        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
    // Count how many vary (i.e. are not invariant).
    unsigned nonInvariant = 0;
    for (Value idx : indices) {
      if (invariants.count(idx))
        continue;
      if (++nonInvariant > 1) {
        LLVM_DEBUG(dbgs() << "[early-vect] Bail out: IV "
                          << forOp.getInductionVar() << " drives "
                          << nonInvariant << " indices\n");
        return true;
      }
    }
  }
  return false;
}
static Operation *vectorizeAffineLoad(AffineLoadOp loadOp,
                                      VectorizationState &state) {
  MemRefType memRefType = loadOp.getMemRefType();
  Type elementType = memRefType.getElementType();
  auto vectorType = VectorType::get(state.strategy->vectorSizes, elementType);

  // Replace map operands with operands from the vector loop nest.
  SmallVector<Value, 8> mapOperands;
  state.getScalarValueReplacementsFor(loadOp.getMapOperands(), mapOperands);

  // Compute indices for the transfer op. AffineApplyOp's may be generated.
  SmallVector<Value, 8> indices;
  indices.reserve(memRefType.getRank());
  if (loadOp.getAffineMap() !=
      state.builder.getMultiDimIdentityMap(memRefType.getRank())) {
    // Bail out if any map operand is itself produced by an affine.apply.
    for (auto op : mapOperands) {
      if (op.getDefiningOp<AffineApplyOp>())
        return nullptr;
    }
    computeMemoryOpIndices(loadOp, loadOp.getAffineMap(), mapOperands, state,
                           indices);
  } else {
    indices.append(mapOperands.begin(), mapOperands.end());
  }

  if (isIVMappedToMultipleIndices(indices, state.vecLoopToVecDim))
    return nullptr;

  // Compute the permutation map using the information of the new vector loops.
  auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                           indices, state.vecLoopToVecDim);
  if (!permutationMap) {
    LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ can't compute permutationMap\n");
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ permutationMap: ");
  LLVM_DEBUG(permutationMap.print(dbgs()));

  auto transfer = state.builder.create<vector::TransferReadOp>(
      loadOp.getLoc(), vectorType, loadOp.getMemRef(), indices,
      std::nullopt, permutationMap);

  // Register replacement for future uses in the scope.
  state.registerOpVectorReplacement(loadOp, transfer);
  return transfer;
}
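/// Vectorizes an affine store with the vectorization strategy in 'state' by
/// generating a 'vector.transfer_write' op with the proper permutation map
/// inferred from the indices of the store.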
static Operation *vectorizeAffineStore(AffineStoreOp storeOp,
                                       VectorizationState &state) {
  MemRefType memRefType = storeOp.getMemRefType();
  Value vectorValue = vectorizeOperand(storeOp.getValueToStore(), state);
  if (!vectorValue)
    return nullptr;

  // Replace map operands with operands from the vector loop nest.
  SmallVector<Value, 8> mapOperands;
  state.getScalarValueReplacementsFor(storeOp.getMapOperands(), mapOperands);

  // Compute indices for the transfer op. AffineApplyOp's may be generated.
  SmallVector<Value, 8> indices;
  indices.reserve(memRefType.getRank());
  if (storeOp.getAffineMap() !=
      state.builder.getMultiDimIdentityMap(memRefType.getRank()))
    computeMemoryOpIndices(storeOp, storeOp.getAffineMap(), mapOperands, state,
                           indices);
  else
    indices.append(mapOperands.begin(), mapOperands.end());

  if (isIVMappedToMultipleIndices(indices, state.vecLoopToVecDim))
    return nullptr;

  auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                           indices, state.vecLoopToVecDim);
  if (!permutationMap)
    return nullptr;
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ permutationMap: ");
  LLVM_DEBUG(permutationMap.print(dbgs()));

  auto transfer = state.builder.create<vector::TransferWriteOp>(
      storeOp.getLoc(), vectorValue, storeOp.getMemRef(), indices,
      permutationMap);
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ vectorized store: " << transfer);

  // Register replacement for future uses in the scope.
  state.registerOpVectorReplacement(storeOp, transfer);
  return transfer;
}
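/// Returns true if 'value' is a constant equal to the neutral element of the
/// given vectorizable reduction.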
static bool isNeutralElementConst(arith::AtomicRMWKind reductionKind,
                                  Value value, VectorizationState &state) {
  Type scalarTy = value.getType();
  if (!VectorType::isValidElementType(scalarTy))
    return false;
  Attribute valueAttr = getIdentityValueAttr(reductionKind, scalarTy,
                                             state.builder, value.getLoc());
  if (auto constOp =
          dyn_cast_or_null<arith::ConstantOp>(value.getDefiningOp()))
    return constOp.getValue() == valueAttr;
  return false;
}
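/// Vectorizes a loop with the vectorization strategy in 'state'. A new loop
/// with the vectorized step is created; for reduction loops, the iter_args
/// are seeded with the neutral elements of the reductions and the loop
/// results are reduced back to scalars.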
static Operation *vectorizeAffineForOp(AffineForOp forOp,
                                       VectorizationState &state) {
  const VectorizationStrategy &strategy = *state.strategy;
  auto loopToVecDimIt = strategy.loopToVectorDim.find(forOp);
  bool isLoopVecDim = loopToVecDimIt != strategy.loopToVectorDim.end();

  // TODO: Vectorization of reduction loops with a non-unit step is not
  // supported yet.
  if (isLoopVecDim && forOp.getNumIterOperands() > 0 && forOp.getStep() != 1) {
    LLVM_DEBUG(
        dbgs()
        << "\n[early-vect]+++++ unsupported step size for reduction loop: "
        << forOp.getStep() << "\n");
    return nullptr;
  }

  // If we are vectorizing a vector dimension, compute a new step for the new
  // vectorized loop using the vectorization factor for that dimension.
  // Otherwise, propagate the step of the scalar loop.
  int64_t newStep;
  if (isLoopVecDim) {
    unsigned vectorDim = loopToVecDimIt->second;
    assert(vectorDim < strategy.vectorSizes.size() && "vector dim overflow");
    int64_t forOpVecFactor = strategy.vectorSizes[vectorDim];
    newStep = forOp.getStepAsInt() * forOpVecFactor;
  } else {
    newStep = forOp.getStepAsInt();
  }

  // Get information about the reduction kinds of a reduction loop.
  ArrayRef<LoopReduction> reductions;
  if (isLoopVecDim && forOp.getNumIterOperands() > 0) {
    auto it = strategy.reductionLoops.find(forOp);
    assert(it != strategy.reductionLoops.end() &&
           "Reduction descriptors not found when vectorizing a reduction loop");
    reductions = it->second;
    assert(reductions.size() == forOp.getNumIterOperands() &&
           "The size of reductions array must match the number of iter_args");
  }

  // Vectorize 'iter_args'. For reduction loops, the initial values are the
  // neutral elements of the reductions; the original initial values are
  // combined back into the results below.
  SmallVector<Value, 8> vecIterOperands;
  if (!isLoopVecDim) {
    for (auto operand : forOp.getInits())
      vecIterOperands.push_back(vectorizeOperand(operand, state));
  } else {
    for (auto redAndOperand : llvm::zip(reductions, forOp.getInits()))
      vecIterOperands.push_back(createInitialVector(
          std::get<0>(redAndOperand).kind, std::get<1>(redAndOperand), state));
  }

  auto vecForOp = state.builder.create<AffineForOp>(
      forOp.getLoc(), forOp.getLowerBoundOperands(), forOp.getLowerBoundMap(),
      forOp.getUpperBoundOperands(), forOp.getUpperBoundMap(), newStep,
      vecIterOperands,
      /*bodyBuilder=*/[](OpBuilder &, Location, Value, ValueRange) {
        // The proper terminator is added during vectorization.
      });

  // Register the new vector loop, its induction variable and iter_args.
  state.registerOpVectorReplacement(forOp, vecForOp);
  state.registerValueScalarReplacement(forOp.getInductionVar(),
                                       vecForOp.getInductionVar());
  for (auto iterTuple :
       llvm::zip(forOp.getRegionIterArgs(), vecForOp.getRegionIterArgs()))
    state.registerBlockArgVectorReplacement(std::get<0>(iterTuple),
                                            std::get<1>(iterTuple));

  if (isLoopVecDim && forOp.getNumIterOperands() > 0) {
    for (unsigned i = 0; i < vecForOp.getNumIterOperands(); ++i) {
      // First, reduce the vector returned from the loop into a scalar.
      Value reducedRes =
          getVectorReductionOp(reductions[i].kind, state.builder,
                               vecForOp.getLoc(), vecForOp.getResult(i));
      LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ creating a vector reduction: "
                        << reducedRes);
      // Then, combine it with the original (scalar) initial value, unless it
      // is already the neutral element of the reduction.
      Value origInit = forOp.getOperand(forOp.getNumControlOperands() + i);
      Value finalRes = reducedRes;
      if (!isNeutralElementConst(reductions[i].kind, origInit, state))
        finalRes =
            arith::getReductionOp(reductions[i].kind, state.builder,
                                  reducedRes.getLoc(), reducedRes, origInit);
      state.registerLoopResultScalarReplacement(forOp.getResult(i), finalRes);
    }
  }

  if (isLoopVecDim)
    state.vecLoopToVecDim[vecForOp] = loopToVecDimIt->second;

  // Change the insertion point so that upcoming vectorized instructions are
  // inserted into the vectorized loop's body.
  state.builder.setInsertionPointToStart(vecForOp.getBody());

  // If this is a reduction loop, create the mask (if required).
  if (isLoopVecDim && forOp.getNumIterOperands() > 0)
    createMask(vecForOp, state);

  return vecForOp;
}
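/// Vectorizes an arbitrary operation by plain widening: each result type is
/// turned into a vector of that type, and every operand is vectorized.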
static Operation *widenOp(Operation *op, VectorizationState &state) {
  SmallVector<Type, 8> vectorTypes;
  for (Value result : op->getResults())
    vectorTypes.push_back(
        VectorType::get(state.strategy->vectorSizes, result.getType()));

  SmallVector<Value, 8> vectorOperands;
  for (Value operand : op->getOperands()) {
    Value vecOperand = vectorizeOperand(operand, state);
    if (!vecOperand) {
      LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ an operand failed vectorize\n");
      return nullptr;
    }
    vectorOperands.push_back(vecOperand);
  }

  // Create a clone of the op with the proper operands and return types.
  Operation *vecOp =
      state.builder.create(op->getLoc(), op->getName().getIdentifier(),
                           vectorOperands, vectorTypes, op->getAttrs());
  state.registerOpVectorReplacement(op, vecOp);
  return vecOp;
}
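/// Vectorizes a yield operation by widening its types. For masked reduction
/// loops, the inputs of the combiner ops are masked with the neutral element
/// so that inactive lanes do not change the result.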
static Operation *vectorizeAffineYieldOp(AffineYieldOp yieldOp,
                                         VectorizationState &state) {
  Operation *newYieldOp = widenOp(yieldOp, state);
  Operation *newParentOp = state.builder.getInsertionBlock()->getParentOp();

  // If a mask was created for the enclosing reduction loop, mask the combiner
  // inputs so that masked-off lanes contribute the neutral element.
  if (Value mask = state.vecLoopToMask.lookup(newParentOp)) {
    state.builder.setInsertionPoint(newYieldOp);
    for (unsigned i = 0; i < newYieldOp->getNumOperands(); ++i) {
      SmallVector<Operation *> combinerOps;
      Value reducedVal = matchReduction(
          cast<AffineForOp>(newParentOp).getRegionIterArgs(), i, combinerOps);
      assert(reducedVal && "expect non-null value for parallel reduction loop");
      assert(combinerOps.size() == 1 && "expect only one combiner op");
      Value neutralVal = cast<AffineForOp>(newParentOp).getInits()[i];
      state.builder.setInsertionPoint(combinerOps.back());
      Value maskedReducedVal = state.builder.create<arith::SelectOp>(
          reducedVal.getLoc(), mask, reducedVal, neutralVal);
      LLVM_DEBUG(
          dbgs() << "\n[early-vect]+++++ masking an input to a binary op that "
                    "produces value for a yield Op: "
                 << maskedReducedVal);
      combinerOps.back()->replaceUsesOfWith(reducedVal, maskedReducedVal);
    }
  }

  state.builder.setInsertionPointAfter(newParentOp);
  return newYieldOp;
}
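/// Encodes Operation-specific behavior for vectorization: dispatches to the
/// specialized routines above and falls back to plain widening.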
static Operation *vectorizeOneOperation(Operation *op,
                                        VectorizationState &state) {
  // Sanity checks.
  assert(!isa<vector::TransferReadOp>(op) &&
         "vector.transfer_read cannot be further vectorized");
  assert(!isa<vector::TransferWriteOp>(op) &&
         "vector.transfer_write cannot be further vectorized");

  if (auto loadOp = dyn_cast<AffineLoadOp>(op))
    return vectorizeAffineLoad(loadOp, state);
  if (auto storeOp = dyn_cast<AffineStoreOp>(op))
    return vectorizeAffineStore(storeOp, state);
  if (auto forOp = dyn_cast<AffineForOp>(op))
    return vectorizeAffineForOp(forOp, state);
  if (auto yieldOp = dyn_cast<AffineYieldOp>(op))
    return vectorizeAffineYieldOp(yieldOp, state);
  if (auto constant = dyn_cast<arith::ConstantOp>(op))
    return vectorizeConstant(constant, state);
  if (auto applyOp = dyn_cast<AffineApplyOp>(op))
    return vectorizeAffineApplyOp(applyOp, state);

  // Anything else is widened, provided it has no regions.
  if (op->getNumRegions() != 0)
    return nullptr;
  return widenOp(op, state);
}
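/// Recursive implementation to convert all the nested loops in 'match' to a
/// 2D vector container that preserves the relative nesting level of each loop
/// with respect to the others in 'match'.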
static void
getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel,
                         std::vector<SmallVector<AffineForOp, 2>> &loops) {
  // Add a new empty level to the output if it doesn't exist already.
  assert(currentLevel <= loops.size() && "Unexpected currentLevel");
  if (currentLevel == loops.size())
    loops.emplace_back();

  // Add the current match and recursively visit its children.
  loops[currentLevel].push_back(
      cast<AffineForOp>(match.getMatchedOperation()));
  for (auto childMatch : match.getMatchedChildren())
    getMatchedAffineLoopsRec(childMatch, currentLevel + 1, loops);
}
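/// Internal implementation to vectorize affine loops from a single loop nest
/// using an n-D vectorization strategy.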
static LogicalResult
vectorizeLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
                  const VectorizationStrategy &strategy) {
  assert(loops[0].size() == 1 && "Expected single root loop");
  AffineForOp rootLoop = loops[0][0];
  VectorizationState state(rootLoop.getContext());
  state.builder.setInsertionPointAfter(rootLoop);
  state.strategy = &strategy;

  // ...
  if (!isVectorizableLoopBody(rootLoop, vectorTransferPattern())) {
    LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ loop is not vectorizable");
    return failure();
  }

  // Vectorize the operations in the loop nest in pre-order, bailing out on
  // the first failure.
  auto opVecResult = rootLoop.walk<WalkOrder::PreOrder>([&](Operation *op) {
    LLVM_DEBUG(dbgs() << "[early-vect]+++++ Vectorizing: " << *op);
    Operation *vectorOp = vectorizeOneOperation(op, state);
    if (!vectorOp) {
      LLVM_DEBUG(
          dbgs() << "[early-vect]+++++ failed vectorizing the operation: "
                 << *op << "\n");
      return WalkResult::interrupt();
    }
    return WalkResult::advance();
  });

  if (opVecResult.wasInterrupted()) {
    LLVM_DEBUG(dbgs() << "[early-vect]+++++ failed vectorization for: "
                      << rootLoop << "\n");
    // Erase any incomplete vector loop nest before bailing out.
    auto vecRootLoopIt = state.opVectorReplacement.find(rootLoop);
    if (vecRootLoopIt != state.opVectorReplacement.end())
      eraseLoopNest(cast<AffineForOp>(vecRootLoopIt->second));
    return failure();
  }

  // Replace results of reduction loops with the scalar values computed using
  // `vector.reduce` or similar ops.
  for (auto resPair : state.loopResultScalarReplacement)
    resPair.first.replaceAllUsesWith(resPair.second);

  assert(state.opVectorReplacement.count(rootLoop) == 1 &&
         "Expected vector replacement for loop nest");
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ success vectorizing pattern");
  LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ vectorization result:\n"
                    << *state.opVectorReplacement[rootLoop]);

  // Finish this vectorization pattern.
  state.finishVectorizationPattern(rootLoop);
  return success();
}
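/// Extracts the matched loops and vectorizes them following a topological
/// order.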
static LogicalResult vectorizeRootMatch(NestedMatch m,
                                        const VectorizationStrategy &strategy) {
  std::vector<SmallVector<AffineForOp, 2>> loopsToVectorize;
  getMatchedAffineLoops(m, loopsToVectorize);
  return vectorizeLoopNest(loopsToVectorize, strategy);
}
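/// Traverses all the loop matches and classifies them into intersection
/// buckets. Two matches intersect if the root loop of one is an ancestor of
/// the root loop of the other.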
static void computeIntersectionBuckets(
    ArrayRef<NestedMatch> matches,
    std::vector<SmallVector<NestedMatch, 8>> &intersectionBuckets) {
  assert(intersectionBuckets.empty() && "Expected empty output");
  // Keeps track of the root (outermost) loop of each bucket.
  SmallVector<AffineForOp, 8> bucketRoots;

  for (const NestedMatch &match : matches) {
    AffineForOp matchRoot = cast<AffineForOp>(match.getMatchedOperation());
    bool intersects = false;
    for (int i = 0, end = intersectionBuckets.size(); i < end; ++i) {
      AffineForOp bucketRoot = bucketRoots[i];
      // Add the match to the bucket if the bucket root is an ancestor of the
      // match root.
      if (bucketRoot->isAncestor(matchRoot)) {
        intersectionBuckets[i].push_back(match);
        intersects = true;
        break;
      }
      // Add the match to the bucket and update the bucket root if the match
      // root is an ancestor of the current bucket root.
      if (matchRoot->isAncestor(bucketRoot)) {
        bucketRoots[i] = matchRoot;
        intersectionBuckets[i].push_back(match);
        intersects = true;
        break;
      }
    }

    // The match doesn't intersect with any existing bucket. Create a new one.
    if (!intersects) {
      bucketRoots.push_back(matchRoot);
      intersectionBuckets.emplace_back();
      intersectionBuckets.back().push_back(match);
    }
  }
}
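/// Internal implementation to vectorize affine loops in 'loops' using the n-D
/// vectorization factors in 'vectorSizes'. A 'fastestVaryingPattern' can
/// optionally be used to filter the fastest-varying memref dimension per loop.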
static void vectorizeLoops(Operation *parentOp, DenseSet<Operation *> &loops,
                           ArrayRef<int64_t> vectorSizes,
                           ArrayRef<int64_t> fastestVaryingPattern,
                           const ReductionLoopMap &reductionLoops) {
  assert((reductionLoops.empty() || vectorSizes.size() == 1) &&
         "Vectorizing reductions is supported only for 1-D vectors");

  // Compute the 1-D, 2-D or 3-D loop pattern to be matched on the target
  // loops.
  std::optional<NestedPattern> pattern =
      makePattern(loops, vectorSizes.size(), fastestVaryingPattern);
  if (!pattern) {
    LLVM_DEBUG(dbgs() << "\n[early-vect] pattern couldn't be computed\n");
    return;
  }

  LLVM_DEBUG(dbgs() << "\n******************************************");
  LLVM_DEBUG(dbgs() << "\n******************************************");
  LLVM_DEBUG(dbgs() << "\n[early-vect] new pattern on parent op\n");
  LLVM_DEBUG(dbgs() << *parentOp << "\n");

  unsigned patternDepth = pattern->getDepth();

  // Compute all the pattern matches and classify them into buckets of
  // intersecting matches.
  SmallVector<NestedMatch, 32> allMatches;
  pattern->match(parentOp, &allMatches);
  std::vector<SmallVector<NestedMatch, 8>> intersectionBuckets;
  computeIntersectionBuckets(allMatches, intersectionBuckets);

  // Only one match per bucket can be vectorized, since all the matches within
  // a bucket intersect.
  for (auto &intersectingMatches : intersectionBuckets) {
    for (NestedMatch &match : intersectingMatches) {
      VectorizationStrategy strategy;
      // TODO: depending on profitability, elect to reduce the vector size.
      strategy.vectorSizes.assign(vectorSizes.begin(), vectorSizes.end());
      strategy.reductionLoops = reductionLoops;
      if (failed(analyzeProfitability(match.getMatchedChildren(), 1,
                                      patternDepth, &strategy)))
        continue;
      vectorizeLoopIfProfitable(match.getMatchedOperation(), 0, patternDepth,
                                &strategy);
      // Vectorize the match and skip the rest of the bucket on success.
      if (succeeded(vectorizeRootMatch(match, strategy)))
        break;
    }
  }

  LLVM_DEBUG(dbgs() << "\n");
}
void Vectorize::runOnOperation() {
  func::FuncOp f = getOperation();
  if (!fastestVaryingPattern.empty() &&
      fastestVaryingPattern.size() != vectorSizes.size()) {
    f.emitRemark("Fastest varying pattern specified with different size than "
                 "the vector size.");
    return signalPassFailure();
  }

  if (vectorizeReductions && vectorSizes.size() != 1) {
    f.emitError("Vectorizing reductions is supported only for 1-D vectors.");
    return signalPassFailure();
  }

  if (llvm::any_of(vectorSizes, [](int64_t size) { return size <= 0; })) {
    f.emitError("Vectorization factor must be greater than zero.");
    return signalPassFailure();
  }

  DenseSet<Operation *> parallelLoops;
  ReductionLoopMap reductionLoops;

  // If reduction vectorization is requested, also populate the
  // `reductionLoops` map.
  if (vectorizeReductions) {
    f.walk([&parallelLoops, &reductionLoops](AffineForOp loop) {
      SmallVector<LoopReduction, 2> reductions;
      if (isLoopParallel(loop, &reductions)) {
        parallelLoops.insert(loop);
        // If it's not a reduction loop, adding it to the map is unnecessary.
        if (!reductions.empty())
          reductionLoops[loop] = reductions;
      }
    });
  } else {
    f.walk([&parallelLoops](AffineForOp loop) {
      if (isLoopParallel(loop))
        parallelLoops.insert(loop);
    });
  }

  // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
  NestedPatternContext mlContext;
  vectorizeLoops(f, parallelLoops, vectorSizes, fastestVaryingPattern,
                 reductionLoops);
}
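/// Verifies that the affine loops in 'loops' meet the nesting criteria
/// expected by SuperVectorizer: there must be a single root loop, and each
/// loop must be nested in a loop from the previous level without being nested
/// inside a sibling at the same level.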
static LogicalResult
verifyLoopNesting(const std::vector<SmallVector<AffineForOp, 2>> &loops) {
  // Expected at least one loop level.
  if (loops.empty())
    return failure();

  // Expected a single root loop at the first level.
  if (loops[0].size() != 1)
    return failure();

  // Traverse the loop levels outer-to-inner to check some invariants.
  for (int i = 1, end = loops.size(); i < end; ++i) {
    for (AffineForOp loop : loops[i]) {
      // Check that each loop at this level is nested in one of the loops from
      // the previous level.
      if (none_of(loops[i - 1], [&](AffineForOp maybeParent) {
            return maybeParent->isProperAncestor(loop);
          }))
        return failure();

      // Check that each loop at this level is not nested in another loop from
      // this level.
      for (AffineForOp sibling : loops[i]) {
        if (sibling->isProperAncestor(loop))
          return failure();
      }
    }
  }
  return success();
}
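/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
/// 'vectorSizes'. This external entry point sets up the thread-safe pattern
/// context and delegates to the internal implementation.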
void mlir::affine::vectorizeAffineLoops(
    Operation *parentOp, DenseSet<Operation *> &loops,
    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
    const ReductionLoopMap &reductionLoops) {
  // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
  NestedPatternContext mlContext;
  vectorizeLoops(parentOp, loops, vectorSizes, fastestVaryingPattern,
                 reductionLoops);
}