29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/Support/Debug.h"
35 #define GEN_PASS_DEF_AFFINEVECTORIZE
36 #include "mlir/Dialect/Affine/Passes.h.inc"
41 using namespace affine;
42 using namespace vector;
575 #define DEBUG_TYPE "early-vect"
582 int fastestVaryingMemRefDimension);
588 static std::optional<NestedPattern>
592 int64_t d0 = fastestVaryingPattern.empty() ? -1 : fastestVaryingPattern[0];
593 int64_t d1 = fastestVaryingPattern.size() < 2 ? -1 : fastestVaryingPattern[1];
594 int64_t d2 = fastestVaryingPattern.size() < 3 ? -1 : fastestVaryingPattern[2];
595 switch (vectorRank) {
613 return isa<vector::TransferReadOp, vector::TransferWriteOp>(op);
622 struct Vectorize :
public affine::impl::AffineVectorizeBase<Vectorize> {
625 void runOnOperation()
override;
631 unsigned patternDepth,
633 assert(patternDepth > depthInPattern &&
634 "patternDepth is greater than depthInPattern");
635 if (patternDepth - depthInPattern > strategy->
vectorSizes.size()) {
640 strategy->
vectorSizes.size() - (patternDepth - depthInPattern);
659 unsigned depthInPattern,
660 unsigned patternDepth,
662 for (
auto m : matches) {
664 patternDepth, strategy))) {
668 patternDepth, strategy);
703 void registerValueVectorReplacement(
Value replaced,
Operation *replacement);
710 void registerBlockArgVectorReplacement(
BlockArgument replaced,
737 void registerLoopResultScalarReplacement(
Value replaced,
Value replacement);
741 void getScalarValueReplacementsFor(
ValueRange inputVals,
745 void finishVectorizationPattern(AffineForOp rootLoop);
774 void registerValueVectorReplacementImpl(
Value replaced,
Value replacement);
775 void registerValueScalarReplacementImpl(
Value replaced,
Value replacement);
789 void VectorizationState::registerOpVectorReplacement(
Operation *replaced,
791 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ commit vectorized op:\n");
792 LLVM_DEBUG(dbgs() << *replaced <<
"\n");
793 LLVM_DEBUG(dbgs() <<
"into\n");
794 LLVM_DEBUG(dbgs() << *replacement <<
"\n");
797 "Unexpected replaced and replacement results");
798 assert(opVectorReplacement.count(replaced) == 0 &&
"already registered");
799 opVectorReplacement[replaced] = replacement;
801 for (
auto resultTuple :
803 registerValueVectorReplacementImpl(std::get<0>(resultTuple),
804 std::get<1>(resultTuple));
817 void VectorizationState::registerValueVectorReplacement(
820 "Expected single-result replacement");
822 registerOpVectorReplacement(defOp, replacement);
824 registerValueVectorReplacementImpl(replaced, replacement->
getResult(0));
832 void VectorizationState::registerBlockArgVectorReplacement(
834 registerValueVectorReplacementImpl(replaced, replacement);
837 void VectorizationState::registerValueVectorReplacementImpl(
Value replaced,
839 assert(!valueVectorReplacement.contains(replaced) &&
840 "Vector replacement already registered");
841 assert(isa<VectorType>(replacement.
getType()) &&
842 "Expected vector type in vector replacement");
843 valueVectorReplacement.map(replaced, replacement);
857 void VectorizationState::registerValueScalarReplacement(
859 registerValueScalarReplacementImpl(replaced, replacement);
871 void VectorizationState::registerLoopResultScalarReplacement(
874 assert(loopResultScalarReplacement.count(replaced) == 0 &&
875 "already registered");
876 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ will replace a result of the loop "
879 loopResultScalarReplacement[replaced] = replacement;
882 void VectorizationState::registerValueScalarReplacementImpl(
Value replaced,
884 assert(!valueScalarReplacement.contains(replaced) &&
885 "Scalar value replacement already registered");
886 assert(!isa<VectorType>(replacement.
getType()) &&
887 "Expected scalar type in scalar replacement");
888 valueScalarReplacement.map(replaced, replacement);
892 void VectorizationState::getScalarValueReplacementsFor(
894 for (
Value inputVal : inputVals)
895 replacedVals.push_back(valueScalarReplacement.lookupOrDefault(inputVal));
900 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ erasing:\n" << forOp <<
"\n");
905 void VectorizationState::finishVectorizationPattern(AffineForOp rootLoop) {
906 LLVM_DEBUG(dbgs() <<
"\n[early-vect] Finalizing vectorization\n");
918 auto afOp = state.builder.create<AffineApplyOp>(op->
getLoc(), singleResMap,
920 results.push_back(afOp);
929 int fastestVaryingMemRefDimension) {
930 return [&parallelLoops, fastestVaryingMemRefDimension](
Operation &forOp) {
931 auto loop = cast<AffineForOp>(forOp);
932 auto parallelIt = parallelLoops.find(loop);
933 if (parallelIt == parallelLoops.end())
936 auto vectorizableBody =
938 if (!vectorizableBody)
940 return memRefDim == -1 || fastestVaryingMemRefDimension == -1 ||
941 memRefDim == fastestVaryingMemRefDimension;
949 assert(!isa<VectorType>(scalarTy) &&
"Expected scalar type");
958 Type scalarTy = constOp.getType();
959 if (!VectorType::isValidElementType(scalarTy))
966 Operation *parentOp = state.builder.getInsertionBlock()->getParentOp();
968 while (parentOp && !state.vecLoopToVecDim.count(parentOp))
970 assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
971 isa<AffineForOp>(parentOp) &&
"Expected a vectorized for op");
972 auto vecForOp = cast<AffineForOp>(parentOp);
973 state.builder.setInsertionPointToStart(vecForOp.getBody());
975 state.builder.create<arith::ConstantOp>(constOp.getLoc(), vecAttr);
978 state.registerOpVectorReplacement(constOp, newConstOp);
989 if (!VectorType::isValidElementType(scalarTy))
993 reductionKind, scalarTy, state.builder, oldOperand.
getLoc());
997 state.builder.create<arith::ConstantOp>(oldOperand.
getLoc(), vecAttr);
1010 assert(state.strategy->vectorSizes.size() == 1 &&
1011 "Creating a mask non-1-D vectors is not supported.");
1012 assert(vecForOp.getStep() == state.strategy->vectorSizes[0] &&
1013 "Creating a mask for loops with non-unit original step size is not "
1017 if (
Value mask = state.vecLoopToMask.lookup(vecForOp))
1022 if (vecForOp.hasConstantBounds()) {
1023 int64_t originalTripCount =
1024 vecForOp.getConstantUpperBound() - vecForOp.getConstantLowerBound();
1025 if (originalTripCount % vecForOp.getStep() == 0)
1030 state.builder.setInsertionPointToStart(vecForOp.getBody());
1046 AffineMap ubMap = vecForOp.getUpperBoundMap();
1049 ub = state.builder.create<AffineApplyOp>(loc, vecForOp.getUpperBoundMap(),
1050 vecForOp.getUpperBoundOperands());
1052 ub = state.builder.create<AffineMinOp>(loc, vecForOp.getUpperBoundMap(),
1053 vecForOp.getUpperBoundOperands());
1056 state.builder.getAffineDimExpr(0) - state.builder.getAffineDimExpr(1);
1059 {ub, vecForOp.getInductionVar()});
1065 state.builder.getIntegerType(1));
1067 state.builder.create<vector::CreateMaskOp>(loc, maskTy, itersLeft);
1069 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a mask:\n"
1070 << itersLeft <<
"\n"
1073 state.vecLoopToMask[vecForOp] = mask;
1089 auto loop = cast<AffineForOp>(loopToDim.first);
1090 if (!loop.isDefinedOutsideOfLoop(value))
1101 Value uniformScalarRepl =
1102 state.valueScalarReplacement.lookupOrDefault(uniformVal);
1103 state.builder.setInsertionPointAfterValue(uniformScalarRepl);
1106 auto bcastOp = state.builder.create<BroadcastOp>(uniformVal.
getLoc(),
1107 vectorTy, uniformScalarRepl);
1108 state.registerValueVectorReplacement(uniformVal, bcastOp);
1130 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorize operand: " << operand);
1132 if (
Value vecRepl = state.valueVectorReplacement.lookupOrNull(operand)) {
1133 LLVM_DEBUG(dbgs() <<
" -> already vectorized: " << vecRepl);
1140 assert(!isa<VectorType>(operand.
getType()) &&
1141 "Vector op not found in replacement map");
1144 if (
auto constOp = operand.
getDefiningOp<arith::ConstantOp>()) {
1146 LLVM_DEBUG(dbgs() <<
"-> constant: " << vecConstant);
1147 return vecConstant.getResult();
1153 LLVM_DEBUG(dbgs() <<
"-> uniform: " << *vecUniform);
1160 LLVM_DEBUG(dbgs() <<
"-> unsupported block argument\n");
1163 LLVM_DEBUG(dbgs() <<
"-> non-vectorizable\n");
1176 MemRefType memRefType = loadOp.getMemRefType();
1177 Type elementType = memRefType.getElementType();
1178 auto vectorType =
VectorType::get(state.strategy->vectorSizes, elementType);
1182 state.getScalarValueReplacementsFor(loadOp.getMapOperands(), mapOperands);
1186 indices.reserve(memRefType.getRank());
1187 if (loadOp.getAffineMap() !=
1188 state.builder.getMultiDimIdentityMap(memRefType.getRank()))
1192 indices.append(mapOperands.begin(), mapOperands.end());
1196 indices, state.vecLoopToVecDim);
1197 if (!permutationMap) {
1198 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ can't compute permutationMap\n");
1201 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1202 LLVM_DEBUG(permutationMap.print(dbgs()));
1204 auto transfer = state.builder.create<vector::TransferReadOp>(
1205 loadOp.getLoc(), vectorType, loadOp.getMemRef(), indices, permutationMap);
1208 state.registerOpVectorReplacement(loadOp, transfer);
1220 MemRefType memRefType = storeOp.getMemRefType();
1227 state.getScalarValueReplacementsFor(storeOp.getMapOperands(), mapOperands);
1231 indices.reserve(memRefType.getRank());
1232 if (storeOp.getAffineMap() !=
1233 state.builder.getMultiDimIdentityMap(memRefType.getRank()))
1237 indices.append(mapOperands.begin(), mapOperands.end());
1241 indices, state.vecLoopToVecDim);
1242 if (!permutationMap)
1244 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ permutationMap: ");
1245 LLVM_DEBUG(permutationMap.print(dbgs()));
1247 auto transfer = state.builder.create<vector::TransferWriteOp>(
1248 storeOp.getLoc(), vectorValue, storeOp.getMemRef(), indices,
1250 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorized store: " << transfer);
1253 state.registerOpVectorReplacement(storeOp, transfer);
1262 if (!VectorType::isValidElementType(scalarTy))
1265 state.builder, value.
getLoc());
1266 if (
auto constOp = dyn_cast_or_null<arith::ConstantOp>(value.
getDefiningOp()))
1267 return constOp.getValue() == valueAttr;
1284 if (isLoopVecDim && forOp.getNumIterOperands() > 0 && forOp.getStep() != 1) {
1287 <<
"\n[early-vect]+++++ unsupported step size for reduction loop: "
1288 << forOp.getStep() <<
"\n");
1297 unsigned vectorDim = loopToVecDimIt->second;
1298 assert(vectorDim < strategy.
vectorSizes.size() &&
"vector dim overflow");
1299 int64_t forOpVecFactor = strategy.
vectorSizes[vectorDim];
1300 newStep = forOp.getStep() * forOpVecFactor;
1302 newStep = forOp.getStep();
1307 if (isLoopVecDim && forOp.getNumIterOperands() > 0) {
1310 "Reduction descriptors not found when vectorizing a reduction loop");
1311 reductions = it->second;
1312 assert(reductions.size() == forOp.getNumIterOperands() &&
1313 "The size of reductions array must match the number of iter_args");
1318 if (!isLoopVecDim) {
1319 for (
auto operand : forOp.getIterOperands())
1325 for (
auto redAndOperand : llvm::zip(reductions, forOp.getIterOperands())) {
1327 std::get<0>(redAndOperand).kind, std::get<1>(redAndOperand), state));
1331 auto vecForOp = state.builder.create<AffineForOp>(
1332 forOp.getLoc(), forOp.getLowerBoundOperands(), forOp.getLowerBoundMap(),
1333 forOp.getUpperBoundOperands(), forOp.getUpperBoundMap(), newStep,
1353 state.registerOpVectorReplacement(forOp, vecForOp);
1354 state.registerValueScalarReplacement(forOp.getInductionVar(),
1355 vecForOp.getInductionVar());
1356 for (
auto iterTuple :
1357 llvm ::zip(forOp.getRegionIterArgs(), vecForOp.getRegionIterArgs()))
1358 state.registerBlockArgVectorReplacement(std::get<0>(iterTuple),
1359 std::get<1>(iterTuple));
1362 for (
unsigned i = 0; i < vecForOp.getNumIterOperands(); ++i) {
1366 vecForOp.getLoc(), vecForOp.getResult(i));
1367 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ creating a vector reduction: "
1371 Value origInit = forOp.getOperand(forOp.getNumControlOperands() + i);
1372 Value finalRes = reducedRes;
1376 reducedRes.
getLoc(), reducedRes, origInit);
1377 state.registerLoopResultScalarReplacement(forOp.getResult(i), finalRes);
1382 state.vecLoopToVecDim[vecForOp] = loopToVecDimIt->second;
1386 state.builder.setInsertionPointToStart(vecForOp.getBody());
1390 if (isLoopVecDim && forOp.getNumIterOperands() > 0)
1402 vectorTypes.push_back(
1409 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ an operand failed vectorize\n");
1412 vectorOperands.push_back(vecOperand);
1422 vectorOperands, vectorTypes, op->
getAttrs());
1423 state.registerOpVectorReplacement(op, vecOp);
1434 Operation *newParentOp = state.builder.getInsertionBlock()->getParentOp();
1445 if (
Value mask = state.vecLoopToMask.lookup(newParentOp)) {
1446 state.builder.setInsertionPoint(newYieldOp);
1450 cast<AffineForOp>(newParentOp).getRegionIterArgs(), i, combinerOps);
1451 assert(reducedVal &&
"expect non-null value for parallel reduction loop");
1452 assert(combinerOps.size() == 1 &&
"expect only one combiner op");
1454 Value neutralVal = cast<AffineForOp>(newParentOp).getIterOperands()[i];
1455 state.builder.setInsertionPoint(combinerOps.back());
1456 Value maskedReducedVal = state.builder.create<arith::SelectOp>(
1457 reducedVal.
getLoc(), mask, reducedVal, neutralVal);
1459 dbgs() <<
"\n[early-vect]+++++ masking an input to a binary op that"
1460 "produces value for a yield Op: "
1461 << maskedReducedVal);
1462 combinerOps.back()->replaceUsesOfWith(reducedVal, maskedReducedVal);
1466 state.builder.setInsertionPointAfter(newParentOp);
1482 assert(!isa<vector::TransferReadOp>(op) &&
1483 "vector.transfer_read cannot be further vectorized");
1484 assert(!isa<vector::TransferWriteOp>(op) &&
1485 "vector.transfer_write cannot be further vectorized");
1487 if (
auto loadOp = dyn_cast<AffineLoadOp>(op))
1489 if (
auto storeOp = dyn_cast<AffineStoreOp>(op))
1491 if (
auto forOp = dyn_cast<AffineForOp>(op))
1493 if (
auto yieldOp = dyn_cast<AffineYieldOp>(op))
1495 if (
auto constant = dyn_cast<arith::ConstantOp>(op))
1513 assert(currentLevel <= loops.size() &&
"Unexpected currentLevel");
1514 if (currentLevel == loops.size())
1515 loops.emplace_back();
1540 assert(loops[0].size() == 1 &&
"Expected single root loop");
1541 AffineForOp rootLoop = loops[0][0];
1543 state.builder.setInsertionPointAfter(rootLoop);
1544 state.strategy = &strategy;
1554 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ loop is not vectorizable");
1567 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ Vectorizing: " << *op);
1571 dbgs() <<
"[early-vect]+++++ failed vectorizing the operation: "
1579 if (opVecResult.wasInterrupted()) {
1580 LLVM_DEBUG(dbgs() <<
"[early-vect]+++++ failed vectorization for: "
1581 << rootLoop <<
"\n");
1583 auto vecRootLoopIt = state.opVectorReplacement.find(rootLoop);
1584 if (vecRootLoopIt != state.opVectorReplacement.end())
1592 for (
auto resPair : state.loopResultScalarReplacement)
1593 resPair.first.replaceAllUsesWith(resPair.second);
1595 assert(state.opVectorReplacement.count(rootLoop) == 1 &&
1596 "Expected vector replacement for loop nest");
1597 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ success vectorizing pattern");
1598 LLVM_DEBUG(dbgs() <<
"\n[early-vect]+++++ vectorization result:\n"
1599 << *state.opVectorReplacement[rootLoop]);
1602 state.finishVectorizationPattern(rootLoop);
1611 std::vector<SmallVector<AffineForOp, 2>> loopsToVectorize;
1623 assert(intersectionBuckets.empty() &&
"Expected empty output");
1628 AffineForOp matchRoot = cast<AffineForOp>(match.getMatchedOperation());
1630 for (
int i = 0, end = intersectionBuckets.size(); i < end; ++i) {
1631 AffineForOp bucketRoot = bucketRoots[i];
1633 if (bucketRoot->isAncestor(matchRoot)) {
1634 intersectionBuckets[i].push_back(match);
1640 if (matchRoot->isAncestor(bucketRoot)) {
1641 bucketRoots[i] = matchRoot;
1642 intersectionBuckets[i].push_back(match);
1651 bucketRoots.push_back(matchRoot);
1652 intersectionBuckets.emplace_back();
1653 intersectionBuckets.back().push_back(match);
1668 assert((reductionLoops.empty() || vectorSizes.size() == 1) &&
1669 "Vectorizing reductions is supported only for 1-D vectors");
1672 std::optional<NestedPattern> pattern =
1673 makePattern(loops, vectorSizes.size(), fastestVaryingPattern);
1675 LLVM_DEBUG(dbgs() <<
"\n[early-vect] pattern couldn't be computed\n");
1679 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1680 LLVM_DEBUG(dbgs() <<
"\n******************************************");
1681 LLVM_DEBUG(dbgs() <<
"\n[early-vect] new pattern on parent op\n");
1682 LLVM_DEBUG(dbgs() << *parentOp <<
"\n");
1684 unsigned patternDepth = pattern->getDepth();
1689 pattern->match(parentOp, &allMatches);
1690 std::vector<SmallVector<NestedMatch, 8>> intersectionBuckets;
1696 for (
auto &intersectingMatches : intersectionBuckets) {
1700 strategy.
vectorSizes.assign(vectorSizes.begin(), vectorSizes.end());
1703 patternDepth, &strategy))) {
1717 LLVM_DEBUG(dbgs() <<
"\n");
1722 void Vectorize::runOnOperation() {
1723 func::FuncOp f = getOperation();
1724 if (!fastestVaryingPattern.empty() &&
1725 fastestVaryingPattern.size() != vectorSizes.size()) {
1726 f.emitRemark(
"Fastest varying pattern specified with different size than "
1727 "the vector size.");
1728 return signalPassFailure();
1731 if (vectorizeReductions && vectorSizes.size() != 1) {
1732 f.emitError(
"Vectorizing reductions is supported only for 1-D vectors.");
1733 return signalPassFailure();
1741 if (vectorizeReductions) {
1742 f.walk([&parallelLoops, &reductionLoops](AffineForOp loop) {
1745 parallelLoops.insert(loop);
1747 if (!reductions.empty())
1748 reductionLoops[loop] = reductions;
1752 f.walk([&parallelLoops](AffineForOp loop) {
1754 parallelLoops.insert(loop);
1760 vectorizeLoops(f, parallelLoops, vectorSizes, fastestVaryingPattern,
1777 if (loops[0].size() != 1)
1781 for (
int i = 1, end = loops.size(); i < end; ++i) {
1782 for (AffineForOp loop : loops[i]) {
1785 if (none_of(loops[i - 1], [&](AffineForOp maybeParent) {
1786 return maybeParent->isProperAncestor(loop);
1792 for (AffineForOp sibling : loops[i]) {
1793 if (sibling->isProperAncestor(loop))
1817 vectorizeLoops(parentOp, loops, vectorSizes, fastestVaryingPattern,
static bool intersects(const ConstantIntRanges &lhs, const ConstantIntRanges &rhs)
Returns true if 2 integer ranges have intersection.
static Operation * vectorizeAffineStore(AffineStoreOp storeOp, VectorizationState &state)
Vectorizes an affine store with the vectorization strategy in 'state' by generating a 'vector....
static Operation * vectorizeAffineForOp(AffineForOp forOp, VectorizationState &state)
Vectorizes a loop with the vectorization strategy in 'state'.
static LogicalResult vectorizeRootMatch(NestedMatch m, const VectorizationStrategy &strategy)
Extracts the matched loops and vectorizes them following a topological order.
static LogicalResult verifyLoopNesting(const std::vector< SmallVector< AffineForOp, 2 >> &loops)
Verify that affine loops in 'loops' meet the nesting criteria expected by SuperVectorizer:
static void getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel, std::vector< SmallVector< AffineForOp, 2 >> &loops)
Recursive implementation to convert all the nested loops in 'match' to a 2D vector container that pre...
static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
static Operation * vectorizeOneOperation(Operation *op, VectorizationState &state)
Encodes Operation-specific behavior for vectorization.
static bool isNeutralElementConst(arith::AtomicRMWKind reductionKind, Value value, VectorizationState &state)
Returns true if value is a constant equal to the neutral element of the given vectorizable reduction.
static Operation * vectorizeUniform(Value uniformVal, VectorizationState &state)
Generates a broadcast op for the provided uniform value using the vectorization strategy in 'state'.
static Operation * vectorizeAffineYieldOp(AffineYieldOp yieldOp, VectorizationState &state)
Vectorizes a yield operation by widening its types.
static void computeIntersectionBuckets(ArrayRef< NestedMatch > matches, std::vector< SmallVector< NestedMatch, 8 >> &intersectionBuckets)
Traverses all the loop matches and classifies them into intersection buckets.
static LogicalResult analyzeProfitability(ArrayRef< NestedMatch > matches, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy)
Implements a simple strawman strategy for vectorization.
static FilterFunctionType isVectorizableLoopPtrFactory(const DenseSet< Operation * > &parallelLoops, int fastestVaryingMemRefDimension)
Forward declaration.
static Operation * widenOp(Operation *op, VectorizationState &state)
Vectorizes arbitrary operation by plain widening.
static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp, VectorizationState &state)
Tries to transform a scalar constant into a vector constant.
static bool isUniformDefinition(Value value, const VectorizationStrategy *strategy)
Returns true if the provided value is vector uniform given the vectorization strategy.
static void eraseLoopNest(AffineForOp forOp)
Erases a loop nest, including all its nested operations.
static VectorType getVectorType(Type scalarTy, const VectorizationStrategy *strategy)
Returns the vector type resulting from applying the provided vectorization strategy on the scalar typ...
static void getMatchedAffineLoops(NestedMatch match, std::vector< SmallVector< AffineForOp, 2 >> &loops)
Converts all the nested loops in 'match' to a 2D vector container that preserves the relative nesting...
static Value vectorizeOperand(Value operand, VectorizationState &state)
Tries to vectorize a given operand by applying the following logic:
static arith::ConstantOp createInitialVector(arith::AtomicRMWKind reductionKind, Value oldOperand, VectorizationState &state)
Creates a constant vector filled with the neutral elements of the given reduction.
static LogicalResult vectorizeLoopNest(std::vector< SmallVector< AffineForOp, 2 >> &loops, const VectorizationStrategy &strategy)
Internal implementation to vectorize affine loops from a single loop nest using an n-D vectorization ...
static NestedPattern & vectorTransferPattern()
static void vectorizeLoops(Operation *parentOp, DenseSet< Operation * > &loops, ArrayRef< int64_t > vectorSizes, ArrayRef< int64_t > fastestVaryingPattern, const ReductionLoopMap &reductionLoops)
Internal implementation to vectorize affine loops in 'loops' using the n-D vectorization factors in '...
static void computeMemoryOpIndices(Operation *op, AffineMap map, ValueRange mapOperands, VectorizationState &state, SmallVectorImpl< Value > &results)
static Operation * vectorizeAffineLoad(AffineLoadOp loadOp, VectorizationState &state)
Vectorizes an affine load with the vectorization strategy in 'state' by generating a 'vector....
static Value createMask(AffineForOp vecForOp, VectorizationState &state)
Creates a mask used to filter out garbage elements in the last iteration of unaligned loops.
static std::optional< NestedPattern > makePattern(const DenseSet< Operation * > &parallelLoops, int vectorRank, ArrayRef< int64_t > fastestVaryingPattern)
Creates a vectorization pattern from the command line arguments.
static AffineMap makePermutationMap(ArrayRef< Value > indices, const DenseMap< Operation *, unsigned > &enclosingLoopToVectorDim)
Constructs a permutation map from memref indices to vector dimension.
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
Attributes are known-constant values of operations.
This class represents an argument of a Block.
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This is a utility class for mapping one set of IR entities to another.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
Location getLoc()
The source location the operation was defined or derived from.
unsigned getNumOperands()
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OperationName getName()
The name of an operation is the key identifier for it.
operand_range getOperands()
Returns an iterator on the underlying Value's.
result_range getResults()
void erase()
Remove this operation from its parent block and delete it.
unsigned getNumResults()
Return the number of results held by this operation.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
Type getType() const
Return the type of this value.
Location getLoc() const
Return the location of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
A NestedPattern captures nested patterns in the IR.
Operation * getMatchedOperation() const
ArrayRef< NestedMatch > getMatchedChildren()
RAII structure to transparently manage the bump allocator for NestedPattern and NestedMatch classes.
NestedPattern For(const NestedPattern &child)
NestedPattern Op(FilterFunctionType filter=defaultFilterFunction)
bool isVectorizableLoopBody(AffineForOp loop, NestedPattern &vectorTransferMatcher)
Checks whether the loop is structurally vectorizable; i.e.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ValueRange operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
AffineForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
std::function< bool(Operation &)> FilterFunctionType
A NestedPattern is a nested operation walker that:
void vectorizeAffineLoops(Operation *parentOp, llvm::DenseSet< Operation *, DenseMapInfo< Operation * >> &loops, ArrayRef< int64_t > vectorSizes, ArrayRef< int64_t > fastestVaryingPattern, const ReductionLoopMap &reductionLoops=ReductionLoopMap())
Vectorizes affine loops in 'loops' using the n-D vectorization factors in 'vectorSizes'.
bool isLoopParallel(AffineForOp forOp, SmallVectorImpl< LoopReduction > *parallelReductions=nullptr)
Returns true if ‘forOp’ is a parallel loop.
LogicalResult vectorizeAffineLoopNest(std::vector< SmallVector< AffineForOp, 2 >> &loops, const VectorizationStrategy &strategy)
External utility to vectorize affine loops from a single loop nest using an n-D vectorization strateg...
TypedAttr getIdentityValueAttr(AtomicRMWKind kind, Type resultType, OpBuilder &builder, Location loc)
Returns the identity value attribute associated with an AtomicRMWKind op.
Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc, Value lhs, Value rhs)
Returns the value obtained by applying the reduction operation kind associated with a binary AtomicRM...
Value getVectorReductionOp(arith::AtomicRMWKind op, OpBuilder &builder, Location loc, Value vector)
Returns the value obtained by reducing the vector into a scalar using the operation kind associated w...
This header declares functions that assist transformations in the MemRef dialect.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Contains the vectorization state and related methods used across the vectorization process of a given...
This class represents an efficient way to signal success or failure.
Holds parameters to perform n-D vectorization on a single loop nest.
SmallVector< int64_t, 8 > vectorSizes
DenseMap< Operation *, unsigned > loopToVectorDim
ReductionLoopMap reductionLoops