33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/Support/Debug.h"
37#define DEBUG_TYPE "linalg-utils"
65  assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
66 "nonpositive multiplying coefficient");
77 TileCheck t(tileSizes);
92std::optional<RegionMatcher::BinaryOpKind>
94  auto &region = op.getRegion();
95 if (!region.hasOneBlock())
113 if (addPattern.match(&ops.back()))
130  for (Range range : ranges) {
149static SmallVector<int64_t>
152 PackingMetadata &packingMetadata) {
153 int64_t numPackedDims = innerDimsPos.size();
155 llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
156 packingMetadata = computePackingMetadata(rank, innerDimsPos);
161 if (!outerPerm.empty())
168 return packInverseDestPermutation;
175 PackingMetadata &metadata) {
177 int64_t packedRank = packOp.getDestType().getRank();
182 return packInvDestPerm;
186 PackingMetadata &metadata) {
187 int64_t packedRank = unpackOp.getSourceType().getRank();
192 return unpackInvSrcPerm;
196  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
197 return m.isProjectedPermutation(true);
205 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
206 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
208 llvm::any_of(op.getResultTypes(),
209                   [](Type type) { return !type.isIntOrIndexOrFloat(); }))
216 if (op.getNumLoops() != op.getNumParallelLoops())
223  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
224 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
231 return iteratorType == utils::IteratorType::parallel;
235 return iteratorType == utils::IteratorType::reduction;
250 if (!dyn_cast_if_present<arith::ExtFOp>(defOp) &&
251 !dyn_cast_if_present<arith::ExtSIOp>(defOp) &&
252 !dyn_cast_if_present<arith::ExtUIOp>(defOp)) {
255  return dyn_cast<BlockArgument>(defOp->getOperand(0));
265 if (!isa_and_present<arith::AddIOp, arith::AddFOp>(addOp))
269 if (!isa_and_present<arith::MulIOp, arith::MulFOp>(mulOp))
278 if (!lhsBlockArg || !rhsBlockArg || !outBlockArg ||
287template <typename... OpTypes>
290 if (!(isa_and_present<OpTypes>(defOp) || ...))
297  if (!lhsArg || !rhsArg || lhsArg.getOwner() != body ||
334 auto affineMap = cast<AffineMapAttr>(indexingMaps[mapIndex]).getValue();
335 if (dimIndex < affineMap.getNumResults())
336 return affineMap.getResult(dimIndex);
346 if ((dim = dyn_cast<AffineDimExpr>(expr)))
349 auto mulExpr = dyn_cast<AffineBinaryOpExpr>(expr);
357  if (((dim = dyn_cast<AffineDimExpr>(lhs)) &&
358       (cst = dyn_cast<AffineConstantExpr>(rhs))) ||
359      ((dim = dyn_cast<AffineDimExpr>(rhs)) &&
360       (cst = dyn_cast<AffineConstantExpr>(lhs))))
388                                      unsigned fDim, unsigned oDim,
390 unsigned inputMapIdx = 0, filterMapIdx = 1,
391 outputMapIdx = indexingMaps.size() - 1;
393 auto addExpr = dyn_cast<AffineBinaryOpExpr>(inpExpr);
401 if (c0 == -1 || c1 == -1)
406 if (dim0 == fExpr && dim1 == oExpr) {
411 if (dim1 == fExpr && dim0 == oExpr) {
429 return indexingMaps ==
431 context, llvm::to_vector<4>(llvm::map_range(
433 return AffineMapAttr::get(m);
444 if (isa<linalg::DepthwiseConv1DNwcWcOp>(op))
448 "expected op to implement ConvolutionOpInterface");
457 ArrayAttr indexingMaps = op.getIndexingMaps();
461 1, (*dilations)[0], (*strides)[0]))
465          {{N, W * (*strides)[0] + w * (*dilations)[0], C},
468 indexingMaps, context))
471 Block *body = op.getBlock();
473 Value yieldVal = yieldOp.getOperand(0);
484 if (isa<linalg::DepthwiseConv2DNchwChwOp>(op))
488 "expected op to implement ConvolutionOpInterface");
499 ArrayAttr indexingMaps = op.getIndexingMaps();
503 2, (*dilations)[0], (*strides)[0]))
507 3, (*dilations)[1], (*strides)[1]))
511 {{N, C, H * (*strides)[0] + h * (*dilations)[0],
512 W * (*strides)[1] + w * (*dilations)[1]},
515 indexingMaps, context))
518 Block *body = op.getBlock();
520 Value yieldVal = yieldOp.getOperand(0);
534 if (isa<linalg::DepthwiseConv3DNdhwcDhwcmOp>(op))
538 "expected op to implement ConvolutionOpInterface");
552 ArrayAttr indexingMaps = op.getIndexingMaps();
556 1, (*dilations)[0], (*strides)[0]))
560 2, (*dilations)[1], (*strides)[1]))
564 3, (*dilations)[2], (*strides)[2]))
568 {{N, D * (*strides)[0] + d * (*dilations)[0],
569 H * (*strides)[1] + h * (*dilations)[1],
570 W * (*strides)[2] + w * (*dilations)[2], C},
572           {N, D, H, W, C, CM}},
573 indexingMaps, context))
576 Block *body = op.getBlock();
578 Value yieldVal = yieldOp.getOperand(0);
589 if (isa<linalg::PoolingNhwcMaxOp>(op))
593 "expected op to implement ConvolutionOpInterface");
604 ArrayAttr indexingMaps = op.getIndexingMaps();
608 1, (*dilations)[0], (*strides)[0]))
612 2, (*dilations)[1], (*strides)[1]))
616 {{N, H * (*strides)[0] + h * (*dilations)[0],
617 W * (*strides)[1] + w * (*dilations)[1], C},
620 indexingMaps, context))
623 Block *body = op.getBlock();
625 Value yieldVal = yieldOp.getOperand(0);
636 if (isa<linalg::PoolingNhwcMinOp>(op))
640 "expected op to implement ConvolutionOpInterface");
651 ArrayAttr indexingMaps = op.getIndexingMaps();
655 1, (*dilations)[0], (*strides)[0]))
659 2, (*dilations)[1], (*strides)[1]))
663 {{N, H * (*strides)[0] + h * (*dilations)[0],
664 W * (*strides)[1] + w * (*dilations)[1], C},
667 indexingMaps, context))
670 Block *body = op.getBlock();
672 Value yieldVal = yieldOp.getOperand(0);
683 if (isa<linalg::PoolingNhwcSumOp>(op))
687 "expected op to implement ConvolutionOpInterface");
698 ArrayAttr indexingMaps = op.getIndexingMaps();
702 1, (*dilations)[0], (*strides)[0]))
706 2, (*dilations)[1], (*strides)[1]))
710 {{N, H * (*strides)[0] + h * (*dilations)[0],
711 W * (*strides)[1] + w * (*dilations)[1], C},
714 indexingMaps, context))
717 Block *body = op.getBlock();
719 Value yieldVal = yieldOp.getOperand(0);
730 if (isa<linalg::PoolingNhwcMaxUnsignedOp>(op))
734 "expected op to implement ConvolutionOpInterface");
745 ArrayAttr indexingMaps = op.getIndexingMaps();
749 1, (*dilations)[0], (*strides)[0]))
753 2, (*dilations)[1], (*strides)[1]))
757 {{N, H * (*strides)[0] + h * (*dilations)[0],
758 W * (*strides)[1] + w * (*dilations)[1], C},
761 indexingMaps, context))
764 Block *body = op.getBlock();
766 Value yieldVal = yieldOp.getOperand(0);
777 if (isa<linalg::PoolingNhwcMinUnsignedOp>(op))
781 "expected op to implement ConvolutionOpInterface");
792 ArrayAttr indexingMaps = op.getIndexingMaps();
796 1, (*dilations)[0], (*strides)[0]))
800 2, (*dilations)[1], (*strides)[1]))
804 {{N, H * (*strides)[0] + h * (*dilations)[0],
805 W * (*strides)[1] + w * (*dilations)[1], C},
808 indexingMaps, context))
811 Block *body = op.getBlock();
813 Value yieldVal = yieldOp.getOperand(0);
821  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
827 Value current = sliceOp.getSource();
832 OpResult opResult = cast<OpResult>(current);
833    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
835  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
844 if (sliceOp.getSource().getType() != type)
849  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
850 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
857 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
859 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
866 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
867 [](std::tuple<OpFoldResult, OpFoldResult> it) {
868 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
875 Value padOpPad = padOp.getConstantPaddingValue();
882 return sliceOp.getSource();
886  auto memrefTypeTo = cast<MemRefType>(to.getType());
888  auto memrefTypeFrom = cast<MemRefType>(from.getType());
889 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
890 "`from` and `to` memref must have the same rank");
896 utils::IteratorType::parallel);
897 return linalg::GenericOp::create(
904        linalg::YieldOp::create(b, loc, args.front());
917 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
918 "expected as many entries for proc info as number of loops, even if "
919 "they are null entries");
921 if (!linalgOp.hasPureBufferSemantics())
922 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
926 b, loc, lbs, ubs, steps, iterArgInitValues,
928 assert(iterArgs.size() == iterArgInitValues.size() &&
929 "expect the number of output tensors and iter args to match");
931 if (!iterArgs.empty()) {
932 operandValuesToUse = linalgOp.getDpsInputs();
933 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
935        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
938  if (loopNest.loops.empty() || procInfo.empty())
942  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
943 if (procInfo[loop.index()].distributionMethod ==
945 mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
946 procInfo[loop.index()].nprocs);
961 if (!linalgOp.hasPureBufferSemantics())
962 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
963  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
969 constantSteps.reserve(steps.size());
970  for (Value v : steps) {
972    assert(constVal.has_value() && "Affine loops require constant steps");
973 constantSteps.push_back(constVal.value());
978        bodyBuilderFn(b, loc, ivs,
979 linalgOp->getOperands());
1011 assert(lbs.size() == ubs.size());
1012 assert(lbs.size() == steps.size());
1013 assert(lbs.size() == iteratorTypes.size());
1014 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
1018 if (iteratorTypes.empty()) {
1019    bodyBuilderFn(b, loc, ivStorage);
1027 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
1029 ivStorage.append(ivs.begin(), ivs.end());
1030 generateParallelLoopNest(
1031 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
1032 iteratorTypes.drop_front(),
1033 procInfo.empty() ? procInfo : procInfo.drop_front(),
1034 bodyBuilderFn, ivStorage);
1039 unsigned nLoops = iteratorTypes.size();
1040 unsigned numProcessed = 0;
1042 if (procInfo.empty()) {
1045 distributionMethod = procInfo.front().distributionMethod;
1054 auto remainderProcInfo =
1055 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
1056 switch (distributionMethod) {
1060 scf::ParallelOp::create(
1061 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
1062 steps.take_front(numProcessed),
1064 ivStorage.append(localIvs.begin(), localIvs.end());
1065 generateParallelLoopNest(
1066 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
1067 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
1068 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
1069 bodyBuilderFn, ivStorage);
1076 scf::ParallelOp::create(
1077 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
1078 steps.take_front(numProcessed),
1080 ivStorage.append(localIvs.begin(), localIvs.end());
1081 generateParallelLoopNest(
1082 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
1083 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
1084 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
1085 bodyBuilderFn, ivStorage);
1092    Value cond = ab.slt(lbs[0], ubs[0]);
1093    for (unsigned i = 1; i < numProcessed; ++i)
1094      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
1095 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
1098 ubs.drop_front(numProcessed),
1099 steps.drop_front(numProcessed),
1100 iteratorTypes.drop_front(numProcessed),
1101 remainderProcInfo, bodyBuilderFn, ivStorage);
1109 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
1111 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
1112 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
1113 remainderProcInfo, bodyBuilderFn, ivStorage);
1128 if (!linalgOp.hasPureBufferSemantics())
1129 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
1130  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
1132 assert(iteratorTypes.size() >= loopRanges.size() &&
1133 "expected iterator type for all ranges");
1134 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
1135 "expected proc information for all loops when present");
1136 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
1138 unsigned numLoops = iteratorTypes.size();
1139 ivs.reserve(numLoops);
1140 lbsStorage.reserve(numLoops);
1141 ubsStorage.reserve(numLoops);
1142 stepsStorage.reserve(numLoops);
1145  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
1148  for (const auto &it : llvm::enumerate(procInfo)) {
1151 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
1152 ubsStorage[it.index()], stepsStorage[it.index()]);
1155 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
1157 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
1159        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
1163  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
1169  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
1171 .Case([&](MemRefType) {
1172 return memref::SubViewOp::create(
1173            builder, loc, valueToTile, sliceParams.offsets,
1176 .Case([&](RankedTensorType) {
1177 return tensor::ExtractSliceOp::create(
1178            builder, loc, valueToTile, sliceParams.offsets,
1181      .DefaultUnreachable("Unexpected shaped type");
1190 bool omitPartialTileCheck) {
1193 ubs, subShapeSizes, omitPartialTileCheck);
1202 bool omitPartialTileCheck) {
1203  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
1204  assert(shapedType && "only shaped types can be tiled");
1206 int64_t rank = shapedType.getRank();
1210  sliceParams.offsets.reserve(rank);
1211  sliceParams.sizes.reserve(rank);
1212  sliceParams.strides.reserve(rank);
1213  for (unsigned r = 0; r < rank; ++r) {
1214    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
1218      sliceParams.sizes.push_back(dim);
1220      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
1223    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
1228    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
1233    [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
1234    assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
1236 cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
1238 rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
1239    sliceParams.offsets.push_back(offset);
1247    LLVM_DEBUG(llvm::dbgs()
1248               << "computeSliceParameters: raw size: " << size << "\n");
1249    LLVM_DEBUG(llvm::dbgs()
1250               << "computeSliceParameters: new offset: " << offset << "\n");
1253 if (omitPartialTileCheck) {
1256      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
1257      sliceParams.sizes.push_back(size);
1268 auto hasTileSizeOne = sizeCst == 1;
1269 auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
1270 ((shapeSize % *sizeCst) == 0);
1271 if (!hasTileSizeOne && !dividesEvenly) {
1272      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
1273                              << ", size: " << size
1274                              << ": make sure in bound with affine.min\n");
1278 bindDims(context, dim0, dim1, dim2);
1309    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
1310    sliceParams.sizes.push_back(size);
1319  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
1320    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
1322    offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
1323    LLVM_DEBUG(llvm::dbgs()
1324               << "computeTileOffsets: " << offsets.back() << "\n");
1333  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
1340    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
1346 if (op.hasPureBufferSemantics())
1348 return llvm::to_vector(
1349      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
1350 return operands[opOperand.getOperandNumber()].getType();
1357 if (op.hasPureBufferSemantics())
1360 tensorResults.reserve(results.size());
1362 unsigned resultIdx = 0;
1363  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
1366 Value outputTensor = operands[opOperand.getOperandNumber()];
1367    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
1369 builder, loc, sliceOp.getSource().getType(), results[resultIdx],
1370 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
1371 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
1372 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
1375 tensorResults.push_back(results[resultIdx]);
1379 return tensorResults;
1387 bool omitPartialTileCheck) {
1388  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
1389 llvm::make_range(tileSizes.begin(), tileSizes.end()),
1391 "expected as many ivs as non-zero sizes");
1400  assert(static_cast<int64_t>(valuesToTile.size()) <=
1401 linalgOp->getNumOperands() &&
1402 "more value to tile than operands.");
1404 allSliceParams.reserve(valuesToTile.size());
1405  for (auto [opOperand, val] :
1406 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
1407 Value shapedOp = val;
1408    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
1409 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
1416 Type operandType = opOperand.get().getType();
1417    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
1418 linalgOp.isDpsInit(&opOperand))) {
1419 allSliceParams.push_back(std::nullopt);
1420      LLVM_DEBUG(llvm::dbgs()
1421                 << ": not tiled: use shape: " << operandType << "\n");
1424    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
1427 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
1428 omitPartialTileCheck));
1431 return allSliceParams;
1439 bool omitPartialTileCheck) {
1442 tileSizes, sizeBounds, omitPartialTileCheck);
1444  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
1445 Value valueToTile = std::get<0>(item);
1446 std::optional<SliceParameters> sliceParams = std::get<1>(item);
1447 tiledShapes.push_back(
1448 sliceParams.has_value()
1464 if (!linalgOp.hasIndexSemantics())
1467 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
1468 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
1471 b.setInsertionPointAfter(indexOp);
1475        b, indexOp.getLoc(), index + offset,
1476 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
1477 Value materialized =
1479    b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
1491std::optional<SmallVector<ReassociationIndices>>
1495  for (const auto &it : llvm::enumerate(mixedSizes)) {
1496 auto dim = it.index();
1497 auto size = it.value();
1498 curr.push_back(dim);
1499 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
1500 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
1503 std::swap(reassociation.back(), curr);
1508 if (!curr.empty() && !reassociation.empty())
1509 reassociation.back().append(curr.begin(), curr.end());
1510 return reassociation;
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
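A minimal sketch of the body behind this helper, assuming Range carries offset/size/stride as OpFoldResults and that size is used directly as the upper bound (compare line 130 in the listing above); this is an approximation, not the verbatim implementation:
  for (Range range : ranges) {
    // Materialize each component as an index Value so the scf/affine loop
    // builders below can consume it.
    lbs.push_back(getValueOrCreateConstantIndexOp(builder, loc, range.offset));
    ubs.push_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
    steps.push_back(getValueOrCreateConstantIndexOp(builder, loc, range.stride));
  }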
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
An integer constant appearing in affine expression.
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr > > exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Attributes are known-constant values of operations.
This class represents an argument of a Block.
unsigned getArgNumber() const
Returns the number of this argument.
Block * getOwner() const
Returns the block that owns this argument.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
Operation * getTerminator()
Get the terminator operation of this block.
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
Value getOperand(unsigned idx)
This class contains a list of basic blocks and a link to the parent operation it is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
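A hedged usage sketch based on the signature above; `b`, `loc`, `ofrA`, and `ofrB` are assumed to be in scope:
  // Fold d0 + d1 over two OpFoldResult operands. If everything constant-folds,
  // the result comes back as an Attribute; otherwise an affine.apply (composed
  // with any producing affine.apply ops) is materialized.
  AffineExpr d0, d1;
  bindDims(b.getContext(), d0, d1);
  OpFoldResult sum = affine::makeComposedFoldedAffineApply(
      b, loc, AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, d0 + d1), {ofrA, ofrB});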
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
bool isaConvolutionOpOfType< linalg::DepthwiseConv3DNdhwcDhwcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::PoolingNhwcSumOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNchwChwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
SmallVector< int64_t > getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata)
Compute inverse permutation for the source tensor (i.e.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
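A hedged usage sketch based on the signature above; `b`, `loc`, `linalgOp`, `ivs`, `tileSizes`, and `sizeBounds` are assumed to come from the surrounding tiling code:
  // A tile size of zero means "this loop is not tiled"; for operands that are
  // not affected by any tiled loop the full shape is reused instead of a slice.
  SmallVector<Value> tiledOperands = linalg::makeTiledShapes(
      b, loc, linalgOp, linalgOp->getOperands(), ivs, tileSizes, sizeBounds,
      /*omitPartialTileCheck=*/false);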
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
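A hedged usage sketch; `b`, `loc`, `src`, and `dst` are assumed to be memref values of matching rank. Per the body excerpted above (lines 886-904), the produced generic uses identity indexing maps, all-parallel iterators, and a region that simply yields the input element:
  linalg::GenericOp copyOp = linalg::makeMemRefCopyOp(b, loc, /*from=*/src, /*to=*/dst);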
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
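A structural illustration only (plain C++, not the MLIR builder code) of the peel-front-then-recurse shape visible at lines 1018-1034; the real helper emits scf.parallel or scf.for for the peeled dimensions depending on the iterator types and distribution method:
  #include <functional>
  #include <vector>

  enum class Kind { Parallel, Sequential };

  void nest(std::vector<Kind> kinds, std::vector<int> &ivs,
            const std::function<void(const std::vector<int> &)> &body) {
    if (kinds.empty()) { // all dimensions peeled: invoke the loop body
      body(ivs);
      return;
    }
    // Stand-in for "create a loop over kinds.front() and grab its iv".
    ivs.push_back(0);
    std::vector<Kind> rest(kinds.begin() + 1, kinds.end());
    nest(rest, ivs, body); // recurse on the remaining dimensions
    ivs.pop_back();
  }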
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
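A hedged sketch grounded in the loop at lines 1319-1324; `b`, `loc`, `ivs`, and `tileSizes` are assumed to be in scope:
  // With tileSizes = [4, 0, 8] (0 means "loop not tiled") and ivs = [%i, %k],
  // the result is [%i, 0, %k]: each tiled dimension consumes the next induction
  // variable, untiled dimensions get a constant zero offset.
  SmallVector<OpFoldResult> offsets = linalg::computeTileOffsets(b, loc, ivs, tileSizes);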
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNwcWcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body)
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body)
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex, uint32_t dimIndex)
static bool bodyMatcherForPoolOps(Value yieldVal, Block *body)
Utility to match block body for linalg.pool* ops.
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
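A hedged sketch of the grouping performed by the loop at lines 1495-1510; `mixedSizes` is assumed to hold the slice sizes as OpFoldResults:
  // Unit dimensions are accumulated into the current group and folded into the
  // next non-unit dimension; trailing unit dimensions fold into the last group.
  //   mixedSizes [1, 4, 1, 8]  ->  [[0, 1], [2, 3]]
  //   mixedSizes [4, 1]        ->  [[0, 1]]
  std::optional<SmallVector<ReassociationIndices>> groups =
      linalg::getReassociationMapForFoldingUnitDims(mixedSizes);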
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
@ CyclicNumProcsGeNumIters
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
@ Cyclic
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
@ CyclicNumProcsEqNumIters
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
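A hedged sketch of how these schemes play out in the switch excerpted at lines 1056-1113; `procId` and `nprocs` are assumed to be index Values obtained from the distribution callback, and the ProcInfo field order (procId, nprocs, distributionMethod) is an assumption:
  // Cyclic keeps the scf.parallel loop with per-processor bounds;
  // CyclicNumProcsGeNumIters replaces the loop with a bounds guard (lb < ub)
  // around a single iteration; CyclicNumProcsEqNumIters drops both loop and
  // guard and uses the per-processor lower bounds directly as induction variables.
  linalg::ProcInfo info{procId, nprocs, linalg::DistributionMethod::Cyclic};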
static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body)
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
bool isaConvolutionOpInterface(LinalgOp linalgOp, bool allowEmptyConvolvedDims=false)
Checks whether linalgOp conforms to ConvolutionOpInterface.
static BlockArgument getBlockArgumentWithOptionalExtOps(Value val)
Returns the BlockArgument that leads to val, if any.
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body)
SmallVector< int64_t > getPackInverseDestPerm(linalg::PackOp packOp, PackingMetadata &metadata)
Compute inverse permutation for the destination tensor (i.e.
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body)
Utility to match block body for convolution ops.
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
static bool convLayoutMatches(ArrayRef< ArrayRef< AffineExpr > > mapListExpected, ArrayAttr indexingMaps, MLIRContext *context)
Returns true if the given indexing maps match the expected indexing maps.
static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body)
static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim, unsigned fDim, unsigned oDim, int64_t &dilation, int64_t &stride)
Given an array of AffineMaps indexingMaps verify the following commutatively:- indexingMaps[0]....
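A hedged illustration of the expression shape being matched for a convolved dimension (compare the expected layouts at lines 465, 511, and 568); `ctx` and the dimension positions are hypothetical:
  // The input map's result must be an add of the output dimension scaled by the
  // stride and the filter dimension scaled by the dilation, in either operand
  // order; a bare dimension counts as an implicit factor of 1.
  AffineExpr dOut, dFil;
  bindDims(ctx, dOut, dFil);
  AffineExpr convolved = dOut * /*stride=*/2 + dFil * /*dilation=*/1;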
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
static int64_t isDimTimesConstantOrDimOnly(AffineExpr expr, AffineExpr &dim)
Check if expr is either:
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
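A hedged sketch of the cyclic remapping this helper performs (the actual implementation builds it with composed affine.apply ops); `b`, `loc`, `procId`, `nprocs`, `lb`, `ub`, and `step` are assumed to be in scope:
  // Each processor starts at its own offset and strides by the processor count:
  //   lb'   = lb + procId * step
  //   step' = nprocs * step
  //   ub'   = ub   (unchanged; the loop condition handles the tail)
  linalg::updateBoundsForCyclicDistribution(b, loc, procId, nprocs, lb, ub, step);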
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
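A hedged sketch mirroring the use at line 1278; `context` is assumed to be an MLIRContext pointer:
  // Bind AffineExpr handles to dimension positions 0..2, then combine them into
  // a two-result map such as the affine.min operand map built nearby.
  AffineExpr d0, d1, d2;
  bindDims(context, d0, d1, d2);
  AffineMap minMap = AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {d0, d1 - d2}, context);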
detail::NameOpMatcher m_Op(StringRef opName)
Matches a named operation.
@ Mul
RHS of mul is always a constant or a symbolic expression.
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::op_matcher< OpClass > m_Op()
Matches the given OpClass.
SmallVector< int64_t, 2 > ReassociationIndices
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
llvm::function_ref< Fn > function_ref
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< utils::IteratorType > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
static std::optional< BinaryOpKind > matchAsScalarBinaryOp(GenericOp op)
Matches the given linalg op if its body is performing binary operation on int or float scalar values ...
A struct containing the offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets