#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-utils"
  assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
         "nonpositive multiplying coefficient");

  TileCheck t(tileSizes);
std::optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.getRegion();
  if (!region.hasOneBlock())

  if (addPattern.match(&ops.back()))

  for (Range range : ranges) {
static SmallVector<int64_t>
computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
                      ArrayRef<int64_t> &outerPerm,
                      PackingMetadata &packingMetadata) {
  int64_t numPackedDims = innerDimsPos.size();
      llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
  packingMetadata = computePackingMetadata(rank, innerDimsPos);
  if (!outerPerm.empty())
  return packInverseDestPermutation;
SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp,
                                            PackingMetadata &metadata) {
  int64_t packedRank = packOp.getDestType().getRank();
  return packInvDestPerm;

SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp,
                                             PackingMetadata &metadata) {
  int64_t packedRank = unpackOp.getSourceType().getRank();
  return unpackInvSrcPerm;
  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
    return m.isProjectedPermutation(true);

    if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
              linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
        llvm::any_of(op.getResultTypes(),
                     [](Type type) { return !type.isIntOrIndexOrFloat(); }))
  if (op.getNumLoops() != op.getNumParallelLoops())

  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())

  return iteratorType == utils::IteratorType::parallel;

  return iteratorType == utils::IteratorType::reduction;
  if (!dyn_cast_if_present<arith::ExtFOp>(defOp) &&
      !dyn_cast_if_present<arith::ExtSIOp>(defOp) &&
      !dyn_cast_if_present<arith::ExtUIOp>(defOp) &&
      !dyn_cast_if_present<arith::SIToFPOp>(defOp)) {

  return dyn_cast<BlockArgument>(defOp->getOperand(0));
  if (!isa_and_present<arith::SubIOp, arith::SubFOp>(inputSubOp))
  if (!isa_and_present<arith::SubIOp, arith::SubFOp>(filterSubOp))

  if (!inputBlockArg || !inputZpBlockArg || !filterBlockArg ||
      !filterZpBlockArg || !outBlockArg)

  if (inputBlockArg.getOwner() != body || inputZpBlockArg.getOwner() != body ||
      filterBlockArg.getOwner() != body ||

                                          bool containsZeroPointOffset = false) {
  if (!isa_and_present<arith::AddIOp, arith::AddFOp>(addOp))
  if (!isa_and_present<arith::MulIOp, arith::MulFOp>(mulOp))
  if (containsZeroPointOffset) {
  if (!lhsBlockArg || !rhsBlockArg || !outBlockArg ||
template <typename... OpTypes>
  if (!(isa_and_present<OpTypes>(defOp) || ...))
  if (!lhsArg || !rhsArg || lhsArg.getOwner() != body ||
  auto affineMap = cast<AffineMapAttr>(indexingMaps[mapIndex]).getValue();
  if (dimIndex < affineMap.getNumResults())
    return affineMap.getResult(dimIndex);

  if ((dim = dyn_cast<AffineDimExpr>(expr)))

  auto mulExpr = dyn_cast<AffineBinaryOpExpr>(expr);

  if (((dim = dyn_cast<AffineDimExpr>(lhs)) &&
       (cst = dyn_cast<AffineConstantExpr>(rhs))) ||
      ((dim = dyn_cast<AffineDimExpr>(rhs)) &&
       (cst = dyn_cast<AffineConstantExpr>(lhs))))
                                        unsigned fDim, unsigned oDim,
  unsigned inputMapIdx = 0, filterMapIdx = 1,
           outputMapIdx = indexingMaps.size() - 1;

  auto addExpr = dyn_cast_or_null<AffineBinaryOpExpr>(inpExpr);

  if (c0 == -1 || c1 == -1)

  if (dim0 == fExpr && dim1 == oExpr) {
  if (dim1 == fExpr && dim0 == oExpr) {

  return indexingMaps ==
             context, llvm::to_vector<4>(llvm::map_range(
               return AffineMapAttr::get(m);
  ArrayAttr indexingMaps;

      : op(op), ctx(op->getContext()), dilations(d), strides(s),
        indexingMaps(op.getIndexingMaps()), poolingType(poolingType) {

    return base * (*strides)[idx] + kernel * (*dilations)[idx];
                   (*dilations)[idx], (*strides)[idx]);

    Block *body = op.getBlock();
    switch (poolingType) {
                                      containsZeroPointOffset);
  if (isa<linalg::Conv1DOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv1DNwcWcfOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv1DNcwFcwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNhwcHwcfOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNhwcHwcfQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNhwcFhwcOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNhwcFhwcQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNchwFchwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNchwFchwQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNgchwFgchwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNgchwGfchwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNgchwGfchwQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNhwgcGfhwcOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv2DNhwgcGfhwcQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv3DOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv3DNdhwcDhwcfOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv3DNdhwcDhwcfQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::Conv3DNcdhwFcdhwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv1DNcwCwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv1DNwcWcOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv1DNwcWcmOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv2DNchwChwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv2DNhwcHwcOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv2DNhwcHwcQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv2DNhwcHwcmOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv2DNhwcHwcmQOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv3DNdhwcDhwcOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv3DNcdhwCdhwOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::DepthwiseConv3DNdhwcDhwcmOp>(op))
         "expected op to implement ConvolutionOpInterface");
                {N, D, H, W, C, CM}})
  if (isa<linalg::PoolingNhwcMaxOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::PoolingNhwcMinOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::PoolingNhwcSumOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::PoolingNhwcMaxUnsignedOp>(op))
         "expected op to implement ConvolutionOpInterface");
  if (isa<linalg::PoolingNhwcMinUnsignedOp>(op))
         "expected op to implement ConvolutionOpInterface");
  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();

  Value current = sliceOp.getSource();
    OpResult opResult = cast<OpResult>(current);
    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;

  if (sliceOp.getSource().getType() != type)

  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);

  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
  if (!padOpSliceOp ||
      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())

          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));

  Value padOpPad = padOp.getConstantPaddingValue();

  return sliceOp.getSource();

  auto memrefTypeTo = cast<MemRefType>(to.getType());
  auto memrefTypeFrom = cast<MemRefType>(from.getType());
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");

                                 utils::IteratorType::parallel);
  return linalg::GenericOp::create(

        linalg::YieldOp::create(b, loc, args.front());
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected as many entries for proc info as number of loops, even if "
         "they are null entries");
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());

      b, loc, lbs, ubs, steps, iterArgInitValues,
        assert(iterArgs.size() == iterArgInitValues.size() &&
               "expect the number of output tensors and iter args to match");
        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getDpsInputs();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);

  if (loopNest.loops.empty() || procInfo.empty())

  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
    if (procInfo[loop.index()].distributionMethod ==
      mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
                            procInfo[loop.index()].nprocs);

  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");

  constantSteps.reserve(steps.size());
  for (Value v : steps) {
    assert(constVal.has_value() && "Affine loops require constant steps");
    constantSteps.push_back(constVal.value());

        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());
  assert(procInfo.empty() || (lbs.size() == procInfo.size()));

  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);

        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(
              b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
              iteratorTypes.drop_front(),
              procInfo.empty() ? procInfo : procInfo.drop_front(),
              bodyBuilderFn, ivStorage);

  unsigned nLoops = iteratorTypes.size();
  unsigned numProcessed = 0;
  if (procInfo.empty()) {
    distributionMethod = procInfo.front().distributionMethod;

  auto remainderProcInfo =
      procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
  switch (distributionMethod) {

    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
                             ubs.drop_front(numProcessed),
                             steps.drop_front(numProcessed),
                             iteratorTypes.drop_front(numProcessed),
                             remainderProcInfo, bodyBuilderFn, ivStorage);

    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        remainderProcInfo, bodyBuilderFn, ivStorage);
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");

  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected proc information for all loops when present");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());

  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);

  for (const auto &it : llvm::enumerate(procInfo)) {
        b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
        ubsStorage[it.index()], stepsStorage[it.index()]);

  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
      b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
      .Case([&](MemRefType) {
        return memref::SubViewOp::create(
            builder, loc, valueToTile, sliceParams.offsets,
      .Case([&](RankedTensorType) {
        return tensor::ExtractSliceOp::create(
            builder, loc, valueToTile, sliceParams.offsets,
      .DefaultUnreachable("Unexpected shaped type");
                                bool omitPartialTileCheck) {
                     ubs, subShapeSizes, omitPartialTileCheck);

                       bool omitPartialTileCheck) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  assert(shapedType && "only shaped types can be tiled");
  int64_t rank = shapedType.getRank();

  sliceParams.offsets.reserve(rank);
  sliceParams.sizes.reserve(rank);
  sliceParams.strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
      sliceParams.sizes.push_back(dim);
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");

    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");

    [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
    assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
        cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
        rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
    sliceParams.offsets.push_back(offset);

    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: new offset: " << offset << "\n");

    if (omitPartialTileCheck) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sliceParams.sizes.push_back(size);

    auto hasTileSizeOne = sizeCst == 1;
    auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
                         ((shapeSize % *sizeCst) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");

      bindDims(context, dim0, dim1, dim2);

    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sliceParams.sizes.push_back(size);
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
    offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");

  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");

  if (op.hasPureBufferSemantics())
  return llvm::to_vector(
      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        return operands[opOperand.getOperandNumber()].getType();

  if (op.hasPureBufferSemantics())
  tensorResults.reserve(results.size());
  unsigned resultIdx = 0;
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    Value outputTensor = operands[opOperand.getOperandNumber()];
    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
          builder, loc, sliceOp.getSource().getType(), results[resultIdx],
          sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
          sliceOp.getStrides(), sliceOp.getStaticOffsets(),
          sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
      tensorResults.push_back(results[resultIdx]);
  return tensorResults;
                           bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                           llvm::make_range(tileSizes.begin(), tileSizes.end()),
         "expected as many ivs as non-zero sizes");

  assert(static_cast<int64_t>(valuesToTile.size()) <=
             linalgOp->getNumOperands() &&
         "more value to tile than operands.");

  allSliceParams.reserve(valuesToTile.size());
  for (auto [opOperand, val] :
       llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
    Value shapedOp = val;
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);

    Type operandType = opOperand.get().getType();
    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
                                      linalgOp.isDpsInit(&opOperand))) {
      allSliceParams.push_back(std::nullopt);
      LLVM_DEBUG(llvm::dbgs()
                 << ": not tiled: use shape: " << operandType << "\n");

    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
        omitPartialTileCheck));

  return allSliceParams;
                              bool omitPartialTileCheck) {
                                 tileSizes, sizeBounds, omitPartialTileCheck);

  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
    Value valueToTile = std::get<0>(item);
    std::optional<SliceParameters> sliceParams = std::get<1>(item);
    tiledShapes.push_back(
        sliceParams.has_value()

  if (!linalgOp.hasIndexSemantics())

  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
    if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
    b.setInsertionPointAfter(indexOp);
        b, indexOp.getLoc(), index + offset,
        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
    Value materialized =
    b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {

std::optional<SmallVector<ReassociationIndices>>
  for (const auto &it : llvm::enumerate(mixedSizes)) {
    auto dim = it.index();
    auto size = it.value();
    curr.push_back(dim);
    auto attr = llvm::dyn_cast_if_present<Attribute>(size);
    if (attr && cast<IntegerAttr>(attr).getInt() == 1)
    std::swap(reassociation.back(), curr);

  if (!curr.empty() && !reassociation.empty())
    reassociation.back().append(curr.begin(), curr.end());
  return reassociation;
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
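A minimal sketch of how such a helper could look, assuming Range carries OpFoldResult offset/size/stride fields and that size is treated as the loop upper bound, as the loop-building code above does:

    static void unpackRangesSketch(OpBuilder &b, Location loc,
                                   ArrayRef<Range> ranges,
                                   SmallVectorImpl<Value> &lbs,
                                   SmallVectorImpl<Value> &ubs,
                                   SmallVectorImpl<Value> &steps) {
      for (Range range : ranges) {
        // Materialize each OpFoldResult as an index Value.
        lbs.push_back(getValueOrCreateConstantIndexOp(b, loc, range.offset));
        ubs.push_back(getValueOrCreateConstantIndexOp(b, loc, range.size));
        steps.push_back(getValueOrCreateConstantIndexOp(b, loc, range.stride));
      }
    }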
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
If copies could not be generated due to yet unimplemented cases, copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock specify the insertion points where the incoming and outgoing copies should be inserted (the insertion happens right before the insertion point). Since `begin` can itself be invalidated due to the memref rewriting done from this method...
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
An integer constant appearing in affine expression.
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr > > exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Attributes are known-constant values of operations.
This class represents an argument of a Block.
unsigned getArgNumber() const
Returns the number of this argument.
Block * getOwner() const
Returns the block that owns this argument.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
Operation * getTerminator()
Get the terminator operation of this block.
iterator_range< iterator > without_terminator()
Return an iterator range over the operations within this block, excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
Value getOperand(unsigned idx)
This class contains a list of basic blocks and a link to the parent operation it is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
Helper class for building convolution op matchers with minimal boilerplate.
ConvMatcherBuilder & matchStride(unsigned iDim, unsigned fDim, unsigned oDim, unsigned idx)
Match stride/dilation pattern for a spatial dimension.
bool matchBody(bool containsZeroPointOffset=false)
Match body pattern. This should be called last.
AffineExpr strided(AffineExpr base, AffineExpr kernel, unsigned idx)
Build strided expression: base * stride[idx] + kernel * dilation[idx].
AffineExpr dim(unsigned i)
Get affine dimension expression for dimension i.
ConvMatcherBuilder(LinalgOp op, unsigned spatialRank, SmallVector< int64_t > *d, SmallVector< int64_t > *s, PoolingType poolingType=PoolingType::None)
ConvMatcherBuilder & matchMaps(ArrayRef< ArrayRef< AffineExpr > > maps)
Match expected indexing maps layout. Returns *this for method chaining.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
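A hedged usage example; b, loc, ofr0 and ofr1 are assumed to be an OpBuilder, a Location and two OpFoldResult operands:

    AffineExpr d0, d1;
    bindDims(b.getContext(), d0, d1);
    // Computes ofr0 + ofr1, folding to an attribute when both operands are constant.
    OpFoldResult sum = affine::makeComposedFoldedAffineApply(
        b, loc, AffineMap::get(2, 0, {d0 + d1}, b.getContext()), {ofr0, ofr1});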
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
bool isaConvolutionOpOfType< linalg::DepthwiseConv3DNdhwcDhwcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::PoolingNhwcSumOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNhwcHwcfQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNchwChwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
SmallVector< int64_t > getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata)
Compute inverse permutation for the source tensor (i.e.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body, bool containsZeroPointOffset=false)
Utility to match block body for convolution ops.
bool isaConvolutionOpOfType< linalg::Conv1DOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
bool isaConvolutionOpOfType< linalg::Conv1DNcwFcwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNhwcHwcmQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv3DNdhwcDhwcfOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
bool isaConvolutionOpOfType< linalg::DepthwiseConv3DNdhwcDhwcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
bool isaConvolutionOpOfType< linalg::Conv2DNhwgcGfhwcQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
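A hedged sketch of how these helpers typically combine in a tiling driver; ivs, tileSizes, sizeBounds, valuesToTile and tiledOp are assumed local variables, not code from this file:

    SmallVector<OpFoldResult> offsets = computeTileOffsets(b, loc, ivs, tileSizes);
    SmallVector<OpFoldResult> sizes = computeTileSizes(b, loc, tileSizes, sizeBounds);
    // Take tiled slices of the operands, then run the body on them.
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, valuesToTile, ivs, tileSizes, sizeBounds,
        /*omitPartialTileCheck=*/false);
    // After cloning the op on the tiled operands, tensor results are written back.
    SmallVector<Value> tensorResults =
        insertSlicesBack(b, loc, linalgOp, tiledOperands, tiledOp->getResults());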
bool isaConvolutionOpOfType< linalg::Conv2DOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
PoolingType
Enum representing pooling operation types used by ConvMatcherBuilder.
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNwcWcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNgchwFgchwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNchwFchwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body)
bool isaConvolutionOpOfType< linalg::Conv3DOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForZeroPointOffsets(Operation *addOp, Operation *mulOp, Block *body)
Utility function to match the zero point offset body of quantized convolution ops.
static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body)
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
bool isaConvolutionOpOfType< linalg::Conv2DNgchwGfchwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex, uint32_t dimIndex)
static BlockArgument getBlockArgumentWithOptionalCastOps(Value val)
Returns the BlockArgument that leads to val, if any.
bool isaConvolutionOpOfType< linalg::Conv3DNcdhwFcdhwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForPoolOps(Value yieldVal, Block *body)
Utility to match block body for linalg.pool* ops.
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNhwcHwcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of an extract_slice (or source of an insert_slice) operat...
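A hedged illustration; the concrete sizes are made up, and the grouping shown assumes unit dims are folded into the neighbouring non-unit group, as the loop in the source above sketches:

    // mixedSizes = [1, 128, 1, 32] (all static) would yield reassociation
    // groups [[0, 1], [2, 3]]; trailing unit dims are appended to the last group.
    std::optional<SmallVector<ReassociationIndices>> reassoc =
        getReassociationMapForFoldingUnitDims(mixedSizes);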
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
@ CyclicNumProcsGeNumIters
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
@ Cyclic
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
@ CyclicNumProcsEqNumIters
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body)
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
bool isaConvolutionOpInterface(LinalgOp linalgOp, bool allowEmptyConvolvedDims=false)
Checks whether linalgOp conforms to ConvolutionOpInterface.
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNcwCwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNhwcHwcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
bool isaConvolutionOpOfType< linalg::Conv3DNdhwcDhwcfQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offsets)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body)
SmallVector< int64_t > getPackInverseDestPerm(linalg::PackOp packOp, PackingMetadata &metadata)
Compute inverse permutation for the destination tensor (i.e.
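A hedged usage sketch; packOp is assumed to be a linalg::PackOp and the shape vector is illustrative:

    PackingMetadata metadata;
    SmallVector<int64_t> perm = getPackInverseDestPerm(packOp, metadata);
    // Apply the inverse permutation to a copy of the packed destination shape.
    SmallVector<int64_t> destShape =
        llvm::to_vector(packOp.getDestType().getShape());
    applyPermutationToVector(destShape, perm);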
bool isaConvolutionOpOfType< linalg::Conv2DNgchwGfchwQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNwcWcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNchwFchwQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
static bool convLayoutMatches(ArrayRef< ArrayRef< AffineExpr > > mapListExpected, ArrayAttr indexingMaps, MLIRContext *context)
Returns true if the given indexing maps match the expected indexing maps.
bool isaConvolutionOpOfType< linalg::Conv2DNhwgcGfhwcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body)
static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim, unsigned fDim, unsigned oDim, int64_t &dilation, int64_t &stride)
Given an array of AffineMaps indexingMaps, verify the following commutatively: indexingMaps[0]....
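For reference, the access pattern being matched, written out as an affine expression; ctx, stride and dilation are assumed local variables:

    AffineExpr dOut, dFilter;
    bindDims(ctx, dOut, dFilter);
    // Input spatial index = output index * stride + filter index * dilation.
    AffineExpr inputIdx = dOut * stride + dFilter * dilation;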
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNhwcHwcQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNhwcFhwcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNhwcHwcfOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::Conv2DNhwcFhwcQOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
bool isaConvolutionOpOfType< linalg::DepthwiseConv3DNcdhwCdhwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
bool isaConvolutionOpOfType< linalg::Conv1DNwcWcfOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
static int64_t isDimTimesConstantOrDimOnly(AffineExpr expr, AffineExpr &dim)
Check if expr is either:
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
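A minimal sketch of the usual cyclic scheme, not necessarily the exact implementation: the per-processor lower bound becomes lb + procId * step and the step becomes nprocs * step.

    AffineExpr d0, d1;
    bindDims(b.getContext(), d0, d1);
    AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
    OpFoldResult newLb = affine::makeComposedFoldedAffineApply(
        b, loc, AffineMap::get(2, 1, {d0 + d1 * s0}, b.getContext()),
        {lb, procId, step});
    OpFoldResult newStep = affine::makeComposedFoldedAffineApply(
        b, loc, AffineMap::get(1, 1, {d0 * s0}, b.getContext()), {nprocs, step});
    lb = getValueOrCreateConstantIndexOp(b, loc, newLb);
    step = getValueOrCreateConstantIndexOp(b, loc, newStep);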
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
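A hedged sketch of materializing the result on a tensor operand, mirroring the TypeSwitch in the source above; the surrounding variables are assumptions:

    SliceParameters sliceParams = computeSliceParameters(
        builder, loc, valueToTile, tileSizes, map, lbs, ubs, subShapeSizes,
        /*omitPartialTileCheck=*/false);
    auto slice = tensor::ExtractSliceOp::create(
        builder, loc, valueToTile, sliceParams.offsets, sliceParams.sizes,
        sliceParams.strides);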
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
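A short example of the binding; ctx is an MLIRContext pointer:

    AffineExpr d0, d1, d2;
    bindDims(ctx, d0, d1, d2); // d0, d1, d2 now refer to dims 0, 1 and 2.
    AffineMap map = AffineMap::get(3, 0, {d0 + d1 * d2}, ctx);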
detail::NameOpMatcher m_Op(StringRef opName)
Matches a named operation.
@ Mul
RHS of mul is always a constant or a symbolic expression.
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::op_matcher< OpClass > m_Op()
Matches the given OpClass.
SmallVector< int64_t, 2 > ReassociationIndices
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
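A small example, assuming the convention that element i of the result is inVec[permutation[i]]:

    SmallVector<int64_t> vec = {10, 20, 30};
    applyPermutationToVector(vec, {2, 0, 1}); // vec is now {30, 10, 20}.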
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
llvm::function_ref< Fn > function_ref
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< utils::IteratorType > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
static std::optional< BinaryOpKind > matchAsScalarBinaryOp(GenericOp op)
Matches the given linalg op if its body is performing binary operation on int or float scalar values ...
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets