#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-tiling-interface-impl"

Value v = b.create<affine::AffineApplyOp>(loc, m, ivs);

Block *body = linalgOp.getBlock();
if (auto indexOp = dyn_cast<IndexOp>(&op)) {
  map.map(indexOp.getResult(), ivs[indexOp.getDim()]);

OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
auto indices = getIndicesForAccess(
    b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
b.create<memref::StoreOp>(
    loc, toStore, linalgOp.getDpsInitOperand(operand.index())->get(),
    indices);
template <typename LinalgOpTy>
struct LinalgOpTilingInterface
    : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
                                            LinalgOpTy> {
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
    return concreteOp.getIteratorTypesArray();
  }
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<OpFoldResult> allShapesSizes =
        linalgOp.createFlatListOfOperandDims(b, loc);
    AffineMap map = linalgOp.getShapesToLoopsMap();
    return llvm::to_vector(
        llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
          OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
              b, loc, loopExpr, allShapesSizes);
          return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
        }));
  }
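  // For example, for a matmul C(m, n) += A(m, k) * B(k, n), the flat list of
  // operand dims is (M, K, K, N, M, N) and getShapesToLoopsMap() recovers the
  // loop bounds (M, N, K), so each loop range above is [0, bound) with step 1.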
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<Value> valuesToTile = linalgOp->getOperands();
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
    llvm::make_filter_range(
        [](Value v) -> bool {
          return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
    Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
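/// Utility to compute the offsets and sizes of the full iteration space from
/// the offsets/sizes given for a subset of operands, using their indexing
/// maps. Iteration-space dimensions not mapped by any of the operands fall
/// back to the full range of the op's iteration domain.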
static LogicalResult
getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b,
for (auto [indexingMap, offsets, sizes] :
     llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
  for (auto [resultExpr, offset, size] :
       llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
    auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
    unsigned position = dimExpr.getPosition();
    auto it = mappedOffsets.find(position);
    if (it != mappedOffsets.end()) {
      OpFoldResult seenOffset = it->second;
      OpFoldResult seenSize = mappedSizes.lookup(position);
      if (seenOffset != offset || seenSize != size) {
        LLVM_DEBUG({
          llvm::dbgs() << "inconsistent iteration space mapping from "
                          "offsets/sizes of operands/results";
        });
        return failure();
      }
    } else {
      mappedOffsets[position] = offset;
      mappedSizes[position] = size;
    }
SmallVector<Range> iterationDomain =
    cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
mappedOffsetsVec.resize(iterationDomain.size());
mappedSizesVec.resize(iterationDomain.size());
for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
  auto it = mappedOffsets.find(index);
  if (it != mappedOffsets.end()) {
    mappedOffsetsVec[index] = it->second;
    mappedSizesVec[index] = mappedSizes.lookup(index);
    continue;
  }
  mappedOffsetsVec[index] = domain.offset;
  mappedSizesVec[index] = domain.size;
}
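/// Method to return the tile of the iteration domain that covers the given
/// tiles of the specified operands.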
LogicalResult getIterationDomainTileFromOperandTiles(
  auto linalgOp = cast<LinalgOp>(op);
  std::optional<SmallVector<OpFoldResult>> iterationSpaceOffsets,
      iterationSpaceSizes;
  SmallVector<AffineMap> indexingMaps =
      llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
        OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
        return linalgOp.getMatchingIndexingMap(&opOperand);
      });
  if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
                                    allSizes, iterDomainOffsets,
LinalgOp linalgOp = cast<LinalgOp>(op);
SmallVector<OpFoldResult> subShapeSizes =
    llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
SliceParameters sliceParams = computeSliceParameters(
    b, loc, outOperand->get(), sizes,
    linalgOp.getMatchingIndexingMap(outOperand), offsets, {}, subShapeSizes,
    true);
resultOffsets = sliceParams.offsets;
resultSizes = sliceParams.sizes;
LogicalResult getIterationDomainTileFromResultTile(
  auto linalgOp = cast<LinalgOp>(op);
  AffineMap indexingMap =
      linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
  if (!indexingMap.isProjectedPermutation()) {
    return op->emitOpError(
        "unhandled tiled implementation generation when result is not "
        "accessed using a permuted projection");
  }
  LogicalResult status =
      getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
                             {allSizes}, iterDomainOffsets, iterDomainSizes);
  assert(succeeded(status) && "unexpected error in offset calculation");
FailureOr<TilingResult>
generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes) const {
  SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
  if (failed(getIterationDomainTileFromResultTile(
          op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
    return failure();
  }
  auto tilingInterfaceOp = cast<TilingInterface>(op);
  FailureOr<TilingResult> tilingResult =
      tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);
  if (failed(tilingResult))
    return failure();
  if (tilingResult->tiledOps.size() != 1)
    return op->emitOpError("failed to generate tiled implementation");
      tilingResult->generatedSlices};
FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
  if (failed(getIterationDomainTileFromOperandTiles(
          op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
auto linalgOp = cast<LinalgOp>(op);
if (!linalgOp.hasPureBufferSemantics())
  return op->emitOpError("expected operation to have buffer semantics");
SmallVector<Value> indexedValues;
indexedValues.reserve(linalgOp->getNumOperands());
for (OpOperand &operand : linalgOp->getOpOperands()) {
  if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
    indexedValues.push_back(nullptr);
    continue;
  }
  if (linalgOp.isScalar(&operand)) {
    indexedValues.push_back(operand.get());
    continue;
  }
  SmallVector<Value> indices = getIndicesForAccess(
      builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
  Value load =
      builder.create<memref::LoadOp>(linalgOpLoc, operand.get(), indices);
  indexedValues.push_back(load);
}
if (reductionDim == value) {
static SmallVector<AffineMap>
getPartialResultAffineMaps(LinalgOp linalgOp,
                           const SetVector<unsigned> &reductionDims) {
  auto partialReductionMaps = llvm::map_to_vector(
      linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
        for (auto redPos : reductionDims) {
          map = map.insertResult(
              getAffineDimExpr(redPos, linalgOp.getContext()),
              map.getNumResults());
        }
        return map;
      });
  return partialReductionMaps;
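// For example, for a column-sum reduction with init map (d0, d1) -> (d0) and
// reduction dimension d1, the partial-result map becomes (d0, d1) -> (d0, d1):
// the reduced dimension is materialized in the partial result and only folded
// away later by the merge step.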
struct InitSliceInfo {
  SmallVector<int64_t> resultShape;
  SmallVector<OpFoldResult> offsets;
  SmallVector<OpFoldResult> sizes;
  SmallVector<OpFoldResult> strides;
};

static InitSliceInfo getInitSliceInfoForOuterReduction(
  unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
  if (reductionDims.contains(dim)) {
    initOffsets.push_back(zero);
  } else {
    initOffsets.push_back(offsets[dim]);
  }
  initSizes.push_back(sizes[dim]);
  return {resultShape, initOffsets, initSizes, initStrides};
static InitSliceInfo getInitSliceInfoForOuterParallel(
  unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
  if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
    initOffsets.push_back(splitReductionIvs[dimPos.value()]);
    initSizes.push_back(one);
  } else {
    initOffsets.push_back(offsets[dim]);
    initSizes.push_back(sizes[dim]);
    resultShape.push_back(sizes[dim]);
  }
  return {staticShapes, initOffsets, initSizes, initStrides};
static InitSliceInfo getInitSliceInfo(MLIRContext *context,
  if (tilingStrategy ==
      ReductionTilingStrategy::PartialReductionOuterReduction) {
    return getInitSliceInfoForOuterReduction(context, offsets, sizes,
                                             reductionDims, splitReductionIvs,
                                             partialReductionMap);
  }
  assert(tilingStrategy ==
             ReductionTilingStrategy::PartialReductionOuterParallel &&
         "unexpected ReductionTilingStrategy");
  return getInitSliceInfoForOuterParallel(context, offsets, sizes,
                                          reductionDims, splitReductionIvs,
                                          partialReductionMap);
template <typename LinalgOpTy>
struct LinalgOpPartialReductionInterface
    : public PartialReductionOpInterface::ExternalModel<
          LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
  FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
      Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
      const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);
    if (linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have tensor semantics");
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    SmallVector<Value> inits;
    for (auto [initIdx, result, partialMap] :
         llvm::enumerate(linalgOp->getResults(), partialReductionMaps)) {
      SmallVector<Operation *, 4> combinerOps;
      if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                          combinerOps) ||
          combinerOps.size() != 1)
        return op->emitOpError("Failed to analyze the reduction operation.");
      std::optional<TypedAttr> identity = getNeutralElement(combinerOps[0]);
      if (!identity.has_value())
        return op->emitOpError(
            "Failed to get an identity value for the reduction operation.");
      SmallVector<OpFoldResult> partialResultShape;
      for (AffineExpr dimExpr : partialMap.getResults()) {
        auto dim = cast<AffineDimExpr>(dimExpr);
        partialResultShape.push_back(sizes[dim.getPosition()]);
      }
      Type elType = getElementTypeOrSelf(result.getType());
      Value emptyTensor =
          b.create<tensor::EmptyOp>(loc, partialResultShape, elType);
      Value constantOp = b.create<arith::ConstantOp>(loc, *identity);
      auto identityTensor =
          b.create<linalg::FillOp>(loc, constantOp, emptyTensor);
      inits.push_back(identityTensor.getResult(0));
  FailureOr<TilingResult> tileToPartialReduction(
      Operation *op, OpBuilder &b, Location loc,
      ReductionTilingStrategy tilingStrategy, ValueRange init,
      ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
      const SetVector<unsigned> &reductionDims,
      ArrayRef<OpFoldResult> splitReductionIvs) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    SmallVector<AffineMap> newInitMaps;
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      newInitMaps = llvm::to_vector(partialReductionMaps);
    } else {
      newInitMaps = llvm::map_to_vector(
          linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
            return linalgOp.getMatchingIndexingMap(&opOperand);
          });
    }
    SmallVector<Value> tiledInputs = makeTiledShapes(
        b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
    llvm::make_filter_range(
    for (auto [partialReductionMap, valueToTile] :
         llvm::zip_equal(partialReductionMaps, init)) {
      InitSliceInfo sliceInfo = getInitSliceInfo(
          b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
          splitReductionIvs, partialReductionMap);
      auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
      RankedTensorType sliceResultType = RankedTensorType::get(
          sliceInfo.resultShape, valueToTileType.getElementType(),
          valueToTileType.getEncoding());
      auto sliceOp = b.create<tensor::ExtractSliceOp>(
          loc, sliceResultType, valueToTile, sliceInfo.offsets,
          sliceInfo.sizes, sliceInfo.strides);
      tiledInits.push_back(sliceOp.getResult());
      generatedSlices.push_back(sliceOp);
    for (auto [initOperand, newInitMap] :
         llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
      int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
      newMaps[mapIdx] = newInitMap;
    }
    SmallVector<utils::IteratorType> newIteratorTypes =
        linalgOp.getIteratorTypesArray();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      for (int dim : reductionDims)
        newIteratorTypes[dim] = utils::IteratorType::parallel;
    }
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      auto genericOp = b.create<GenericOp>(
          loc, resultTypes, tiledInputs, tiledInits, newMaps,
          newIteratorTypes);
      IRMapping mapping;
      op->getRegion(0).cloneInto(&genericOp.getRegion(),
                                 genericOp.getRegion().begin(), mapping);
      partialReductionOp = genericOp.getOperation();
    } else {
      SmallVector<Value> operands = std::move(tiledInputs);
      llvm::append_range(operands, tiledInits);
      partialReductionOp = mlir::clone(b, op, resultTypes, operands);
    }
    return TilingResult{
        {partialReductionOp},
        llvm::map_to_vector(partialReductionOp->getResults(),
  FailureOr<MergeResult>
  mergeReductions(Operation *op, OpBuilder &b, Location loc,
                  ValueRange partialReduce,
                  const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
             linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
      unsigned initIdx = idx;
      SmallVector<int64_t> partialReductionDims;
      for (auto [resultNum, dimExpr] :
           llvm::enumerate(partialMap.getResults())) {
        unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
        if (llvm::is_contained(reductionDims, dim)) {
          partialReductionDims.push_back(resultNum);
        }
      }
      auto reduction = b.create<linalg::ReduceOp>(
          loc, partialResult, init, partialReductionDims,
      b.create<linalg::YieldOp>(loc, clonedReductionOp->getResult(0));
      mergeOperations.push_back(reduction);
      replacements.push_back(reduction->getResult(0));
  LogicalResult getPartialResultTilePosition(
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    InitSliceInfo sliceInfo = getInitSliceInfo(
        b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
        splitReductionIvs, partialReductionMaps[resultNumber]);
    std::swap(resultOffsets, sliceInfo.offsets);
    std::swap(resultSizes, sliceInfo.sizes);
template <typename OpTy>
static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
                                                       OpBuilder &b) {
  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                "applies to only pack or unpack operations");
  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
                                                     : op.getDestRank();
  for (auto dim : llvm::seq<int64_t>(0, rank)) {
    loopBounds[dim].offset = zero;
    loopBounds[dim].stride = one;
    loopBounds[dim].size = resultShape[0][dim];
  }
static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
                             SmallVector<OpFoldResult> &sizes,
                             ArrayRef<int64_t> permutation) {
  if (permutation.empty())
    return;
  applyPermutationToVector<OpFoldResult>(offsets, permutation);
  applyPermutationToVector<OpFoldResult>(sizes, permutation);
}
struct PackOpTiling
    : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    auto packOp = cast<PackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        packOp.getSourceRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
  }
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();
    int64_t inputRank = packOp.getSourceRank();
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(packOp.getOuterDimsPerm()));
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      if (dimAndTileMapping.count(dim)) {
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        inputIndices.push_back(ab.mul(avOffset, avTileSize));
        inputSizes.push_back(ab.mul(avSize, avTileSize));
      } else {
        inputIndices.push_back(origOffsets[dim]);
        inputSizes.push_back(origSizes[dim]);
      }
      if (packOp.getPaddingValue()) {
        auto avDimSize = AV(dim0).bind(dimSize);
        auto avInputIdx = AV(dim1).bind(inputIndices.back());
        inputSizes.back() =
            ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
      }
    auto sourceSlice = b.create<tensor::ExtractSliceOp>(
        loc, packOp.getSource(), inputIndices, inputSizes, strides);
    tiledOperands.push_back(sourceSlice);
    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = b.create<tensor::ExtractSliceOp>(
        loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);
    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);
    auto packOp = cast<PackOp>(op);
    int64_t inputRank = packOp.getSourceRank();
    int64_t outputRank = packOp.getDestRank();
    resultOffsets.assign(offsets.begin(), offsets.end());
    resultOffsets.append(outputRank - inputRank, zeroAttr);
    resultSizes.assign(sizes.begin(), sizes.end());
    for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
      resultSizes.push_back(outputShape[0][dataTileDim]);
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    int64_t numTiles = packOp.getInnerDimsPos().size();
    for (auto offset : offsets.take_back(numTiles))
      if (!isZeroInteger(offset))
        return failure();
    for (auto [tileSize, size] :
         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
      if (!isEqualConstantIntOrValue(tileSize, size))
        return failure();
    FailureOr<TilingResult> tilingResult = getTiledImplementation(
        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  LogicalResult getIterationDomainTileFromOperandTiles(
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unsupported operands for consumer fusion"; });
      return failure();
    }
    auto packOp = cast<PackOp>(op);
    if (packOp.getPaddingValue())
      return failure();
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
      if (dimAndTileMapping.count(dim)) {
        FailureOr<int64_t> cstSize =
            ValueBoundsConstraintSet::computeConstantBound(
                presburger::BoundType::UB, sizes[dim],
                /*stopCondition=*/nullptr, /*closedUB=*/true);
        std::optional<int64_t> cstInnerSize =
            getConstantIntValue(dimAndTileMapping[dim]);
        if (failed(cstSize) || !cstInnerSize ||
            *cstSize % *cstInnerSize != 0) {
        using AV = affine::AffineValueExpr;
        affine::AffineBuilder ab(b, loc);
        auto avOffset = AV(dim0).bind(offsets[dim]);
        auto avSize = AV(dim0).bind(sizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
      } else {
        outerDimOffsets.push_back(offsets[dim]);
        outerDimSizes.push_back(sizes[dim]);
      }
    applyPermToRange(outerDimOffsets, outerDimSizes,
                     packOp.getOuterDimsPerm());
    resultOffsets = outerDimOffsets;
    resultSizes = outerDimSizes;
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }
    auto packOp = cast<PackOp>(op);
    int64_t inputRank = packOp.getSourceRank();
    auto sourceSlice = b.create<tensor::ExtractSliceOp>(
        loc, packOp.getSource(), offsets, sizes, strides);
    tiledOperands.push_back(sourceSlice);
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
        outputOffsets, outputSizes)))
    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = b.create<tensor::ExtractSliceOp>(
        loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);
    assert(!packOp.getPaddingValue() && "expected no padding semantics");
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

struct UnpackTileDimInfo {
  bool isAlignedToInnerTileSize;
static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
                                              int64_t tileDim,
                                              OpFoldResult tileOffset,
                                              OpFoldResult tileSize) {
  UnpackTileDimInfo info;
  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
      unpackOp.getDimAndTileMapping();
  if (!dimAndTileMapping.count(tileDim)) {
    info.isAlignedToInnerTileSize = true;
    info.sourceOffset = tileOffset;
    info.sourceSize = tileSize;
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
  using AV = affine::AffineValueExpr;
  affine::AffineBuilder ab(b, loc);
  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
  info.isAlignedToInnerTileSize = false;
  if (!failed(cstSize) && cstInnerSize) {
    if (*cstSize % *cstInnerSize == 0)
      info.isAlignedToInnerTileSize = true;
    if (*cstInnerSize == *cstSize) {
      auto lhs = AV(dim0).bind(tileOffset);
      auto rhs = AV(dim1).bind(innerTileSize);
      info.sourceOffset = ab.floor(lhs, rhs);
      info.sourceSize = oneAttr;
      info.resultOffset = zeroAttr;
      info.destExpandedSize = tileSize;
  if (info.isAlignedToInnerTileSize) {
    info.sourceOffset =
        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
    info.sourceSize =
        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));

  OpFoldResult tileExclusiveBound =
      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
                                       AV(dim1).bind(firstCoord.quotient));
  info.sourceSize =
      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
  info.sourceOffset = firstCoord.quotient;
  info.resultOffset = firstCoord.remainder;
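// Worked example of the unaligned case: with inner tile size 16, a result
// tile with offset 10 and size 9 covers unpacked elements [10, 19). Then
// firstCoord = 10 divmod 16 = (0, 10) and lastCoord = 18 divmod 16 = (1, 2),
// so the source slice spans tiles 0..1 (size 1 - 0 + 1 = 2) and the tiled
// result is read back starting at remainder offset 10.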
struct UnPackOpTiling
    : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    auto unpackOp = cast<UnPackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        unpackOp.getDestRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
  }
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto unpackOp = cast<UnPackOp>(op);
    int64_t srcRank = unpackOp.getSourceRank();
    int64_t destRank = unpackOp.getDestRank();
    int64_t numInnerTiles = srcRank - destRank;
    bool isPerfectTilingCase = true;
    for (auto dim : llvm::seq<int64_t>(0, destRank)) {
      UnpackTileDimInfo info =
          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
      if (!info.isAlignedToInnerTileSize)
        isPerfectTilingCase = false;
      sliceSrcIndices.push_back(info.sourceOffset);
      sliceSrcSizes.push_back(info.sourceSize);
      destExpandedSizes.push_back(info.destExpandedSize);
      resultOffsetsFromDest.push_back(info.resultOffset);
    }
    applyPermToRange(sliceSrcIndices, sliceSrcSizes,
                     unpackOp.getOuterDimsPerm());
    sliceSrcIndices.append(numInnerTiles, zeroAttr);
    sliceSrcSizes.append(unpackOp.getMixedTiles());
    sliceSrcStrides.append(numInnerTiles, oneAttr);
    tensor::ExtractSliceOp sliceSource = b.create<tensor::ExtractSliceOp>(
        loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
        sliceSrcStrides);
    generatedSlices.push_back(sliceSource);
    if (isPerfectTilingCase) {
      auto destSliceOp = b.create<tensor::ExtractSliceOp>(
          loc, unpackOp.getDest(), offsets, sizes, destStrides);
      sliceDest = destSliceOp;
      generatedSlices.push_back(destSliceOp);
    } else {
      sliceDest = b.create<tensor::EmptyOp>(
          loc, destExpandedSizes, unpackOp.getDestType().getElementType());
    }
    for (auto tile : unpackOp.getInnerTiles())
      tiledOperands.push_back(tile);
    if (isPerfectTilingCase)
    auto extractSlice = b.create<tensor::ExtractSliceOp>(
        loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
        destStrides);
    return TilingResult{
        {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
    resultOffsets = llvm::to_vector(offsets);
    resultSizes = llvm::to_vector(sizes);
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    FailureOr<TilingResult> tilingResult =
        getTiledImplementation(op, b, offsets, sizes);
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  LogicalResult getIterationDomainTileFromOperandTiles(
    if (operandNumbers.size() != 1) {
      LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    unsigned operandNumber = operandNumbers[0];
    if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
      resultOffsets = llvm::to_vector(offsets);
      resultSizes = llvm::to_vector(sizes);
    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    auto destOffsets = offsets.drop_back(numTiles);
    auto destSizes = sizes.drop_back(numTiles);
    int64_t outputRank = unPackOp.getDestRank();
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(unPackOp.getOuterDimsPerm()));
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        unPackOp.getDimAndTileMapping();
    for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      if (dimAndTileMapping.count(dim)) {
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
        auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
        resultOffsets.push_back(ab.mul(avOffset, avTileSize));
        auto avResultOffset = AV(dim1).bind(resultOffsets.back());
        resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
                                      ab.sub(avResultSize, avResultOffset)}));
      } else {
        resultOffsets.push_back(origOffsets[dim]);
        resultSizes.push_back(origSizes[dim]);
      }
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    for (auto [tile, size] :
         llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
    int64_t outputRank = unPackOp.getDestRank();
    auto extractDestSlice = b.create<tensor::ExtractSliceOp>(
        loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(extractDestSlice);
    strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
    auto extractSourceSlice = b.create<tensor::ExtractSliceOp>(
        loc, unPackOp.getSource(), offsets, sizes, strides);
    tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
    for (auto tile : unPackOp.getInnerTiles())
      tiledOperands.push_back(tile);
        extractSourceSlice, extractDestSlice})};
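/// Registers the `TilingInterface` and `PartialReductionOpInterface` external
/// models for a single Linalg op type.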
template <typename OpType>
static void registerOne(MLIRContext *ctx) {
  OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
  OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
      *ctx);
}
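/// Variadic helper function.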
template <typename... OpTypes>
static void registerAll(MLIRContext *ctx) {
  (registerOne<OpTypes>(ctx), ...);
}
    registerOne<linalg::GenericOp>(ctx);
    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"

    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
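// A minimal usage sketch (not part of this file; assumes a standalone tool):
// clients attach these external models through the registration entry points
// above before constructing or parsing Linalg IR.
//
//   #include "mlir/Dialect/Linalg/IR/Linalg.h"
//   #include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h"
//
//   void setupContext(mlir::MLIRContext &context) {
//     mlir::DialectRegistry registry;
//     registry.insert<mlir::linalg::LinalgDialect>();
//     mlir::linalg::registerTilingInterfaceExternalModels(registry);
//     context.appendDialectRegistry(registry);
//   }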