#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-tiling-interface-impl"
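/// Fragment of the helper getIndicesForAccess: return the SSA values that
/// represent the data point accessed through `indexingMap` at the
/// iteration-space point `ivs`; each map result `m` is materialized with an
/// affine.apply.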
    Value v = affine::AffineApplyOp::create(b, loc, m, ivs);
    indices.push_back(v);
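/// inlinePayload: inline the payload of a linalgOp given the iteration-space
/// point (`ivs`) and the values to bind to the block arguments; the values
/// yielded by the terminator are stored into the DPS init operands.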
static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp,
                                   ValueRange ivs, ValueRange argValues) {
  Block *body = linalgOp.getBlock();
  IRMapping map;
  map.map(body->getArguments(), argValues);
  for (auto &op : body->without_terminator()) {
    if (auto indexOp = dyn_cast<IndexOp>(&op)) {
      map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
      continue;
    }
    b.clone(op, map);
  }

  Operation *terminator = body->getTerminator();
  Location loc = terminator->getLoc();
  for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
    Value toStore = map.lookupOrDefault(operand.value());
    OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
    auto indices = getIndicesForAccess(
        b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
    memref::StoreOp::create(b, loc, toStore,
                            linalgOp.getDpsInitOperand(operand.index())->get(),
                            indices);
  }
  return success();
}
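/// External model implementation of TilingInterface for LinalgOps.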
template <typename LinalgOpTy>
struct LinalgOpTilingInterface
    : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
                                            LinalgOpTy> {
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
    return concreteOp.getIteratorTypesArray();
  }
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    OpBuilder::InsertionGuard g(b);
    b.setInsertionPoint(op);
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<OpFoldResult> allShapesSizes =
        linalgOp.createFlatListOfOperandDims(b, loc);
    AffineMap map = linalgOp.getShapesToLoopsMap();

    return llvm::to_vector(
        llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
          OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
              b, loc, loopExpr, allShapesSizes);
          return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
        }));
  }
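  /// Instantiate the tiled implementation of the operation: extract
  /// slices/subviews of the operands and clone the op onto them.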
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<Value> valuesToTile(linalgOp->getOperands());
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, valuesToTile, offsets, sizes, {},
        /*omitPartialTileCheck=*/true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledOperands,
            [](Value v) -> bool {
              return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
                  v.getDefiningOp());
            }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    SmallVector<Type> resultTensorTypes =
        getTensorOutputTypes(linalgOp, tiledOperands);
    Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
    offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);

    return TilingResult{
        {tiledOp}, SmallVector<Value>(tiledOp->getResults()), generatedSlices};
  }
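  /// Utility to map offsets/sizes given on operands or results back to the
  /// iteration domain, checking that dimensions seen through several indexing
  /// maps agree.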
  static LogicalResult getMappedOffsetAndSize(
      LinalgOp linalgOp, OpBuilder &b, ArrayRef<AffineMap> indexingMaps,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
      SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
    DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;
    for (auto [indexingMap, offsets, sizes] :
         llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
      for (auto [resultExpr, offset, size] :
           llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
        auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
        if (!dimExpr)
          continue;
        unsigned position = dimExpr.getPosition();
        auto it = mappedOffsets.find(position);
        if (it != mappedOffsets.end()) {
          OpFoldResult seenOffset = it->second;
          OpFoldResult seenSize = mappedSizes.lookup(position);
          if (seenOffset != offset || seenSize != size) {
            LLVM_DEBUG({
              llvm::dbgs() << "inconsistent iteration space mapping from "
                              "offsets/sizes of operands/results";
            });
            return failure();
          }
        } else {
          mappedOffsets[position] = offset;
          mappedSizes[position] = size;
        }
      }
    }

    // Aggregate the mapped offsets/sizes over the whole iteration domain,
    // defaulting to the full domain range for unmapped dimensions.
    SmallVector<Range> iterationDomain =
        cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
    mappedOffsetsVec.resize(iterationDomain.size());
    mappedSizesVec.resize(iterationDomain.size());
    for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
      auto it = mappedOffsets.find(index);
      if (it != mappedOffsets.end()) {
        mappedOffsetsVec[index] = it->second;
        mappedSizesVec[index] = mappedSizes.lookup(index);
        continue;
      }
      mappedOffsetsVec[index] = domain.offset;
      mappedSizesVec[index] = domain.size;
    }
    return success();
  }
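  /// Return the iteration-domain tile that corresponds to the given tiles of
  /// the listed operands (used when fusing this op as a consumer).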
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> indexingMaps =
        llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
          OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
          return linalgOp.getMatchingIndexingMap(&opOperand);
        });
    if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
                                      allSizes, iterDomainOffsets,
                                      iterDomainSizes)))
      return failure();
    return success();
  }
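  /// Return the position (offsets/sizes) of the result tile produced when the
  /// op is tiled with the given iteration-domain offsets and sizes.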
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);

    AffineExpr d0;
    bindDims(b.getContext(), d0);
    SmallVector<OpFoldResult> subShapeSizes =
        llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
          return affine::makeComposedFoldedAffineApply(b, loc, d0 - 1, ofr);
        }));

    OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
    SliceParameters sliceParams = computeSliceParameters(
        b, loc, outOperand->get(), sizes,
        linalgOp.getMatchingIndexingMap(outOperand), offsets,
        /*ubs=*/{}, subShapeSizes, /*omitPartialTileCheck=*/true);
    resultOffsets = sliceParams.offsets;
    resultSizes = sliceParams.sizes;
    return success();
  }
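  /// Map a tile of a result back to an iteration-domain tile. Only results
  /// accessed through a projected permutation are handled.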
  LogicalResult getIterationDomainTileFromResultTile(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    AffineMap indexingMap =
        linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
    if (!indexingMap.isProjectedPermutation()) {
      return op->emitOpError(
          "unhandled tiled implementation generation when result is not "
          "accessed using a permuted projection");
    }

    SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
    SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
    auto status =
        getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
                               {allSizes}, iterDomainOffsets, iterDomainSizes);
    (void)status;
    assert(succeeded(status) && "unexpected error in offset calculation");
    return success();
  }
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromResultTile(
            op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes)))
      return failure();
    auto tilingInterfaceOp = cast<TilingInterface>(op);
    FailureOr<TilingResult> tilingResult =
        tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);
    if (failed(tilingResult))
      return failure();
    if (tilingResult->tiledOps.size() != 1)
      return op->emitOpError("failed to generate tiled implementation");

    return TilingResult{
        tilingResult->tiledOps,
        SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
        tilingResult->generatedSlices};
  }
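  /// Tile the op so that the given operand tiles are consumed; delegates to
  /// getIterationDomainTileFromOperandTiles followed by getTiledImplementation.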
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
            mappedSizes)))
      return failure();
    return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
  }
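  /// Generate the op's scalar implementation at one iteration-space point:
  /// load the indexed operands, inline the payload, and store the results.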
  LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
                                             Location loc,
                                             ValueRange ivs) const {
    auto linalgOp = cast<LinalgOp>(op);
    if (!linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have buffer semantics");

    SmallVector<Value> indexedValues;
    indexedValues.reserve(linalgOp->getNumOperands());
    Location linalgOpLoc = op->getLoc();
    // Load the data corresponding to the block arguments that represent the
    // input operands.
    for (OpOperand &operand : linalgOp->getOpOperands()) {
      if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
        indexedValues.push_back(nullptr);
        continue;
      }
      if (linalgOp.isScalar(&operand)) {
        indexedValues.push_back(operand.get());
        continue;
      }
      SmallVector<Value> indices = getIndicesForAccess(
          builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
      Value load =
          memref::LoadOp::create(builder, linalgOpLoc, operand.get(), indices);
      indexedValues.push_back(load);
    }

    // Inline the op payload and store the result.
    return inlinePayload(builder, linalgOp, ivs, indexedValues);
  }
};
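/// Fragment of a lookup helper (referenced below as getPositionIn): return
/// the position of `value` within the tiled reduction dimensions, if present.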
  for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
    if (reductionDim == value)
      return index;
  }
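/// Return the affine maps to use for the partial-result (outs) operands: the
/// matching indexing map of each init with the tiled reduction dimensions
/// appended as trailing results.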
static SmallVector<AffineMap>
getPartialResultAffineMaps(LinalgOp linalgOp,
                           const SetVector<unsigned> &reductionDims) {
  auto partialReductionMaps = llvm::map_to_vector(
      linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
        for (auto redPos : reductionDims) {
          map =
              map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
                               map.getNumResults());
        }
        return map;
      });
  return partialReductionMaps;
}
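/// Shape, offsets, sizes, and strides of the slice of an init operand used as
/// the destination of the partial-reduction op.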
struct InitSliceInfo {
  SmallVector<int64_t> resultShape;
  SmallVector<OpFoldResult> offsets;
  SmallVector<OpFoldResult> sizes;
  SmallVector<OpFoldResult> strides;
};
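/// Slice of the init operand for the outer-reduction strategy: reduction
/// dimensions start at offset zero and keep their tiled sizes.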
static InitSliceInfo getInitSliceInfoForOuterReduction(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  SmallVector<OpFoldResult> initOffsets, initSizes;
  Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (reductionDims.contains(dim)) {
      initOffsets.push_back(zero);
    } else {
      initOffsets.push_back(offsets[dim]);
    }
    initSizes.push_back(sizes[dim]);
  }
  SmallVector<int64_t> resultShape;
  std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
  SmallVector<OpFoldResult> initStrides(initSizes.size(), one);
  return {resultShape, initOffsets, initSizes, initStrides};
}
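/// Slice of the init operand for the outer-parallel strategy: each reduction
/// dimension is pinned to the current split-reduction induction variable with
/// size one.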
static InitSliceInfo getInitSliceInfoForOuterParallel(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  SmallVector<OpFoldResult> initOffsets, initSizes, resultShape;
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
      initOffsets.push_back(splitReductionIvs[dimPos.value()]);
      initSizes.push_back(one);
    } else {
      initOffsets.push_back(offsets[dim]);
      initSizes.push_back(sizes[dim]);
      resultShape.push_back(sizes[dim]);
    }
  }
  SmallVector<int64_t> staticShapes;
  std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
  SmallVector<OpFoldResult> initStrides(initSizes.size(), one);
  return {staticShapes, initOffsets, initSizes, initStrides};
}
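/// Dispatch to the strategy-specific init-slice computation.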
static InitSliceInfo getInitSliceInfo(MLIRContext *context,
                                      ReductionTilingStrategy strategy,
                                      ArrayRef<OpFoldResult> offsets,
                                      ArrayRef<OpFoldResult> sizes,
                                      const SetVector<unsigned> &reductionDims,
                                      ArrayRef<OpFoldResult> splitReductionIvs,
                                      AffineMap partialReductionMap) {
  if (strategy == ReductionTilingStrategy::PartialReductionOuterReduction) {
    return getInitSliceInfoForOuterReduction(context, offsets, sizes,
                                             reductionDims, splitReductionIvs,
                                             partialReductionMap);
  }
  assert(strategy == ReductionTilingStrategy::PartialReductionOuterParallel &&
         "unexpected ReductionTilingStrategy");
  return getInitSliceInfoForOuterParallel(context, offsets, sizes,
                                          reductionDims, splitReductionIvs,
                                          partialReductionMap);
}
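/// External model implementation of PartialReductionOpInterface for
/// LinalgOps.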
template <typename LinalgOpTy>
struct LinalgOpPartialReductionInterface
    : public PartialReductionOpInterface::ExternalModel<
          LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
  FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
      Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
      const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);

    OpBuilder::InsertionGuard guard(b);
    if (linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have tensor semantics");

    SmallVector<AffineMap> partialResultMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    SmallVector<Value> inits;
    for (auto [initIdx, result, partialMap] :
         llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
      SmallVector<Operation *, 4> combinerOps;
      if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                          combinerOps) ||
          combinerOps.size() != 1)
        return op->emitOpError("Failed to analyze the reduction operation.");

      Operation *reductionOp = combinerOps[0];
      std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
      if (!identity.has_value())
        return op->emitOpError(
            "Failed to get an identity value for the reduction operation.");

      // Append the new partial result dimensions.
      SmallVector<OpFoldResult> partialResultShape;
      for (AffineExpr dimExpr : partialMap.getResults()) {
        auto dim = cast<AffineDimExpr>(dimExpr);
        partialResultShape.push_back(sizes[dim.getPosition()]);
      }

      Type elType = getElementTypeOrSelf(result.getType());
      Value emptyTensor =
          tensor::EmptyOp::create(b, loc, partialResultShape, elType);
      Value constantOp = arith::ConstantOp::create(b, loc, *identity);
      auto identityTensor =
          linalg::FillOp::create(b, loc, constantOp, emptyTensor);
      inits.push_back(identityTensor.getResult(0));
    }
    return inits;
  }
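  /// Create the tiled op that computes a partial reduction over one tile.
  /// With the outer-reduction strategy the reduction iterators become
  /// parallel and the payload is cloned into a new linalg.generic; otherwise
  /// the op is cloned onto the sliced operands.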
  FailureOr<TilingResult>
  tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
                         ReductionTilingStrategy tilingStrategy,
                         ValueRange init, ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes,
                         const SetVector<unsigned> &reductionDims,
                         ArrayRef<OpFoldResult> splitReductionIvs) const {
    OpBuilder::InsertionGuard guard(b);
    auto linalgOp = cast<LinalgOp>(op);

    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    // Step 1. Extend the init maps: for the outer-reduction strategy the
    // tiled reduction dimensions become results of the init maps.
    SmallVector<AffineMap> newInitMaps;
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      newInitMaps = llvm::to_vector(partialReductionMaps);
    } else {
      newInitMaps = llvm::map_to_vector(
          linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
            return linalgOp.getMatchingIndexingMap(&opOperand);
          });
    }

    // Step 2a. Extract a slice of the input operands.
    SmallVector<Value> tiledInputs = makeTiledShapes(
        b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {},
        /*omitPartialTileCheck=*/true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    // Step 2b. Extract a slice of the init operands.
    SmallVector<Value, 1> tiledInits;
    for (auto [partialReductionMap, valueToTile] :
         llvm::zip_equal(partialReductionMaps, init)) {
      InitSliceInfo sliceInfo = getInitSliceInfo(
          b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
          splitReductionIvs, partialReductionMap);
      auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
      RankedTensorType sliceResultType = RankedTensorType::get(
          sliceInfo.resultShape, valueToTileType.getElementType(),
          valueToTileType.getEncoding());
      auto sliceOp = tensor::ExtractSliceOp::create(
          b, loc, sliceResultType, valueToTile, sliceInfo.offsets,
          sliceInfo.sizes, sliceInfo.strides);
      tiledInits.push_back(sliceOp.getResult());
      generatedSlices.push_back(sliceOp);
    }

    // Update the indexing maps.
    SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
    for (auto [initOperand, newInitMap] :
         llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
      int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
      newMaps[mapIdx] = newInitMap;
    }

    // Step 3. Change the reduction dim iterator types to parallel.
    SmallVector<utils::IteratorType> newIteratorTypes =
        linalgOp.getIteratorTypesArray();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      for (int dim : reductionDims)
        newIteratorTypes[dim] = utils::IteratorType::parallel;
    }

    // Step 4. Create the partial-reduction op.
    Operation *partialReductionOp;
    auto resultTypes = ValueRange(tiledInits).getTypes();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      auto genericOp = GenericOp::create(b, loc, resultTypes, tiledInputs,
                                         tiledInits, newMaps, newIteratorTypes);
      IRMapping mapping;
      op->getRegion(0).cloneInto(&genericOp.getRegion(),
                                 genericOp.getRegion().begin(), mapping);
      partialReductionOp = genericOp.getOperation();
    } else {
      SmallVector<Value> operands = std::move(tiledInputs);
      llvm::append_range(operands, tiledInits);
      partialReductionOp = mlir::clone(b, op, resultTypes, operands);
    }
    return TilingResult{
        {partialReductionOp},
        llvm::map_to_vector(partialReductionOp->getResults(),
                            [](OpResult r) -> Value { return r; }),
        generatedSlices};
  }
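  /// Merge the partial results with a linalg.reduce over the appended partial
  /// reduction dimensions, reusing the original combiner operation.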
  FailureOr<MergeResult>
  mergeReductions(Operation *op, OpBuilder &b, Location loc,
                  ValueRange partialReduce,
                  const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    SmallVector<Operation *> mergeOperations;
    SmallVector<Value> replacements;
    for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
             linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
      unsigned initIdx = idx;
      // linalg.reduce's iteration space is the tiled result's iteration space
      // (not the tiled operation's), so permute the reduction dimensions as
      // given by the partial result map.
      SmallVector<int64_t> partialReductionDims;
      for (auto [resultNum, dimExpr] :
           llvm::enumerate(partialMap.getResults())) {
        unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
        if (llvm::is_contained(reductionDims, dim)) {
          partialReductionDims.push_back(resultNum);
        }
      }

      auto reduction = linalg::ReduceOp::create(
          b, loc, partialResult, init, partialReductionDims,
          [&linalgOp, &initIdx](OpBuilder &b, Location loc,
                                ValueRange inputs) {
            // Get the combiner op of the untiled reduction and clone it.
            SmallVector<Operation *, 4> combinerOps;
            matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                           combinerOps);
            Operation *clonedReductionOp = b.clone(*combinerOps[0]);
            // Combine the cloned op with the block arguments.
            clonedReductionOp->setOperand(0, inputs[0]);
            clonedReductionOp->setOperand(1, inputs[1]);
            linalg::YieldOp::create(b, loc, clonedReductionOp->getResult(0));
          });

      mergeOperations.push_back(reduction);
      replacements.push_back(reduction->getResult(0));
    }

    return MergeResult{mergeOperations, replacements};
  }
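  /// Return the offsets/sizes of the partial result tile, derived from the
  /// same init-slice computation used when tiling.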
  LogicalResult getPartialResultTilePosition(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
      ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
      ArrayRef<OpFoldResult> splitReductionIvs,
      SmallVector<OpFoldResult> &resultOffsets,
      SmallVector<OpFoldResult> &resultSizes) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    InitSliceInfo sliceInfo = getInitSliceInfo(
        b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
        splitReductionIvs, partialReductionMaps[resultNumber]);
    std::swap(resultOffsets, sliceInfo.offsets);
    std::swap(resultSizes, sliceInfo.sizes);
    return success();
  }
};
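/// Common iteration domain for pack/unpack: one parallel loop per dimension
/// (pack: source rank; unpack: dest rank), bounded by the reified result
/// shape.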
template <typename OpTy>
static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
                                                       OpBuilder &builder) {
  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                "applies to only pack or unpack operations");
  OpBuilder::InsertionGuard g(builder);
  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
                                                     : op.getDestRank();
  OpFoldResult zero = builder.getIndexAttr(0);
  OpFoldResult one = builder.getIndexAttr(1);
  ReifiedRankedShapedTypeDims resultShape;
  (void)reifyResultShapes(builder, op, resultShape);
  SmallVector<Range> loopBounds(rank);
  for (auto dim : llvm::seq<int64_t>(0, rank)) {
    loopBounds[dim].offset = zero;
    loopBounds[dim].stride = one;
    loopBounds[dim].size = resultShape[0][dim];
  }
  return loopBounds;
}
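/// Fragment of applyPermToRange: permute `offsets`/`sizes` by `permutation`,
/// doing nothing when the permutation is empty.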
  if (permutation.empty())
    return;
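/// External model implementation of TilingInterface for linalg.pack.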
struct PackOpTiling
    : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    // Note that here we only consider untiled dimensions and outer tiled data
    // dimensions; the inner tiled data dimensions are materialized when
    // building the body of the operation.
    auto packOp = cast<PackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        packOp.getSourceRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
  }
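  /// Tile linalg.pack: undo the outer-dims interchange, derive the source
  /// slice (scaled by the inner tile sizes, clamped for padded tiles) and the
  /// destination slice, then create the tiled pack op on them.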
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();

    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map sizes and offsets to the original input.
    int64_t inputRank = packOp.getSourceRank();
    SmallVector<OpFoldResult> origOffsets(offsets);
    SmallVector<OpFoldResult> origSizes(sizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(packOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<OpFoldResult> srcDimValues =
        tensor::getMixedSizes(b, loc, packOp.getSource());
    SmallVector<OpFoldResult> inputIndices, inputSizes;
    for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym);
      if (dimAndTileMapping.count(dim)) {
        // If the data dimension is tiled, the i-th index is the product of
        // offset_i and tile_i, and the i-th size is the product of sizes_i
        // and tile_i.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        inputIndices.push_back(ab.mul(avOffset, avTileSize));
        inputSizes.push_back(ab.mul(avSize, avTileSize));
      } else {
        inputIndices.push_back(origOffsets[dim]);
        inputSizes.push_back(origSizes[dim]);
      }

      // Limit the size of the input operand for incomplete tiles.
      if (packOp.getPaddingValue()) {
        OpFoldResult dimSize = srcDimValues[dim];
        auto avDimSize = AV(dim0).bind(dimSize);
        auto avInputIdx = AV(dim1).bind(inputIndices.back());
        inputSizes.back() =
            ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
      }
    }

    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, /*resultNumber=*/0, offsets, sizes,
                                     outputOffsets, outputSizes)))
      return failure();

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
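  /// Result tile of pack: the outer dims mirror the iteration-domain tile;
  /// the inner tiled dims start at zero and span the full reified data-tile
  /// sizes.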
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    auto packOp = cast<PackOp>(op);
    int64_t inputRank = packOp.getSourceRank();
    int64_t outputRank = packOp.getDestRank();
    auto zeroAttr = b.getI64IntegerAttr(0);
    resultOffsets.assign(offsets.begin(), offsets.end());
    resultOffsets.append(outputRank - inputRank, zeroAttr);

    ReifiedRankedShapedTypeDims outputShape;
    if (failed(reifyResultShapes(b, packOp, outputShape)))
      return failure();
    resultSizes.assign(sizes.begin(), sizes.end());
    for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
      resultSizes.push_back(outputShape[0][dataTileDim]);

    return success();
  }
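  /// Producer fusion for pack is only supported when the requested tile
  /// covers whole inner tiles (zero offsets and full inner tile sizes on the
  /// inner dims); the outer-dim tile is then forwarded to
  /// getTiledImplementation.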
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    int64_t numTiles = packOp.getInnerDimsPos().size();

    for (auto offset : offsets.take_back(numTiles))
      if (!isZeroInteger(offset))
        return failure();
    for (auto [tileSize, size] :
         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
      if (!isEqualConstantIntOrValue(tileSize, size))
        return failure();

    FailureOr<TilingResult> tilingResult = getTiledImplementation(
        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }
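  /// Consumer fusion hook: map a tile of the pack op's source operand to an
  /// iteration-domain tile. Tiled packed dimensions are only supported when
  /// the tile size is a constant multiple of the inner tile size.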
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unsupported operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();
    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<int64_t> outerShapeWithoutTranspose(
        packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
    if (!packOp.getOuterDimsPerm().empty()) {
      applyPermutationToVector(
          outerShapeWithoutTranspose,
          invertPermutationVector(packOp.getOuterDimsPerm()));
    }
    for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
      if (dimAndTileMapping.count(dim)) {
        FailureOr<int64_t> cstTileSize =
            ValueBoundsConstraintSet::computeConstantBound(
                presburger::BoundType::UB, sizes[dim],
                /*stopCondition=*/nullptr, /*closedUB=*/true);
        std::optional<int64_t> cstInnerSize =
            getConstantIntValue(dimAndTileMapping[dim]);

        // If the tile spans the whole source dimension, the destination
        // covers the full outer dim.
        int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
        int64_t destDimSize = outerShapeWithoutTranspose[dim];
        bool isTiled = failed(cstTileSize) ||
                       ShapedType::isDynamic(srcDimSize) ||
                       cstTileSize.value() < srcDimSize;
        if (!isTiled) {
          outerDimOffsets.push_back(offsets[dim]);
          if (ShapedType::isStatic(destDimSize)) {
            outerDimSizes.push_back(b.getIndexAttr(destDimSize));
          } else {
            outerDimSizes.push_back(
                b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
          }
          continue;
        }

        // Fusion is only supported when the tile size on a packed dimension
        // is a constant multiple of the inner tile size.
        if (failed(cstTileSize) || !cstInnerSize ||
            *cstTileSize % *cstInnerSize != 0)
          return failure();

        using AV = affine::AffineValueExpr;
        affine::AffineBuilder ab(b, loc);
        AffineExpr dim0, sym;
        bindDims(b.getContext(), dim0);
        bindSymbols(b.getContext(), sym);
        auto avOffset = AV(dim0).bind(offsets[dim]);
        auto avSize = AV(dim0).bind(sizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
      } else {
        outerDimOffsets.push_back(offsets[dim]);
        outerDimSizes.push_back(sizes[dim]);
      }
    }
    applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
    resultOffsets = outerDimOffsets;
    resultSizes = outerDimSizes;
    return success();
  }
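  /// Consumer fusion hook: materialize the tiled pack op for the given source
  /// tile by slicing both source and destination.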
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();

    int64_t inputRank = packOp.getSourceRank();
    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), offsets, sizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
            outerDimSizes)))
      return failure();

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, /*resultNumber=*/0, outerDimOffsets,
                                     outerDimSizes, outputOffsets,
                                     outputSizes)))
      return failure();

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
};
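/// Per-dimension information needed when tiling linalg.unpack: the slice of
/// the source to read and where the requested tile lives inside the unpacked
/// destination.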
struct UnpackTileDimInfo {
  bool isAlignedToInnerTileSize;
  OpFoldResult sourceOffset;
  OpFoldResult sourceSize;
  OpFoldResult resultOffset;
  OpFoldResult destExpandedSize;
};
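/// Compute the UnpackTileDimInfo of `unpackOp` for dimension `tileDim` given
/// the tile's offset and size along that dimension.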
static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
                                              int64_t tileDim,
                                              OpFoldResult tileOffset,
                                              OpFoldResult tileSize) {
  UnpackTileDimInfo info;
  Attribute zeroAttr = b.getIndexAttr(0);
  Attribute oneAttr = b.getIndexAttr(1);
  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
      unpackOp.getDimAndTileMapping();
  // The dimension is not one of the packed data dimensions.
  if (!dimAndTileMapping.count(tileDim)) {
    info.isAlignedToInnerTileSize = true;
    info.sourceOffset = tileOffset;
    info.sourceSize = tileSize;
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
    return info;
  }

  Location loc = unpackOp.getLoc();
  using AV = affine::AffineValueExpr;
  affine::AffineBuilder ab(b, loc);
  AffineExpr dim0, dim1, sym0;
  bindDims(b.getContext(), dim0, dim1);
  bindSymbols(b.getContext(), sym0);

  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];

  info.isAlignedToInnerTileSize = false;
  FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
      presburger::BoundType::UB, tileSize,
      /*stopCondition=*/nullptr, /*closedUB=*/true);
  std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
  if (!failed(cstSize) && cstInnerSize) {
    if (*cstSize % *cstInnerSize == 0)
      info.isAlignedToInnerTileSize = true;

    // If the tiling size equals the inner tiling size, the outer dims are
    // always 1.
    if (*cstInnerSize == *cstSize) {
      auto lhs = AV(dim0).bind(tileOffset);
      auto rhs = AV(dim1).bind(innerTileSize);
      info.sourceOffset = ab.floor(lhs, rhs);
      info.sourceSize = oneAttr;
      info.resultOffset = zeroAttr;
      info.destExpandedSize = tileSize;
      return info;
    }
  }

  if (info.isAlignedToInnerTileSize) {
    info.sourceOffset =
        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;

    // A ceilDiv is needed because there can be an incomplete tile even in
    // perfect tiling cases, e.g. unpacking tensor<33x2xf32> into
    // tensor<64xf32> with tile size 32 yields tiles of size 32, 32, and 2.
    info.sourceSize =
        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
    return info;
  }

  // Non-aligned case: compute the source span from the first and last
  // coordinates covered by the tile.
  affine::DivModValue firstCoord = affine::getDivMod(
      b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  OpFoldResult tileExclusiveBound =
      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
  affine::DivModValue lastCoord = affine::getDivMod(
      b, loc,
      getValueOrCreateConstantIndexOp(
          b, loc,
          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));

  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
                                       AV(dim1).bind(firstCoord.quotient));
  info.sourceSize =
      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
  info.sourceOffset = firstCoord.quotient;
  info.resultOffset = firstCoord.remainder;
  // Do not build an affine expression for the expanded size; it is complex
  // enough to trip up affine simplification.
  info.destExpandedSize = b.createOrFold<arith::MulIOp>(
      loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  return info;
}
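/// External model implementation of TilingInterface for linalg.unpack.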
struct UnPackOpTiling
    : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    auto unpackOp = cast<UnPackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        unpackOp.getDestRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
  }
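  /// Tiling linalg.unpack has two cases. If the tile sizes are aligned to the
  /// inner tile sizes, the source tiles are complete and the tiled op writes
  /// directly into a slice of the destination. Otherwise the tiled op unpacks
  /// into an expanded temporary and an extra extract_slice recovers the
  /// requested tile.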
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto unpackOp = cast<UnPackOp>(op);
    int64_t srcRank = unpackOp.getSourceRank();
    int64_t destRank = unpackOp.getDestRank();
    int64_t numInnerTiles = srcRank - destRank;
    Location loc = unpackOp.getLoc();

    // The perfect tiling case indicates that the tiling sizes are multiples
    // of the inner_tile_size; no extra data is needed to represent the tiled
    // unpack op in that case.
    bool isPerfectTilingCase = true;
    Attribute oneAttr = b.getIndexAttr(1);
    SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
    SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
    SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
    for (auto dim : llvm::seq<int64_t>(0, destRank)) {
      UnpackTileDimInfo info =
          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
      if (!info.isAlignedToInnerTileSize)
        isPerfectTilingCase = false;
      sliceSrcIndices.push_back(info.sourceOffset);
      sliceSrcSizes.push_back(info.sourceSize);
      destExpandedSizes.push_back(info.destExpandedSize);
      resultOffsetsFromDest.push_back(info.resultOffset);
    }

    // The tiling is applied on destination dimensions. We have to apply the
    // interchange on source dimensions if outer_dims_perm is set.
    applyPermToRange(sliceSrcIndices, sliceSrcSizes,
                     unpackOp.getOuterDimsPerm());
    Attribute zeroAttr = b.getIndexAttr(0);
    sliceSrcIndices.append(numInnerTiles, zeroAttr);
    sliceSrcSizes.append(unpackOp.getMixedTiles());
    sliceSrcStrides.append(numInnerTiles, oneAttr);
    SmallVector<Operation *> generatedSlices;
    tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
        b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
        sliceSrcStrides);
    generatedSlices.push_back(sliceSource);

    SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
    Value sliceDest;
    if (isPerfectTilingCase) {
      auto destSliceOp = tensor::ExtractSliceOp::create(
          b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
      sliceDest = destSliceOp;
      generatedSlices.push_back(destSliceOp);
    } else {
      sliceDest = tensor::EmptyOp::create(
          b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
    }

    SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
    for (auto tile : unpackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledUnpackOp = UnPackOp::create(
        b, loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());

    if (isPerfectTilingCase)
      return TilingResult{{tiledUnpackOp},
                          SmallVector<Value>(tiledUnpackOp->getResults()),
                          generatedSlices};

    auto extractSlice = tensor::ExtractSliceOp::create(
        b, loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
        destStrides);
    return TilingResult{
        {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
  }
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    resultOffsets = llvm::to_vector(offsets);
    resultSizes = llvm::to_vector(sizes);
    return success();
  }
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    FailureOr<TilingResult> tilingResult =
        getTiledImplementation(op, b, offsets, sizes);
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }
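  /// Consumer fusion hook: map a tile of an unpack operand (source or dest)
  /// to an iteration-domain tile, clamping sizes against the reified result
  /// shape.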
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1) {
      LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    unsigned operandNumber = operandNumbers[0];
    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    // If the operand tile is the dest, then no adjustment is needed.
    if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
      resultOffsets = llvm::to_vector(offsets);
      resultSizes = llvm::to_vector(sizes);
      return success();
    }
    Location loc = unPackOp.getLoc();

    // The source tile carries the inner tile dims at the back; drop them.
    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    auto destOffsets = offsets.drop_back(numTiles);
    auto destSizes = sizes.drop_back(numTiles);

    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map sizes and offsets to the original output.
    int64_t outputRank = unPackOp.getDestRank();
    ReifiedRankedShapedTypeDims reifiedReturnShapes;
    if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
      return failure();
    SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
    SmallVector<OpFoldResult> origOffsets(destOffsets);
    SmallVector<OpFoldResult> origSizes(destSizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(unPackOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        unPackOp.getDimAndTileMapping();

    for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym0;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym0);
      if (dimAndTileMapping.count(dim)) {
        // Clamp the size so the tile does not run past the end of the
        // unpacked result.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
        auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
        resultOffsets.push_back(ab.mul(avOffset, avTileSize));
        auto avResultOffset = AV(dim1).bind(resultOffsets.back());
        resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
                                      ab.sub(avResultSize, avResultOffset)}));
      } else {
        resultOffsets.push_back(origOffsets[dim]);
        resultSizes.push_back(origSizes[dim]);
      }
    }
    return success();
  }
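  /// Consumer fusion hook: materialize the tiled unpack op for the given
  /// source tile; only untiled inner dimensions are supported.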
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    // The unpack op is fused as a consumer only if the inner dims are not
    // tiled, i.e. the tile sizes on the inner dims equal the inner tiles.
    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    for (auto iter :
         llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
        return failure();
    }

    Location loc = unPackOp.getLoc();

    // Fetch offsets/sizes for creating the slice of the dest operand.
    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
            outputSizes)))
      return failure();

    auto oneAttr = b.getI64IntegerAttr(1);
    int64_t outputRank = unPackOp.getDestRank();
    SmallVector<OpFoldResult> strides(outputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    // Create a slice of the dest operand.
    auto extractDestSlice = tensor::ExtractSliceOp::create(
        b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(extractDestSlice);

    strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
    // Create a slice of the source operand.
    auto extractSourceSlice = tensor::ExtractSliceOp::create(
        b, loc, unPackOp.getSource(), offsets, sizes, strides);
    tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
    for (auto tile : unPackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    // Create the tiled unpack op.
    Operation *tiledUnPackOp =
        UnPackOp::create(b, loc, TypeRange{extractDestSlice.getType()},
                         tiledOperands, op->getAttrs());

    return TilingResult{{tiledUnPackOp},
                        SmallVector<Value>(tiledUnPackOp->getResults()),
                        llvm::to_vector(ArrayRef<Operation *>{
                            extractSourceSlice, extractDestSlice})};
  }
};
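/// Registration helpers: attach the tiling and partial-reduction external
/// models to each structured op type, plus the pack/unpack models.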
template <typename OpType>
static void registerOne(MLIRContext *ctx) {
  OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
  OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
      *ctx);
}

/// Variadic helper function.
template <typename... OpTypes>
static void registerAll(MLIRContext *ctx) {
  (registerOne<OpTypes>(ctx), ...);
}

void mlir::linalg::registerTilingInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
    registerAll<
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
        >(ctx);
  });
}

void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
  });
}