26#include "llvm/Support/Debug.h"
29#define DEBUG_TYPE "linalg-tiling-interface-impl"
48 Value v = affine::AffineApplyOp::create(
b, loc, m, ivs);
58 Block *body = linalgOp.getBlock();
62 if (
auto indexOp = dyn_cast<IndexOp>(&op)) {
63 map.
map(indexOp.getResult(), ivs[indexOp.getDim()]);
71 for (
const auto &operand : llvm::enumerate(terminator->
getOperands())) {
73 OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
75 b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
76 memref::StoreOp::create(
b, loc, toStore,
77 linalgOp.getDpsInitOperand(operand.index())->get(),
93template <
typename LinalgOpTy>
94struct LinalgOpTilingInterface
95 :
public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
98 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
99 LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
100 return concreteOp.getIteratorTypesArray();
104 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &
b)
const {
105 OpBuilder::InsertionGuard g(
b);
106 b.setInsertionPoint(op);
107 Location loc = op->
getLoc();
108 LinalgOp linalgOp = cast<LinalgOp>(op);
109 SmallVector<OpFoldResult> allShapesSizes =
110 linalgOp.createFlatListOfOperandDims(
b, loc);
111 AffineMap map = linalgOp.getShapesToLoopsMap();
113 return llvm::to_vector(
114 llvm::map_range(map.
getResults(), [&](AffineExpr loopExpr) {
115 OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
116 b, loc, loopExpr, allShapesSizes);
117 return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
122 FailureOr<TilingResult>
129 LinalgOp linalgOp = cast<LinalgOp>(op);
132 b, loc, linalgOp, valuesToTile, offsets, sizes, {},
true);
134 llvm::make_filter_range(
136 [](
Value v) ->
bool {
137 return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
145 Operation *tiledOp =
clone(
b, linalgOp, resultTensorTypes, tiledOperands);
156 getMappedOffsetAndSize(LinalgOp linalgOp,
OpBuilder &
b,
164 for (
auto [indexingMap, offsets, sizes] :
165 llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
166 for (
auto [resultExpr, offset, size] :
167 llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
168 auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
171 unsigned position = dimExpr.getPosition();
172 auto it = mappedOffsets.find(position);
173 if (it != mappedOffsets.end()) {
176 if (seenOffset != offset || seenSize != size) {
178 llvm::dbgs() <<
"inconsistent iteration space mapping from "
179 "offsets/sizes of operands/results";
184 mappedOffsets[position] = offset;
185 mappedSizes[position] = size;
193 cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(
b);
194 mappedOffsetsVec.resize(iterationDomain.size());
195 mappedSizesVec.resize(iterationDomain.size());
196 for (
auto [
index, domain] : llvm::enumerate(iterationDomain)) {
197 auto it = mappedOffsets.find(
index);
198 if (it != mappedOffsets.end()) {
199 mappedOffsetsVec[
index] = it->second;
200 mappedSizesVec[
index] = mappedSizes.lookup(
index);
203 mappedOffsetsVec[
index] = domain.offset;
204 mappedSizesVec[
index] = domain.size;
211 LogicalResult getIterationDomainTileFromOperandTiles(
217 auto linalgOp = cast<LinalgOp>(op);
220 llvm::map_to_vector(operandNumbers, [&](
unsigned operandNumber) {
221 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
222 return linalgOp.getMatchingIndexingMap(&opOperand);
224 if (
failed(getMappedOffsetAndSize(linalgOp,
b, indexingMaps, allOffsets,
225 allSizes, iterDomainOffsets,
241 LinalgOp linalgOp = cast<LinalgOp>(op);
246 llvm::to_vector(llvm::map_range(sizes, [&](
OpFoldResult ofr) {
250 OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
252 b, loc, outOperand->get(), sizes,
253 linalgOp.getMatchingIndexingMap(outOperand), offsets,
254 {}, subShapeSizes,
true);
255 resultOffsets = sliceParams.
offsets;
256 resultSizes = sliceParams.
sizes;
260 LogicalResult getIterationDomainTileFromResultTile(
265 auto linalgOp = cast<LinalgOp>(op);
272 linalgOp.getIndexingMapMatchingResult(op->
getResult(resultNumber));
275 "unhandled tiled implementation generation when result is not "
276 "accessed using a permuted projection");
282 getMappedOffsetAndSize(linalgOp,
b, indexingMap, {allOffsets},
283 {allSizes}, iterDomainOffsets, iterDomainSizes);
285 assert(succeeded(status) &&
"unexpected error in offset calculation");
289 FailureOr<TilingResult>
294 if (
failed(getIterationDomainTileFromResultTile(
295 op,
b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
298 auto tilingInterfaceOp = cast<TilingInterface>(op);
299 FailureOr<TilingResult> tilingResult =
300 tilingInterfaceOp.getTiledImplementation(
b, mappedOffsets, mappedSizes);
305 if (tilingResult->tiledOps.size() != 1)
306 return op->
emitOpError(
"failed to generate tiled implementation");
309 tilingResult->tiledOps,
311 tilingResult->generatedSlices};
316 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
321 if (
failed(getIterationDomainTileFromOperandTiles(
322 op,
b, operandNumbers, allOffsets, allSizes, mappedOffsets,
332 auto linalgOp = cast<LinalgOp>(op);
333 if (!linalgOp.hasPureBufferSemantics())
334 return op->
emitOpError(
"expected operation to have buffer semantics");
337 indexedValues.reserve(linalgOp->getNumOperands());
341 for (
OpOperand &operand : linalgOp->getOpOperands()) {
342 if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
343 indexedValues.push_back(
nullptr);
346 if (linalgOp.isScalar(&operand)) {
347 indexedValues.push_back(operand.get());
351 builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
353 memref::LoadOp::create(builder, linalgOpLoc, operand.get(),
indices);
354 indexedValues.push_back(
load);
361 bool isOpFusableWithConsumerSlice(
Operation *op,
unsigned resultNumber,
368 bool isOpFusableWithProducerSlices(
373 auto linalgOp = cast<LinalgOp>(op);
375 llvm::map_to_vector(operandNumbers, [&](
unsigned operandNumber) {
376 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
377 return linalgOp.getMatchingIndexingMap(&opOperand);
382 return succeeded(getMappedOffsetAndSize(linalgOp,
b, indexingMaps,
383 allOffsets, allSizes, mappedOffsets,
395 for (
auto [
index, reductionDim] : llvm::enumerate(reductionDims)) {
396 if (reductionDim == value) {
408getPartialResultAffineMaps(LinalgOp linalgOp,
410 auto partialReductionMaps = llvm::map_to_vector(
411 linalgOp.getDpsInitsMutable(), [&](
OpOperand &opOperand) {
412 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
413 for (auto redPos : reductionDims) {
415 map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
416 map.getNumResults());
420 return partialReductionMaps;
423struct InitSliceInfo {
424 SmallVector<int64_t> resultShape;
425 SmallVector<OpFoldResult> offsets;
426 SmallVector<OpFoldResult> sizes;
427 SmallVector<OpFoldResult> strides;
433static InitSliceInfo getInitSliceInfoForOuterReduction(
439 Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
440 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
443 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
444 if (reductionDims.contains(dim)) {
445 initOffsets.push_back(zero);
447 initOffsets.push_back(offsets[dim]);
449 initSizes.push_back(sizes[dim]);
453 return {resultShape, initOffsets, initSizes, initStrides};
459static InitSliceInfo getInitSliceInfoForOuterParallel(
465 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
469 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
470 if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
471 initOffsets.push_back(splitReductionIvs[dimPos.value()]);
472 initSizes.push_back(one);
474 initOffsets.push_back(offsets[dim]);
475 initSizes.push_back(sizes[dim]);
476 resultShape.push_back(sizes[dim]);
481 return {staticShapes, initOffsets, initSizes, initStrides};
486static InitSliceInfo getInitSliceInfo(
MLIRContext *context,
494 return getInitSliceInfoForOuterReduction(context, offsets, sizes,
495 reductionDims, splitReductionIvs,
496 partialReductionMap);
499 "unexpected ReductionTilingStrategy");
500 return getInitSliceInfoForOuterParallel(context, offsets, sizes,
501 reductionDims, splitReductionIvs,
502 partialReductionMap);
507template <
typename LinalgOpTy>
508struct LinalgOpPartialReductionInterface
509 :
public PartialReductionOpInterface::ExternalModel<
510 LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
511 FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
512 Operation *op, OpBuilder &
b, Location loc, ArrayRef<OpFoldResult> sizes,
514 auto linalgOp = cast<LinalgOp>(op);
516 OpBuilder::InsertionGuard guard(
b);
517 if (linalgOp.hasPureBufferSemantics())
518 return op->
emitOpError(
"expected operation to have tensor semantics");
520 SmallVector<AffineMap> partialResultMaps =
521 getPartialResultAffineMaps(linalgOp, reductionDims);
523 SmallVector<Value> inits;
524 for (
auto [initIdx,
result, partialMap] :
525 llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
526 SmallVector<Operation *, 4> combinerOps;
529 combinerOps.size() != 1)
530 return op->
emitOpError(
"Failed to anaysis the reduction operation.");
532 Operation *reductionOp = combinerOps[0];
533 std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
534 if (!identity.has_value())
536 "Failed to get an identity value for the reduction operation.");
539 SmallVector<OpFoldResult> partialResultShape;
540 for (AffineExpr dimExpr : partialMap.getResults()) {
541 auto dim = cast<AffineDimExpr>(dimExpr);
542 partialResultShape.push_back(sizes[dim.getPosition()]);
547 tensor::EmptyOp::create(
b, loc, partialResultShape, elType);
548 Value constantOp = arith::ConstantOp::create(
b, loc, *identity);
549 auto identityTensor =
550 linalg::FillOp::create(
b, loc, constantOp, emptyTensor);
551 inits.push_back(identityTensor.getResult(0));
557 FailureOr<TilingResult>
558 tileToPartialReduction(Operation *op, OpBuilder &
b, Location loc,
560 ValueRange init, ArrayRef<OpFoldResult> offsets,
561 ArrayRef<OpFoldResult> sizes,
563 ArrayRef<OpFoldResult> splitReductionIvs)
const {
564 OpBuilder::InsertionGuard guard(
b);
565 auto linalgOp = cast<LinalgOp>(op);
567 SmallVector<AffineMap> partialReductionMaps =
568 getPartialResultAffineMaps(linalgOp, reductionDims);
572 SmallVector<AffineMap> newInitMaps;
573 if (tilingStrategy ==
574 ReductionTilingStrategy::PartialReductionOuterReduction) {
575 newInitMaps = llvm::to_vector(partialReductionMaps);
577 newInitMaps = llvm::map_to_vector(
578 linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
579 return linalgOp.getMatchingIndexingMap(&opOperand);
585 b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {},
true);
586 SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
587 llvm::make_filter_range(
588 tiledInputs, [](Value v) ->
bool {
return v.
getDefiningOp(); }),
592 SmallVector<Value, 1> tiledInits;
593 for (
auto [partialReductionMap, valueToTile] :
594 llvm::zip_equal(partialReductionMaps, init)) {
595 InitSliceInfo sliceInfo = getInitSliceInfo(
596 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
597 splitReductionIvs, partialReductionMap);
598 auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
600 sliceInfo.resultShape, valueToTileType.getElementType(),
601 valueToTileType.getEncoding());
602 auto sliceOp = tensor::ExtractSliceOp::create(
604 sliceInfo.sizes, sliceInfo.strides);
605 tiledInits.push_back(sliceOp.getResult());
606 generatedSlices.push_back(sliceOp);
610 SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
611 for (
auto [initOperand, newInitMap] :
612 llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
613 int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
614 newMaps[mapIdx] = newInitMap;
618 SmallVector<utils::IteratorType> newIteratorTypes =
619 linalgOp.getIteratorTypesArray();
620 if (tilingStrategy ==
621 ReductionTilingStrategy::PartialReductionOuterReduction) {
622 for (
int dim : reductionDims)
623 newIteratorTypes[dim] = utils::IteratorType::parallel;
627 Operation *partialReductionOp;
628 auto resultTypes =
ValueRange(tiledInits).getTypes();
629 if (tilingStrategy ==
630 ReductionTilingStrategy::PartialReductionOuterReduction) {
631 auto genericOp = GenericOp::create(
b, loc, resultTypes, tiledInputs,
632 tiledInits, newMaps, newIteratorTypes);
635 genericOp.getRegion().begin(), mapping);
636 partialReductionOp = genericOp.getOperation();
638 SmallVector<Value> operands = std::move(tiledInputs);
639 llvm::append_range(operands, tiledInits);
640 partialReductionOp =
mlir::clone(
b, op, resultTypes, operands);
643 {partialReductionOp},
644 llvm::map_to_vector(partialReductionOp->
getResults(),
645 [](OpResult r) -> Value { return r; }),
649 FailureOr<MergeResult>
650 mergeReductions(Operation *op, OpBuilder &
b, Location loc,
653 auto linalgOp = cast<LinalgOp>(op);
654 SmallVector<AffineMap> partialReductionMaps =
655 getPartialResultAffineMaps(linalgOp, reductionDims);
658 SmallVector<Operation *> mergeOperations;
659 SmallVector<Value> replacements;
660 for (
auto [idx, init, partialResult, partialMap] : llvm::enumerate(
661 linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
662 unsigned initIdx = idx;
667 SmallVector<int64_t> partialReductionDims;
668 for (
auto [resultNum, dimExpr] :
669 llvm::enumerate(partialMap.getResults())) {
670 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
671 if (llvm::is_contained(reductionDims, dim)) {
672 partialReductionDims.push_back(resultNum);
676 auto reduction = linalg::ReduceOp::create(
677 b, loc, partialResult, init, partialReductionDims,
678 [&linalgOp, &initIdx](OpBuilder &
b, Location loc,
ValueRange inputs) {
680 SmallVector<Operation *, 4> combinerOps;
683 Operation *clonedReductionOp =
b.clone(*combinerOps[0]);
687 linalg::YieldOp::create(
b, loc, clonedReductionOp->
getResult(0));
690 mergeOperations.push_back(reduction);
691 replacements.push_back(reduction->getResult(0));
694 return MergeResult{mergeOperations, replacements};
697 LogicalResult getPartialResultTilePosition(
698 Operation *op, OpBuilder &
b,
unsigned resultNumber,
701 ArrayRef<OpFoldResult> splitReductionIvs,
702 SmallVector<OpFoldResult> &resultOffsets,
703 SmallVector<OpFoldResult> &resultSizes)
const {
704 auto linalgOp = cast<LinalgOp>(op);
705 SmallVector<AffineMap> partialReductionMaps =
706 getPartialResultAffineMaps(linalgOp, reductionDims);
707 InitSliceInfo sliceInfo = getInitSliceInfo(
708 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
709 splitReductionIvs, partialReductionMaps[resultNumber]);
710 std::swap(resultOffsets, sliceInfo.offsets);
711 std::swap(resultSizes, sliceInfo.sizes);
717template <
typename OpTy>
720 static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
721 "applies to only pack or unpack operations");
723 int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
730 for (
auto dim : llvm::seq<int64_t>(0, rank)) {
731 loopBounds[dim].offset = zero;
732 loopBounds[dim].stride = one;
733 loopBounds[dim].size = resultShape[0][dim];
741 if (permutation.empty())
748 :
public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
750 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
754 auto packOp = cast<PackOp>(op);
755 SmallVector<utils::IteratorType> iteratorTypes(
756 packOp.getSourceRank(), utils::IteratorType::parallel);
757 return iteratorTypes;
760 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &
b)
const {
761 return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op),
b);
764 FailureOr<TilingResult>
766 ArrayRef<OpFoldResult> offsets,
767 ArrayRef<OpFoldResult> sizes)
const {
768 auto packOp = cast<PackOp>(op);
769 Location loc = packOp.getLoc();
773 int64_t inputRank = packOp.getSourceRank();
774 SmallVector<OpFoldResult> origOffsets(offsets);
775 SmallVector<OpFoldResult> origSizes(sizes);
776 applyPermToRange(origOffsets, origSizes,
780 packOp.getDimAndTileMapping();
781 SmallVector<OpFoldResult> srcDimValues =
783 SmallVector<OpFoldResult> inputIndices, inputSizes;
784 for (
auto dim : llvm::seq<int64_t>(0, inputRank)) {
785 using AV = affine::AffineValueExpr;
786 affine::AffineBuilder ab(
b, loc);
787 AffineExpr dim0, dim1, sym;
790 if (dimAndTileMapping.count(dim)) {
794 auto avOffset = AV(dim0).bind(origOffsets[dim]);
795 auto avSize = AV(dim0).bind(origSizes[dim]);
796 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
797 inputIndices.push_back(ab.mul(avOffset, avTileSize));
798 inputSizes.push_back(ab.mul(avSize, avTileSize));
800 inputIndices.push_back(origOffsets[dim]);
801 inputSizes.push_back(origSizes[dim]);
805 if (packOp.getPaddingValue()) {
806 OpFoldResult dimSize = srcDimValues[dim];
807 auto avDimSize = AV(dim0).bind(dimSize);
808 auto avInputIdx = AV(dim1).bind(inputIndices.back());
810 ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
814 auto oneAttr =
b.getI64IntegerAttr(1);
815 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
817 SmallVector<Value> tiledOperands;
818 auto sourceSlice = tensor::ExtractSliceOp::create(
819 b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
820 tiledOperands.push_back(sourceSlice);
822 SmallVector<OpFoldResult> outputOffsets, outputSizes;
827 strides.append(packOp.getDestRank() - inputRank, oneAttr);
828 auto outSlice = tensor::ExtractSliceOp::create(
829 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
830 tiledOperands.push_back(outSlice);
832 if (
auto val = packOp.getPaddingValue())
833 tiledOperands.push_back(val);
834 for (
auto tile : packOp.getInnerTiles())
835 tiledOperands.push_back(
tile);
837 Operation *tiledPackOp = PackOp::create(
842 SmallVector<Value>(tiledPackOp->
getResults()),
843 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
848 ArrayRef<OpFoldResult> offsets,
849 ArrayRef<OpFoldResult> sizes,
850 SmallVector<OpFoldResult> &resultOffsets,
851 SmallVector<OpFoldResult> &resultSizes)
const {
856 auto packOp = cast<PackOp>(op);
857 int64_t inputRank = packOp.getSourceRank();
858 int64_t outputRank = packOp.getDestRank();
859 auto zeroAttr =
b.getI64IntegerAttr(0);
860 resultOffsets.assign(offsets.begin(), offsets.end());
861 resultOffsets.append(outputRank - inputRank, zeroAttr);
865 resultSizes.assign(sizes.begin(), sizes.end());
866 for (
auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
867 resultSizes.push_back(outputShape[0][dataTileDim]);
872 FailureOr<TilingResult>
873 generateResultTileValue(Operation *op, OpBuilder &
b,
unsigned resultNumber,
874 ArrayRef<OpFoldResult> offsets,
875 ArrayRef<OpFoldResult> sizes)
const {
876 auto packOp = cast<PackOp>(op);
877 int64_t numTiles = packOp.getInnerDimsPos().size();
882 for (
auto offset : offsets.take_back(numTiles))
887 llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
892 op,
b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
895 return tilingResult.value();
901 LogicalResult getIterationDomainTileFromOperandTiles(
902 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
903 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
904 ArrayRef<SmallVector<OpFoldResult>> allSizes,
905 SmallVectorImpl<OpFoldResult> &resultOffsets,
906 SmallVectorImpl<OpFoldResult> &resultSizes)
const {
907 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
909 { llvm::dbgs() <<
"unsupported operands for consumer fusion"; });
913 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
914 ArrayRef<OpFoldResult> sizes(allSizes[0]);
915 auto packOp = cast<PackOp>(op);
916 Location loc = packOp.getLoc();
917 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
919 packOp.getDimAndTileMapping();
920 SmallVector<int64_t> outerShapeWithoutTranspose(
921 packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
922 if (!packOp.getOuterDimsPerm().empty()) {
924 outerShapeWithoutTranspose,
927 for (
auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
928 if (dimAndTileMapping.count(dim)) {
929 FailureOr<int64_t> cstTileSize =
931 presburger::BoundType::UB, sizes[dim],
933 std::optional<int64_t> cstInnerSize =
943 int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
944 int64_t destDimSize = outerShapeWithoutTranspose[dim];
946 ShapedType::isDynamic(srcDimSize) ||
947 cstTileSize.value() < srcDimSize;
949 outerDimOffsets.push_back(offsets[dim]);
950 if (ShapedType::isStatic(destDimSize)) {
951 outerDimSizes.push_back(
b.getIndexAttr(destDimSize));
953 outerDimSizes.push_back(
954 b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
973 if ((
failed(cstTileSize) || !cstInnerSize ||
974 *cstTileSize % *cstInnerSize != 0))
977 using AV = affine::AffineValueExpr;
978 affine::AffineBuilder ab(
b, loc);
979 AffineExpr dim0, sym;
982 auto avOffset = AV(dim0).bind(offsets[dim]);
983 auto avSize = AV(dim0).bind(sizes[dim]);
984 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
985 outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
986 outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
988 outerDimOffsets.push_back(offsets[dim]);
989 outerDimSizes.push_back(sizes[dim]);
992 applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
993 resultOffsets = outerDimOffsets;
994 resultSizes = outerDimSizes;
999 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1000 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1001 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1002 ArrayRef<SmallVector<OpFoldResult>> allSizes)
const {
1003 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1005 { llvm ::dbgs() <<
"unhandled operands for consumer fusion"; });
1009 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1010 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1012 auto packOp = cast<PackOp>(op);
1013 Location loc = packOp.getLoc();
1015 int64_t inputRank = packOp.getSourceRank();
1016 auto oneAttr =
b.getI64IntegerAttr(1);
1017 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
1019 SmallVector<Value> tiledOperands;
1020 auto sourceSlice = tensor::ExtractSliceOp::create(
1021 b, loc, packOp.getSource(), offsets, sizes, strides);
1022 tiledOperands.push_back(sourceSlice);
1024 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
1025 if (
failed(getIterationDomainTileFromOperandTiles(
1026 op,
b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
1030 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1032 outputOffsets, outputSizes)))
1035 strides.append(packOp.getDestRank() - inputRank, oneAttr);
1036 auto outSlice = tensor::ExtractSliceOp::create(
1037 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
1038 tiledOperands.push_back(outSlice);
1040 if (
auto val = packOp.getPaddingValue())
1041 tiledOperands.push_back(val);
1042 for (
auto tile : packOp.getInnerTiles())
1043 tiledOperands.push_back(
tile);
1045 Operation *tiledPackOp = PackOp::create(
1048 return TilingResult{
1050 SmallVector<Value>(tiledPackOp->
getResults()),
1051 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
1055struct UnpackTileDimInfo {
1056 bool isAlignedToInnerTileSize;
1057 OpFoldResult sourceOffset;
1058 OpFoldResult sourceSize;
1059 OpFoldResult resultOffset;
1060 OpFoldResult destExpandedSize;
1066static UnpackTileDimInfo getUnpackTileDimInfo(
OpBuilder &
b, UnPackOp unpackOp,
1070 UnpackTileDimInfo info;
1074 unpackOp.getDimAndTileMapping();
1076 if (!dimAndTileMapping.count(tileDim)) {
1077 info.isAlignedToInnerTileSize =
true;
1078 info.sourceOffset = tileOffset;
1079 info.sourceSize = tileSize;
1080 info.resultOffset = zeroAttr;
1081 info.destExpandedSize = tileSize;
1092 OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
1094 info.isAlignedToInnerTileSize =
false;
1099 if (!
failed(cstSize) && cstInnerSize) {
1100 if (*cstSize % *cstInnerSize == 0)
1101 info.isAlignedToInnerTileSize =
true;
1105 if (*cstInnerSize == *cstSize) {
1106 auto lhs = AV(dim0).bind(tileOffset);
1107 auto rhs = AV(dim1).bind(innerTileSize);
1108 info.sourceOffset = ab.floor(
lhs,
rhs);
1109 info.sourceSize = oneAttr;
1110 info.resultOffset = zeroAttr;
1111 info.destExpandedSize = tileSize;
1116 if (info.isAlignedToInnerTileSize) {
1118 ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
1119 info.resultOffset = zeroAttr;
1120 info.destExpandedSize = tileSize;
1129 ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
1133 affine::DivModValue firstCoord = affine::getDivMod(
1137 ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
1138 affine::DivModValue lastCoord = affine::getDivMod(
1142 ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
1145 OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
1146 AV(dim1).bind(firstCoord.quotient));
1148 ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
1149 info.sourceOffset = firstCoord.quotient;
1150 info.resultOffset = firstCoord.remainder;
1153 info.destExpandedSize =
b.createOrFold<arith::MulIOp>(
1159struct UnPackOpTiling
1160 :
public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
1162 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
1163 auto unpackOp = cast<UnPackOp>(op);
1164 SmallVector<utils::IteratorType> iteratorTypes(
1165 unpackOp.getDestRank(), utils::IteratorType::parallel);
1166 return iteratorTypes;
1169 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &
b)
const {
1170 return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op),
b);
1187 FailureOr<TilingResult>
1189 ArrayRef<OpFoldResult> offsets,
1190 ArrayRef<OpFoldResult> sizes)
const {
1191 auto unpackOp = cast<UnPackOp>(op);
1192 int64_t srcRank = unpackOp.getSourceRank();
1193 int64_t destRank = unpackOp.getDestRank();
1194 int64_t numInnerTiles = srcRank - destRank;
1195 Location loc = unpackOp.getLoc();
1200 bool isPerfectTilingCase =
true;
1201 Attribute oneAttr =
b.getIndexAttr(1);
1202 SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
1203 SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
1204 SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
1205 for (
auto dim : llvm::seq<int64_t>(0, destRank)) {
1206 UnpackTileDimInfo info =
1207 getUnpackTileDimInfo(
b, unpackOp, dim, offsets[dim], sizes[dim]);
1208 if (!info.isAlignedToInnerTileSize)
1209 isPerfectTilingCase =
false;
1210 sliceSrcIndices.push_back(info.sourceOffset);
1211 sliceSrcSizes.push_back(info.sourceSize);
1212 destExpandedSizes.push_back(info.destExpandedSize);
1213 resultOffsetsFromDest.push_back(info.resultOffset);
1218 applyPermToRange(sliceSrcIndices, sliceSrcSizes,
1219 unpackOp.getOuterDimsPerm());
1220 Attribute zeroAttr =
b.getIndexAttr(0);
1221 sliceSrcIndices.append(numInnerTiles, zeroAttr);
1222 sliceSrcSizes.append(unpackOp.getMixedTiles());
1223 sliceSrcStrides.append(numInnerTiles, oneAttr);
1224 SmallVector<Operation *> generatedSlices;
1225 tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
1226 b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
1228 generatedSlices.push_back(sliceSource);
1230 SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
1232 if (isPerfectTilingCase) {
1233 auto destSliceOp = tensor::ExtractSliceOp::create(
1234 b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
1235 sliceDest = destSliceOp;
1236 generatedSlices.push_back(destSliceOp);
1238 sliceDest = tensor::EmptyOp::create(
1239 b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
1242 SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
1243 for (
auto tile : unpackOp.getInnerTiles())
1244 tiledOperands.push_back(
tile);
1246 Operation *tiledUnpackOp = UnPackOp::create(
1249 if (isPerfectTilingCase)
1250 return TilingResult{{tiledUnpackOp},
1251 SmallVector<Value>(tiledUnpackOp->
getResults()),
1254 auto extractSlice = tensor::ExtractSliceOp::create(
1255 b, loc, tiledUnpackOp->
getResult(0), resultOffsetsFromDest, sizes,
1257 return TilingResult{
1258 {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
1263 ArrayRef<OpFoldResult> offsets,
1264 ArrayRef<OpFoldResult> sizes,
1265 SmallVector<OpFoldResult> &resultOffsets,
1266 SmallVector<OpFoldResult> &resultSizes)
const {
1267 resultOffsets = llvm::to_vector(offsets);
1268 resultSizes = llvm::to_vector(sizes);
1272 FailureOr<TilingResult>
1273 generateResultTileValue(Operation *op, OpBuilder &
b,
unsigned resultNumber,
1274 ArrayRef<OpFoldResult> offsets,
1275 ArrayRef<OpFoldResult> sizes)
const {
1276 FailureOr<TilingResult> tilingResult =
1278 if (
failed(tilingResult))
1280 return tilingResult.value();
1285 LogicalResult getIterationDomainTileFromOperandTiles(
1286 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1287 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1288 ArrayRef<SmallVector<OpFoldResult>> allSizes,
1289 SmallVectorImpl<OpFoldResult> &resultOffsets,
1290 SmallVectorImpl<OpFoldResult> &resultSizes)
const {
1291 if (operandNumbers.size() != 1) {
1292 LLVM_DEBUG({ llvm::dbgs() <<
"unable to handle multiple operands"; });
1295 auto unPackOp = cast<UnPackOp>(op);
1296 unsigned operandNumber = operandNumbers[0];
1297 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1298 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1301 if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
1302 resultOffsets = llvm::to_vector(offsets);
1303 resultSizes = llvm::to_vector(sizes);
1306 Location loc = unPackOp.getLoc();
1308 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1309 auto destOffsets = offsets.drop_back(numTiles);
1310 auto destSizes = sizes.drop_back(numTiles);
1313 int64_t outputRank = unPackOp.getDestRank();
1317 SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
1318 SmallVector<OpFoldResult> origOffsets(destOffsets);
1319 SmallVector<OpFoldResult> origSizes(destSizes);
1320 applyPermToRange(origOffsets, origSizes,
1324 unPackOp.getDimAndTileMapping();
1326 for (
auto dim : llvm::seq<int64_t>(0, outputRank)) {
1327 using AV = affine::AffineValueExpr;
1328 affine::AffineBuilder ab(
b, loc);
1329 AffineExpr dim0, dim1, sym0;
1332 if (dimAndTileMapping.count(dim)) {
1336 auto avOffset = AV(dim0).bind(origOffsets[dim]);
1337 auto avSize = AV(dim0).bind(origSizes[dim]);
1338 auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
1339 auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
1340 resultOffsets.push_back(ab.mul(avOffset, avTileSize));
1341 auto avResultOffset = AV(dim1).bind(resultOffsets.back());
1342 resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
1343 ab.sub(avResultSize, avResultOffset)}));
1345 resultOffsets.push_back(origOffsets[dim]);
1346 resultSizes.push_back(origSizes[dim]);
1353 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1354 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1355 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1356 ArrayRef<SmallVector<OpFoldResult>> allSizes)
const {
1357 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1358 LLVM_DEBUG({ llvm::dbgs() <<
"unhandled operands for consumer fusion"; });
1361 auto unPackOp = cast<UnPackOp>(op);
1362 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1363 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1367 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1369 llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
1374 Location loc = unPackOp.getLoc();
1378 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1379 if (
failed(getIterationDomainTileFromOperandTiles(
1380 op,
b, operandNumbers, allOffsets, allSizes, outputOffsets,
1384 auto oneAttr =
b.getI64IntegerAttr(1);
1385 int64_t outputRank = unPackOp.getDestRank();
1386 SmallVector<OpFoldResult> strides(outputRank, oneAttr);
1388 SmallVector<Value> tiledOperands;
1390 auto extractDestSlice = tensor::ExtractSliceOp::create(
1391 b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
1392 tiledOperands.push_back(extractDestSlice);
1394 strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
1396 auto extractSourceSlice = tensor::ExtractSliceOp::create(
1397 b, loc, unPackOp.getSource(), offsets, sizes, strides);
1398 tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
1399 for (
auto tile : unPackOp.getInnerTiles())
1400 tiledOperands.push_back(
tile);
1403 Operation *tiledUnPackOp =
1404 UnPackOp::create(
b, loc,
TypeRange{extractDestSlice.getType()},
1407 return TilingResult{{tiledUnPackOp},
1408 SmallVector<Value>(tiledUnPackOp->
getResults()),
1409 llvm::to_vector(ArrayRef<Operation *>{
1410 extractSourceSlice, extractDestSlice})};
1416template <
typename OpType>
1418 OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1419 OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1424template <
typename... OpTypes>
1435 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1436 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1438#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
1446 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1447 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static RankedTensorType sliceResultType(Type operandType, GridOp grid, ArrayRef< GridAxis > gridAxes, int64_t sliceAxis)
static LogicalResult getResultTilePosition(RewriterBase &rewriter, ReductionTilingStrategy reductionStrategy, int64_t index, Value tiledResult, TilingInterface op, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims, SmallVector< OpFoldResult > &resultOffset, SmallVector< OpFoldResult > &resultSize)
static FailureOr< TilingResult > getTiledImplementation(RewriterBase &rewriter, TilingInterface op, ReductionTilingStrategy reductionStrategy, ValueRange regionIterArg, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims)
static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp, ValueRange ivs, ValueRange argValues)
Method to inline the payload of a linalgOp given the iteration space point and values for the arguments of the payload.
static SmallVector< Value > getIndicesForAccess(OpBuilder &b, Location loc, AffineMap indexingMap, ValueRange ivs)
Return the SSA values that represent the data point accessed using a given indexingMap for a given point in the iteration space.
Base type for affine expression.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
bool isProjectedPermutation(bool allowZeroInResults=false) const
Returns true if the AffineMap represents a subset (i.e.
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
void setOperand(unsigned idx, Value value)
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Location getLoc()
The source location the operation was defined or derived from.
operand_range getOperands()
Returns an iterator on the underlying Value's.
result_range getResults()
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
static FailureOr< int64_t > computeConstantBound(presburger::BoundType type, const Variable &var, const StopConditionFn &stopCondition=nullptr, bool closedUB=false)
Compute a constant bound for the given variable.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
void registerTilingInterfaceExternalModelsForPackUnPackOps(DialectRegistry ®istry)
Similar to the above registration, but it is only for tensor.pack and tensor.unpack ops.
static void registerOne(MLIRContext *ctx)
static void registerAll(MLIRContext *ctx)
Variadic helper function.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
void registerTilingInterfaceExternalModels(DialectRegistry ®istry)
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Include the generated interface declarations.
ReductionTilingStrategy
Tiling can be thought of as splitting a dimension into 2 and materializing the outer dimension as a l...
@ PartialReductionOuterReduction
@ PartialReductionOuterParallel
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2)
Return true if ofr1 and ofr2 are the same integer constant attribute values or the same SSA value.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
SmallVector< SmallVector< OpFoldResult > > ReifiedRankedShapedTypeDims
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
llvm::SetVector< T, Vector, Set, N > SetVector
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
std::pair< SmallVector< int64_t >, SmallVector< Value > > decomposeMixedValues(ArrayRef< OpFoldResult > mixedValues)
Decompose a vector of mixed static or dynamic values into the corresponding pair of arrays.
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to apply to inverse a permutation.
Container for result values of tiling.
Helper struct to build simple AffineValueExprs with minimal type inference support.
A struct containg offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets