#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-tiling-interface-impl"
    Value v = affine::AffineApplyOp::create(b, loc, m, ivs);
  Block *body = linalgOp.getBlock();
  IRMapping map;
  map.map(body->getArguments(), argValues);
  for (auto &op : body->without_terminator()) {
    if (auto indexOp = dyn_cast<IndexOp>(&op)) {
      map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
      continue;
    }
    b.clone(op, map);
  }

  Operation *terminator = body->getTerminator();
  for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
    Value toStore = map.lookupOrDefault(operand.value());
    OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
    auto indices = getIndicesForAccess(
        b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
    memref::StoreOp::create(
        b, loc, toStore, linalgOp.getDpsInitOperand(operand.index())->get(),
        indices);
  }
template <typename LinalgOpTy>
struct LinalgOpTilingInterface
    : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
                                            LinalgOpTy> {
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
    return concreteOp.getIteratorTypesArray();
  }
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    OpBuilder::InsertionGuard g(b);
    b.setInsertionPoint(op);
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<OpFoldResult> allShapesSizes =
        linalgOp.createFlatListOfOperandDims(b, loc);
    AffineMap map = linalgOp.getShapesToLoopsMap();

    return llvm::map_to_vector(map.getResults(), [&](AffineExpr loopExpr) {
      OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
          b, loc, loopExpr, allShapesSizes);
      return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
    });
  }
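  // For example, a linalg.matmul on tensor<?x?xf32> operands yields three loop
  // ranges, [0, M), [0, N), and [0, K), each with stride 1, where M, N, and K
  // are recovered from the operand shapes via the shapes-to-loops map.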
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<Value> valuesToTile = linalgOp->getOperands();
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledOperands,
            [](Value v) -> bool {
              return isa_and_nonnull<tensor::ExtractSliceOp,
                                     memref::SubViewOp>(v.getDefiningOp());
            }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    SmallVector<Type> resultTensorTypes =
        getTensorOutputTypes(linalgOp, tiledOperands);

    Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
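    // The clone rebuilds the op over the sliced operands; for ops with tensor
    // semantics the result types shrink to the tile shape accordingly, while
    // the payload region is copied over unchanged.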
  static LogicalResult getMappedOffsetAndSize(
      LinalgOp linalgOp, OpBuilder &b, ArrayRef<AffineMap> indexingMaps,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
      SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
    DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;
    for (auto [indexingMap, offsets, sizes] :
         llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
      for (auto [resultExpr, offset, size] :
           llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
        auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
        if (!dimExpr)
          continue;
        unsigned position = dimExpr.getPosition();
        auto it = mappedOffsets.find(position);
        if (it != mappedOffsets.end()) {
          OpFoldResult seenOffset = it->second;
          OpFoldResult seenSize = mappedSizes.lookup(position);
          if (seenOffset != offset || seenSize != size) {
            LLVM_DEBUG({
              llvm::dbgs() << "inconsistent iteration space mapping from "
                              "offsets/sizes of operands/results";
            });
            return failure();
          }
        } else {
          mappedOffsets[position] = offset;
          mappedSizes[position] = size;
        }
      }
    }

    // Aggregate the final offsets and sizes, falling back to the full
    // iteration space for dimensions not constrained by any operand tile.
    SmallVector<Range> iterationDomain =
        cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
    mappedOffsetsVec.resize(iterationDomain.size());
    mappedSizesVec.resize(iterationDomain.size());
    for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
      auto it = mappedOffsets.find(index);
      if (it != mappedOffsets.end()) {
        mappedOffsetsVec[index] = it->second;
        mappedSizesVec[index] = mappedSizes.lookup(index);
        continue;
      }
      mappedOffsetsVec[index] = domain.offset;
      mappedSizesVec[index] = domain.size;
    }
    return success();
  }
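  // Example: given a tile of a matmul LHS (indexing map (d0, d2)) with offsets
  // [off0, off2] and sizes [sz0, sz2], dimensions d0 and d2 are pinned to
  // those values, while d1 falls back to its full range from the iteration
  // domain.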
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    SmallVector<AffineMap> indexingMaps =
        llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
          OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
          return linalgOp.getMatchingIndexingMap(&opOperand);
        });
    if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
                                      allSizes, iterDomainOffsets,
                                      iterDomainSizes)))
      return failure();
    return success();
  }
    LinalgOp linalgOp = cast<LinalgOp>(op);
    Location loc = op->getLoc();

    OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
    SliceParameters sliceParams = computeSliceParameters(
        b, loc, outOperand->get(), sizes,
        linalgOp.getMatchingIndexingMap(outOperand), offsets,
        /*ubs=*/{}, subShapeSizes, true);
    resultOffsets = sliceParams.offsets;
    resultSizes = sliceParams.sizes;
  LogicalResult getIterationDomainTileFromResultTile(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    AffineMap indexingMap =
        linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
    if (!indexingMap.isProjectedPermutation()) {
      return op->emitOpError(
          "unhandled tiled implementation generation when result is not "
          "accessed using a permuted projection");
    }

    SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
    SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
    LogicalResult status =
        getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
                               {allSizes}, iterDomainOffsets, iterDomainSizes);
    assert(succeeded(status) && "unexpected error in offset calculation");
    return success();
  }
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromResultTile(
            op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes)))
      return failure();

    auto tilingInterfaceOp = cast<TilingInterface>(op);
    FailureOr<TilingResult> tilingResult =
        tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);
    if (failed(tilingResult))
      return failure();

    if (tilingResult->tiledOps.size() != 1)
      return op->emitOpError("failed to generate tiled implementation");

    return TilingResult{
        tilingResult->tiledOps,
        SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
        tilingResult->generatedSlices};
  }
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
            mappedSizes)))
      return failure();
    return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
  }
  LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
                                             Location loc,
                                             ValueRange ivs) const {
    auto linalgOp = cast<LinalgOp>(op);
    if (!linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have buffer semantics");

    SmallVector<Value> indexedValues;
    indexedValues.reserve(linalgOp->getNumOperands());
    Location linalgOpLoc = op->getLoc();
    // Load the data corresponding to the block arguments that represent the
    // input operands.
    for (OpOperand &operand : linalgOp->getOpOperands()) {
      if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
        indexedValues.push_back(nullptr);
        continue;
      }
      if (linalgOp.isScalar(&operand)) {
        indexedValues.push_back(operand.get());
        continue;
      }
      SmallVector<Value> indices = getIndicesForAccess(
          builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
      Value load =
          memref::LoadOp::create(builder, linalgOpLoc, operand.get(), indices);
      indexedValues.push_back(load);
    }
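    // With all operands loaded (or passed through for scalars), the payload is
    // inlined at this iteration point via inlinePayload above, which also
    // stores the terminator's operands back into the init buffers.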
  bool isOpFusableWithConsumerSlice(Operation *op, unsigned resultNumber,

  bool isOpFusableWithProducerSlices(
      Operation *op, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes, OpBuilder &b) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> indexingMaps =
        llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
          OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
          return linalgOp.getMatchingIndexingMap(&opOperand);
        });
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    return succeeded(getMappedOffsetAndSize(linalgOp, b, indexingMaps,
                                            allOffsets, allSizes,
                                            mappedOffsets, mappedSizes));
  }
};
static std::optional<unsigned>
getPositionIn(const SetVector<unsigned> &reductionDims, unsigned value) {
  for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
    if (reductionDim == value)
      return index;
  }
  return std::nullopt;
}
static SmallVector<AffineMap>
getPartialResultAffineMaps(LinalgOp linalgOp,
                           const SetVector<unsigned> &reductionDims) {
  auto partialReductionMaps = llvm::map_to_vector(
      linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
        // Insert an extra result into the init map for every reduction
        // dimension being tiled.
        for (auto redPos : reductionDims) {
          map = map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
                                 map.getNumResults());
        }
        return map;
      });
  return partialReductionMaps;
}
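// For example, with a matmul whose init map is (d0, d1, d2) -> (d0, d1) and
// reductionDims = {2}, the partial-result map becomes
// (d0, d1, d2) -> (d0, d1, d2): the accumulator grows an extra dimension that
// holds the per-tile partial sums.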
struct InitSliceInfo {
  SmallVector<int64_t> resultShape;
  SmallVector<OpFoldResult> offsets;
  SmallVector<OpFoldResult> sizes;
  SmallVector<OpFoldResult> strides;
};
static InitSliceInfo getInitSliceInfoForOuterReduction(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  SmallVector<OpFoldResult> initOffsets, initSizes;
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (reductionDims.contains(dim)) {
      initOffsets.push_back(zero);
    } else {
      initOffsets.push_back(offsets[dim]);
    }
    initSizes.push_back(sizes[dim]);
  }
  SmallVector<OpFoldResult> initStrides(initOffsets.size(), one);
  SmallVector<int64_t> resultShape;
  std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
  return {resultShape, initOffsets, initSizes, initStrides};
}
static InitSliceInfo getInitSliceInfoForOuterParallel(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  SmallVector<OpFoldResult> initOffsets, initSizes;
  SmallVector<OpFoldResult> resultShape;
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
      initOffsets.push_back(splitReductionIvs[dimPos.value()]);
      initSizes.push_back(one);
    } else {
      initOffsets.push_back(offsets[dim]);
      initSizes.push_back(sizes[dim]);
      resultShape.push_back(sizes[dim]);
    }
  }
  SmallVector<OpFoldResult> initStrides(initOffsets.size(), one);
  SmallVector<int64_t> staticShapes;
  std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
  return {staticShapes, initOffsets, initSizes, initStrides};
}
static InitSliceInfo getInitSliceInfo(MLIRContext *context,
                                      ReductionTilingStrategy strategy,
                                      ArrayRef<OpFoldResult> offsets,
                                      ArrayRef<OpFoldResult> sizes,
                                      const SetVector<unsigned> &reductionDims,
                                      ArrayRef<OpFoldResult> splitReductionIvs,
                                      AffineMap partialReductionMap) {
  if (strategy == ReductionTilingStrategy::PartialReductionOuterReduction) {
    return getInitSliceInfoForOuterReduction(context, offsets, sizes,
                                             reductionDims, splitReductionIvs,
                                             partialReductionMap);
  }
  assert(strategy == ReductionTilingStrategy::PartialReductionOuterParallel &&
         "unexpected ReductionTilingStrategy");
  return getInitSliceInfoForOuterParallel(context, offsets, sizes,
                                          reductionDims, splitReductionIvs,
                                          partialReductionMap);
}
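// The two strategies differ in how the accumulator is indexed: outer-reduction
// zeroes the offsets along reduction dimensions and keeps the full tile sizes,
// while outer-parallel indexes the accumulator by the split-reduction
// induction variables with size one along those dimensions.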
template <typename LinalgOpTy>
struct LinalgOpPartialReductionInterface
    : public PartialReductionOpInterface::ExternalModel<
          LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
  FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
      Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
      const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);

    OpBuilder::InsertionGuard guard(b);
    if (linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have tensor semantics");

    SmallVector<AffineMap> partialResultMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    SmallVector<Value> inits;
    for (auto [initIdx, result, partialMap] :
         llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
      SmallVector<Operation *, 4> combinerOps;
      if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                          combinerOps) ||
          combinerOps.size() != 1)
        return op->emitOpError("failed to analyze the reduction operation");

      Operation *reductionOp = combinerOps[0];
      std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
      if (!identity.has_value())
        return op->emitOpError(
            "failed to get an identity value for the reduction operation");

      // Append the new partial result dimensions.
      SmallVector<OpFoldResult> partialResultShape;
      for (AffineExpr dimExpr : partialMap.getResults()) {
        auto dim = cast<AffineDimExpr>(dimExpr);
        partialResultShape.push_back(sizes[dim.getPosition()]);
      }

      Type elType = getElementTypeOrSelf(result.getType());
      Value emptyTensor =
          tensor::EmptyOp::create(b, loc, partialResultShape, elType);
      Value constantOp = arith::ConstantOp::create(b, loc, *identity);
      auto identityTensor =
          linalg::FillOp::create(b, loc, constantOp, emptyTensor);
      inits.push_back(identityTensor.getResult(0));
    }
    return inits;
  }
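  // The neutral element depends on the combiner: 0 for addf/addi, 1 for
  // mulf/muli, and the largest/smallest representable value for min/max.
  // Seeding the accumulator with it makes the partial tiles safe to combine
  // later in any order.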
  FailureOr<TilingResult>
  tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
                         ReductionTilingStrategy tilingStrategy,
                         ValueRange init, ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes,
                         const SetVector<unsigned> &reductionDims,
                         ArrayRef<OpFoldResult> splitReductionIvs) const {
    OpBuilder::InsertionGuard guard(b);
    auto linalgOp = cast<LinalgOp>(op);

    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    // With outer-reduction tiling the init operands grow the extra reduction
    // dimensions; otherwise they keep their original indexing maps.
    SmallVector<AffineMap> newInitMaps;
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      newInitMaps = llvm::to_vector(partialReductionMaps);
    } else {
      newInitMaps = llvm::map_to_vector(
          linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
            return linalgOp.getMatchingIndexingMap(&opOperand);
          });
    }

    // Create the tiled input operands.
    SmallVector<Value> tiledInputs = makeTiledShapes(
        b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    // Create the tiled init operands, sliced as dictated by the strategy.
    SmallVector<Value, 1> tiledInits;
    for (auto [partialReductionMap, valueToTile] :
         llvm::zip_equal(partialReductionMaps, init)) {
      InitSliceInfo sliceInfo = getInitSliceInfo(
          b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
          splitReductionIvs, partialReductionMap);
      auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
      RankedTensorType sliceResultType = RankedTensorType::get(
          sliceInfo.resultShape, valueToTileType.getElementType(),
          valueToTileType.getEncoding());
      auto sliceOp = tensor::ExtractSliceOp::create(
          b, loc, sliceResultType, valueToTile, sliceInfo.offsets,
          sliceInfo.sizes, sliceInfo.strides);
      tiledInits.push_back(sliceOp.getResult());
      generatedSlices.push_back(sliceOp);
    }
    // Update the indexing maps; only the init maps may change.
    SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
    for (auto [initOperand, newInitMap] :
         llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
      int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
      newMaps[mapIdx] = newInitMap;
    }

    // Change the reduction dim iterator types.
    SmallVector<utils::IteratorType> newIteratorTypes =
        linalgOp.getIteratorTypesArray();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      for (int dim : reductionDims)
        newIteratorTypes[dim] = utils::IteratorType::parallel;
    }
    // Create the partial reduction op.
    Operation *partialReductionOp;
    auto resultTypes = ValueRange(tiledInits).getTypes();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      auto genericOp = GenericOp::create(b, loc, resultTypes, tiledInputs,
                                         tiledInits, newMaps, newIteratorTypes);
      IRMapping mapping;
      op->getRegion(0).cloneInto(&genericOp.getRegion(),
                                 genericOp.getRegion().begin(), mapping);
      partialReductionOp = genericOp.getOperation();
    } else {
      SmallVector<Value> operands = std::move(tiledInputs);
      llvm::append_range(operands, tiledInits);
      partialReductionOp = mlir::clone(b, op, resultTypes, operands);
    }
    return TilingResult{
        {partialReductionOp},
        llvm::map_to_vector(partialReductionOp->getResults(),
                            [](OpResult r) -> Value { return r; }),
        generatedSlices};
  }
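  // For a matmul tiled along K with the outer-reduction strategy, the K
  // iterator flips from "reduction" to "parallel" and each K-tile writes its
  // partial sums into the extra trailing dimension of the widened init; the
  // payload region is cloned verbatim from the original op.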
  FailureOr<MergeResult>
  mergeReductions(Operation *op, OpBuilder &b, Location loc,
                  ValueRange partialReduce,
                  const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    SmallVector<Operation *> mergeOperations;
    SmallVector<Value> replacements;
    for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
             linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
      unsigned initIdx = idx;
      // Find the positions of the partial-result dimensions that correspond
      // to the tiled reduction dimensions; these are reduced away by the
      // merge.
      SmallVector<int64_t> partialReductionDims;
      for (auto [resultNum, dimExpr] :
           llvm::enumerate(partialMap.getResults())) {
        unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
        if (llvm::is_contained(reductionDims, dim)) {
          partialReductionDims.push_back(resultNum);
        }
      }

      auto reduction = linalg::ReduceOp::create(
          b, loc, partialResult, init, partialReductionDims,
          [&linalgOp, &initIdx](OpBuilder &b, Location loc,
                                ValueRange inputs) {
            // Get the combiner op.
            SmallVector<Operation *, 4> combinerOps;
            matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                           combinerOps);
            Operation *clonedReductionOp = b.clone(*combinerOps[0]);
            // Combine the input at idx and output at numInits + idx.
            clonedReductionOp->setOperand(0, inputs[0]);
            clonedReductionOp->setOperand(1, inputs[1]);
            linalg::YieldOp::create(b, loc, clonedReductionOp->getResult(0));
          });

      mergeOperations.push_back(reduction);
      replacements.push_back(reduction->getResult(0));
    }

    return MergeResult{mergeOperations, replacements};
  }
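  // The merge step thus lowers to one linalg.reduce per result, folding the
  // extra partial dimensions back into the original init with the same
  // combiner (e.g. arith.addf for a sum reduction).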
  LogicalResult getPartialResultTilePosition(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
      ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
      ArrayRef<OpFoldResult> splitReductionIvs,
      SmallVector<OpFoldResult> &resultOffsets,
      SmallVector<OpFoldResult> &resultSizes) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    InitSliceInfo sliceInfo = getInitSliceInfo(
        b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
        splitReductionIvs, partialReductionMaps[resultNumber]);
    std::swap(resultOffsets, sliceInfo.offsets);
    std::swap(resultSizes, sliceInfo.sizes);
    return success();
  }
};
template <typename OpTy>
static SmallVector<Range> getPackUnPackIterationDomain(OpTy op, OpBuilder &b) {
  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                "applies to only pack or unpack operations");
  OpBuilder::InsertionGuard g(b);
  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
                                                     : op.getDestRank();
  OpFoldResult zero = b.getIndexAttr(0);
  OpFoldResult one = b.getIndexAttr(1);
  ReifiedRankedShapedTypeDims resultShape;
  (void)reifyResultShapes(b, op, resultShape);
  SmallVector<Range> loopBounds(rank);
  for (auto dim : llvm::seq<int64_t>(0, rank)) {
    loopBounds[dim].offset = zero;
    loopBounds[dim].stride = one;
    loopBounds[dim].size = resultShape[0][dim];
  }
  return loopBounds;
}
static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
                             SmallVector<OpFoldResult> &sizes,
                             ArrayRef<int64_t> permutation) {
  if (permutation.empty())
    return;
  applyPermutationToVector(offsets, permutation);
  applyPermutationToVector(sizes, permutation);
}
748 :
public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
750 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
754 auto packOp = cast<PackOp>(op);
755 SmallVector<utils::IteratorType> iteratorTypes(
756 packOp.getSourceRank(), utils::IteratorType::parallel);
757 return iteratorTypes;
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
  }
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    if (!packOp.hasPureTensorSemantics())
      return failure();
    Location loc = packOp.getLoc();

    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map the offsets and sizes to the original input.
    int64_t inputRank = packOp.getSourceRank();
    SmallVector<OpFoldResult> origOffsets(offsets);
    SmallVector<OpFoldResult> origSizes(sizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(packOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<OpFoldResult> srcDimValues =
        tensor::getMixedSizes(b, loc, packOp.getSource());
    SmallVector<OpFoldResult> inputIndices, inputSizes;
    for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym);
      if (dimAndTileMapping.count(dim)) {
        // If the data dimension is tiled, the i-th index is the product of
        // offset_i and tile_i, and the i-th size is the product of sizes_i
        // and tile_i.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        inputIndices.push_back(ab.mul(avOffset, avTileSize));
        inputSizes.push_back(ab.mul(avSize, avTileSize));
      } else {
        inputIndices.push_back(origOffsets[dim]);
        inputSizes.push_back(origSizes[dim]);
      }

      // Limit the size of the input operand for incomplete tiles.
      if (packOp.getPaddingValue()) {
        OpFoldResult dimSize = srcDimValues[dim];
        auto avDimSize = AV(dim0).bind(dimSize);
        auto avInputIdx = AV(dim1).bind(inputIndices.back());
        inputSizes.back() =
            ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
      }
    }

    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
                                     outputSizes)))
      return {};

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
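  // Schematically, tiling a pack along a tiled data dimension with inner tile
  // size T extracts a source slice at [offset * T] of size [size * T]
  // (clamped to the source bounds when a padding value is present) and a dest
  // slice covering the matching outer tile, then re-packs the slice.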
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    // The iteration domain covers the outer dimensions of the packed layout,
    // so the outer dimensions of `resultOffsets` are simply `offsets`, and the
    // inner dimensions get zero offsets because tiling does not apply to them.
    auto packOp = cast<PackOp>(op);
    int64_t inputRank = packOp.getSourceRank();
    int64_t outputRank = packOp.getDestRank();
    auto zeroAttr = b.getI64IntegerAttr(0);
    resultOffsets.assign(offsets.begin(), offsets.end());
    resultOffsets.append(outputRank - inputRank, zeroAttr);

    ReifiedRankedShapedTypeDims outputShape;
    (void)reifyResultShapes(b, packOp, outputShape);
    resultSizes.assign(sizes.begin(), sizes.end());
    for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
      resultSizes.push_back(outputShape[0][dataTileDim]);

    return success();
  }
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    int64_t numTiles = packOp.getInnerDimsPos().size();

    // The tile must cover the inner tiles fully: offsets into the inner tile
    // dimensions must be zero and the sizes must equal the inner tile sizes.
    for (auto offset : offsets.take_back(numTiles))
      if (!isZeroInteger(offset))
        return failure();
    for (auto [tileSize, size] :
         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
      if (!isEqualConstantIntOrValue(tileSize, size))
        return failure();

    FailureOr<TilingResult> tilingResult = getTiledImplementation(
        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unsupported operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();
    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<int64_t> outerShapeWithoutTranspose(
        packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
    if (!packOp.getOuterDimsPerm().empty()) {
      applyPermutationToVector(
          outerShapeWithoutTranspose,
          invertPermutationVector(packOp.getOuterDimsPerm()));
    }
    for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
      if (dimAndTileMapping.count(dim)) {
        FailureOr<int64_t> cstTileSize =
            ValueBoundsConstraintSet::computeConstantBound(
                presburger::BoundType::UB, sizes[dim],
                /*stopCondition=*/nullptr, /*closedUB=*/true);
        std::optional<int64_t> cstInnerSize =
            getConstantIntValue(dimAndTileMapping[dim]);

        // If the dimension is not actually tiled (the tile spans the whole
        // source dimension), fusing the pack op is always valid, even with
        // padding semantics, because a full slice is generated.
        int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
        int64_t destDimSize = outerShapeWithoutTranspose[dim];
        bool isTiled = failed(cstTileSize) ||
                       ShapedType::isDynamic(srcDimSize) ||
                       cstTileSize.value() < srcDimSize;
        if (!isTiled) {
          outerDimOffsets.push_back(offsets[dim]);
          if (ShapedType::isStatic(destDimSize)) {
            outerDimSizes.push_back(b.getIndexAttr(destDimSize));
          } else {
            outerDimSizes.push_back(
                b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
          }
          continue;
        }

        // Otherwise the tile size must be divisible by the inner tile size to
        // map it to the iteration domain.
        if ((failed(cstTileSize) || !cstInnerSize ||
             *cstTileSize % *cstInnerSize != 0))
          return failure();

        using AV = affine::AffineValueExpr;
        affine::AffineBuilder ab(b, loc);
        AffineExpr dim0, sym;
        bindDims(b.getContext(), dim0);
        bindSymbols(b.getContext(), sym);
        auto avOffset = AV(dim0).bind(offsets[dim]);
        auto avSize = AV(dim0).bind(sizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
      } else {
        outerDimOffsets.push_back(offsets[dim]);
        outerDimSizes.push_back(sizes[dim]);
      }
    }
    applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
    resultOffsets = outerDimOffsets;
    resultSizes = outerDimSizes;
    return success();
  }
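  // E.g. with inner tile size 16, an operand tile at offset 32 of size 64
  // maps to outer offset 32 floordiv 16 = 2 and outer size 64 ceildiv 16 = 4.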
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    auto packOp = cast<PackOp>(op);
    if (!packOp.hasPureTensorSemantics())
      return failure();
    Location loc = packOp.getLoc();

    int64_t inputRank = packOp.getSourceRank();
    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), offsets, sizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
            outerDimSizes)))
      return failure();

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
                                     outputOffsets, outputSizes)))
      return failure();

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
};
struct UnpackTileDimInfo {
  bool isAlignedToInnerTileSize;
  OpFoldResult sourceOffset;
  OpFoldResult sourceSize;
  OpFoldResult resultOffset;
  OpFoldResult destExpandedSize;
};
static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
                                              int64_t tileDim,
                                              OpFoldResult tileOffset,
                                              OpFoldResult tileSize) {
  UnpackTileDimInfo info;
  Attribute zeroAttr = b.getIndexAttr(0);
  Attribute oneAttr = b.getIndexAttr(1);
  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
      unpackOp.getDimAndTileMapping();
  // The dimension is not one of the inner tiled data dimensions.
  if (!dimAndTileMapping.count(tileDim)) {
    info.isAlignedToInnerTileSize = true;
    info.sourceOffset = tileOffset;
    info.sourceSize = tileSize;
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
    return info;
  }

  Location loc = unpackOp.getLoc();
  using AV = affine::AffineValueExpr;
  affine::AffineBuilder ab(b, loc);
  AffineExpr dim0, dim1;
  bindDims(b.getContext(), dim0, dim1);
  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];

  info.isAlignedToInnerTileSize = false;
  FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
      presburger::BoundType::UB, tileSize,
      /*stopCondition=*/nullptr, /*closedUB=*/true);
  std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
  if (!failed(cstSize) && cstInnerSize) {
    if (*cstSize % *cstInnerSize == 0)
      info.isAlignedToInnerTileSize = true;

    // If the tiling size equals the inner tile size, the outer dims are
    // always 1.
    if (*cstInnerSize == *cstSize) {
      auto lhs = AV(dim0).bind(tileOffset);
      auto rhs = AV(dim1).bind(innerTileSize);
      info.sourceOffset = ab.floor(lhs, rhs);
      info.sourceSize = oneAttr;
      info.resultOffset = zeroAttr;
      info.destExpandedSize = tileSize;
      return info;
    }
  }

  if (info.isAlignedToInnerTileSize) {
    info.sourceOffset =
        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
    info.sourceSize =
        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
    return info;
  }

  // The tile is not aligned to the inner tile size: compute the range of
  // source coordinates touched by [tileOffset, tileOffset + tileSize).
  affine::DivModValue firstCoord = affine::getDivMod(
      b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  OpFoldResult tileExclusiveBound =
      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
  affine::DivModValue lastCoord = affine::getDivMod(
      b, loc,
      getValueOrCreateConstantIndexOp(
          b, loc,
          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));

  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
                                       AV(dim1).bind(firstCoord.quotient));
  info.sourceSize =
      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
  info.sourceOffset = firstCoord.quotient;
  info.resultOffset = firstCoord.remainder;
  // Do not build an affine expression for the expanded size; it gets too
  // complicated and can trip up affine simplification.
  info.destExpandedSize = b.createOrFold<arith::MulIOp>(
      loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  return info;
}
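// Worked example: innerTileSize = 8, tileOffset = 3, tileSize = 4 takes the
// unaligned path: firstCoord = 3 divmod 8 = (0, 3) and lastCoord = 6 divmod 8
// = (0, 6), so a single source tile is read (sourceSize = 1) starting at
// source offset 0, and the requested window begins at resultOffset 3 inside
// the expanded destination of size 1 * 8 = 8.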
struct UnPackOpTiling
    : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    auto unpackOp = cast<UnPackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        unpackOp.getDestRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
  }
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto unpackOp = cast<UnPackOp>(op);
    if (!unpackOp.hasPureTensorSemantics())
      return failure();
    int64_t srcRank = unpackOp.getSourceRank();
    int64_t destRank = unpackOp.getDestRank();
    int64_t numInnerTiles = srcRank - destRank;
    Location loc = unpackOp.getLoc();

    // The perfect tiling case indicates that the tiling sizes are multiples of
    // the inner_tile_size; no extra data is read or discarded when
    // representing the tiled unpack op.
    bool isPerfectTilingCase = true;
    Attribute oneAttr = b.getIndexAttr(1);
    SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
    SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
    SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
    for (auto dim : llvm::seq<int64_t>(0, destRank)) {
      UnpackTileDimInfo info =
          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
      if (!info.isAlignedToInnerTileSize)
        isPerfectTilingCase = false;
      sliceSrcIndices.push_back(info.sourceOffset);
      sliceSrcSizes.push_back(info.sourceSize);
      destExpandedSizes.push_back(info.destExpandedSize);
      resultOffsetsFromDest.push_back(info.resultOffset);
    }

    // The tiling is applied on destination dimensions. We have to apply the
    // interchange on source dimensions if outer_dims_perm is set.
    applyPermToRange(sliceSrcIndices, sliceSrcSizes,
                     unpackOp.getOuterDimsPerm());
    Attribute zeroAttr = b.getIndexAttr(0);
    sliceSrcIndices.append(numInnerTiles, zeroAttr);
    sliceSrcSizes.append(unpackOp.getMixedTiles());
    sliceSrcStrides.append(numInnerTiles, oneAttr);
    SmallVector<Operation *> generatedSlices;
    tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
        b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
        sliceSrcStrides);
    generatedSlices.push_back(sliceSource);

    SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
    Value sliceDest;
    if (isPerfectTilingCase) {
      auto destSliceOp = tensor::ExtractSliceOp::create(
          b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
      sliceDest = destSliceOp;
      generatedSlices.push_back(destSliceOp);
    } else {
      sliceDest = tensor::EmptyOp::create(
          b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
    }

    SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
    for (auto tile : unpackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledUnpackOp = UnPackOp::create(
        b, loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());

    if (isPerfectTilingCase)
      return TilingResult{{tiledUnpackOp},
                          SmallVector<Value>(tiledUnpackOp->getResults()),
                          generatedSlices};

    auto extractSlice = tensor::ExtractSliceOp::create(
        b, loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
        destStrides);
    return TilingResult{
        {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
  }
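  // In the imperfect case the tiled unpack writes into an over-allocated
  // tensor.empty, and the final extract_slice trims the result back to the
  // requested [resultOffsetsFromDest, sizes] window.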
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    resultOffsets = llvm::to_vector(offsets);
    resultSizes = llvm::to_vector(sizes);
    return success();
  }
  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    FailureOr<TilingResult> tilingResult =
        getTiledImplementation(op, b, offsets, sizes);
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1) {
      LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    unsigned operandNumber = operandNumbers[0];
    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    // If the operand tile is the dest, no adjustment is needed.
    if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
      resultOffsets = llvm::to_vector(offsets);
      resultSizes = llvm::to_vector(sizes);
      return success();
    }
    Location loc = unPackOp.getLoc();

    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    auto destOffsets = offsets.drop_back(numTiles);
    auto destSizes = sizes.drop_back(numTiles);

    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map the offsets and sizes to the original output.
    int64_t outputRank = unPackOp.getDestRank();
    ReifiedRankedShapedTypeDims reifiedReturnShapes;
    if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
      return failure();
    SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
    SmallVector<OpFoldResult> origOffsets(destOffsets);
    SmallVector<OpFoldResult> origSizes(destSizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(unPackOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        unPackOp.getDimAndTileMapping();

    for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym0;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym0);
      if (dimAndTileMapping.count(dim)) {
        // If the data dimension is tiled, the i-th index is the product of
        // offset_i and tile_i, and the i-th size is the product of sizes_i and
        // tile_i, clamped against the result size.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
        auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
        resultOffsets.push_back(ab.mul(avOffset, avTileSize));
        auto avResultOffset = AV(dim1).bind(resultOffsets.back());
        resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
                                      ab.sub(avResultSize, avResultOffset)}));
      } else {
        resultOffsets.push_back(origOffsets[dim]);
        resultSizes.push_back(origSizes[dim]);
      }
    }
    return success();
  }
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    if (!unPackOp.hasPureTensorSemantics())
      return failure();

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    // The unpack op is fusible as a consumer only if the inner dims are not
    // tiled, i.e. the tile sizes match the inner tile sizes.
    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    for (auto [tileSize, size] :
         llvm::zip_equal(unPackOp.getMixedTiles(),
                         sizes.take_back(numTiles))) {
      if (!isEqualConstantIntOrValue(tileSize, size))
        return failure();
    }

    Location loc = unPackOp.getLoc();

    // Fetch the offsets/sizes to create the slice of the dest operand of the
    // unpack op.
    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
            outputSizes)))
      return failure();

    auto oneAttr = b.getI64IntegerAttr(1);
    int64_t outputRank = unPackOp.getDestRank();
    SmallVector<OpFoldResult> strides(outputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    // Create the slice of the dest operand.
    auto extractDestSlice = tensor::ExtractSliceOp::create(
        b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(extractDestSlice);

    strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
    // Create the slice of the source operand.
    auto extractSourceSlice = tensor::ExtractSliceOp::create(
        b, loc, unPackOp.getSource(), offsets, sizes, strides);
    tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
    for (auto tile : unPackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    // Create the tiled unpack op.
    Operation *tiledUnPackOp =
        UnPackOp::create(b, loc, TypeRange{extractDestSlice.getType()},
                         tiledOperands, op->getAttrs());

    return TilingResult{{tiledUnPackOp},
                        SmallVector<Value>(tiledUnPackOp->getResults()),
                        llvm::to_vector(ArrayRef<Operation *>{
                            extractSourceSlice, extractDestSlice})};
  }
};
template <typename OpType>
static void registerOne(MLIRContext *ctx) {
  OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
  OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
      *ctx);
}

/// Variadic helper function.
template <typename... OpTypes>
static void registerAll(MLIRContext *ctx) {
  (registerOne<OpTypes>(ctx), ...);
}

void mlir::linalg::registerTilingInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
    registerAll<
#define GET_OP_LIST
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
        >(ctx);
  });
}

void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
  });
}
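// Typical usage from a tool or pass-pipeline setup (sketch):
//
//   DialectRegistry registry;
//   registry.insert<linalg::LinalgDialect>();
//   linalg::registerTilingInterfaceExternalModels(registry);
//   MLIRContext context(registry);
//
// After registration, `cast<TilingInterface>(linalgOp.getOperation())`
// succeeds for any structured op, and the generic tiling drivers can consume
// the interface without depending on Linalg directly.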