27#include "llvm/ADT/SmallVectorExtras.h"
28#include "llvm/Support/Debug.h"
31#define DEBUG_TYPE "linalg-tiling-interface-impl"
50 Value v = affine::AffineApplyOp::create(
b, loc, m, ivs);
60 Block *body = linalgOp.getBlock();
64 if (
auto indexOp = dyn_cast<IndexOp>(&op)) {
65 map.
map(indexOp.getResult(), ivs[indexOp.getDim()]);
73 for (
const auto &operand : llvm::enumerate(terminator->
getOperands())) {
75 OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
77 b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
78 memref::StoreOp::create(
b, loc, toStore,
79 linalgOp.getDpsInitOperand(operand.index())->get(),
95template <
typename LinalgOpTy>
96struct LinalgOpTilingInterface
97 :
public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
100 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
101 LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
102 return concreteOp.getIteratorTypesArray();
106 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &
b)
const {
107 OpBuilder::InsertionGuard g(
b);
108 b.setInsertionPoint(op);
109 Location loc = op->
getLoc();
110 LinalgOp linalgOp = cast<LinalgOp>(op);
111 SmallVector<OpFoldResult> allShapesSizes =
112 linalgOp.createFlatListOfOperandDims(
b, loc);
113 AffineMap map = linalgOp.getShapesToLoopsMap();
115 return llvm::map_to_vector(map.
getResults(), [&](AffineExpr loopExpr) {
116 OpFoldResult ofr = affine::makeComposedFoldedAffineApply(b, loc, loopExpr,
118 return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
123 FailureOr<TilingResult>
130 LinalgOp linalgOp = cast<LinalgOp>(op);
133 b, loc, linalgOp, valuesToTile, offsets, sizes, {},
true);
135 llvm::make_filter_range(
137 [](
Value v) ->
bool {
138 return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
146 Operation *tiledOp =
clone(
b, linalgOp, resultTensorTypes, tiledOperands);
// Maps per-operand tile offsets/sizes back onto the op's iteration space by
// walking each operand's indexing map. NOTE(review): this region is an
// extraction fragment with missing lines; comments describe only the code
// that is visible here.
157 getMappedOffsetAndSize(LinalgOp linalgOp,
OpBuilder &
b,
// Walk every (indexing map, offsets, sizes) triple — one per operand.
165 for (
auto [indexingMap, offsets, sizes] :
166 llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
// Each map result that is a plain AffineDimExpr pins the offset/size of
// the corresponding loop dimension.
167 for (
auto [resultExpr, offset, size] :
168 llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
169 auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
172 unsigned position = dimExpr.getPosition();
173 auto it = mappedOffsets.find(position);
174 if (it != mappedOffsets.end()) {
// A loop dim constrained through two different operands must agree on
// both offset and size, else the mapping is inconsistent.
177 if (seenOffset != offset || seenSize != size) {
179 llvm::dbgs() <<
"inconsistent iteration space mapping from "
180 "offsets/sizes of operands/results";
// First time this loop dimension is seen: record its offset and size.
185 mappedOffsets[position] = offset;
186 mappedSizes[position] = size;
// Loop dims not constrained by any operand fall back to the op's full
// iteration domain below.
194 cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(
b);
195 mappedOffsetsVec.resize(iterationDomain.size());
196 mappedSizesVec.resize(iterationDomain.size());
197 for (
auto [
index, domain] : llvm::enumerate(iterationDomain)) {
198 auto it = mappedOffsets.find(
index);
199 if (it != mappedOffsets.end()) {
200 mappedOffsetsVec[
index] = it->second;
201 mappedSizesVec[
index] = mappedSizes.lookup(
index);
// Unconstrained dimension: use the iteration domain's own offset/size.
204 mappedOffsetsVec[
index] = domain.offset;
205 mappedSizesVec[
index] = domain.size;
212 LogicalResult getIterationDomainTileFromOperandTiles(
218 auto linalgOp = cast<LinalgOp>(op);
221 llvm::map_to_vector(operandNumbers, [&](
unsigned operandNumber) {
222 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
223 return linalgOp.getMatchingIndexingMap(&opOperand);
225 if (
failed(getMappedOffsetAndSize(linalgOp,
b, indexingMaps, allOffsets,
226 allSizes, iterDomainOffsets,
242 LinalgOp linalgOp = cast<LinalgOp>(op);
251 OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
253 b, loc, outOperand->get(), sizes,
254 linalgOp.getMatchingIndexingMap(outOperand), offsets,
255 {}, subShapeSizes,
true);
256 resultOffsets = sliceParams.
offsets;
257 resultSizes = sliceParams.
sizes;
261 LogicalResult getIterationDomainTileFromResultTile(
266 auto linalgOp = cast<LinalgOp>(op);
273 linalgOp.getIndexingMapMatchingResult(op->
getResult(resultNumber));
276 "unhandled tiled implementation generation when result is not "
277 "accessed using a permuted projection");
283 getMappedOffsetAndSize(linalgOp,
b, indexingMap, {allOffsets},
284 {allSizes}, iterDomainOffsets, iterDomainSizes);
286 assert(succeeded(status) &&
"unexpected error in offset calculation");
290 FailureOr<TilingResult>
295 if (
failed(getIterationDomainTileFromResultTile(
296 op,
b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
299 auto tilingInterfaceOp = cast<TilingInterface>(op);
300 FailureOr<TilingResult> tilingResult =
301 tilingInterfaceOp.getTiledImplementation(
b, mappedOffsets, mappedSizes);
306 if (tilingResult->tiledOps.size() != 1)
307 return op->
emitOpError(
"failed to generate tiled implementation");
310 tilingResult->tiledOps,
312 tilingResult->generatedSlices};
317 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
322 if (
failed(getIterationDomainTileFromOperandTiles(
323 op,
b, operandNumbers, allOffsets, allSizes, mappedOffsets,
333 auto linalgOp = cast<LinalgOp>(op);
334 if (!linalgOp.hasPureBufferSemantics())
335 return op->
emitOpError(
"expected operation to have buffer semantics");
338 indexedValues.reserve(linalgOp->getNumOperands());
342 for (
OpOperand &operand : linalgOp->getOpOperands()) {
343 if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
344 indexedValues.push_back(
nullptr);
347 if (linalgOp.isScalar(&operand)) {
348 indexedValues.push_back(operand.get());
352 builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
354 memref::LoadOp::create(builder, linalgOpLoc, operand.get(),
indices);
355 indexedValues.push_back(
load);
362 bool isOpFusableWithConsumerSlice(
Operation *op,
unsigned resultNumber,
369 bool isOpFusableWithProducerSlices(
374 auto linalgOp = cast<LinalgOp>(op);
376 llvm::map_to_vector(operandNumbers, [&](
unsigned operandNumber) {
377 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
378 return linalgOp.getMatchingIndexingMap(&opOperand);
383 return succeeded(getMappedOffsetAndSize(linalgOp,
b, indexingMaps,
384 allOffsets, allSizes, mappedOffsets,
396 for (
auto [
index, reductionDim] : llvm::enumerate(reductionDims)) {
397 if (reductionDim == value) {
// Builds, for every DPS init operand, its indexing map extended with the
// reduction dimensions appended as extra results — the maps used for the
// partial (not-yet-merged) reduction results.
// NOTE(review): extraction fragment; some original lines are missing.
409getPartialResultAffineMaps(LinalgOp linalgOp,
411 auto partialReductionMaps = llvm::map_to_vector(
412 linalgOp.getDpsInitsMutable(), [&](
OpOperand &opOperand) {
413 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
// Append each reduction dim as a trailing result of the init's map.
414 for (auto redPos : reductionDims) {
416 map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
417 map.getNumResults());
421 return partialReductionMaps;
// Aggregates the parameters needed to materialize the slice of an init tensor
// that a partial-reduction tile reads or writes.
424struct InitSliceInfo {
// Static shape of the slice's result type (used when building the type).
425 SmallVector<int64_t> resultShape;
// Offsets of the slice into the init tensor.
426 SmallVector<OpFoldResult> offsets;
// Sizes of the slice.
427 SmallVector<OpFoldResult> sizes;
// Strides of the slice.
428 SmallVector<OpFoldResult> strides;
// Computes the init-tensor slice for the outer-reduction partial tiling
// strategy: reduction dims keep offset 0, non-reduction dims use the tile's
// offsets/sizes. NOTE(review): extraction fragment; the loop header and some
// declarations are missing from this view.
434static InitSliceInfo getInitSliceInfoForOuterReduction(
440 Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
441 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
444 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
// Reduction dimensions are not offset — the partial result spans them.
445 if (reductionDims.contains(dim)) {
446 initOffsets.push_back(zero);
448 initOffsets.push_back(offsets[dim]);
450 initSizes.push_back(sizes[dim]);
454 return {resultShape, initOffsets, initSizes, initStrides};
// Computes the init-tensor slice for the outer-parallel partial tiling
// strategy: each reduction dim is addressed by its split-reduction IV with
// unit size; other dims use the tile's offsets/sizes.
// NOTE(review): extraction fragment; several original lines are missing.
460static InitSliceInfo getInitSliceInfoForOuterParallel(
466 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
470 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
// Reduction dim: select the slot indexed by the split-reduction IV.
471 if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
472 initOffsets.push_back(splitReductionIvs[dimPos.value()]);
473 initSizes.push_back(one);
475 initOffsets.push_back(offsets[dim]);
476 initSizes.push_back(sizes[dim]);
477 resultShape.push_back(sizes[dim]);
482 return {staticShapes, initOffsets, initSizes, initStrides};
// Dispatches to the strategy-specific init-slice computation.
// NOTE(review): extraction fragment — the strategy check/assert between the
// two returns is missing from this view.
487static InitSliceInfo getInitSliceInfo(
MLIRContext *context,
495 return getInitSliceInfoForOuterReduction(context, offsets, sizes,
496 reductionDims, splitReductionIvs,
497 partialReductionMap);
500 "unexpected ReductionTilingStrategy");
501 return getInitSliceInfoForOuterParallel(context, offsets, sizes,
502 reductionDims, splitReductionIvs,
503 partialReductionMap);
508template <
typename LinalgOpTy>
509struct LinalgOpPartialReductionInterface
510 :
public PartialReductionOpInterface::ExternalModel<
511 LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
512 FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
513 Operation *op, OpBuilder &
b, Location loc, ArrayRef<OpFoldResult> sizes,
515 auto linalgOp = cast<LinalgOp>(op);
517 OpBuilder::InsertionGuard guard(
b);
518 if (linalgOp.hasPureBufferSemantics())
519 return op->
emitOpError(
"expected operation to have tensor semantics");
521 SmallVector<AffineMap> partialResultMaps =
522 getPartialResultAffineMaps(linalgOp, reductionDims);
524 SmallVector<Value> inits;
525 for (
auto [initIdx,
result, partialMap] :
526 llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
527 SmallVector<Operation *, 4> combinerOps;
530 combinerOps.size() != 1)
531 return op->
emitOpError(
"Failed to anaysis the reduction operation.");
533 Operation *reductionOp = combinerOps[0];
534 std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
535 if (!identity.has_value())
537 "Failed to get an identity value for the reduction operation.");
540 SmallVector<OpFoldResult> partialResultShape;
541 for (AffineExpr dimExpr : partialMap.getResults()) {
542 auto dim = cast<AffineDimExpr>(dimExpr);
543 partialResultShape.push_back(sizes[dim.getPosition()]);
548 tensor::EmptyOp::create(
b, loc, partialResultShape, elType);
549 Value constantOp = arith::ConstantOp::create(
b, loc, *identity);
550 auto identityTensor =
551 linalg::FillOp::create(
b, loc, constantOp, emptyTensor);
552 inits.push_back(identityTensor.getResult(0));
558 FailureOr<TilingResult>
559 tileToPartialReduction(Operation *op, OpBuilder &
b, Location loc,
561 ValueRange init, ArrayRef<OpFoldResult> offsets,
562 ArrayRef<OpFoldResult> sizes,
564 ArrayRef<OpFoldResult> splitReductionIvs)
const {
565 OpBuilder::InsertionGuard guard(
b);
566 auto linalgOp = cast<LinalgOp>(op);
568 SmallVector<AffineMap> partialReductionMaps =
569 getPartialResultAffineMaps(linalgOp, reductionDims);
573 SmallVector<AffineMap> newInitMaps;
574 if (tilingStrategy ==
575 ReductionTilingStrategy::PartialReductionOuterReduction) {
576 newInitMaps = llvm::to_vector(partialReductionMaps);
578 newInitMaps = llvm::map_to_vector(
579 linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
580 return linalgOp.getMatchingIndexingMap(&opOperand);
586 b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {},
true);
587 SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
588 llvm::make_filter_range(
589 tiledInputs, [](Value v) ->
bool {
return v.
getDefiningOp(); }),
593 SmallVector<Value, 1> tiledInits;
594 for (
auto [partialReductionMap, valueToTile] :
595 llvm::zip_equal(partialReductionMaps, init)) {
596 InitSliceInfo sliceInfo = getInitSliceInfo(
597 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
598 splitReductionIvs, partialReductionMap);
599 auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
601 sliceInfo.resultShape, valueToTileType.getElementType(),
602 valueToTileType.getEncoding());
603 auto sliceOp = tensor::ExtractSliceOp::create(
605 sliceInfo.sizes, sliceInfo.strides);
606 tiledInits.push_back(sliceOp.getResult());
607 generatedSlices.push_back(sliceOp);
611 SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
612 for (
auto [initOperand, newInitMap] :
613 llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
614 int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
615 newMaps[mapIdx] = newInitMap;
619 SmallVector<utils::IteratorType> newIteratorTypes =
620 linalgOp.getIteratorTypesArray();
621 if (tilingStrategy ==
622 ReductionTilingStrategy::PartialReductionOuterReduction) {
623 for (
int dim : reductionDims)
624 newIteratorTypes[dim] = utils::IteratorType::parallel;
628 Operation *partialReductionOp;
629 auto resultTypes =
ValueRange(tiledInits).getTypes();
630 if (tilingStrategy ==
631 ReductionTilingStrategy::PartialReductionOuterReduction) {
632 auto genericOp = GenericOp::create(
b, loc, resultTypes, tiledInputs,
633 tiledInits, newMaps, newIteratorTypes);
636 genericOp.getRegion().begin(), mapping);
637 partialReductionOp = genericOp.getOperation();
639 SmallVector<Value> operands = std::move(tiledInputs);
640 llvm::append_range(operands, tiledInits);
641 partialReductionOp =
mlir::clone(
b, op, resultTypes, operands);
644 {partialReductionOp},
645 llvm::map_to_vector(partialReductionOp->
getResults(),
646 [](OpResult r) -> Value { return r; }),
// Merges per-tile partial reduction results back into the original inits by
// emitting one linalg.reduce per result over the dimensions that were added
// for the partial reduction. NOTE(review): extraction fragment; comments
// describe only the code visible here.
650 FailureOr<MergeResult>
651 mergeReductions(Operation *op, OpBuilder &
b, Location loc,
654 auto linalgOp = cast<LinalgOp>(op);
655 SmallVector<AffineMap> partialReductionMaps =
656 getPartialResultAffineMaps(linalgOp, reductionDims);
659 SmallVector<Operation *> mergeOperations;
660 SmallVector<Value> replacements;
661 for (
auto [idx, init, partialResult, partialMap] : llvm::enumerate(
662 linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
663 unsigned initIdx = idx;
// Result positions of the partial map that correspond to reduction loop
// dims — these are the dimensions linalg.reduce collapses.
668 SmallVector<int64_t> partialReductionDims;
669 for (
auto [resultNum, dimExpr] :
670 llvm::enumerate(partialMap.getResults())) {
671 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
672 if (llvm::is_contained(reductionDims, dim)) {
673 partialReductionDims.push_back(resultNum);
// Build the reduce op; its body clones the original combiner operation.
677 auto reduction = linalg::ReduceOp::create(
678 b, loc, partialResult, init, partialReductionDims,
679 [&linalgOp, &initIdx](OpBuilder &
b, Location loc,
ValueRange inputs) {
681 SmallVector<Operation *, 4> combinerOps;
684 Operation *clonedReductionOp =
b.clone(*combinerOps[0]);
688 linalg::YieldOp::create(
b, loc, clonedReductionOp->
getResult(0));
691 mergeOperations.push_back(reduction);
692 replacements.push_back(reduction->getResult(0));
695 return MergeResult{mergeOperations, replacements};
// Returns the offsets/sizes at which the partial result for `resultNumber`
// is written, reusing the same init-slice computation used when tiling.
// NOTE(review): extraction fragment; some parameter lines are missing.
698 LogicalResult getPartialResultTilePosition(
699 Operation *op, OpBuilder &
b,
unsigned resultNumber,
702 ArrayRef<OpFoldResult> splitReductionIvs,
703 SmallVector<OpFoldResult> &resultOffsets,
704 SmallVector<OpFoldResult> &resultSizes)
const {
705 auto linalgOp = cast<LinalgOp>(op);
706 SmallVector<AffineMap> partialReductionMaps =
707 getPartialResultAffineMaps(linalgOp, reductionDims);
708 InitSliceInfo sliceInfo = getInitSliceInfo(
709 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
710 splitReductionIvs, partialReductionMaps[resultNumber]);
// Hand the computed offsets/sizes to the caller; swap avoids copies.
711 std::swap(resultOffsets, sliceInfo.offsets);
712 std::swap(resultSizes, sliceInfo.sizes);
718template <
typename OpTy>
721 static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
722 "applies to only pack or unpack operations");
724 int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
729 (
void)op.reifyResultShapes(builder, resultShape);
731 for (
auto dim : llvm::seq<int64_t>(0, rank)) {
732 loopBounds[dim].offset = zero;
733 loopBounds[dim].stride = one;
734 loopBounds[dim].size = resultShape[0][dim];
742 if (permutation.empty())
754 interchangeVector.reserve(dimsPos.size());
763 for (
int64_t dimsIdx = 0, end = dimsPos.size(); dimsIdx < end; dimsIdx++)
764 dimsAndPosMapping[dimsPos[dimsIdx]] = dimsIdx;
768 for (
int64_t dimsIdx = 0; dimsIdx < rank; dimsIdx++) {
769 if (dimsAndPosMapping.count(dimsIdx))
770 interchangeVector.push_back(dimsAndPosMapping[dimsIdx]);
772 return interchangeVector;
792 for (
auto [idx, val] : llvm::enumerate(interchangeVector))
793 vec[idx + offset] = elements[val + offset];
799static void generatePackOpScalarImplementationBody(PackOp packOp,
814 computeInterchangeFromDimPos(dimsToInnerBlock, packOp.getSourceRank());
815 interchangedIvs = interchange<Value>(interchangedIvs, interchangeVector,
816 packOp.getSourceRank());
817 if (!dimsToOuterBlock.empty()) {
819 computeInterchangeFromDimPos(dimsToOuterBlock, packOp.getSourceRank());
821 interchange<Value>(interchangedIvs, interchangeVector, 0);
824 packOp.getDimAndTileMapping();
826 size_t pointLoopsOffset = 0;
827 int64_t sourceRank = packOp.getSourceRank();
828 for (
auto dim : llvm::seq<int64_t>(0, sourceRank)) {
829 if (dimAndTileMapping.contains(dim)) {
834 builder, loc, i *
tile +
j,
836 interchangedIvs[dim],
837 interchangedIvs[pointLoopsOffset + packOp.getSourceRank()],
838 dimAndTileMapping[dim]});
839 sourceIndices.push_back(sourceIndex);
842 sourceIndices.push_back(interchangedIvs[dim]);
846 auto createLoad = [&]() ->
Value {
847 return memref::LoadOp::create(
848 builder, loc, packOp.getSource(),
852 if (
auto paddingValue = packOp.getPaddingValue()) {
855 for (
auto dim : llvm::seq<int64_t>(0, sourceRank)) {
858 Value cond = arithBuilder.slt(
862 scalar = scf::IfOp::create(
865 scf::YieldOp::create(
b, l, createLoad());
869 scf::YieldOp::create(
b, l, paddingValue);
873 scalar = createLoad();
876 memref::StoreOp::create(builder, loc, scalar, packOp.getDest(), ivs);
880 :
public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
882 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
886 auto packOp = cast<PackOp>(op);
887 SmallVector<utils::IteratorType> iteratorTypes(
888 packOp.getSourceRank(), utils::IteratorType::parallel);
889 return iteratorTypes;
892 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &
b)
const {
893 return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op),
b);
896 FailureOr<TilingResult>
898 ArrayRef<OpFoldResult> offsets,
899 ArrayRef<OpFoldResult> sizes)
const {
900 auto packOp = cast<PackOp>(op);
902 if (!packOp.hasPureTensorSemantics())
905 Location loc = packOp.getLoc();
909 int64_t inputRank = packOp.getSourceRank();
910 SmallVector<OpFoldResult> origOffsets(offsets);
911 SmallVector<OpFoldResult> origSizes(sizes);
912 applyPermToRange(origOffsets, origSizes,
916 packOp.getDimAndTileMapping();
917 SmallVector<OpFoldResult> srcDimValues =
919 SmallVector<OpFoldResult> inputIndices, inputSizes;
920 for (
auto dim : llvm::seq<int64_t>(0, inputRank)) {
921 using AV = affine::AffineValueExpr;
922 affine::AffineBuilder ab(
b, loc);
923 AffineExpr dim0, dim1, sym;
926 if (dimAndTileMapping.count(dim)) {
930 auto avOffset = AV(dim0).bind(origOffsets[dim]);
931 auto avSize = AV(dim0).bind(origSizes[dim]);
932 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
933 inputIndices.push_back(ab.mul(avOffset, avTileSize));
934 inputSizes.push_back(ab.mul(avSize, avTileSize));
936 inputIndices.push_back(origOffsets[dim]);
937 inputSizes.push_back(origSizes[dim]);
941 if (packOp.getPaddingValue()) {
942 OpFoldResult dimSize = srcDimValues[dim];
943 auto avDimSize = AV(dim0).bind(dimSize);
944 auto avInputIdx = AV(dim1).bind(inputIndices.back());
946 ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
950 auto oneAttr =
b.getI64IntegerAttr(1);
951 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
953 SmallVector<Value> tiledOperands;
954 auto sourceSlice = tensor::ExtractSliceOp::create(
955 b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
956 tiledOperands.push_back(sourceSlice);
958 SmallVector<OpFoldResult> outputOffsets, outputSizes;
963 strides.append(packOp.getDestRank() - inputRank, oneAttr);
964 auto outSlice = tensor::ExtractSliceOp::create(
965 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
966 tiledOperands.push_back(outSlice);
968 if (
auto val = packOp.getPaddingValue())
969 tiledOperands.push_back(val);
970 for (
auto tile : packOp.getInnerTiles())
971 tiledOperands.push_back(
tile);
973 Operation *tiledPackOp = PackOp::create(
978 SmallVector<Value>(tiledPackOp->
getResults()),
979 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
984 ArrayRef<OpFoldResult> offsets,
985 ArrayRef<OpFoldResult> sizes,
986 SmallVector<OpFoldResult> &resultOffsets,
987 SmallVector<OpFoldResult> &resultSizes)
const {
992 auto packOp = cast<PackOp>(op);
993 int64_t inputRank = packOp.getSourceRank();
994 int64_t outputRank = packOp.getDestRank();
995 auto zeroAttr =
b.getI64IntegerAttr(0);
996 resultOffsets.assign(offsets.begin(), offsets.end());
997 resultOffsets.append(outputRank - inputRank, zeroAttr);
1001 resultSizes.assign(sizes.begin(), sizes.end());
1002 for (
auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
1003 resultSizes.push_back(outputShape[0][dataTileDim]);
1008 FailureOr<TilingResult>
1009 generateResultTileValue(Operation *op, OpBuilder &
b,
unsigned resultNumber,
1010 ArrayRef<OpFoldResult> offsets,
1011 ArrayRef<OpFoldResult> sizes)
const {
1012 auto packOp = cast<PackOp>(op);
1013 int64_t numTiles = packOp.getInnerDimsPos().size();
1018 for (
auto offset : offsets.take_back(numTiles))
1023 llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
1028 op,
b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
1029 if (
failed(tilingResult))
1031 return tilingResult.value();
1034 LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
1037 auto packOp = cast<PackOp>(op);
1038 assert(packOp.hasPureBufferSemantics() &&
1039 "expected operation to have buffer semantics");
1040 OpBuilder::InsertionGuard g(builder);
1043 SmallVector<Value> ivVec(ivs);
1046 SmallVector<OpFoldResult> outputShape;
1047 Value dest = packOp.getDest();
1048 for (
auto dim : llvm::seq<int64_t>(0, packOp.getDestRank()))
1057 for (
auto dataTileDim : llvm::seq<unsigned>(packOp.getSourceRank(),
1058 packOp.getDestRank() - 1)) {
1060 outputShape[dataTileDim]);
1061 scf::ForOp loop = scf::ForOp::create(builder, loc, zero, ub, one);
1063 ivVec.push_back(loop.getInductionVar());
1070 [&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
1072 ivVec.push_back(iv);
1073 generatePackOpScalarImplementationBody(packOp, bodyBuilder, bodyLoc,
1075 scf::YieldOp::create(bodyBuilder, bodyLoc);
1083 LogicalResult getIterationDomainTileFromOperandTiles(
1084 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1085 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1086 ArrayRef<SmallVector<OpFoldResult>> allSizes,
1087 SmallVectorImpl<OpFoldResult> &resultOffsets,
1088 SmallVectorImpl<OpFoldResult> &resultSizes)
const {
1089 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1091 { llvm::dbgs() <<
"unsupported operands for consumer fusion"; });
1095 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1096 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1097 auto packOp = cast<PackOp>(op);
1098 Location loc = packOp.getLoc();
1099 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
1101 packOp.getDimAndTileMapping();
1102 SmallVector<int64_t> outerShapeWithoutTranspose(
1103 packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
1104 if (!packOp.getOuterDimsPerm().empty()) {
1106 outerShapeWithoutTranspose,
1109 for (
auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
1110 if (dimAndTileMapping.count(dim)) {
1111 FailureOr<int64_t> cstTileSize =
1113 presburger::BoundType::UB, sizes[dim],
1115 std::optional<int64_t> cstInnerSize =
1125 int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
1126 int64_t destDimSize = outerShapeWithoutTranspose[dim];
1128 ShapedType::isDynamic(srcDimSize) ||
1129 cstTileSize.value() < srcDimSize;
1131 outerDimOffsets.push_back(offsets[dim]);
1132 if (ShapedType::isStatic(destDimSize)) {
1133 outerDimSizes.push_back(
b.getIndexAttr(destDimSize));
1135 outerDimSizes.push_back(
1136 b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
1155 if ((
failed(cstTileSize) || !cstInnerSize ||
1156 *cstTileSize % *cstInnerSize != 0))
1159 using AV = affine::AffineValueExpr;
1160 affine::AffineBuilder ab(
b, loc);
1161 AffineExpr dim0, sym;
1164 auto avOffset = AV(dim0).bind(offsets[dim]);
1165 auto avSize = AV(dim0).bind(sizes[dim]);
1166 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
1167 outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
1168 outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
1170 outerDimOffsets.push_back(offsets[dim]);
1171 outerDimSizes.push_back(sizes[dim]);
1174 applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
1175 resultOffsets = outerDimOffsets;
1176 resultSizes = outerDimSizes;
1181 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1182 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1183 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1184 ArrayRef<SmallVector<OpFoldResult>> allSizes)
const {
1185 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1187 { llvm ::dbgs() <<
"unhandled operands for consumer fusion"; });
1191 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1192 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1194 auto packOp = cast<PackOp>(op);
1196 if (!packOp.hasPureTensorSemantics())
1199 Location loc = packOp.getLoc();
1201 int64_t inputRank = packOp.getSourceRank();
1202 auto oneAttr =
b.getI64IntegerAttr(1);
1203 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
1205 SmallVector<Value> tiledOperands;
1206 auto sourceSlice = tensor::ExtractSliceOp::create(
1207 b, loc, packOp.getSource(), offsets, sizes, strides);
1208 tiledOperands.push_back(sourceSlice);
1210 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
1211 if (
failed(getIterationDomainTileFromOperandTiles(
1212 op,
b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
1216 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1218 outputOffsets, outputSizes)))
1221 strides.append(packOp.getDestRank() - inputRank, oneAttr);
1222 auto outSlice = tensor::ExtractSliceOp::create(
1223 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
1224 tiledOperands.push_back(outSlice);
1226 if (
auto val = packOp.getPaddingValue())
1227 tiledOperands.push_back(val);
1228 for (
auto tile : packOp.getInnerTiles())
1229 tiledOperands.push_back(
tile);
1231 Operation *tiledPackOp = PackOp::create(
1234 return TilingResult{
1236 SmallVector<Value>(tiledPackOp->
getResults()),
1237 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
1241struct UnpackTileDimInfo {
1242 bool isAlignedToInnerTileSize;
1243 OpFoldResult sourceOffset;
1244 OpFoldResult sourceSize;
1245 OpFoldResult resultOffset;
1246 OpFoldResult destExpandedSize;
1252static UnpackTileDimInfo getUnpackTileDimInfo(
OpBuilder &
b, UnPackOp unpackOp,
1256 UnpackTileDimInfo info;
1260 unpackOp.getDimAndTileMapping();
1262 if (!dimAndTileMapping.count(tileDim)) {
1263 info.isAlignedToInnerTileSize =
true;
1264 info.sourceOffset = tileOffset;
1265 info.sourceSize = tileSize;
1266 info.resultOffset = zeroAttr;
1267 info.destExpandedSize = tileSize;
1278 OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
1280 info.isAlignedToInnerTileSize =
false;
1285 if (!
failed(cstSize) && cstInnerSize) {
1286 if (*cstSize % *cstInnerSize == 0)
1287 info.isAlignedToInnerTileSize =
true;
1291 if (*cstInnerSize == *cstSize) {
1292 auto lhs = AV(dim0).bind(tileOffset);
1293 auto rhs = AV(dim1).bind(innerTileSize);
1294 info.sourceOffset = ab.floor(
lhs,
rhs);
1295 info.sourceSize = oneAttr;
1296 info.resultOffset = zeroAttr;
1297 info.destExpandedSize = tileSize;
1302 if (info.isAlignedToInnerTileSize) {
1304 ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
1305 info.resultOffset = zeroAttr;
1306 info.destExpandedSize = tileSize;
1315 ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
1319 affine::DivModValue firstCoord = affine::getDivMod(
1323 ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
1324 affine::DivModValue lastCoord = affine::getDivMod(
1328 ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
1331 OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
1332 AV(dim1).bind(firstCoord.quotient));
1334 ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
1335 info.sourceOffset = firstCoord.quotient;
1336 info.resultOffset = firstCoord.remainder;
1339 info.destExpandedSize =
b.createOrFold<arith::MulIOp>(
1345struct UnPackOpTiling
1346 :
public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
1348 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op)
const {
1349 auto unpackOp = cast<UnPackOp>(op);
1350 SmallVector<utils::IteratorType> iteratorTypes(
1351 unpackOp.getDestRank(), utils::IteratorType::parallel);
1352 return iteratorTypes;
1355 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &
b)
const {
1356 return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op),
b);
1373 FailureOr<TilingResult>
1375 ArrayRef<OpFoldResult> offsets,
1376 ArrayRef<OpFoldResult> sizes)
const {
1377 auto unpackOp = cast<UnPackOp>(op);
1379 if (!unpackOp.hasPureTensorSemantics())
1382 int64_t srcRank = unpackOp.getSourceRank();
1383 int64_t destRank = unpackOp.getDestRank();
1384 int64_t numInnerTiles = srcRank - destRank;
1385 Location loc = unpackOp.getLoc();
1390 bool isPerfectTilingCase =
true;
1391 Attribute oneAttr =
b.getIndexAttr(1);
1392 SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
1393 SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
1394 SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
1395 for (
auto dim : llvm::seq<int64_t>(0, destRank)) {
1396 UnpackTileDimInfo info =
1397 getUnpackTileDimInfo(
b, unpackOp, dim, offsets[dim], sizes[dim]);
1398 if (!info.isAlignedToInnerTileSize)
1399 isPerfectTilingCase =
false;
1400 sliceSrcIndices.push_back(info.sourceOffset);
1401 sliceSrcSizes.push_back(info.sourceSize);
1402 destExpandedSizes.push_back(info.destExpandedSize);
1403 resultOffsetsFromDest.push_back(info.resultOffset);
1408 applyPermToRange(sliceSrcIndices, sliceSrcSizes,
1409 unpackOp.getOuterDimsPerm());
1410 Attribute zeroAttr =
b.getIndexAttr(0);
1411 sliceSrcIndices.append(numInnerTiles, zeroAttr);
1412 sliceSrcSizes.append(unpackOp.getMixedTiles());
1413 sliceSrcStrides.append(numInnerTiles, oneAttr);
1414 SmallVector<Operation *> generatedSlices;
1415 tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
1416 b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
1418 generatedSlices.push_back(sliceSource);
1420 SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
1422 if (isPerfectTilingCase) {
1423 auto destSliceOp = tensor::ExtractSliceOp::create(
1424 b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
1425 sliceDest = destSliceOp;
1426 generatedSlices.push_back(destSliceOp);
1428 sliceDest = tensor::EmptyOp::create(
1429 b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
1432 SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
1433 for (
auto tile : unpackOp.getInnerTiles())
1434 tiledOperands.push_back(
tile);
1436 Operation *tiledUnpackOp = UnPackOp::create(
1439 if (isPerfectTilingCase)
1440 return TilingResult{{tiledUnpackOp},
1441 SmallVector<Value>(tiledUnpackOp->
getResults()),
1444 auto extractSlice = tensor::ExtractSliceOp::create(
1445 b, loc, tiledUnpackOp->
getResult(0), resultOffsetsFromDest, sizes,
1447 return TilingResult{
1448 {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
1453 ArrayRef<OpFoldResult> offsets,
1454 ArrayRef<OpFoldResult> sizes,
1455 SmallVector<OpFoldResult> &resultOffsets,
1456 SmallVector<OpFoldResult> &resultSizes)
const {
1457 resultOffsets = llvm::to_vector(offsets);
1458 resultSizes = llvm::to_vector(sizes);
1462 FailureOr<TilingResult>
1463 generateResultTileValue(Operation *op, OpBuilder &
b,
unsigned resultNumber,
1464 ArrayRef<OpFoldResult> offsets,
1465 ArrayRef<OpFoldResult> sizes)
const {
1466 FailureOr<TilingResult> tilingResult =
1468 if (
failed(tilingResult))
1470 return tilingResult.value();
1473 LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
1476 auto unpackOp = cast<UnPackOp>(op);
1477 assert(unpackOp.hasPureBufferSemantics() &&
1478 "expected operation to have buffer semantics");
1479 assert(ivs.size() == unpackOp.getDestRank() &&
1480 "number of ivs must match the rank of the output tensor");
1481 OpBuilder::InsertionGuard g(builder);
1484 unpackOp.getDimAndTileMapping();
1486 SmallVector<Value> inputIvs;
1488 SmallVector<Value> inputIvsPointLoops;
1489 inputIvs.reserve(unpackOp.getDestRank());
1490 inputIvsPointLoops.reserve(dimAndTileMapping.size());
1491 for (
auto dim : llvm::seq<int64_t>(0, unpackOp.getDestRank())) {
1492 if (dimAndTileMapping.count(dim)) {
1493 affine::DivModValue divMod =
1494 affine::getDivMod(builder, loc, ivs[dim],
1496 builder, loc, dimAndTileMapping[dim]));
1497 inputIvsPointLoops.push_back(divMod.remainder);
1498 inputIvs.push_back(divMod.quotient);
1500 inputIvs.push_back(ivs[dim]);
1506 assert(inputIvsPointLoops.size() + inputIvs.size() ==
1507 unpackOp.getSourceRank() &&
1508 "expect same number of induction variables equals to input rank");
1510 ArrayRef<int64_t> innerDims = unpackOp.getInnerDimsPos();
1511 SmallVector<int64_t> interchangeVector =
1512 computeInterchangeFromDimPos(innerDims, unpackOp.getDestRank());
1513 SmallVector<Value> interchangedInputIvsPointLoops = inputIvsPointLoops;
1514 interchangedInputIvsPointLoops = interchange<Value>(
1515 interchangedInputIvsPointLoops, interchangeVector, 0);
1518 ArrayRef<int64_t> outerDims = unpackOp.getOuterDimsPerm();
1519 if (!outerDims.empty())
1520 inputIvs = interchange<Value>(inputIvs, outerDims, 0);
1522 llvm::append_range(inputIvs, interchangedInputIvsPointLoops);
1524 memref::LoadOp::create(builder, loc, unpackOp.getSource(), inputIvs);
1525 memref::StoreOp::create(builder, loc, scalar, unpackOp.getDest(), ivs);
1531 LogicalResult getIterationDomainTileFromOperandTiles(
1532 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1533 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1534 ArrayRef<SmallVector<OpFoldResult>> allSizes,
1535 SmallVectorImpl<OpFoldResult> &resultOffsets,
1536 SmallVectorImpl<OpFoldResult> &resultSizes)
const {
1537 if (operandNumbers.size() != 1) {
1538 LLVM_DEBUG({ llvm::dbgs() <<
"unable to handle multiple operands"; });
1541 auto unPackOp = cast<UnPackOp>(op);
1542 unsigned operandNumber = operandNumbers[0];
1543 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1544 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1547 if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
1548 resultOffsets = llvm::to_vector(offsets);
1549 resultSizes = llvm::to_vector(sizes);
1552 Location loc = unPackOp.getLoc();
1554 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1555 auto destOffsets = offsets.drop_back(numTiles);
1556 auto destSizes = sizes.drop_back(numTiles);
1559 int64_t outputRank = unPackOp.getDestRank();
1563 SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
1564 SmallVector<OpFoldResult> origOffsets(destOffsets);
1565 SmallVector<OpFoldResult> origSizes(destSizes);
1566 applyPermToRange(origOffsets, origSizes,
1570 unPackOp.getDimAndTileMapping();
1572 for (
auto dim : llvm::seq<int64_t>(0, outputRank)) {
1573 using AV = affine::AffineValueExpr;
1574 affine::AffineBuilder ab(
b, loc);
1575 AffineExpr dim0, dim1, sym0;
1578 if (dimAndTileMapping.count(dim)) {
1582 auto avOffset = AV(dim0).bind(origOffsets[dim]);
1583 auto avSize = AV(dim0).bind(origSizes[dim]);
1584 auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
1585 auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
1586 resultOffsets.push_back(ab.mul(avOffset, avTileSize));
1587 auto avResultOffset = AV(dim1).bind(resultOffsets.back());
1588 resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
1589 ab.sub(avResultSize, avResultOffset)}));
1591 resultOffsets.push_back(origOffsets[dim]);
1592 resultSizes.push_back(origSizes[dim]);
1599 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1600 Operation *op, OpBuilder &
b, ArrayRef<unsigned> operandNumbers,
1601 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1602 ArrayRef<SmallVector<OpFoldResult>> allSizes)
const {
1603 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1604 LLVM_DEBUG({ llvm::dbgs() <<
"unhandled operands for consumer fusion"; });
1607 auto unPackOp = cast<UnPackOp>(op);
1609 if (!unPackOp.hasPureTensorSemantics())
1612 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1613 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1617 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1619 llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
1624 Location loc = unPackOp.getLoc();
1628 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1629 if (
failed(getIterationDomainTileFromOperandTiles(
1630 op,
b, operandNumbers, allOffsets, allSizes, outputOffsets,
1634 auto oneAttr =
b.getI64IntegerAttr(1);
1635 int64_t outputRank = unPackOp.getDestRank();
1636 SmallVector<OpFoldResult> strides(outputRank, oneAttr);
1638 SmallVector<Value> tiledOperands;
1640 auto extractDestSlice = tensor::ExtractSliceOp::create(
1641 b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
1642 tiledOperands.push_back(extractDestSlice);
1644 strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
1646 auto extractSourceSlice = tensor::ExtractSliceOp::create(
1647 b, loc, unPackOp.getSource(), offsets, sizes, strides);
1648 tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
1649 for (
auto tile : unPackOp.getInnerTiles())
1650 tiledOperands.push_back(
tile);
1653 Operation *tiledUnPackOp =
1654 UnPackOp::create(
b, loc,
TypeRange{extractDestSlice.getType()},
1657 return TilingResult{{tiledUnPackOp},
1658 SmallVector<Value>(tiledUnPackOp->
getResults()),
1659 llvm::to_vector(ArrayRef<Operation *>{
1660 extractSourceSlice, extractDestSlice})};
1666template <
typename OpType>
1668 OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1669 OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1674template <
typename... OpTypes>
1685 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1686 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1688#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
1696 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1697 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static RankedTensorType sliceResultType(Type operandType, GridOp grid, ArrayRef< GridAxis > gridAxes, int64_t sliceAxis)
static LogicalResult getResultTilePosition(RewriterBase &rewriter, ReductionTilingStrategy reductionStrategy, int64_t index, Value tiledResult, TilingInterface op, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims, SmallVector< OpFoldResult > &resultOffset, SmallVector< OpFoldResult > &resultSize)
static FailureOr< TilingResult > getTiledImplementation(RewriterBase &rewriter, TilingInterface op, ReductionTilingStrategy reductionStrategy, ValueRange regionIterArg, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims)
static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp, ValueRange ivs, ValueRange argValues)
Method to inline the payload of a linalgOp given the iteration space point and values for the argumen...
static SmallVector< Value > getIndicesForAccess(OpBuilder &b, Location loc, AffineMap indexingMap, ValueRange ivs)
Return the SSA values that represent the data point accessed using a given indexingMap for a given po...
static bool isInBounds(TransferOp op, int64_t resultIdx, int64_t indicesIdx)
Base type for affine expression.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
bool isProjectedPermutation(bool allowZeroInResults=false) const
Returns true if the AffineMap represents a subset (i.e.
unsigned getNumSymbols() const
unsigned getNumDims() const
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Operation is the basic unit of execution within MLIR.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
void setOperand(unsigned idx, Value value)
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Location getLoc()
The source location the operation was defined or derived from.
operand_range getOperands()
Returns an iterator on the underlying Value's.
result_range getResults()
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
static FailureOr< int64_t > computeConstantBound(presburger::BoundType type, const Variable &var, const StopConditionFn &stopCondition=nullptr, bool closedUB=false)
Compute a constant bound for the given variable.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
void registerTilingInterfaceExternalModelsForPackUnPackOps(DialectRegistry ®istry)
Similar to the above registeration, but it is only for tensor.pack and tensor.unpack ops.
static void registerOne(MLIRContext *ctx)
static void registerAll(MLIRContext *ctx)
Variadic helper function.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
void registerTilingInterfaceExternalModels(DialectRegistry ®istry)
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Include the generated interface declarations.
ReductionTilingStrategy
Tiling can be thought of as splitting a dimension into 2 and materializing the outer dimension as a l...
@ PartialReductionOuterReduction
@ PartialReductionOuterParallel
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2)
Return true if ofr1 and ofr2 are the same integer constant attribute values or the same SSA value.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
SmallVector< SmallVector< OpFoldResult > > ReifiedRankedShapedTypeDims
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
llvm::SetVector< T, Vector, Set, N > SetVector
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
bool isZeroInteger(OpFoldResult v)
Return "true" if v is an integer value/attribute with constant value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
std::pair< SmallVector< int64_t >, SmallVector< Value > > decomposeMixedValues(ArrayRef< OpFoldResult > mixedValues)
Decompose a vector of mixed static or dynamic values into the corresponding pair of arrays.
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to apply to inverse a permutation.
Helper struct to build simple arithmetic quantities with minimal type inference support.
Container for result values of tiling.
Helper struct to build simple AffineValueExprs with minimal type inference support.
A struct containg offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.