1//===- TilingInterfaceImpl.cpp - Implementation of TilingInterface -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h"
10
11#include "mlir/Analysis/SliceAnalysis.h"
12#include "mlir/Dialect/Affine/IR/AffineOps.h"
13#include "mlir/Dialect/Affine/Utils.h"
14#include "mlir/Dialect/Arith/IR/Arith.h"
15#include "mlir/Dialect/Arith/Utils/Utils.h"
16#include "mlir/Dialect/Linalg/IR/Linalg.h"
17#include "mlir/Dialect/Linalg/Utils/Utils.h"
18#include "mlir/Dialect/MemRef/IR/MemRef.h"
19#include "mlir/Dialect/Tensor/IR/Tensor.h"
20#include "mlir/Dialect/Utils/IndexingUtils.h"
21#include "mlir/Dialect/Utils/StaticValueUtils.h"
22#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
23#include "mlir/Interfaces/DestinationStyleOpInterface.h"
24#include "mlir/Interfaces/TilingInterface.h"
25#include "mlir/Interfaces/ValueBoundsOpInterface.h"
26#include "llvm/ADT/SmallVectorExtras.h"
27#include "llvm/Support/Debug.h"
28#include <optional>
29
30#define DEBUG_TYPE "linalg-tiling-interface-impl"
31
32using namespace mlir;
33using namespace mlir::linalg;
34
35//===----------------------------------------------------------------------===//
36// Utility methods for implementation of Tiling Interface for Linalg ops
37//===----------------------------------------------------------------------===//
38
39/// Return the SSA values that represent the data point accessed using a given
40/// `indexingMap` for a given point in the iteration space represented by `ivs`.
41static SmallVector<Value> getIndicesForAccess(OpBuilder &b, Location loc,
42 AffineMap indexingMap,
43 ValueRange ivs) {
44 SmallVector<Value> indices;
45 indices.reserve(indexingMap.getNumResults());
46 for (auto result : indexingMap.getResults()) {
47 AffineMap m = AffineMap::get(indexingMap.getNumDims(),
48 indexingMap.getNumSymbols(), result);
49 Value v = affine::AffineApplyOp::create(b, loc, m, ivs);
50 indices.push_back(v);
51 }
52 return indices;
53}
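// Illustrative sketch (schematic SSA names): for an indexing map
// `(d0, d1) -> (d1, d0)` and ivs `%i, %j`, this emits one single-result
// affine.apply per result of the indexing map:
//   %idx0 = affine.apply affine_map<(d0, d1) -> (d1)>(%i, %j)  // i.e. %j
//   %idx1 = affine.apply affine_map<(d0, d1) -> (d0)>(%i, %j)  // i.e. %i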
54
55/// Method to inline the payload of a `linalgOp` given the iteration space
56/// point and values for the arguments of the payload.
57static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp,
58 ValueRange ivs, ValueRange argValues) {
59 Block *body = linalgOp.getBlock();
60 IRMapping map;
61 map.map(body->getArguments(), argValues);
62 for (auto &op : body->without_terminator()) {
63 if (auto indexOp = dyn_cast<IndexOp>(&op)) {
64 map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
65 continue;
66 }
67 b.clone(op, map);
68 }
69
70 Operation *terminator = body->getTerminator();
71 Location loc = terminator->getLoc();
72 for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
73 Value toStore = map.lookupOrDefault(operand.value());
74 OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
75 SmallVector<Value> indices = getIndicesForAccess(
76 b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
77 memref::StoreOp::create(b, loc, toStore,
78 linalgOp.getDpsInitOperand(operand.index())->get(),
79 indices);
80 }
81 return success();
82}
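// Illustrative sketch (assuming identity indexing maps; names schematic):
// inlining the payload of an elementwise op with body
//   ^bb0(%in: f32, %out: f32):
//     %0 = arith.mulf %in, %in : f32
//     linalg.yield %0 : f32
// at iteration point (%i, %j) clones the arith.mulf with %in replaced by the
// pre-loaded argument value, and rewrites the yield into
//   memref.store %0, %init[%i, %j] : memref<?x?xf32>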
83
84//===----------------------------------------------------------------------===//
85// External Model for implementing `TilingInterface` for `LinalgOp`s.
86//===----------------------------------------------------------------------===//
87
88namespace {
89/// External model implementation of TilingInterface for LinalgOps. An external
90/// model implementation is used for now until the use of `TilingInterface`
91/// is on par with the current Linalg tiling + fusion patterns. Once it is,
92/// it may be possible to move this into the op definition (though there are
93/// advantages to leaving it as an external model).
94template <typename LinalgOpTy>
95struct LinalgOpTilingInterface
96 : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
97 LinalgOpTy> {
98 /// Return the loop iterator type.
99 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
100 LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
101 return concreteOp.getIteratorTypesArray();
102 }
103
104 /// Return the iteration domain range.
105 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
106 OpBuilder::InsertionGuard g(b);
107 b.setInsertionPoint(op);
108 Location loc = op->getLoc();
109 LinalgOp linalgOp = cast<LinalgOp>(op);
110 SmallVector<OpFoldResult> allShapesSizes =
111 linalgOp.createFlatListOfOperandDims(b, loc);
112 AffineMap map = linalgOp.getShapesToLoopsMap();
113
114 return llvm::map_to_vector(map.getResults(), [&](AffineExpr loopExpr) {
115 OpFoldResult ofr = affine::makeComposedFoldedAffineApply(b, loc, loopExpr,
116 allShapesSizes);
117 return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
118 });
119 }
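// Illustrative sketch (schematic values): for a linalg.matmul on
// tensor<?x42xf32> and tensor<42x?xf32>, the shapes-to-loops map recovers the
// (M, N, K) loops, so the returned domain is
//   {[0, %dimM, 1], [0, %dimN, 1], [0, 42, 1]}
// with the dynamic sizes materialized via makeComposedFoldedAffineApply.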
120
121 /// Instantiate the tiled implementation of the operation.
122 FailureOr<TilingResult>
123 getTiledImplementation(Operation *op, OpBuilder &b,
124 ArrayRef<OpFoldResult> offsets,
125 ArrayRef<OpFoldResult> sizes) const {
126 // Leave the `sizeBounds` value empty. That is only needed when the `sizes`
127 // specified could lead to out of bounds accesses.
128 Location loc = op->getLoc();
129 LinalgOp linalgOp = cast<LinalgOp>(op);
130 SmallVector<Value> valuesToTile = linalgOp->getOperands();
131 SmallVector<Value> tiledOperands = makeTiledShapes(
132 b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
133 SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
134 llvm::make_filter_range(
135 tiledOperands,
136 [](Value v) -> bool {
137 return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
138 v.getDefiningOp());
139 }),
140 [](Value v) -> Operation * { return v.getDefiningOp(); });
141
142 SmallVector<Type> resultTensorTypes =
143 getTensorOutputTypes(linalgOp, tiledOperands);
144
145 Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
146 offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);
147
148 return TilingResult{
149 {tiledOp}, SmallVector<Value>(tiledOp->getResults()), generatedSlices};
150 }
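// Illustrative sketch (schematic, matmul case): tiling at iteration-space
// offsets (%io, %jo, 0) with sizes (%tm, %tn, %k) produces
//   %lhs = tensor.extract_slice %A[%io, 0] [%tm, %k] [1, 1]
//   %rhs = tensor.extract_slice %B[0, %jo] [%k, %tn] [1, 1]
//   %out = tensor.extract_slice %C[%io, %jo] [%tm, %tn] [1, 1]
//   %res = linalg.matmul ins(%lhs, %rhs : ...) outs(%out : ...)
// with the extract_slice ops reported back as `generatedSlices`.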
151
152 /// Utility to fetch the offsets and sizes when applied as per the indexing
153 /// map of the linalg op. This helps in fusing the linalg op as a consumer of
154 /// a given slice op.
155 static LogicalResult
156 getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b,
157 ArrayRef<AffineMap> indexingMaps,
158 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
159 ArrayRef<SmallVector<OpFoldResult>> allSizes,
160 SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
161 SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
162 DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;
163
164 for (auto [indexingMap, offsets, sizes] :
165 llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
166 for (auto [resultExpr, offset, size] :
167 llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
168 auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
169 if (!dimExpr)
170 return failure();
171 unsigned position = dimExpr.getPosition();
172 auto it = mappedOffsets.find(position);
173 if (it != mappedOffsets.end()) {
174 OpFoldResult seenOffset = it->second;
175 OpFoldResult seenSize = mappedSizes.lookup(position);
176 if (seenOffset != offset || seenSize != size) {
177 LLVM_DEBUG({
178 llvm::dbgs() << "inconsistent iteration space mapping from "
179 "offsets/sizes of operands/results";
180 });
181 return failure();
182 }
183 } else {
184 mappedOffsets[position] = offset;
185 mappedSizes[position] = size;
186 }
187 }
188 }
189
190 // Aggregate from the given operand offsets and sizes, or default to
191 // iteration space values.
192 SmallVector<Range> iterationDomain =
193 cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
194 mappedOffsetsVec.resize(iterationDomain.size());
195 mappedSizesVec.resize(iterationDomain.size());
196 for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
197 auto it = mappedOffsets.find(index);
198 if (it != mappedOffsets.end()) {
199 mappedOffsetsVec[index] = it->second;
200 mappedSizesVec[index] = mappedSizes.lookup(index);
201 continue;
202 }
203 mappedOffsetsVec[index] = domain.offset;
204 mappedSizesVec[index] = domain.size;
205 }
206 return success();
207 }
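// Illustrative sketch (schematic values): with indexing maps
// {(d0, d1) -> (d0), (d0, d1) -> (d1)} and per-operand offsets
// {{%o0}, {%o1}} / sizes {{%s0}, {%s1}}, the mapped iteration space is
// offsets {%o0, %o1} and sizes {%s0, %s1}. Loop dimensions not covered by any
// operand tile fall back to the full range from the iteration domain, and
// operands that disagree on a dimension's offset/size make this fail.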
208
209 /// Method to return the position of the result tile computed by the tiled
210 /// operation.
211 LogicalResult getIterationDomainTileFromOperandTiles(
212 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
213 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
214 ArrayRef<SmallVector<OpFoldResult>> allSizes,
215 SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
216 SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
217 auto linalgOp = cast<LinalgOp>(op);
218
219 SmallVector<AffineMap> indexingMaps =
220 llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
221 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
222 return linalgOp.getMatchingIndexingMap(&opOperand);
223 });
224 if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
225 allSizes, iterDomainOffsets,
226 iterDomainSizes))) {
227 return failure();
228 }
229 return success();
230 }
231
232 /// Return the details of the output tile generated by the tiled
233 /// implementation.
234 LogicalResult
235 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
236 ArrayRef<OpFoldResult> offsets,
237 ArrayRef<OpFoldResult> sizes,
238 SmallVector<OpFoldResult> &resultOffsets,
239 SmallVector<OpFoldResult> &resultSizes) const {
240 Location loc = op->getLoc();
241 LinalgOp linalgOp = cast<LinalgOp>(op);
242
243 AffineExpr d0;
244 bindDims(b.getContext(), d0);
245 SmallVector<OpFoldResult> subShapeSizes =
246 llvm::map_to_vector(sizes, [&](OpFoldResult ofr) {
247 return affine::makeComposedFoldedAffineApply(b, loc, d0 - 1, ofr);
248 });
249
250 OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
251 SliceParameters sliceParams = computeSliceParameters(
252 b, loc, outOperand->get(), sizes,
253 linalgOp.getMatchingIndexingMap(outOperand), offsets,
254 /*ubs*/ {}, subShapeSizes, true);
255 resultOffsets = sliceParams.offsets;
256 resultSizes = sliceParams.sizes;
257 return success();
258 }
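// Illustrative sketch (schematic, matmul case): for a result accessed via
// (d0, d1, d2) -> (d0, d1), iteration-space offsets (%io, %jo, %ko) and sizes
// (%tm, %tn, %tk) map to result offsets (%io, %jo) and sizes (%tm, %tn).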
259
260 LogicalResult getIterationDomainTileFromResultTile(
261 Operation *op, OpBuilder &b, unsigned resultNumber,
262 ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
263 SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
264 SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
265 auto linalgOp = cast<LinalgOp>(op);
266
267 // Check that the indexing map used for the output is a projected
268 // permutation. This could be relaxed with a more general approach that can
269 // map the offsets and sizes from the result to iteration space tiles
270 // (filling in full extent for dimensions not used to access the result).
271 AffineMap indexingMap =
272 linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
273 if (!indexingMap.isProjectedPermutation()) {
274 return op->emitOpError(
275 "unhandled tiled implementation generation when result is not "
276 "accessed using a permuted projection");
277 }
278
279 SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
280 SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
281 auto status =
282 getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
283 {allSizes}, iterDomainOffsets, iterDomainSizes);
284 (void)status;
285 assert(succeeded(status) && "unexpected error in offset calculation");
286 return success();
287 }
288
289 FailureOr<TilingResult>
290 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
291 ArrayRef<OpFoldResult> offsets,
292 ArrayRef<OpFoldResult> sizes) const {
293 SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
294 if (failed(getIterationDomainTileFromResultTile(
295 op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
296 return failure();
297 }
298 auto tilingInterfaceOp = cast<TilingInterface>(op);
299 FailureOr<TilingResult> tilingResult =
300 tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);
301
302 if (failed(tilingResult))
303 return failure();
304
305 if (tilingResult->tiledOps.size() != 1)
306 return op->emitOpError("failed to generate tiled implementation");
307
308 return TilingResult{
309 tilingResult->tiledOps,
310 SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
311 tilingResult->generatedSlices};
312 }
313
314 /// Method to generate the tiled implementation of an operation from the tile
315 /// of the operand.
316 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
317 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
318 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
319 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
320 SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
321 if (failed(getIterationDomainTileFromOperandTiles(
322 op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
323 mappedSizes))) {
324 return failure();
325 }
326 return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
327 }
328
329 LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
330 Location loc,
331 ValueRange ivs) const {
332 auto linalgOp = cast<LinalgOp>(op);
333 if (!linalgOp.hasPureBufferSemantics())
334 return op->emitOpError("expected operation to have buffer semantics");
335
336 SmallVector<Value> indexedValues;
337 indexedValues.reserve(linalgOp->getNumOperands());
338 Location linalgOpLoc = op->getLoc();
339 /// Load the data corresponding to the block arguments that
340 /// represent input operands.
341 for (OpOperand &operand : linalgOp->getOpOperands()) {
342 if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
343 indexedValues.push_back(nullptr);
344 continue;
345 }
346 if (linalgOp.isScalar(&operand)) {
347 indexedValues.push_back(operand.get());
348 continue;
349 }
350 SmallVector<Value> indices = getIndicesForAccess(
351 builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
352 Value load =
353 memref::LoadOp::create(builder, linalgOpLoc, operand.get(), indices);
354 indexedValues.push_back(load);
355 }
356
357 /// Inline the op payload and store the result.
358 return inlinePayload(builder, linalgOp, ivs, indexedValues);
359 }
360
361 bool isOpFusableWithConsumerSlice(Operation *op, unsigned resultNumber,
362 ArrayRef<OpFoldResult> offsets,
363 ArrayRef<OpFoldResult> sizes) const {
364 // The verifier gives all the necessary requirements for consumer fusion.
365 return true;
366 }
367
368 bool isOpFusableWithProducerSlices(
369 Operation *op, ArrayRef<unsigned> operandNumbers,
370 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
371 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
372
373 auto linalgOp = cast<LinalgOp>(op);
374 SmallVector<AffineMap> indexingMaps =
375 llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
376 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
377 return linalgOp.getMatchingIndexingMap(&opOperand);
378 });
379 // Check that offsets/sizes are consistent across all operands.
380 OpBuilder b(op);
381 SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
382 return succeeded(getMappedOffsetAndSize(linalgOp, b, indexingMaps,
383 allOffsets, allSizes, mappedOffsets,
384 mappedSizes));
385 }
386};
387
388//===----------------------------------------------------------------------===//
389// External Model for implementing `PartialReductionInterface` for `LinalgOp`s.
390//===----------------------------------------------------------------------===//
391
392/// In a given set vector, get the position of a particular element.
393std::optional<int> getPositionIn(const llvm::SetVector<unsigned> &reductionDims,
394 unsigned value) {
395 for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
396 if (reductionDim == value) {
397 return index;
398 }
399 }
400 return std::nullopt;
401}
402
403/// Return the AffineMaps to use for the `outs` operands of the linalg op
404/// generated for partial results. Each new AffineMap is the AffineMap of the
405/// untiled op with the reduction dimensions appended at the end, in the
406/// order in which they were specified during tiling.
407static SmallVector<AffineMap>
408getPartialResultAffineMaps(LinalgOp linalgOp,
409 const SetVector<unsigned> &reductionDims) {
410 auto partialReductionMaps = llvm::map_to_vector(
411 linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
412 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
413 for (auto redPos : reductionDims) {
414 map =
415 map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
416 map.getNumResults());
417 }
418 return map;
419 });
420 return partialReductionMaps;
421}
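// Illustrative sketch: for a reduction with init map (d0, d1) -> (d0) and
// reductionDims = {1}, the partial-result map becomes (d0, d1) -> (d0, d1);
// the reduced dimension is kept as a trailing dimension of the partial
// accumulator and is only folded away by the final merge step.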
422
423struct InitSliceInfo {
424 SmallVector<int64_t> resultShape;
425 SmallVector<OpFoldResult> offsets;
426 SmallVector<OpFoldResult> sizes;
427 SmallVector<OpFoldResult> strides;
428};
429
430/// Return the result shape, offsets, sizes and strides of the slice of the
431/// `initValue` to use as the destination of the partial reduction op generated
432/// with outer reduction strategy.
433static InitSliceInfo getInitSliceInfoForOuterReduction(
434 MLIRContext *context, ArrayRef<OpFoldResult> offsets,
435 ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
436 ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
437 int64_t initRank = partialReductionMap.getNumResults();
438 SmallVector<OpFoldResult> initOffsets, initSizes;
439 Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
440 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
441 SmallVector<OpFoldResult> initStrides(initRank, one);
442 for (AffineExpr dimExpr : partialReductionMap.getResults()) {
443 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
444 if (reductionDims.contains(dim)) {
445 initOffsets.push_back(zero);
446 } else {
447 initOffsets.push_back(offsets[dim]);
448 }
449 initSizes.push_back(sizes[dim]);
450 }
451 SmallVector<int64_t> resultShape;
452 std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
453 return {resultShape, initOffsets, initSizes, initStrides};
454}
455
456/// Return the result shape, offsets, sizes and strides of the slice of the
457/// `initValue` to use as destination of the partial reduction op generated with
458/// outer parallel strategy.
459static InitSliceInfo getInitSliceInfoForOuterParallel(
460 MLIRContext *context, ArrayRef<OpFoldResult> offsets,
461 ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
462 ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
463 int64_t initRank = partialReductionMap.getNumResults();
464 SmallVector<OpFoldResult> initOffsets, initSizes;
465 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
466 SmallVector<OpFoldResult> initStrides(initRank, one);
467 SmallVector<OpFoldResult> resultShape;
468 for (AffineExpr dimExpr : partialReductionMap.getResults()) {
469 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
470 if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
471 initOffsets.push_back(splitReductionIvs[dimPos.value()]);
472 initSizes.push_back(one);
473 } else {
474 initOffsets.push_back(offsets[dim]);
475 initSizes.push_back(sizes[dim]);
476 resultShape.push_back(sizes[dim]);
477 }
478 }
479 SmallVector<int64_t> staticShapes;
480 std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
481 return {staticShapes, initOffsets, initSizes, initStrides};
482}
483
484/// Return the result shape, offsets, sizes and strides of the slice of the
485/// `initValue` to use as destination of the partial reduction op.
486static InitSliceInfo getInitSliceInfo(MLIRContext *context,
487 ReductionTilingStrategy strategy,
488 ArrayRef<OpFoldResult> offsets,
489 ArrayRef<OpFoldResult> sizes,
490 const SetVector<unsigned> &reductionDims,
491 ArrayRef<OpFoldResult> splitReductionIvs,
492 AffineMap partialReductionMap) {
493 if (strategy == ReductionTilingStrategy::PartialReductionOuterReduction) {
494 return getInitSliceInfoForOuterReduction(context, offsets, sizes,
495 reductionDims, splitReductionIvs,
496 partialReductionMap);
497 }
499 "unexpected ReductionTilingStrategy");
500 return getInitSliceInfoForOuterParallel(context, offsets, sizes,
501 reductionDims, splitReductionIvs,
502 partialReductionMap);
503}
504
505/// External model implementation of PartialReductionInterface for
506/// LinalgOps.
507template <typename LinalgOpTy>
508struct LinalgOpPartialReductionInterface
509 : public PartialReductionOpInterface::ExternalModel<
510 LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
511 FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
512 Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
513 const SetVector<unsigned> &reductionDims) const {
514 auto linalgOp = cast<LinalgOp>(op);
515
516 OpBuilder::InsertionGuard guard(b);
517 if (linalgOp.hasPureBufferSemantics())
518 return op->emitOpError("expected operation to have tensor semantics");
519
520 SmallVector<AffineMap> partialResultMaps =
521 getPartialResultAffineMaps(linalgOp, reductionDims);
522
523 SmallVector<Value> inits;
524 for (auto [initIdx, result, partialMap] :
525 llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
526 SmallVector<Operation *, 4> combinerOps;
527 if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
528 combinerOps) ||
529 combinerOps.size() != 1)
530 return op->emitOpError("failed to analyze the reduction operation");
531
532 Operation *reductionOp = combinerOps[0];
533 std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
534 if (!identity.has_value())
535 return op->emitOpError(
536 "Failed to get an identity value for the reduction operation.");
537
538 // Append the new partial result dimensions.
539 SmallVector<OpFoldResult> partialResultShape;
540 for (AffineExpr dimExpr : partialMap.getResults()) {
541 auto dim = cast<AffineDimExpr>(dimExpr);
542 partialResultShape.push_back(sizes[dim.getPosition()]);
543 }
544
545 Type elType = getElementTypeOrSelf(result.getType());
546 Value emptyTensor =
547 tensor::EmptyOp::create(b, loc, partialResultShape, elType);
548 Value constantOp = arith::ConstantOp::create(b, loc, *identity);
549 auto identityTensor =
550 linalg::FillOp::create(b, loc, constantOp, emptyTensor);
551 inits.push_back(identityTensor.getResult(0));
552 }
553
554 return inits;
555 }
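// Illustrative sketch (schematic): for a sum reduction whose combiner is
// arith.addf, this emits per result
//   %empty = tensor.empty(...) : tensor<...xf32>  // partial-result shape
//   %zero = arith.constant 0.000000e+00 : f32     // neutral element of addf
//   %init = linalg.fill ins(%zero : f32) outs(%empty : tensor<...xf32>)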
556
557 FailureOr<TilingResult>
558 tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
559 ReductionTilingStrategy tilingStrategy,
560 ValueRange init, ArrayRef<OpFoldResult> offsets,
561 ArrayRef<OpFoldResult> sizes,
562 const SetVector<unsigned> &reductionDims,
563 ArrayRef<OpFoldResult> splitReductionIvs) const {
564 OpBuilder::InsertionGuard guard(b);
565 auto linalgOp = cast<LinalgOp>(op);
566
567 SmallVector<AffineMap> partialReductionMaps =
568 getPartialResultAffineMaps(linalgOp, reductionDims);
569
570 // Step 1. Extend the init maps to include the reduction dimensions, since
571 // we are converting them to parallel dimensions.
572 SmallVector<AffineMap> newInitMaps;
573 if (tilingStrategy ==
574 ReductionTilingStrategy::PartialReductionOuterReduction) {
575 newInitMaps = llvm::to_vector(partialReductionMaps);
576 } else {
577 newInitMaps = llvm::map_to_vector(
578 linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
579 return linalgOp.getMatchingIndexingMap(&opOperand);
580 });
581 }
582
583 // Step 2a: Extract a slice of the input operands.
584 SmallVector<Value> tiledInputs = makeTiledShapes(
585 b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
586 SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
587 llvm::make_filter_range(
588 tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
589 [](Value v) -> Operation * { return v.getDefiningOp(); });
590
591 // Step 2b: Extract a slice of the init operands.
592 SmallVector<Value, 1> tiledInits;
593 for (auto [partialReductionMap, valueToTile] :
594 llvm::zip_equal(partialReductionMaps, init)) {
595 InitSliceInfo sliceInfo = getInitSliceInfo(
596 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
597 splitReductionIvs, partialReductionMap);
598 auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
599 RankedTensorType sliceResultType = RankedTensorType::get(
600 sliceInfo.resultShape, valueToTileType.getElementType(),
601 valueToTileType.getEncoding());
602 auto sliceOp = tensor::ExtractSliceOp::create(
603 b, loc, sliceResultType, valueToTile, sliceInfo.offsets,
604 sliceInfo.sizes, sliceInfo.strides);
605 tiledInits.push_back(sliceOp.getResult());
606 generatedSlices.push_back(sliceOp);
607 }
608
609 // Update the indexing maps.
610 SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
611 for (auto [initOperand, newInitMap] :
612 llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
613 int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
614 newMaps[mapIdx] = newInitMap;
615 }
616
617 // Step 3. Change the reduction dim iterator types.
618 SmallVector<utils::IteratorType> newIteratorTypes =
619 linalgOp.getIteratorTypesArray();
620 if (tilingStrategy ==
621 ReductionTilingStrategy::PartialReductionOuterReduction) {
622 for (int dim : reductionDims)
623 newIteratorTypes[dim] = utils::IteratorType::parallel;
624 }
625
626 // Step 4. Create the new generic op.
627 Operation *partialReductionOp;
628 auto resultTypes = ValueRange(tiledInits).getTypes();
629 if (tilingStrategy ==
630 ReductionTilingStrategy::PartialReductionOuterReduction) {
631 auto genericOp = GenericOp::create(b, loc, resultTypes, tiledInputs,
632 tiledInits, newMaps, newIteratorTypes);
633 IRMapping mapping;
634 op->getRegion(0).cloneInto(&genericOp.getRegion(),
635 genericOp.getRegion().begin(), mapping);
636 partialReductionOp = genericOp.getOperation();
637 } else {
638 SmallVector<Value> operands = std::move(tiledInputs);
639 llvm::append_range(operands, tiledInits);
640 partialReductionOp = mlir::clone(b, op, resultTypes, operands);
641 }
642 return TilingResult{
643 {partialReductionOp},
644 llvm::map_to_vector(partialReductionOp->getResults(),
645 [](OpResult r) -> Value { return r; }),
646 generatedSlices};
647 }
648
649 FailureOr<MergeResult>
650 mergeReductions(Operation *op, OpBuilder &b, Location loc,
651 ValueRange partialReduce,
652 const SetVector<unsigned> &reductionDims) const {
653 auto linalgOp = cast<LinalgOp>(op);
654 SmallVector<AffineMap> partialReductionMaps =
655 getPartialResultAffineMaps(linalgOp, reductionDims);
656
657 // Permute the reduction dims as permuted by the partial result map.
658 SmallVector<Operation *> mergeOperations;
659 SmallVector<Value> replacements;
660 for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
661 linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
662 unsigned initIdx = idx;
663 // linalg.reduce's iteration space is the tiled result's iteration space
664 // (and not the tiled operation's iteration space). To account for this,
665 // permute the reduction dimensions based on the partial result map of the
666 // tiled result.
667 SmallVector<int64_t> partialReductionDims;
668 for (auto [resultNum, dimExpr] :
669 llvm::enumerate(partialMap.getResults())) {
670 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
671 if (llvm::is_contained(reductionDims, dim)) {
672 partialReductionDims.push_back(resultNum);
673 }
674 }
675
676 auto reduction = linalg::ReduceOp::create(
677 b, loc, partialResult, init, partialReductionDims,
678 [&linalgOp, &initIdx](OpBuilder &b, Location loc, ValueRange inputs) {
679 // Get the combiner op.
680 SmallVector<Operation *, 4> combinerOps;
681 matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
682 combinerOps);
683 Operation *clonedReductionOp = b.clone(*combinerOps[0]);
684 // Combine the input at idx and output at numInits + idx.
685 clonedReductionOp->setOperand(0, inputs[0]);
686 clonedReductionOp->setOperand(1, inputs[1]);
687 linalg::YieldOp::create(b, loc, clonedReductionOp->getResult(0));
688 });
689
690 mergeOperations.push_back(reduction);
691 replacements.push_back(reduction->getResult(0));
692 }
693
694 return MergeResult{mergeOperations, replacements};
695 }
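// Illustrative sketch (schematic): merging a partial sum of type
// tensor<?x4xf32> (with the tiled reduction dimension kept as the trailing
// dimension) into the original init of type tensor<?xf32> emits
//   %merged = linalg.reduce ins(%partial ...) outs(%init ...) dimensions = [1]
// whose body clones the matched combiner (e.g. arith.addf) and yields it.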
696
697 LogicalResult getPartialResultTilePosition(
698 Operation *op, OpBuilder &b, unsigned resultNumber,
699 ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
700 ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
701 ArrayRef<OpFoldResult> splitReductionIvs,
702 SmallVector<OpFoldResult> &resultOffsets,
703 SmallVector<OpFoldResult> &resultSizes) const {
704 auto linalgOp = cast<LinalgOp>(op);
705 SmallVector<AffineMap> partialReductionMaps =
706 getPartialResultAffineMaps(linalgOp, reductionDims);
707 InitSliceInfo sliceInfo = getInitSliceInfo(
708 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
709 splitReductionIvs, partialReductionMaps[resultNumber]);
710 std::swap(resultOffsets, sliceInfo.offsets);
711 std::swap(resultSizes, sliceInfo.sizes);
712
713 return success();
714 }
715};
716
717template <typename OpTy>
718static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
719 OpBuilder &builder) {
720 static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
721 "applies to only pack or unpack operations");
722 OpBuilder::InsertionGuard g(builder);
723 int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
724 : op.getDestRank();
725 OpFoldResult zero = builder.getIndexAttr(0);
726 OpFoldResult one = builder.getIndexAttr(1);
727 ReifiedRankedShapedTypeDims resultShape;
728 (void)reifyResultShapes(builder, op, resultShape);
729 SmallVector<Range> loopBounds(rank);
730 for (auto dim : llvm::seq<int64_t>(0, rank)) {
731 loopBounds[dim].offset = zero;
732 loopBounds[dim].stride = one;
733 loopBounds[dim].size = resultShape[0][dim];
734 }
735 return loopBounds;
736}
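// Illustrative sketch: for a linalg.pack of tensor<30xf32> into
// tensor<5x6xf32> (inner tile 6), the domain ranges over the source rank but
// is sized by the outer result dims: {[0, 5, 1]}. For the inverse
// linalg.unpack, the domain follows the dest: {[0, 30, 1]}.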
737
738static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
739 SmallVector<OpFoldResult> &sizes,
740 ArrayRef<int64_t> permutation) {
741 if (permutation.empty())
742 return;
743 applyPermutationToVector<OpFoldResult>(offsets, permutation);
744 applyPermutationToVector<OpFoldResult>(sizes, permutation);
745}
746
747struct PackOpTiling
748 : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
749
750 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
751 // Note that here we only consider untiled dimensions and outer tiled data
752 // dimensions, the inner tiled data dimensions are materialized when
753 // building the body of the operation.
754 auto packOp = cast<PackOp>(op);
755 SmallVector<utils::IteratorType> iteratorTypes(
756 packOp.getSourceRank(), utils::IteratorType::parallel);
757 return iteratorTypes;
758 }
759
760 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
761 return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
762 }
763
764 FailureOr<TilingResult>
765 getTiledImplementation(Operation *op, OpBuilder &b,
766 ArrayRef<OpFoldResult> offsets,
767 ArrayRef<OpFoldResult> sizes) const {
768 auto packOp = cast<PackOp>(op);
769 // TODO: Support Memref PackOp. Temporarily return failure.
770 if (!packOp.hasPureTensorSemantics())
771 return failure();
772
773 Location loc = packOp.getLoc();
774
775 // The tiling is applied on interchanged dimensions. We have to undo the
776 // interchange to map sizes and offsets to the original input.
777 int64_t inputRank = packOp.getSourceRank();
778 SmallVector<OpFoldResult> origOffsets(offsets);
779 SmallVector<OpFoldResult> origSizes(sizes);
780 applyPermToRange(origOffsets, origSizes,
781 invertPermutationVector(packOp.getOuterDimsPerm()));
782
783 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
784 packOp.getDimAndTileMapping();
785 SmallVector<OpFoldResult> srcDimValues =
786 tensor::getMixedSizes(b, loc, packOp.getSource());
787 SmallVector<OpFoldResult> inputIndices, inputSizes;
788 for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
789 using AV = affine::AffineValueExpr;
790 affine::AffineBuilder ab(b, loc);
791 AffineExpr dim0, dim1, sym;
792 bindDims(b.getContext(), dim0, dim1);
793 bindSymbols(b.getContext(), sym);
794 if (dimAndTileMapping.count(dim)) {
795 // If the data dimension is tiled, the i-th index is the product of
796 // offset_i and tile_i, and the i-th size is the product of sizes_i and
797 // tile_i.
798 auto avOffset = AV(dim0).bind(origOffsets[dim]);
799 auto avSize = AV(dim0).bind(origSizes[dim]);
800 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
801 inputIndices.push_back(ab.mul(avOffset, avTileSize));
802 inputSizes.push_back(ab.mul(avSize, avTileSize));
803 } else {
804 inputIndices.push_back(origOffsets[dim]);
805 inputSizes.push_back(origSizes[dim]);
806 }
807
808 // Limit the size of the input operand for incomplete tiles.
809 if (packOp.getPaddingValue()) {
810 OpFoldResult dimSize = srcDimValues[dim];
811 auto avDimSize = AV(dim0).bind(dimSize);
812 auto avInputIdx = AV(dim1).bind(inputIndices.back());
813 inputSizes.back() =
814 ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
815 }
816 }
817
818 auto oneAttr = b.getI64IntegerAttr(1);
819 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
820
821 SmallVector<Value> tiledOperands;
822 auto sourceSlice = tensor::ExtractSliceOp::create(
823 b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
824 tiledOperands.push_back(sourceSlice);
825
826 SmallVector<OpFoldResult> outputOffsets, outputSizes;
827 if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
828 outputSizes)))
829 return {};
830
831 strides.append(packOp.getDestRank() - inputRank, oneAttr);
832 auto outSlice = tensor::ExtractSliceOp::create(
833 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
834 tiledOperands.push_back(outSlice);
835
836 if (auto val = packOp.getPaddingValue())
837 tiledOperands.push_back(val);
838 for (auto tile : packOp.getInnerTiles())
839 tiledOperands.push_back(tile);
840
841 Operation *tiledPackOp = PackOp::create(
842 b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
843
844 return TilingResult{
845 {tiledPackOp},
846 SmallVector<Value>(tiledPackOp->getResults()),
847 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
848 }
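// Illustrative sketch (schematic): tiling the pack of tensor<30xf32> into
// tensor<5x6xf32> (inner tile 6) with outer-dim offset %o and size %s yields
//   %src = tensor.extract_slice %source[%o * 6] [%s * 6] [1]
//   %dst = tensor.extract_slice %dest[%o, 0] [%s, 6] [1, 1]
//   %tiled = linalg.pack %src ... into %dst
// with the source size additionally clamped via affine.min when a padding
// value is present.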
849
850 LogicalResult
851 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
852 ArrayRef<OpFoldResult> offsets,
853 ArrayRef<OpFoldResult> sizes,
854 SmallVector<OpFoldResult> &resultOffsets,
855 SmallVector<OpFoldResult> &resultSizes) const {
856 // The iteration domain is over outer dimensions of packed layout. In this
857 // context, the outer dimensions of `resultOffsets` are `offsets`. The
858 // inner dimensions of `resultOffsets` are zeros because tiling is not
859 // applied to them.
860 auto packOp = cast<PackOp>(op);
861 int64_t inputRank = packOp.getSourceRank();
862 int64_t outputRank = packOp.getDestRank();
863 auto zeroAttr = b.getI64IntegerAttr(0);
864 resultOffsets.assign(offsets.begin(), offsets.end());
865 resultOffsets.append(outputRank - inputRank, zeroAttr);
866
867 ReifiedRankedShapedTypeDims outputShape;
868 (void)reifyResultShapes(b, packOp, outputShape);
869 resultSizes.assign(sizes.begin(), sizes.end());
870 for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
871 resultSizes.push_back(outputShape[0][dataTileDim]);
872
873 return success();
874 }
875
876 FailureOr<TilingResult>
877 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
878 ArrayRef<OpFoldResult> offsets,
879 ArrayRef<OpFoldResult> sizes) const {
880 auto packOp = cast<PackOp>(op);
881 int64_t numTiles = packOp.getInnerDimsPos().size();
882
883 // tensor.pack op is fusible (as a producer) only if full inner tiles are
884 // iterated or inner dims are not tiled. Otherwise, it will generate a
885 // sequence of non-trivial ops (for partial tiles).
886 for (auto offset : offsets.take_back(numTiles))
887 if (!isZeroInteger(offset))
888 return failure();
889
890 for (auto iter :
891 llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
892 if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
893 return failure();
894
895 FailureOr<TilingResult> tilingResult = getTiledImplementation(
896 op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
897 if (failed(tilingResult))
898 return failure();
899 return tilingResult.value();
900 }
901
902 /// Method to return the position of iteration domain tile computed by the
903 /// tiled operation. In current `tensor.pack` context, the `resultOffsets` and
904 /// `resultSizes` only cover outer dimensions.
905 LogicalResult getIterationDomainTileFromOperandTiles(
906 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
907 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
908 ArrayRef<SmallVector<OpFoldResult>> allSizes,
909 SmallVectorImpl<OpFoldResult> &resultOffsets,
910 SmallVectorImpl<OpFoldResult> &resultSizes) const {
911 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
912 LLVM_DEBUG(
913 { llvm::dbgs() << "unsupported operands for consumer fusion"; });
914 return failure();
915 }
916
917 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
918 ArrayRef<OpFoldResult> sizes(allSizes[0]);
919 auto packOp = cast<PackOp>(op);
920 Location loc = packOp.getLoc();
921 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
922 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
923 packOp.getDimAndTileMapping();
924 SmallVector<int64_t> outerShapeWithoutTranspose(
925 packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
926 if (!packOp.getOuterDimsPerm().empty()) {
927 applyPermutationToVector(
928 outerShapeWithoutTranspose,
929 invertPermutationVector(packOp.getOuterDimsPerm()));
930 }
931 for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
932 if (dimAndTileMapping.count(dim)) {
933 FailureOr<int64_t> cstTileSize =
934 ValueBoundsConstraintSet::computeConstantBound(
935 presburger::BoundType::UB, sizes[dim],
936 /*stopCondition=*/nullptr, /*closedUB=*/true);
937 std::optional<int64_t> cstInnerSize =
938 getConstantIntValue(dimAndTileMapping[dim]);
939
940 // If a dimension is not tiled, it is always valid to fuse the pack op,
941 // even if the op has padding semantics, because it always generates a
942 // full slice along that dimension. The tile sizes are in the unpacked
943 // domain, i.e., bounded by `srcDimSize`, so `tileSize < srcDimSize` means
944 // that the dimension is tiled.
945 // TODO: The dimension could be untiled if `srcDimSize` is dynamic; it is
946 // hard to check whether a dimension is tiled or not in that case.
947 int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
948 int64_t destDimSize = outerShapeWithoutTranspose[dim];
949 bool isTiled = failed(cstTileSize) ||
950 ShapedType::isDynamic(srcDimSize) ||
951 cstTileSize.value() < srcDimSize;
952 if (!isTiled) {
953 outerDimOffsets.push_back(offsets[dim]);
954 if (ShapedType::isStatic(destDimSize)) {
955 outerDimSizes.push_back(b.getIndexAttr(destDimSize));
956 } else {
957 outerDimSizes.push_back(
958 b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
959 }
960 continue;
961 }
962
963 // Currently, fusing `packOp` as a consumer expects a perfect tiling
964 // scenario because, even without padding semantics, the `packOp` may
965 // yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
966 // where the `tileSize` from the operand of `packOp` is 5, which is not
967 // exactly divisible by the `innerTile` (= 6) of `packOp`. As a result:
968 // 1. the first slice is extracted from (0) to (4) and inserted into
969 // (0,0)~(0,4) in the first row;
970 // 2. the second slice is extracted from (5) to (9) and WOULD have to be
971 // inserted into two rows of different lengths: (0,5) in the first row
972 // and (1,0)~(1,3) in the second row. It is hard to coordinate them, so
973 // the constraint below bypasses such cases temporarily. In other
974 // words, we can only support tiling with a consumer if the tile size
975 // for the producer is a multiple of the inner tile size for the packed
976 // dimensions at this moment.
977 if ((failed(cstTileSize) || !cstInnerSize ||
978 *cstTileSize % *cstInnerSize != 0))
979 return failure();
980
981 using AV = affine::AffineValueExpr;
982 affine::AffineBuilder ab(b, loc);
983 AffineExpr dim0, sym;
984 bindDims(b.getContext(), dim0);
985 bindSymbols(b.getContext(), sym);
986 auto avOffset = AV(dim0).bind(offsets[dim]);
987 auto avSize = AV(dim0).bind(sizes[dim]);
988 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
989 outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
990 outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
991 } else {
992 outerDimOffsets.push_back(offsets[dim]);
993 outerDimSizes.push_back(sizes[dim]);
994 }
995 }
996 applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
997 resultOffsets = outerDimOffsets;
998 resultSizes = outerDimSizes;
999 return success();
1000 }
1001
1002 /// Method to return the tiled implementation of tensor.pack as a consumer.
1003 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1004 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1005 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1006 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
1007 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1008 LLVM_DEBUG(
1009 { llvm::dbgs() << "unhandled operands for consumer fusion"; });
1010 return failure();
1011 }
1012
1013 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1014 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1015
1016 auto packOp = cast<PackOp>(op);
1017 // TODO: Support Memref PackOp. Temporarily return failure.
1018 if (!packOp.hasPureTensorSemantics())
1019 return failure();
1020
1021 Location loc = packOp.getLoc();
1022
1023 int64_t inputRank = packOp.getSourceRank();
1024 auto oneAttr = b.getI64IntegerAttr(1);
1025 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
1026
1027 SmallVector<Value> tiledOperands;
1028 auto sourceSlice = tensor::ExtractSliceOp::create(
1029 b, loc, packOp.getSource(), offsets, sizes, strides);
1030 tiledOperands.push_back(sourceSlice);
1031
1032 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
1033 if (failed(getIterationDomainTileFromOperandTiles(
1034 op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
1035 outerDimSizes)))
1036 return failure();
1037
1038 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1039 if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
1040 outputOffsets, outputSizes)))
1041 return failure();
1042
1043 strides.append(packOp.getDestRank() - inputRank, oneAttr);
1044 auto outSlice = tensor::ExtractSliceOp::create(
1045 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
1046 tiledOperands.push_back(outSlice);
1047
1048 if (auto val = packOp.getPaddingValue())
1049 tiledOperands.push_back(val);
1050 for (auto tile : packOp.getInnerTiles())
1051 tiledOperands.push_back(tile);
1052
1053 Operation *tiledPackOp = PackOp::create(
1054 b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
1055
1056 return TilingResult{
1057 {tiledPackOp},
1058 SmallVector<Value>(tiledPackOp->getResults()),
1059 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
1060 }
1061};
1062
1063struct UnpackTileDimInfo {
1064 bool isAlignedToInnerTileSize;
1065 OpFoldResult sourceOffset;
1066 OpFoldResult sourceSize;
1067 OpFoldResult resultOffset;
1068 OpFoldResult destExpandedSize;
1069};
1070
1071/// Returns the needed information for tiling unpack op on `tileDim` with given
1072/// `tileOffset` and `tileSize`. For more details, see the comment of the
1073/// `getTiledImplementation`.
1074static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
1075 int64_t tileDim,
1076 OpFoldResult tileOffset,
1077 OpFoldResult tileSize) {
1078 UnpackTileDimInfo info;
1079 Attribute zeroAttr = b.getIndexAttr(0);
1080 Attribute oneAttr = b.getIndexAttr(1);
1081 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1082 unpackOp.getDimAndTileMapping();
1083 // The dimension is not one of the packed data dimensions.
1084 if (!dimAndTileMapping.count(tileDim)) {
1085 info.isAlignedToInnerTileSize = true;
1086 info.sourceOffset = tileOffset;
1087 info.sourceSize = tileSize;
1088 info.resultOffset = zeroAttr;
1089 info.destExpandedSize = tileSize;
1090 return info;
1091 }
1092
1093 Location loc = unpackOp.getLoc();
1094 using AV = affine::AffineValueExpr;
1095 affine::AffineBuilder ab(b, loc);
1096 AffineExpr dim0, dim1, sym0;
1097 bindDims(b.getContext(), dim0, dim1);
1098 bindSymbols(b.getContext(), sym0);
1099
1100 OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
1101
1102 info.isAlignedToInnerTileSize = false;
1103 FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
1104 presburger::BoundType::UB, tileSize,
1105 /*stopCondition=*/nullptr, /*closedUB=*/true);
1106 std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
1107 if (!failed(cstSize) && cstInnerSize) {
1108 if (*cstSize % *cstInnerSize == 0)
1109 info.isAlignedToInnerTileSize = true;
1110
1111 // If the tiling size equals the inner tiling size, the outer dims are
1112 // always 1.
1113 if (*cstInnerSize == *cstSize) {
1114 auto lhs = AV(dim0).bind(tileOffset);
1115 auto rhs = AV(dim1).bind(innerTileSize);
1116 info.sourceOffset = ab.floor(lhs, rhs);
1117 info.sourceSize = oneAttr;
1118 info.resultOffset = zeroAttr;
1119 info.destExpandedSize = tileSize;
1120 return info;
1121 }
1122 }
1123
1124 if (info.isAlignedToInnerTileSize) {
1125 info.sourceOffset =
1126 ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
1127 info.resultOffset = zeroAttr;
1128 info.destExpandedSize = tileSize;
1129
1130 // The ceilDiv is needed here because there could be an incomplete tile
1131 // even in perfect tiling cases. E.g.,
1132 // %0 = unpack tensor<33x2xf32> into tensor<66xf32>
1133 // If the tiling size is 32, there will be 3 tiles. Two of them have
1134 // size=32; one of them has size=2. The size is represented using an
1135 // affine_min op; we need ceilDiv.
1136 info.sourceSize =
1137 ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
1138 return info;
1139 }
1140
1141 affine::DivModValue firstCoord = affine::getDivMod(
1142 b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
1143 getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1144 OpFoldResult tileExclusiveBound =
1145 ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
1146 affine::DivModValue lastCoord = affine::getDivMod(
1147 b, loc,
1148 getValueOrCreateConstantIndexOp(
1149 b, loc,
1150 ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
1151 getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1152
1153 OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
1154 AV(dim1).bind(firstCoord.quotient));
1155 info.sourceSize =
1156 ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
1157 info.sourceOffset = firstCoord.quotient;
1158 info.resultOffset = firstCoord.remainder;
1159 // Do not create affine ops for the expanded size because the resulting
1160 // affine expression is too complicated and trips up affine simplification.
1161 info.destExpandedSize = b.createOrFold<arith::MulIOp>(
1162 loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
1163 getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1164 return info;
1165}
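// Illustrative sketch of the unaligned case: unpacking tensor<4x8xf32> into
// tensor<32xf32> with tileOffset = 15 and tileSize = 15 (inner tile 8) gives
// firstCoord = (1, 7) and lastCoord = (29 floordiv 8, ...) = (3, ...), so
// sourceOffset = 1, sourceSize = 3 (source rows 1..3), resultOffset = 7, and
// destExpandedSize = 3 * 8 = 24.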
1166
1167struct UnPackOpTiling
1168 : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
1169
1170 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
1171 auto unpackOp = cast<UnPackOp>(op);
1172 SmallVector<utils::IteratorType> iteratorTypes(
1173 unpackOp.getDestRank(), utils::IteratorType::parallel);
1174 return iteratorTypes;
1175 }
1176
1177 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
1178 return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
1179 }
1180
1181 /// There are two cases when tiling unpack ops. If the tiling size is
1182 /// aligned to the inner tile size, the corresponding tiles of the source
1183 /// are all complete. Otherwise, there are incomplete tiles. We will need to
1184 /// expand the slice of the source to get complete tiles. The tiled unpack
1185 /// op unpacks more data from the source, so we'll need an extract_slice op
1186 /// to shift and truncate the output.
1187 /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
1188 /// coordinates of the second tile (i.e., result[15..31]) are
1189 /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
1190 /// row are incomplete tiles. To represent the unpack op, we have to
1191 /// complete the rows. I.e., the input coordinates would start with (1, 0)
1192 /// and end with (3, 7). In this context, the tiled unpack produces (3 * n)
1193 /// elements because there are 3 rows in total. Followed by a
1194 /// tensor.extract_slice op, we can get the actual result.
1195 FailureOr<TilingResult>
1196 getTiledImplementation(Operation *op, OpBuilder &b,
1197 ArrayRef<OpFoldResult> offsets,
1198 ArrayRef<OpFoldResult> sizes) const {
1199 auto unpackOp = cast<UnPackOp>(op);
1200 // TODO: Support Memref UnPackOp. Temporarily return failure.
1201 if (!unpackOp.hasPureTensorSemantics())
1202 return failure();
1203
1204 int64_t srcRank = unpackOp.getSourceRank();
1205 int64_t destRank = unpackOp.getDestRank();
1206 int64_t numInnerTiles = srcRank - destRank;
1207 Location loc = unpackOp.getLoc();
1208
1209 // The perfect tiling case indicates that the tiling sizes are multiples
1210 // of inner_tile_size. In this context, no extra data is needed when
1211 // representing the tiled unpack op.
1212 bool isPerfectTilingCase = true;
1213 Attribute oneAttr = b.getIndexAttr(1);
1214 SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
1215 SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
1216 SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
1217 for (auto dim : llvm::seq<int64_t>(0, destRank)) {
1218 UnpackTileDimInfo info =
1219 getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
1220 if (!info.isAlignedToInnerTileSize)
1221 isPerfectTilingCase = false;
1222 sliceSrcIndices.push_back(info.sourceOffset);
1223 sliceSrcSizes.push_back(info.sourceSize);
1224 destExpandedSizes.push_back(info.destExpandedSize);
1225 resultOffsetsFromDest.push_back(info.resultOffset);
1226 }
1227
1228 // The tiling is applied on destination dimensions. We have to apply the
1229 // interchange on source dimensions if outer_dims_perm is set.
1230 applyPermToRange(sliceSrcIndices, sliceSrcSizes,
1231 unpackOp.getOuterDimsPerm());
1232 Attribute zeroAttr = b.getIndexAttr(0);
1233 sliceSrcIndices.append(numInnerTiles, zeroAttr);
1234 sliceSrcSizes.append(unpackOp.getMixedTiles());
1235 sliceSrcStrides.append(numInnerTiles, oneAttr);
1236 SmallVector<Operation *> generatedSlices;
1237 tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
1238 b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
1239 sliceSrcStrides);
1240 generatedSlices.push_back(sliceSource);
1241
1242 SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
1243 Value sliceDest;
1244 if (isPerfectTilingCase) {
1245 auto destSliceOp = tensor::ExtractSliceOp::create(
1246 b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
1247 sliceDest = destSliceOp;
1248 generatedSlices.push_back(destSliceOp);
1249 } else {
1250 sliceDest = tensor::EmptyOp::create(
1251 b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
1252 }
1253
1254 SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
1255 for (auto tile : unpackOp.getInnerTiles())
1256 tiledOperands.push_back(tile);
1257
1258 Operation *tiledUnpackOp = UnPackOp::create(
1259 b, loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());
1260
1261 if (isPerfectTilingCase)
1262 return TilingResult{{tiledUnpackOp},
1263 SmallVector<Value>(tiledUnpackOp->getResults()),
1264 generatedSlices};
1265
1266 auto extractSlice = tensor::ExtractSliceOp::create(
1267 b, loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
1268 destStrides);
1269 return TilingResult{
1270 {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
1271 }
1272
1273 LogicalResult
1274 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
1275 ArrayRef<OpFoldResult> offsets,
1276 ArrayRef<OpFoldResult> sizes,
1277 SmallVector<OpFoldResult> &resultOffsets,
1278 SmallVector<OpFoldResult> &resultSizes) const {
1279 resultOffsets = llvm::to_vector(offsets);
1280 resultSizes = llvm::to_vector(sizes);
1281 return success();
1282 }
1283
1284 FailureOr<TilingResult>
1285 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
1286 ArrayRef<OpFoldResult> offsets,
1287 ArrayRef<OpFoldResult> sizes) const {
1288 FailureOr<TilingResult> tilingResult =
1289 getTiledImplementation(op, b, offsets, sizes);
1290 if (failed(tilingResult))
1291 return failure();
1292 return tilingResult.value();
1293 }
1294
1295 /// Method to return the position of iteration domain tile computed by the
1296 /// tiled operation.
1297 LogicalResult getIterationDomainTileFromOperandTiles(
1298 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1299 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1300 ArrayRef<SmallVector<OpFoldResult>> allSizes,
1301 SmallVectorImpl<OpFoldResult> &resultOffsets,
1302 SmallVectorImpl<OpFoldResult> &resultSizes) const {
1303 if (operandNumbers.size() != 1) {
1304 LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
1305 return failure();
1306 }
1307 auto unPackOp = cast<UnPackOp>(op);
1308 unsigned operandNumber = operandNumbers[0];
1309 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1310 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1311
1312 // If the operand tile is the dest, then no adjustment is needed.
1313 if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
1314 resultOffsets = llvm::to_vector(offsets);
1315 resultSizes = llvm::to_vector(sizes);
1316 return success();
1317 }
1318 Location loc = unPackOp.getLoc();
1319
1320 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1321 auto destOffsets = offsets.drop_back(numTiles);
1322 auto destSizes = sizes.drop_back(numTiles);
1323 // The tiling is applied on interchanged dimensions. We have to undo the
1324 // interchange to map sizes and offsets to the original input.
1325 int64_t outputRank = unPackOp.getDestRank();
1326 ReifiedRankedShapedTypeDims reifiedReturnShapes;
1327 if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
1328 return failure();
1329 SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
1330 SmallVector<OpFoldResult> origOffsets(destOffsets);
1331 SmallVector<OpFoldResult> origSizes(destSizes);
1332 applyPermToRange(origOffsets, origSizes,
1333 invertPermutationVector(unPackOp.getOuterDimsPerm()));
1334
1335 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1336 unPackOp.getDimAndTileMapping();
1337
1338 for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
1339 using AV = affine::AffineValueExpr;
1340 affine::AffineBuilder ab(b, loc);
1341 AffineExpr dim0, dim1, sym0;
1342 bindDims(b.getContext(), dim0, dim1);
1343 bindSymbols(b.getContext(), sym0);
1344 if (dimAndTileMapping.count(dim)) {
1345 // If the data dimension is tiled, the i-th index is the product of
1346 // offset_i and tile_i, and the i-th size is the product of sizes_i and
1347 // tile_i. The sizes must be clamped to the sizes of the unpack result.
1348 auto avOffset = AV(dim0).bind(origOffsets[dim]);
1349 auto avSize = AV(dim0).bind(origSizes[dim]);
1350 auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
1351 auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
1352 resultOffsets.push_back(ab.mul(avOffset, avTileSize));
1353 auto avResultOffset = AV(dim1).bind(resultOffsets.back());
1354 resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
1355 ab.sub(avResultSize, avResultOffset)}));
1356 } else {
1357 resultOffsets.push_back(origOffsets[dim]);
1358 resultSizes.push_back(origSizes[dim]);
1359 }
1360 }
1361 return success();
1362 }
1363
1364 /// Method to return the tiled implementation of tensor.unpack as a consumer.
1365 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1366 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1367 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1368 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
1369 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1370 LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; });
1371 return failure();
1372 }
1373 auto unPackOp = cast<UnPackOp>(op);
1374 // TODO: Support Memref UnPackOp. Temporarily return failure.
1375 if (!unPackOp.hasPureTensorSemantics())
1376 return failure();
1377
1378 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1379 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1380
1381 // tensor.unpack op is fusible (as a consumer) only if inner dims are not
1382 // tiled.
1383 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1384 for (auto iter :
1385 llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
1386 if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
1387 return failure();
1388 }
1389
1390 Location loc = unPackOp.getLoc();
1391
1392 // Fetch offset/size for creating the slice of the dest operand of
1393 // unpack op.
1394 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1395 if (failed(getIterationDomainTileFromOperandTiles(
1396 op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
1397 outputSizes)))
1398 return failure();
1399
1400 auto oneAttr = b.getI64IntegerAttr(1);
1401 int64_t outputRank = unPackOp.getDestRank();
1402 SmallVector<OpFoldResult> strides(outputRank, oneAttr);
1403
1404 SmallVector<Value> tiledOperands;
1405 // Create slice of the dest operand.
1406 auto extractDestSlice = tensor::ExtractSliceOp::create(
1407 b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
1408 tiledOperands.push_back(extractDestSlice);
1409
1410 strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
1411 // Create slice of the source operand.
1412 auto extractSourceSlice = tensor::ExtractSliceOp::create(
1413 b, loc, unPackOp.getSource(), offsets, sizes, strides);
1414 tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
1415 for (auto tile : unPackOp.getInnerTiles())
1416 tiledOperands.push_back(tile);
1417
1418 // Create tiled unpack op.
1419 Operation *tiledUnPackOp =
1420 UnPackOp::create(b, loc, TypeRange{extractDestSlice.getType()},
1421 tiledOperands, op->getAttrs());
1422
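 // The TilingResult fields below are, in order: the tiled op(s), the values
 // that replace the original op's results, and the slice ops generated for
 // the operands (which fusion drivers use to keep walking the slice chain).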
1423 return TilingResult{{tiledUnPackOp},
1424 SmallVector<Value>(tiledUnPackOp->getResults()),
1425 llvm::to_vector(ArrayRef<Operation *>{
1426 extractSourceSlice, extractDestSlice})};
1427 }
1428};
1429
1430} // namespace
1431
1432template <typename OpType>
1433static void registerOne(MLIRContext *ctx) {
1434 OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1435 OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1436 *ctx);
1437}
1438
1439/// Variadic helper that registers the tiling and partial-reduction interface
1439/// models for each op type in the pack.
1440template <typename... OpTypes>
1441static void registerAll(MLIRContext *ctx) {
1442 (registerOne<OpTypes>(ctx), ...);
1443}
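// For instance (hypothetical instantiation, not in this file):
// registerAll<GenericOp, MatmulOp>(ctx) unfolds the comma fold expression
// into registerOne<GenericOp>(ctx), registerOne<MatmulOp>(ctx).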
1444
1445#define GET_OP_LIST
1446
1447void mlir::linalg::registerTilingInterfaceExternalModels(
1448 DialectRegistry &registry) {
1449 registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
1450 registerOne<linalg::GenericOp>(ctx);
1451 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1452 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1453 registerAll<
1454#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
1455 >(ctx);
1456 });
1457}
1458
1459void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
1460 DialectRegistry &registry) {
1461 registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) {
1462 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1463 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1464 });
1465}
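
// Illustrative usage (a minimal sketch, not part of this file): how a client
// tool might attach these external models before loading IR. The variable
// names are hypothetical.
//
//   DialectRegistry registry;
//   registry.insert<linalg::LinalgDialect>();
//   linalg::registerTilingInterfaceExternalModels(registry);
//   MLIRContext context(registry);
//   // linalg ops loaded in `context` now implement TilingInterface.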