TilingInterfaceImpl.cpp
//===- TilingInterfaceImpl.cpp - Implementation of TilingInterface -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "llvm/Support/Debug.h"
#include <optional>

#define DEBUG_TYPE "linalg-tiling-interface-impl"

using namespace mlir;
using namespace mlir::linalg;

//===----------------------------------------------------------------------===//
// Utility methods for implementation of Tiling Interface for Linalg ops
//===----------------------------------------------------------------------===//

/// Return the SSA values that represent the data point accessed using a given
/// `indexingMap` for a given point in the iteration space represented by `ivs`.
static SmallVector<Value> getIndicesForAccess(OpBuilder &b, Location loc,
                                              AffineMap indexingMap,
                                              ValueRange ivs) {
  SmallVector<Value> indices;
  indices.reserve(indexingMap.getNumResults());
  for (auto result : indexingMap.getResults()) {
    AffineMap m = AffineMap::get(indexingMap.getNumDims(),
                                 indexingMap.getNumSymbols(), result);
    Value v = affine::AffineApplyOp::create(b, loc, m, ivs);
    indices.push_back(v);
  }
  return indices;
}
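
// Illustrative example (hypothetical IR, a sketch rather than output of this
// helper): for an indexing map (d0, d1) -> (d1, d0) and ivs = [%i, %j], one
// affine.apply is materialized per map result, yielding indices [%j, %i]:
//
//   %0 = affine.apply affine_map<(d0, d1) -> (d1)>(%i, %j)
//   %1 = affine.apply affine_map<(d0, d1) -> (d0)>(%i, %j)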

/// Method to inline the payload of a `linalgOp` given the iteration space
/// point and values for the arguments of the payload.
static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp,
                                   ValueRange ivs, ValueRange argValues) {
  Block *body = linalgOp.getBlock();
  IRMapping map;
  map.map(body->getArguments(), argValues);
  for (auto &op : body->without_terminator()) {
    if (auto indexOp = dyn_cast<IndexOp>(&op)) {
      map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
      continue;
    }
    b.clone(op, map);
  }

  Operation *terminator = body->getTerminator();
  Location loc = terminator->getLoc();
  for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
    Value toStore = map.lookupOrDefault(operand.value());
    OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
    SmallVector<Value> indices = getIndicesForAccess(
        b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
    memref::StoreOp::create(b, loc, toStore,
                            linalgOp.getDpsInitOperand(operand.index())->get(),
                            indices);
  }
  return success();
}

//===----------------------------------------------------------------------===//
// External Model for implementing `TilingInterface` for `LinalgOp`s.
//===----------------------------------------------------------------------===//

namespace {
/// External model implementation of TilingInterface for LinalgOps. An external
/// model implementation is used for now till the use of `TilingInterface` is
/// on-par with the current Linalg tiling + fusion patterns. Once it is, it may
/// be possible to move this into the op definition (though there are
/// advantages to leaving it as an external model).
template <typename LinalgOpTy>
struct LinalgOpTilingInterface
    : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
                                            LinalgOpTy> {
  /// Return the loop iterator type.
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
    return concreteOp.getIteratorTypesArray();
  }

  /// Return the iteration domain range.
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    OpBuilder::InsertionGuard g(b);
    b.setInsertionPoint(op);
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<OpFoldResult> allShapesSizes =
        linalgOp.createFlatListOfOperandDims(b, loc);
    AffineMap map = linalgOp.getShapesToLoopsMap();

    return llvm::to_vector(
        llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
          OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
              b, loc, loopExpr, allShapesSizes);
          return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
        }));
  }
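
  // Illustrative example (hypothetical values): for a linalg.matmul on
  // tensor<?x?xf32> operands, the shapes-to-loops map collapses the flat
  // list of operand dims into the (M, N, K) loops, so the returned domain
  // is roughly:
  //   [{0, %M, 1}, {0, %N, 1}, {0, %K, 1}]
  // with each Range being {offset, size, stride} and %M/%N/%K the reified
  // operand dimensions.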

  /// Instantiate the tiled implementation of the operation.
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    // Leave the `sizeBounds` value empty. That is only needed when the `sizes`
    // specified could lead to out of bounds accesses.
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<Value> valuesToTile = linalgOp->getOperands();
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledOperands,
            [](Value v) -> bool {
              return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
                  v.getDefiningOp());
            }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    SmallVector<Type> resultTensorTypes =
        getTensorOutputTypes(linalgOp, tiledOperands);

    Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
    offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);

    return TilingResult{
        {tiledOp}, SmallVector<Value>(tiledOp->getResults()), generatedSlices};
  }
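
  // Illustrative example (hypothetical IR): tiling a linalg.matmul with
  // offsets (%iv0, %iv1, 0) and sizes (32, 64, %K) would produce roughly:
  //
  //   %lhs = tensor.extract_slice %A[%iv0, 0] [32, %K] [1, 1]
  //   %rhs = tensor.extract_slice %B[0, %iv1] [%K, 64] [1, 1]
  //   %out = tensor.extract_slice %C[%iv0, %iv1] [32, 64] [1, 1]
  //   %res = linalg.matmul ins(%lhs, %rhs : ...) outs(%out : ...)
  //
  // with the extract_slice (or memref.subview) ops reported back as
  // `generatedSlices`.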

  /// Utility to fetch the offsets and sizes when applied as per the indexing
  /// map of the linalg op. This helps in fusing the linalg op as a consumer of
  /// a given slice op.
  static LogicalResult
  getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b,
                         ArrayRef<AffineMap> indexingMaps,
                         ArrayRef<SmallVector<OpFoldResult>> allOffsets,
                         ArrayRef<SmallVector<OpFoldResult>> allSizes,
                         SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
                         SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
    DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;

    for (auto [indexingMap, offsets, sizes] :
         llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
      for (auto [resultExpr, offset, size] :
           llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
        auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
        if (!dimExpr)
          return failure();
        unsigned position = dimExpr.getPosition();
        auto it = mappedOffsets.find(position);
        if (it != mappedOffsets.end()) {
          OpFoldResult seenOffset = it->second;
          OpFoldResult seenSize = mappedSizes.lookup(position);
          if (seenOffset != offset || seenSize != size) {
            LLVM_DEBUG({
              llvm::dbgs() << "inconsistent iteration space mapping from "
                              "offsets/sizes of operands/results";
            });
            return failure();
          }
        } else {
          mappedOffsets[position] = offset;
          mappedSizes[position] = size;
        }
      }
    }

    // Aggregate from the given operand offsets and sizes, or default to
    // iteration space values.
    SmallVector<Range> iterationDomain =
        cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
    mappedOffsetsVec.resize(iterationDomain.size());
    mappedSizesVec.resize(iterationDomain.size());
    for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
      auto it = mappedOffsets.find(index);
      if (it != mappedOffsets.end()) {
        mappedOffsetsVec[index] = it->second;
        mappedSizesVec[index] = mappedSizes.lookup(index);
        continue;
      }
      mappedOffsetsVec[index] = domain.offset;
      mappedSizesVec[index] = domain.size;
    }
    return success();
  }
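
  // Illustrative example: for a matmul, operand 0 has indexing map
  // (d0, d1, d2) -> (d0, d2). Given its tile offsets (%o0, %o2) and sizes
  // (%s0, %s2), the method records d0 -> (%o0, %s0) and d2 -> (%o2, %s2);
  // the unreferenced loop d1 defaults to the full range from the iteration
  // domain. A second operand mentioning d0 or d2 must report identical
  // offsets/sizes, otherwise the mapping is rejected as inconsistent.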

  /// Method to return the iteration domain tile computed from the given
  /// operand tiles.
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    SmallVector<AffineMap> indexingMaps =
        llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
          OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
          return linalgOp.getMatchingIndexingMap(&opOperand);
        });
    if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
                                      allSizes, iterDomainOffsets,
                                      iterDomainSizes))) {
      return failure();
    }
    return success();
  }

  /// Return the details of the output tile generated by the tiled
  /// implementation.
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);

    AffineExpr d0;
    bindDims(b.getContext(), d0);
    SmallVector<OpFoldResult> subShapeSizes =
        llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
          return affine::makeComposedFoldedAffineApply(b, loc, d0 - 1, ofr);
        }));

    OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
    SliceParameters sliceParams = computeSliceParameters(
        b, loc, outOperand->get(), sizes,
        linalgOp.getMatchingIndexingMap(outOperand), offsets,
        /*ubs*/ {}, subShapeSizes, true);
    resultOffsets = sliceParams.offsets;
    resultSizes = sliceParams.sizes;
    return success();
  }

  LogicalResult getIterationDomainTileFromResultTile(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    // Check that the indexing map used for the output is a projected
    // permutation. This could be relaxed with a more general approach that can
    // map the offsets and sizes from the result to iteration space tiles
    // (filling in full extent for dimensions not used to access the result).
    AffineMap indexingMap =
        linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
    if (!indexingMap.isProjectedPermutation()) {
      return op->emitOpError(
          "unhandled tiled implementation generation when result is not "
          "accessed using a permuted projection");
    }

    SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
    SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
    auto status =
        getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
                               {allSizes}, iterDomainOffsets, iterDomainSizes);
    (void)status;
    assert(succeeded(status) && "unexpected error in offset calculation");
    return success();
  }

  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromResultTile(
            op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
      return failure();
    }
    auto tilingInterfaceOp = cast<TilingInterface>(op);
    FailureOr<TilingResult> tilingResult =
        tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);

    if (failed(tilingResult))
      return failure();

    if (tilingResult->tiledOps.size() != 1)
      return op->emitOpError("failed to generate tiled implementation");

    return TilingResult{
        tilingResult->tiledOps,
        SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
        tilingResult->generatedSlices};
  }
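
  // Illustrative example: requesting the result tile at offsets (%i, %j)
  // with sizes (4, 8) of a matmul maps to iteration-space offsets
  // (%i, %j, 0) and sizes (4, 8, %K); the reduction loop is not referenced
  // by the result map, so it spans the full domain and the tiled op
  // computes the complete reduction for that result tile.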

  /// Method to generate the tiled implementation of an operation from the tile
  /// of the operand.
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
            mappedSizes))) {
      return failure();
    }
    return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
  }

  LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
                                             Location loc,
                                             ValueRange ivs) const {
    auto linalgOp = cast<LinalgOp>(op);
    if (!linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have buffer semantics");

    SmallVector<Value> indexedValues;
    indexedValues.reserve(linalgOp->getNumOperands());
    Location linalgOpLoc = op->getLoc();
    // Load the data corresponding to the block arguments that
    // represent input operands.
    for (OpOperand &operand : linalgOp->getOpOperands()) {
      if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
        indexedValues.push_back(nullptr);
        continue;
      }
      if (linalgOp.isScalar(&operand)) {
        indexedValues.push_back(operand.get());
        continue;
      }
      SmallVector<Value> indices = getIndicesForAccess(
          builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
      Value load =
          memref::LoadOp::create(builder, linalgOpLoc, operand.get(), indices);
      indexedValues.push_back(load);
    }

    // Inline the op payload and store the result.
    return inlinePayload(builder, linalgOp, ivs, indexedValues);
  }
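
  // Illustrative example (hypothetical IR): for a buffer-semantics
  // linalg.generic computing C[i, j] = A[i, j] + C[i, j], the scalar
  // implementation emitted at a point (%i, %j) is roughly:
  //
  //   %a = memref.load %A[%i, %j] : memref<?x?xf32>
  //   %c = memref.load %C[%i, %j] : memref<?x?xf32>
  //   %s = arith.addf %a, %c : f32
  //   memref.store %s, %C[%i, %j] : memref<?x?xf32>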

  bool isOpFusableWithConsumerSlice(Operation *op, unsigned resultNumber,
                                    ArrayRef<OpFoldResult> offsets,
                                    ArrayRef<OpFoldResult> sizes) const {
    // The verifier gives all the necessary requirements for consumer fusion.
    return true;
  }

  bool isOpFusableWithProducerSlices(
      Operation *op, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> indexingMaps =
        llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
          OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
          return linalgOp.getMatchingIndexingMap(&opOperand);
        });
    // Check that offsets/sizes are consistent across all operands.
    OpBuilder b(op);
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    return succeeded(getMappedOffsetAndSize(linalgOp, b, indexingMaps,
                                            allOffsets, allSizes, mappedOffsets,
                                            mappedSizes));
  }
};

//===----------------------------------------------------------------------===//
// External Model for implementing `PartialReductionInterface` for `LinalgOp`s.
//===----------------------------------------------------------------------===//

/// In a given set vector, get the position of a particular element.
std::optional<int> getPositionIn(const llvm::SetVector<unsigned> &reductionDims,
                                 unsigned value) {
  for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
    if (reductionDim == value) {
      return index;
    }
  }
  return std::nullopt;
}

/// Return the AffineMaps to use for the `outs` operands of the linalg op
/// generated for partial results. The new AffineMap is the AffineMap of the
/// untiled op with the reduction dimensions appended at the end, in the order
/// in which they were specified during tiling.
static SmallVector<AffineMap>
getPartialResultAffineMaps(LinalgOp linalgOp,
                           const SetVector<unsigned> &reductionDims) {
  auto partialReductionMaps = llvm::map_to_vector(
      linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
        for (auto redPos : reductionDims) {
          map =
              map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
                               map.getNumResults());
        }
        return map;
      });
  return partialReductionMaps;
}
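
// Illustrative example: for a matmul init operand with indexing map
// (d0, d1, d2) -> (d0, d1) and reductionDims = {2}, the partial result map
// becomes (d0, d1, d2) -> (d0, d1, d2): the reduction dimension is
// materialized as a trailing dimension of the partial result tensor.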

struct InitSliceInfo {
  SmallVector<int64_t> resultShape;
  SmallVector<OpFoldResult> offsets;
  SmallVector<OpFoldResult> sizes;
  SmallVector<OpFoldResult> strides;
};

/// Return the result shape, offsets, sizes and strides of the slice of the
/// `initValue` to use as the destination of the partial reduction op generated
/// with the outer reduction strategy.
static InitSliceInfo getInitSliceInfoForOuterReduction(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  int64_t initRank = partialReductionMap.getNumResults();
  SmallVector<OpFoldResult> initOffsets, initSizes;
  Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  SmallVector<OpFoldResult> initStrides(initRank, one);
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (reductionDims.contains(dim)) {
      initOffsets.push_back(zero);
    } else {
      initOffsets.push_back(offsets[dim]);
    }
    initSizes.push_back(sizes[dim]);
  }
  SmallVector<int64_t> resultShape;
  std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
  return {resultShape, initOffsets, initSizes, initStrides};
}

/// Return the result shape, offsets, sizes and strides of the slice of the
/// `initValue` to use as the destination of the partial reduction op generated
/// with the outer parallel strategy.
static InitSliceInfo getInitSliceInfoForOuterParallel(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  int64_t initRank = partialReductionMap.getNumResults();
  SmallVector<OpFoldResult> initOffsets, initSizes;
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  SmallVector<OpFoldResult> initStrides(initRank, one);
  SmallVector<OpFoldResult> resultShape;
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
      initOffsets.push_back(splitReductionIvs[dimPos.value()]);
      initSizes.push_back(one);
    } else {
      initOffsets.push_back(offsets[dim]);
      initSizes.push_back(sizes[dim]);
      resultShape.push_back(sizes[dim]);
    }
  }
  SmallVector<int64_t> staticShapes;
  std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
  return {staticShapes, initOffsets, initSizes, initStrides};
}

/// Return the result shape, offsets, sizes and strides of the slice of the
/// `initValue` to use as the destination of the partial reduction op.
static InitSliceInfo getInitSliceInfo(MLIRContext *context,
                                      ReductionTilingStrategy strategy,
                                      ArrayRef<OpFoldResult> offsets,
                                      ArrayRef<OpFoldResult> sizes,
                                      const SetVector<unsigned> &reductionDims,
                                      ArrayRef<OpFoldResult> splitReductionIvs,
                                      AffineMap partialReductionMap) {
  if (strategy == ReductionTilingStrategy::PartialReductionOuterReduction) {
    return getInitSliceInfoForOuterReduction(context, offsets, sizes,
                                             reductionDims, splitReductionIvs,
                                             partialReductionMap);
  }
  assert(strategy == ReductionTilingStrategy::PartialReductionOuterParallel &&
         "unexpected ReductionTilingStrategy");
  return getInitSliceInfoForOuterParallel(context, offsets, sizes,
                                          reductionDims, splitReductionIvs,
                                          partialReductionMap);
}

/// External model implementation of PartialReductionInterface for
/// LinalgOps.
template <typename LinalgOpTy>
struct LinalgOpPartialReductionInterface
    : public PartialReductionOpInterface::ExternalModel<
          LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
  FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
      Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
      const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);

    OpBuilder::InsertionGuard guard(b);
    if (linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have tensor semantics");

    SmallVector<AffineMap> partialResultMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    SmallVector<Value> inits;
    for (auto [initIdx, result, partialMap] :
         llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
      SmallVector<Operation *, 4> combinerOps;
      if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                          combinerOps) ||
          combinerOps.size() != 1)
        return op->emitOpError("failed to analyze the reduction operation");

      Operation *reductionOp = combinerOps[0];
      std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
      if (!identity.has_value())
        return op->emitOpError(
            "failed to get an identity value for the reduction operation");

      // Append the new partial result dimensions.
      SmallVector<OpFoldResult> partialResultShape;
      for (AffineExpr dimExpr : partialMap.getResults()) {
        auto dim = cast<AffineDimExpr>(dimExpr);
        partialResultShape.push_back(sizes[dim.getPosition()]);
      }

      Type elType = getElementTypeOrSelf(result.getType());
      Value emptyTensor =
          tensor::EmptyOp::create(b, loc, partialResultShape, elType);
      Value constantOp = arith::ConstantOp::create(b, loc, *identity);
      auto identityTensor =
          linalg::FillOp::create(b, loc, constantOp, emptyTensor);
      inits.push_back(identityTensor.getResult(0));
    }

    return inits;
  }
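
  // Illustrative example (hypothetical IR): for an f32 add-reduction, the
  // neutral element is 0.0 and the generated init is roughly:
  //
  //   %cst = arith.constant 0.000000e+00 : f32
  //   %empty = tensor.empty(%d0, %d1) : tensor<?x?xf32>
  //   %init = linalg.fill ins(%cst : f32) outs(%empty : tensor<?x?xf32>)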

  FailureOr<TilingResult>
  tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
                         ReductionTilingStrategy tilingStrategy,
                         ValueRange init, ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes,
                         const SetVector<unsigned> &reductionDims,
                         ArrayRef<OpFoldResult> splitReductionIvs) const {
    OpBuilder::InsertionGuard guard(b);
    auto linalgOp = cast<LinalgOp>(op);

    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    // Step 1. Extend init maps to have the reduction dimensions, since we
    // are converting them to parallel dimensions.
    SmallVector<AffineMap> newInitMaps;
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      newInitMaps = llvm::to_vector(partialReductionMaps);
    } else {
      newInitMaps = llvm::map_to_vector(
          linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
            return linalgOp.getMatchingIndexingMap(&opOperand);
          });
    }

    // Step 2a: Extract a slice of the input operands.
    SmallVector<Value> tiledInputs = makeTiledShapes(
        b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    // Step 2b: Extract a slice of the init operands.
    SmallVector<Value, 1> tiledInits;
    for (auto [partialReductionMap, valueToTile] :
         llvm::zip_equal(partialReductionMaps, init)) {
      InitSliceInfo sliceInfo = getInitSliceInfo(
          b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
          splitReductionIvs, partialReductionMap);
      auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
      RankedTensorType sliceResultType = RankedTensorType::get(
          sliceInfo.resultShape, valueToTileType.getElementType(),
          valueToTileType.getEncoding());
      auto sliceOp = tensor::ExtractSliceOp::create(
          b, loc, sliceResultType, valueToTile, sliceInfo.offsets,
          sliceInfo.sizes, sliceInfo.strides);
      tiledInits.push_back(sliceOp.getResult());
      generatedSlices.push_back(sliceOp);
    }

    // Update the indexing maps.
    SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
    for (auto [initOperand, newInitMap] :
         llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
      int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
      newMaps[mapIdx] = newInitMap;
    }

    // Step 3. Change the reduction dim iterator types.
    SmallVector<utils::IteratorType> newIteratorTypes =
        linalgOp.getIteratorTypesArray();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      for (int dim : reductionDims)
        newIteratorTypes[dim] = utils::IteratorType::parallel;
    }

    // Step 4. Create the new generic op.
    Operation *partialReductionOp;
    auto resultTypes = ValueRange(tiledInits).getTypes();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      auto genericOp = GenericOp::create(b, loc, resultTypes, tiledInputs,
                                         tiledInits, newMaps, newIteratorTypes);
      IRMapping mapping;
      op->getRegion(0).cloneInto(&genericOp.getRegion(),
                                 genericOp.getRegion().begin(), mapping);
      partialReductionOp = genericOp.getOperation();
    } else {
      SmallVector<Value> operands = std::move(tiledInputs);
      llvm::append_range(operands, tiledInits);
      partialReductionOp = mlir::clone(b, op, resultTypes, operands);
    }
    return TilingResult{
        {partialReductionOp},
        llvm::map_to_vector(partialReductionOp->getResults(),
                            [](OpResult r) -> Value { return r; }),
        generatedSlices};
  }
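
  // Illustrative example (hypothetical IR): with the outer-reduction
  // strategy on a matmul, the k loop becomes parallel in the partial op and
  // is kept as a trailing result dimension of the init slice:
  //
  //   %p = linalg.generic {
  //          indexing_maps = [..., affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
  //          iterator_types = ["parallel", "parallel", "parallel"]}
  //          ins(%lhsTile, %rhsTile : ...) outs(%initSlice : ...) { ... }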

  FailureOr<MergeResult>
  mergeReductions(Operation *op, OpBuilder &b, Location loc,
                  ValueRange partialReduce,
                  const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    // Permute the reduction dims as permuted by the partial result map.
    SmallVector<Operation *> mergeOperations;
    SmallVector<Value> replacements;
    for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
             linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
      unsigned initIdx = idx;
      // linalg.reduce's iteration space is the tiled result's iteration space
      // (and not the tiled operation's iteration space). To account for this,
      // permute the reduction dimensions based on the partial result map of
      // the tiled result.
      SmallVector<int64_t> partialReductionDims;
      for (auto [resultNum, dimExpr] :
           llvm::enumerate(partialMap.getResults())) {
        unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
        if (llvm::is_contained(reductionDims, dim)) {
          partialReductionDims.push_back(resultNum);
        }
      }

      auto reduction = linalg::ReduceOp::create(
          b, loc, partialResult, init, partialReductionDims,
          [&linalgOp, &initIdx](OpBuilder &b, Location loc, ValueRange inputs) {
            // Get the combiner op.
            SmallVector<Operation *, 4> combinerOps;
            matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                           combinerOps);
            Operation *clonedReductionOp = b.clone(*combinerOps[0]);
            // Combine the input at idx and output at numInits + idx.
            clonedReductionOp->setOperand(0, inputs[0]);
            clonedReductionOp->setOperand(1, inputs[1]);
            linalg::YieldOp::create(b, loc, clonedReductionOp->getResult(0));
          });

      mergeOperations.push_back(reduction);
      replacements.push_back(reduction->getResult(0));
    }

    return MergeResult{mergeOperations, replacements};
  }
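
  // Illustrative example (hypothetical IR): continuing the matmul case, the
  // merge reduces the trailing partial dimension back into the original
  // init with the matched combiner:
  //
  //   %m = linalg.reduce { arith.addf }
  //          ins(%partial : tensor<?x?x?xf32>)
  //          outs(%init : tensor<?x?xf32>) dimensions = [2]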

  LogicalResult getPartialResultTilePosition(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
      ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
      ArrayRef<OpFoldResult> splitReductionIvs,
      SmallVector<OpFoldResult> &resultOffsets,
      SmallVector<OpFoldResult> &resultSizes) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    InitSliceInfo sliceInfo = getInitSliceInfo(
        b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
        splitReductionIvs, partialReductionMaps[resultNumber]);
    std::swap(resultOffsets, sliceInfo.offsets);
    std::swap(resultSizes, sliceInfo.sizes);

    return success();
  }
};

template <typename OpTy>
static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
                                                       OpBuilder &builder) {
  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                "applies to only pack or unpack operations");
  OpBuilder::InsertionGuard g(builder);
  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
                                                     : op.getDestRank();
  OpFoldResult zero = builder.getIndexAttr(0);
  OpFoldResult one = builder.getIndexAttr(1);
  ReifiedRankedShapedTypeDims resultShape;
  (void)reifyResultShapes(builder, op, resultShape);
  SmallVector<Range> loopBounds(rank);
  for (auto dim : llvm::seq<int64_t>(0, rank)) {
    loopBounds[dim].offset = zero;
    loopBounds[dim].stride = one;
    loopBounds[dim].size = resultShape[0][dim];
  }
  return loopBounds;
}

static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
                             SmallVector<OpFoldResult> &sizes,
                             ArrayRef<int64_t> permutation) {
  if (permutation.empty())
    return;
  applyPermutationToVector<OpFoldResult>(offsets, permutation);
  applyPermutationToVector<OpFoldResult>(sizes, permutation);
}

struct PackOpTiling
    : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    // Note that here we only consider untiled dimensions and outer tiled data
    // dimensions, the inner tiled data dimensions are materialized when
    // building the body of the operation.
    auto packOp = cast<PackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        packOp.getSourceRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
  }

  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();

    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map sizes and offsets to the original input.
    int64_t inputRank = packOp.getSourceRank();
    SmallVector<OpFoldResult> origOffsets(offsets);
    SmallVector<OpFoldResult> origSizes(sizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(packOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<OpFoldResult> srcDimValues =
        tensor::getMixedSizes(b, loc, packOp.getSource());
    SmallVector<OpFoldResult> inputIndices, inputSizes;
    for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym);
      if (dimAndTileMapping.count(dim)) {
        // If the data dimension is tiled, the i-th index is the product of
        // offset_i and tile_i, and the i-th size is the product of sizes_i
        // and tile_i.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        inputIndices.push_back(ab.mul(avOffset, avTileSize));
        inputSizes.push_back(ab.mul(avSize, avTileSize));
      } else {
        inputIndices.push_back(origOffsets[dim]);
        inputSizes.push_back(origSizes[dim]);
      }

      // Limit the size of the input operand for incomplete tiles.
      if (packOp.getPaddingValue()) {
        OpFoldResult dimSize = srcDimValues[dim];
        auto avDimSize = AV(dim0).bind(dimSize);
        auto avInputIdx = AV(dim1).bind(inputIndices.back());
        inputSizes.back() =
            ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
      }
    }

    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
                                     outputSizes)))
      return {};

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
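
  // Illustrative example (hypothetical IR): tiling a pack of
  // tensor<128x256xf32> into tensor<4x8x32x32xf32> with offsets (%i, %j)
  // and sizes (2, 4) on the outer dims produces roughly:
  //
  //   %off0 = affine.apply affine_map<(d0) -> (d0 * 32)>(%i)
  //   %off1 = affine.apply affine_map<(d0) -> (d0 * 32)>(%j)
  //   %src = tensor.extract_slice %source[%off0, %off1] [64, 128] [1, 1]
  //   %dst = tensor.extract_slice %dest[%i, %j, 0, 0] [2, 4, 32, 32]
  //            [1, 1, 1, 1]
  //   %t = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [32, 32]
  //          into %dst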
845
846 LogicalResult
847 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
848 ArrayRef<OpFoldResult> offsets,
849 ArrayRef<OpFoldResult> sizes,
850 SmallVector<OpFoldResult> &resultOffsets,
851 SmallVector<OpFoldResult> &resultSizes) const {
852 // The iteration domain is over outer dimensions of packed layout. In this
853 // context, the outer dimensions of `resultOffsets` are `offsets`. The
854 // inner dimensions of `resultOffsets` are zeros because tiling is not
855 // applied to them.
856 auto packOp = cast<PackOp>(op);
857 int64_t inputRank = packOp.getSourceRank();
858 int64_t outputRank = packOp.getDestRank();
859 auto zeroAttr = b.getI64IntegerAttr(0);
860 resultOffsets.assign(offsets.begin(), offsets.end());
861 resultOffsets.append(outputRank - inputRank, zeroAttr);
862
863 ReifiedRankedShapedTypeDims outputShape;
864 (void)reifyResultShapes(b, packOp, outputShape);
865 resultSizes.assign(sizes.begin(), sizes.end());
866 for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
867 resultSizes.push_back(outputShape[0][dataTileDim]);
868
869 return success();
870 }

  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    int64_t numTiles = packOp.getInnerDimsPos().size();

    // tensor.pack op is fusible (as a producer) only if full inner tiles are
    // iterated or inner dims are not tiled. Otherwise, it will generate a
    // sequence of non-trivial ops (for partial tiles).
    for (auto offset : offsets.take_back(numTiles))
      if (!isZeroInteger(offset))
        return failure();

    for (auto iter :
         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
        return failure();

    FailureOr<TilingResult> tilingResult = getTiledImplementation(
        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }

  /// Method to return the position of the iteration domain tile computed by
  /// the tiled operation. In the current `tensor.pack` context, the
  /// `resultOffsets` and `resultSizes` only cover outer dimensions.
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unsupported operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();
    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<int64_t> outerShapeWithoutTranspose(
        packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
    if (!packOp.getOuterDimsPerm().empty()) {
      applyPermutationToVector(
          outerShapeWithoutTranspose,
          invertPermutationVector(packOp.getOuterDimsPerm()));
    }
    for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
      if (dimAndTileMapping.count(dim)) {
        FailureOr<int64_t> cstTileSize =
            ValueBoundsConstraintSet::computeConstantBound(
                presburger::BoundType::UB, sizes[dim],
                /*stopCondition=*/nullptr, /*closedUB=*/true);
        std::optional<int64_t> cstInnerSize =
            getConstantIntValue(dimAndTileMapping[dim]);

        // If a dimension is not tiled, it is always valid to fuse the pack
        // op, even if the op has padding semantics, because it always
        // generates a full slice along that dimension. The tile sizes are
        // for the unpacked domain, i.e., `srcDimSize`, so
        // `tileSize < srcDimSize` means that the dimension is tiled.
        // TODO: It could be untiled if the `srcDimSize` is dynamic. It is a
        // hard check to determine if a dimension is tiled or not.
        int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
        int64_t destDimSize = outerShapeWithoutTranspose[dim];
        bool isTiled = failed(cstTileSize) ||
                       ShapedType::isDynamic(srcDimSize) ||
                       cstTileSize.value() < srcDimSize;
        if (!isTiled) {
          outerDimOffsets.push_back(offsets[dim]);
          if (ShapedType::isStatic(destDimSize)) {
            outerDimSizes.push_back(b.getIndexAttr(destDimSize));
          } else {
            outerDimSizes.push_back(
                b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
          }
          continue;
        }

        // Currently fusing `packOp` as a consumer only expects a perfect
        // tiling scenario, because even without padding semantics the
        // `packOp` may yield incomplete tiles. E.g. tensor<30xf32> ->
        // tensor<5x6xf32>, where the `tileSize` from the operand of `packOp`
        // is 5, which is not a multiple of the `innerTile` (=6) of `packOp`.
        // As a result:
        // 1. the first slice is extracted from (0) to (4) and inserted into
        //    (0,0)~(0,4) in the first row.
        // 2. the second slice is extracted from (5) to (9) and SHOULD BE
        //    respectively inserted into two rows with different lengths:
        //    the first row (0,5) and the second row (1,0)~(1,3). It is hard
        //    to coordinate them, thus we add the constraint below to bypass
        //    them temporarily. In other words, at this moment we can only
        //    support tiling with a consumer if the tile size for the
        //    producer is a multiple of the inner tile size for the packed
        //    dimensions.
        if ((failed(cstTileSize) || !cstInnerSize ||
             *cstTileSize % *cstInnerSize != 0))
          return failure();

        using AV = affine::AffineValueExpr;
        affine::AffineBuilder ab(b, loc);
        AffineExpr dim0, sym;
        bindDims(b.getContext(), dim0);
        bindSymbols(b.getContext(), sym);
        auto avOffset = AV(dim0).bind(offsets[dim]);
        auto avSize = AV(dim0).bind(sizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
      } else {
        outerDimOffsets.push_back(offsets[dim]);
        outerDimSizes.push_back(sizes[dim]);
      }
    }
    applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
    resultOffsets = outerDimOffsets;
    resultSizes = outerDimSizes;
    return success();
  }

  /// Method to return the tiled implementation of tensor.pack as a consumer.
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();

    int64_t inputRank = packOp.getSourceRank();
    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), offsets, sizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
            outerDimSizes)))
      return failure();

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
                                     outputOffsets, outputSizes)))
      return failure();

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
};

struct UnpackTileDimInfo {
  bool isAlignedToInnerTileSize;
  OpFoldResult sourceOffset;
  OpFoldResult sourceSize;
  OpFoldResult resultOffset;
  OpFoldResult destExpandedSize;
};

/// Returns the needed information for tiling unpack op on `tileDim` with given
/// `tileOffset` and `tileSize`. For more details, see the comment of
/// `getTiledImplementation`.
static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
                                              int64_t tileDim,
                                              OpFoldResult tileOffset,
                                              OpFoldResult tileSize) {
  UnpackTileDimInfo info;
  Attribute zeroAttr = b.getIndexAttr(0);
  Attribute oneAttr = b.getIndexAttr(1);
  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
      unpackOp.getDimAndTileMapping();
  // The dimension is not one of the packed data dimensions.
  if (!dimAndTileMapping.count(tileDim)) {
    info.isAlignedToInnerTileSize = true;
    info.sourceOffset = tileOffset;
    info.sourceSize = tileSize;
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
    return info;
  }

  Location loc = unpackOp.getLoc();
  using AV = affine::AffineValueExpr;
  affine::AffineBuilder ab(b, loc);
  AffineExpr dim0, dim1, sym0;
  bindDims(b.getContext(), dim0, dim1);
  bindSymbols(b.getContext(), sym0);

  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];

  info.isAlignedToInnerTileSize = false;
  FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
      presburger::BoundType::UB, tileSize,
      /*stopCondition=*/nullptr, /*closedUB=*/true);
  std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
  if (!failed(cstSize) && cstInnerSize) {
    if (*cstSize % *cstInnerSize == 0)
      info.isAlignedToInnerTileSize = true;

    // If the tiling size equals the inner tiling size, the outer dims are
    // always 1.
    if (*cstInnerSize == *cstSize) {
      auto lhs = AV(dim0).bind(tileOffset);
      auto rhs = AV(dim1).bind(innerTileSize);
      info.sourceOffset = ab.floor(lhs, rhs);
      info.sourceSize = oneAttr;
      info.resultOffset = zeroAttr;
      info.destExpandedSize = tileSize;
      return info;
    }
  }

  if (info.isAlignedToInnerTileSize) {
    info.sourceOffset =
        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;

    // The ceilDiv is needed here because there could be incomplete tiles
    // even in perfect tiling cases. E.g.,
    //   %0 = unpack tensor<33x2xf32> into tensor<64xf32>
    // If the tiling size is 32, there will be 3 tiles. Two of them have
    // size=32; one of them has size=2. The size is represented using an
    // affine_min op; we need ceilDiv.
    info.sourceSize =
        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
    return info;
  }

  affine::DivModValue firstCoord = affine::getDivMod(
      b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  OpFoldResult tileExclusiveBound =
      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
  affine::DivModValue lastCoord = affine::getDivMod(
      b, loc,
      getValueOrCreateConstantIndexOp(
          b, loc,
          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));

  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
                                       AV(dim1).bind(firstCoord.quotient));
  info.sourceSize =
      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
  info.sourceOffset = firstCoord.quotient;
  info.resultOffset = firstCoord.remainder;
  // Do not create affine ops for the expanded size because the affine op
  // would be too complicated and would trigger an issue in affine op
  // simplification.
  info.destExpandedSize = b.createOrFold<arith::MulIOp>(
      loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  return info;
}

struct UnPackOpTiling
    : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    auto unpackOp = cast<UnPackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        unpackOp.getDestRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
  }

  /// There are two cases when tiling unpack ops. If the tiling size is
  /// aligned to the inner tile size, the corresponding tiles of the source
  /// are all complete. Otherwise, there are incomplete tiles, and we need to
  /// expand the slice of the source to get complete tiles. The tiled unpack
  /// op unpacks more data from the source, so we need an extract_slice op to
  /// shift and truncate the output.
  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
  /// coordinates of the second tile (i.e., result[15..31]) are
  /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
  /// row are incomplete tiles. To represent the unpack op, we have to
  /// complete the rows, i.e., the input coordinates start with (1, 0) and
  /// end with (3, 7). In this context, the tiled unpack produces (3 * n)
  /// elements because there are 3 rows in total. A following
  /// tensor.extract_slice op then yields the actual result.
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto unpackOp = cast<UnPackOp>(op);
    int64_t srcRank = unpackOp.getSourceRank();
    int64_t destRank = unpackOp.getDestRank();
    int64_t numInnerTiles = srcRank - destRank;
    Location loc = unpackOp.getLoc();

    // The perfect tiling case indicates that the tiling sizes are multiples
    // of the inner_tile_size. In this context, no extra data is needed when
    // representing the tiled unpack op.
    bool isPerfectTilingCase = true;
    Attribute oneAttr = b.getIndexAttr(1);
    SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
    SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
    SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
    for (auto dim : llvm::seq<int64_t>(0, destRank)) {
      UnpackTileDimInfo info =
          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
      if (!info.isAlignedToInnerTileSize)
        isPerfectTilingCase = false;
      sliceSrcIndices.push_back(info.sourceOffset);
      sliceSrcSizes.push_back(info.sourceSize);
      destExpandedSizes.push_back(info.destExpandedSize);
      resultOffsetsFromDest.push_back(info.resultOffset);
    }

    // The tiling is applied on destination dimensions. We have to apply the
    // interchange on source dimensions if outer_dims_perm is set.
    applyPermToRange(sliceSrcIndices, sliceSrcSizes,
                     unpackOp.getOuterDimsPerm());
    Attribute zeroAttr = b.getIndexAttr(0);
    sliceSrcIndices.append(numInnerTiles, zeroAttr);
    sliceSrcSizes.append(unpackOp.getMixedTiles());
    sliceSrcStrides.append(numInnerTiles, oneAttr);
    SmallVector<Operation *> generatedSlices;
    tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
        b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
        sliceSrcStrides);
    generatedSlices.push_back(sliceSource);

    SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
    Value sliceDest;
    if (isPerfectTilingCase) {
      auto destSliceOp = tensor::ExtractSliceOp::create(
          b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
      sliceDest = destSliceOp;
      generatedSlices.push_back(destSliceOp);
    } else {
      sliceDest = tensor::EmptyOp::create(
          b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
    }

    SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
    for (auto tile : unpackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledUnpackOp = UnPackOp::create(
        b, loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());

    if (isPerfectTilingCase)
      return TilingResult{{tiledUnpackOp},
                          SmallVector<Value>(tiledUnpackOp->getResults()),
                          generatedSlices};

    auto extractSlice = tensor::ExtractSliceOp::create(
        b, loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
        destStrides);
    return TilingResult{
        {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
  }
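
  // Illustrative example (hypothetical IR): for the Nn_to_N case above with
  // N=32, n=8, and tiling_size=15, the second tile is computed roughly as:
  //
  //   %src = tensor.extract_slice %source[1, 0] [3, 8] [1, 1]
  //   %empty = tensor.empty() : tensor<24xf32>
  //   %u = linalg.unpack %src inner_dims_pos = [0] inner_tiles = [8]
  //          into %empty
  //   %res = tensor.extract_slice %u[7] [15] [1]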
1260
1261 LogicalResult
1262 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
1263 ArrayRef<OpFoldResult> offsets,
1264 ArrayRef<OpFoldResult> sizes,
1265 SmallVector<OpFoldResult> &resultOffsets,
1266 SmallVector<OpFoldResult> &resultSizes) const {
1267 resultOffsets = llvm::to_vector(offsets);
1268 resultSizes = llvm::to_vector(sizes);
1269 return success();
1270 }
1271
1272 FailureOr<TilingResult>
1273 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
1274 ArrayRef<OpFoldResult> offsets,
1275 ArrayRef<OpFoldResult> sizes) const {
1276 FailureOr<TilingResult> tilingResult =
1277 getTiledImplementation(op, b, offsets, sizes);
1278 if (failed(tilingResult))
1279 return failure();
1280 return tilingResult.value();
1281 }
1282
1283 /// Method to return the position of iteration domain tile computed by the
1284 /// tiled operation.
1285 LogicalResult getIterationDomainTileFromOperandTiles(
1286 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1287 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1288 ArrayRef<SmallVector<OpFoldResult>> allSizes,
1289 SmallVectorImpl<OpFoldResult> &resultOffsets,
1290 SmallVectorImpl<OpFoldResult> &resultSizes) const {
1291 if (operandNumbers.size() != 1) {
1292 LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
1293 return failure();
1294 }
1295 auto unPackOp = cast<UnPackOp>(op);
1296 unsigned operandNumber = operandNumbers[0];
1297 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1298 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1299
1300 // If the operand tile is the dest, then no adjustment is needed.
1301 if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
1302 resultOffsets = llvm::to_vector(offsets);
1303 resultSizes = llvm::to_vector(sizes);
1304 return success();
1305 }
1306 Location loc = unPackOp.getLoc();
1307
1308 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1309 auto destOffsets = offsets.drop_back(numTiles);
1310 auto destSizes = sizes.drop_back(numTiles);
1311 // The tiling is applied on interchanged dimensions. We have to undo the
1312 // interchange to map sizes and offsets to the original input.
1313 int64_t outputRank = unPackOp.getDestRank();
1314 ReifiedRankedShapedTypeDims reifiedReturnShapes;
1315 if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
1316 return failure();
1317 SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
1318 SmallVector<OpFoldResult> origOffsets(destOffsets);
1319 SmallVector<OpFoldResult> origSizes(destSizes);
1320 applyPermToRange(origOffsets, origSizes,
1321 invertPermutationVector(unPackOp.getOuterDimsPerm()));
1322
1323 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1324 unPackOp.getDimAndTileMapping();
1325
1326 for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
1327 using AV = affine::AffineValueExpr;
1328 affine::AffineBuilder ab(b, loc);
1329 AffineExpr dim0, dim1, sym0;
1330 bindDims(b.getContext(), dim0, dim1);
1331 bindSymbols(b.getContext(), sym0);
1332 if (dimAndTileMapping.count(dim)) {
1333 // If the data dimension is tiled, the i-th index is the product of
1334 // offset_i and tile_i, and the i-th size is the product of sizes_i and
1335 // tile_i. The sizes must be clamped to the sizes of the unpack result.
1336 auto avOffset = AV(dim0).bind(origOffsets[dim]);
1337 auto avSize = AV(dim0).bind(origSizes[dim]);
1338 auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
1339 auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
1340 resultOffsets.push_back(ab.mul(avOffset, avTileSize));
1341 auto avResultOffset = AV(dim1).bind(resultOffsets.back());
1342 resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
1343 ab.sub(avResultSize, avResultOffset)}));
1344 } else {
1345 resultOffsets.push_back(origOffsets[dim]);
1346 resultSizes.push_back(origSizes[dim]);
1347 }
1348 }
1349 return success();
1350 }
1351
1352 /// Method to return the tiled implementation of tensor.unpack as a consumer.
1353 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1354 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1355 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1356 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
1357 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1358 LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; });
1359 return failure();
1360 }
1361 auto unPackOp = cast<UnPackOp>(op);
1362 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1363 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1364
1365 // tensor.unpack op is fusible (as a consumer) only if inner dims are not
1366 // tiled.
1367 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1368 for (auto iter :
1369 llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
1370 if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
1371 return failure();
1372 }
1373
1374 Location loc = unPackOp.getLoc();
1375
1376 // Fetch offset/size for creating the slice of the dest operand of
1377 // unpack op.
1378 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1379 if (failed(getIterationDomainTileFromOperandTiles(
1380 op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
1381 outputSizes)))
1382 return failure();
1383
1384 auto oneAttr = b.getI64IntegerAttr(1);
1385 int64_t outputRank = unPackOp.getDestRank();
1386 SmallVector<OpFoldResult> strides(outputRank, oneAttr);
1387
1388 SmallVector<Value> tiledOperands;
1389 // Create slice of the dest operand.
1390 auto extractDestSlice = tensor::ExtractSliceOp::create(
1391 b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
1392 tiledOperands.push_back(extractDestSlice);
1393
1394 strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
1395 // Create slice of the source operand.
1396 auto extractSourceSlice = tensor::ExtractSliceOp::create(
1397 b, loc, unPackOp.getSource(), offsets, sizes, strides);
1398 tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
1399 for (auto tile : unPackOp.getInnerTiles())
1400 tiledOperands.push_back(tile);
1401
1402 // Create tiled unpack op.
1403 Operation *tiledUnPackOp =
1404 UnPackOp::create(b, loc, TypeRange{extractDestSlice.getType()},
1405 tiledOperands, op->getAttrs());
1406
1407 return TilingResult{{tiledUnPackOp},
1408 SmallVector<Value>(tiledUnPackOp->getResults()),
1409 llvm::to_vector(ArrayRef<Operation *>{
1410 extractSourceSlice, extractDestSlice})};
1411 }
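 // Illustrative sketch, not part of the source: for a consumer-fused tile
 // whose inner dimension is untiled (shapes and values hypothetical), the
 // method above produces IR of the form
 //
 //   %src = tensor.extract_slice %source [...] : ... to tensor<4x8x8xf32>
 //   %dst = tensor.extract_slice %dest [...] : ... to tensor<4x64xf32>
 //   %r = linalg.unpack %src inner_dims_pos = [1] inner_tiles = [8]
 //          into %dst : tensor<4x8x8xf32> -> tensor<4x64xf32>
 //
 // Both extract_slice ops are reported in the generated-slices list of the
 // TilingResult so callers composing the fusion can keep rewriting through
 // them.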
1412};
1413
1414} // namespace
1415
1416template <typename OpType>
1417static void registerOne(MLIRContext *ctx) {
1418 OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1419 OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1420 *ctx);
1421}
1422
1423 /// Variadic helper that applies `registerOne` to every op type in the pack.
1424template <typename... OpTypes>
1425static void registerAll(MLIRContext *ctx) {
1426 (registerOne<OpTypes>(ctx), ...);
1427}
1428
1429#define GET_OP_LIST
1430
1431void mlir::linalg::registerTilingInterfaceExternalModels(
1432 DialectRegistry &registry) {
1433 registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
1435 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1436 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1437 registerAll<
1438#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
1439 >(ctx);
1440 });
1441}
1442
1443void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
1444 DialectRegistry &registry) {
1445 registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) {
1446 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1447 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1448 });
1449}