1//===- TilingInterfaceImpl.cpp - Implementation of TilingInterface -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
26#include "llvm/Support/Debug.h"
27#include <optional>
28
29#define DEBUG_TYPE "linalg-tiling-interface-impl"
30
31using namespace mlir;
32using namespace mlir::linalg;
33
34//===----------------------------------------------------------------------===//
35// Utility methods for implementation of Tiling Interface for Linalg ops
36//===----------------------------------------------------------------------===//
37
38/// Return the SSA values that represent the data point accessed using a given
39/// `indexingMap` for a given point in the iteration space represented by `ivs`.
static SmallVector<Value> getIndicesForAccess(OpBuilder &b, Location loc,
                                              AffineMap indexingMap,
                                              ValueRange ivs) {
  SmallVector<Value> indices;
  indices.reserve(indexingMap.getNumResults());
45 for (auto result : indexingMap.getResults()) {
46 AffineMap m = AffineMap::get(indexingMap.getNumDims(),
47 indexingMap.getNumSymbols(), result);
48 Value v = affine::AffineApplyOp::create(b, loc, m, ivs);
49 indices.push_back(v);
50 }
51 return indices;
52}
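
// Illustrative sketch (not from the source): given an indexing map
// (d0, d1) -> (d1, d0) and ivs = [%i, %j], the loop above emits one
// affine.apply per map result,
//   %0 = affine.apply affine_map<(d0, d1) -> (d1)>(%i, %j)
//   %1 = affine.apply affine_map<(d0, d1) -> (d0)>(%i, %j)
// producing the transposed access indices [%j, %i].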
53
54/// Method to inline the payload of a `linalgOp` given the iteration space
55/// point and values for the arguments of the payload.
56static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp,
57 ValueRange ivs, ValueRange argValues) {
58 Block *body = linalgOp.getBlock();
59 IRMapping map;
60 map.map(body->getArguments(), argValues);
61 for (auto &op : body->without_terminator()) {
62 if (auto indexOp = dyn_cast<IndexOp>(&op)) {
63 map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
64 continue;
65 }
66 b.clone(op, map);
67 }
68
69 Operation *terminator = body->getTerminator();
70 Location loc = terminator->getLoc();
71 for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
72 Value toStore = map.lookupOrDefault(operand.value());
73 OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
    SmallVector<Value> indices = getIndicesForAccess(
        b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
76 memref::StoreOp::create(b, loc, toStore,
77 linalgOp.getDpsInitOperand(operand.index())->get(),
78 indices);
79 }
80 return success();
81}
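
// Sketch of the effect (an assumed example, not in the source): for a
// linalg.generic whose payload is a single arith.addf on memref operands,
// inlining at iteration point (%i, %j) yields roughly:
//   %sum = arith.addf %lhsVal, %rhsVal   // cloned payload op
//   memref.store %sum, %out[%i, %j]      // store of the terminator operand
// where %lhsVal/%rhsVal are the argValues loaded by the caller.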
82
83//===----------------------------------------------------------------------===//
84// External Model for implementing `TilingInterface` for `LinalgOp`s.
85//===----------------------------------------------------------------------===//
86
87namespace {
/// External model implementation of TilingInterface for LinalgOps. An
/// external model is used for now, until the use of `TilingInterface` is
/// on par with the current Linalg tiling + fusion patterns. Once it is, it
/// may become possible to move this into the op definition (though there are
/// advantages to leaving it as an external model).
93template <typename LinalgOpTy>
94struct LinalgOpTilingInterface
95 : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
96 LinalgOpTy> {
97 /// Return the loop iterator type.
98 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
99 LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
100 return concreteOp.getIteratorTypesArray();
101 }
102
103 /// Return the iteration domain range.
104 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
105 OpBuilder::InsertionGuard g(b);
106 b.setInsertionPoint(op);
107 Location loc = op->getLoc();
108 LinalgOp linalgOp = cast<LinalgOp>(op);
109 SmallVector<OpFoldResult> allShapesSizes =
110 linalgOp.createFlatListOfOperandDims(b, loc);
111 AffineMap map = linalgOp.getShapesToLoopsMap();
112
113 return llvm::to_vector(
114 llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
115 OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
116 b, loc, loopExpr, allShapesSizes);
117 return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
118 }));
119 }
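
  // Worked example (hypothetical shapes): for a linalg.matmul on
  // tensor<?x?xf32> operands, createFlatListOfOperandDims collects
  // [dim(A,0), dim(A,1), dim(B,0), dim(B,1), dim(C,0), dim(C,1)] and the
  // shapes-to-loops map selects the (i, j, k) extents from that list, so
  // three Ranges of the form {0, <extent>, 1} are returned.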
120
121 /// Instantiate the tiled implementation of the operation.
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
126 // Leave the `sizeBounds` value empty. That is only needed when the `sizes`
127 // specified could lead to out of bounds accesses.
128 Location loc = op->getLoc();
129 LinalgOp linalgOp = cast<LinalgOp>(op);
130 SmallVector<Value> valuesToTile = linalgOp->getOperands();
131 SmallVector<Value> tiledOperands = makeTiledShapes(
132 b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
133 SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
134 llvm::make_filter_range(
135 tiledOperands,
136 [](Value v) -> bool {
137 return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
138 v.getDefiningOp());
139 }),
140 [](Value v) -> Operation * { return v.getDefiningOp(); });
141
142 SmallVector<Type> resultTensorTypes =
143 getTensorOutputTypes(linalgOp, tiledOperands);
144
145 Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
146 offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);
147
148 return TilingResult{
149 {tiledOp}, SmallVector<Value>(tiledOp->getResults()), generatedSlices};
150 }
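
  // For instance (a sketch): tiling a linalg.matmul with offsets
  // (%iv0, %iv1, 0) and sizes (32, 64, K) yields tensor.extract_slice (or
  // memref.subview) ops for both inputs and the init, a clone of the matmul
  // on those slices, and the slice ops in `generatedSlices` so producers can
  // later be fused into them.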
151
152 /// Utility to fetch the offsets and sizes when applied as per the indexing
153 /// map of the linalg op. This helps in fusing the linalg op as a consumer of
154 /// a given slice op.
155 static LogicalResult
156 getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b,
157 ArrayRef<AffineMap> indexingMaps,
                         ArrayRef<SmallVector<OpFoldResult>> allOffsets,
                         ArrayRef<SmallVector<OpFoldResult>> allSizes,
                         SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
161 SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
162 DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;
163
164 for (auto [indexingMap, offsets, sizes] :
165 llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
166 for (auto [resultExpr, offset, size] :
167 llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
168 auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
169 if (!dimExpr)
170 continue;
171 unsigned position = dimExpr.getPosition();
172 auto it = mappedOffsets.find(position);
173 if (it != mappedOffsets.end()) {
174 OpFoldResult seenOffset = it->second;
175 OpFoldResult seenSize = mappedSizes.lookup(position);
176 if (seenOffset != offset || seenSize != size) {
177 LLVM_DEBUG({
178 llvm::dbgs() << "inconsistent iteration space mapping from "
179 "offsets/sizes of operands/results";
180 });
181 return failure();
182 }
183 } else {
184 mappedOffsets[position] = offset;
185 mappedSizes[position] = size;
186 }
187 }
188 }
189
190 // Aggregate from the given operand offsets and sizes, or default to
191 // iteration space values.
192 SmallVector<Range> iterationDomain =
193 cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
194 mappedOffsetsVec.resize(iterationDomain.size());
195 mappedSizesVec.resize(iterationDomain.size());
196 for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
197 auto it = mappedOffsets.find(index);
198 if (it != mappedOffsets.end()) {
199 mappedOffsetsVec[index] = it->second;
200 mappedSizesVec[index] = mappedSizes.lookup(index);
201 continue;
202 }
203 mappedOffsetsVec[index] = domain.offset;
204 mappedSizesVec[index] = domain.size;
205 }
206 return success();
207 }
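
  // Worked example (assumed): for an operand with indexing map
  // (d0, d1, d2) -> (d0, d2) and an operand tile with offsets [%o0, %o2] and
  // sizes [%s0, %s2], dimensions d0 and d2 are mapped to (%o0, %s0) and
  // (%o2, %s2), while the unmapped d1 falls back to its full extent from the
  // iteration domain.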
208
  /// Method to return the tile of the iteration domain computed from the
  /// given tiles of the operands.
211 LogicalResult getIterationDomainTileFromOperandTiles(
212 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
216 SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
217 auto linalgOp = cast<LinalgOp>(op);
218
219 SmallVector<AffineMap> indexingMaps =
220 llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
221 OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
222 return linalgOp.getMatchingIndexingMap(&opOperand);
223 });
224 if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
225 allSizes, iterDomainOffsets,
226 iterDomainSizes))) {
227 return failure();
228 }
229 return success();
230 }
231
232 /// Return the details of the output tile generated by the tiled
233 /// implementation.
234 LogicalResult
235 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
239 SmallVector<OpFoldResult> &resultSizes) const {
240 Location loc = op->getLoc();
241 LinalgOp linalgOp = cast<LinalgOp>(op);
242
243 AffineExpr d0;
244 bindDims(b.getContext(), d0);
245 SmallVector<OpFoldResult> subShapeSizes =
246 llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
247 return affine::makeComposedFoldedAffineApply(b, loc, d0 - 1, ofr);
248 }));
249
250 OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
    SliceParameters sliceParams = computeSliceParameters(
        b, loc, outOperand->get(), sizes,
253 linalgOp.getMatchingIndexingMap(outOperand), offsets,
254 /*ubs*/ {}, subShapeSizes, true);
255 resultOffsets = sliceParams.offsets;
256 resultSizes = sliceParams.sizes;
257 return success();
258 }
259
260 LogicalResult getIterationDomainTileFromResultTile(
261 Operation *op, OpBuilder &b, unsigned resultNumber,
      ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
264 SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
265 auto linalgOp = cast<LinalgOp>(op);
266
267 // Check that the indexing map used for the output is a projected
268 // permutation. This could be relaxed with a more general approach that can
269 // map the offsets and sizes from the result to iteration space tiles
270 // (filling in full extent for dimensions not used to access the result).
271 AffineMap indexingMap =
272 linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
273 if (!indexingMap.isProjectedPermutation()) {
274 return op->emitOpError(
275 "unhandled tiled implementation generation when result is not "
276 "accessed using a permuted projection");
277 }
278
279 SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
280 SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
281 auto status =
282 getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
283 {allSizes}, iterDomainOffsets, iterDomainSizes);
284 (void)status;
285 assert(succeeded(status) && "unexpected error in offset calculation");
286 return success();
287 }
288
289 FailureOr<TilingResult>
290 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
293 SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
294 if (failed(getIterationDomainTileFromResultTile(
295 op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
296 return failure();
297 }
298 auto tilingInterfaceOp = cast<TilingInterface>(op);
299 FailureOr<TilingResult> tilingResult =
300 tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);
301
302 if (failed(tilingResult))
303 return failure();
304
305 if (tilingResult->tiledOps.size() != 1)
306 return op->emitOpError("failed to generate tiled implementation");
307
308 return TilingResult{
309 tilingResult->tiledOps,
310 SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
311 tilingResult->generatedSlices};
312 }
313
  /// Method to generate the tiled implementation of an operation from the
  /// tiles of its operands.
316 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
317 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
320 SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
321 if (failed(getIterationDomainTileFromOperandTiles(
322 op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
323 mappedSizes))) {
324 return failure();
325 }
326 return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
327 }
328
329 LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
330 Location loc,
331 ValueRange ivs) const {
332 auto linalgOp = cast<LinalgOp>(op);
333 if (!linalgOp.hasPureBufferSemantics())
334 return op->emitOpError("expected operation to have buffer semantics");
335
336 SmallVector<Value> indexedValues;
337 indexedValues.reserve(linalgOp->getNumOperands());
338 Location linalgOpLoc = op->getLoc();
339 /// Load the data corresponding to the block arguments that
340 /// represent input operands.
341 for (OpOperand &operand : linalgOp->getOpOperands()) {
342 if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
343 indexedValues.push_back(nullptr);
344 continue;
345 }
346 if (linalgOp.isScalar(&operand)) {
347 indexedValues.push_back(operand.get());
348 continue;
349 }
      SmallVector<Value> indices = getIndicesForAccess(
          builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
352 Value load =
353 memref::LoadOp::create(builder, linalgOpLoc, operand.get(), indices);
354 indexedValues.push_back(load);
355 }
356
357 /// Inline the op payload and store the result.
358 return inlinePayload(builder, linalgOp, ivs, indexedValues);
359 }
360};
361
362//===----------------------------------------------------------------------===//
363// External Model for implementing `PartialReductionInterface` for `LinalgOp`s.
364//===----------------------------------------------------------------------===//
365
366/// In a given set vector, get the position of a particular element.
367std::optional<int> getPositionIn(const llvm::SetVector<unsigned> &reductionDims,
368 unsigned value) {
369 for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
370 if (reductionDim == value) {
371 return index;
372 }
373 }
374 return std::nullopt;
375}
376
/// Return the AffineMaps to use for the `outs` operands of the linalg op
/// generated for partial results. The new AffineMap is the AffineMap of the
/// untiled op with the reduction dimensions appended at the end, in the
/// order in which they were specified during tiling.
static SmallVector<AffineMap>
getPartialResultAffineMaps(LinalgOp linalgOp,
383 const SetVector<unsigned> &reductionDims) {
384 auto partialReductionMaps = llvm::map_to_vector(
385 linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
386 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
387 for (auto redPos : reductionDims) {
388 map =
389 map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
390 map.getNumResults());
391 }
392 return map;
393 });
394 return partialReductionMaps;
395}
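
// Example (a sketch): for an op with init map (d0, d1) -> (d0) and
// reductionDims = {1}, the partial-result map becomes (d0, d1) -> (d0, d1);
// the reduced dimension d1 is appended so that each tile of the reduction
// can write its partial accumulator to a distinct position.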
396
397struct InitSliceInfo {
398 SmallVector<int64_t> resultShape;
399 SmallVector<OpFoldResult> offsets;
400 SmallVector<OpFoldResult> sizes;
401 SmallVector<OpFoldResult> strides;
402};
403
404/// Return the result shape, offsets, sizes and strides of the slice of the
405/// `initValue` to use as the destination of the partial reduction op generated
406/// with outer reduction strategy.
407static InitSliceInfo getInitSliceInfoForOuterReduction(
408 MLIRContext *context, ArrayRef<OpFoldResult> offsets,
409 ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
410 ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
411 int64_t initRank = partialReductionMap.getNumResults();
412 SmallVector<OpFoldResult> initOffsets, initSizes;
413 Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
414 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
415 SmallVector<OpFoldResult> initStrides(initRank, one);
416 for (AffineExpr dimExpr : partialReductionMap.getResults()) {
417 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
418 if (reductionDims.contains(dim)) {
419 initOffsets.push_back(zero);
420 } else {
421 initOffsets.push_back(offsets[dim]);
422 }
423 initSizes.push_back(sizes[dim]);
424 }
425 SmallVector<int64_t> resultShape;
426 std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
427 return {resultShape, initOffsets, initSizes, initStrides};
428}
429
430/// Return the result shape, offsets, sizes and strides of the slice of the
431/// `initValue` to use as destination of the partial reduction op generated with
432/// outer parallel strategy.
433static InitSliceInfo getInitSliceInfoForOuterParallel(
434 MLIRContext *context, ArrayRef<OpFoldResult> offsets,
435 ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
436 ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
437 int64_t initRank = partialReductionMap.getNumResults();
438 SmallVector<OpFoldResult> initOffsets, initSizes;
439 Attribute one = IntegerAttr::get(IndexType::get(context), 1);
440 SmallVector<OpFoldResult> initStrides(initRank, one);
441 SmallVector<OpFoldResult> resultShape;
442 for (AffineExpr dimExpr : partialReductionMap.getResults()) {
443 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
444 if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
445 initOffsets.push_back(splitReductionIvs[dimPos.value()]);
446 initSizes.push_back(one);
447 } else {
448 initOffsets.push_back(offsets[dim]);
449 initSizes.push_back(sizes[dim]);
450 resultShape.push_back(sizes[dim]);
451 }
452 }
453 SmallVector<int64_t> staticShapes;
454 std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
455 return {staticShapes, initOffsets, initSizes, initStrides};
456}
457
458/// Return the result shape, offsets, sizes and strides of the slice of the
459/// `initValue` to use as destination of the partial reduction op.
static InitSliceInfo getInitSliceInfo(MLIRContext *context,
                                      ReductionTilingStrategy tilingStrategy,
                                      ArrayRef<OpFoldResult> offsets,
                                      ArrayRef<OpFoldResult> sizes,
                                      const SetVector<unsigned> &reductionDims,
                                      ArrayRef<OpFoldResult> splitReductionIvs,
                                      AffineMap partialReductionMap) {
  if (tilingStrategy ==
      ReductionTilingStrategy::PartialReductionOuterReduction) {
    return getInitSliceInfoForOuterReduction(context, offsets, sizes,
                                             reductionDims, splitReductionIvs,
                                             partialReductionMap);
  }
  assert(tilingStrategy ==
             ReductionTilingStrategy::PartialReductionOuterParallel &&
         "unexpected ReductionTilingStrategy");
474 return getInitSliceInfoForOuterParallel(context, offsets, sizes,
475 reductionDims, splitReductionIvs,
476 partialReductionMap);
477}
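
// Contrasting the two strategies on a hypothetical case with
// reductionDims = {1}: the outer-reduction strategy slices the init at
// offset 0 with the full tile size along d1 (each serial tile accumulates in
// place), whereas the outer-parallel strategy indexes d1 with the current
// splitReductionIvs value and size 1 (each parallel worker owns one slice of
// the partial result).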
478
479/// External model implementation of PartialReductionInterface for
480/// LinalgOps.
481template <typename LinalgOpTy>
482struct LinalgOpPartialReductionInterface
483 : public PartialReductionOpInterface::ExternalModel<
484 LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
485 FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
486 Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
487 const SetVector<unsigned> &reductionDims) const {
488 auto linalgOp = cast<LinalgOp>(op);
489
490 OpBuilder::InsertionGuard guard(b);
491 if (linalgOp.hasPureBufferSemantics())
492 return op->emitOpError("expected operation to have tensor semantics");
493
494 SmallVector<AffineMap> partialResultMaps =
495 getPartialResultAffineMaps(linalgOp, reductionDims);
496
497 SmallVector<Value> inits;
498 for (auto [initIdx, result, partialMap] :
499 llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
500 SmallVector<Operation *, 4> combinerOps;
501 if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
502 combinerOps) ||
503 combinerOps.size() != 1)
        return op->emitOpError("failed to analyze the reduction operation.");
505
506 Operation *reductionOp = combinerOps[0];
507 std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
508 if (!identity.has_value())
509 return op->emitOpError(
510 "Failed to get an identity value for the reduction operation.");
511
512 // Append the new partial result dimensions.
513 SmallVector<OpFoldResult> partialResultShape;
514 for (AffineExpr dimExpr : partialMap.getResults()) {
515 auto dim = cast<AffineDimExpr>(dimExpr);
516 partialResultShape.push_back(sizes[dim.getPosition()]);
517 }
518
519 Type elType = getElementTypeOrSelf(result.getType());
520 Value emptyTensor =
521 tensor::EmptyOp::create(b, loc, partialResultShape, elType);
522 Value constantOp = arith::ConstantOp::create(b, loc, *identity);
523 auto identityTensor =
524 linalg::FillOp::create(b, loc, constantOp, emptyTensor);
525 inits.push_back(identityTensor.getResult(0));
526 }
527
528 return inits;
529 }
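
  // Example of the generated IR (a sketch with made-up shapes): for a
  // floating-point add reduction tiled with a k-tile of 16, the neutral
  // element is 0.0 and the init becomes:
  //   %empty = tensor.empty(%d0) : tensor<?x16xf32>
  //   %zero = arith.constant 0.0 : f32
  //   %init = linalg.fill ins(%zero : f32)
  //                       outs(%empty : tensor<?x16xf32>) -> tensor<?x16xf32>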
530
531 FailureOr<TilingResult>
532 tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
533 ReductionTilingStrategy tilingStrategy,
534 ValueRange init, ArrayRef<OpFoldResult> offsets,
535 ArrayRef<OpFoldResult> sizes,
536 const SetVector<unsigned> &reductionDims,
537 ArrayRef<OpFoldResult> splitReductionIvs) const {
538 OpBuilder::InsertionGuard guard(b);
539 auto linalgOp = cast<LinalgOp>(op);
540
541 SmallVector<AffineMap> partialReductionMaps =
542 getPartialResultAffineMaps(linalgOp, reductionDims);
543
    // Step 1. Extend the init maps to include the reduction dimensions,
    // since we are converting them to parallel dimensions.
546 SmallVector<AffineMap> newInitMaps;
547 if (tilingStrategy ==
548 ReductionTilingStrategy::PartialReductionOuterReduction) {
549 newInitMaps = llvm::to_vector(partialReductionMaps);
550 } else {
551 newInitMaps = llvm::map_to_vector(
552 linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
553 return linalgOp.getMatchingIndexingMap(&opOperand);
554 });
555 }
556
557 // Step 2a: Extract a slice of the input operands.
558 SmallVector<Value> tiledInputs = makeTiledShapes(
559 b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
560 SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
561 llvm::make_filter_range(
562 tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
563 [](Value v) -> Operation * { return v.getDefiningOp(); });
564
565 // Step 2b: Extract a slice of the init operands.
566 SmallVector<Value, 1> tiledInits;
567 for (auto [partialReductionMap, valueToTile] :
568 llvm::zip_equal(partialReductionMaps, init)) {
569 InitSliceInfo sliceInfo = getInitSliceInfo(
570 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
571 splitReductionIvs, partialReductionMap);
572 auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
573 RankedTensorType sliceResultType = RankedTensorType::get(
574 sliceInfo.resultShape, valueToTileType.getElementType(),
575 valueToTileType.getEncoding());
576 auto sliceOp = tensor::ExtractSliceOp::create(
577 b, loc, sliceResultType, valueToTile, sliceInfo.offsets,
578 sliceInfo.sizes, sliceInfo.strides);
579 tiledInits.push_back(sliceOp.getResult());
580 generatedSlices.push_back(sliceOp);
581 }
582
583 // Update the indexing maps.
584 SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
585 for (auto [initOperand, newInitMap] :
586 llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
587 int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
588 newMaps[mapIdx] = newInitMap;
589 }
590
591 // Step 3. Change the reduction dim iterator types.
592 SmallVector<utils::IteratorType> newIteratorTypes =
593 linalgOp.getIteratorTypesArray();
594 if (tilingStrategy ==
595 ReductionTilingStrategy::PartialReductionOuterReduction) {
596 for (int dim : reductionDims)
597 newIteratorTypes[dim] = utils::IteratorType::parallel;
598 }
599
600 // Step 4. Create the new generic op.
601 Operation *partialReductionOp;
602 auto resultTypes = ValueRange(tiledInits).getTypes();
603 if (tilingStrategy ==
604 ReductionTilingStrategy::PartialReductionOuterReduction) {
605 auto genericOp = GenericOp::create(b, loc, resultTypes, tiledInputs,
606 tiledInits, newMaps, newIteratorTypes);
607 IRMapping mapping;
608 op->getRegion(0).cloneInto(&genericOp.getRegion(),
609 genericOp.getRegion().begin(), mapping);
610 partialReductionOp = genericOp.getOperation();
611 } else {
612 SmallVector<Value> operands = std::move(tiledInputs);
613 llvm::append_range(operands, tiledInits);
614 partialReductionOp = mlir::clone(b, op, resultTypes, operands);
615 }
616 return TilingResult{
617 {partialReductionOp},
618 llvm::map_to_vector(partialReductionOp->getResults(),
619 [](OpResult r) -> Value { return r; }),
620 generatedSlices};
621 }
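
  // Sketch of the outer-reduction path (hypothetical matmul with the k-dim
  // tiled): the init map gains d2 ((d0, d1, d2) -> (d0, d1, d2)), the d2
  // iterator flips to "parallel", and the cloned payload accumulates each
  // k-slice into its own position of the partial tensor; the surrounding
  // serial tile loop feeds the result back in as the next iteration's init.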
622
623 FailureOr<MergeResult>
624 mergeReductions(Operation *op, OpBuilder &b, Location loc,
625 ValueRange partialReduce,
626 const SetVector<unsigned> &reductionDims) const {
627 auto linalgOp = cast<LinalgOp>(op);
628 SmallVector<AffineMap> partialReductionMaps =
629 getPartialResultAffineMaps(linalgOp, reductionDims);
630
631 // Permute the reduction dims as permuted by the partial result map.
632 SmallVector<Operation *> mergeOperations;
633 SmallVector<Value> replacements;
634 for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
635 linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
636 unsigned initIdx = idx;
637 // linalg.reduce's iteration space is the tiled result's iteration space
638 // (and not the tiled operation's iteration space). To account for this,
639 // permute the reduction dimensions based on the partial result map of the
640 // tiled result.
641 SmallVector<int64_t> partialReductionDims;
642 for (auto [resultNum, dimExpr] :
643 llvm::enumerate(partialMap.getResults())) {
644 unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
645 if (llvm::is_contained(reductionDims, dim)) {
646 partialReductionDims.push_back(resultNum);
647 }
648 }
649
650 auto reduction = linalg::ReduceOp::create(
651 b, loc, partialResult, init, partialReductionDims,
652 [&linalgOp, &initIdx](OpBuilder &b, Location loc, ValueRange inputs) {
653 // Get the combiner op.
654 SmallVector<Operation *, 4> combinerOps;
655 matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
656 combinerOps);
657 Operation *clonedReductionOp = b.clone(*combinerOps[0]);
658 // Combine the input at idx and output at numInits + idx.
659 clonedReductionOp->setOperand(0, inputs[0]);
660 clonedReductionOp->setOperand(1, inputs[1]);
661 linalg::YieldOp::create(b, loc, clonedReductionOp->getResult(0));
662 });
663
664 mergeOperations.push_back(reduction);
665 replacements.push_back(reduction->getResult(0));
666 }
667
668 return MergeResult{mergeOperations, replacements};
669 }
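
  // Sketch of the merge (names assumed): for the matmul case above this
  // emits, with KT partial results along the appended dimension:
  //   linalg.reduce ins(%partial : tensor<MxNxKTxf32>)
  //                 outs(%init : tensor<MxNxf32>) dimensions = [2]
  // whose body clones the arith.addf combiner matched from the original
  // payload.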
670
671 LogicalResult getPartialResultTilePosition(
672 Operation *op, OpBuilder &b, unsigned resultNumber,
673 ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
674 ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
675 ArrayRef<OpFoldResult> splitReductionIvs,
676 SmallVector<OpFoldResult> &resultOffsets,
677 SmallVector<OpFoldResult> &resultSizes) const {
678 auto linalgOp = cast<LinalgOp>(op);
679 SmallVector<AffineMap> partialReductionMaps =
680 getPartialResultAffineMaps(linalgOp, reductionDims);
681 InitSliceInfo sliceInfo = getInitSliceInfo(
682 b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
683 splitReductionIvs, partialReductionMaps[resultNumber]);
684 std::swap(resultOffsets, sliceInfo.offsets);
685 std::swap(resultSizes, sliceInfo.sizes);
686
687 return success();
688 }
689};
690
691template <typename OpTy>
692static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
693 OpBuilder &builder) {
694 static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
695 "applies to only pack or unpack operations");
696 OpBuilder::InsertionGuard g(builder);
697 int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
698 : op.getDestRank();
699 OpFoldResult zero = builder.getIndexAttr(0);
700 OpFoldResult one = builder.getIndexAttr(1);
701 ReifiedRankedShapedTypeDims resultShape;
702 (void)reifyResultShapes(builder, op, resultShape);
703 SmallVector<Range> loopBounds(rank);
704 for (auto dim : llvm::seq<int64_t>(0, rank)) {
705 loopBounds[dim].offset = zero;
706 loopBounds[dim].stride = one;
707 loopBounds[dim].size = resultShape[0][dim];
708 }
709 return loopBounds;
710}
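
// Example (a sketch): for a linalg.pack of tensor<30xf32> into
// tensor<5x6xf32> with inner tile 6, the rank is the source rank (1) and the
// reified result shape supplies the outer extent, so the domain is the
// single Range{0, 5, 1}.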
711
712static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
                             SmallVector<OpFoldResult> &sizes,
                             ArrayRef<int64_t> permutation) {
715 if (permutation.empty())
716 return;
717 applyPermutationToVector<OpFoldResult>(offsets, permutation);
718 applyPermutationToVector<OpFoldResult>(sizes, permutation);
719}
720
721struct PackOpTiling
722 : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
723
724 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    // Note that here we only consider untiled dimensions and outer tiled data
    // dimensions; the inner tiled data dimensions are materialized when
    // building the body of the operation.
728 auto packOp = cast<PackOp>(op);
729 SmallVector<utils::IteratorType> iteratorTypes(
730 packOp.getSourceRank(), utils::IteratorType::parallel);
731 return iteratorTypes;
732 }
733
734 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
735 return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
736 }
737
738 FailureOr<TilingResult>
739 getTiledImplementation(Operation *op, OpBuilder &b,
740 ArrayRef<OpFoldResult> offsets,
741 ArrayRef<OpFoldResult> sizes) const {
742 auto packOp = cast<PackOp>(op);
743 Location loc = packOp.getLoc();
744
745 // The tiling is applied on interchanged dimensions. We have to undo the
746 // interchange to map sizes and offsets to the original input.
747 int64_t inputRank = packOp.getSourceRank();
748 SmallVector<OpFoldResult> origOffsets(offsets);
749 SmallVector<OpFoldResult> origSizes(sizes);
750 applyPermToRange(origOffsets, origSizes,
751 invertPermutationVector(packOp.getOuterDimsPerm()));
752
753 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
754 packOp.getDimAndTileMapping();
755 SmallVector<OpFoldResult> srcDimValues =
756 tensor::getMixedSizes(b, loc, packOp.getSource());
757 SmallVector<OpFoldResult> inputIndices, inputSizes;
758 for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
759 using AV = affine::AffineValueExpr;
760 affine::AffineBuilder ab(b, loc);
761 AffineExpr dim0, dim1, sym;
762 bindDims(b.getContext(), dim0, dim1);
763 bindSymbols(b.getContext(), sym);
764 if (dimAndTileMapping.count(dim)) {
765 // If the data dimension is tiled, the i-th index is the product of
766 // offset_i and tile_i, and the i-th size is the product of sizes_i and
767 // tile_i.
768 auto avOffset = AV(dim0).bind(origOffsets[dim]);
769 auto avSize = AV(dim0).bind(origSizes[dim]);
770 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
771 inputIndices.push_back(ab.mul(avOffset, avTileSize));
772 inputSizes.push_back(ab.mul(avSize, avTileSize));
773 } else {
774 inputIndices.push_back(origOffsets[dim]);
775 inputSizes.push_back(origSizes[dim]);
776 }
777
778 // Limit the size of the input operand for incomplete tiles.
779 if (packOp.getPaddingValue()) {
780 OpFoldResult dimSize = srcDimValues[dim];
781 auto avDimSize = AV(dim0).bind(dimSize);
782 auto avInputIdx = AV(dim1).bind(inputIndices.back());
783 inputSizes.back() =
784 ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
785 }
786 }
787
788 auto oneAttr = b.getI64IntegerAttr(1);
789 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
790
791 SmallVector<Value> tiledOperands;
792 auto sourceSlice = tensor::ExtractSliceOp::create(
793 b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
794 tiledOperands.push_back(sourceSlice);
795
796 SmallVector<OpFoldResult> outputOffsets, outputSizes;
797 if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
798 outputSizes)))
799 return {};
800
801 strides.append(packOp.getDestRank() - inputRank, oneAttr);
802 auto outSlice = tensor::ExtractSliceOp::create(
803 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
804 tiledOperands.push_back(outSlice);
805
806 if (auto val = packOp.getPaddingValue())
807 tiledOperands.push_back(val);
808 for (auto tile : packOp.getInnerTiles())
809 tiledOperands.push_back(tile);
810
811 Operation *tiledPackOp = PackOp::create(
812 b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
813
814 return TilingResult{
815 {tiledPackOp},
816 SmallVector<Value>(tiledPackOp->getResults()),
817 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
818 }
819
820 LogicalResult
821 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
822 ArrayRef<OpFoldResult> offsets,
823 ArrayRef<OpFoldResult> sizes,
824 SmallVector<OpFoldResult> &resultOffsets,
825 SmallVector<OpFoldResult> &resultSizes) const {
826 // The iteration domain is over outer dimensions of packed layout. In this
827 // context, the outer dimensions of `resultOffsets` are `offsets`. The
828 // inner dimensions of `resultOffsets` are zeros because tiling is not
829 // applied to them.
830 auto packOp = cast<PackOp>(op);
831 int64_t inputRank = packOp.getSourceRank();
832 int64_t outputRank = packOp.getDestRank();
833 auto zeroAttr = b.getI64IntegerAttr(0);
834 resultOffsets.assign(offsets.begin(), offsets.end());
835 resultOffsets.append(outputRank - inputRank, zeroAttr);
836
837 ReifiedRankedShapedTypeDims outputShape;
838 (void)reifyResultShapes(b, packOp, outputShape);
839 resultSizes.assign(sizes.begin(), sizes.end());
840 for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
841 resultSizes.push_back(outputShape[0][dataTileDim]);
842
843 return success();
844 }
845
846 FailureOr<TilingResult>
847 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
848 ArrayRef<OpFoldResult> offsets,
849 ArrayRef<OpFoldResult> sizes) const {
850 auto packOp = cast<PackOp>(op);
851 int64_t numTiles = packOp.getInnerDimsPos().size();
852
853 // tensor.pack op is fusible (as a producer) only if full inner tiles are
854 // iterated or inner dims are not tiled. Otherwise, it will generate a
855 // sequence of non-trivial ops (for partial tiles).
856 for (auto offset : offsets.take_back(numTiles))
857 if (!isZeroInteger(offset))
858 return failure();
859
860 for (auto iter :
861 llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
862 if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
863 return failure();
864
865 FailureOr<TilingResult> tilingResult = getTiledImplementation(
866 op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
867 if (failed(tilingResult))
868 return failure();
869 return tilingResult.value();
870 }
871
  /// Method to return the position of the iteration domain tile computed by
  /// the tiled operation. In the current `tensor.pack` context, the
  /// `resultOffsets` and `resultSizes` only cover outer dimensions.
875 LogicalResult getIterationDomainTileFromOperandTiles(
876 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
877 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
878 ArrayRef<SmallVector<OpFoldResult>> allSizes,
879 SmallVectorImpl<OpFoldResult> &resultOffsets,
880 SmallVectorImpl<OpFoldResult> &resultSizes) const {
881 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
882 LLVM_DEBUG(
883 { llvm::dbgs() << "unsupported operands for consumer fusion"; });
884 return failure();
885 }
886
887 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
888 ArrayRef<OpFoldResult> sizes(allSizes[0]);
889 auto packOp = cast<PackOp>(op);
890 Location loc = packOp.getLoc();
891 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
892 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
893 packOp.getDimAndTileMapping();
894 SmallVector<int64_t> outerShapeWithoutTranspose(
895 packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
896 if (!packOp.getOuterDimsPerm().empty()) {
      applyPermutationToVector(
          outerShapeWithoutTranspose,
899 invertPermutationVector(packOp.getOuterDimsPerm()));
900 }
901 for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
902 if (dimAndTileMapping.count(dim)) {
903 FailureOr<int64_t> cstTileSize =
            ValueBoundsConstraintSet::computeConstantBound(
                presburger::BoundType::UB, sizes[dim],
906 /*stopCondition=*/nullptr, /*closedUB=*/true);
907 std::optional<int64_t> cstInnerSize =
908 getConstantIntValue(dimAndTileMapping[dim]);
909
        // If a dimension is not tiled, it is always valid to fuse the pack
        // op, even if the op has padding semantics, because it always
        // generates a full slice along that dimension. The tile sizes are
        // for the unpacked domain, i.e., `srcDimSize`, so
        // `tileSize < srcDimSize` means that the dimension is tiled.
915 // TODO: It could be untiled if the `srcDimSize` is dynamic. It is a
916 // hard check to determine if a dimension is tiled or not.
917 int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
918 int64_t destDimSize = outerShapeWithoutTranspose[dim];
919 bool isTiled = failed(cstTileSize) ||
920 ShapedType::isDynamic(srcDimSize) ||
921 cstTileSize.value() < srcDimSize;
922 if (!isTiled) {
923 outerDimOffsets.push_back(offsets[dim]);
924 if (ShapedType::isStatic(destDimSize)) {
925 outerDimSizes.push_back(b.getIndexAttr(destDimSize));
926 } else {
927 outerDimSizes.push_back(
928 b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
929 }
930 continue;
931 }
932
        // Currently, fusing `packOp` as a consumer only supports the perfect
        // tiling scenario because, even without padding semantics, the
        // `packOp` may yield incomplete tiles. E.g. tensor<30xf32> ->
        // tensor<5x6xf32>, where the `tileSize` from the operand of `packOp`
        // is 5, which is not evenly divisible by the `innerTile` (=6) of
        // `packOp`. As a result:
        // 1. the first slice is extracted from (0) to (4) and inserted into
        //    (0,0)~(0,4) in the first row.
        // 2. the second slice is extracted from (5) to (9) and SHOULD BE
        //    inserted into two rows of different lengths: the first row at
        //    (0,5) and the second row at (1,0)~(1,3). It is hard to
        //    coordinate these, so the constraint below bypasses such cases
        //    temporarily. In other words, we can only support tiling with a
        //    consumer if the tile size for the producer is a multiple of the
        //    inner tile size for the packed dimensions at the moment.
947 if ((failed(cstTileSize) || !cstInnerSize ||
948 *cstTileSize % *cstInnerSize != 0))
949 return failure();
950
951 using AV = affine::AffineValueExpr;
952 affine::AffineBuilder ab(b, loc);
953 AffineExpr dim0, sym;
954 bindDims(b.getContext(), dim0);
955 bindSymbols(b.getContext(), sym);
956 auto avOffset = AV(dim0).bind(offsets[dim]);
957 auto avSize = AV(dim0).bind(sizes[dim]);
958 auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
959 outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
960 outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
961 } else {
962 outerDimOffsets.push_back(offsets[dim]);
963 outerDimSizes.push_back(sizes[dim]);
964 }
965 }
966 applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
967 resultOffsets = outerDimOffsets;
968 resultSizes = outerDimSizes;
969 return success();
970 }
971
972 /// Method to return the tiled implementation of tensor.pack as a consumer.
973 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
974 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
975 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
976 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
977 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
978 LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
980 return failure();
981 }
982
983 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
984 ArrayRef<OpFoldResult> sizes(allSizes[0]);
985
986 auto packOp = cast<PackOp>(op);
987 Location loc = packOp.getLoc();
988
989 int64_t inputRank = packOp.getSourceRank();
990 auto oneAttr = b.getI64IntegerAttr(1);
991 SmallVector<OpFoldResult> strides(inputRank, oneAttr);
992
993 SmallVector<Value> tiledOperands;
994 auto sourceSlice = tensor::ExtractSliceOp::create(
995 b, loc, packOp.getSource(), offsets, sizes, strides);
996 tiledOperands.push_back(sourceSlice);
997
998 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
999 if (failed(getIterationDomainTileFromOperandTiles(
1000 op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
1001 outerDimSizes)))
1002 return failure();
1003
1004 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1005 if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
1006 outputOffsets, outputSizes)))
1007 return failure();
1008
1009 strides.append(packOp.getDestRank() - inputRank, oneAttr);
1010 auto outSlice = tensor::ExtractSliceOp::create(
1011 b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
1012 tiledOperands.push_back(outSlice);
1013
1014 if (auto val = packOp.getPaddingValue())
1015 tiledOperands.push_back(val);
1016 for (auto tile : packOp.getInnerTiles())
1017 tiledOperands.push_back(tile);
1018
1019 Operation *tiledPackOp = PackOp::create(
1020 b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
1021
1022 return TilingResult{
1023 {tiledPackOp},
1024 SmallVector<Value>(tiledPackOp->getResults()),
1025 llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
1026 }
1027};
1028
1029struct UnpackTileDimInfo {
1030 bool isAlignedToInnerTileSize;
1031 OpFoldResult sourceOffset;
1032 OpFoldResult sourceSize;
1033 OpFoldResult resultOffset;
1034 OpFoldResult destExpandedSize;
1035};
1036
1037/// Returns the needed information for tiling unpack op on `tileDim` with given
1038/// `tileOffset` and `tileSize`. For more details, see the comment of the
1039/// `getTiledImplementation`.
1040static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
1041 int64_t tileDim,
1042 OpFoldResult tileOffset,
1043 OpFoldResult tileSize) {
1044 UnpackTileDimInfo info;
1045 Attribute zeroAttr = b.getIndexAttr(0);
1046 Attribute oneAttr = b.getIndexAttr(1);
1047 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1048 unpackOp.getDimAndTileMapping();
  // The dimension is not one of the packed data dimensions.
1050 if (!dimAndTileMapping.count(tileDim)) {
1051 info.isAlignedToInnerTileSize = true;
1052 info.sourceOffset = tileOffset;
1053 info.sourceSize = tileSize;
1054 info.resultOffset = zeroAttr;
1055 info.destExpandedSize = tileSize;
1056 return info;
1057 }
1058
1059 Location loc = unpackOp.getLoc();
1060 using AV = affine::AffineValueExpr;
1061 affine::AffineBuilder ab(b, loc);
1062 AffineExpr dim0, dim1, sym0;
1063 bindDims(b.getContext(), dim0, dim1);
1064 bindSymbols(b.getContext(), sym0);
1065
1066 OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
1067
1068 info.isAlignedToInnerTileSize = false;
1069 FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
1070 presburger::BoundType::UB, tileSize,
1071 /*stopCondition=*/nullptr, /*closedUB=*/true);
1072 std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
1073 if (!failed(cstSize) && cstInnerSize) {
1074 if (*cstSize % *cstInnerSize == 0)
1075 info.isAlignedToInnerTileSize = true;
1076
    // If the tiling size equals the inner tiling size, the outer dims are
    // always 1.
1079 if (*cstInnerSize == *cstSize) {
1080 auto lhs = AV(dim0).bind(tileOffset);
1081 auto rhs = AV(dim1).bind(innerTileSize);
1082 info.sourceOffset = ab.floor(lhs, rhs);
1083 info.sourceSize = oneAttr;
1084 info.resultOffset = zeroAttr;
1085 info.destExpandedSize = tileSize;
1086 return info;
1087 }
1088 }
1089
1090 if (info.isAlignedToInnerTileSize) {
1091 info.sourceOffset =
1092 ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
1093 info.resultOffset = zeroAttr;
1094 info.destExpandedSize = tileSize;
1095
    // The ceilDiv is needed here because there can be an incomplete tile
    // even in perfect tiling cases. E.g.,
    //   %0 = unpack tensor<33x2xf32> into tensor<64xf32>
    // If the tiling size is 32, there will be 3 tiles. Two of them have
    // size=32; one of them has size=2. The size is represented using an
    // affine_min op; we need ceilDiv.
1102 info.sourceSize =
1103 ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
1104 return info;
1105 }
1106
1107 affine::DivModValue firstCoord = affine::getDivMod(
1108 b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
1109 getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1110 OpFoldResult tileExclusiveBound =
1111 ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
1112 affine::DivModValue lastCoord = affine::getDivMod(
1113 b, loc,
      getValueOrCreateConstantIndexOp(
          b, loc,
1116 ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
1117 getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1118
1119 OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
1120 AV(dim1).bind(firstCoord.quotient));
1121 info.sourceSize =
1122 ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
1123 info.sourceOffset = firstCoord.quotient;
1124 info.resultOffset = firstCoord.remainder;
  // Do not create affine ops for the expanded size because the affine op
  // would be too complicated and would trigger issues in affine op
  // simplification.
1127 info.destExpandedSize = b.createOrFold<arith::MulIOp>(
1128 loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
1129 getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1130 return info;
1131}
1132
1133struct UnPackOpTiling
1134 : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
1135
1136 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
1137 auto unpackOp = cast<UnPackOp>(op);
1138 SmallVector<utils::IteratorType> iteratorTypes(
1139 unpackOp.getDestRank(), utils::IteratorType::parallel);
1140 return iteratorTypes;
1141 }
1142
1143 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
1144 return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
1145 }
1146
  /// There are two cases in tiling unpack ops. If the tiling size is aligned
  /// to the inner tile size, the corresponding tiles of the source are all
  /// complete. Otherwise, there are incomplete tiles, and we need to expand
  /// the slice of the source to get complete tiles. The tiled unpack op then
  /// unpacks more data from the source, so we need an extract_slice op to
  /// shift and truncate the output.
  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
  /// coordinates of the second tile (i.e., result[15..31]) are
  /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
  /// row are incomplete tiles. To represent the unpack op, we have to
  /// complete the rows. I.e., the input coordinates would start at (1, 0)
  /// and end at (3, 7). In this context, the tiled unpack produces (3 * n)
  /// elements because there are 3 rows in total. Followed by a
  /// tensor.extract_slice op, we can get the actual result.
1161 FailureOr<TilingResult>
1162 getTiledImplementation(Operation *op, OpBuilder &b,
1163 ArrayRef<OpFoldResult> offsets,
1164 ArrayRef<OpFoldResult> sizes) const {
1165 auto unpackOp = cast<UnPackOp>(op);
1166 int64_t srcRank = unpackOp.getSourceRank();
1167 int64_t destRank = unpackOp.getDestRank();
1168 int64_t numInnerTiles = srcRank - destRank;
1169 Location loc = unpackOp.getLoc();
1170
    // The perfect tiling case indicates that the tiling sizes are multiples
    // of the inner_tile_size. In this context, no extra data is needed to
    // represent the tiled unpack op.
1174 bool isPerfectTilingCase = true;
1175 Attribute oneAttr = b.getIndexAttr(1);
1176 SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
1177 SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
1178 SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
1179 for (auto dim : llvm::seq<int64_t>(0, destRank)) {
1180 UnpackTileDimInfo info =
1181 getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
1182 if (!info.isAlignedToInnerTileSize)
1183 isPerfectTilingCase = false;
1184 sliceSrcIndices.push_back(info.sourceOffset);
1185 sliceSrcSizes.push_back(info.sourceSize);
1186 destExpandedSizes.push_back(info.destExpandedSize);
1187 resultOffsetsFromDest.push_back(info.resultOffset);
1188 }
1189
1190 // The tiling is applied on destination dimensions. We have to apply the
1191 // interchange on source dimensions if outer_dims_perm is set.
1192 applyPermToRange(sliceSrcIndices, sliceSrcSizes,
1193 unpackOp.getOuterDimsPerm());
1194 Attribute zeroAttr = b.getIndexAttr(0);
1195 sliceSrcIndices.append(numInnerTiles, zeroAttr);
1196 sliceSrcSizes.append(unpackOp.getMixedTiles());
1197 sliceSrcStrides.append(numInnerTiles, oneAttr);
1198 SmallVector<Operation *> generatedSlices;
1199 tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
1200 b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
1201 sliceSrcStrides);
1202 generatedSlices.push_back(sliceSource);
1203
1204 SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
1205 Value sliceDest;
1206 if (isPerfectTilingCase) {
1207 auto destSliceOp = tensor::ExtractSliceOp::create(
1208 b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
1209 sliceDest = destSliceOp;
1210 generatedSlices.push_back(destSliceOp);
1211 } else {
1212 sliceDest = tensor::EmptyOp::create(
1213 b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
1214 }
1215
1216 SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
1217 for (auto tile : unpackOp.getInnerTiles())
1218 tiledOperands.push_back(tile);
1219
1220 Operation *tiledUnpackOp = UnPackOp::create(
1221 b, loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());
1222
1223 if (isPerfectTilingCase)
1224 return TilingResult{{tiledUnpackOp},
1225 SmallVector<Value>(tiledUnpackOp->getResults()),
1226 generatedSlices};
1227
1228 auto extractSlice = tensor::ExtractSliceOp::create(
1229 b, loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
1230 destStrides);
1231 return TilingResult{
1232 {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
1233 }
1234
1235 LogicalResult
1236 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
1237 ArrayRef<OpFoldResult> offsets,
1238 ArrayRef<OpFoldResult> sizes,
1239 SmallVector<OpFoldResult> &resultOffsets,
1240 SmallVector<OpFoldResult> &resultSizes) const {
1241 resultOffsets = llvm::to_vector(offsets);
1242 resultSizes = llvm::to_vector(sizes);
1243 return success();
1244 }
1245
1246 FailureOr<TilingResult>
1247 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
1248 ArrayRef<OpFoldResult> offsets,
1249 ArrayRef<OpFoldResult> sizes) const {
1250 FailureOr<TilingResult> tilingResult =
1251 getTiledImplementation(op, b, offsets, sizes);
1252 if (failed(tilingResult))
1253 return failure();
1254 return tilingResult.value();
1255 }
1256
  /// Method to return the position of the iteration domain tile computed by
  /// the tiled operation.
1259 LogicalResult getIterationDomainTileFromOperandTiles(
1260 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1261 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1262 ArrayRef<SmallVector<OpFoldResult>> allSizes,
1263 SmallVectorImpl<OpFoldResult> &resultOffsets,
1264 SmallVectorImpl<OpFoldResult> &resultSizes) const {
1265 if (operandNumbers.size() != 1) {
1266 LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
1267 return failure();
1268 }
1269 auto unPackOp = cast<UnPackOp>(op);
1270 unsigned operandNumber = operandNumbers[0];
1271 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1272 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1273
1274 // If the operand tile is the dest, then no adjustment is needed.
1275 if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
1276 resultOffsets = llvm::to_vector(offsets);
1277 resultSizes = llvm::to_vector(sizes);
1278 return success();
1279 }
1280 Location loc = unPackOp.getLoc();
1281
1282 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1283 auto destOffsets = offsets.drop_back(numTiles);
1284 auto destSizes = sizes.drop_back(numTiles);
1285 // The tiling is applied on interchanged dimensions. We have to undo the
1286 // interchange to map sizes and offsets to the original input.
1287 int64_t outputRank = unPackOp.getDestRank();
1288 ReifiedRankedShapedTypeDims reifiedReturnShapes;
1289 if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
1290 return failure();
1291 SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
1292 SmallVector<OpFoldResult> origOffsets(destOffsets);
1293 SmallVector<OpFoldResult> origSizes(destSizes);
1294 applyPermToRange(origOffsets, origSizes,
1295 invertPermutationVector(unPackOp.getOuterDimsPerm()));
1296
1297 DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1298 unPackOp.getDimAndTileMapping();
1299
1300 for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
1301 using AV = affine::AffineValueExpr;
1302 affine::AffineBuilder ab(b, loc);
1303 AffineExpr dim0, dim1, sym0;
1304 bindDims(b.getContext(), dim0, dim1);
1305 bindSymbols(b.getContext(), sym0);
1306 if (dimAndTileMapping.count(dim)) {
1307 // If the data dimension is tiled, the i-th index is the product of
1308 // offset_i and tile_i, and the i-th size is the product of sizes_i and
1309 // tile_i. The sizes must be clamped to the sizes of the unpack result.
1310 auto avOffset = AV(dim0).bind(origOffsets[dim]);
1311 auto avSize = AV(dim0).bind(origSizes[dim]);
1312 auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
1313 auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
1314 resultOffsets.push_back(ab.mul(avOffset, avTileSize));
1315 auto avResultOffset = AV(dim1).bind(resultOffsets.back());
1316 resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
1317 ab.sub(avResultSize, avResultOffset)}));
1318 } else {
1319 resultOffsets.push_back(origOffsets[dim]);
1320 resultSizes.push_back(origSizes[dim]);
1321 }
1322 }
1323 return success();
1324 }
1325
1326 /// Method to return the tiled implementation of tensor.unpack as a consumer.
1327 FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1328 Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1329 ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1330 ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
1331 if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1332 LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; });
1333 return failure();
1334 }
1335 auto unPackOp = cast<UnPackOp>(op);
1336 ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1337 ArrayRef<OpFoldResult> sizes(allSizes[0]);
1338
1339 // tensor.unpack op is fusible (as a consumer) only if inner dims are not
1340 // tiled.
1341 int64_t numTiles = unPackOp.getInnerDimsPos().size();
1342 for (auto iter :
1343 llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
1344 if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
1345 return failure();
1346 }
1347
1348 Location loc = unPackOp.getLoc();
1349
1350 // Fetch offset/size for creating the slice of the dest operand of
1351 // unpack op.
1352 SmallVector<OpFoldResult> outputOffsets, outputSizes;
1353 if (failed(getIterationDomainTileFromOperandTiles(
1354 op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
1355 outputSizes)))
1356 return failure();
1357
1358 auto oneAttr = b.getI64IntegerAttr(1);
1359 int64_t outputRank = unPackOp.getDestRank();
1360 SmallVector<OpFoldResult> strides(outputRank, oneAttr);
1361
1362 SmallVector<Value> tiledOperands;
1363 // Create slice of the dest operand.
1364 auto extractDestSlice = tensor::ExtractSliceOp::create(
1365 b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
1366 tiledOperands.push_back(extractDestSlice);
1367
1368 strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
1369 // Create slice of the source operand.
1370 auto extractSourceSlice = tensor::ExtractSliceOp::create(
1371 b, loc, unPackOp.getSource(), offsets, sizes, strides);
1372 tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
1373 for (auto tile : unPackOp.getInnerTiles())
1374 tiledOperands.push_back(tile);
1375
1376 // Create tiled unpack op.
1377 Operation *tiledUnPackOp =
1378 UnPackOp::create(b, loc, TypeRange{extractDestSlice.getType()},
1379 tiledOperands, op->getAttrs());
1380
1381 return TilingResult{{tiledUnPackOp},
1382 SmallVector<Value>(tiledUnPackOp->getResults()),
1383 llvm::to_vector(ArrayRef<Operation *>{
1384 extractSourceSlice, extractDestSlice})};
1385 }
1386};
1387
1388} // namespace
1389
1390template <typename OpType>
1391static void registerOne(MLIRContext *ctx) {
1392 OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1393 OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1394 *ctx);
1395}
1396
1397/// Variadic helper function.
1398template <typename... OpTypes>
1399static void registerAll(MLIRContext *ctx) {
1400 (registerOne<OpTypes>(ctx), ...);
1401}
1402
1403#define GET_OP_LIST
1404
void mlir::linalg::registerTilingInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
    registerOne<linalg::GenericOp>(ctx);
    linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
    linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
    registerAll<
#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
        >(ctx);
  });
}
1416
void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
    DialectRegistry &registry) {
1419 registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) {
1420 linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1421 linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1422 });
1423}