MLIR 21.0.0git
TilingInterfaceImpl.cpp
1 //===- TilingInterfaceImpl.cpp - Implementation of TilingInterface -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h"
10 
11 #include "mlir/Analysis/SliceAnalysis.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/Affine/Utils.h"
14 #include "mlir/Dialect/Arith/IR/Arith.h"
15 #include "mlir/Dialect/Arith/Utils/Utils.h"
16 #include "mlir/Dialect/Linalg/IR/Linalg.h"
17 #include "mlir/Dialect/Linalg/Utils/Utils.h"
18 #include "mlir/Dialect/MemRef/IR/MemRef.h"
19 #include "mlir/Dialect/Tensor/IR/Tensor.h"
20 #include "mlir/Dialect/Utils/StaticValueUtils.h"
21 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
22 #include "mlir/Interfaces/TilingInterface.h"
23 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Debug.h"
26 #include <optional>
27 
28 #define DEBUG_TYPE "linalg-tiling-interface-impl"
29 
30 using namespace mlir;
31 using namespace mlir::linalg;
32 
33 //===----------------------------------------------------------------------===//
34 // Utility methods for implementation of Tiling Interface for Linalg ops
35 //===----------------------------------------------------------------------===//
36 
37 /// Return the SSA values that represent the data point accessed using a given
38 /// `indexingMap` for a given point in the iteration space represented by `ivs`.
39 static SmallVector<Value> getIndicesForAccess(OpBuilder &b, Location loc,
40  AffineMap indexingMap,
41  ValueRange ivs) {
42  SmallVector<Value> indices;
43  indices.reserve(indexingMap.getNumResults());
44  for (auto result : indexingMap.getResults()) {
45  AffineMap m = AffineMap::get(indexingMap.getNumDims(),
46  indexingMap.getNumSymbols(), result);
47  Value v = b.create<affine::AffineApplyOp>(loc, m, ivs);
48  indices.push_back(v);
49  }
50  return indices;
51 }
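//
// For illustration only (a hypothetical sketch, not part of the upstream
// file): assuming an indexing map (d0, d1) -> (d1, d0) and ivs = [%i, %j],
// the helper above would emit roughly
//
//   %0 = affine.apply affine_map<(d0, d1) -> (d1)>(%i, %j)
//   %1 = affine.apply affine_map<(d0, d1) -> (d0)>(%i, %j)
//
// and return [%0, %1], i.e. the transposed access indices, one affine.apply
// per result of the indexing map.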
52 
53 /// Method to inline the payload of a `linalgOp` given the iteration space
54 /// point and values for the arguments of the payload.
55 static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp,
56  ValueRange ivs, ValueRange argValues) {
57  Block *body = linalgOp.getBlock();
58  IRMapping map;
59  map.map(body->getArguments(), argValues);
60  for (auto &op : body->without_terminator()) {
61  if (auto indexOp = dyn_cast<IndexOp>(&op)) {
62  map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
63  continue;
64  }
65  b.clone(op, map);
66  }
67 
68  Operation *terminator = body->getTerminator();
69  Location loc = terminator->getLoc();
70  for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
71  Value toStore = map.lookupOrDefault(operand.value());
72  OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
73  auto indices = getIndicesForAccess(
74  b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
75  b.create<memref::StoreOp>(
76  loc, toStore, linalgOp.getDpsInitOperand(operand.index())->get(),
77  indices);
78  }
79  return success();
80 }
81 
82 //===----------------------------------------------------------------------===//
83 // External Model for implementing `TilingInterface` for `LinalgOp`s.
84 //===----------------------------------------------------------------------===//
85 
86 namespace {
87 /// External model implementation of TilingInterface for LinalgOps. An external
88 /// model implementation is used for now until the use of `TilingInterface`
89 /// is on par with the current Linalg tiling + fusion patterns. Once it is,
90 /// it may be possible to move this into the op definition (though there are
91 /// advantages to leaving it as an external model).
92 template <typename LinalgOpTy>
93 struct LinalgOpTilingInterface
94  : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
95  LinalgOpTy> {
96  /// Return the loop iterator type.
97  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
98  LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
99  return concreteOp.getIteratorTypesArray();
100  }
101 
102  /// Return the iteration domain range.
103  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
104  OpBuilder::InsertionGuard g(b);
105  b.setInsertionPoint(op);
106  Location loc = op->getLoc();
107  LinalgOp linalgOp = cast<LinalgOp>(op);
108  SmallVector<OpFoldResult> allShapesSizes =
109  linalgOp.createFlatListOfOperandDims(b, loc);
110  AffineMap map = linalgOp.getShapesToLoopsMap();
111 
112  return llvm::to_vector(
113  llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
114  OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
115  b, loc, loopExpr, allShapesSizes);
116  return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
117  }));
118  }
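//
// As a hedged example of the above (shapes assumed): for a linalg.matmul
// with indexing maps (d0, d2), (d2, d1), (d0, d1) and operand sizes MxK,
// KxN, MxN, the flat operand-dim list is [M, K, K, N, M, N]; applying the
// shapes-to-loops map yields the loop ranges [0, M), [0, N), [0, K), each
// with stride 1.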
119 
120  /// Instantiate the tiled implementation of the operation.
121  FailureOr<TilingResult>
122  getTiledImplementation(Operation *op, OpBuilder &b,
123  ArrayRef<OpFoldResult> offsets,
124  ArrayRef<OpFoldResult> sizes) const {
125  // Leave the `sizeBounds` value empty. That is only needed when the `sizes`
126  // specified could lead to out of bounds accesses.
127  Location loc = op->getLoc();
128  LinalgOp linalgOp = cast<LinalgOp>(op);
129  SmallVector<Value> valuesToTile = linalgOp->getOperands();
130  SmallVector<Value> tiledOperands = makeTiledShapes(
131  b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
132  SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
133  llvm::make_filter_range(
134  tiledOperands,
135  [](Value v) -> bool {
136  return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
137  v.getDefiningOp());
138  }),
139  [](Value v) -> Operation * { return v.getDefiningOp(); });
140 
141  SmallVector<Type> resultTensorTypes =
142  getTensorOutputTypes(linalgOp, tiledOperands);
143 
144  Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
145  offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);
146 
147  return TilingResult{
148  {tiledOp}, SmallVector<Value>(tiledOp->getResults()), generatedSlices};
149  }
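//
// A minimal sketch of the effect, assuming a linalg.matmul on tensors tiled
// with offsets = [%i, %j, 0] and sizes = [4, 8, %K] (all names hypothetical):
//
//   %lhs = tensor.extract_slice %A[%i, 0] [4, %K] [1, 1]
//   %rhs = tensor.extract_slice %B[0, %j] [%K, 8] [1, 1]
//   %out = tensor.extract_slice %C[%i, %j] [4, 8] [1, 1]
//   %res = linalg.matmul ins(%lhs, %rhs : ...) outs(%out : ...)
//
// The extract_slice (or memref.subview) ops are also reported back as
// `generatedSlices` so that producers can later be fused into them.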
150 
151  /// Utility to compute the iteration space offsets and sizes from the given
152  /// operand offsets and sizes, as per the indexing maps of the linalg op.
153  /// This helps in fusing the linalg op as a consumer of a given slice op.
154  static LogicalResult
155  getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b,
156  ArrayRef<AffineMap> indexingMaps,
157  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
158  ArrayRef<SmallVector<OpFoldResult>> allSizes,
159  SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
160  SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
161  DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;
162 
163  for (auto [indexingMap, offsets, sizes] :
164  llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
165  for (auto [resultExpr, offset, size] :
166  llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
167  auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
168  if (!dimExpr)
169  continue;
170  unsigned position = dimExpr.getPosition();
171  auto it = mappedOffsets.find(position);
172  if (it != mappedOffsets.end()) {
173  OpFoldResult seenOffset = it->second;
174  OpFoldResult seenSize = mappedSizes.lookup(position);
175  if (seenOffset != offset || seenSize != size) {
176  LLVM_DEBUG({
177  llvm::dbgs() << "inconsistent iteration space mapping from "
178  "offsets/sizes of operands/results";
179  });
180  return failure();
181  }
182  } else {
183  mappedOffsets[position] = offset;
184  mappedSizes[position] = size;
185  }
186  }
187  }
188 
189  // Aggregate from the given operand offsets and sizes, or default to
190  // iteration space values.
191  SmallVector<Range> iterationDomain =
192  cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
193  mappedOffsetsVec.resize(iterationDomain.size());
194  mappedSizesVec.resize(iterationDomain.size());
195  for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
196  auto it = mappedOffsets.find(index);
197  if (it != mappedOffsets.end()) {
198  mappedOffsetsVec[index] = it->second;
199  mappedSizesVec[index] = mappedSizes.lookup(index);
200  continue;
201  }
202  mappedOffsetsVec[index] = domain.offset;
203  mappedSizesVec[index] = domain.size;
204  }
205  return success();
206  }
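//
// Hypothetical example: with indexing maps (d0, d1) -> (d0, d1) on an input
// and (d0, d1) -> (d0) on the init, a tile of the init with offset [%o] and
// size [%s] pins d0 to (%o, %s), while d1 defaults to its full iteration
// domain range. If another operand tile binds d0 to a different offset or
// size, the mapping is rejected as inconsistent and failure is returned.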
207 
208  /// Method to return the position of the iteration domain tile computed
209  /// from the positions of the given operand tiles.
210  LogicalResult getIterationDomainTileFromOperandTiles(
211  Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
212  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
213  ArrayRef<SmallVector<OpFoldResult>> allSizes,
214  SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
215  SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
216  auto linalgOp = cast<LinalgOp>(op);
217 
218  std::optional<SmallVector<OpFoldResult>> iterationSpaceOffsets,
219  iterationSpaceSizes;
220  SmallVector<AffineMap> indexingMaps =
221  llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
222  OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
223  return linalgOp.getMatchingIndexingMap(&opOperand);
224  });
225  if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
226  allSizes, iterDomainOffsets,
227  iterDomainSizes))) {
228  return failure();
229  }
230  return success();
231  }
232 
233  /// Return the details of the output tile generated by the tiled
234  /// implementation.
235  LogicalResult
236  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
237  ArrayRef<OpFoldResult> offsets,
238  ArrayRef<OpFoldResult> sizes,
239  SmallVector<OpFoldResult> &resultOffsets,
240  SmallVector<OpFoldResult> &resultSizes) const {
241  Location loc = op->getLoc();
242  LinalgOp linalgOp = cast<LinalgOp>(op);
243 
244  AffineExpr d0;
245  bindDims(b.getContext(), d0);
246  SmallVector<OpFoldResult> subShapeSizes =
247  llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
248  return affine::makeComposedFoldedAffineApply(b, loc, d0 - 1, ofr);
249  }));
250 
251  OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
252  SliceParameters sliceParams = computeSliceParameters(
253  b, loc, outOperand->get(), sizes,
254  linalgOp.getMatchingIndexingMap(outOperand), offsets,
255  /*ubs*/ {}, subShapeSizes, true);
256  resultOffsets = sliceParams.offsets;
257  resultSizes = sliceParams.sizes;
258  return success();
259  }
260 
261  LogicalResult getIterationDomainTileFromResultTile(
262  Operation *op, OpBuilder &b, unsigned resultNumber,
263  ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
264  SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
265  SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
266  auto linalgOp = cast<LinalgOp>(op);
267 
268  // Check that the indexing map used for the output is a projected
269  // permutation. This could be relaxed with a more general approach that can
270  // map the offsets and sizes from the result to iteration space tiles
271  // (filling in full extent for dimensions not used to access the result).
272  AffineMap indexingMap =
273  linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
274  if (!indexingMap.isProjectedPermutation()) {
275  return op->emitOpError(
276  "unhandled tiled implementation generation when result is not "
277  "accessed using a permuted projection");
278  }
279 
280  SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
281  SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
282  auto status =
283  getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
284  {allSizes}, iterDomainOffsets, iterDomainSizes);
285  (void)status;
286  assert(succeeded(status) && "unexpected error in offset calculation");
287  return success();
288  }
289 
290  FailureOr<TilingResult>
291  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
292  ArrayRef<OpFoldResult> offsets,
293  ArrayRef<OpFoldResult> sizes) const {
294  SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
295  if (failed(getIterationDomainTileFromResultTile(
296  op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
297  return failure();
298  }
299  auto tilingInterfaceOp = cast<TilingInterface>(op);
300  FailureOr<TilingResult> tilingResult =
301  tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);
302 
303  if (failed(tilingResult))
304  return failure();
305 
306  if (tilingResult->tiledOps.size() != 1)
307  return op->emitOpError("failed to generate tiled implementation");
308 
309  return TilingResult{
310  tilingResult->tiledOps,
311  SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
312  tilingResult->generatedSlices};
313  }
314 
315  /// Method to generate the tiled implementation of an operation from the tile
316  /// of the operand.
317  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
318  Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
319  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
320  ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
321  SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
322  if (failed(getIterationDomainTileFromOperandTiles(
323  op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
324  mappedSizes))) {
325  return failure();
326  }
327  return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
328  }
329 
330  LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
331  Location loc,
332  ValueRange ivs) const {
333  auto linalgOp = cast<LinalgOp>(op);
334  if (!linalgOp.hasPureBufferSemantics())
335  return op->emitOpError("expected operation to have buffer semantics");
336 
337  SmallVector<Value> indexedValues;
338  indexedValues.reserve(linalgOp->getNumOperands());
339  Location linalgOpLoc = op->getLoc();
340  /// Load the data corresponding to the block arguments that
341  /// represent input operands.
342  for (OpOperand &operand : linalgOp->getOpOperands()) {
343  if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
344  indexedValues.push_back(nullptr);
345  continue;
346  }
347  if (linalgOp.isScalar(&operand)) {
348  indexedValues.push_back(operand.get());
349  continue;
350  }
351  SmallVector<Value> indices = getIndicesForAccess(
352  builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
353  Value load =
354  builder.create<memref::LoadOp>(linalgOpLoc, operand.get(), indices);
355  indexedValues.push_back(load);
356  }
357 
358  /// Inline the op payload and store the result.
359  return inlinePayload(builder, linalgOp, ivs, indexedValues);
360  }
361 };
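// As a hedged illustration of generateScalarImplementation: for a
// memref-based elementwise linalg.generic whose payload adds its two
// arguments, the inlined payload at iteration point (%i, %j) lowers roughly
// to
//
//   %a = memref.load %lhs[%i, %j] : memref<?x?xf32>
//   %b = memref.load %rhs[%i, %j] : memref<?x?xf32>
//   %c = arith.addf %a, %b : f32
//   memref.store %c, %out[%i, %j] : memref<?x?xf32>
//
// with any linalg.index ops in the payload replaced directly by the ivs.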
362 
363 //===----------------------------------------------------------------------===//
364 // External Model for implementing `PartialReductionInterface` for `LinalgOp`s.
365 //===----------------------------------------------------------------------===//
366 
367 /// In a given set vector, get the position of a particular element.
368 std::optional<int> getPositionIn(const llvm::SetVector<unsigned> &reductionDims,
369  unsigned value) {
370  for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
371  if (reductionDim == value) {
372  return index;
373  }
374  }
375  return std::nullopt;
376 }
377 
378 /// Return the AffineMaps to use for the `outs` operands of the linalg op
379 /// generated for partial results. The new AffineMap is the AffineMap of the
380 /// untiled op with the reduction dimensions appended at the end, in the
381 /// order in which they were specified during tiling.
382 static SmallVector<AffineMap>
383 getPartialResultAffineMaps(LinalgOp linalgOp,
384  const SetVector<unsigned> &reductionDims) {
385  auto partialReductionMaps = llvm::map_to_vector(
386  linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
387  AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
388  for (auto redPos : reductionDims) {
389  map =
390  map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
391  map.getNumResults());
392  }
393  return map;
394  });
395  return partialReductionMaps;
396 }
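//
// Hypothetical example: for a matmul (loops d0 = M, d1 = N, d2 = K) with
// reductionDims = {2}, the init map (d0, d1, d2) -> (d0, d1) becomes
// (d0, d1, d2) -> (d0, d1, d2), so the partial-result tensor carries one
// slot per reduction tile along the appended dimension.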
397 
398 struct InitSliceInfo {
399  SmallVector<int64_t> resultShape;
400  SmallVector<OpFoldResult> offsets;
401  SmallVector<OpFoldResult> sizes;
402  SmallVector<OpFoldResult> strides;
403 };
404 
405 /// Return the result shape, offsets, sizes and strides of the slice of the
406 /// `initValue` to use as the destination of the partial reduction op generated
407 /// with outer reduction strategy.
408 static InitSliceInfo getInitSliceInfoForOuterReduction(
409  MLIRContext *context, ArrayRef<OpFoldResult> offsets,
410  ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
411  ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
412  int64_t initRank = partialReductionMap.getNumResults();
413  SmallVector<OpFoldResult> initOffsets, initSizes;
414  Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
415  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
416  SmallVector<OpFoldResult> initStrides(initRank, one);
417  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
418  unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
419  if (reductionDims.contains(dim)) {
420  initOffsets.push_back(zero);
421  } else {
422  initOffsets.push_back(offsets[dim]);
423  }
424  initSizes.push_back(sizes[dim]);
425  }
426  SmallVector<int64_t> resultShape;
427  std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
428  return {resultShape, initOffsets, initSizes, initStrides};
429 }
430 
431 /// Return the result shape, offsets, sizes and strides of the slice of the
432 /// `initValue` to use as destination of the partial reduction op generated with
433 /// outer parallel strategy.
434 static InitSliceInfo getInitSliceInfoForOuterParallel(
435  MLIRContext *context, ArrayRef<OpFoldResult> offsets,
436  ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
437  ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
438  int64_t initRank = partialReductionMap.getNumResults();
439  SmallVector<OpFoldResult> initOffsets, initSizes;
440  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
441  SmallVector<OpFoldResult> initStrides(initRank, one);
442  SmallVector<OpFoldResult> resultShape;
443  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
444  unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
445  if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
446  initOffsets.push_back(splitReductionIvs[dimPos.value()]);
447  initSizes.push_back(one);
448  } else {
449  initOffsets.push_back(offsets[dim]);
450  initSizes.push_back(sizes[dim]);
451  resultShape.push_back(sizes[dim]);
452  }
453  }
454  SmallVector<int64_t> staticShapes;
455  std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
456  return {staticShapes, initOffsets, initSizes, initStrides};
457 }
458 
459 /// Return the result shape, offsets, sizes and strides of the slice of the
460 /// `initValue` to use as destination of the partial reduction op.
461 static InitSliceInfo getInitSliceInfo(MLIRContext *context,
462  ReductionTilingStrategy strategy,
463  ArrayRef<OpFoldResult> offsets,
464  ArrayRef<OpFoldResult> sizes,
465  const SetVector<unsigned> &reductionDims,
466  ArrayRef<OpFoldResult> splitReductionIvs,
467  AffineMap partialReductionMap) {
468  if (strategy == ReductionTilingStrategy::PartialReductionOuterReduction) {
469  return getInitSliceInfoForOuterReduction(context, offsets, sizes,
470  reductionDims, splitReductionIvs,
471  partialReductionMap);
472  }
473  assert(strategy == ReductionTilingStrategy::PartialReductionOuterParallel &&
474  "unexpected ReductionTilingStrategy");
475  return getInitSliceInfoForOuterParallel(context, offsets, sizes,
476  reductionDims, splitReductionIvs,
477  partialReductionMap);
478 }
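//
// A worked sketch for the outer-reduction strategy (values assumed): with
// partialReductionMap = (d0, d1, d2) -> (d0, d1, d2), reductionDims = {2},
// offsets = [%o0, %o1, %o2], and sizes = [4, 8, 16], the init slice uses
// offsets [%o0, %o1, 0], sizes [4, 8, 16], and strides [1, 1, 1]. The
// reduction-dim offset is reset to 0 because partial results are
// accumulated in place across reduction tiles.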
479 
480 /// External model implementation of PartialReductionInterface for
481 /// LinalgOps.
482 template <typename LinalgOpTy>
483 struct LinalgOpPartialReductionInterface
484  : public PartialReductionOpInterface::ExternalModel<
485  LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
486  FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
487  Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
488  const SetVector<unsigned> &reductionDims) const {
489  auto linalgOp = cast<LinalgOp>(op);
490 
491  OpBuilder::InsertionGuard guard(b);
492  if (linalgOp.hasPureBufferSemantics())
493  return op->emitOpError("expected operation to have tensor semantics");
494 
495  SmallVector<AffineMap> partialResultMaps =
496  getPartialResultAffineMaps(linalgOp, reductionDims);
497 
498  SmallVector<Value> inits;
499  for (auto [initIdx, result, partialMap] :
500  llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
501  SmallVector<Operation *, 4> combinerOps;
502  if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
503  combinerOps) ||
504  combinerOps.size() != 1)
505  return op->emitOpError("failed to analyze the reduction operation");
506 
507  Operation *reductionOp = combinerOps[0];
508  std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
509  if (!identity.has_value())
510  return op->emitOpError(
511  "failed to get an identity value for the reduction operation");
512 
513  // Append the new partial result dimensions.
514  SmallVector<OpFoldResult> partialResultShape;
515  for (AffineExpr dimExpr : partialMap.getResults()) {
516  auto dim = cast<AffineDimExpr>(dimExpr);
517  partialResultShape.push_back(sizes[dim.getPosition()]);
518  }
519 
520  Type elType = getElementTypeOrSelf(result.getType());
521  Value emptyTensor =
522  b.create<tensor::EmptyOp>(loc, partialResultShape, elType);
523  Value constantOp = b.create<arith::ConstantOp>(loc, *identity);
524  auto identityTensor =
525  b.create<linalg::FillOp>(loc, constantOp, emptyTensor);
526  inits.push_back(identityTensor.getResult(0));
527  }
528 
529  return inits;
530  }
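//
// For illustration (shapes assumed): for an f32 add-reduction whose partial
// result shape works out to 4x8x16, the code above materializes roughly
//
//   %empty = tensor.empty() : tensor<4x8x16xf32>
//   %cst = arith.constant 0.000000e+00 : f32
//   %init = linalg.fill ins(%cst : f32) outs(%empty : tensor<4x8x16xf32>)
//
// where the fill value is the neutral element of the matched combiner
// (0.0 for arith.addf, 1.0 for arith.mulf, etc.).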
531 
532  FailureOr<TilingResult>
533  tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
534  ReductionTilingStrategy tilingStrategy,
535  ValueRange init, ArrayRef<OpFoldResult> offsets,
536  ArrayRef<OpFoldResult> sizes,
537  const SetVector<unsigned> &reductionDims,
538  ArrayRef<OpFoldResult> splitReductionIvs) const {
539  OpBuilder::InsertionGuard guard(b);
540  auto linalgOp = cast<LinalgOp>(op);
541 
542  SmallVector<AffineMap> partialReductionMaps =
543  getPartialResultAffineMaps(linalgOp, reductionDims);
544 
545  // Step 1. Extend init maps to have reduction dimension dims, since we
546  // are converting them to parallel dimensions.
547  SmallVector<AffineMap> newInitMaps;
548  if (tilingStrategy ==
549  ReductionTilingStrategy::PartialReductionOuterReduction) {
550  newInitMaps = llvm::to_vector(partialReductionMaps);
551  } else {
552  newInitMaps = llvm::map_to_vector(
553  linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
554  return linalgOp.getMatchingIndexingMap(&opOperand);
555  });
556  }
557 
558  // Step 2a: Extract a slice of the input operands.
559  SmallVector<Value> tiledInputs = makeTiledShapes(
560  b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
561  SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
562  llvm::make_filter_range(
563  tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
564  [](Value v) -> Operation * { return v.getDefiningOp(); });
565 
566  // Step 2b: Extract a slice of the init operands.
567  SmallVector<Value, 1> tiledInits;
568  for (auto [partialReductionMap, valueToTile] :
569  llvm::zip_equal(partialReductionMaps, init)) {
570  InitSliceInfo sliceInfo = getInitSliceInfo(
571  b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
572  splitReductionIvs, partialReductionMap);
573  auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
574  RankedTensorType sliceResultType = RankedTensorType::get(
575  sliceInfo.resultShape, valueToTileType.getElementType(),
576  valueToTileType.getEncoding());
577  auto sliceOp = b.create<tensor::ExtractSliceOp>(
578  loc, sliceResultType, valueToTile, sliceInfo.offsets, sliceInfo.sizes,
579  sliceInfo.strides);
580  tiledInits.push_back(sliceOp.getResult());
581  generatedSlices.push_back(sliceOp);
582  }
583 
584  // Update the indexing maps.
585  SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
586  for (auto [initOperand, newInitMap] :
587  llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
588  int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
589  newMaps[mapIdx] = newInitMap;
590  }
591 
592  // Step 3. Change the reduction dim iterator types.
593  SmallVector<utils::IteratorType> newIteratorTypes =
594  linalgOp.getIteratorTypesArray();
595  if (tilingStrategy ==
596  ReductionTilingStrategy::PartialReductionOuterReduction) {
597  for (int dim : reductionDims)
598  newIteratorTypes[dim] = utils::IteratorType::parallel;
599  }
600 
601  // Step 4. Create the new generic op.
602  Operation *partialReductionOp;
603  auto resultTypes = ValueRange(tiledInits).getTypes();
604  if (tilingStrategy ==
605  ReductionTilingStrategy::PartialReductionOuterReduction) {
606  auto genericOp = b.create<GenericOp>(
607  loc, resultTypes, tiledInputs, tiledInits, newMaps, newIteratorTypes);
608  IRMapping mapping;
609  op->getRegion(0).cloneInto(&genericOp.getRegion(),
610  genericOp.getRegion().begin(), mapping);
611  partialReductionOp = genericOp.getOperation();
612  } else {
613  SmallVector<Value> operands = std::move(tiledInputs);
614  llvm::append_range(operands, tiledInits);
615  partialReductionOp = mlir::clone(b, op, resultTypes, operands);
616  }
617  return TilingResult{
618  {partialReductionOp},
619  llvm::map_to_vector(partialReductionOp->getResults(),
620  [](OpResult r) -> Value { return r; }),
621  generatedSlices};
622  }
623 
624  FailureOr<MergeResult>
625  mergeReductions(Operation *op, OpBuilder &b, Location loc,
626  ValueRange partialReduce,
627  const SetVector<unsigned> &reductionDims) const {
628  auto linalgOp = cast<LinalgOp>(op);
629  SmallVector<AffineMap> partialReductionMaps =
630  getPartialResultAffineMaps(linalgOp, reductionDims);
631 
632  // Permute the reduction dims as permuted by the partial result map.
633  SmallVector<Operation *> mergeOperations;
634  SmallVector<Value> replacements;
635  for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
636  linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
637  unsigned initIdx = idx;
638  // linalg.reduce's iteration space is the tiled result's iteration space
639  // (and not the tiled operation's iteration space). To account for this,
640  // permute the reduction dimensions based on the partial result map of the
641  // tiled result.
642  SmallVector<int64_t> partialReductionDims;
643  for (auto [resultNum, dimExpr] :
644  llvm::enumerate(partialMap.getResults())) {
645  unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
646  if (llvm::is_contained(reductionDims, dim)) {
647  partialReductionDims.push_back(resultNum);
648  }
649  }
650 
651  auto reduction = b.create<linalg::ReduceOp>(
652  loc, partialResult, init, partialReductionDims,
653  [&linalgOp, &initIdx](OpBuilder &b, Location loc, ValueRange inputs) {
654  // Get the combiner op.
655  SmallVector<Operation *, 4> combinerOps;
656  matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
657  combinerOps);
658  Operation *clonedReductionOp = b.clone(*combinerOps[0]);
659  // Combine the input at idx and output at numInits + idx.
660  clonedReductionOp->setOperand(0, inputs[0]);
661  clonedReductionOp->setOperand(1, inputs[1]);
662  b.create<linalg::YieldOp>(loc, clonedReductionOp->getResult(0));
663  });
664 
665  mergeOperations.push_back(reduction);
666  replacements.push_back(reduction->getResult(0));
667  }
668 
669  return MergeResult{mergeOperations, replacements};
670  }
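//
// A hedged sketch of the merge step (types assumed): given a partial result
// tensor<4x8x16xf32> and an init tensor<4x8xf32>, the emitted op is roughly
//
//   %merged = linalg.reduce ins(%partial : tensor<4x8x16xf32>)
//              outs(%init : tensor<4x8xf32>) dimensions = [2]
//     (%in: f32, %acc: f32) {
//       %0 = arith.addf %in, %acc : f32   // cloned combiner
//       linalg.yield %0 : f32
//     }
//
// where dimension 2 is the reduction dim's position in the partial result
// map, not in the original op's iteration space.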
671 
672  LogicalResult getPartialResultTilePosition(
673  Operation *op, OpBuilder &b, unsigned resultNumber,
674  ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
675  ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
676  ArrayRef<OpFoldResult> splitReductionIvs,
677  SmallVector<OpFoldResult> &resultOffsets,
678  SmallVector<OpFoldResult> &resultSizes) const {
679  auto linalgOp = cast<LinalgOp>(op);
680  SmallVector<AffineMap> partialReductionMaps =
681  getPartialResultAffineMaps(linalgOp, reductionDims);
682  InitSliceInfo sliceInfo = getInitSliceInfo(
683  b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
684  splitReductionIvs, partialReductionMaps[resultNumber]);
685  std::swap(resultOffsets, sliceInfo.offsets);
686  std::swap(resultSizes, sliceInfo.sizes);
687 
688  return success();
689  }
690 };
691 
692 template <typename OpTy>
693 static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
694  OpBuilder &builder) {
695  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
696  "applies to only pack or unpack operations");
697  OpBuilder::InsertionGuard g(builder);
698  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
699  : op.getDestRank();
700  OpFoldResult zero = builder.getIndexAttr(0);
701  OpFoldResult one = builder.getIndexAttr(1);
702  ReifiedRankedShapedTypeDims resultShape;
703  (void)reifyResultShapes(builder, op, resultShape);
704  SmallVector<Range> loopBounds(rank);
705  for (auto dim : llvm::seq<int64_t>(0, rank)) {
706  loopBounds[dim].offset = zero;
707  loopBounds[dim].stride = one;
708  loopBounds[dim].size = resultShape[0][dim];
709  }
710  return loopBounds;
711 }
712 
713 static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
714  SmallVector<OpFoldResult> &sizes,
715  ArrayRef<int64_t> permutation) {
716  if (permutation.empty())
717  return;
718  applyPermutationToVector<OpFoldResult>(offsets, permutation);
719  applyPermutationToVector<OpFoldResult>(sizes, permutation);
720 }
721 
722 struct PackOpTiling
723  : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {
724 
725  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
726  // Note that here we only consider untiled dimensions and outer tiled data
727  // dimensions; the inner tiled data dimensions are materialized when
728  // building the body of the operation.
729  auto packOp = cast<PackOp>(op);
730  SmallVector<utils::IteratorType> iteratorTypes(
731  packOp.getSourceRank(), utils::IteratorType::parallel);
732  return iteratorTypes;
733  }
734 
735  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
736  return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
737  }
738 
739  FailureOr<TilingResult>
740  getTiledImplementation(Operation *op, OpBuilder &b,
741  ArrayRef<OpFoldResult> offsets,
742  ArrayRef<OpFoldResult> sizes) const {
743  auto packOp = cast<PackOp>(op);
744  Location loc = packOp.getLoc();
745 
746  // The tiling is applied on interchanged dimensions. We have to undo the
747  // interchange to map sizes and offsets to the original input.
748  int64_t inputRank = packOp.getSourceRank();
749  SmallVector<OpFoldResult> origOffsets(offsets);
750  SmallVector<OpFoldResult> origSizes(sizes);
751  applyPermToRange(origOffsets, origSizes,
752  invertPermutationVector(packOp.getOuterDimsPerm()));
753 
754  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
755  packOp.getDimAndTileMapping();
756  SmallVector<OpFoldResult> srcDimValues =
757  tensor::getMixedSizes(b, loc, packOp.getSource());
758  SmallVector<OpFoldResult> inputIndices, inputSizes;
759  for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
760  using AV = affine::AffineValueExpr;
761  affine::AffineBuilder ab(b, loc);
762  AffineExpr dim0, dim1, sym;
763  bindDims(b.getContext(), dim0, dim1);
764  bindSymbols(b.getContext(), sym);
765  if (dimAndTileMapping.count(dim)) {
766  // If the data dimension is tiled, the i-th index is the product of
767  // offset_i and tile_i, and the i-th size is the product of sizes_i and
768  // tile_i.
769  auto avOffset = AV(dim0).bind(origOffsets[dim]);
770  auto avSize = AV(dim0).bind(origSizes[dim]);
771  auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
772  inputIndices.push_back(ab.mul(avOffset, avTileSize));
773  inputSizes.push_back(ab.mul(avSize, avTileSize));
774  } else {
775  inputIndices.push_back(origOffsets[dim]);
776  inputSizes.push_back(origSizes[dim]);
777  }
778 
779  // Limit the size of the input operand for incomplete tiles.
780  if (packOp.getPaddingValue()) {
781  OpFoldResult dimSize = srcDimValues[dim];
782  auto avDimSize = AV(dim0).bind(dimSize);
783  auto avInputIdx = AV(dim1).bind(inputIndices.back());
784  inputSizes.back() =
785  ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
786  }
787  }
788 
789  auto oneAttr = b.getI64IntegerAttr(1);
790  SmallVector<OpFoldResult> strides(inputRank, oneAttr);
791 
792  SmallVector<Value> tiledOperands;
793  auto sourceSlice = b.create<tensor::ExtractSliceOp>(
794  loc, packOp.getSource(), inputIndices, inputSizes, strides);
795  tiledOperands.push_back(sourceSlice);
796 
797  SmallVector<OpFoldResult> outputOffsets, outputSizes;
798  if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
799  outputSizes)))
800  return {};
801 
802  strides.append(packOp.getDestRank() - inputRank, oneAttr);
803  auto outSlice = b.create<tensor::ExtractSliceOp>(
804  loc, packOp.getDest(), outputOffsets, outputSizes, strides);
805  tiledOperands.push_back(outSlice);
806 
807  if (auto val = packOp.getPaddingValue())
808  tiledOperands.push_back(val);
809  for (auto tile : packOp.getInnerTiles())
810  tiledOperands.push_back(tile);
811 
812  Operation *tiledPackOp = b.create<PackOp>(
813  loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
814 
815  return TilingResult{
816  {tiledPackOp},
817  SmallVector<Value>(tiledPackOp->getResults()),
818  llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
819  }
820 
821  LogicalResult
822  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
823  ArrayRef<OpFoldResult> offsets,
824  ArrayRef<OpFoldResult> sizes,
825  SmallVector<OpFoldResult> &resultOffsets,
826  SmallVector<OpFoldResult> &resultSizes) const {
827  // The iteration domain is over outer dimensions of packed layout. In this
828  // context, the outer dimensions of `resultOffsets` are `offsets`. The
829  // inner dimensions of `resultOffsets` are zeros because tiling is not
830  // applied to them.
831  auto packOp = cast<PackOp>(op);
832  int64_t inputRank = packOp.getSourceRank();
833  int64_t outputRank = packOp.getDestRank();
834  auto zeroAttr = b.getI64IntegerAttr(0);
835  resultOffsets.assign(offsets.begin(), offsets.end());
836  resultOffsets.append(outputRank - inputRank, zeroAttr);
837 
838  ReifiedRankedShapedTypeDims outputShape;
839  (void)reifyResultShapes(b, packOp, outputShape);
840  resultSizes.assign(sizes.begin(), sizes.end());
841  for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
842  resultSizes.push_back(outputShape[0][dataTileDim]);
843 
844  return success();
845  }
846 
847  FailureOr<TilingResult>
848  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
849  ArrayRef<OpFoldResult> offsets,
850  ArrayRef<OpFoldResult> sizes) const {
851  auto packOp = cast<PackOp>(op);
852  int64_t numTiles = packOp.getInnerDimsPos().size();
853 
854  // tensor.pack op is fusible (as a producer) only if full inner tiles are
855  // iterated or inner dims are not tiled. Otherwise, it will generate a
856  // sequence of non-trivial ops (for partial tiles).
857  for (auto offset : offsets.take_back(numTiles))
858  if (!isZeroInteger(offset))
859  return failure();
860 
861  for (auto iter :
862  llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
863  if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
864  return failure();
865 
866  FailureOr<TilingResult> tilingResult = getTiledImplementation(
867  op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
868  if (failed(tilingResult))
869  return failure();
870  return tilingResult.value();
871  }
872 
873  /// Method to return the position of the iteration domain tile computed by
874  /// the tiled operation. In the current `tensor.pack` context, the
875  /// `resultOffsets` and `resultSizes` only cover outer dimensions.
876  LogicalResult getIterationDomainTileFromOperandTiles(
877  Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
878  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
879  ArrayRef<SmallVector<OpFoldResult>> allSizes,
880  SmallVectorImpl<OpFoldResult> &resultOffsets,
881  SmallVectorImpl<OpFoldResult> &resultSizes) const {
882  if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
883  LLVM_DEBUG(
884  { llvm::dbgs() << "unsupported operands for consumer fusion"; });
885  return failure();
886  }
887 
888  ArrayRef<OpFoldResult> offsets(allOffsets[0]);
889  ArrayRef<OpFoldResult> sizes(allSizes[0]);
890 
891  auto packOp = cast<PackOp>(op);
892  // It is not trivial to infer dest tile from source tile if `packOp` has
893  // padding semantics.
894  if (packOp.getPaddingValue())
895  return failure();
896 
897  Location loc = packOp.getLoc();
898 
899  SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
900  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
901  packOp.getDimAndTileMapping();
902  for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
903  if (dimAndTileMapping.count(dim)) {
904  FailureOr<int64_t> cstSize =
905  ValueBoundsConstraintSet::computeConstantBound(
906  presburger::BoundType::UB, sizes[dim],
907  /*stopCondition=*/nullptr, /*closedUB=*/true);
908  std::optional<int64_t> cstInnerSize =
909  getConstantIntValue(dimAndTileMapping[dim]);
910  // Currently, fusing `packOp` as a consumer expects a perfect tiling
911  // scenario because, even without padding semantics, the `packOp` may
912  // yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
913  // where the `tileSize` from the operand of `packOp` is 5, which is not
914  // evenly divisible by the `innerTile` (=6) of `packOp`. As a result:
915  // 1. the first slice is extracted from (0) to (4) and inserted into
916  // (0,0)~(0,4) in the first row.
917  // 2. the second slice is extracted from (5) to (9) and SHOULD BE
918  // inserted into two rows with different lengths: the first row at
919  // (0,5) and the second row at (1,0)~(1,3). It is hard to coordinate
920  // them, so the constraint below bypasses such cases for now. In
921  // other words, we can only support fusing with a consumer if the tile
922  // size for the producer is a multiple of the inner tile size for the
923  // packed dimensions at this moment.
924  if (failed(cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0) {
925  return failure();
926  }
927 
928  using AV = affine::AffineValueExpr;
929  affine::AffineBuilder ab(b, loc);
930  AffineExpr dim0, sym;
931  bindDims(b.getContext(), dim0);
932  bindSymbols(b.getContext(), sym);
933  auto avOffset = AV(dim0).bind(offsets[dim]);
934  auto avSize = AV(dim0).bind(sizes[dim]);
935  auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
936  outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
937  outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
938  } else {
939  outerDimOffsets.push_back(offsets[dim]);
940  outerDimSizes.push_back(sizes[dim]);
941  }
942  }
943  applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
944  resultOffsets = outerDimOffsets;
945  resultSizes = outerDimSizes;
946  return success();
947  }
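//
// Worked example with assumed numbers: packing tensor<32xf32> into
// tensor<4x8xf32> (inner tile 8), an operand tile with offset 16 and size
// 16 satisfies the divisibility check (16 % 8 == 0) and maps to
// outerDimOffset = floor(16 / 8) = 2 and outerDimSize = ceil(16 / 8) = 2,
// i.e. rows 2 and 3 of the packed destination.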
948 
949  /// Method to return the tiled implementation of tensor.pack as a consumer.
950  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
951  Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
952  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
953  ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
954  if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
955  LLVM_DEBUG(
956  { llvm::dbgs() << "unhandled operands for consumer fusion"; });
957  return failure();
958  }
959 
960  ArrayRef<OpFoldResult> offsets(allOffsets[0]);
961  ArrayRef<OpFoldResult> sizes(allSizes[0]);
962 
963  auto packOp = cast<PackOp>(op);
964  Location loc = packOp.getLoc();
965 
966  int64_t inputRank = packOp.getSourceRank();
967  auto oneAttr = b.getI64IntegerAttr(1);
968  SmallVector<OpFoldResult> strides(inputRank, oneAttr);
969 
970  SmallVector<Value> tiledOperands;
971  auto sourceSlice = b.create<tensor::ExtractSliceOp>(
972  loc, packOp.getSource(), offsets, sizes, strides);
973  tiledOperands.push_back(sourceSlice);
974 
975  SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
976  if (failed(getIterationDomainTileFromOperandTiles(
977  op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
978  outerDimSizes)))
979  return failure();
980 
981  SmallVector<OpFoldResult> outputOffsets, outputSizes;
982  if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
983  outputOffsets, outputSizes)))
984  return failure();
985 
986  strides.append(packOp.getDestRank() - inputRank, oneAttr);
987  auto outSlice = b.create<tensor::ExtractSliceOp>(
988  loc, packOp.getDest(), outputOffsets, outputSizes, strides);
989  tiledOperands.push_back(outSlice);
990 
991  assert(!packOp.getPaddingValue() && "Expect no padding semantic");
992  for (auto tile : packOp.getInnerTiles())
993  tiledOperands.push_back(tile);
994 
995  Operation *tiledPackOp = b.create<PackOp>(
996  loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
997 
998  return TilingResult{
999  {tiledPackOp},
1000  SmallVector<Value>(tiledPackOp->getResults()),
1001  llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
1002  }
1003 };
1004 
1005 struct UnpackTileDimInfo {
1006  bool isAlignedToInnerTileSize;
1007  OpFoldResult sourceOffset;
1008  OpFoldResult sourceSize;
1009  OpFoldResult resultOffset;
1010  OpFoldResult destExpandedSize;
1011 };
1012 
1013 /// Returns the information needed for tiling an unpack op on `tileDim` with
1014 /// the given `tileOffset` and `tileSize`. For more details, see the comment
1015 /// on `getTiledImplementation`.
1016 static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
1017  int64_t tileDim,
1018  OpFoldResult tileOffset,
1019  OpFoldResult tileSize) {
1020  UnpackTileDimInfo info;
1021  Attribute zeroAttr = b.getIndexAttr(0);
1022  Attribute oneAttr = b.getIndexAttr(1);
1023  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1024  unpackOp.getDimAndTileMapping();
1025  // The dimension is not one of the packed data dimensions.
1026  if (!dimAndTileMapping.count(tileDim)) {
1027  info.isAlignedToInnerTileSize = true;
1028  info.sourceOffset = tileOffset;
1029  info.sourceSize = tileSize;
1030  info.resultOffset = zeroAttr;
1031  info.destExpandedSize = tileSize;
1032  return info;
1033  }
1034 
1035  Location loc = unpackOp.getLoc();
1036  using AV = affine::AffineValueExpr;
1037  affine::AffineBuilder ab(b, loc);
1038  AffineExpr dim0, dim1, sym0;
1039  bindDims(b.getContext(), dim0, dim1);
1040  bindSymbols(b.getContext(), sym0);
1041 
1042  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
1043 
1044  info.isAlignedToInnerTileSize = false;
1045  FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
1046  presburger::BoundType::UB, tileSize,
1047  /*stopCondition=*/nullptr, /*closedUB=*/true);
1048  std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
1049  if (!failed(cstSize) && cstInnerSize) {
1050  if (*cstSize % *cstInnerSize == 0)
1051  info.isAlignedToInnerTileSize = true;
1052 
1053  // If the tiling size equals the inner tiling size, the outer dims are
1054  // always 1.
1055  if (*cstInnerSize == *cstSize) {
1056  auto lhs = AV(dim0).bind(tileOffset);
1057  auto rhs = AV(dim1).bind(innerTileSize);
1058  info.sourceOffset = ab.floor(lhs, rhs);
1059  info.sourceSize = oneAttr;
1060  info.resultOffset = zeroAttr;
1061  info.destExpandedSize = tileSize;
1062  return info;
1063  }
1064  }
1065 
1066  if (info.isAlignedToInnerTileSize) {
1067  info.sourceOffset =
1068  ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
1069  info.resultOffset = zeroAttr;
1070  info.destExpandedSize = tileSize;
1071 
1072  // The ceilDiv is needed here because there could be an incomplete tile
1073  // even in perfect tiling cases. E.g.,
1074  // %0 = unpack tensor<33x2xf32> into tensor<64xf32>
1075  // If the tiling size is 32, there will be 3 tiles. Two of them have
1076  // size=32; one of them has size=2. The size is represented using an
1077  // affine_min op; we need ceilDiv.
1078  info.sourceSize =
1079  ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
1080  return info;
1081  }
1082 
1083  affine::DivModValue firstCoord = affine::getDivMod(
1084  b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
1085  getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1086  OpFoldResult tileExclusiveBound =
1087  ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
1088  affine::DivModValue lastCoord = affine::getDivMod(
1089  b, loc,
1090  getValueOrCreateConstantIndexOp(
1091  b, loc,
1092  ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
1093  getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1094 
1095  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
1096  AV(dim1).bind(firstCoord.quotient));
1097  info.sourceSize =
1098  ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
1099  info.sourceOffset = firstCoord.quotient;
1100  info.resultOffset = firstCoord.remainder;
1101  // Do not create affine ops for the expanded size because the affine op is
1102  // too complicated and would trigger an issue in affine op simplification.
1103  info.destExpandedSize = b.createOrFold<arith::MulIOp>(
1104  loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
1105  getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
1106  return info;
1107 }
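//
// Worked example of the unaligned path, reusing the Nn_to_N case from the
// comment below (N = 32, n = 8) with tileOffset = 15 and tileSize = 15:
//   firstCoord = divmod(15, 8)          = (1, 7)
//   lastCoord  = divmod(15 + 15 - 1, 8) = (3, 5)
// so sourceOffset = 1, sourceSize = 3 - 1 + 1 = 3 (rows), resultOffset = 7,
// and destExpandedSize = 3 * 8 = 24 elements, which the final extract_slice
// later truncates back to the requested 15.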
1108 
1109 struct UnPackOpTiling
1110  : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {
1111 
1112  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
1113  auto unpackOp = cast<UnPackOp>(op);
1114  SmallVector<utils::IteratorType> iteratorTypes(
1115  unpackOp.getDestRank(), utils::IteratorType::parallel);
1116  return iteratorTypes;
1117  }
1118 
1119  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
1120  return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
1121  }
1122 
1123  /// There are two cases in tiling unpack ops. If the tiling size is aligned
1124  /// to the inner tile size, the corresponding tiles of the source are all
1125  /// complete. Otherwise, there are incomplete tiles. We will need to expand
1126  /// the slice of the source to get complete tiles. The tiled unpack op then
1127  /// unpacks more data from the source, so we'll need an extract_slice op to
1128  /// shift and truncate the output.
1129  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
1130  /// coordinates of the second tile (i.e., result[15..29]) are
1131  /// [(1, 7), (2, 0), (2, 1) ... (3, 4), (3, 5)]. The first row and the last
1132  /// row are incomplete tiles. To represent the unpack op, we have to complete
1133  /// the rows. I.e., the input coordinates would start with (1, 0) and end
1134  /// with (3, 7). In this context, the tiled unpack produces (3 * n) elements
1135  /// because there are 3 rows in total. Followed by a tensor.extract_slice op,
1136  /// we can get the actual result.
1137  FailureOr<TilingResult>
1138  getTiledImplementation(Operation *op, OpBuilder &b,
1139  ArrayRef<OpFoldResult> offsets,
1140  ArrayRef<OpFoldResult> sizes) const {
1141  auto unpackOp = cast<UnPackOp>(op);
1142  int64_t srcRank = unpackOp.getSourceRank();
1143  int64_t destRank = unpackOp.getDestRank();
1144  int64_t numInnerTiles = srcRank - destRank;
1145  Location loc = unpackOp.getLoc();
1146 
1147  // The perfect tiling case indicates that the tiling sizes are multiples of
1148  // inner_tile_size. In this context, no extra data is needed when
1149  // representing the tiled unpack op.
1150  bool isPerfectTilingCase = true;
1151  Attribute oneAttr = b.getIndexAttr(1);
1152  SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
1153  SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
1154  SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
1155  for (auto dim : llvm::seq<int64_t>(0, destRank)) {
1156  UnpackTileDimInfo info =
1157  getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
1158  if (!info.isAlignedToInnerTileSize)
1159  isPerfectTilingCase = false;
1160  sliceSrcIndices.push_back(info.sourceOffset);
1161  sliceSrcSizes.push_back(info.sourceSize);
1162  destExpandedSizes.push_back(info.destExpandedSize);
1163  resultOffsetsFromDest.push_back(info.resultOffset);
1164  }
1165 
1166  // The tiling is applied on destination dimensions. We have to apply the
1167  // interchange on source dimensions if outer_dims_perm is set.
1168  applyPermToRange(sliceSrcIndices, sliceSrcSizes,
1169  unpackOp.getOuterDimsPerm());
1170  Attribute zeroAttr = b.getIndexAttr(0);
1171  sliceSrcIndices.append(numInnerTiles, zeroAttr);
1172  sliceSrcSizes.append(unpackOp.getMixedTiles());
1173  sliceSrcStrides.append(numInnerTiles, oneAttr);
1174  SmallVector<Operation *> generatedSlices;
1175  tensor::ExtractSliceOp sliceSource = b.create<tensor::ExtractSliceOp>(
1176  loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
1177  sliceSrcStrides);
1178  generatedSlices.push_back(sliceSource);
1179 
1180  SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
1181  Value sliceDest;
1182  if (isPerfectTilingCase) {
1183  auto destSliceOp = b.create<tensor::ExtractSliceOp>(
1184  loc, unpackOp.getDest(), offsets, sizes, destStrides);
1185  sliceDest = destSliceOp;
1186  generatedSlices.push_back(destSliceOp);
1187  } else {
1188  sliceDest = b.create<tensor::EmptyOp>(
1189  loc, destExpandedSizes, unpackOp.getDestType().getElementType());
1190  }
1191 
1192  SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
1193  for (auto tile : unpackOp.getInnerTiles())
1194  tiledOperands.push_back(tile);
1195 
1196  Operation *tiledUnpackOp = b.create<UnPackOp>(
1197  loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());
1198 
1199  if (isPerfectTilingCase)
1200  return TilingResult{{tiledUnpackOp},
1201  SmallVector<Value>(tiledUnpackOp->getResults()),
1202  generatedSlices};
1203 
1204  auto extractSlice = b.create<tensor::ExtractSliceOp>(
1205  loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
1206  destStrides);
1207  return TilingResult{
1208  {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
1209  }
1210 
1211  LogicalResult
1212  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
1213  ArrayRef<OpFoldResult> offsets,
1214  ArrayRef<OpFoldResult> sizes,
1215  SmallVector<OpFoldResult> &resultOffsets,
1216  SmallVector<OpFoldResult> &resultSizes) const {
1217  resultOffsets = llvm::to_vector(offsets);
1218  resultSizes = llvm::to_vector(sizes);
1219  return success();
1220  }
1221 
1222  FailureOr<TilingResult>
1223  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
1224  ArrayRef<OpFoldResult> offsets,
1225  ArrayRef<OpFoldResult> sizes) const {
1226  FailureOr<TilingResult> tilingResult =
1227  getTiledImplementation(op, b, offsets, sizes);
1228  if (failed(tilingResult))
1229  return failure();
1230  return tilingResult.value();
1231  }
1232 
1233  /// Method to return the position of the iteration domain tile computed by
1234  /// the tiled operation.
1235  LogicalResult getIterationDomainTileFromOperandTiles(
1236  Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1237  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1238  ArrayRef<SmallVector<OpFoldResult>> allSizes,
1239  SmallVectorImpl<OpFoldResult> &resultOffsets,
1240  SmallVectorImpl<OpFoldResult> &resultSizes) const {
1241  if (operandNumbers.size() != 1) {
1242  LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
1243  return failure();
1244  }
1245  auto unPackOp = cast<UnPackOp>(op);
1246  unsigned operandNumber = operandNumbers[0];
1247  ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1248  ArrayRef<OpFoldResult> sizes(allSizes[0]);
1249 
1250  // If the operand tile is the dest, then no adjustment is needed.
1251  if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
1252  resultOffsets = llvm::to_vector(offsets);
1253  resultSizes = llvm::to_vector(sizes);
1254  return success();
1255  }
1256  Location loc = unPackOp.getLoc();
1257 
1258  int64_t numTiles = unPackOp.getInnerDimsPos().size();
1259  auto destOffsets = offsets.drop_back(numTiles);
1260  auto destSizes = sizes.drop_back(numTiles);
1261  // The tiling is applied on interchanged dimensions. We have to undo the
1262  // interchange to map sizes and offsets to the original input.
1263  int64_t outputRank = unPackOp.getDestRank();
1264  ReifiedRankedShapedTypeDims reifiedReturnShapes;
1265  if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
1266  return failure();
1267  SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
1268  SmallVector<OpFoldResult> origOffsets(destOffsets);
1269  SmallVector<OpFoldResult> origSizes(destSizes);
1270  applyPermToRange(origOffsets, origSizes,
1271  invertPermutationVector(unPackOp.getOuterDimsPerm()));
1272 
1273  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
1274  unPackOp.getDimAndTileMapping();
1275 
1276  for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
1277  using AV = affine::AffineValueExpr;
1278  affine::AffineBuilder ab(b, loc);
1279  AffineExpr dim0, dim1, sym0;
1280  bindDims(b.getContext(), dim0, dim1);
1281  bindSymbols(b.getContext(), sym0);
1282  if (dimAndTileMapping.count(dim)) {
1283  // If the data dimension is tiled, the i-th index is the product of
1284  // offset_i and tile_i, and the i-th size is the product of sizes_i and
1285  // tile_i. The sizes must be clamped to the sizes of the unpack result.
1286  auto avOffset = AV(dim0).bind(origOffsets[dim]);
1287  auto avSize = AV(dim0).bind(origSizes[dim]);
1288  auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
1289  auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
1290  resultOffsets.push_back(ab.mul(avOffset, avTileSize));
1291  auto avResultOffset = AV(dim1).bind(resultOffsets.back());
1292  resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
1293  ab.sub(avResultSize, avResultOffset)}));
1294  } else {
1295  resultOffsets.push_back(origOffsets[dim]);
1296  resultSizes.push_back(origSizes[dim]);
1297  }
1298  }
1299  return success();
1300  }
1301 
1302  /// Method to return the tiled implementation of tensor.unpack as a consumer.
1303  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
1304  Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
1305  ArrayRef<SmallVector<OpFoldResult>> allOffsets,
1306  ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
1307  if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
1308  LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; });
1309  return failure();
1310  }
1311  auto unPackOp = cast<UnPackOp>(op);
1312  ArrayRef<OpFoldResult> offsets(allOffsets[0]);
1313  ArrayRef<OpFoldResult> sizes(allSizes[0]);
1314 
1315  // tensor.unpack op is fusible (as a consumer) only if inner dims are not
1316  // tiled.
1317  int64_t numTiles = unPackOp.getInnerDimsPos().size();
1318  for (auto iter :
1319  llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
1320  if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
1321  return failure();
1322  }
1323 
1324  Location loc = unPackOp.getLoc();
1325 
1326  // Fetch offset/size for creating the slice of the dest operand of
1327  // unpack op.
1328  SmallVector<OpFoldResult> outputOffsets, outputSizes;
1329  if (failed(getIterationDomainTileFromOperandTiles(
1330  op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
1331  outputSizes)))
1332  return failure();
1333 
1334  auto oneAttr = b.getI64IntegerAttr(1);
1335  int64_t outputRank = unPackOp.getDestRank();
1336  SmallVector<OpFoldResult> strides(outputRank, oneAttr);
1337 
1338  SmallVector<Value> tiledOperands;
1339  // Create slice of the dest operand.
1340  auto extractDestSlice = b.create<tensor::ExtractSliceOp>(
1341  loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
1342  tiledOperands.push_back(extractDestSlice);
1343 
1344  strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
1345  // Create slice of the source operand.
1346  auto extractSourceSlice = b.create<tensor::ExtractSliceOp>(
1347  loc, unPackOp.getSource(), offsets, sizes, strides);
1348  tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
1349  for (auto tile : unPackOp.getInnerTiles())
1350  tiledOperands.push_back(tile);
1351 
1352  // Create tiled unpack op.
1353  Operation *tiledUnPackOp =
1354  b.create<UnPackOp>(loc, TypeRange{extractDestSlice.getType()},
1355  tiledOperands, op->getAttrs());
1356 
1357  return TilingResult{{tiledUnPackOp},
1358  SmallVector<Value>(tiledUnPackOp->getResults()),
1359  llvm::to_vector(ArrayRef<Operation *>{
1360  extractSourceSlice, extractDestSlice})};
1361  }
1362 };
1363 
1364 } // namespace
1365 
1366 template <typename OpType>
1367 static void registerOne(MLIRContext *ctx) {
1368  OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1369  OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1370  *ctx);
1371 }
1372 
1373 /// Variadic helper function to register the interfaces for several op types.
1374 template <typename... OpTypes>
1375 static void registerAll(MLIRContext *ctx) {
1376  (registerOne<OpTypes>(ctx), ...);
1377 }
1378 
1379 #define GET_OP_LIST
1380 
1381 void mlir::linalg::registerTilingInterfaceExternalModels(
1382  DialectRegistry &registry) {
1383  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
1384  registerOne<linalg::GenericOp>(ctx);
1385  linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1386  linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1387  registerAll<
1388 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
1389  >(ctx);
1390  });
1391 }
1392 
1393 void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
1394  DialectRegistry &registry) {
1395  registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) {
1396  linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1397  linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1398  });
1399 }
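
// A minimal client-side usage sketch (not part of this file): attach the
// external models to a context before transforming IR.
//
//   DialectRegistry registry;
//   registry.insert<linalg::LinalgDialect>();
//   linalg::registerTilingInterfaceExternalModels(registry);
//   MLIRContext context(registry);
//
// Afterwards, `cast<TilingInterface>(someLinalgOp.getOperation())` resolves
// to the external models registered above.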
Definition: MLIRContext.h:60
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:548
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition: Builders.h:517
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:452
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271
This class represents an operand of an operation.
Definition: Value.h:257
This is a value defined by a result of an operation.
Definition: Value.h:447
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
void setOperand(unsigned idx, Value value)
Definition: Operation.h:351
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Definition: Operation.cpp:718
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:512
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
Definition: Operation.h:686
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
result_range getResults()
Definition: Operation.h:415
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:672
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Definition: Region.cpp:70
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
static FailureOr< int64_t > computeConstantBound(presburger::BoundType type, const Variable &var, StopConditionFn stopCondition=nullptr, bool closedUB=false)
Compute a constant bound for the given variable.
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1331
DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs)
Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
Definition: Utils.cpp:1942
std::optional< TypedAttr > getNeutralElement(Operation *op)
Return the identity numeric value associated to the give op.
Definition: ArithOps.cpp:2649
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
Definition: Utils.cpp:862
void registerTilingInterfaceExternalModelsForPackUnPackOps(DialectRegistry &registry)
Similar to the above registeration, but it is only for tensor.pack and tensor.unpack ops.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition: Utils.cpp:884
void registerTilingInterfaceExternalModels(DialectRegistry &registry)
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Definition: Utils.cpp:773
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
Definition: Utils.cpp:626
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Definition: TensorOps.cpp:70
Include the generated interface declarations.
ReductionTilingStrategy
Tiling can be thought of as splitting a dimension into 2 and materializing the outer dimension as a l...
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2)
Return true if ofr1 and ofr2 are the same integer constant attribute values or the same SSA value.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:311
Value matchReduction(ArrayRef< BlockArgument > iterCarriedArgs, unsigned redPos, SmallVectorImpl< Operation * > &combinerOps)
Utility to match a generic reduction given a list of iteration-carried arguments, iterCarriedArgs and...
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:325
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:112
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: Utils.cpp:1286
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
std::pair< SmallVector< int64_t >, SmallVector< Value > > decomposeMixedValues(ArrayRef< OpFoldResult > mixedValues)
Decompose a vector of mixed static or dynamic values into the corresponding pair of arrays.
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to apply to inverse a permutation.
Container for the result of merge operation of tiling.
Container for result values of tiling.
SmallVector< Operation * > tiledOps
A struct containg offsets-sizes-strides arguments of the tiled shape.
Definition: Utils.h:155
SmallVector< OpFoldResult > sizes
Definition: Utils.h:157
SmallVector< OpFoldResult > offsets
Definition: Utils.h:156