//===- TilingInterfaceImpl.cpp - Implementation of TilingInterface -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "llvm/Support/Debug.h"
#include <optional>

#define DEBUG_TYPE "linalg-tiling-interface-impl"

using namespace mlir;
using namespace mlir::linalg;

//===----------------------------------------------------------------------===//
// Utility methods for implementation of Tiling Interface for Linalg ops
//===----------------------------------------------------------------------===//

/// Return the SSA values that represent the data point accessed using a given
/// `indexingMap` for a given point in the iteration space represented by `ivs`.
static SmallVector<Value> getIndicesForAccess(OpBuilder &b, Location loc,
                                              AffineMap indexingMap,
                                              ValueRange ivs) {
  SmallVector<Value> indices;
  indices.reserve(indexingMap.getNumResults());
  for (auto result : indexingMap.getResults()) {
    AffineMap m = AffineMap::get(indexingMap.getNumDims(),
                                 indexingMap.getNumSymbols(), result);
    Value v = affine::AffineApplyOp::create(b, loc, m, ivs);
    indices.push_back(v);
  }
  return indices;
}
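
// Worked example (illustrative, not part of the original source): for an
// indexing map (d0, d1) -> (d1, d0) and ivs = [%i, %j], this emits two
// affine.apply ops and returns [%j, %i], i.e. the transposed access for that
// iteration-space point.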

/// Method to inline the payload of a `linalgOp` given the iteration space
/// point and values for the arguments of the payload.
static LogicalResult inlinePayload(OpBuilder &b, LinalgOp linalgOp,
                                   ValueRange ivs, ValueRange argValues) {
  Block *body = linalgOp.getBlock();
  IRMapping map;
  map.map(body->getArguments(), argValues);
  for (auto &op : body->without_terminator()) {
    if (auto indexOp = dyn_cast<IndexOp>(&op)) {
      map.map(indexOp.getResult(), ivs[indexOp.getDim()]);
      continue;
    }
    b.clone(op, map);
  }

  Operation *terminator = body->getTerminator();
  Location loc = terminator->getLoc();
  for (const auto &operand : llvm::enumerate(terminator->getOperands())) {
    Value toStore = map.lookupOrDefault(operand.value());
    OpOperand *storeInto = linalgOp.getDpsInitOperand(operand.index());
    auto indices = getIndicesForAccess(
        b, loc, linalgOp.getMatchingIndexingMap(storeInto), ivs);
    memref::StoreOp::create(b, loc, toStore,
                            linalgOp.getDpsInitOperand(operand.index())->get(),
                            indices);
  }
  return success();
}

//===----------------------------------------------------------------------===//
// External Model for implementing `TilingInterface` for `LinalgOp`s.
//===----------------------------------------------------------------------===//

namespace {
/// External model implementation of TilingInterface for LinalgOps. An external
/// model implementation is used for now till the use of `TilingInterface` is
/// on-par with the current Linalg tiling + fusion patterns. Once it is, it may
/// be possible to move this into the op definition (though there are
/// advantages to leaving it as an external model).
template <typename LinalgOpTy>
struct LinalgOpTilingInterface
    : public TilingInterface::ExternalModel<LinalgOpTilingInterface<LinalgOpTy>,
                                            LinalgOpTy> {
  /// Return the loop iterator type.
  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
    return concreteOp.getIteratorTypesArray();
  }

  /// Return the iteration domain range.
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    OpBuilder::InsertionGuard g(b);
    b.setInsertionPoint(op);
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<OpFoldResult> allShapesSizes =
        linalgOp.createFlatListOfOperandDims(b, loc);
    AffineMap map = linalgOp.getShapesToLoopsMap();

    return llvm::to_vector(
        llvm::map_range(map.getResults(), [&](AffineExpr loopExpr) {
          OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
              b, loc, loopExpr, allShapesSizes);
          return Range{b.getIndexAttr(0), ofr, b.getIndexAttr(1)};
        }));
  }
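
  // Worked example (illustrative): for a linalg.matmul on tensor<?x?xf32>
  // operands with sizes MxK, KxN, and MxN, the shapes-to-loops map yields
  // three ranges [0, M, 1], [0, N, 1], and [0, K, 1], with the upper bounds
  // materialized via tensor.dim and affine.apply ops.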

  /// Instantiate the tiled implementation of the operation.
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    // Leave the `sizeBounds` value empty. That is only needed when the `sizes`
    // specified could lead to out-of-bounds accesses.
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);
    SmallVector<Value> valuesToTile = linalgOp->getOperands();
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, valuesToTile, offsets, sizes, {}, true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledOperands,
            [](Value v) -> bool {
              return isa_and_nonnull<tensor::ExtractSliceOp, memref::SubViewOp>(
                  v.getDefiningOp());
            }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    SmallVector<Type> resultTensorTypes =
        getTensorOutputTypes(linalgOp, tiledOperands);

    Operation *tiledOp = clone(b, linalgOp, resultTensorTypes, tiledOperands);
    offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);

    return TilingResult{
        {tiledOp}, SmallVector<Value>(tiledOp->getResults()), generatedSlices};
  }

  /// Utility to fetch the offsets and sizes when applied as per the indexing
  /// map of the linalg op. This helps in fusing the linalg op as a consumer of
  /// a given slice op.
  static LogicalResult
  getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b,
                         ArrayRef<AffineMap> indexingMaps,
                         ArrayRef<SmallVector<OpFoldResult>> allOffsets,
                         ArrayRef<SmallVector<OpFoldResult>> allSizes,
                         SmallVectorImpl<OpFoldResult> &mappedOffsetsVec,
                         SmallVectorImpl<OpFoldResult> &mappedSizesVec) {
    DenseMap<unsigned, OpFoldResult> mappedOffsets, mappedSizes;

    for (auto [indexingMap, offsets, sizes] :
         llvm::zip_equal(indexingMaps, allOffsets, allSizes)) {
      for (auto [resultExpr, offset, size] :
           llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) {
        auto dimExpr = dyn_cast<AffineDimExpr>(resultExpr);
        if (!dimExpr)
          continue;
        unsigned position = dimExpr.getPosition();
        auto it = mappedOffsets.find(position);
        if (it != mappedOffsets.end()) {
          OpFoldResult seenOffset = it->second;
          OpFoldResult seenSize = mappedSizes.lookup(position);
          if (seenOffset != offset || seenSize != size) {
            LLVM_DEBUG({
              llvm::dbgs() << "inconsistent iteration space mapping from "
                              "offsets/sizes of operands/results";
            });
            return failure();
          }
        } else {
          mappedOffsets[position] = offset;
          mappedSizes[position] = size;
        }
      }
    }

    // Aggregate from the given operand offsets and sizes, or default to
    // iteration space values.
    SmallVector<Range> iterationDomain =
        cast<TilingInterface>(linalgOp.getOperation()).getIterationDomain(b);
    mappedOffsetsVec.resize(iterationDomain.size());
    mappedSizesVec.resize(iterationDomain.size());
    for (auto [index, domain] : llvm::enumerate(iterationDomain)) {
      auto it = mappedOffsets.find(index);
      if (it != mappedOffsets.end()) {
        mappedOffsetsVec[index] = it->second;
        mappedSizesVec[index] = mappedSizes.lookup(index);
        continue;
      }
      mappedOffsetsVec[index] = domain.offset;
      mappedSizesVec[index] = domain.size;
    }
    return success();
  }
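
  // Worked example (illustrative): for a linalg.matmul consumer whose LHS was
  // sliced with offsets [%o0, %o1] and sizes [%s0, %s1], the LHS indexing map
  // (d0, d1, d2) -> (d0, d2) maps these onto iteration dimensions d0 and d2;
  // the unmapped d1 defaults to its full iteration-domain offset and size.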

  /// Method to return the position of the iteration domain tile computed from
  /// the positions of the given operand tiles.
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    std::optional<SmallVector<OpFoldResult>> iterationSpaceOffsets,
        iterationSpaceSizes;
    SmallVector<AffineMap> indexingMaps =
        llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) {
          OpOperand &opOperand = linalgOp->getOpOperand(operandNumber);
          return linalgOp.getMatchingIndexingMap(&opOperand);
        });
    if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets,
                                      allSizes, iterDomainOffsets,
                                      iterDomainSizes))) {
      return failure();
    }
    return success();
  }

  /// Return the details of the output tile generated by the tiled
  /// implementation.
  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    Location loc = op->getLoc();
    LinalgOp linalgOp = cast<LinalgOp>(op);

    AffineExpr d0;
    bindDims(b.getContext(), d0);
    SmallVector<OpFoldResult> subShapeSizes =
        llvm::to_vector(llvm::map_range(sizes, [&](OpFoldResult ofr) {
          return affine::makeComposedFoldedAffineApply(b, loc, d0 - 1, ofr);
        }));

    OpOperand *outOperand = linalgOp.getDpsInitOperand(resultNumber);
    SliceParameters sliceParams = computeSliceParameters(
        b, loc, outOperand->get(), sizes,
        linalgOp.getMatchingIndexingMap(outOperand), offsets,
        /*ubs=*/{}, subShapeSizes, true);
    resultOffsets = sliceParams.offsets;
    resultSizes = sliceParams.sizes;
    return success();
  }

  LogicalResult getIterationDomainTileFromResultTile(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
      SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
      SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
    auto linalgOp = cast<LinalgOp>(op);

    // Check that the indexing map used for the output is a projected
    // permutation. This could be relaxed with a more general approach that can
    // map the offsets and sizes from the result to iteration space tiles
    // (filling in full extent for dimensions not used to access the result).
    AffineMap indexingMap =
        linalgOp.getIndexingMapMatchingResult(op->getResult(resultNumber));
    if (!indexingMap.isProjectedPermutation()) {
      return op->emitOpError(
          "unhandled tiled implementation generation when result is not "
          "accessed using a permuted projection");
    }

    SmallVector<OpFoldResult> allOffsets = llvm::to_vector(offsets);
    SmallVector<OpFoldResult> allSizes = llvm::to_vector(sizes);
    auto status =
        getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets},
                               {allSizes}, iterDomainOffsets, iterDomainSizes);
    (void)status;
    assert(succeeded(status) && "unexpected error in offset calculation");
    return success();
  }

  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromResultTile(
            op, b, resultNumber, offsets, sizes, mappedOffsets, mappedSizes))) {
      return failure();
    }
    auto tilingInterfaceOp = cast<TilingInterface>(op);
    FailureOr<TilingResult> tilingResult =
        tilingInterfaceOp.getTiledImplementation(b, mappedOffsets, mappedSizes);

    if (failed(tilingResult))
      return failure();

    if (tilingResult->tiledOps.size() != 1)
      return op->emitOpError("failed to generate tiled implementation");

    return TilingResult{
        tilingResult->tiledOps,
        SmallVector<Value>{tilingResult->tiledValues[resultNumber]},
        tilingResult->generatedSlices};
  }

  /// Method to generate the tiled implementation of an operation from the tile
  /// of the operand.
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    SmallVector<OpFoldResult> mappedOffsets, mappedSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, mappedOffsets,
            mappedSizes))) {
      return failure();
    }
    return getTiledImplementation(op, b, mappedOffsets, mappedSizes);
  }

  LogicalResult generateScalarImplementation(Operation *op, OpBuilder &builder,
                                             Location loc,
                                             ValueRange ivs) const {
    auto linalgOp = cast<LinalgOp>(op);
    if (!linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have buffer semantics");

    SmallVector<Value> indexedValues;
    indexedValues.reserve(linalgOp->getNumOperands());
    Location linalgOpLoc = op->getLoc();
    /// Load the data corresponding to the block arguments that
    /// represent input operands.
    for (OpOperand &operand : linalgOp->getOpOperands()) {
      if (!linalgOp.payloadUsesValueFromOperand(&operand)) {
        indexedValues.push_back(nullptr);
        continue;
      }
      if (linalgOp.isScalar(&operand)) {
        indexedValues.push_back(operand.get());
        continue;
      }
      SmallVector<Value> indices = getIndicesForAccess(
          builder, linalgOpLoc, linalgOp.getMatchingIndexingMap(&operand), ivs);
      Value load =
          memref::LoadOp::create(builder, linalgOpLoc, operand.get(), indices);
      indexedValues.push_back(load);
    }

    /// Inline the op payload and store the result.
    return inlinePayload(builder, linalgOp, ivs, indexedValues);
  }
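
  // Worked example (illustrative): applied at iteration point ivs = [%i, %j]
  // to a linalg.generic with memref semantics, this emits a memref.load per
  // used input, clones the payload ops, and stores the yielded values with
  // memref.store at the indices given by the init operands' indexing maps.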
};

//===----------------------------------------------------------------------===//
// External Model for implementing `PartialReductionInterface` for `LinalgOp`s.
//===----------------------------------------------------------------------===//

/// In a given set vector, get the position of a particular element.
std::optional<int> getPositionIn(const llvm::SetVector<unsigned> &reductionDims,
                                 unsigned value) {
  for (auto [index, reductionDim] : llvm::enumerate(reductionDims)) {
    if (reductionDim == value) {
      return index;
    }
  }
  return std::nullopt;
}

/// Return the AffineMaps to use for the `outs` operands of the linalg op
/// generated for partial results. The new AffineMap is the AffineMap of the
/// untiled op with the reduction dimensions appended at the end, in the order
/// in which they were specified during tiling.
static SmallVector<AffineMap>
getPartialResultAffineMaps(LinalgOp linalgOp,
                           const SetVector<unsigned> &reductionDims) {
  auto partialReductionMaps = llvm::map_to_vector(
      linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
        for (auto redPos : reductionDims) {
          map =
              map.insertResult(getAffineDimExpr(redPos, linalgOp.getContext()),
                               map.getNumResults());
        }
        return map;
      });
  return partialReductionMaps;
}
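
// Worked example (illustrative): for a matmul init with indexing map
// (d0, d1, d2) -> (d0, d1) and reduction dimension d2, the partial-result map
// becomes (d0, d1, d2) -> (d0, d1, d2), so each reduction tile writes its
// partial sums along the appended dimension.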

struct InitSliceInfo {
  SmallVector<int64_t> resultShape;
  SmallVector<OpFoldResult> offsets;
  SmallVector<OpFoldResult> sizes;
  SmallVector<OpFoldResult> strides;
};

/// Return the result shape, offsets, sizes and strides of the slice of the
/// `initValue` to use as the destination of the partial reduction op generated
/// with the outer reduction strategy.
static InitSliceInfo getInitSliceInfoForOuterReduction(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  int64_t initRank = partialReductionMap.getNumResults();
  SmallVector<OpFoldResult> initOffsets, initSizes;
  Attribute zero = IntegerAttr::get(IndexType::get(context), 0);
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  SmallVector<OpFoldResult> initStrides(initRank, one);
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (reductionDims.contains(dim)) {
      initOffsets.push_back(zero);
    } else {
      initOffsets.push_back(offsets[dim]);
    }
    initSizes.push_back(sizes[dim]);
  }
  SmallVector<int64_t> resultShape;
  std::tie(resultShape, std::ignore) = decomposeMixedValues(initSizes);
  return {resultShape, initOffsets, initSizes, initStrides};
}

/// Return the result shape, offsets, sizes and strides of the slice of the
/// `initValue` to use as the destination of the partial reduction op generated
/// with the outer parallel strategy.
static InitSliceInfo getInitSliceInfoForOuterParallel(
    MLIRContext *context, ArrayRef<OpFoldResult> offsets,
    ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
    ArrayRef<OpFoldResult> splitReductionIvs, AffineMap partialReductionMap) {
  int64_t initRank = partialReductionMap.getNumResults();
  SmallVector<OpFoldResult> initOffsets, initSizes;
  Attribute one = IntegerAttr::get(IndexType::get(context), 1);
  SmallVector<OpFoldResult> initStrides(initRank, one);
  SmallVector<OpFoldResult> resultShape;
  for (AffineExpr dimExpr : partialReductionMap.getResults()) {
    unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
    if (std::optional<unsigned> dimPos = getPositionIn(reductionDims, dim)) {
      initOffsets.push_back(splitReductionIvs[dimPos.value()]);
      initSizes.push_back(one);
    } else {
      initOffsets.push_back(offsets[dim]);
      initSizes.push_back(sizes[dim]);
      resultShape.push_back(sizes[dim]);
    }
  }
  SmallVector<int64_t> staticShapes;
  std::tie(staticShapes, std::ignore) = decomposeMixedValues(resultShape);
  return {staticShapes, initOffsets, initSizes, initStrides};
}

/// Return the result shape, offsets, sizes and strides of the slice of the
/// `initValue` to use as the destination of the partial reduction op.
static InitSliceInfo getInitSliceInfo(MLIRContext *context,
                                      ReductionTilingStrategy strategy,
                                      ArrayRef<OpFoldResult> offsets,
                                      ArrayRef<OpFoldResult> sizes,
                                      const SetVector<unsigned> &reductionDims,
                                      ArrayRef<OpFoldResult> splitReductionIvs,
                                      AffineMap partialReductionMap) {
  if (strategy == ReductionTilingStrategy::PartialReductionOuterReduction) {
    return getInitSliceInfoForOuterReduction(context, offsets, sizes,
                                             reductionDims, splitReductionIvs,
                                             partialReductionMap);
  }
  assert(strategy == ReductionTilingStrategy::PartialReductionOuterParallel &&
         "unexpected ReductionTilingStrategy");
  return getInitSliceInfoForOuterParallel(context, offsets, sizes,
                                          reductionDims, splitReductionIvs,
                                          partialReductionMap);
}

/// External model implementation of PartialReductionInterface for
/// LinalgOps.
template <typename LinalgOpTy>
struct LinalgOpPartialReductionInterface
    : public PartialReductionOpInterface::ExternalModel<
          LinalgOpPartialReductionInterface<LinalgOpTy>, LinalgOpTy> {
  FailureOr<SmallVector<Value>> generateInitialTensorForPartialReduction(
      Operation *op, OpBuilder &b, Location loc, ArrayRef<OpFoldResult> sizes,
      const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);

    OpBuilder::InsertionGuard guard(b);
    if (linalgOp.hasPureBufferSemantics())
      return op->emitOpError("expected operation to have tensor semantics");

    SmallVector<AffineMap> partialResultMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    SmallVector<Value> inits;
    for (auto [initIdx, result, partialMap] :
         llvm::enumerate(linalgOp->getResults(), partialResultMaps)) {
      SmallVector<Operation *, 4> combinerOps;
      if (!matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                          combinerOps) ||
          combinerOps.size() != 1)
        return op->emitOpError("failed to analyze the reduction operation");

      Operation *reductionOp = combinerOps[0];
      std::optional<TypedAttr> identity = arith::getNeutralElement(reductionOp);
      if (!identity.has_value())
        return op->emitOpError(
            "failed to get an identity value for the reduction operation");

      // Append the new partial result dimensions.
      SmallVector<OpFoldResult> partialResultShape;
      for (AffineExpr dimExpr : partialMap.getResults()) {
        auto dim = cast<AffineDimExpr>(dimExpr);
        partialResultShape.push_back(sizes[dim.getPosition()]);
      }

      Type elType = getElementTypeOrSelf(result.getType());
      Value emptyTensor =
          tensor::EmptyOp::create(b, loc, partialResultShape, elType);
      Value constantOp = arith::ConstantOp::create(b, loc, *identity);
      auto identityTensor =
          linalg::FillOp::create(b, loc, constantOp, emptyTensor);
      inits.push_back(identityTensor.getResult(0));
    }

    return inits;
  }
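
  // Worked example (illustrative): for a floating-point sum reduction, this
  // materializes something like
  //   %empty = tensor.empty(...) : tensor<...xf32>
  //   %init = linalg.fill ins(%cst_0 : f32) outs(%empty)
  // where %cst_0 is the neutral element of arith.addf.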

  FailureOr<TilingResult>
  tileToPartialReduction(Operation *op, OpBuilder &b, Location loc,
                         ReductionTilingStrategy tilingStrategy,
                         ValueRange init, ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes,
                         const SetVector<unsigned> &reductionDims,
                         ArrayRef<OpFoldResult> splitReductionIvs) const {
    OpBuilder::InsertionGuard guard(b);
    auto linalgOp = cast<LinalgOp>(op);

    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    // Step 1. Extend the init maps with the reduction dimensions, since we
    // are converting them to parallel dimensions.
    SmallVector<AffineMap> newInitMaps;
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      newInitMaps = llvm::to_vector(partialReductionMaps);
    } else {
      newInitMaps = llvm::map_to_vector(
          linalgOp.getDpsInitsMutable(), [&](OpOperand &opOperand) {
            return linalgOp.getMatchingIndexingMap(&opOperand);
          });
    }

    // Step 2a: Extract a slice of the input operands.
    SmallVector<Value> tiledInputs = makeTiledShapes(
        b, loc, linalgOp, linalgOp.getDpsInputs(), offsets, sizes, {}, true);
    SmallVector<Operation *> generatedSlices = llvm::map_to_vector(
        llvm::make_filter_range(
            tiledInputs, [](Value v) -> bool { return v.getDefiningOp(); }),
        [](Value v) -> Operation * { return v.getDefiningOp(); });

    // Step 2b: Extract a slice of the init operands.
    SmallVector<Value, 1> tiledInits;
    for (auto [partialReductionMap, valueToTile] :
         llvm::zip_equal(partialReductionMaps, init)) {
      InitSliceInfo sliceInfo = getInitSliceInfo(
          b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
          splitReductionIvs, partialReductionMap);
      auto valueToTileType = cast<RankedTensorType>(valueToTile.getType());
      RankedTensorType sliceResultType = RankedTensorType::get(
          sliceInfo.resultShape, valueToTileType.getElementType(),
          valueToTileType.getEncoding());
      auto sliceOp = tensor::ExtractSliceOp::create(
          b, loc, sliceResultType, valueToTile, sliceInfo.offsets,
          sliceInfo.sizes, sliceInfo.strides);
      tiledInits.push_back(sliceOp.getResult());
      generatedSlices.push_back(sliceOp);
    }

    // Update the indexing maps.
    SmallVector<AffineMap> newMaps = linalgOp.getIndexingMapsArray();
    for (auto [initOperand, newInitMap] :
         llvm::zip_equal(linalgOp.getDpsInitsMutable(), newInitMaps)) {
      int mapIdx = linalgOp.getIndexingMapIndex(&initOperand);
      newMaps[mapIdx] = newInitMap;
    }

    // Step 3. Change the reduction dim iterator types.
    SmallVector<utils::IteratorType> newIteratorTypes =
        linalgOp.getIteratorTypesArray();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      for (int dim : reductionDims)
        newIteratorTypes[dim] = utils::IteratorType::parallel;
    }

    // Step 4. Create the new generic op.
    Operation *partialReductionOp;
    auto resultTypes = ValueRange(tiledInits).getTypes();
    if (tilingStrategy ==
        ReductionTilingStrategy::PartialReductionOuterReduction) {
      auto genericOp = GenericOp::create(b, loc, resultTypes, tiledInputs,
                                         tiledInits, newMaps, newIteratorTypes);
      IRMapping mapping;
      op->getRegion(0).cloneInto(&genericOp.getRegion(),
                                 genericOp.getRegion().begin(), mapping);
      partialReductionOp = genericOp.getOperation();
    } else {
      SmallVector<Value> operands = std::move(tiledInputs);
      llvm::append_range(operands, tiledInits);
      partialReductionOp = mlir::clone(b, op, resultTypes, operands);
    }
    return TilingResult{
        {partialReductionOp},
        llvm::map_to_vector(partialReductionOp->getResults(),
                            [](OpResult r) -> Value { return r; }),
        generatedSlices};
  }
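
  // Worked example (illustrative): with the outer-reduction strategy, a sum
  // reduction with maps (d0, d1) -> (d0, d1) and (d0, d1) -> (d0) becomes an
  // all-parallel linalg.generic whose init map is (d0, d1) -> (d0, d1); the
  // partial results are later combined by mergeReductions below.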

  FailureOr<MergeResult>
  mergeReductions(Operation *op, OpBuilder &b, Location loc,
                  ValueRange partialReduce,
                  const SetVector<unsigned> &reductionDims) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);

    // Permute the reduction dims as permuted by the partial result map.
    SmallVector<Operation *> mergeOperations;
    SmallVector<Value> replacements;
    for (auto [idx, init, partialResult, partialMap] : llvm::enumerate(
             linalgOp.getDpsInits(), partialReduce, partialReductionMaps)) {
      unsigned initIdx = idx;
      // linalg.reduce's iteration space is the tiled result's iteration space
      // (and not the tiled operation's iteration space). To account for this,
      // permute the reduction dimensions based on the partial result map of
      // the tiled result.
      SmallVector<int64_t> partialReductionDims;
      for (auto [resultNum, dimExpr] :
           llvm::enumerate(partialMap.getResults())) {
        unsigned dim = cast<AffineDimExpr>(dimExpr).getPosition();
        if (llvm::is_contained(reductionDims, dim)) {
          partialReductionDims.push_back(resultNum);
        }
      }

      auto reduction = linalg::ReduceOp::create(
          b, loc, partialResult, init, partialReductionDims,
          [&linalgOp, &initIdx](OpBuilder &b, Location loc, ValueRange inputs) {
            // Get the combiner op.
            SmallVector<Operation *, 4> combinerOps;
            matchReduction(linalgOp.getRegionOutputArgs(), initIdx,
                           combinerOps);
            Operation *clonedReductionOp = b.clone(*combinerOps[0]);
            // Combine the input at idx and output at numInits + idx.
            clonedReductionOp->setOperand(0, inputs[0]);
            clonedReductionOp->setOperand(1, inputs[1]);
            linalg::YieldOp::create(b, loc, clonedReductionOp->getResult(0));
          });

      mergeOperations.push_back(reduction);
      replacements.push_back(reduction->getResult(0));
    }

    return MergeResult{mergeOperations, replacements};
  }

  LogicalResult getPartialResultTilePosition(
      Operation *op, OpBuilder &b, unsigned resultNumber,
      ReductionTilingStrategy tilingStrategy, ArrayRef<OpFoldResult> offsets,
      ArrayRef<OpFoldResult> sizes, const SetVector<unsigned> &reductionDims,
      ArrayRef<OpFoldResult> splitReductionIvs,
      SmallVector<OpFoldResult> &resultOffsets,
      SmallVector<OpFoldResult> &resultSizes) const {
    auto linalgOp = cast<LinalgOp>(op);
    SmallVector<AffineMap> partialReductionMaps =
        getPartialResultAffineMaps(linalgOp, reductionDims);
    InitSliceInfo sliceInfo = getInitSliceInfo(
        b.getContext(), tilingStrategy, offsets, sizes, reductionDims,
        splitReductionIvs, partialReductionMaps[resultNumber]);
    std::swap(resultOffsets, sliceInfo.offsets);
    std::swap(resultSizes, sliceInfo.sizes);

    return success();
  }
};

template <typename OpTy>
static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
                                                       OpBuilder &builder) {
  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
                "applies to only pack or unpack operations");
  OpBuilder::InsertionGuard g(builder);
  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
                                                     : op.getDestRank();
  OpFoldResult zero = builder.getIndexAttr(0);
  OpFoldResult one = builder.getIndexAttr(1);
  ReifiedRankedShapedTypeDims resultShape;
  (void)reifyResultShapes(builder, op, resultShape);
  SmallVector<Range> loopBounds(rank);
  for (auto dim : llvm::seq<int64_t>(0, rank)) {
    loopBounds[dim].offset = zero;
    loopBounds[dim].stride = one;
    loopBounds[dim].size = resultShape[0][dim];
  }
  return loopBounds;
}

static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
                             SmallVector<OpFoldResult> &sizes,
                             ArrayRef<int64_t> permutation) {
  if (permutation.empty())
    return;
  applyPermutationToVector<OpFoldResult>(offsets, permutation);
  applyPermutationToVector<OpFoldResult>(sizes, permutation);
}

struct PackOpTiling
    : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    // Note that here we only consider untiled dimensions and outer tiled data
    // dimensions; the inner tiled data dimensions are materialized when
    // building the body of the operation.
    auto packOp = cast<PackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        packOp.getSourceRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
  }

  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();

    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map sizes and offsets to the original input.
    int64_t inputRank = packOp.getSourceRank();
    SmallVector<OpFoldResult> origOffsets(offsets);
    SmallVector<OpFoldResult> origSizes(sizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(packOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<OpFoldResult> srcDimValues =
        tensor::getMixedSizes(b, loc, packOp.getSource());
    SmallVector<OpFoldResult> inputIndices, inputSizes;
    for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym);
      if (dimAndTileMapping.count(dim)) {
        // If the data dimension is tiled, the i-th index is the product of
        // offset_i and tile_i, and the i-th size is the product of sizes_i and
        // tile_i.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        inputIndices.push_back(ab.mul(avOffset, avTileSize));
        inputSizes.push_back(ab.mul(avSize, avTileSize));
      } else {
        inputIndices.push_back(origOffsets[dim]);
        inputSizes.push_back(origSizes[dim]);
      }

      // Limit the size of the input operand for incomplete tiles.
      if (packOp.getPaddingValue()) {
        OpFoldResult dimSize = srcDimValues[dim];
        auto avDimSize = AV(dim0).bind(dimSize);
        auto avInputIdx = AV(dim1).bind(inputIndices.back());
        inputSizes.back() =
            ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
      }
    }

    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), inputIndices, inputSizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
                                     outputSizes)))
      return {};

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }

  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    // The iteration domain is over the outer dimensions of the packed layout.
    // In this context, the outer dimensions of `resultOffsets` are `offsets`.
    // The inner dimensions of `resultOffsets` are zeros because tiling is not
    // applied to them.
    auto packOp = cast<PackOp>(op);
    int64_t inputRank = packOp.getSourceRank();
    int64_t outputRank = packOp.getDestRank();
    auto zeroAttr = b.getI64IntegerAttr(0);
    resultOffsets.assign(offsets.begin(), offsets.end());
    resultOffsets.append(outputRank - inputRank, zeroAttr);

    ReifiedRankedShapedTypeDims outputShape;
    (void)reifyResultShapes(b, packOp, outputShape);
    resultSizes.assign(sizes.begin(), sizes.end());
    for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
      resultSizes.push_back(outputShape[0][dataTileDim]);

    return success();
  }

  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    auto packOp = cast<PackOp>(op);
    int64_t numTiles = packOp.getInnerDimsPos().size();

    // tensor.pack op is fusible (as a producer) only if full inner tiles are
    // iterated or inner dims are not tiled. Otherwise, it will generate a
    // sequence of non-trivial ops (for partial tiles).
    for (auto offset : offsets.take_back(numTiles))
      if (!isZeroInteger(offset))
        return failure();

    for (auto iter :
         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
        return failure();

    FailureOr<TilingResult> tilingResult = getTiledImplementation(
        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }

  /// Method to return the position of the iteration domain tile computed by
  /// the tiled operation. In the current `tensor.pack` context, the
  /// `resultOffsets` and `resultSizes` only cover outer dimensions.
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unsupported operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);
    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();
    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        packOp.getDimAndTileMapping();
    SmallVector<int64_t> outerShapeWithoutTranspose(
        packOp.getDestType().getShape().take_front(packOp.getSourceRank()));
    if (!packOp.getOuterDimsPerm().empty()) {
      applyPermutationToVector(
          outerShapeWithoutTranspose,
          invertPermutationVector(packOp.getOuterDimsPerm()));
    }
    for (auto dim : llvm::seq<int64_t>(packOp.getSourceRank())) {
      if (dimAndTileMapping.count(dim)) {
        FailureOr<int64_t> cstTileSize =
            ValueBoundsConstraintSet::computeConstantBound(
                presburger::BoundType::UB, sizes[dim],
                /*stopCondition=*/nullptr, /*closedUB=*/true);
        std::optional<int64_t> cstInnerSize =
            getConstantIntValue(dimAndTileMapping[dim]);

        // If a dimension is not tiled, it is always valid to fuse the pack
        // op, even if the op has padding semantics, because it always
        // generates a full slice along the dimension. The tile sizes are for
        // the unpacked domain, i.e., `srcDimSize`, so `tileSize < srcDimSize`
        // means that the dimension is tiled.
        // TODO: It could be untiled if the `srcDimSize` is dynamic. It is a
        // hard check to determine if a dimension is tiled or not.
        int64_t srcDimSize = packOp.getSourceType().getDimSize(dim);
        int64_t destDimSize = outerShapeWithoutTranspose[dim];
        bool isTiled = failed(cstTileSize) ||
                       ShapedType::isDynamic(srcDimSize) ||
                       cstTileSize.value() < srcDimSize;
        if (!isTiled) {
          outerDimOffsets.push_back(offsets[dim]);
          if (ShapedType::isStatic(destDimSize)) {
            outerDimSizes.push_back(b.getIndexAttr(destDimSize));
          } else {
            outerDimSizes.push_back(
                b.createOrFold<tensor::DimOp>(loc, packOp.getDest(), dim));
          }
          continue;
        }

        // Currently, fusing `packOp` as a consumer only expects a perfect
        // tiling scenario, because even without padding semantics the
        // `packOp` may yield incomplete tiles. E.g., tensor<30xf32> ->
        // tensor<5x6xf32>, where the `tileSize` from the operand of `packOp`
        // is 5, which is not exactly divisible by the `innerTile` (=6) of
        // `packOp`. As the result:
        // 1. the first slice is extracted from (0) to (4) and inserted into
        //    (0,0)~(0,4) at the first row.
        // 2. the second slice is extracted from (5) to (9) and SHOULD BE
        //    respectively inserted into two rows with different lengths,
        //    including the first row (0,5) and the second row (1,0)~(1,3).
        // It is hard to coordinate them, thus we add the constraint below to
        // bypass such cases temporarily. In other words, we can only support
        // tiling with a consumer if the tile size for the producer is a
        // multiple of the inner tile size for the packed dimensions at this
        // moment.
        if ((failed(cstTileSize) || !cstInnerSize ||
             *cstTileSize % *cstInnerSize != 0))
          return failure();

        using AV = affine::AffineValueExpr;
        affine::AffineBuilder ab(b, loc);
        AffineExpr dim0, sym;
        bindDims(b.getContext(), dim0);
        bindSymbols(b.getContext(), sym);
        auto avOffset = AV(dim0).bind(offsets[dim]);
        auto avSize = AV(dim0).bind(sizes[dim]);
        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
      } else {
        outerDimOffsets.push_back(offsets[dim]);
        outerDimSizes.push_back(sizes[dim]);
      }
    }
    applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
    resultOffsets = outerDimOffsets;
    resultSizes = outerDimSizes;
    return success();
  }
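
  // Worked example (illustrative): with an inner tile size of 16, an operand
  // tile with offset 32 and size 48 maps to the outer iteration domain as
  // offset = floor(32 / 16) = 2 and size = ceil(48 / 16) = 3.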

  /// Method to return the tiled implementation of tensor.pack as a consumer.
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG(
          { llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }

    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    auto packOp = cast<PackOp>(op);
    Location loc = packOp.getLoc();

    int64_t inputRank = packOp.getSourceRank();
    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(inputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    auto sourceSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getSource(), offsets, sizes, strides);
    tiledOperands.push_back(sourceSlice);

    SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets,
            outerDimSizes)))
      return failure();

    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
                                     outputOffsets, outputSizes)))
      return failure();

    strides.append(packOp.getDestRank() - inputRank, oneAttr);
    auto outSlice = tensor::ExtractSliceOp::create(
        b, loc, packOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(outSlice);

    if (auto val = packOp.getPaddingValue())
      tiledOperands.push_back(val);
    for (auto tile : packOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledPackOp = PackOp::create(
        b, loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());

    return TilingResult{
        {tiledPackOp},
        SmallVector<Value>(tiledPackOp->getResults()),
        llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})};
  }
};

struct UnpackTileDimInfo {
  bool isAlignedToInnerTileSize;
  OpFoldResult sourceOffset;
  OpFoldResult sourceSize;
  OpFoldResult resultOffset;
  OpFoldResult destExpandedSize;
};

/// Returns the information needed for tiling an unpack op on `tileDim` with
/// the given `tileOffset` and `tileSize`. For more details, see the comment
/// on `getTiledImplementation`.
static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
                                              int64_t tileDim,
                                              OpFoldResult tileOffset,
                                              OpFoldResult tileSize) {
  UnpackTileDimInfo info;
  Attribute zeroAttr = b.getIndexAttr(0);
  Attribute oneAttr = b.getIndexAttr(1);
  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
      unpackOp.getDimAndTileMapping();
  // The dimension is not one of the packed data dimensions.
  if (!dimAndTileMapping.count(tileDim)) {
    info.isAlignedToInnerTileSize = true;
    info.sourceOffset = tileOffset;
    info.sourceSize = tileSize;
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;
    return info;
  }

  Location loc = unpackOp.getLoc();
  using AV = affine::AffineValueExpr;
  affine::AffineBuilder ab(b, loc);
  AffineExpr dim0, dim1, sym0;
  bindDims(b.getContext(), dim0, dim1);
  bindSymbols(b.getContext(), sym0);

  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];

  info.isAlignedToInnerTileSize = false;
  FailureOr<int64_t> cstSize = ValueBoundsConstraintSet::computeConstantBound(
      presburger::BoundType::UB, tileSize,
      /*stopCondition=*/nullptr, /*closedUB=*/true);
  std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
  if (!failed(cstSize) && cstInnerSize) {
    if (*cstSize % *cstInnerSize == 0)
      info.isAlignedToInnerTileSize = true;

    // If the tiling size equals the inner tiling size, the outer dims are
    // always 1.
    if (*cstInnerSize == *cstSize) {
      auto lhs = AV(dim0).bind(tileOffset);
      auto rhs = AV(dim1).bind(innerTileSize);
      info.sourceOffset = ab.floor(lhs, rhs);
      info.sourceSize = oneAttr;
      info.resultOffset = zeroAttr;
      info.destExpandedSize = tileSize;
      return info;
    }
  }

  if (info.isAlignedToInnerTileSize) {
    info.sourceOffset =
        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
    info.resultOffset = zeroAttr;
    info.destExpandedSize = tileSize;

    // The ceilDiv is needed here because there could be an incomplete tile
    // even in perfect tiling cases. E.g.,
    //   %0 = unpack tensor<33x2xf32> into tensor<64xf32>
    // If the tiling size is 32, there will be 3 tiles. Two of them have
    // size=32; one of them has size=2. The size is represented using an
    // affine_min op; we need ceilDiv.
    info.sourceSize =
        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
    return info;
  }

  affine::DivModValue firstCoord = affine::getDivMod(
      b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  OpFoldResult tileExclusiveBound =
      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
  affine::DivModValue lastCoord = affine::getDivMod(
      b, loc,
      getValueOrCreateConstantIndexOp(
          b, loc,
          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));

  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
                                       AV(dim1).bind(firstCoord.quotient));
  info.sourceSize =
      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
  info.sourceOffset = firstCoord.quotient;
  info.resultOffset = firstCoord.remainder;
  // Do not create affine ops for the expanded size because the affine op
  // would be too complicated, which would trigger an issue in affine ops
  // simplification.
  info.destExpandedSize = b.createOrFold<arith::MulIOp>(
      loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
  return info;
}
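
// Worked example (illustrative): unpacking tensor<4x8xf32> into
// tensor<32xf32> with an inner tile of 8, a tile with offset=12 and size=10
// covers result[12..21], i.e. source rows 1 and 2. This returns
// sourceOffset=1, sourceSize=2, and resultOffset=4 (12 mod 8) into the
// expanded (2 * 8)-element slice.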

struct UnPackOpTiling
    : public TilingInterface::ExternalModel<UnPackOpTiling, linalg::UnPackOp> {

  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
    auto unpackOp = cast<UnPackOp>(op);
    SmallVector<utils::IteratorType> iteratorTypes(
        unpackOp.getDestRank(), utils::IteratorType::parallel);
    return iteratorTypes;
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
  }

  /// There are two cases in tiling unpack ops. If the tiling size is aligned
  /// to the inner tile size, the corresponding tiles of the source are all
  /// complete. Otherwise, there are incomplete tiles, and we need to expand
  /// the slice of the source to get complete tiles. The tiled unpack op
  /// unpacks more data from the source, so we'll need an extract_slice op to
  /// shift and truncate the output.
  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
  /// coordinates of the second tile (i.e., result[15..31]) are
  /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
  /// row are incomplete tiles. To represent the unpack op, we have to
  /// complete the rows, i.e., the input coordinates would start with (1, 0)
  /// and end with (3, 7). In this context, the tiled unpack produces
  /// (3 * n) elements because there are 3 rows in total. Followed by a
  /// tensor.extract_slice op, we can get the actual result.
  FailureOr<TilingResult>
  getTiledImplementation(Operation *op, OpBuilder &b,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes) const {
    auto unpackOp = cast<UnPackOp>(op);
    int64_t srcRank = unpackOp.getSourceRank();
    int64_t destRank = unpackOp.getDestRank();
    int64_t numInnerTiles = srcRank - destRank;
    Location loc = unpackOp.getLoc();

    // The perfect tiling case indicates that the tiling sizes are multiples
    // of the inner_tile_size. In this context, no extra data is needed when
    // representing the tiled unpack op.
    bool isPerfectTilingCase = true;
    Attribute oneAttr = b.getIndexAttr(1);
    SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
    SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
    SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
    for (auto dim : llvm::seq<int64_t>(0, destRank)) {
      UnpackTileDimInfo info =
          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
      if (!info.isAlignedToInnerTileSize)
        isPerfectTilingCase = false;
      sliceSrcIndices.push_back(info.sourceOffset);
      sliceSrcSizes.push_back(info.sourceSize);
      destExpandedSizes.push_back(info.destExpandedSize);
      resultOffsetsFromDest.push_back(info.resultOffset);
    }

    // The tiling is applied on destination dimensions. We have to apply the
    // interchange on source dimensions if outer_dims_perm is set.
    applyPermToRange(sliceSrcIndices, sliceSrcSizes,
                     unpackOp.getOuterDimsPerm());
    Attribute zeroAttr = b.getIndexAttr(0);
    sliceSrcIndices.append(numInnerTiles, zeroAttr);
    sliceSrcSizes.append(unpackOp.getMixedTiles());
    sliceSrcStrides.append(numInnerTiles, oneAttr);
    SmallVector<Operation *> generatedSlices;
    tensor::ExtractSliceOp sliceSource = tensor::ExtractSliceOp::create(
        b, loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes,
        sliceSrcStrides);
    generatedSlices.push_back(sliceSource);

    SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
    Value sliceDest;
    if (isPerfectTilingCase) {
      auto destSliceOp = tensor::ExtractSliceOp::create(
          b, loc, unpackOp.getDest(), offsets, sizes, destStrides);
      sliceDest = destSliceOp;
      generatedSlices.push_back(destSliceOp);
    } else {
      sliceDest = tensor::EmptyOp::create(
          b, loc, destExpandedSizes, unpackOp.getDestType().getElementType());
    }

    SmallVector<Value> tiledOperands = {sliceSource.getResult(), sliceDest};
    for (auto tile : unpackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    Operation *tiledUnpackOp = UnPackOp::create(
        b, loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs());

    if (isPerfectTilingCase)
      return TilingResult{{tiledUnpackOp},
                          SmallVector<Value>(tiledUnpackOp->getResults()),
                          generatedSlices};

    auto extractSlice = tensor::ExtractSliceOp::create(
        b, loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes,
        destStrides);
    return TilingResult{
        {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices};
  }

  LogicalResult
  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                        ArrayRef<OpFoldResult> offsets,
                        ArrayRef<OpFoldResult> sizes,
                        SmallVector<OpFoldResult> &resultOffsets,
                        SmallVector<OpFoldResult> &resultSizes) const {
    resultOffsets = llvm::to_vector(offsets);
    resultSizes = llvm::to_vector(sizes);
    return success();
  }

  FailureOr<TilingResult>
  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
    FailureOr<TilingResult> tilingResult =
        getTiledImplementation(op, b, offsets, sizes);
    if (failed(tilingResult))
      return failure();
    return tilingResult.value();
  }

  /// Method to return the position of the iteration domain tile computed by
  /// the tiled operation.
  LogicalResult getIterationDomainTileFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes,
      SmallVectorImpl<OpFoldResult> &resultOffsets,
      SmallVectorImpl<OpFoldResult> &resultSizes) const {
    if (operandNumbers.size() != 1) {
      LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    unsigned operandNumber = operandNumbers[0];
    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    // If the operand tile is the dest, then no adjustment is needed.
    if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) {
      resultOffsets = llvm::to_vector(offsets);
      resultSizes = llvm::to_vector(sizes);
      return success();
    }
    Location loc = unPackOp.getLoc();

    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    auto destOffsets = offsets.drop_back(numTiles);
    auto destSizes = sizes.drop_back(numTiles);
    // The tiling is applied on interchanged dimensions. We have to undo the
    // interchange to map sizes and offsets to the original input.
    int64_t outputRank = unPackOp.getDestRank();
    ReifiedRankedShapedTypeDims reifiedReturnShapes;
    if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
      return failure();
    SmallVector<OpFoldResult> outputMixedSizes = reifiedReturnShapes.front();
    SmallVector<OpFoldResult> origOffsets(destOffsets);
    SmallVector<OpFoldResult> origSizes(destSizes);
    applyPermToRange(origOffsets, origSizes,
                     invertPermutationVector(unPackOp.getOuterDimsPerm()));

    DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
        unPackOp.getDimAndTileMapping();

    for (auto dim : llvm::seq<int64_t>(0, outputRank)) {
      using AV = affine::AffineValueExpr;
      affine::AffineBuilder ab(b, loc);
      AffineExpr dim0, dim1, sym0;
      bindDims(b.getContext(), dim0, dim1);
      bindSymbols(b.getContext(), sym0);
      if (dimAndTileMapping.count(dim)) {
        // If the data dimension is tiled, the i-th index is the product of
        // offset_i and tile_i, and the i-th size is the product of sizes_i and
        // tile_i. The sizes must be clamped to the sizes of the unpack result.
        auto avOffset = AV(dim0).bind(origOffsets[dim]);
        auto avSize = AV(dim0).bind(origSizes[dim]);
        auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
        auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
        resultOffsets.push_back(ab.mul(avOffset, avTileSize));
        auto avResultOffset = AV(dim1).bind(resultOffsets.back());
        resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
                                      ab.sub(avResultSize, avResultOffset)}));
      } else {
        resultOffsets.push_back(origOffsets[dim]);
        resultSizes.push_back(origSizes[dim]);
      }
    }
    return success();
  }

  /// Method to return the tiled implementation of tensor.unpack as a consumer.
  FailureOr<TilingResult> getTiledImplementationFromOperandTiles(
      Operation *op, OpBuilder &b, ArrayRef<unsigned> operandNumbers,
      ArrayRef<SmallVector<OpFoldResult>> allOffsets,
      ArrayRef<SmallVector<OpFoldResult>> allSizes) const {
    if (operandNumbers.size() != 1 || operandNumbers[0] != 0) {
      LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; });
      return failure();
    }
    auto unPackOp = cast<UnPackOp>(op);
    ArrayRef<OpFoldResult> offsets(allOffsets[0]);
    ArrayRef<OpFoldResult> sizes(allSizes[0]);

    // tensor.unpack op is fusible (as a consumer) only if inner dims are not
    // tiled.
    int64_t numTiles = unPackOp.getInnerDimsPos().size();
    for (auto iter :
         llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
        return failure();
    }

    Location loc = unPackOp.getLoc();

    // Fetch offset/size for creating the slice of the dest operand of
    // the unpack op.
    SmallVector<OpFoldResult> outputOffsets, outputSizes;
    if (failed(getIterationDomainTileFromOperandTiles(
            op, b, operandNumbers, allOffsets, allSizes, outputOffsets,
            outputSizes)))
      return failure();

    auto oneAttr = b.getI64IntegerAttr(1);
    int64_t outputRank = unPackOp.getDestRank();
    SmallVector<OpFoldResult> strides(outputRank, oneAttr);

    SmallVector<Value> tiledOperands;
    // Create the slice of the dest operand.
    auto extractDestSlice = tensor::ExtractSliceOp::create(
        b, loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
    tiledOperands.push_back(extractDestSlice);

    strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
    // Create the slice of the source operand.
    auto extractSourceSlice = tensor::ExtractSliceOp::create(
        b, loc, unPackOp.getSource(), offsets, sizes, strides);
    tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
    for (auto tile : unPackOp.getInnerTiles())
      tiledOperands.push_back(tile);

    // Create the tiled unpack op.
    Operation *tiledUnPackOp =
        UnPackOp::create(b, loc, TypeRange{extractDestSlice.getType()},
                         tiledOperands, op->getAttrs());

    return TilingResult{{tiledUnPackOp},
                        SmallVector<Value>(tiledUnPackOp->getResults()),
                        llvm::to_vector(ArrayRef<Operation *>{
                            extractSourceSlice, extractDestSlice})};
  }
};
1389 
1390 } // namespace
1391 
1392 template <typename OpType>
1393 static void registerOne(MLIRContext *ctx) {
1394  OpType::template attachInterface<LinalgOpTilingInterface<OpType>>(*ctx);
1395  OpType::template attachInterface<LinalgOpPartialReductionInterface<OpType>>(
1396  *ctx);
1397 }
1398 
1399 /// Variadic helper that registers both interface models for each op type.
1400 template <typename... OpTypes>
1401 static void registerAll(MLIRContext *ctx) {
1402  (registerOne<OpTypes>(ctx), ...);
1403 }
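// The fold expression above expands into one registerOne call per type in
// the parameter pack; e.g. (op list illustrative):
//   registerAll<linalg::MatmulOp, linalg::FillOp>(ctx);
// expands to
//   registerOne<linalg::MatmulOp>(ctx);
//   registerOne<linalg::FillOp>(ctx);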
1404 
1405 #define GET_OP_LIST
1406 
1407 void mlir::linalg::registerTilingInterfaceExternalModels(
1408  DialectRegistry &registry) {
1409  registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) {
1410  registerOne<linalg::GenericOp>(ctx);
1411  linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1412  linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1413  registerAll<
1414 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
1415  >(ctx);
1416  });
1417 }
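// Typical use of this hook (illustrative): attach the external models to a
// DialectRegistry before constructing the MLIRContext, so the interfaces
// become available as soon as the Linalg dialect is loaded:
//   DialectRegistry registry;
//   linalg::registerTilingInterfaceExternalModels(registry);
//   MLIRContext context(registry);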
1418 
1419 void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(
1420  DialectRegistry &registry) {
1421  registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) {
1422  linalg::PackOp::attachInterface<PackOpTiling>(*ctx);
1423  linalg::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
1424  });
1425 }
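// This narrower hook registers only the pack/unpack tiling models; a
// client that does not need the full set of structured-op models can use
// it the same way (sketch):
//   DialectRegistry registry;
//   linalg::registerTilingInterfaceExternalModelsForPackUnPackOps(registry);
//   MLIRContext context(registry);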