MLIR  17.0.0git
TensorTilingInterfaceImpl.cpp
Go to the documentation of this file.
1 //===- TensorTilingInterface.cpp - Tiling Interface models *- C++ ------*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
20 
21 using namespace mlir;
22 using namespace mlir::tensor;
23 
24 namespace {
25 
26 struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
27 
28  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
29  auto padOp = cast<PadOp>(op);
31  padOp.getResultType().getRank(), utils::IteratorType::parallel);
32  return iteratorTypes;
33  }
34 
35  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
36  ReifiedRankedShapedTypeDims reifiedShapes;
37  ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
38  dyn_cast<ReifyRankedShapedTypeOpInterface>(op);
39  (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes);
40 
41  Location loc = op->getLoc();
42  Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
43  Value one = b.create<arith::ConstantIndexOp>(loc, 1);
44  // Initialize all the ranges to {zero, one, one}. All the `ub`s are
45  // overwritten.
46  SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
47  for (const auto &ub : enumerate(reifiedShapes[0]))
48  loopRanges[ub.index()].size = ub.value();
49  return loopRanges;
50  }
51 
53  getTiledImplementation(Operation *op, OpBuilder &b,
54  ArrayRef<OpFoldResult> offsets,
55  ArrayRef<OpFoldResult> sizes) const {
56  Operation *result =
57  tensor::bubbleUpPadSlice(b, cast<PadOp>(op), offsets, sizes);
58  if (!result)
59  return {};
60  return {result};
61  }
62 
64  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
65  ArrayRef<OpFoldResult> offsets,
67  SmallVector<OpFoldResult> &resultOffsets,
68  SmallVector<OpFoldResult> &resultSizes) const {
69  resultOffsets.assign(offsets.begin(), offsets.end());
70  resultSizes.assign(sizes.begin(), sizes.end());
71  return success();
72  }
73 };
74 
75 template <typename OpTy>
76 static SmallVector<Range> getPackUnPackIterationDomain(OpTy op,
77  OpBuilder &builder) {
78  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
79  "applies to only pack or unpack operations");
80  OpBuilder::InsertionGuard g(builder);
81  Location loc = op.getLoc();
82  int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank()
83  : op.getDestRank();
84  Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
85  Value one = builder.create<arith::ConstantIndexOp>(loc, 1);
86  ReifiedRankedShapedTypeDims resultShape;
87  (void)op.reifyResultShapes(builder, resultShape);
88  SmallVector<Range> loopBounds(rank);
89  for (auto dim : llvm::seq<int64_t>(0, rank)) {
90  loopBounds[dim].offset = zero;
91  loopBounds[dim].stride = one;
92  loopBounds[dim].size = resultShape[0][dim];
93  }
94  return loopBounds;
95 }
96 
97 static void applyPermToRange(SmallVector<OpFoldResult> &offsets,
99  ArrayRef<int64_t> permutation) {
100  if (permutation.empty())
101  return;
102  applyPermutationToVector<OpFoldResult>(offsets, permutation);
103  applyPermutationToVector<OpFoldResult>(sizes, permutation);
104 }
105 
106 struct PackOpTiling
107  : public TilingInterface::ExternalModel<PackOpTiling, PackOp> {
108 
109  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
110  // Note that here we only consider untiled dimensions and outer tiled data
111  // dimensions, the inner tiled data dimensions are materialized when
112  // building the body of the operation.
113  auto packOp = cast<PackOp>(op);
114  SmallVector<utils::IteratorType> iteratorTypes(
115  packOp.getSourceRank(), utils::IteratorType::parallel);
116  return iteratorTypes;
117  }
118 
119  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
120  return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b);
121  }
122 
124  getTiledImplementation(Operation *op, OpBuilder &b,
125  ArrayRef<OpFoldResult> offsets,
126  ArrayRef<OpFoldResult> sizes) const {
127  auto packOp = cast<PackOp>(op);
128  Location loc = packOp.getLoc();
129 
130  // The tiling is applied on interchanged dimensions. We have to undo the
131  // interchange to map sizes and offsets to the original input.
132  int64_t inputRank = packOp.getSourceRank();
133  SmallVector<OpFoldResult> origOffsets(offsets.begin(), offsets.end());
134  SmallVector<OpFoldResult> origSizes(sizes.begin(), sizes.end());
135  applyPermToRange(origOffsets, origSizes,
136  invertPermutationVector(packOp.getOuterDimsPerm()));
137 
138  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
139  packOp.getDimAndTileMapping();
140  SmallVector<OpFoldResult> srcDimValues =
141  tensor::createDimValues(b, loc, packOp.getSource());
142  SmallVector<OpFoldResult> inputIndices, inputSizes;
143  for (auto dim : llvm::seq<int64_t>(0, inputRank)) {
144  using AV = AffineValueExpr;
145  AffineBuilder ab(b, loc);
146  AffineExpr dim0, dim1, sym;
147  bindDims(b.getContext(), dim0, dim1);
148  bindSymbols(b.getContext(), sym);
149  if (dimAndTileMapping.count(dim)) {
150  // If the data dimension is tiled, the i-th index is the product of
151  // offset_i and tile_i, and the i-th size is the product of sizes_i and
152  // tile_i.
153  auto avOffset = AV(dim0).bind(origOffsets[dim]);
154  auto avSize = AV(dim0).bind(origSizes[dim]);
155  auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
156  inputIndices.push_back(ab.mul(avOffset, avTileSize));
157  inputSizes.push_back(ab.mul(avSize, avTileSize));
158  } else {
159  inputIndices.push_back(origOffsets[dim]);
160  inputSizes.push_back(origSizes[dim]);
161  }
162 
163  // Limit the size of the input operand for incomplete tiles.
164  OpFoldResult dimSize = srcDimValues[dim];
165  auto avDimSize = AV(dim0).bind(dimSize);
166  auto avInputIdx = AV(dim1).bind(inputIndices.back());
167  inputSizes.back() =
168  ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)});
169  }
170 
171  auto oneAttr = b.getI64IntegerAttr(1);
172  SmallVector<OpFoldResult> strides(inputRank, oneAttr);
173 
174  SmallVector<Value> tiledOperands;
175  tiledOperands.push_back(b.create<ExtractSliceOp>(
176  loc, packOp.getSource(), inputIndices, inputSizes, strides));
177 
178  SmallVector<OpFoldResult> outputOffsets, outputSizes;
179  if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets,
180  outputSizes)))
181  return {};
182 
183  strides.append(packOp.getDestRank() - inputRank, oneAttr);
184  auto extractSlice = b.create<ExtractSliceOp>(
185  loc, packOp.getDest(), outputOffsets, outputSizes, strides);
186  tiledOperands.push_back(extractSlice);
187 
188  if (auto val = packOp.getPaddingValue())
189  tiledOperands.push_back(val);
190  for (auto tile : packOp.getInnerTiles())
191  tiledOperands.push_back(tile);
192 
193  Operation *tiledPackOp = b.create<PackOp>(
194  loc, TypeRange{extractSlice.getType()}, tiledOperands, op->getAttrs());
195 
196  return {tiledPackOp};
197  }
198 
200  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
201  ArrayRef<OpFoldResult> offsets,
203  SmallVector<OpFoldResult> &resultOffsets,
204  SmallVector<OpFoldResult> &resultSizes) const {
205  // The iteration domain is over outer dimensions of packed layout. In this
206  // context, the outer dimensions of `resultOffsets` are `offsets`. The
207  // inner dimensions of `resultOffsets` are zeros because tiling is not
208  // applied to them.
209  auto packOp = cast<PackOp>(op);
210  int64_t inputRank = packOp.getSourceRank();
211  int64_t outputRank = packOp.getDestRank();
212  auto zeroAttr = b.getI64IntegerAttr(0);
213  resultOffsets.assign(offsets.begin(), offsets.end());
214  resultOffsets.append(outputRank - inputRank, zeroAttr);
215 
216  ReifiedRankedShapedTypeDims outputShape;
217  (void)packOp.reifyResultShapes(b, outputShape);
218  resultSizes.assign(sizes.begin(), sizes.end());
219  for (auto dataTileDim : llvm::seq<unsigned>(inputRank, outputRank))
220  resultSizes.push_back(getAsOpFoldResult(outputShape[0][dataTileDim]));
221 
222  return success();
223  }
224 };
225 
/// Per-dimension bookkeeping for tiling an unpack op; produced by
/// getUnpackTileDimInfo and consumed by UnPackOpTiling.
struct UnpackTileDimInfo {
  // True when the tile size is known to be a multiple of the inner tile size
  // for this dimension (or the dimension is not packed at all).
  bool isAlignedToInnerTileSize;
  // Offset and size of the slice to extract from the unpack source.
  OpFoldResult sourceOffset;
  OpFoldResult sourceSize;
  // Offset of the final extract_slice relative to the expanded destination.
  OpFoldResult resultOffset;
  // Size of the (possibly expanded) destination along this dimension.
  OpFoldResult destExpandedSize;
};
233 
234 /// Returns the needed information for tiling unpack op on `tileDim` with given
235 /// `tileOffset` and `tileSize`. For more details, see the comment of the
236 /// `getTiledImplementation`.
237 static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
238  int64_t tileDim,
239  OpFoldResult tileOffset,
240  OpFoldResult tileSize) {
241  UnpackTileDimInfo info;
242  Attribute zeroAttr = b.getIndexAttr(0);
243  Attribute oneAttr = b.getIndexAttr(1);
244  DenseMap<int64_t, OpFoldResult> dimAndTileMapping =
245  unpackOp.getDimAndTileMapping();
246  // The dimension is not one of packed data dimension.
247  if (!dimAndTileMapping.count(tileDim)) {
248  info.isAlignedToInnerTileSize = true;
249  info.sourceOffset = tileOffset;
250  info.sourceSize = tileSize;
251  info.resultOffset = zeroAttr;
252  info.destExpandedSize = tileSize;
253  return info;
254  }
255 
256  Location loc = unpackOp.getLoc();
257  using AV = AffineValueExpr;
258  AffineBuilder ab(b, loc);
259  AffineExpr dim0, dim1, sym0;
260  bindDims(b.getContext(), dim0, dim1);
261  bindSymbols(b.getContext(), sym0);
262 
263  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
264 
265  info.isAlignedToInnerTileSize = false;
267  getValueOrCreateConstantIndexOp(b, loc, tileSize));
268  std::optional<int64_t> cstInnerSize = getConstantIntValue(innerTileSize);
269  if (!failed(cstSize) && cstInnerSize) {
270  if (*cstSize % *cstInnerSize == 0)
271  info.isAlignedToInnerTileSize = true;
272 
273  // If the tiling size equals to the inner tiling size, the outer dims are
274  // always 1.
275  if (*cstInnerSize == *cstSize) {
276  auto lhs = AV(dim0).bind(tileOffset);
277  auto rhs = AV(dim1).bind(innerTileSize);
278  info.sourceOffset = ab.floor(lhs, rhs);
279  info.sourceSize = oneAttr;
280  info.resultOffset = zeroAttr;
281  info.destExpandedSize = tileSize;
282  return info;
283  }
284  }
285 
286  if (info.isAlignedToInnerTileSize) {
287  info.sourceOffset =
288  ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
289  info.resultOffset = zeroAttr;
290  info.destExpandedSize = tileSize;
291 
292  // The ceilDiv is needed here because there could be incomplete tile even
293  // it is perfect tiling cases. E.g.,
294  // %0 = unpack tensor<33x2xf32> into tensor<64xf32>
295  // If the tiling size is 32, there will be 3 tiles. Two of them have
296  // size=32; one of them have size=2. The size is represented using
297  // affine_min op; we need ceilDiv.
298  info.sourceSize =
299  ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
300  return info;
301  }
302 
303  DivModValue firstCoord =
304  getDivMod(b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
305  getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
306  OpFoldResult tileExclusiveBound =
307  ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
308  DivModValue lastCoord = getDivMod(
309  b, loc,
311  b, loc,
312  ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
313  getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
314 
315  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
316  AV(dim1).bind(firstCoord.quotient));
317  info.sourceSize =
318  ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
319  info.sourceOffset = firstCoord.quotient;
320  info.resultOffset = firstCoord.remainder;
321  info.destExpandedSize =
322  ab.mul(AV(dim0).bind(info.sourceSize), AV(sym0).bind(innerTileSize));
323  return info;
324 }
325 
326 struct UnPackOpTiling
327  : public TilingInterface::ExternalModel<UnPackOpTiling, UnPackOp> {
328 
329  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
330  auto unpackOp = cast<UnPackOp>(op);
331  SmallVector<utils::IteratorType> iteratorTypes(
332  unpackOp.getDestRank(), utils::IteratorType::parallel);
333  return iteratorTypes;
334  }
335 
336  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
337  return getPackUnPackIterationDomain<UnPackOp>(cast<UnPackOp>(op), b);
338  }
339 
340  /// There are two cases in tiling unpack ops. If the tiling size is aligned to
341  /// the inner tile size, the corresponding tiles of source are all complete.
342  /// Otherwise, there are in-complete tiles. We will need to expand the slice
343  /// of source for getting complete tiles. The tiled unpack op unpacks more
344  /// data from source, so We'll need an extract_slice op to shift and truncate
345  /// the output.
346  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
347  /// coordinates of second tile (i.e., result[15..31]) are
348  /// [(1, 7), (2, 0,), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
349  /// row are incomplete tiles. To represent the unpack op, we have to complete
350  /// the rows. I.e., the input coordinates would start with (1, 0); end with
351  /// (3, 7). In this context, the tiled unpack produces a (3 * n) elements
352  /// because there are 3 rows in total. Follow by a tensor.extract_slice op, we
353  /// can get the actual result.
355  getTiledImplementation(Operation *op, OpBuilder &b,
356  ArrayRef<OpFoldResult> offsets,
357  ArrayRef<OpFoldResult> sizes) const {
358  auto unpackOp = cast<UnPackOp>(op);
359  int64_t srcRank = unpackOp.getSourceRank();
360  int64_t destRank = unpackOp.getDestRank();
361  int64_t numInnerTiles = srcRank - destRank;
362  Location loc = unpackOp.getLoc();
363 
364  // The perfect tiling case indicates that the tiling sizes are multiple of
365  // inner_tile_size. In this context, no extra data is needed when
366  // representing the tiled unpack op.
367  bool isPerfectTilingCase = true;
368  Attribute oneAttr = b.getIndexAttr(1);
369  SmallVector<OpFoldResult> sliceSrcStrides(destRank, oneAttr);
370  SmallVector<OpFoldResult> sliceSrcIndices, sliceSrcSizes;
371  SmallVector<OpFoldResult> destExpandedSizes, resultOffsetsFromDest;
372  for (auto dim : llvm::seq<int64_t>(0, destRank)) {
373  UnpackTileDimInfo info =
374  getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
375  if (!info.isAlignedToInnerTileSize)
376  isPerfectTilingCase = false;
377  sliceSrcIndices.push_back(info.sourceOffset);
378  sliceSrcSizes.push_back(info.sourceSize);
379  destExpandedSizes.push_back(info.destExpandedSize);
380  resultOffsetsFromDest.push_back(info.resultOffset);
381  }
382 
383  // The tiling is applied on destination dimensions. We have to apply the
384  // interchange on source dimensions if outer_dims_perm is set.
385  applyPermToRange(sliceSrcIndices, sliceSrcSizes,
386  unpackOp.getOuterDimsPerm());
387  Attribute zeroAttr = b.getIndexAttr(0);
388  sliceSrcIndices.append(numInnerTiles, zeroAttr);
389  sliceSrcSizes.append(unpackOp.getMixedTiles());
390  sliceSrcStrides.append(numInnerTiles, oneAttr);
391  Value sliceSource =
392  b.create<ExtractSliceOp>(loc, unpackOp.getSource(), sliceSrcIndices,
393  sliceSrcSizes, sliceSrcStrides);
394 
395  SmallVector<OpFoldResult> destStrides(destRank, oneAttr);
396  Value sliceDest;
397  if (isPerfectTilingCase) {
398  sliceDest = b.create<ExtractSliceOp>(loc, unpackOp.getDest(), offsets,
399  sizes, destStrides);
400  } else {
401  sliceDest = b.create<EmptyOp>(loc, destExpandedSizes,
402  unpackOp.getDestType().getElementType());
403  }
404 
405  Operation *tiledUnpackOp =
406  b.create<UnPackOp>(loc, TypeRange{sliceDest.getType()},
407  ValueRange{sliceSource, sliceDest}, op->getAttrs());
408 
409  if (isPerfectTilingCase)
410  return {tiledUnpackOp};
411 
412  Operation *extractSlice =
413  b.create<ExtractSliceOp>(loc, tiledUnpackOp->getResult(0),
414  resultOffsetsFromDest, sizes, destStrides);
415  return {tiledUnpackOp, extractSlice};
416  }
417 
419  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
420  ArrayRef<OpFoldResult> offsets,
422  SmallVector<OpFoldResult> &resultOffsets,
423  SmallVector<OpFoldResult> &resultSizes) const {
424  resultOffsets = llvm::to_vector(offsets);
425  resultSizes = llvm::to_vector(sizes);
426  return success();
427  }
428 
429  FailureOr<Value> generateResultTileValue(Operation *op, OpBuilder &b,
430  unsigned resultNumber,
431  ArrayRef<OpFoldResult> offsets,
432  ArrayRef<OpFoldResult> sizes) const {
433  return getTiledImplementation(op, b, offsets, sizes)
434  .back()
435  ->getResult(resultNumber);
436  }
437 };
438 
439 } // namespace
440 
442  ArrayRef<OpFoldResult> offsets,
444  bool generateZeroSliceGuard) {
445  // Only constant padding value supported.
446  Value padValue = padOp.getConstantPaddingValue();
447  if (!padValue)
448  return nullptr;
449 
450  // Helper variables and functions for various arithmetic operations. These
451  // are used extensively for computing new offset/length and padding values.
452  Location loc = padOp->getLoc();
453  AffineExpr dim0, dim1;
454  bindDims(b.getContext(), dim0, dim1);
455  // Add two integers.
456  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
457  auto add = [&](Value v1, Value v2) {
458  return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
459  };
460  // Subtract two integers.
461  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
462  auto sub = [&](Value v1, Value v2) {
463  return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
464  };
465  // Take the minimum of two integers.
466  auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
467  auto min = [&](Value v1, Value v2) {
468  return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
469  };
470  // Take the maximum of two integers.
471  auto max = [&](Value v1, Value v2) {
472  return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
473  };
474  // Zero index-typed integer.
475  auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
476 
477  // Helper function for filling static/dynamic low/high padding indices
478  // vectors of PadOp.
479  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
480  SmallVector<int64_t> &staticIndices) {
481  if (auto constInt = getConstantIntValue(val)) {
482  staticIndices.push_back(*constInt);
483  } else {
484  staticIndices.push_back(ShapedType::kDynamic);
485  dynIndices.push_back(val);
486  }
487  };
488 
489  // Compute new offsets, lengths, low padding, high padding.
490  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
491  SmallVector<Value> newLows, newHighs;
492  SmallVector<int64_t> staticNewLows, staticNewHighs;
493  // Set to true if the original data source is not read at all.
494  bool hasZeroLen = false;
495  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
496  // is true if the original data source turns out to be unused at runtime.
497  Value dynHasZeroLenCond;
498 
499  int64_t rank = padOp.getSourceType().getRank();
500  for (unsigned dim = 0; dim < rank; ++dim) {
501  auto low =
502  getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]);
503  bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
504  auto high =
505  getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]);
506  bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
507  auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
508  auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
509  auto srcSize = b.createOrFold<tensor::DimOp>(loc, padOp.getSource(), dim);
510 
511  // The new amount of low padding is `low - offset`. Except for the case
512  // where none of the low padding is read. In that case, the new amount of
513  // low padding is zero.
514  //
515  // Optimization: If low = 0, then newLow = 0.
516  Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
517  appendIndex(newLow, newLows, staticNewLows);
518 
519  // Start reading the data from position `offset - low`. Since the original
520  // read may have started in the low padding zone, this value could be
521  // negative. Therefore, start reading from:
522  //
523  // max(offset - low, 0)
524  //
525  // The original read could also have started in the high padding zone.
526  // In that case, set the offset to the end of source tensor. The new
527  // ExtractSliceOp length will be zero in that case. (Effectively reading
528  // no data from the source.)
529  //
530  // Optimization: If low = 0, then the formula can be simplified.
531  Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
532  : min(offset, srcSize);
533  newOffsets.push_back(getAsOpFoldResult(newOffset));
534 
535  // The original ExtractSliceOp was reading until position `offset +
536  // length`. Therefore, the corresponding position within the source tensor
537  // is:
538  //
539  // offset + length - low
540  //
541  // In case the original ExtractSliceOp stopped reading within the low
542  // padding zone, this value can be negative. In that case, the end
543  // position of the read should be zero. (Similar to newOffset.)
544  //
545  // The original read could also have stopped in the high padding zone.
546  // In that case, set the end positition of the read should be the end of
547  // the source tensor. (Similar to newOffset.)
548  //
549  // endLoc = min(max(offset - low + length, 0), srcSize)
550  //
551  // The new ExtractSliceOp length is `endLoc - newOffset`.
552  //
553  // Optimization: If low = 0, then the formula can be simplified.
554  Value endLoc = hasLowPad
555  ? min(max(add(sub(offset, low), length), zero), srcSize)
556  : min(add(offset, length), srcSize);
557  Value newLength = sub(endLoc, newOffset);
558  newLengths.push_back(getAsOpFoldResult(newLength));
559 
560  // Check if newLength is zero. In that case, no SubTensorOp should be
561  // executed.
562  if (auto newLengthInt = getConstantIntValue(newLength)) {
563  hasZeroLen |= *newLengthInt == 0;
564  } else {
565  Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
566  newLength, zero);
567  dynHasZeroLenCond =
568  dynHasZeroLenCond
569  ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
570  : check;
571  }
572 
573  // The amount of high padding is simply the number of elements remaining,
574  // so that the result has the same length as the original ExtractSliceOp.
575  // As an optimization, if the original high padding is zero, then the new
576  // high padding must also be zero.
577  Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
578  appendIndex(newHigh, newHighs, staticNewHighs);
579 
580  // Only unit stride supported.
581  newStrides.push_back(b.getIndexAttr(1));
582  }
583 
584  // The shape of the result can be obtained from the sizes passed in.
585  SmallVector<Value> dynDims;
586  SmallVector<int64_t> shape;
587  dispatchIndexOpFoldResults(sizes, dynDims, shape);
588  RankedTensorType resultType =
589  RankedTensorType::get(shape, padOp.getResultType().getElementType());
590 
591  // Insert cast to ensure that types match. (May be folded away.)
592  auto castResult = [&](Value val) -> Operation * {
593  return b.create<tensor::CastOp>(loc, resultType, val);
594  };
595 
596  // In cases where the original data source is unused: Emit a GenerateOp and
597  // do not generate a SliceOp. (The result shape of the SliceOp would
598  // have a dimension of size 0, the semantics of which is unclear.)
599  auto createGenerateOp = [&]() {
600  // Create GenerateOp.
601  auto generateOp = b.create<tensor::GenerateOp>(
602  loc, resultType, dynDims,
603  [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
604  builder.create<tensor::YieldOp>(gLoc, padValue);
605  });
606  return castResult(generateOp);
607  };
608 
609  // Emit a SliceOp and a PadOp. Should not be used in cases where
610  // the result shape of the new SliceOp has a zero dimension.
611  auto createPadOfExtractSlice = [&]() {
612  // Create pad(extract_slice(x)).
613  auto newSliceOp = b.create<tensor::ExtractSliceOp>(
614  loc, padOp.getSource(), newOffsets, newLengths, newStrides);
615  auto newPadOp = b.create<PadOp>(loc, newSliceOp, staticNewLows,
616  staticNewHighs, newLows, newHighs);
617 
618  // Copy region to new PadOp.
619  IRMapping bvm;
620  padOp.getRegion().cloneInto(&newPadOp.getRegion(), bvm);
621 
622  // Cast result and return.
623  return castResult(newPadOp);
624  };
625 
626  // Rewrite extract_slice(pad(x)) into a GenerateOp it is statically known that
627  // the original data source x is not used.
628  if (hasZeroLen)
629  return createGenerateOp();
630 
631  // If there are dynamic dimensions: Generate an scf.if check to avoid
632  // creating SliceOps with result dimensions of size 0 at runtime.
633  if (generateZeroSliceGuard && dynHasZeroLenCond) {
634  auto result = b.create<scf::IfOp>(
635  loc, dynHasZeroLenCond,
636  /*thenBuilder=*/
637  [&](OpBuilder &b, Location loc) {
638  b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
639  },
640  /*elseBuilder=*/
641  [&](OpBuilder &b, Location loc) {
642  b.create<scf::YieldOp>(loc, createPadOfExtractSlice()->getResult(0));
643  });
644  return result;
645  }
646  return createPadOfExtractSlice();
647 }
648 
650  DialectRegistry &registry) {
651  registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
652  tensor::PadOp::attachInterface<PadOpTiling>(*ctx);
653  tensor::PackOp::attachInterface<PackOpTiling>(*ctx);
654  tensor::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
655  });
656 }
657 
659  DialectRegistry &registry) {
660  registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
661  tensor::PackOp::attachInterface<PackOpTiling>(*ctx);
662  tensor::UnPackOp::attachInterface<UnPackOpTiling>(*ctx);
663  });
664 }
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for affine expression.
Definition: AffineExpr.h:68
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
Definition: AffineMap.cpp:257
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Attributes are known-constant values of operations.
Definition: Attributes.h:25
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:109
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:113
MLIRContext * getContext() const
Definition: Builders.h:55
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
void addExtension(std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:56
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:301
This class helps build Operations.
Definition: Builders.h:199
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition: Builders.h:473
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:422
This class represents a single result from folding an operation.
Definition: OpDefinition.h:233
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:75
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:368
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:198
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition: Operation.h:400
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:350
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:93
Type getType() const
Return the type of this value.
Definition: Value.h:122
Specialization of arith.constant op that returns an integer of index type.
Definition: Arith.h:89
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:223
FailureOr< int64_t > getConstantUpperBoundForIndex(Value value)
Returns a constant upper bound for the result value of an index computation.
Definition: Utils.cpp:329
void registerTilingInterfaceExternalModels(mlir::DialectRegistry &registry)
Registers external models for Tiling interface for tensor ops.
Operation * bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, bool generateZeroSliceGuard=true)
Bubbles up a slice of this pad by taking the slice first and then performing the padding.
void registerTilingInterfaceExternalModelsForPackUnPackOps(DialectRegistry &registry)
Similar to the above registeration, but it is only for tensor.pack and tensor.unpack ops.
SmallVector< OpFoldResult > createDimValues(OpBuilder &b, Location loc, Value rankedTensor)
Definition: Utils.cpp:55
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:336
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling fo imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
Definition: LoopUtils.cpp:1701
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to apply to inverse a permutation.
DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs)
Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
Definition: Utils.cpp:1846
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:343
void dispatchIndexOpFoldResults(ArrayRef< OpFoldResult > ofrs, SmallVectorImpl< Value > &dynamicVec, SmallVectorImpl< int64_t > &staticVec)
Helper function to dispatch multiple OpFoldResults according to the behavior of dispatchIndexOpFoldRe...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:53
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
Helper struct to build simple AffineValueExprs with minimal type inference support.
Definition: Utils.h:352
Holds the result of (div a, b) and (mod a, b).
Definition: Utils.h:310
Value quotient
Definition: Utils.h:311
Value remainder
Definition: Utils.h:312
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26