MLIR  21.0.0git
TensorTilingInterfaceImpl.cpp
Go to the documentation of this file.
1 //===- TensorTilingInterface.cpp - Tiling Interface models *- C++ ------*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
18 
19 using namespace mlir;
20 using namespace mlir::tensor;
21 
22 namespace {
23 
// External-model implementation of TilingInterface for tensor::PadOp.
// Tiling of a pad is realized by bubbleUpPadSlice (defined below), which
// rewrites a slice of the pad as pad(extract_slice(x)).
24 struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
25 
 // Every result dimension of a pad is tiled independently, so all loops
 // are parallel iterators (one per result dimension).
26  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
27  auto padOp = cast<PadOp>(op);
 // NOTE(review): the declaration of `iteratorTypes` (source line 28,
 // "SmallVector<utils::IteratorType> iteratorTypes(") appears elided in
 // this rendering of the file.
29  padOp.getResultType().getRank(), utils::IteratorType::parallel);
30  return iteratorTypes;
31  }
32 
 // The iteration domain is the full result shape: each range is
 // {offset = 0, size = reified result dim, stride = 1}.
33  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
34  ReifiedRankedShapedTypeDims reifiedShapes;
35  (void)reifyResultShapes(b, op, reifiedShapes);
36  OpFoldResult zero = b.getIndexAttr(0);
37  OpFoldResult one = b.getIndexAttr(1);
38  // Initialize all the ranges to {zero, one, one}. All the `ub`s are
39  // overwritten.
40  SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
41  for (const auto &ub : enumerate(reifiedShapes[0]))
42  loopRanges[ub.index()].size = ub.value();
43  return loopRanges;
44  }
45 
 // Produce the tiled implementation for the given offsets/sizes by
 // delegating to tensor::bubbleUpPadSlice. Fails if the pad cannot be
 // bubbled (e.g. non-constant padding value).
 // NOTE(review): the first signature line (source line 47, presumably
 // "getTiledImplementation(Operation *op, OpBuilder &b,") appears elided
 // in this rendering.
46  FailureOr<TilingResult>
48  ArrayRef<OpFoldResult> offsets,
49  ArrayRef<OpFoldResult> sizes) const {
50  FailureOr<TilingResult> result =
51  tensor::bubbleUpPadSlice(b, cast<PadOp>(op), offsets, sizes);
52  if (failed(result))
53  return failure();
54  return result.value();
55  }
56 
 // The result tile coincides with the iteration-space tile: offsets and
 // sizes are forwarded unchanged.
 // NOTE(review): a signature line (source line 60, presumably
 // "ArrayRef<OpFoldResult> sizes,") appears elided in this rendering.
57  LogicalResult
58  getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
59  ArrayRef<OpFoldResult> offsets,
61  SmallVector<OpFoldResult> &resultOffsets,
62  SmallVector<OpFoldResult> &resultSizes) const {
63  resultOffsets.assign(offsets.begin(), offsets.end());
64  resultSizes.assign(sizes.begin(), sizes.end());
65  return success();
66  }
67 
 // Inverse of getResultTilePosition: the iteration-domain tile for a
 // result tile is the identity mapping of offsets/sizes.
 // NOTE(review): a signature line (source line 70, presumably
 // "ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,")
 // appears elided in this rendering.
68  LogicalResult getIterationDomainTileFromResultTile(
69  Operation *op, OpBuilder &b, unsigned resultNumber,
71  SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
72  SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
73  iterDomainOffsets.assign(offsets.begin(), offsets.end());
74  iterDomainSizes.assign(sizes.begin(), sizes.end());
75  return success();
76  }
77 
 // Since result tiles and iteration-domain tiles are identical for pad,
 // generating a result tile is the same as the tiled implementation.
78  FailureOr<TilingResult>
79  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
80  ArrayRef<OpFoldResult> offsets,
81  ArrayRef<OpFoldResult> sizes) const {
82  return getTiledImplementation(op, b, offsets, sizes);
83  }
84 };
85 
86 } // namespace
87 
// Bubbles up a slice of this pad by taking the slice first and then
// performing the padding: rewrites a tile (given by `offsets`/`sizes`) of
// `padOp` as pad(extract_slice(source)), with adjusted low/high padding.
// Returns failure if the padding value is not a constant. When
// `generateZeroSliceGuard` is true and zero-length slices are only
// detectable at runtime, an scf.if guard selects between a GenerateOp
// (pure padding) and the pad-of-slice.
// NOTE(review): a signature line (source line 91, presumably
// "ArrayRef<OpFoldResult> sizes,") appears elided in this rendering.
88 FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
89  tensor::PadOp padOp,
90  ArrayRef<OpFoldResult> offsets,
92  bool generateZeroSliceGuard) {
93  // Only constant padding value supported.
94  Value padValue = padOp.getConstantPaddingValue();
95  if (!padValue)
96  return failure();
97 
98  // Helper variables and functions for various arithmetic operations. These
99  // are used extensively for computing new offset/length and padding values.
100  Location loc = padOp->getLoc();
101  AffineExpr dim0, dim1;
102  bindDims(b.getContext(), dim0, dim1);
103  // Subtract two integers.
104  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
105  auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
106  return affine::makeComposedFoldedAffineApply(b, loc, subMap, {v1, v2});
107  };
108  // Take the minimum of two integers.
109  auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
110  auto min = [&](OpFoldResult v1, OpFoldResult v2) {
111  return affine::makeComposedFoldedAffineMin(b, loc, idMap, {v1, v2});
112  };
113  // Take the maximum of two integers.
114  auto max = [&](OpFoldResult v1, OpFoldResult v2) {
115  return affine::makeComposedFoldedAffineMax(b, loc, idMap, {v1, v2});
116  };
117  // Zero index-typed integer.
118  OpFoldResult zero = b.getIndexAttr(0);
119 
120  // Compute new offsets, lengths, low padding, high padding.
121  SmallVector<OpFoldResult> newOffsets, newLengths;
122  SmallVector<OpFoldResult> newLows, newHighs;
123  // Set to true if the original data source is not read at all.
124  bool hasZeroLen = false;
125  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
126  // is true if the original data source turns out to be unused at runtime.
127  Value dynHasZeroLenCond;
128 
129  int64_t rank = padOp.getSourceType().getRank();
130  // Only unit stride supported.
131  SmallVector<OpFoldResult> newStrides(rank, b.getIndexAttr(1));
132  for (unsigned dim = 0; dim < rank; ++dim) {
133  auto low = padOp.getMixedLowPad()[dim];
134  bool hasLowPad = !isZeroInteger(low);
135  auto high = padOp.getMixedHighPad()[dim];
136  bool hasHighPad = !isZeroInteger(high);
137  auto offset = offsets[dim];
138  auto length = sizes[dim];
139  // If the dim has no padding, we don't need to calculate new values for that
140  // dim as the existing ones are correct even after the pattern.
141  if (!hasLowPad && !hasHighPad) {
142  newOffsets.push_back(offset);
143  newLengths.push_back(length);
144  newLows.push_back(low);
145  newHighs.push_back(high);
146  continue;
147  }
148 
149  auto srcSize = tensor::getMixedSize(b, loc, padOp.getSource(), dim);
150 
151  // The new amount of low padding is `low - offset`. Except for the case
152  // where none of the low padding is read. In that case, the new amount of
153  // low padding is zero.
154  //
155  // Optimization: If low = 0, then newLow = 0.
156  OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
157  newLows.push_back(newLow);
158 
159  // Start reading the data from position `offset - low`. Since the original
160  // read may have started in the low padding zone, this value could be
161  // negative. Therefore, start reading from:
162  //
163  // max(offset - low, 0)
164  //
165  // The original read could also have started in the high padding zone.
166  // In that case, set the offset to the end of source tensor. The new
167  // ExtractSliceOp length will be zero in that case. (Effectively reading
168  // no data from the source.)
169  //
170  // Optimization: If low = 0, then the formula can be simplified.
171  OpFoldResult newOffset = hasLowPad
172  ? min(max(sub(offset, low), zero), srcSize)
173  : min(offset, srcSize);
174  newOffsets.push_back(newOffset);
175 
176  // The original ExtractSliceOp was reading until position `offset +
177  // length`. Therefore, the corresponding position within the source tensor
178  // is:
179  //
180  // offset + length - low
181  //
182  // In case the original ExtractSliceOp stopped reading within the low
183  // padding zone, this value can be negative. In that case, the end
184  // position of the read should be zero. (Similar to newOffset.)
185  //
186  // The original read could also have stopped in the high padding zone.
187  // In that case, the end position of the read should be the end of
188  // the source tensor. (Similar to newOffset.)
189  // srcSize - newOffset represents how much length we have available
190  // and length - newLow represents how much length we want at most.
191  // Note that there are many ways to order this indexing math to compute
192  // newLength, but we want to make sure that the final affine.min ops in the
193  // sequence are bounding the index to as small a value as possible. If
194  // ValueBoundsOpInterface is used, this calculation will get upper bounds
195  // from the affine.min ops, so we want to use the smallest known value to
196  // set the bound at the end of the computation sequence. In this case, the
197  // index will be upper bounded by length - newLow.
198  OpFoldResult newLength = min(sub(srcSize, newOffset), sub(length, newLow));
199  // Optimization: If low = 0, then newLow = 0. then newLength >= 0 assuming
200  // length >= 0.
201  if (hasLowPad)
202  newLength = max(newLength, zero);
203  newLengths.push_back(newLength);
204 
205  // Check if newLength is zero. In that case, no SubTensorOp should be
206  // executed.
207  if (isZeroInteger(newLength)) {
208  hasZeroLen = true;
209  } else if (!hasZeroLen) {
 // newLength is dynamic: accumulate a runtime "is any dim zero-length"
 // condition by OR-ing per-dimension equality checks against zero.
210  Value check = b.create<arith::CmpIOp>(
211  loc, arith::CmpIPredicate::eq,
212  getValueOrCreateConstantIndexOp(b, loc, newLength),
213  getValueOrCreateConstantIndexOp(b, loc, zero));
214  dynHasZeroLenCond =
215  dynHasZeroLenCond
216  ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
217  : check;
218  }
219 
220  // The amount of high padding is simply the number of elements remaining,
221  // so that the result has the same length as the original ExtractSliceOp.
222  // As an optimization, if the original high padding is zero, then the new
223  // high padding must also be zero.
224  OpFoldResult newHigh =
225  hasHighPad ? sub(sub(length, newLength), newLow) : zero;
226  newHighs.push_back(newHigh);
227  }
228 
229  // The shape of the result can be obtained from the sizes passed in.
230  SmallVector<Value> dynDims;
231  SmallVector<int64_t> shape;
232  dispatchIndexOpFoldResults(sizes, dynDims, shape);
233  RankedTensorType resultType =
234  RankedTensorType::get(shape, padOp.getResultType().getElementType());
235 
236  // Insert cast to ensure that types match. (May be folded away.)
237  auto castResult = [&](Value val) -> Value {
238  if (resultType == val.getType())
239  return val;
240  return b.create<tensor::CastOp>(loc, resultType, val);
241  };
242 
243  // In cases where the original data source is unused: Emit a GenerateOp and
244  // do not generate a SliceOp. (The result shape of the SliceOp would
245  // have a dimension of size 0, the semantics of which is unclear.)
245  // The GenerateOp yields the constant padding value at every position.
246  auto createGenerateOp = [&]() {
247  // Create GenerateOp.
248  auto generateOp = b.create<tensor::GenerateOp>(
249  loc, resultType, dynDims,
250  [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
251  builder.create<tensor::YieldOp>(gLoc, padValue);
252  });
253  return generateOp;
254  };
255 
256  // Emit a SliceOp and a PadOp. Should not be used in cases where
257  // the result shape of the new SliceOp has a zero dimension.
258  auto createPadOfExtractSlice = [&]() {
259  // Create pad(extract_slice(x)).
260  auto newSliceOp = b.create<tensor::ExtractSliceOp>(
261  loc, padOp.getSource(), newOffsets, newLengths, newStrides);
262  auto newPadOp = b.create<PadOp>(
263  loc, Type(), newSliceOp, newLows, newHighs,
264  /*nofold=*/padOp.getNofold(),
265  getPrunedAttributeList(padOp, PadOp::getAttributeNames()));
266 
267  // Copy region to new PadOp.
268  IRMapping bvm;
269  padOp.getRegion().cloneInto(&newPadOp.getRegion(), bvm);
270 
271  // Cast result and return.
272  return std::make_tuple(newPadOp, newSliceOp);
273  };
274 
275  // Rewrite extract_slice(pad(x)) into a GenerateOp if it is statically known that
276  // the original data source x is not used.
277  if (hasZeroLen) {
278  Operation *generateOp = createGenerateOp();
279  return TilingResult{{generateOp},
280  {castResult(generateOp->getResult(0))},
281  /*generatedSlices=*/{}};
282  }
283 
284  // If there are dynamic dimensions: Generate an scf.if check to avoid
285  // creating SliceOps with result dimensions of size 0 at runtime.
286  if (generateZeroSliceGuard && dynHasZeroLenCond) {
287  Operation *thenOp;
288  Operation *elseOp;
289  Operation *sliceOp;
290  auto result = b.create<scf::IfOp>(
291  loc, dynHasZeroLenCond,
292  /*thenBuilder=*/
293  [&](OpBuilder &b, Location loc) {
294  thenOp = createGenerateOp();
295  b.create<scf::YieldOp>(loc, castResult(thenOp->getResult(0)));
296  },
297  /*elseBuilder=*/
298  [&](OpBuilder &b, Location loc) {
299  std::tie(elseOp, sliceOp) = createPadOfExtractSlice();
300  b.create<scf::YieldOp>(loc, castResult(elseOp->getResult(0)));
301  });
 // Report the else-branch pad as the tiled op and the scf.if results as
 // the replacement values; the new slice is recorded for follow-up
 // transformations.
302  return TilingResult{
303  {elseOp}, SmallVector<Value>(result->getResults()), {sliceOp}};
304  }
305 
 // Static non-zero-length case: emit pad(extract_slice(x)) directly.
306  auto [newPadOp, sliceOp] = createPadOfExtractSlice();
307  return TilingResult{
308  {newPadOp}, {castResult(newPadOp->getResult(0))}, {sliceOp}};
309 }
310 
// Registers PadOpTiling as the external TilingInterface model for
// tensor::PadOp, attached lazily when the TensorDialect is loaded.
// NOTE(review): the function signature line (source line 311, presumably
// "void mlir::tensor::registerTilingInterfaceExternalModels(") appears
// elided in this rendering of the file.
312  DialectRegistry &registry) {
313  registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
314  tensor::PadOp::attachInterface<PadOpTiling>(*ctx);
315  });
316 }
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
static LogicalResult getResultTilePosition(RewriterBase &rewriter, ReductionTilingStrategy reductionStrategy, int64_t index, Value tiledResult, TilingInterface op, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes, const SetVector< unsigned > &reductionDims, SmallVector< OpFoldResult > &resultOffset, SmallVector< OpFoldResult > &resultSize)
static FailureOr< TilingResult > getTiledImplementation(RewriterBase &rewriter, TilingInterface op, ReductionTilingStrategy reductionStrategy, ValueRange regionIterArg, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes, const SetVector< unsigned > &reductionDims)
Base type for affine expression.
Definition: AffineExpr.h:68
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
Definition: AffineMap.cpp:330
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:103
MLIRContext * getContext() const
Definition: Builders.h:55
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:205
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:452
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMaxOp that computes a maximum across the results of applying map to operands,...
Definition: AffineOps.cpp:1441
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1331
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
Definition: AffineOps.cpp:1434
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
FailureOr< TilingResult > bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, bool generateZeroSliceGuard=true)
Bubbles up a slice of this pad by taking the slice first and then performing the padding.
void registerTilingInterfaceExternalModels(mlir::DialectRegistry &registry)
Registers external models for Tiling interface for tensor ops.
OpFoldResult getMixedSize(OpBuilder &builder, Location loc, Value value, int64_t dim)
Return the dimension of the given tensor value.
Definition: TensorOps.cpp:61
Include the generated interface declarations.
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:311
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void dispatchIndexOpFoldResults(ArrayRef< OpFoldResult > ofrs, SmallVectorImpl< Value > &dynamicVec, SmallVectorImpl< int64_t > &staticVec)
Helper function to dispatch multiple OpFoldResults according to the behavior of dispatchIndexOpFoldRe...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:112
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
SmallVector< NamedAttribute > getPrunedAttributeList(Operation *op, ArrayRef< StringRef > elidedAttrs)
Container for result values of tiling.