// Doxygen page residue (extraction artifact, not part of the original source):
//   MLIR 22.0.0git — TensorTilingInterfaceImpl.cpp — "Go to the documentation
//   of this file."
//===- TensorTilingInterfaceImpl.cpp - Tiling Interface models *- C++ --*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// NOTE(review): the original include block (Doxygen lines 9-17) was lost in
// extraction; the list below is reconstructed from the symbols this file uses
// (affine::makeComposedFolded*, arith::CmpIOp, scf::IfOp, tensor ops,
// TilingInterface, IRMapping, dispatchIndexOpFoldResults) — verify against
// upstream mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp.
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"

using namespace mlir;
using namespace mlir::tensor;
21
22namespace {
23
24struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
25
26 SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
27 auto padOp = cast<PadOp>(op);
28 SmallVector<utils::IteratorType> iteratorTypes(
29 padOp.getResultType().getRank(), utils::IteratorType::parallel);
30 return iteratorTypes;
31 }
32
33 SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
34 ReifiedRankedShapedTypeDims reifiedShapes;
35 (void)reifyResultShapes(b, op, reifiedShapes);
36 OpFoldResult zero = b.getIndexAttr(0);
37 OpFoldResult one = b.getIndexAttr(1);
38 // Initialize all the ranges to {zero, one, one}. All the `ub`s are
39 // overwritten.
40 SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
41 for (const auto &ub : enumerate(reifiedShapes[0]))
42 loopRanges[ub.index()].size = ub.value();
43 return loopRanges;
44 }
45
46 FailureOr<TilingResult>
47 getTiledImplementation(Operation *op, OpBuilder &b,
48 ArrayRef<OpFoldResult> offsets,
49 ArrayRef<OpFoldResult> sizes) const {
50 FailureOr<TilingResult> result =
51 tensor::bubbleUpPadSlice(b, cast<PadOp>(op), offsets, sizes);
52 if (failed(result))
53 return failure();
54 return result.value();
55 }
56
57 LogicalResult
58 getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
59 ArrayRef<OpFoldResult> offsets,
60 ArrayRef<OpFoldResult> sizes,
61 SmallVector<OpFoldResult> &resultOffsets,
62 SmallVector<OpFoldResult> &resultSizes) const {
63 resultOffsets.assign(offsets.begin(), offsets.end());
64 resultSizes.assign(sizes.begin(), sizes.end());
65 return success();
66 }
67
68 LogicalResult getIterationDomainTileFromResultTile(
69 Operation *op, OpBuilder &b, unsigned resultNumber,
70 ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
71 SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
72 SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
73 iterDomainOffsets.assign(offsets.begin(), offsets.end());
74 iterDomainSizes.assign(sizes.begin(), sizes.end());
75 return success();
76 }
77
78 FailureOr<TilingResult>
79 generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
80 ArrayRef<OpFoldResult> offsets,
81 ArrayRef<OpFoldResult> sizes) const {
82 return getTiledImplementation(op, b, offsets, sizes);
83 }
84};
85
86} // namespace
87
88FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
89 tensor::PadOp padOp,
92 bool generateZeroSliceGuard) {
93 // Only constant padding value supported.
94 Value padValue = padOp.getConstantPaddingValue();
95 if (!padValue)
96 return failure();
97
98 // Helper variables and functions for various arithmetic operations. These
99 // are used extensively for computing new offset/length and padding values.
100 Location loc = padOp->getLoc();
101 AffineExpr dim0, dim1;
102 bindDims(b.getContext(), dim0, dim1);
103 // Subtract two integers.
104 auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
105 auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
106 return affine::makeComposedFoldedAffineApply(b, loc, subMap, {v1, v2});
107 };
108 // Take the minimum of two integers.
109 auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
110 auto min = [&](OpFoldResult v1, OpFoldResult v2) {
111 return affine::makeComposedFoldedAffineMin(b, loc, idMap, {v1, v2});
112 };
113 // Take the maximum of two integers.
114 auto max = [&](OpFoldResult v1, OpFoldResult v2) {
115 return affine::makeComposedFoldedAffineMax(b, loc, idMap, {v1, v2});
116 };
117 // Zero index-typed integer.
118 OpFoldResult zero = b.getIndexAttr(0);
119
120 // Compute new offsets, lengths, low padding, high padding.
121 SmallVector<OpFoldResult> newOffsets, newLengths;
122 SmallVector<OpFoldResult> newLows, newHighs;
123 // Set to true if the original data source is not read at all.
124 bool hasZeroLen = false;
125 // Same as hasZeroLen, but for dynamic dimension sizes. This condition
126 // is true if the original data source turns out to be unused at runtime.
127 Value dynHasZeroLenCond;
128
129 int64_t rank = padOp.getSourceType().getRank();
130 // Only unit stride supported.
131 SmallVector<OpFoldResult> newStrides(rank, b.getIndexAttr(1));
132 for (unsigned dim = 0; dim < rank; ++dim) {
133 auto low = padOp.getMixedLowPad()[dim];
134 bool hasLowPad = !isZeroInteger(low);
135 auto high = padOp.getMixedHighPad()[dim];
136 bool hasHighPad = !isZeroInteger(high);
137 auto offset = offsets[dim];
138 auto length = sizes[dim];
139 // If the dim has no padding, we dont need to calculate new values for that
140 // dim as the exisiting ones are correct even after the pattern.
141 if (!hasLowPad && !hasHighPad) {
142 newOffsets.push_back(offset);
143 newLengths.push_back(length);
144 newLows.push_back(low);
145 newHighs.push_back(high);
146 continue;
147 }
148
149 auto srcSize = tensor::getMixedSize(b, loc, padOp.getSource(), dim);
150
151 // The new amount of low padding is `low - offset`. Except for the case
152 // where none of the low padding is read. In that case, the new amount of
153 // low padding is zero.
154 //
155 // Optimization: If low = 0, then newLow = 0.
156 OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
157 newLows.push_back(newLow);
158
159 // Start reading the data from position `offset - low`. Since the original
160 // read may have started in the low padding zone, this value could be
161 // negative. Therefore, start reading from:
162 //
163 // max(offset - low, 0)
164 //
165 // The original read could also have started in the high padding zone.
166 // In that case, set the offset to the end of source tensor. The new
167 // ExtractSliceOp length will be zero in that case. (Effectively reading
168 // no data from the source.)
169 //
170 // Optimization: If low = 0, then the formula can be simplified.
171 OpFoldResult newOffset = hasLowPad
172 ? min(max(sub(offset, low), zero), srcSize)
173 : min(offset, srcSize);
174 newOffsets.push_back(newOffset);
175
176 // The original ExtractSliceOp was reading until position `offset +
177 // length`. Therefore, the corresponding position within the source tensor
178 // is:
179 //
180 // offset + length - low
181 //
182 // In case the original ExtractSliceOp stopped reading within the low
183 // padding zone, this value can be negative. In that case, the end
184 // position of the read should be zero. (Similar to newOffset.)
185 //
186 // The original read could also have stopped in the high padding zone.
187 // In that case, set the end positition of the read should be the end of
188 // the source tensor. (Similar to newOffset.)
189 // srcSize - newOffset represents how much length we have available
190 // and length - newLow represents how much length we want at most.
191 // Note that there are many ways to order this indexing math to compute
192 // newLength, but we want to make sure that the final affine.min ops in the
193 // sequence are bounding the index to as small a value as possible. If
194 // ValueBoundsOpInterface is used, this calculation will get upper bounds
195 // from the affine.min ops, so we want to use the smallest known value to
196 // set the bound at the end of the computation sequence. In this case, the
197 // index will be upper bounded by length - newLow.
198 OpFoldResult newLength = min(sub(srcSize, newOffset), sub(length, newLow));
199 // Optimization: If low = 0, then newLow = 0. then newLength >= 0 assuming
200 // length >= 0.
201 if (hasLowPad)
202 newLength = max(newLength, zero);
203 newLengths.push_back(newLength);
204
205 // Check if newLength is zero. In that case, no SubTensorOp should be
206 // executed.
207 if (isZeroInteger(newLength)) {
208 hasZeroLen = true;
209 } else if (!hasZeroLen) {
210 Value check = arith::CmpIOp::create(
211 b, loc, arith::CmpIPredicate::eq,
212 getValueOrCreateConstantIndexOp(b, loc, newLength),
214 dynHasZeroLenCond =
215 dynHasZeroLenCond
216 ? arith::OrIOp::create(b, loc, check, dynHasZeroLenCond)
217 : check;
218 }
219
220 // The amount of high padding is simply the number of elements remaining,
221 // so that the result has the same length as the original ExtractSliceOp.
222 // As an optimization, if the original high padding is zero, then the new
223 // high padding must also be zero.
224 OpFoldResult newHigh =
225 hasHighPad ? sub(sub(length, newLength), newLow) : zero;
226 newHighs.push_back(newHigh);
227 }
228
229 // The shape of the result can be obtained from the sizes passed in.
230 SmallVector<Value> dynDims;
232 dispatchIndexOpFoldResults(sizes, dynDims, shape);
233 RankedTensorType resultType =
234 RankedTensorType::get(shape, padOp.getResultType().getElementType());
235
236 // Insert cast to ensure that types match. (May be folded away.)
237 auto castResult = [&](Value val) -> Value {
238 if (resultType == val.getType())
239 return val;
240 return tensor::CastOp::create(b, loc, resultType, val);
241 };
242
243 // In cases where the original data source is unused: Emit a GenerateOp and
244 // do not generate a SliceOp. (The result shape of the SliceOp would
245 // have a dimension of size 0, the semantics of which is unclear.)
246 auto createGenerateOp = [&]() {
247 // Create GenerateOp.
248 auto generateOp = tensor::GenerateOp::create(
249 b, loc, resultType, dynDims,
250 [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
251 tensor::YieldOp::create(builder, gLoc, padValue);
252 });
253 return generateOp;
254 };
255
256 // Emit a SliceOp and a PadOp. Should not be used in cases where
257 // the result shape of the new SliceOp has a zero dimension.
258 auto createPadOfExtractSlice = [&]() {
259 // Create pad(extract_slice(x)).
260 auto newSliceOp = tensor::ExtractSliceOp::create(
261 b, loc, padOp.getSource(), newOffsets, newLengths, newStrides);
262 auto newPadOp = PadOp::create(
263 b, loc, Type(), newSliceOp, newLows, newHighs,
264 /*nofold=*/padOp.getNofold(),
265 getPrunedAttributeList(padOp, PadOp::getAttributeNames()));
266
267 // Copy region to new PadOp.
268 IRMapping bvm;
269 padOp.getRegion().cloneInto(&newPadOp.getRegion(), bvm);
270
271 // Cast result and return.
272 return std::make_tuple(newPadOp, newSliceOp);
273 };
274
275 // Rewrite extract_slice(pad(x)) into a GenerateOp it is statically known that
276 // the original data source x is not used.
277 if (hasZeroLen) {
278 Operation *generateOp = createGenerateOp();
279 return TilingResult{{generateOp},
280 {castResult(generateOp->getResult(0))},
281 /*generatedSlices=*/{}};
282 }
283
284 // If there are dynamic dimensions: Generate an scf.if check to avoid
285 // creating SliceOps with result dimensions of size 0 at runtime.
286 if (generateZeroSliceGuard && dynHasZeroLenCond) {
287 Operation *thenOp;
288 Operation *elseOp;
289 Operation *sliceOp;
290 auto result = scf::IfOp::create(
291 b, loc, dynHasZeroLenCond,
292 /*thenBuilder=*/
293 [&](OpBuilder &b, Location loc) {
294 thenOp = createGenerateOp();
295 scf::YieldOp::create(b, loc, castResult(thenOp->getResult(0)));
296 },
297 /*elseBuilder=*/
298 [&](OpBuilder &b, Location loc) {
299 std::tie(elseOp, sliceOp) = createPadOfExtractSlice();
300 scf::YieldOp::create(b, loc, castResult(elseOp->getResult(0)));
301 });
302 return TilingResult{
303 {elseOp}, SmallVector<Value>(result->getResults()), {sliceOp}};
304 }
305
306 auto [newPadOp, sliceOp] = createPadOfExtractSlice();
307 return TilingResult{
308 {newPadOp}, {castResult(newPadOp->getResult(0))}, {sliceOp}};
309}
310
312 DialectRegistry &registry) {
313 registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) {
314 tensor::PadOp::attachInterface<PadOpTiling>(*ctx);
315 });
316}
/* Doxygen cross-reference residue appended by the HTML extraction; preserved
   verbatim below for reference — not part of the source file:

return success()
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
static LogicalResult getResultTilePosition(RewriterBase &rewriter, ReductionTilingStrategy reductionStrategy, int64_t index, Value tiledResult, TilingInterface op, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims, SmallVector< OpFoldResult > &resultOffset, SmallVector< OpFoldResult > &resultSize)
static FailureOr< TilingResult > getTiledImplementation(RewriterBase &rewriter, TilingInterface op, ReductionTilingStrategy reductionStrategy, ValueRange regionIterArg, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims)
Base type for affine expression.
Definition AffineExpr.h:68
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:207
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:407
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a maximum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition Matchers.h:344
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:561
FailureOr< TilingResult > bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, bool generateZeroSliceGuard=true)
Bubbles up a slice of this pad by taking the slice first and then performing the padding.
void registerTilingInterfaceExternalModels(mlir::DialectRegistry &registry)
Registers external models for Tiling interface for tensor ops.
OpFoldResult getMixedSize(OpBuilder &builder, Location loc, Value value, int64_t dim)
Return the dimension of the given tensor value.
Definition TensorOps.cpp:57
Include the generated interface declarations.
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition AffineExpr.h:311
SmallVector< SmallVector< OpFoldResult > > ReifiedRankedShapedTypeDims
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void dispatchIndexOpFoldResults(ArrayRef< OpFoldResult > ofrs, SmallVectorImpl< Value > &dynamicVec, SmallVectorImpl< int64_t > &staticVec)
Helper function to dispatch multiple OpFoldResults according to the behavior of dispatchIndexOpFoldRe...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:111
SmallVector< NamedAttribute > getPrunedAttributeList(Operation *op, ArrayRef< StringRef > elidedAttrs)
Container for result values of tiling.
*/