PadTilingInterface.cpp
//===- PaddingTilingInterface.cpp - Padding of TilingInterface ops --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/UB/IR/UBOps.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"

#define DEBUG_TYPE "pad-tiling-interface"

using namespace mlir;
using namespace mlir::linalg;
using namespace mlir::tensor;

#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")
#define DBGSNL() (llvm::dbgs() << "\n")

/// Form a "full-rank" padding specification so that the application is easy.
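/// As an illustrative example (hypothetical values): with
/// `indexingSizes` = {%i, %j, %k} and `options.paddingSizes` = {16}, the
/// completed specification is {16, %j, %k}, or {16, 1, 1} when
/// `options.padToMultipleOf` is set.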
static SmallVector<OpFoldResult>
getFullRankPaddingSizes(Builder &b, ArrayRef<OpFoldResult> indexingSizes,
                        const PadTilingInterfaceOptions &options) {
  SmallVector<OpFoldResult> paddingSizes;
  // Complete the padding specification to specify all dimensions.
  for (size_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) {
    // Complete to zero if needed.
    paddingSizes.push_back(options.paddingSizes.size() > idx
                               ? options.paddingSizes[idx]
                               : b.getIndexAttr(0));
    // If a dimension is zero (either specified or completed), replace by:
    //   - 1 if we are padding to the next multiple of.
    //   - indexingSizes[idx] otherwise.
    if (isZeroInteger(paddingSizes[idx])) {
      paddingSizes[idx] =
          options.padToMultipleOf ? b.getIndexAttr(1) : indexingSizes[idx];
    }
    LLVM_DEBUG(DBGS() << "----idx: " << idx << " : " << paddingSizes[idx]
                      << "\n");
  }
  return paddingSizes;
}

/// Extracts the constant multiplier from an affine expression of the form
/// `d * c` or `c * d`, where `d` is an AffineDimExpr and `c` is an
/// AffineConstantExpr. Returns 1 if the expression is not a simple
/// multiplication of a dimension and a constant.
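/// For example, `d0 * 3` (or `3 * d0`) yields 3, while `d0` or `d0 + d1`
/// yield 1.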
static int64_t extractConstantMultiplier(AffineExpr expr) {
  if (auto binOp = dyn_cast<AffineBinaryOpExpr>(expr)) {
    if (binOp.getKind() == AffineExprKind::Mul) {
      auto lhsD = dyn_cast<AffineDimExpr>(binOp.getLHS());
      auto rhsC = dyn_cast<AffineConstantExpr>(binOp.getRHS());
      if (lhsD && rhsC) {
        return rhsC.getValue();
      }
      auto lhsC = dyn_cast<AffineConstantExpr>(binOp.getLHS());
      auto rhsD = dyn_cast<AffineDimExpr>(binOp.getRHS());
      if (lhsC && rhsD) {
        return lhsC.getValue();
      }
    }
  }
  return 1;
}

/// Compute the padded shape of the given value `v` of `RankedTensorType` given
///   - `indexingSizes` a list of OpFoldResult.
///   - an `indexingMap` that encodes how the shape of `v` varies with
///     increases in `indexingSizes`.
/// The `indexingMap` + `indexingSizes` encoding suits StructuredOps.
/// The implementation below iteratively combines increases from contributing
/// dimensions using affine.apply operations.
/// The padded shape is computed by evaluating the maximum accessed index per
/// dimension, which may involve multiplying by constant factors derived from
/// the affine indexing expressions. Currently, only a limited set of projected
/// permutation indexing maps are supported, such as
///   - affine_map<(d0, d1, d2) -> (d0, d1)>
///   - affine_map<(d0, d1, d2) -> (d0, d1 + d2)>
///   - affine_map<(d0, d1) -> (d0 * 3 + d1)>
/// In the future, more general interfaces can be devised to encode similar
/// shape evolutions and map between an op and its operands.
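/// As a worked example (illustrative sizes): for
/// affine_map<(d0, d1) -> (d0 * 3 + d1)> with `indexingSizes` = {4, 3} and no
/// `padToMultipleOf`, the maximum accessed index is (4 - 1) * 3 + (3 - 1) = 11,
/// so the padded size of the single result dimension is 12.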
SmallVector<OpFoldResult> linalg::computePaddedShape(
    OpBuilder &builder, TypedValue<RankedTensorType> v, AffineMap indexingMap,
    ArrayRef<OpFoldResult> indexingSizes,
    const PadTilingInterfaceOptions &options) {
  Location loc = v.getLoc();
  SmallVector<OpFoldResult> paddedShape;
  auto tensorType = cast<RankedTensorType>(v.getType());
  paddedShape.resize_for_overwrite(tensorType.getRank());
  assert(tensorType.getRank() == indexingMap.getNumResults() &&
         "expect the number of results of the affine map to match the tensor "
         "rank");

  // "Full-rank" padding specification.
  SmallVector<OpFoldResult> paddingSizes =
      getFullRankPaddingSizes(builder, indexingSizes, options);

  // For each dimension in the operand's shape, iterate over indexingSizes and
  // add the various term contributions.
  for (const auto &enResults : enumerate(indexingMap.getResults())) {
    int64_t resultIndex = enResults.index();
    AffineMap partialIndexingMap = indexingMap.getSubMap(
        ArrayRef<unsigned>{static_cast<unsigned>(resultIndex)});

    LLVM_DEBUG(DBGS() << "----resultIndex: " << resultIndex
                      << " with partialIndexingMap: " << partialIndexingMap
                      << "\n");

    // Find all padding dimensions that contribute to this operand dimension
    // and compute the padded term contribution to the final padded shape.
    SmallVector<OpFoldResult> terms;
    for (size_t paddingDim = 0, e = paddingSizes.size(); paddingDim != e;
         ++paddingDim) {
      OpFoldResult paddingSize = paddingSizes[paddingDim];
      LLVM_DEBUG(DBGS() << "------try apply padding of dim: " << paddingDim
                        << " to: " << paddingSize << "\n");
      if (!enResults.value().isFunctionOfDim(paddingDim))
        continue;

      LLVM_DEBUG(DBGS() << "------apply padding of dim: " << paddingDim
                        << " to: " << paddingSize << "\n");

      // Project non-'paddingDim' dimensions and compress the result.
      llvm::SmallBitVector projectedDims(partialIndexingMap.getNumDims(), true);
      projectedDims.flip(paddingDim);
      AffineMap projectedMap =
          mlir::projectDims(partialIndexingMap, projectedDims,
                            /*compressDimsFlag=*/true);

      // If we are padding to the next multiple of, compose with ceil(sz) * sz.
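      // E.g. (illustrative values): with an identity projectedMap,
      // indexingSizes[paddingDim] = 10 and paddingSize = 16, the composed
      // expression evaluates to ceil(10 / 16) * 16 = 16.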
      OpFoldResult paddingDimOfr;
      if (options.padToMultipleOf) {
        AffineExpr d0, s0;
        bindDims(builder.getContext(), d0);
        bindSymbols(builder.getContext(), s0);
        AffineMap ceilMap = AffineMap::get(1, 1, d0.ceilDiv(s0) * s0);
        AffineMap composedMap = projectedMap.compose(ceilMap);
        paddingDimOfr = affine::makeComposedFoldedAffineApply(
            builder, loc, composedMap, {indexingSizes[paddingDim], paddingSize},
            /*composeAffineMin=*/true);
      } else {
        // Otherwise just set to paddingSize.
        paddingDimOfr = affine::makeComposedFoldedAffineApply(
            builder, loc, projectedMap, paddingSize);
      }

      // Adjust for the maximum accessed index, which is (paddingSize - 1) *
      // multiplier.
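      // E.g. (illustrative values): with multiplier = 3 and paddingDimOfr
      // evaluating to 4 * 3 = 12, the maximum accessed index is 12 - 3 = 9,
      // i.e. (4 - 1) * 3.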
      AffineExpr d0;
      bindDims(builder.getContext(), d0);
      int64_t multiplier = extractConstantMultiplier(projectedMap.getResult(0));
      AffineMap subtractMap = AffineMap::get(1, 0, d0 - multiplier);
      OpFoldResult maxAccessIdx = affine::makeComposedFoldedAffineApply(
          builder, loc, subtractMap, {paddingDimOfr});
      terms.push_back(maxAccessIdx);

      LLVM_DEBUG(DBGS() << "------new term: " << terms.back() << "\n");
    }

    // If there are no terms, just return the dim.
    if (terms.empty()) {
      paddedShape[resultIndex] =
          createFoldedDimOp(builder, loc, v, resultIndex);
      continue;
    }

    // Sum individual terms' contributions.
    SmallVector<AffineExpr> dims(terms.size());
    bindDimsList(builder.getContext(), MutableArrayRef{dims});
    AffineExpr sumExpr = dims.front();
    for (unsigned i = 1; i < dims.size(); ++i)
      sumExpr = sumExpr + dims[i];
    // Add 1 to the maximum accessed index and get the final padded size.
    OpFoldResult paddedDimOfr =
        affine::makeComposedFoldedAffineApply(builder, loc, sumExpr + 1, terms);
    paddedShape[resultIndex] = paddedDimOfr;
  }

  return paddedShape;
}

FailureOr<SmallVector<OpFoldResult>>
linalg::computeIndexingMapOpInterfacePaddedShape(
    OpBuilder &builder, OpOperand &operandToPad,
    ArrayRef<Range> iterationDomain, const PadTilingInterfaceOptions &options) {
  auto transferOp =
      llvm::dyn_cast<IndexingMapOpInterface>(operandToPad.getOwner());
  if (!transferOp)
    return failure();

  // clang-format off
  assert(llvm::all_of(iterationDomain, [&builder](Range r) {
    return r.offset == OpFoldResult(builder.getIndexAttr(0)) &&
           r.stride == OpFoldResult(builder.getIndexAttr(1));
  }) && "expected 0-offset 1-stride loop ranges");
  // clang-format on
  SmallVector<OpFoldResult> loopUpperBounds;
  loopUpperBounds.reserve(iterationDomain.size());
  for (const Range &range : iterationDomain)
    loopUpperBounds.push_back(range.size);

  AffineMap indexingMap = transferOp.getMatchingIndexingMap(&operandToPad);
  return computePaddedShape(
      builder, cast<TypedValue<RankedTensorType>>(operandToPad.get()),
      indexingMap, loopUpperBounds, options);
}

/// Pad a single operand to `paddedShape` using `paddingValueAttr` as padding
/// Value.
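/// For instance, a complex element type expects `paddingValueAttr` to be an
/// ArrayAttr holding the real and imaginary parts, a ub::PoisonAttr produces a
/// ub.poison padding value, and any other TypedAttr is materialized with
/// arith.constant.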
static Value padOperand(OpBuilder &builder, TilingInterface opToPad,
                        TypedValue<RankedTensorType> v,
                        ArrayRef<OpFoldResult> paddedShape,
                        Attribute paddingValueAttr) {
  Value paddingValue;
  if (auto complexTy =
          dyn_cast<ComplexType>(getElementTypeOrSelf(v.getType()))) {
    if (auto complexAttr = dyn_cast<ArrayAttr>(paddingValueAttr)) {
      paddingValue = complex::ConstantOp::create(builder, opToPad.getLoc(),
                                                 complexTy, complexAttr);
    }
  } else if (isa<ub::PoisonAttr>(paddingValueAttr)) {
    paddingValue = ub::PoisonOp::create(builder, opToPad.getLoc(),
                                        getElementTypeOrSelf(v.getType()));
  } else if (auto typedAttr = dyn_cast<TypedAttr>(paddingValueAttr)) {
    paddingValue =
        arith::ConstantOp::create(builder, opToPad.getLoc(), typedAttr);
  }
  assert(paddingValue && "failed to create value from padding attribute");

  // Pad the operand to the bounding box defined by `paddedShape`.
  SmallVector<int64_t> tensorShape;
  SmallVector<Value> dynDims;
  for (OpFoldResult ofr : paddedShape) {
    std::optional<int64_t> cst = getConstantIntValue(ofr);
    tensorShape.push_back(cst.has_value() ? *cst : ShapedType::kDynamic);
    if (!cst.has_value())
      dynDims.push_back(ofr.dyn_cast<Value>());
  }
  // TODO: use dispatchIndexOpFoldResults(paddedShape, dynDims, paddedShape);

  auto paddedTensorType =
      RankedTensorType::get(tensorShape, getElementTypeOrSelf(v.getType()));
  LLVM_DEBUG(DBGS() << "--SUCCESS, makeComposedPadHighOp with type: "
                    << paddedTensorType);
  return makeComposedPadHighOp(builder, opToPad.getLoc(), paddedTensorType, v,
                               paddingValue, /*nofold=*/false, dynDims);
}

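// Illustrative usage sketch (assumed caller code; `op` implements
// TilingInterface and `b` is an OpBuilder whose insertion point is set right
// before `op`):
//   PadTilingInterfaceOptions options;
//   options.paddingSizes = {b.getIndexAttr(16), b.getIndexAttr(32)};
//   // Padding values are inferred as zero attributes when left empty.
//   FailureOr<PadTilingInterfaceResult> padded = rewriteAsPaddedOp(
//       b, op, options, computeIndexingMapOpInterfacePaddedShape);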
FailureOr<PadTilingInterfaceResult> linalg::rewriteAsPaddedOp(
    OpBuilder &builder, TilingInterface toPad,
    PadTilingInterfaceOptions options,
    const PadSizeComputationFunction &computePaddingSizeFun) {
  LLVM_DEBUG(DBGS() << "Start rewriteAsPaddedOp : " << toPad << "\n");
  SmallVector<tensor::PadOp> padOps;
  Location loc = toPad.getLoc();

  // Allow inference of pad values if they are not explicitly specified.
  // TODO: be mindful about the value depending on the actual operation.
  if (options.paddingValues.empty()) {
    SmallVector<Type> types(toPad->getOperandTypes());
    llvm::append_range(types, toPad->getResultTypes());
    for (Type t : types) {
      options.paddingValues.push_back(
          builder.getZeroAttr(getElementTypeOrSelf(t)));
    }
  }

  if (llvm::any_of(toPad->getOperands(),
                   [](Value v) { return isa<MemRefType>(v.getType()); })) {
    LLVM_DEBUG(DBGS() << "Not an operation on tensors: FAIL\n");
    return failure();
  }

  // 1. Get the loopUpperBounds from the TilingInterface.
  SmallVector<Range> iterationDomain = toPad.getIterationDomain(builder);

  // 2. For each operand.
  SmallVector<Value> newOperands;
  newOperands.reserve(toPad->getNumOperands());
  for (OpOperand &opOperand : toPad->getOpOperands()) {
    Value operand = opOperand.get();
    LLVM_DEBUG(DBGS() << "--start padding operand: " << operand << "\n");

    // 2.a. Skip scalar-like operands.
    Type operandType = operand.getType();
    if (!isa<RankedTensorType>(operandType)) {
      assert((!isa<ShapedType>(operandType) || isa<VectorType>(operandType)) &&
             "Unexpected non-vector ShapedType");
      newOperands.push_back(operand);
      continue;
    }

    // 2.b. Compute padded shape.
    FailureOr<SmallVector<OpFoldResult>> maybePaddedShape =
        computePaddingSizeFun(builder, opOperand, iterationDomain, options);
    if (failed(maybePaddedShape)) {
      LLVM_DEBUG(DBGS() << "Could not get padded shape of operand: FAIL\n");
      return failure();
    }

    // 2.c. Expect proper `paddingValues`.
    // TODO: we may want to allow garbage padding in the future, in which case
    // we would just not assert.
    if (opOperand.getOperandNumber() >= options.paddingValues.size()) {
      LLVM_DEBUG(DBGS() << "Too few padding values specified: FAIL\n");
      return failure();
    }
    Attribute paddingValueAttr =
        options.paddingValues[opOperand.getOperandNumber()];

    // 2.d. Perform actual padding.
    Value paddedOperand =
        padOperand(builder, toPad, cast<TypedValue<RankedTensorType>>(operand),
                   *maybePaddedShape, paddingValueAttr);
    LLVM_DEBUG(DBGS() << "--done padding operand: " << paddedOperand << "\n");

    newOperands.push_back(paddedOperand);
    if (auto padOp = paddedOperand.getDefiningOp<tensor::PadOp>())
      padOps.push_back(padOp);
  }

  // 3. Form the resulting tensor::ExtractSliceOp.
  ReifiedRankedShapedTypeDims reifiedResultShapes;
  if (failed(reifyResultShapes(builder, toPad, reifiedResultShapes))) {
    LLVM_DEBUG(DBGS() << "Failed to reify result shapes: FAIL\n");
    return failure();
  }
  assert(reifiedResultShapes.size() == toPad->getNumResults() &&
         "expected same number of results");

  // Clone `toPad` to operate on the statically padded shapes.
  auto resultTensorTypes =
      ValueRange(newOperands).take_back(toPad->getNumResults()).getTypes();
  // clone **should** properly notify the builder.
  TilingInterface paddedOp =
      clone(builder, toPad, resultTensorTypes, newOperands);
  LLVM_DEBUG(DBGS() << "--cloned padded op: " << paddedOp << "\n");

  // Recover the slice out of the new static results.
  SmallVector<Value> paddedSubtensorResults;
  paddedSubtensorResults.reserve(toPad->getNumResults());
  for (const auto &en : llvm::enumerate(paddedOp->getResults())) {
    Value paddedResult = en.value();
    int64_t resultNumber = en.index();
    int64_t rank = cast<RankedTensorType>(paddedResult.getType()).getRank();
    SmallVector<OpFoldResult> offsets(rank, builder.getIndexAttr(0));
    SmallVector<OpFoldResult> strides(rank, builder.getIndexAttr(1));
    paddedSubtensorResults.push_back(tensor::ExtractSliceOp::create(
        builder, loc, paddedResult, offsets, reifiedResultShapes[resultNumber],
        strides));
  }

  return PadTilingInterfaceResult{padOps, paddedOp, paddedSubtensorResults};
}