//===- Transforms.cpp - Linalg transformations as patterns ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic and helpers to expose Linalg transforms as rewrite
// patterns.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <type_traits>
#include <utility>

#define DEBUG_TYPE "linalg-transforms"

using namespace mlir;
using namespace mlir::linalg;

#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")

//===----------------------------------------------------------------------===//
// Transformations exposed as rewrite patterns.
//===----------------------------------------------------------------------===//

LinalgTilingOptions &
mlir::linalg::LinalgTilingOptions::setTileSizes(ArrayRef<int64_t> ts) {
  assert(!tileSizeComputationFunction && "tile sizes already set");
  SmallVector<int64_t, 4> tileSizes(ts.begin(), ts.end());
  tileSizeComputationFunction = [tileSizes](OpBuilder &b, Operation *op) {
    OpBuilder::InsertionGuard guard(b);
    b.setInsertionPointToStart(
        &op->getParentOfType<func::FuncOp>().getBody().front());
    return llvm::to_vector<4>(map_range(tileSizes, [&](int64_t s) {
      Value v = b.create<arith::ConstantIndexOp>(op->getLoc(), s);
      return v;
    }));
  };
  return *this;
}

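// Example usage (illustrative sketch): constant tile sizes set this way are
// captured once and materialized as `arith.constant` index ops at the top of
// the enclosing function when the tiling transformation runs.
//   LinalgTilingOptions options;
//   options.setTileSizes({8, 16, 32});
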
/// Pad the `opOperand` in the `paddingDimensions` using the padding value and
/// the nofold flag found in `paddingValues` and `packPaddings`, respectively.
/// Exit early and return the `opOperand` value if the shape dimensions that
/// match `paddingDimensions` have a static size and the nofold flag is not set.
/// Otherwise, try to pad the shape dimensions that match the iterator
/// dimensions `paddingDimensions` and return the tensor::PadOp result if
/// padding succeeds or failure otherwise.
static FailureOr<Value> padOperandToSmallestStaticBoundingBox(
    OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
    ArrayRef<int64_t> paddingDimensions, ArrayRef<Attribute> paddingValues,
    ArrayRef<bool> packPaddings) {
  AffineMap indexingMap = opToPad.getMatchingIndexingMap(opOperand);
  ArrayRef<int64_t> shape = opToPad.getShape(opOperand);

  // Collect the shape dimensions that are a function of `paddingDimensions`.
  llvm::SmallDenseSet<int64_t> shapeDimsToPad;
  for (int64_t dim : paddingDimensions)
    for (const auto &en : enumerate(indexingMap.getResults()))
      if (en.value().isFunctionOfDim(dim))
        shapeDimsToPad.insert(en.index());

  // Return the unpadded operand if padding to a static shape is not needed and
  // the nofold flag is not set.
  bool nofold = opOperand->getOperandNumber() < packPaddings.size()
                    ? packPaddings[opOperand->getOperandNumber()]
                    : false;
  bool hasStaticShape = llvm::none_of(shapeDimsToPad, [&](int64_t dim) {
    return ShapedType::isDynamic(shape[dim]);
  });
  if (!nofold && hasStaticShape)
    return opOperand->get();

  // Fail if `paddingValues` specifies no padding value.
  if (opOperand->getOperandNumber() >= paddingValues.size())
    return failure();
  Attribute paddingAttr = paddingValues[opOperand->getOperandNumber()];
  Type paddingType = b.getType<NoneType>();
  if (auto typedAttr = paddingAttr.dyn_cast<TypedAttr>())
    paddingType = typedAttr.getType();
  Value paddingValue =
      b.create<arith::ConstantOp>(opToPad.getLoc(), paddingType, paddingAttr);

  // Follow the use-def chain if `currOpOperand` is defined by a LinalgOp.
  OpOperand *currOpOperand = opOperand;
  while (auto linalgOp = currOpOperand->get().getDefiningOp<LinalgOp>()) {
    OpResult result = currOpOperand->get().cast<OpResult>();
    currOpOperand = linalgOp.getDpsInitOperand(result.getResultNumber());
  }

  // Fail if `currOpOperand` is not defined by an ExtractSliceOp.
  auto sliceOp = currOpOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp)
    return failure();

  // Compute the dropped dimensions if `sliceOp` is rank-reducing.
  llvm::SmallBitVector droppedDims = sliceOp.getDroppedDims();
  OffsetSizeAndStrideOpInterface shapedOp = sliceOp;

  // Upper bound the `sliceOp` sizes to obtain a static bounding box.
  SmallVector<int64_t> paddedShape(shape.begin(), shape.end());
  int64_t shapeIdx = 0;
  for (const auto &en : enumerate(shapedOp.getMixedSizes())) {
    // Skip dropped dimensions.
    if (droppedDims.test(en.index()))
      continue;
    // Skip dimensions that do not require padding.
    if (!shapeDimsToPad.contains(shapeIdx)) {
      shapeIdx++;
      continue;
    }
    // If the size is an attribute, add it directly to `paddedShape`.
    if (en.value().is<Attribute>()) {
      paddedShape[shapeIdx++] =
          en.value().get<Attribute>().dyn_cast<IntegerAttr>().getInt();
      continue;
    }
    // Otherwise, try to compute a constant upper bound for the size value.
    FailureOr<int64_t> upperBound =
        getConstantUpperBoundForIndex(en.value().get<Value>());
    if (failed(upperBound)) {
      LLVM_DEBUG(DBGS() << "No constant bounding box can be found for padding");
      return failure();
    }
    paddedShape[shapeIdx++] = *upperBound;
  }
  assert(shapeIdx == static_cast<int64_t>(shape.size()) &&
         "expect the dynamic and static ranks to match");

  // Pad the operand to the bounding box defined by `paddedShape`.
  auto paddedTensorType = RankedTensorType::get(
      paddedShape, getElementTypeOrSelf(opOperand->get()));
  return makeComposedPadHighOp(b, opToPad->getLoc(), paddedTensorType,
                               opOperand->get(), paddingValue, nofold);
}

FailureOr<SmallVector<Value>>
linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
                          ArrayRef<int64_t> paddingDimensions,
                          ArrayRef<Attribute> paddingValues,
                          ArrayRef<bool> packPaddings, LinalgOp &paddedOp) {
  Location loc = opToPad->getLoc();

  // TODO: there are cases where we may still want to pad to larger sizes.
  assert(opToPad.hasTensorSemantics() &&
         "expected operation to have tensor semantics");

  OpBuilder::InsertionGuard g(b);
  // Set IP after op because we also take the dims of the original output.
  b.setInsertionPointAfter(opToPad);
  // Make a copy of the shaped operands and update it.
  SmallVector<Value> newOperands;
  newOperands.reserve(opToPad->getNumOperands());
  for (OpOperand &opOperand : opToPad->getOpOperands()) {
    FailureOr<Value> paddedOperand = padOperandToSmallestStaticBoundingBox(
        b, opToPad, &opOperand, paddingDimensions, paddingValues, packPaddings);
    // Exit if `paddingDimensions` cannot be bounded statically.
    if (failed(paddedOperand))
      return failure();
    newOperands.push_back(*paddedOperand);
  }

  SmallVector<SmallVector<Value>> reifiedResultShapes;
  if (failed(cast<ReifyRankedShapedTypeOpInterface>(opToPad.getOperation())
                 .reifyResultShapes(b, reifiedResultShapes)))
    return failure();
  assert(reifiedResultShapes.size() == opToPad->getNumResults() &&
         "expected same number of results");

  // Clone `opToPad` to operate on the statically padded shapes.
  auto resultTensorTypes =
      ValueRange(newOperands).take_back(opToPad.getNumDpsInits()).getTypes();
  paddedOp = clone(b, opToPad, resultTensorTypes, newOperands);

  // Recover the slice out of the new static results. This keeps the original
  // linalg op around because it uses the dims of the original results.
  SmallVector<Value> paddedSubviewResults;
  paddedSubviewResults.reserve(opToPad->getNumResults());
  for (const auto &en : llvm::enumerate(paddedOp->getResults())) {
    Value paddedResult = en.value();
    int64_t resultNumber = en.index();
    int64_t rank = paddedResult.getType().cast<RankedTensorType>().getRank();
    SmallVector<OpFoldResult> offsets(rank, b.getIndexAttr(0));
    SmallVector<OpFoldResult> sizes;
    for (Value v : reifiedResultShapes[resultNumber])
      sizes.push_back(getAsOpFoldResult(v));
    SmallVector<OpFoldResult> strides(rank, b.getIndexAttr(1));
    paddedSubviewResults.push_back(b.create<tensor::ExtractSliceOp>(
        loc, paddedResult, offsets, sizes, strides));
  }
  return paddedSubviewResults;
}
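
// Example usage (illustrative sketch; `rewriter`, `matmulOp`, and `zeroAttr`
// are placeholder names):
//   LinalgOp paddedOp;
//   FailureOr<SmallVector<Value>> newResults = rewriteAsPaddedOp(
//       rewriter, matmulOp, /*paddingDimensions=*/{0, 1, 2},
//       /*paddingValues=*/{zeroAttr, zeroAttr, zeroAttr},
//       /*packPaddings=*/{false, false, false}, paddedOp);
//   if (succeeded(newResults))
//     rewriter.replaceOp(matmulOp, *newResults);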

/// Try to peel a loop `op` and return the new result.
// TODO: Add support for scf.parallel and affine.for loops.
SmallVector<Value> mlir::linalg::peelLoop(RewriterBase &rewriter,
                                          Operation *op) {
  return llvm::TypeSwitch<Operation *, SmallVector<Value, 4>>(op)
      .Case<scf::ForOp>([&](scf::ForOp forOp) {
        scf::ForOp partialIteration;
        if (succeeded(scf::peelAndCanonicalizeForLoop(rewriter, forOp,
                                                      partialIteration)))
          return partialIteration->getResults();
        assert(!partialIteration && "expected that loop was not peeled");
        return forOp->getResults();
      })
      .Default([&](Operation *op) { return op->getResults(); });
}

/// Peel and canonicalize 'loops'.
void mlir::linalg::peelLoops(RewriterBase &rewriter,
                             ArrayRef<scf::ForOp> loops) {
  for (auto loopOp : loops)
    peelLoop(rewriter, loopOp);
}
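
// Example usage (illustrative sketch; `rewriter` and `tilingLoops` are
// placeholder names for a rewriter and the scf.for loops produced by a tiling
// step):
//   SmallVector<scf::ForOp> tilingLoops = ...;
//   peelLoops(rewriter, tilingLoops);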

/// Linalg padding pattern.
mlir::linalg::LinalgPaddingPattern::LinalgPaddingPattern(
    MLIRContext *context, LinalgPaddingOptions options, PatternBenefit benefit)
    : OpInterfaceRewritePattern<LinalgOp>(context, benefit),
      options(std::move(options)) {}

FailureOr<LinalgOp>
mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
    LinalgOp linalgOp, PatternRewriter &rewriter) const {
  if (!linalgOp.hasTensorSemantics())
    return failure();

  // Pad the operation.
  LinalgOp paddedOp;
  FailureOr<SmallVector<Value>> newResults =
      rewriteAsPaddedOp(rewriter, linalgOp, options.paddingDimensions,
                        options.paddingValues, options.packPaddings, paddedOp);
  if (failed(newResults))
    return failure();

  // Hoist the padding.
  for (const auto &en : enumerate(options.hoistPaddings)) {
    if (static_cast<int64_t>(en.index()) >= paddedOp->getNumOperands())
      break;
    OpOperand &opOperand = paddedOp->getOpOperand(en.index());
    auto padOp = opOperand.get().getDefiningOp<tensor::PadOp>();
    if (!padOp || en.value() == 0)
      continue;

    // Fail hoisting if the operand shape is not fully static.
    if (llvm::any_of(paddedOp.getShape(&opOperand), ShapedType::isDynamic))
      return failure();

    tensor::PadOp hoistedOp;
    SmallVector<GenericOp> transposeOps;
    SmallVector<int64_t> transposeVector =
        en.index() < options.transposePaddings.size()
            ? options.transposePaddings[en.index()]
            : SmallVector<int64_t>();

    FailureOr<Value> newResult = hoistPaddingOnTensors(
        padOp, en.value(), transposeVector, hoistedOp, transposeOps);
    if (failed(newResult))
      continue;
    rewriter.replaceOp(padOp, *newResult);
  }

  // Replace the original operation to pad.
  rewriter.replaceOp(linalgOp, *newResults);

  return paddedOp;
}
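
// Example usage (illustrative sketch; `ctx` and `funcOp` are placeholder
// names):
//   RewritePatternSet patterns(ctx);
//   patterns.add<LinalgPaddingPattern>(ctx, LinalgPaddingOptions());
//   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));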

LogicalResult mlir::linalg::CopyVectorizationPattern::matchAndRewrite(
    memref::CopyOp copyOp, PatternRewriter &rewriter) const {
  return vectorizeCopy(rewriter, copyOp);
}

static SmallVector<utils::IteratorType>
getNParallelLoopsAttrs(unsigned nParallelLoops) {
  return SmallVector<utils::IteratorType>(nParallelLoops,
                                          utils::IteratorType::parallel);
}

/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp (to
/// initialize with pad_val) and GenericOp (to copy contents).
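/// For example (schematic IR, illustrative only), a constant-padded static
/// tensor such as
///   %0 = tensor.pad %src low[1, 1] high[2, 2] { tensor.yield %cst }
/// becomes roughly
///   %empty = tensor.empty() : tensor<...>
///   %fill  = linalg.fill ins(%cst) outs(%empty)
///   %0     = linalg.generic ins(%src) outs(%fill)
/// where the generic writes each source element at an offset of low[i] in
/// dimension i.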
LogicalResult
PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp,
                                            PatternRewriter &rewriter) const {

  auto inputShapedType = padOp.getSource().getType().cast<ShapedType>();
  auto resultShapedType = padOp.getResult().getType().cast<ShapedType>();

  // Bail on non-static shapes.
  if (!inputShapedType.hasStaticShape())
    return failure();
  if (!resultShapedType.hasStaticShape())
    return failure();

  // Only support padding with a constant for now, i.e. either:
  // 1. A BBarg from a different block.
  // 2. A value defined outside of the current block.
  Block &block = padOp.getRegion().front();
  auto yieldOp = cast<tensor::YieldOp>(block.getTerminator());
  Value padValue = yieldOp.getValue();
  Operation *definingOp = padValue.getDefiningOp();
  if (definingOp && definingOp->getBlock() == &block)
    return failure();
  if (!definingOp && padValue.cast<BlockArgument>().getOwner() == &block)
    return failure();

  // Create a tensor with the padded shape.
  Location loc = padOp.getLoc();
  SmallVector<Value> indices(resultShapedType.getRank(),
                             rewriter.create<arith::ConstantIndexOp>(loc, 0));
  Value emptyTensor = rewriter.create<tensor::EmptyOp>(
      loc, resultShapedType.getShape(), resultShapedType.getElementType());

  // Initialize the tensor with the pad value.
  Value tmpTensor = rewriter
                        .create<linalg::FillOp>(loc, ValueRange{padValue},
                                                ValueRange{emptyTensor})
                        .result();

  // Copy the original contents into the new tensor.
  // Uses linalg.generic, but could be done with tensor.insert_slice.
  SmallVector<AffineExpr, 4> outputExprs;
  for (unsigned i = 0; i < resultShapedType.getRank(); ++i) {
    outputExprs.push_back(getAffineDimExpr(i, rewriter.getContext()) +
                          padOp.getStaticLow()[i]);
  }

  SmallVector<AffineMap, 2> transferMaps = {
      rewriter.getMultiDimIdentityMap(inputShapedType.getRank()),
      AffineMap::get(resultShapedType.getRank(),
                     /*symbolCount=*/0, outputExprs, rewriter.getContext())};

  rewriter.replaceOpWithNewOp<linalg::GenericOp>(
      padOp, resultShapedType, padOp.getSource(), tmpTensor, transferMaps,
      getNParallelLoopsAttrs(resultShapedType.getRank()),
      [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
        nestedBuilder.create<linalg::YieldOp>(nestedLoc, args[0]);
      });

  return success();
}

/// Fill `dest` using a FillOp with the constant padding value if possible.
/// Otherwise, generate a tensor::GenerateOp.
Value GeneralizePadOpPattern::createFillOrGenerateOp(
    PatternRewriter &rewriter, tensor::PadOp padOp, Value dest,
    const SmallVector<Value> &dynSizes) const {
  auto padValue = padOp.getConstantPaddingValue();
  if (padValue)
    return rewriter.create<FillOp>(padOp.getLoc(), padValue, dest).result();

  // Fill could not be optimized: Lower to tensor::GenerateOp with region.
  auto generateOp = rewriter.create<tensor::GenerateOp>(
      padOp.getLoc(), padOp.getResultType(), dynSizes);
  // Copy region to new op.
  BlockAndValueMapping bvm;
  padOp.getRegion().cloneInto(&generateOp.getRegion(), bvm);
  return generateOp;
}

LogicalResult
GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
                                        PatternRewriter &rewriter) const {
  // Given an OpFoldResult, return an index-typed value.
  auto getIdxValue = [&](OpFoldResult ofr) {
    if (auto val = ofr.dyn_cast<Value>())
      return val;
    return rewriter
        .create<arith::ConstantIndexOp>(
            padOp.getLoc(), ofr.get<Attribute>().cast<IntegerAttr>().getInt())
        .getResult();
  };

  auto resultType = padOp.getResultType();
  // Compute size of EmptyOp. Any combination of static/dynamic is supported.
  SmallVector<Value> dynSizes;
  SmallVector<int64_t> staticSizes;
  for (unsigned dim = 0; dim < resultType.getRank(); ++dim) {
    if (resultType.isDynamicDim(dim)) {
      auto srcSize = rewriter.createOrFold<tensor::DimOp>(
          padOp.getLoc(), padOp.getSource(), dim);
      // Add low and high padding value.
      auto plusLow = rewriter.createOrFold<arith::AddIOp>(
          padOp.getLoc(), srcSize, getIdxValue(padOp.getMixedLowPad()[dim]));
      auto plusHigh = rewriter.createOrFold<arith::AddIOp>(
          padOp.getLoc(), plusLow, getIdxValue(padOp.getMixedHighPad()[dim]));
      dynSizes.push_back(plusHigh);
    }
    staticSizes.push_back(resultType.getDimSize(dim));
  }

  // Init tensor and fill it with padding.
  Value emptyTensor = rewriter.create<tensor::EmptyOp>(
      padOp.getLoc(), staticSizes, resultType.getElementType(), dynSizes);
  Value fill = createFillOrGenerateOp(rewriter, padOp, emptyTensor, dynSizes);

  // Try to optimize the copy of the source.
  if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded())
    return success();

  // tensor::PadOps cannot be optimized. Generate an InsertSliceOp instead
  // for copying the PadOp source.
  auto sourceType = padOp.getSourceType();
  // Compute size of source of tensor::PadOp.
  SmallVector<OpFoldResult> srcSizes;
  for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) {
    if (sourceType.isDynamicDim(dim)) {
      srcSizes.push_back(rewriter.createOrFold<tensor::DimOp>(
          padOp.getLoc(), padOp.getSource(), dim));
    } else {
      srcSizes.push_back(rewriter.getIndexAttr(sourceType.getDimSize(dim)));
    }
  }
  // Strides of InsertSliceOp are all 1.
  SmallVector<OpFoldResult> strides(sourceType.getRank(),
                                    rewriter.getIndexAttr(1));
  rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
      padOp, padOp.getSource(), fill, padOp.getMixedLowPad(), srcSizes,
      strides);

  return success();
}
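
// Schematic result when the source copy cannot be optimized (illustrative
// only):
//   %empty = tensor.empty(...)                        // padded result shape
//   %fill  = linalg.fill ins(%padValue) outs(%empty)  // or tensor.generate
//   %res   = tensor.insert_slice %src into %fill[lowPad..] [srcSizes..] [1..]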

LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
    tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
  if (!sliceOp.hasUnitStride())
    return failure();

  auto padOp = sliceOp.getSource().getDefiningOp<tensor::PadOp>();
  if (!padOp)
    return failure();

  bool zeroSliceGuard = true;
  if (controlFn) {
    if (Optional<bool> control = controlFn(sliceOp))
      zeroSliceGuard = *control;
    else
      return failure();
  }

  Operation *tiledPadOp =
      tensor::bubbleUpPadSlice(rewriter, padOp, sliceOp.getMixedOffsets(),
                               sliceOp.getMixedSizes(), zeroSliceGuard);
  // All shapes are static and the data source is actually used. Rewrite into
  // pad(extract_slice(x)).
  rewriter.replaceOp(sliceOp, tiledPadOp->getResults());
  return success();
}

// The following are patterns for downscaling convolution ops with size-1
// window dimensions.
//
// Note that we'd eventually want to write such transformations in a generic
// way, e.g., converting to linalg.generic, removing the size-1 dimensions,
// and then turning back to named ops. But for now it's fine to have a few
// patterns matching special ops to get started.
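//
// For example (schematic IR, illustrative only), with unit kh and oh
// dimensions:
//   %0 = linalg.conv_2d_nhwc_hwcf
//          ins(%input : tensor<1x1x10x4xf32>, %kernel : tensor<1x3x4x8xf32>)
//          outs(%init : tensor<1x1x8x8xf32>)
// is rewritten into a 1-D convolution on rank-reduced operands:
//   %0 = linalg.conv_1d_nwc_wcf
//          ins(%input : tensor<1x10x4xf32>, %kernel : tensor<3x4x8xf32>)
//          outs(%init : tensor<1x8x8xf32>)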

template <typename Conv2DOp, typename Conv1DOp>
FailureOr<Conv1DOp> DownscaleSizeOneWindowed2DConvolution<Conv2DOp, Conv1DOp>::
    returningMatchAndRewrite(Conv2DOp convOp, PatternRewriter &rewriter) const {
  if (convOp.hasBufferSemantics())
    return failure(); // To be implemented.

  Value input = convOp.getInputs().front();
  Value kernel = convOp.getInputs().back();
  Value output = convOp.getOutputs().front();

  auto inputType = input.getType().dyn_cast<RankedTensorType>();
  auto kernelType = kernel.getType().dyn_cast<RankedTensorType>();
  auto outputType = output.getType().dyn_cast<RankedTensorType>();

  auto kernelShape = kernelType.getShape();
  auto outputShape = outputType.getShape();

  // Get domain indices based on conv2D layout.
  int khIndex, kwIndex, ohIndex, owIndex;

  llvm::TypeSwitch<Operation *>(convOp)
      .Case([&](linalg::Conv2DNhwcHwcfOp op) {
        khIndex = 0;
        kwIndex = 1;
        ohIndex = 1;
        owIndex = 2;
      })
      .Case([&](linalg::Conv2DNchwFchwOp op) {
        khIndex = 2;
        kwIndex = 3;
        ohIndex = 2;
        owIndex = 3;
      })
      .Default([&](Operation *op) {
        llvm_unreachable("unexpected conv2d operation.");
      });

  // Only handle the case where at least one of the window dimensions is
  // of size 1. Other cases can rely on tiling to reduce to such cases.
  int64_t khSize = kernelShape[khIndex], kwSize = kernelShape[kwIndex];
  int64_t ohSize = outputShape[ohIndex], owSize = outputShape[owIndex];
  bool removeH = (khSize == 1 && ohSize == 1);
  bool removeW = (kwSize == 1 && owSize == 1);
  if (!removeH && !removeW)
    return failure();

  // Get new shapes and types for all operands by removing the size-1
  // dimension.
  using RTTBuilder = RankedTensorType::Builder;
  RankedTensorType newInputType =
      RTTBuilder(inputType).dropDim((removeH ? ohIndex : owIndex));
  RankedTensorType newKernelType =
      RTTBuilder(kernelType).dropDim((removeH ? khIndex : kwIndex));
  RankedTensorType newOutputType =
      RTTBuilder(outputType).dropDim((removeH ? ohIndex : owIndex));

  // Rank-reduce operands.
  Location loc = convOp.getLoc();
  Value newInput = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, input, newInputType);
  Value newKernel = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, kernel, newKernelType);
  Value newOutput = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, output, newOutputType);

  // Rank-reduce strides and dilations too.
  // TODO: dropDim 1-liner helper.
  auto strides =
      llvm::to_vector<4>(convOp.getStrides().template getValues<int64_t>());
  strides.erase(strides.begin() + (removeH ? 0 : 1));
  auto stridesAttr = rewriter.getI64VectorAttr(strides);

  auto dilations =
      llvm::to_vector<4>(convOp.getDilations().template getValues<int64_t>());
  dilations.erase(dilations.begin() + (removeH ? 0 : 1));
  auto dilationsAttr = rewriter.getI64VectorAttr(dilations);

  auto conv1DOp = rewriter.create<Conv1DOp>(
      loc, newOutputType, ValueRange{newInput, newKernel},
      ValueRange{newOutput}, stridesAttr, dilationsAttr);

  // Insert back.
  Value inserted = tensor::createCanonicalRankReducingInsertSliceOp(
      rewriter, loc, conv1DOp.getResult(0), output);
  rewriter.replaceOp(convOp, inserted);

  return conv1DOp;
}

template struct linalg::DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,
                                                              Conv1DNwcWcfOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
                                                              Conv1DNcwFcwOp>;

FailureOr<DepthwiseConv1DNwcWcOp>
DownscaleDepthwiseConv2DNhwcHwcOp::returningMatchAndRewrite(
    DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const {
  if (convOp.hasBufferSemantics())
    return failure(); // To be implemented.

  Value input = convOp.getInputs().front();
  Value kernel = convOp.getInputs().back();
  Value output = convOp.getOutputs().front();

  auto inputType = input.getType().dyn_cast<RankedTensorType>();
  auto kernelType = kernel.getType().dyn_cast<RankedTensorType>();
  auto outputType = output.getType().dyn_cast<RankedTensorType>();

  auto kernelShape = kernelType.getShape();
  auto outputShape = outputType.getShape();

  // Only handle the case where at least one of the window dimensions is
  // of size 1. Other cases can rely on tiling to reduce to such cases.
  int64_t khSize = kernelShape[0], kwSize = kernelShape[1];
  int64_t ohSize = outputShape[1], owSize = outputShape[2];
  bool removeH = (khSize == 1 && ohSize == 1);
  bool removeW = (kwSize == 1 && owSize == 1);
  if (!removeH && !removeW)
    return failure();

  // Get new shapes and types for all operands by removing the size-1
  // dimension.
  using RTTBuilder = RankedTensorType::Builder;
  RankedTensorType newInputType =
      RTTBuilder(inputType).dropDim((removeH ? 1 : 2));
  RankedTensorType newKernelType =
      RTTBuilder(kernelType).dropDim((removeH ? 0 : 1));
  RankedTensorType newOutputType =
      RTTBuilder(outputType).dropDim(removeH ? 1 : 2);

  // Rank-reduce operands.
  Location loc = convOp.getLoc();
  Value newInput = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, input, newInputType);
  Value newKernel = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, kernel, newKernelType);
  Value newOutput = tensor::createCanonicalRankReducingExtractSliceOp(
      rewriter, loc, output, newOutputType);

  // Rank-reduce strides and dilations too.
  // TODO: dropDim 1-liner helper.
  auto strides = llvm::to_vector<4>(convOp.getStrides().getValues<int64_t>());
  strides.erase(strides.begin() + (removeH ? 0 : 1));
  auto stridesAttr = rewriter.getI64VectorAttr(strides);

  auto dilations =
      llvm::to_vector<4>(convOp.getDilations().getValues<int64_t>());
  dilations.erase(dilations.begin() + (removeH ? 0 : 1));
  auto dilationsAttr = rewriter.getI64VectorAttr(dilations);

  auto conv1DOp = rewriter.create<DepthwiseConv1DNwcWcOp>(
      loc, newOutputType, ValueRange{newInput, newKernel},
      ValueRange{newOutput}, stridesAttr, dilationsAttr);

  // Insert back.
  Value inserted = tensor::createCanonicalRankReducingInsertSliceOp(
      rewriter, loc, conv1DOp.getResult(0), output);
  rewriter.replaceOp(convOp, inserted);

  return conv1DOp;
}

void linalg::populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
                                                  PatternBenefit benefit) {
  patterns.add<DownscaleSizeOneWindowed2DConvolution<linalg::Conv2DNhwcHwcfOp,
                                                     Conv1DNwcWcfOp>,
               DownscaleSizeOneWindowed2DConvolution<linalg::Conv2DNchwFchwOp,
                                                     Conv1DNcwFcwOp>,
               DownscaleDepthwiseConv2DNhwcHwcOp>(patterns.getContext(),
                                                  benefit);
}
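
// Example usage (illustrative sketch; `ctx` and `funcOp` are placeholder
// names):
//   RewritePatternSet patterns(ctx);
//   linalg::populateDecomposeConvolutionPatterns(patterns);
//   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));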