TosaToLinalgNamed.cpp
1 //===- TosaToLinalgNamed.cpp - Lowering Tosa to Linalg Named Ops ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // These rewriters lower from the Tosa to the Linalg named ops.
10 //
11 //===----------------------------------------------------------------------===//
 12 
 13 #include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
 14 #include "mlir/Dialect/Arith/IR/Arith.h"
 15 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 16 #include "mlir/Dialect/Math/IR/Math.h"
 17 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 18 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 19 #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h"
 20 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
 24 #include "mlir/IR/Matchers.h"
 25 #include "mlir/IR/PatternMatch.h"
 26 #include "mlir/Transforms/DialectConversion.h"
 28 
 29 #include "llvm/ADT/Sequence.h"
 30 
31 #include <numeric>
32 #include <type_traits>
33 
34 using namespace mlir;
35 using namespace mlir::tosa;
36 
 37 static mlir::Value applyPad(Location loc, Value input, ArrayRef<int64_t> pad,
 38                             TypedAttr padAttr, OpBuilder &rewriter) {
39  // Input should be padded only if necessary.
40  if (llvm::all_of(pad, [](int64_t p) { return p == 0; }))
41  return input;
42 
43  ShapedType inputTy = cast<ShapedType>(input.getType());
44  Type inputETy = inputTy.getElementType();
45  auto inputShape = inputTy.getShape();
46 
47  assert((inputShape.size() * 2) == pad.size());
48 
49  SmallVector<int64_t, 4> paddedShape;
 50  SmallVector<OpFoldResult, 8> lowIndices;
 51  SmallVector<OpFoldResult, 8> highIndices;
52  for (size_t i : llvm::seq(inputShape.size())) {
53  auto lowPad = pad[i * 2];
54  auto highPad = pad[i * 2 + 1];
55  if (ShapedType::isDynamic(inputShape[i]))
56  paddedShape.push_back(inputShape[i]);
57  else
58  paddedShape.push_back(inputShape[i] + highPad + lowPad);
59  lowIndices.push_back(rewriter.getIndexAttr(lowPad));
60  highIndices.push_back(rewriter.getIndexAttr(highPad));
61  }
62 
63  Value padValue = rewriter.create<arith::ConstantOp>(loc, padAttr);
64 
65  return rewriter.create<tensor::PadOp>(
66  loc, RankedTensorType::get(paddedShape, inputETy), input, lowIndices,
67  highIndices, padValue);
68 }
69 
 70 static mlir::Value
 71 linalgIntBroadcastExtSIAdd(PatternRewriter &rewriter, Location loc, Value bias,
 72                            Value conv, Value result,
73  ArrayRef<AffineMap> indexingMaps) {
74  ShapedType resultTy = cast<ShapedType>(conv.getType());
75  return rewriter
76  .create<linalg::GenericOp>(
77  loc, resultTy, ValueRange({bias, conv}), result, indexingMaps,
78  getNParallelLoopsAttrs(resultTy.getRank()),
79  [](OpBuilder &builder, Location loc, ValueRange args) {
80  Value biasVal = args[0];
81  Type resType = args[1].getType();
82  if (resType != biasVal.getType()) {
83  biasVal = builder.create<arith::ExtSIOp>(loc, resType, biasVal);
84  }
85  Value added = builder.create<arith::AddIOp>(loc, biasVal, args[1]);
86  builder.create<linalg::YieldOp>(loc, added);
87  })
88  .getResult(0);
89 }
90 
91 // Construct the affine map that a linalg generic would use to broadcast the
92 // source tensor into the shape of the result tensor.
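// For example, broadcasting a [C] bias into an [N, H, W, C] result uses the
// map (d0, d1, d2, d3) -> (d3), while a single-element [1] source uses the
// constant map (d0, d1, d2, d3) -> (0).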
 93 static AffineMap getBroadcastingMap(PatternRewriter &rewriter, Value source,
 94                                     Value result) {
95  ShapedType resultTy = cast<ShapedType>(result.getType());
96  ShapedType sourceTy = cast<ShapedType>(source.getType());
97  const int64_t resultRank = resultTy.getRank();
98  const int64_t sourceRank = sourceTy.getRank();
99 
100  // The source tensor is broadcast to all the outer dimensions of the
101  // result tensor.
102  SmallVector<AffineExpr> sourceDims;
 103  // In the case of a rank-1 source tensor with a single element, TOSA
 104  // specifies that the value is broadcast to every element, so we need an
 105  // edge case that produces a constant map.
106  assert(sourceTy.hasStaticShape() &&
107  "Dynamic broadcasting shapes not supported!");
108  if (sourceRank == 1 && sourceTy.getDimSize(0) == 1) {
109  sourceDims.push_back(rewriter.getAffineConstantExpr(0));
110  } else {
111  for (auto dim : llvm::seq<int64_t>(0, sourceRank)) {
112  auto expr = rewriter.getAffineDimExpr(dim + resultRank - sourceRank);
113  sourceDims.push_back(expr);
114  }
115  }
116 
117  return AffineMap::get(/*dimCount=*/resultRank,
118  /*symbolCount=*/0, sourceDims, rewriter.getContext());
119 }
120 
121 // Broadcast the source value to all the outer dimensions of the result value.
122 // If required, the element type is expanded using an arith.extsi operation.
 123 static mlir::Value linalgBroadcastAndMaybeExtSI(PatternRewriter &rewriter,
 124                                                 Location loc, Value source,
125  Value result) {
126  ShapedType resultTy = cast<ShapedType>(result.getType());
127  const int64_t resultRank = resultTy.getRank();
 128  // Creating maps for the input and output of the broadcast-like generic op.
129  SmallVector<AffineMap, 2> indexingMaps;
130  indexingMaps.push_back(getBroadcastingMap(rewriter, source, result));
131  indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
132 
133  // Build the broadcast-like operation as a linalg.generic.
134  return rewriter
135  .create<linalg::GenericOp>(
136  loc, resultTy, ValueRange({source}), result, indexingMaps,
137  getNParallelLoopsAttrs(resultTy.getRank()),
138  [](OpBuilder &builder, Location loc, ValueRange args) {
139  Value biasVal = args[0];
140  Type resType = args[1].getType();
141  if (resType != biasVal.getType()) {
142  biasVal = builder.create<arith::ExtSIOp>(loc, resType, biasVal);
143  }
144  builder.create<linalg::YieldOp>(loc, biasVal);
145  })
146  .getResult(0);
147 }
148 
149 static mlir::Value reifyConstantDim(int64_t attr,
150  ImplicitLocOpBuilder &builder) {
151  return builder.create<arith::ConstantIndexOp>(attr);
152 }
153 
154 // Calculating the output width/height using the formula:
155 // H = ((IH+pad_top+pad_bottom-(dilation_y*(KH-1)+1))/stride_y)+1
156 // W = ((IW+pad_left+pad_right-(dilation_x*(KW-1)+1))/stride_x)+1
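// For example, IH = 8, pad_top = pad_bottom = 1, KH = 3, dilation_y = 1 and
// stride_y = 2 give H = ((8 + 1 + 1 - (1*(3-1)+1)) / 2) + 1 = (7 / 2) + 1 = 4.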
157 
 158 static mlir::Value getConvOrPoolOutputDim(Location loc, Value inputDim,
 159                                           int64_t padBeforeAttr,
160  int64_t padAfterAttr, Value kernelDim,
161  int64_t strideAttr,
162  int64_t dilationAttr,
163  OpBuilder &rewriter) {
164  ImplicitLocOpBuilder builder(loc, rewriter);
165  auto one = rewriter.create<arith::ConstantOp>(
166  loc, IntegerAttr::get(inputDim.getType(), 1));
167  Value padBefore = reifyConstantDim(padBeforeAttr, builder);
168  Value paddedBefore = builder.create<arith::AddIOp>(inputDim, padBefore);
169  Value padAfter = reifyConstantDim(padAfterAttr, builder);
170  Value paddedAfter = builder.create<arith::AddIOp>(paddedBefore, padAfter);
171 
172  Value subOne = builder.create<arith::SubIOp>(kernelDim, one);
173  Value dilation = reifyConstantDim(dilationAttr, builder);
174  Value dilated = builder.create<arith::MulIOp>(dilation, subOne);
175  Value addOne = builder.create<arith::AddIOp>(dilated, one);
176 
177  Value subtract = builder.create<arith::SubIOp>(paddedAfter, addOne);
178  Value stride = reifyConstantDim(strideAttr, builder);
179  Value divide = builder.create<arith::DivUIOp>(subtract, stride);
180  return builder.create<arith::AddIOp>(divide, one);
181 }
182 
183 // Creates a vector of the dynamic output dims for Conv2D and Depthwise_Conv2D
 184 static SmallVector<Value> inferDynamicDimsForConv(
 185     Location loc, Value input, Value weight, ShapedType resultTy,
186  ArrayRef<int64_t> padAttr, ArrayRef<int64_t> strideAttr,
187  ArrayRef<int64_t> dilationAttr, ArrayRef<int64_t> inputSizeDims,
188  ArrayRef<int64_t> kernelSizeDims, OpBuilder &rewriter) {
189  ShapedType inputTy = cast<ShapedType>(input.getType());
190  int64_t inputRank = inputTy.getRank();
191 
192  SmallVector<Value> dynDims;
193  dynDims.resize(resultTy.getRank());
194 
195  for (uint32_t i = 0, s = inputSizeDims.size(); i < s; ++i) {
196  int64_t inputDim = inputSizeDims[i];
197  int64_t kernelDim = kernelSizeDims[i];
198  if (resultTy.isDynamicDim(inputDim)) {
199  auto padTop = padAttr[i * 2];
200  auto padBottom = padAttr[i * 2 + 1];
201  auto stride = strideAttr[i];
202  auto dilation = dilationAttr[i];
203  Value initDynDim = rewriter.create<tensor::DimOp>(loc, input, inputDim);
204  Value kernelDynDim =
205  rewriter.create<tensor::DimOp>(loc, weight, kernelDim);
206  // H = F(IH, pad_top, pad_bottom, dilation_y, KH, stride_y)
207  dynDims[inputDim] =
208  getConvOrPoolOutputDim(loc, initDynDim, padTop, padBottom,
209  kernelDynDim, stride, dilation, rewriter);
210  }
211  }
212 
213  // Get the batch/channels dimensions.
214  for (int i = 0; i < inputRank; i++) {
215  if (resultTy.isDynamicDim(i) && !dynDims[i])
216  dynDims[i] = rewriter.create<tensor::DimOp>(loc, input, i);
217  }
218 
219  SmallVector<Value> filteredDims = condenseValues(dynDims);
220  return filteredDims;
221 }
222 
223 // Creates a map to collapse the last dimension of the Depthwise convolution op
224 // due to a shape mismatch
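// (the linalg.depthwise_conv_2d_nhwc_hwcm result has shape [N, H, W, C, M]
// while the TOSA result is [N, H, W, C * M], so the trailing channel and
// multiplier dimensions are folded into one).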
 225 static void createDepthwiseConvCollapseMap(
 226     int64_t outputRank, SmallVector<ReassociationExprs, 4> &reassociationMap,
227  OpBuilder &rewriter) {
228  reassociationMap.resize(outputRank);
229  for (int i = 0; i < outputRank; i++) {
230  reassociationMap[i].push_back(rewriter.getAffineDimExpr(i));
231  }
232  reassociationMap[outputRank - 1].push_back(
233  rewriter.getAffineDimExpr(outputRank));
234 }
235 
236 namespace {
237 
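// Converts tosa convolution ops (tosa.conv2d / tosa.conv3d) to the linalg
// named convolution selected by the template parameters, e.g. (sketch only)
// a float tosa.conv2d becomes a linalg.generic that broadcasts the bias into
// the output tensor followed by linalg.conv_2d_nhwc_fhwc (or the HWCF /
// quantized variants).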
238 template <typename TosaConvOp, typename LinalgConvOp, typename LinalgConvQOp>
239 class ConvConverter : public OpConversionPattern<TosaConvOp> {
240 public:
 241  using OpConversionPattern<TosaConvOp>::OpConversionPattern;
 242  LogicalResult
243  matchAndRewrite(TosaConvOp op, typename TosaConvOp::Adaptor adaptor,
244  ConversionPatternRewriter &rewriter) const final {
245  Location loc = op->getLoc();
246  Value input = op->getOperand(0);
247  Value weight = op->getOperand(1);
248  Value bias = op->getOperand(2);
249 
250  ShapedType inputTy = cast<ShapedType>(input.getType());
251  ShapedType weightTy = cast<ShapedType>(weight.getType());
252  ShapedType biasTy = cast<ShapedType>(bias.getType());
253  ShapedType resultTy = cast<ShapedType>(op->getResult(0).getType());
254 
255  Type inputETy = inputTy.getElementType();
256  Type resultETy = resultTy.getElementType();
257 
258  DenseI64ArrayAttr padAttr = op.getPadAttr();
259  DenseI64ArrayAttr strideTosaAttr = op.getStrideAttr();
260  DenseI64ArrayAttr dilationTosaAttr = op.getDilationAttr();
261 
262  auto failureOrMaybeZps = extractConvZpPair(op, rewriter);
263  if (llvm::failed(failureOrMaybeZps))
264  return failure();
265 
266  auto maybeZps = failureOrMaybeZps.value();
267 
268  if (!weightTy.hasStaticShape() || !biasTy.hasStaticShape())
269  return rewriter.notifyMatchFailure(
270  op, "tosa.conv ops require static shapes for weight and bias");
271 
272  if (inputETy.isUnsignedInteger())
273  return rewriter.notifyMatchFailure(
 274  op, "tosa.conv ops do not support unsigned integer input");
275 
276  llvm::SmallVector<int64_t> inputSizeDims;
277  llvm::SmallVector<int64_t> kernelSizeDims;
278  for (int i = 1; i < resultTy.getRank() - 1; i++) {
279  inputSizeDims.push_back(i);
280  kernelSizeDims.push_back(i);
281  }
282 
 283  SmallVector<Value> filteredDims = inferDynamicDimsForConv(
 284      loc, input, weight, resultTy, padAttr.asArrayRef(),
285  strideTosaAttr.asArrayRef(), dilationTosaAttr.asArrayRef(),
286  inputSizeDims, kernelSizeDims, rewriter);
287 
288  auto weightShape = weightTy.getShape();
289 
290  // Apply padding as necessary.
291  TypedAttr zeroAttr = rewriter.getZeroAttr(inputETy);
292  if (maybeZps) {
293  int64_t intMin =
294  APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth())
295  .getSExtValue();
296  int64_t intMax =
297  APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth())
298  .getSExtValue();
299 
300  if (maybeZps->inputZp < intMin || maybeZps->inputZp > intMax)
301  return rewriter.notifyMatchFailure(
302  op, "tosa.conv op quantization has zp outside of input range");
303 
304  zeroAttr = rewriter.getIntegerAttr(inputETy, maybeZps->inputZp);
305  }
306 
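// Zero-pad the batch and channel dimensions and forward the TOSA pad
// attribute for the spatial dimensions; applyPad expects a [low, high] pair
// per input dimension.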
 307  llvm::SmallVector<int64_t> pad;
 308  pad.resize(2, 0);
309  llvm::append_range(pad, padAttr.asArrayRef());
310  pad.resize(pad.size() + 2, 0);
311  input = applyPad(loc, input, pad, zeroAttr, rewriter);
312 
313  if (4 == inputTy.getRank()) {
314  // For 2D convolutions, we need to check if the target convolution op
315  // wants a HWCF kernel layout.
316  bool wantHwcf =
317  maybeZps ? std::is_same_v<LinalgConvQOp, linalg::Conv2DNhwcHwcfQOp>
318  : std::is_same_v<LinalgConvOp, linalg::Conv2DNhwcHwcfOp>;
319  if (wantHwcf) {
320  // Transpose the kernel to match dimension ordering of the linalg
321  // convolution operation.
322  // TODO(suderman): See if this can be efficiently folded - check whether
323  // the input is used anywhere else, if not fold the constant.
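// For a 2D convolution this builds the permutation [1, 2, 3, 0], i.e. it
// rotates the output-channel dimension to the back (FHWC -> HWCF).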
324  SmallVector<int32_t> weightPerm;
325  for (int i = 1; i < resultTy.getRank(); i++)
326  weightPerm.push_back(i);
327  weightPerm.push_back(0);
328 
329  SmallVector<int64_t> newWeightShape;
330  for (auto dim : weightPerm)
331  newWeightShape.push_back(weightShape[dim]);
332  auto weightPermAttr = rewriter.getI32TensorAttr(weightPerm);
333  Value weightPermValue =
334  rewriter.create<arith::ConstantOp>(loc, weightPermAttr);
335  Type newWeightTy =
336  RankedTensorType::get(newWeightShape, weightTy.getElementType());
337  weight = rewriter.create<tosa::TransposeOp>(loc, newWeightTy, weight,
338  weightPermValue);
339  }
340  }
341 
 342  // For Conv3D, transpose the kernel to match the dimension ordering of the
 343  // linalg convolution operation. Conv2D has a 1-1 mapping in linalg, so it is
 344  // better to map it directly and transpose later if desired.
345  if (5 == inputTy.getRank()) {
346  // TODO(suderman): See if this can be efficiently folded - check whether
347  // the input is used anywhere else, if not fold the constant.
348  SmallVector<int32_t> weightPerm;
349  for (int i = 1; i < resultTy.getRank(); i++)
350  weightPerm.push_back(i);
351  weightPerm.push_back(0);
352 
353  SmallVector<int64_t> newWeightShape;
354  for (auto dim : weightPerm)
355  newWeightShape.push_back(weightShape[dim]);
356  auto weightPermAttr = rewriter.getI32TensorAttr(weightPerm);
357  Value weightPermValue =
358  rewriter.create<arith::ConstantOp>(loc, weightPermAttr);
359  Type newWeightTy =
360  RankedTensorType::get(newWeightShape, weightTy.getElementType());
361  weight = rewriter.create<tosa::TransposeOp>(loc, newWeightTy, weight,
362  weightPermValue);
363  }
364 
365  // Extract the attributes for convolution.
366  ArrayRef<int64_t> stride = strideTosaAttr;
367  ArrayRef<int64_t> dilation = dilationTosaAttr;
368 
369  // Create the convolution op.
370  auto strideAttr = rewriter.getI64TensorAttr(stride);
371  auto dilationAttr = rewriter.getI64TensorAttr(dilation);
372 
373  Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
374  loc, resultTy.getShape(), resultETy, filteredDims);
375 
376  Value broadcastBias =
377  linalgBroadcastAndMaybeExtSI(rewriter, loc, bias, biasEmptyTensor);
378 
379  if (maybeZps) {
380  auto iZp = rewriter.getI32IntegerAttr(maybeZps->inputZp);
381  auto kZp = rewriter.getI32IntegerAttr(maybeZps->weightZp);
382 
383  auto iZpVal = rewriter.create<arith::ConstantOp>(loc, iZp);
384  auto kZpVal = rewriter.create<arith::ConstantOp>(loc, kZp);
385 
386  Value conv =
387  rewriter
388  .create<LinalgConvQOp>(
389  loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal},
390  ValueRange{broadcastBias}, strideAttr, dilationAttr)
391  ->getResult(0);
392 
393  rewriter.replaceOp(op, conv);
394  return success();
395  }
396 
397  Value conv = rewriter
398  .create<LinalgConvOp>(
399  loc, resultTy, ValueRange{input, weight},
400  ValueRange{broadcastBias}, strideAttr, dilationAttr)
401  ->getResult(0);
402 
403  rewriter.replaceOp(op, conv);
404  return success();
405  }
406 };
407 
408 class DepthwiseConvConverter
409  : public OpConversionPattern<tosa::DepthwiseConv2DOp> {
410 public:
 411  using OpConversionPattern<tosa::DepthwiseConv2DOp>::OpConversionPattern;
 412  LogicalResult
413  matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor,
414  ConversionPatternRewriter &rewriter) const final {
415  Location loc = op->getLoc();
416  Value input = op->getOperand(0);
417  Value weight = op->getOperand(1);
418  Value bias = op->getOperand(2);
419 
420  ShapedType inputTy = cast<ShapedType>(input.getType());
421  ShapedType weightTy = cast<ShapedType>(weight.getType());
422  ShapedType biasTy = cast<ShapedType>(bias.getType());
423  ShapedType resultTy = cast<ShapedType>(op->getResult(0).getType());
424  int64_t resultRank = resultTy.getRank();
425 
426  Type inputETy = inputTy.getElementType();
427  Type resultETy = resultTy.getElementType();
428 
429  auto padAttr = cast<DenseI64ArrayAttr>(op->getAttr("pad"));
430  auto strideTosaAttr = cast<DenseI64ArrayAttr>(op->getAttr("stride"));
431  auto dilationTosaAttr = cast<DenseI64ArrayAttr>(op->getAttr("dilation"));
432 
433  if (!weightTy.hasStaticShape() || !biasTy.hasStaticShape())
434  return rewriter.notifyMatchFailure(
435  op, "tosa.depthwise_conv ops require static shapes");
436 
437  // Compute output dynamic dims
 438  SmallVector<Value> filteredDims = inferDynamicDimsForConv(
 439      loc, input, weight, resultTy, padAttr.asArrayRef(),
440  strideTosaAttr.asArrayRef(), dilationTosaAttr.asArrayRef(),
441  /*inputSizeDims=*/{1, 2},
442  /*kernelSizeDims=*/{0, 1}, rewriter);
443 
444  auto failureOrMaybeZps = extractConvZpPair(op, rewriter);
445  if (llvm::failed(failureOrMaybeZps))
446  return failure();
447 
448  auto maybeZps = failureOrMaybeZps.value();
449 
450  auto weightShape = weightTy.getShape();
451  auto resultShape = resultTy.getShape();
452 
453  // Apply padding as necessary.
454  TypedAttr zeroAttr = rewriter.getZeroAttr(inputETy);
455  if (maybeZps) {
456  int64_t intMin =
457  APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth())
458  .getSExtValue();
459  int64_t intMax =
460  APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth())
461  .getSExtValue();
462 
463  if (maybeZps->inputZp < intMin || maybeZps->inputZp > intMax)
464  return rewriter.notifyMatchFailure(
465  op, "tosa.depthwise_conv op quantization has zp outside of input "
466  "range");
467 
468  zeroAttr = rewriter.getIntegerAttr(inputETy, maybeZps->inputZp);
469  }
470 
 471  llvm::SmallVector<int64_t> pad;
 472  pad.resize(2, 0);
473  llvm::append_range(pad, padAttr.asArrayRef());
474  pad.resize(pad.size() + 2, 0);
475 
476  input = applyPad(loc, input, pad, zeroAttr, rewriter);
477 
478  // Extract the attributes for convolution.
479  ArrayRef<int64_t> stride = strideTosaAttr;
480  ArrayRef<int64_t> dilation = dilationTosaAttr;
481 
482  // Create the convolution op.
483  auto strideAttr = rewriter.getI64TensorAttr(stride);
484  auto dilationAttr = rewriter.getI64TensorAttr(dilation);
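// The linalg depthwise result is kept 5-D as [N, OH, OW, C, M], where the
// channel multiplier M is weightShape[3]; it is collapsed to the TOSA
// [N, OH, OW, C * M] shape further below.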
485  ShapedType linalgConvTy =
486  RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2],
487  weightShape[2], weightShape[3]},
488  resultETy);
489 
490  auto resultZeroAttr = rewriter.getZeroAttr(resultETy);
491  Value emptyTensor = rewriter.create<tensor::EmptyOp>(
492  loc, linalgConvTy.getShape(), resultETy, filteredDims);
493  Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
494  Value zeroTensor = rewriter
495  .create<linalg::FillOp>(loc, ValueRange{zero},
496  ValueRange{emptyTensor})
497  .result();
498 
499  Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
500  loc, resultTy.getShape(), resultETy, filteredDims);
501 
502  // Broadcast the initial value to the output tensor before convolving.
503  SmallVector<AffineMap, 4> indexingMaps;
504  indexingMaps.push_back(getBroadcastingMap(rewriter, bias, biasEmptyTensor));
505  indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
506  indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
507 
508  if (!maybeZps) {
509  Value conv = rewriter
510  .create<linalg::DepthwiseConv2DNhwcHwcmOp>(
511  loc, linalgConvTy, ValueRange{input, weight},
512  ValueRange{zeroTensor}, strideAttr, dilationAttr)
513  .getResult(0);
514 
515  SmallVector<ReassociationExprs, 4> reassociationMap;
516  createDepthwiseConvCollapseMap(resultRank, reassociationMap, rewriter);
517  Value convReshape = rewriter.create<tensor::CollapseShapeOp>(
518  loc, resultTy, conv, reassociationMap);
519 
520  Value result =
521  rewriter
522  .create<linalg::GenericOp>(
523  loc, resultTy, ValueRange({bias, convReshape}),
524  biasEmptyTensor, indexingMaps,
525  getNParallelLoopsAttrs(resultRank),
526  [&](OpBuilder &nestedBuilder, Location nestedLoc,
527  ValueRange args) {
528  Value added = nestedBuilder.create<arith::AddFOp>(
529  loc, args[0], args[1]);
530  nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
531  })
532  .getResult(0);
533  rewriter.replaceOp(op, result);
534  } else {
535  IntegerAttr iZp = rewriter.getI32IntegerAttr(maybeZps->inputZp);
536  IntegerAttr wZp = rewriter.getI32IntegerAttr(maybeZps->weightZp);
537  auto iZpVal = rewriter.create<arith::ConstantOp>(loc, iZp);
538  auto kZpVal = rewriter.create<arith::ConstantOp>(loc, wZp);
539  Value conv =
540  rewriter
541  .create<linalg::DepthwiseConv2DNhwcHwcmQOp>(
542  loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal},
543  ValueRange{zeroTensor}, strideAttr, dilationAttr)
544  .getResult(0);
545  SmallVector<ReassociationExprs, 4> reassociationMap;
546  createDepthwiseConvCollapseMap(resultRank, reassociationMap, rewriter);
547  Value convReshape = rewriter.create<tensor::CollapseShapeOp>(
548  loc, resultTy, conv, reassociationMap);
 549  Value result = linalgIntBroadcastExtSIAdd(
 550      rewriter, loc, bias, convReshape, biasEmptyTensor, indexingMaps);
551  rewriter.replaceOp(op, result);
552  }
553  return success();
554  }
555 };
556 
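// tosa.matmul is a batched matrix multiply, [B, M, K] x [B, K, N] -> [B, M, N],
// so it maps directly onto linalg.batch_matmul / linalg.quantized_batch_matmul.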
557 class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
558 public:
 559  using OpConversionPattern<tosa::MatMulOp>::OpConversionPattern;
 560  LogicalResult
561  matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor,
562  ConversionPatternRewriter &rewriter) const final {
563  Location loc = op.getLoc();
564 
565  auto outputTy = cast<ShapedType>(op.getType());
566  auto outputElementTy = outputTy.getElementType();
567 
568  SmallVector<Value> dynDims;
569  dynDims.resize(cast<ShapedType>(op->getResult(0).getType()).getRank());
570 
571  if (!outputTy.hasRank() || outputTy.isDynamicDim(0)) {
572  dynDims[0] = rewriter.create<tensor::DimOp>(loc, op->getOperand(0), 0);
573  }
574 
575  if (!outputTy.hasRank() || outputTy.isDynamicDim(1)) {
576  dynDims[1] = rewriter.create<tensor::DimOp>(loc, op->getOperand(0), 1);
577  }
578 
579  if (!outputTy.hasRank() || outputTy.isDynamicDim(2)) {
580  dynDims[2] = rewriter.create<tensor::DimOp>(loc, op->getOperand(1), 2);
581  }
582 
583  SmallVector<Value> filteredDims = condenseValues(dynDims);
584 
585  auto zeroAttr = rewriter.getZeroAttr(outputElementTy);
586  Value zero = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
587  auto emptyTensor = rewriter.create<tensor::EmptyOp>(
588  loc, outputTy.getShape(), outputTy.getElementType(), filteredDims);
589  Value zeroTensor = rewriter
590  .create<linalg::FillOp>(loc, ValueRange{zero},
591  ValueRange{emptyTensor})
592  .result();
593  if (!op.getAZp() && !op.getBZp()) {
594  rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
595  op, TypeRange{op.getType()},
596  ValueRange{adaptor.getA(), adaptor.getB()}, ValueRange{zeroTensor});
597  return success();
598  }
599 
600  auto aZp = rewriter.create<arith::ConstantOp>(loc, op.getAZpAttr());
601  auto bZp = rewriter.create<arith::ConstantOp>(loc, op.getBZpAttr());
602  rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
603  op, TypeRange{op.getType()},
604  ValueRange{adaptor.getA(), adaptor.getB(), aZp, bZp}, zeroTensor);
605 
606  return success();
607  }
608 };
609 
610 class FullyConnectedConverter
611  : public OpConversionPattern<tosa::FullyConnectedOp> {
612 public:
 613  using OpConversionPattern<tosa::FullyConnectedOp>::OpConversionPattern;
 614  LogicalResult
615  matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor,
616  ConversionPatternRewriter &rewriter) const final {
617  Location loc = op.getLoc();
618  auto outputTy = cast<ShapedType>(op.getType());
619  auto input = op.getInput();
620  auto inputTy = cast<ShapedType>(input.getType());
621 
622  auto bias = op.getBias();
623 
624  auto weight = op.getWeight();
625  auto weightTy = cast<ShapedType>(weight.getType());
626  auto weightShape = weightTy.getShape();
627 
628  auto outputETy = outputTy.getElementType();
629 
630  SmallVector<Value> dynDims;
631  dynDims.resize(cast<ShapedType>(op->getResult(0).getType()).getRank());
632 
633  if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) {
634  dynDims[0] = rewriter.create<tensor::DimOp>(loc, input, 0);
635  }
636 
637  if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) {
638  dynDims[1] = rewriter.create<tensor::DimOp>(loc, weight, 0);
639  }
640 
641  SmallVector<Value> filteredDims = condenseValues(dynDims);
642 
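// tosa.fully_connected weights are laid out as [OC, IC]; transpose them to
// [IC, OC] so they can feed linalg.matmul / linalg.quantized_matmul directly.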
643  SmallVector<int64_t> permutation = {1, 0};
644  auto permutationAttr = rewriter.getI64TensorAttr(permutation);
645  Value permutationValue =
646  rewriter.create<arith::ConstantOp>(loc, permutationAttr);
647 
648  SmallVector<int64_t> newWeightShape = {weightShape[1], weightShape[0]};
649  Type newWeightTy =
650  RankedTensorType::get(newWeightShape, weightTy.getElementType());
651 
652  Value transposedWeight = rewriter.create<tosa::TransposeOp>(
653  loc, newWeightTy, weight, permutationValue);
654 
655  Value biasEmptyTensor = rewriter.create<tensor::EmptyOp>(
656  loc, outputTy.getShape(), outputETy, filteredDims);
657 
658  Value broadcastBias =
659  linalgBroadcastAndMaybeExtSI(rewriter, loc, bias, biasEmptyTensor);
660 
661  if (!op.getInputZp() && !op.getWeightZp()) {
662  Value matmul = rewriter
663  .create<linalg::MatmulOp>(
664  loc, TypeRange{op.getType()},
665  ValueRange{input, transposedWeight}, broadcastBias)
666  ->getResult(0);
667 
668  rewriter.replaceOp(op, matmul);
669  return success();
670  }
671 
672  auto inputZp = rewriter.create<arith::ConstantOp>(loc, op.getInputZpAttr());
673  auto outputZp =
674  rewriter.create<arith::ConstantOp>(loc, op.getWeightZpAttr());
675  Value matmul =
676  rewriter
677  .create<linalg::QuantizedMatmulOp>(
678  loc, TypeRange{op.getType()},
679  ValueRange{input, transposedWeight, inputZp, outputZp},
680  broadcastBias)
681  ->getResult(0);
682 
683  rewriter.replaceOp(op, matmul);
684  return success();
685  }
686 };
687 
688 class MaxPool2dConverter : public OpConversionPattern<tosa::MaxPool2dOp> {
689 public:
 690  using OpConversionPattern<tosa::MaxPool2dOp>::OpConversionPattern;
 691 
692  // Compute the dynamic output sizes of the maxpool operation.
693  static SmallVector<Value>
694  computeDynamicOutputSizes(tosa::MaxPool2dOp op, OpAdaptor adaptor,
695  ConversionPatternRewriter &rewriter) {
696  TensorType resultTy = op.getType();
697  Location loc = op.getLoc();
698 
699  Value input = adaptor.getInput();
700  ArrayRef<int64_t> kernel = op.getKernel();
701  ArrayRef<int64_t> pad = op.getPad();
702  ArrayRef<int64_t> stride = op.getStride();
703 
704  SmallVector<Value> dynamicDims;
705 
706  // Batch dimension
707  if (resultTy.isDynamicDim(0))
708  dynamicDims.push_back(rewriter.create<tensor::DimOp>(loc, input, 0));
709 
710  // Height/width dimensions
711  for (int64_t dim : {1, 2}) {
712  if (!resultTy.isDynamicDim(dim))
713  continue;
714 
715  // Index into the attribute arrays
716  int64_t index = dim - 1;
717 
718  // Input height/width
719  Value ihw = rewriter.create<tensor::DimOp>(loc, input, dim);
720 
721  // Kernel height/width
722  Value khw = rewriter.create<arith::ConstantIndexOp>(loc, kernel[index]);
723 
724  // Output height/width
725  Value ohw = getConvOrPoolOutputDim(loc, ihw, pad[index * 2],
726  pad[index * 2 + 1], khw, stride[index],
727  /*dilationAttr=*/1, rewriter);
728  dynamicDims.push_back(ohw);
729  }
730 
731  // Channel dimension
732  if (resultTy.isDynamicDim(3))
733  dynamicDims.push_back(rewriter.create<tensor::DimOp>(loc, input, 3));
734 
735  return dynamicDims;
736  }
737 
738  LogicalResult
739  matchAndRewrite(tosa::MaxPool2dOp op, OpAdaptor adaptor,
740  ConversionPatternRewriter &rewriter) const final {
741  Location loc = op.getLoc();
742  Value input = adaptor.getInput();
743  ShapedType inputTy = cast<ShapedType>(input.getType());
744 
745  bool isUnsigned = op.getType().getElementType().isUnsignedInteger();
746  ShapedType resultTy =
747  cast<ShapedType>(getTypeConverter()->convertType(op.getType()));
748  if (!resultTy)
749  return rewriter.notifyMatchFailure(op, "failed to convert type");
750  Type resultETy = inputTy.getElementType();
751 
752  SmallVector<Value> dynamicDims =
753  computeDynamicOutputSizes(op, adaptor, rewriter);
754 
755  // Determine what the initial value needs to be for the max pool op.
756  TypedAttr initialAttr;
757  if (resultETy.isF32() || resultETy.isBF16() || resultETy.isF16())
758  initialAttr = rewriter.getFloatAttr(
759  resultETy, APFloat::getLargest(
760  cast<FloatType>(resultETy).getFloatSemantics(), true));
761 
762  else if (isUnsigned)
763  initialAttr = rewriter.getIntegerAttr(
764  resultETy, APInt::getZero(resultETy.getIntOrFloatBitWidth()));
765  else if (isa<IntegerType>(resultETy))
766  initialAttr = rewriter.getIntegerAttr(
767  resultETy,
768  APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth()));
769 
770  if (!initialAttr)
771  return rewriter.notifyMatchFailure(
772  op, "Unsupported initial value for tosa.maxpool_2d op");
773 
774  // Apply padding as necessary.
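// The padding value is the same initial value used for the reduction (the
// most negative representable value), so padded cells never affect the max.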
 775  llvm::SmallVector<int64_t> pad;
 776  pad.resize(2, 0);
777  llvm::append_range(pad, op.getPad());
778  pad.resize(pad.size() + 2, 0);
779 
780  Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter);
781 
782  Value initialValue = rewriter.create<arith::ConstantOp>(loc, initialAttr);
783 
784  ArrayRef<int64_t> kernel = op.getKernel();
785  ArrayRef<int64_t> stride = op.getStride();
786 
787  Attribute strideAttr = rewriter.getI64VectorAttr(stride);
788  Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
789 
790  // Create the linalg op that performs pooling.
791  Value emptyTensor = rewriter.create<tensor::EmptyOp>(
792  loc, resultTy.getShape(), resultTy.getElementType(), dynamicDims);
793 
794  Value filledEmptyTensor =
795  rewriter.create<linalg::FillOp>(loc, initialValue, emptyTensor)
796  .result();
797 
798  Value fakeWindowDims =
799  rewriter.create<tensor::EmptyOp>(loc, kernel, resultETy);
800 
801  if (isUnsigned) {
802  rewriter.replaceOpWithNewOp<linalg::PoolingNhwcMaxUnsignedOp>(
803  op, ArrayRef<Type>{resultTy}, ValueRange{paddedInput, fakeWindowDims},
804  filledEmptyTensor, strideAttr, dilationAttr);
805  } else {
806  rewriter.replaceOpWithNewOp<linalg::PoolingNhwcMaxOp>(
807  op, ArrayRef<Type>{resultTy}, ValueRange{paddedInput, fakeWindowDims},
808  filledEmptyTensor, strideAttr, dilationAttr);
809  }
810  return success();
811  }
812 };
813 
814 class AvgPool2dConverter : public OpRewritePattern<tosa::AvgPool2dOp> {
815 public:
 816  using OpRewritePattern<tosa::AvgPool2dOp>::OpRewritePattern;
 817 
818  LogicalResult matchAndRewrite(tosa::AvgPool2dOp op,
819  PatternRewriter &rewriter) const final {
820  Location loc = op.getLoc();
821  Value input = op.getInput();
822  ShapedType inputTy = cast<ShapedType>(input.getType());
823  Type inElementTy = inputTy.getElementType();
824 
825  ShapedType resultTy = cast<ShapedType>(op.getType());
826  Type resultETy = cast<ShapedType>(op.getType()).getElementType();
827 
828  Type accETy = op.getAccType();
829  ShapedType accTy = resultTy.clone(accETy);
830 
831  auto dynamicDimsOr =
832  checkHasDynamicBatchDims(rewriter, op, {input, op.getOutput()});
833  if (!dynamicDimsOr.has_value())
834  return failure();
835  SmallVector<Value> dynamicDims = *dynamicDimsOr;
836 
837  // Apply padding as necessary.
 838  llvm::SmallVector<int64_t> pad;
 839  pad.resize(2, 0);
840  llvm::append_range(pad, op.getPad());
841  pad.resize(pad.size() + 2, 0);
842  TypedAttr padAttr = rewriter.getZeroAttr(inElementTy);
843  // Unsupported element type
844  if (!padAttr)
845  return failure();
846  Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter);
847 
848  auto initialAttr = rewriter.getZeroAttr(accETy);
849  Value initialValue = rewriter.create<arith::ConstantOp>(loc, initialAttr);
850 
851  ArrayRef<int64_t> kernel = op.getKernel();
852  ArrayRef<int64_t> stride = op.getStride();
853 
854  Attribute strideAttr = rewriter.getI64VectorAttr(stride);
855  Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
856 
857  // Create the linalg op that performs pooling.
858  Value poolEmptyTensor = rewriter.create<tensor::EmptyOp>(
859  loc, accTy.getShape(), accETy, dynamicDims);
860 
861  Value filledEmptyTensor =
862  rewriter
863  .create<linalg::FillOp>(loc, ValueRange{initialValue},
864  ValueRange{poolEmptyTensor})
865  .result();
866 
867  Value fakeWindowDims =
868  rewriter.create<tensor::EmptyOp>(loc, kernel, accETy);
869 
870  // Sum across the pooled region.
871  Value poolingOp = rewriter
872  .create<linalg::PoolingNhwcSumOp>(
873  loc, ArrayRef<Type>{accTy},
874  ValueRange{paddedInput, fakeWindowDims},
875  filledEmptyTensor, strideAttr, dilationAttr)
876  .getResult(0);
877 
878  // Normalize the summed value by the number of elements grouped in each
879  // pool.
880  Value iH = rewriter.create<tensor::DimOp>(loc, poolingOp, 1);
881  Value iW = rewriter.create<tensor::DimOp>(loc, poolingOp, 2);
882 
883  auto one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
884  iH = rewriter.create<arith::SubIOp>(loc, iH, one);
885  iW = rewriter.create<arith::SubIOp>(loc, iW, one);
886 
887  Value genericEmptyTensor = rewriter.create<tensor::EmptyOp>(
888  loc, resultTy.getShape(), resultETy, dynamicDims);
889 
890  auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank());
891  auto genericOp = rewriter.create<linalg::GenericOp>(
892  loc, ArrayRef<Type>({resultTy}), ValueRange{poolingOp},
893  ValueRange{genericEmptyTensor},
894  ArrayRef<AffineMap>({affineMap, affineMap}),
895  getNParallelLoopsAttrs(resultTy.getRank()),
896  [&](OpBuilder &b, Location loc, ValueRange args) {
897  auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
898 
 899  // Determines what portion of the valid input is covered by the
 900  // kernel.
901  auto padFn = [&](Value valid, Value pos, int64_t pad) -> Value {
902  if (pad == 0)
903  return valid;
904 
905  auto padVal = rewriter.create<arith::ConstantIndexOp>(loc, pad);
906  Value dpos = rewriter.create<arith::SubIOp>(loc, pos, padVal);
907 
908  Value offset = rewriter.create<arith::MinSIOp>(loc, dpos, zero);
909  return rewriter.create<arith::AddIOp>(loc, valid, offset)
910  ->getResult(0);
911  };
912 
913  auto coverageFn = [&](int64_t i, Value isize) -> Value {
914  Value strideVal =
915  rewriter.create<arith::ConstantIndexOp>(loc, stride[i - 1]);
916  Value val =
917  rewriter.create<arith::ConstantIndexOp>(loc, kernel[i - 1]);
918 
919  // Find the position relative to the input tensor's ends.
920  Value left = rewriter.create<linalg::IndexOp>(loc, i);
921  Value right = rewriter.create<arith::SubIOp>(loc, isize, left);
922  left = rewriter.create<arith::MulIOp>(loc, left, strideVal);
923  right = rewriter.create<arith::MulIOp>(loc, right, strideVal);
924 
925  // Determine how much padding was included.
926  val = padFn(val, left, pad[i * 2]);
927  val = padFn(val, right, pad[i * 2 + 1]);
928  return rewriter.create<arith::MaxSIOp>(loc, one, val);
929  };
930 
931  // Compute the indices from either end.
932  Value kH3 = coverageFn(1, iH);
933  Value kW3 = coverageFn(2, iW);
934 
935  // Compute the total number of elements and normalize.
936  auto count = rewriter.create<arith::IndexCastOp>(
937  loc, rewriter.getI32Type(),
938  rewriter.create<arith::MulIOp>(loc, kH3, kW3));
939 
 940  // Divide by the number of summed values. For floats this is just
 941  // a division; for quantized values the input normalization has
 942  // to be applied.
943  Value poolVal = args[0];
944  if (isa<FloatType>(accETy)) {
945  auto countF = rewriter.create<arith::SIToFPOp>(loc, accETy, count);
946  poolVal = rewriter.create<arith::DivFOp>(loc, poolVal, countF)
947  ->getResult(0);
948  if (accETy.getIntOrFloatBitWidth() >
949  resultETy.getIntOrFloatBitWidth())
950  poolVal =
951  rewriter.create<arith::TruncFOp>(loc, resultETy, poolVal);
952  } else {
953 
954  // If we have quantization information we need to apply an offset
955  // for the input zp value.
956  if (op.getInputZp()) {
957  auto inputZp =
958  rewriter.create<arith::ConstantOp>(loc, op.getInputZpAttr());
959  Value offset =
960  rewriter.create<arith::MulIOp>(loc, accETy, count, inputZp);
961  poolVal =
962  rewriter.create<arith::SubIOp>(loc, accETy, poolVal, offset);
963  }
964 
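// The block below builds a fixed-point approximation of 1 / count for
// tosa.apply_scale: multiplier / 2^shift ~= 1 / count with about 30
// fractional bits of precision.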
965  // Compute: k = 32 - count_leading_zeros(value - 1)
966  Value one32 = rewriter.create<arith::ConstantOp>(
967  loc, rewriter.getI32IntegerAttr(1));
968  Value thirtyTwo32 = rewriter.create<arith::ConstantOp>(
969  loc, rewriter.getI32IntegerAttr(32));
970 
971  Value countSubOne =
972  rewriter.create<arith::SubIOp>(loc, count, one32);
973  Value leadingZeros =
974  rewriter.create<math::CountLeadingZerosOp>(loc, countSubOne);
975  Value k =
976  rewriter.create<arith::SubIOp>(loc, thirtyTwo32, leadingZeros);
977 
978  // Compute: numerator = ((1 << 30) + 1) << k
979  Value k64 =
980  rewriter.create<arith::ExtUIOp>(loc, rewriter.getI64Type(), k);
981  Value thirtyShiftPlusOne = rewriter.create<arith::ConstantOp>(
982  loc, rewriter.getI64IntegerAttr((1 << 30) + 1));
983  Value numerator =
984  rewriter.create<arith::ShLIOp>(loc, thirtyShiftPlusOne, k64);
985 
986  // Compute: scale.multiplier = numerator / value;
987  Value count64 = rewriter.create<arith::ExtUIOp>(
988  loc, rewriter.getI64Type(), count);
989  Value multiplier =
990  rewriter.create<arith::DivUIOp>(loc, numerator, count64);
991  multiplier = rewriter.create<arith::TruncIOp>(
992  loc, rewriter.getI32Type(), multiplier);
993 
994  // Compute: scale.shift = 30 + k
995  Value k8 =
996  rewriter.create<arith::TruncIOp>(loc, rewriter.getI8Type(), k);
997  Value thirty8 = rewriter.create<arith::ConstantOp>(
998  loc, rewriter.getI8IntegerAttr(30));
999  Value shift = rewriter.create<arith::AddIOp>(loc, k8, thirty8);
1000 
1001  auto scaled =
1002  rewriter
1003  .create<tosa::ApplyScaleOp>(loc, rewriter.getI32Type(),
1004  poolVal, multiplier, shift,
1005  rewriter.getBoolAttr(false))
1006  .getResult();
1007 
1008  // If we have quantization information we need to apply output
1009  // zeropoint.
1010  if (op.getOutputZp()) {
1011  auto outputZp =
1012  rewriter.create<arith::ConstantOp>(loc, op.getOutputZpAttr());
1013  scaled = rewriter.create<arith::AddIOp>(loc, scaled, outputZp)
1014  .getResult();
1015  }
1016 
1017  // Apply Clip.
1018  int64_t outBitwidth = resultETy.getIntOrFloatBitWidth();
1019 
1020  auto min = rewriter.create<arith::ConstantIntOp>(
1021  loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(),
1022  accETy);
1023  auto max = rewriter.create<arith::ConstantIntOp>(
1024  loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(),
1025  accETy);
1026  auto clamp = clampIntHelper(loc, scaled, min, max, rewriter,
1027  /*isUnsigned=*/false);
1028 
1029  poolVal = clamp;
1030  // Convert type.
1031  if (resultETy != clamp.getType()) {
1032  poolVal =
1033  rewriter.create<arith::TruncIOp>(loc, resultETy, poolVal);
1034  }
1035  }
1036 
1037  rewriter.create<linalg::YieldOp>(loc, poolVal);
1038  });
1039 
1040  rewriter.replaceOp(op, genericOp.getResult(0));
1041  return success();
1042  }
1043 };
1044 
1045 class TransposeConverter : public OpRewritePattern<tosa::TransposeOp> {
1046 public:
 1047  using OpRewritePattern<tosa::TransposeOp>::OpRewritePattern;
 1048 
1049  LogicalResult matchAndRewrite(tosa::TransposeOp op,
1050  PatternRewriter &rewriter) const final {
1051  SmallVector<int32_t> constantPerms;
1052  if (failed(op.getConstantPerms(constantPerms)))
1053  return failure();
1054 
1055  Location loc = op.getLoc();
1056  // The verifier should have made sure we have a valid TOSA permutation
1057  // tensor. isPermutationVector doesn't actually check the TOSA perms we
1058  // expect.
1059  SmallVector<OpFoldResult> inputSizes =
1060  tensor::getMixedSizes(rewriter, loc, op.getInput1());
1061  auto permutedSizes =
1062  applyTOSAPermutation<OpFoldResult>(inputSizes, constantPerms);
1063 
1064  auto permutedInit = rewriter.create<tensor::EmptyOp>(
1065  loc, permutedSizes, op.getInput1().getType().getElementType());
1066  rewriter.replaceOpWithNewOp<linalg::TransposeOp>(
1067  op, op.getInput1(), permutedInit,
1068  llvm::to_vector(llvm::map_range(
1069  constantPerms, [](int32_t v) -> int64_t { return v; })));
1070  return success();
1071  }
1072 };
1073 } // namespace
1074 
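// A minimal usage sketch (assumes a conversion pass that already has a
// TypeConverter and a ConversionTarget set up):
//   RewritePatternSet patterns(ctx);
//   TosaToLinalgNamedOptions options;
//   populateTosaToLinalgNamedConversionPatterns(typeConverter, &patterns,
//                                               options);
//   (void)applyPartialConversion(funcOp, target, std::move(patterns));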
 1075 void mlir::tosa::populateTosaToLinalgNamedConversionPatterns(
 1076     const TypeConverter &converter, RewritePatternSet *patterns,
1077  const TosaToLinalgNamedOptions &options) {
1078  if (options.preferConv2DKernelLayoutHWCF) {
1079  patterns->add<ConvConverter<tosa::Conv2DOp, linalg::Conv2DNhwcHwcfOp,
1080  linalg::Conv2DNhwcHwcfQOp>>(
1081  patterns->getContext());
1082  } else {
1083  patterns->add<ConvConverter<tosa::Conv2DOp, linalg::Conv2DNhwcFhwcOp,
1084  linalg::Conv2DNhwcFhwcQOp>>(
1085  patterns->getContext());
1086  }
1087  patterns->add<
1088  // clang-format off
1089  ConvConverter<tosa::Conv3DOp, linalg::Conv3DNdhwcDhwcfOp, linalg::Conv3DNdhwcDhwcfQOp>,
1090  DepthwiseConvConverter,
1091  MatMulConverter,
1092  AvgPool2dConverter,
1093  FullyConnectedConverter,
1094  TransposeConverter
1095  >(patterns->getContext());
1096 
1097  patterns->add<
1098  MaxPool2dConverter
1099  >(converter, patterns->getContext());
1100  // clang-format on
1101 }