28 #define GEN_PASS_DEF_LOWERQUANTOPS
29 #include "mlir/Dialect/Quant/Transforms/Passes.h.inc"
// Returns the element type when `inputType` is a tensor type. The non-tensor
// (scalar) fallthrough return is elided in this excerpt — presumably it
// returns `inputType` unchanged; confirm against the full file.
35 Type getScalarType(Type inputType) {
36 if (
auto tensorType = dyn_cast<TensorType>(inputType))
37 return tensorType.getElementType();
// Returns the shape of `input` as mixed (static/dynamic) OpFoldResults when it
// is a tensor. The tensor branch body and the scalar (empty-shape) return are
// elided in this excerpt.
44 SmallVector<OpFoldResult> getScalarOrTensorShape(OpBuilder &builder,
45 Location loc, Value input) {
46 if (isa<TensorType>(input.getType()))
// Clones `referenceType` with `elementType` when the reference is a tensor
// (preserving its shape). The scalar fallthrough return is elided in this
// excerpt — presumably it returns `elementType` itself; confirm in full file.
54 Type getScalarOrTensorType(Type elementType, Type referenceType) {
55 if (
auto tensorType = dyn_cast<TensorType>(referenceType))
56 return tensorType.clone(elementType);
// Broadcasts `scalar` to match a reference type: if `referenceType` (parameter
// declaration elided in this excerpt) is not a tensor, the scalar case asserts
// that `referenceShape` is empty; otherwise the scalar is splatted into a
// tensor of the reference shape via tensor.splat.
63 Value getScalarOrTensorConstant(OpBuilder &builder, Location loc, Value scalar,
65 ArrayRef<OpFoldResult> referenceShape) {
67 auto tensorType = dyn_cast<TensorType>(referenceType);
// Scalar path: a non-tensor reference must carry no shape information.
69 assert(referenceShape.empty());
75 builder.create<tensor::SplatOp>(loc, scalar, referenceShape);
76 return tensorConstant;
// Reshapes an unranked tensor into a rank-1 tensor of its total element count
// so it can be processed with ranked ops. Returns the flattened tensor plus
// the original extent-tensor shape (for later restoration via
// restoreUnrankedTensorShape). Several declarations (shapeType,
// flatShapeType, flatInputType, the input parameter) are elided in this
// excerpt.
92 std::pair<Value, Value> flattenUnrankedTensor(OpBuilder &builder, Location loc,
95 auto *context = builder.getContext();
// Capture the dynamic shape and total element count of the input.
97 auto inputShape = builder.create<shape::ShapeOfOp>(loc, shapeType, input);
98 Value inputSize = builder.create<shape::NumElementsOp>(
99 loc, builder.getIndexType(), inputShape);
// Build the 1-D target shape [inputSize] as a tensor for tensor.reshape.
103 auto flatInputShape =
104 builder.create<tensor::FromElementsOp>(loc, flatShapeType, inputSize);
107 auto inputType = cast<UnrankedTensorType>(input.getType());
108 auto elementType = inputType.getElementType();
111 auto flatInput = builder.create<tensor::ReshapeOp>(loc, flatInputType, input,
// Second element preserves the original shape for the caller to restore.
113 return std::make_pair(flatInput, inputShape);
// Reshapes an unranked tensor into a rank-3 tensor of shape
// [dynamic, axisSize, dynamic], collapsing all dimensions left of `axis` and
// all dimensions right of it, while keeping the axis dimension (of known size
// `axisSize`) intact. This lets per-axis quantization logic address the
// quantized dimension at a fixed position (axis 1). Returns the flattened
// tensor and the original extent-tensor shape for later restoration.
// Some declarations (shapeType, sizeLeft/shapeLeft, sizeRight/shapeRight,
// flatShapeType, flatInputType) are elided in this excerpt.
138 std::pair<Value, Value>
139 flattenUnrankedTensorAroundAxis(OpBuilder &builder, Location loc, Value input,
140 int64_t axis, int64_t axisSize) {
142 auto *context = builder.getContext();
143 auto indexType = builder.getIndexType();
145 auto inputShape = builder.create<shape::ShapeOfOp>(loc, shapeType, input);
// Split points: [0, axis) on the left, [axis+1, rank) on the right.
148 auto axisValue = builder.create<arith::ConstantIndexOp>(loc, axis);
149 auto axisNextValue = builder.create<arith::ConstantIndexOp>(loc, axis + 1);
// Number of elements collapsed to the left of the axis.
152 .create<shape::SplitAtOp>(loc, TypeRange{shapeType, shapeType},
153 inputShape, axisValue)
156 builder.create<shape::NumElementsOp>(loc, indexType, shapeLeft);
// Number of elements collapsed to the right of the axis.
159 .create<shape::SplitAtOp>(loc, TypeRange{shapeType, shapeType},
160 inputShape, axisNextValue)
163 builder.create<shape::NumElementsOp>(loc, indexType, shapeRight);
// Target shape [sizeLeft, axisSize, sizeRight] as a tensor for tensor.reshape.
166 auto axisSizeValue = builder.create<arith::ConstantIndexOp>(loc, axisSize);
168 auto flatInputShape = builder.create<tensor::FromElementsOp>(
169 loc, flatShapeType, ValueRange{sizeLeft, axisSizeValue, sizeRight});
172 auto inputType = cast<UnrankedTensorType>(input.getType());
173 auto elementType = inputType.getElementType();
// Static result type: only the axis dimension is statically known.
175 {ShapedType::kDynamic, axisSize, ShapedType::kDynamic}, elementType);
176 auto flatInput = builder.create<tensor::ReshapeOp>(loc, flatInputType, input,
179 return std::make_pair(flatInput, inputShape);
// Inverse of the flatten helpers: reshapes a ranked `input` back to its
// original (unranked) shape using the saved extent tensor. The `inputShape`
// parameter declaration and `unrankedType` construction are elided in this
// excerpt.
190 Value restoreUnrankedTensorShape(OpBuilder &builder, Location loc, Value input,
192 auto inputType = cast<RankedTensorType>(input.getType());
193 auto elementType = inputType.getElementType();
195 return builder.create<tensor::ReshapeOp>(loc, unrankedType, input,
// Materializes the per-axis scales of `quantizedType` as a 1-D dense constant
// tensor of the expressed (float) type. The `tensorType`/`scalesAttr`
// construction between the map and the constant is elided in this excerpt.
208 Value materializePerChannelScales(OpBuilder &builder, Location loc,
209 UniformQuantizedPerAxisType quantizedType) {
210 auto scales = quantizedType.getScales();
211 auto expressedType = quantizedType.getExpressedType();
// One float attribute per channel scale.
212 auto scaleAttrs = llvm::map_to_vector(scales, [&](
double scale) -> Attribute {
213 return builder.getFloatAttr(expressedType, scale);
218 return builder.create<arith::ConstantOp>(loc, tensorType, scalesAttr);
// Materializes the per-axis zero points of `quantizedType` as a 1-D dense
// constant tensor of the storage (integer) type. Mirrors
// materializePerChannelScales; the `tensorType`/`zeroPointsAttr` construction
// is elided in this excerpt.
230 Value materializePerChannelZeroPoints(
231 OpBuilder &builder, Location loc,
232 UniformQuantizedPerAxisType quantizedType) {
233 auto zeroPoints = quantizedType.getZeroPoints();
234 auto storageType = quantizedType.getStorageType();
// One integer attribute per channel zero point.
235 auto zeroPointAttrs =
236 llvm::map_to_vector(zeroPoints, [&](int64_t zeroPoint) -> Attribute {
237 return builder.getIntegerAttr(storageType, zeroPoint);
242 return builder.create<arith::ConstantOp>(loc, tensorType, zeroPointsAttr);
// Materializes the block-wise (sub-channel) scales of `quantizedType` as a
// dense constant tensor of the expressed type. Unlike the per-channel variant,
// scales come from a DenseElementsAttr, hence the APFloat iteration. The
// `tensorType`/`scalesAttr` construction is elided in this excerpt.
254 Value materializeSubChannelScales(
255 OpBuilder &builder, Location loc,
256 UniformQuantizedSubChannelType quantizedType) {
257 auto scales = quantizedType.getScales();
258 auto expressedType = quantizedType.getExpressedType();
259 auto scaleAttrs = llvm::map_to_vector(
260 scales.getValues<APFloat>(), [&](APFloat scale) -> Attribute {
261 return builder.getFloatAttr(expressedType, scale);
266 return builder.create<arith::ConstantOp>(loc, tensorType, scalesAttr);
// Materializes the block-wise (sub-channel) zero points of `quantizedType` as
// a dense constant tensor of the storage type. Mirrors
// materializeSubChannelScales but iterates APInt values. The
// `tensorType`/`zeroPointsAttr` construction is elided in this excerpt.
278 Value materializeSubChannelZeroPoints(
279 OpBuilder &builder, Location loc,
280 UniformQuantizedSubChannelType quantizedType) {
281 auto zeroPoints = quantizedType.getZeroPoints();
282 auto storageType = quantizedType.getStorageType();
283 auto zeroPointAttrs = llvm::map_to_vector(
284 zeroPoints.getValues<APInt>(), [&](APInt zeroPoint) -> Attribute {
285 return builder.getIntegerAttr(storageType, zeroPoint);
290 return builder.create<arith::ConstantOp>(loc, tensorType, zeroPointsAttr);
// Clamps `input` (scalar or tensor of storage-type integers) to the storage
// type's min/max bounds, if the quantized type declares any. Uses signed or
// unsigned min/max ops depending on the type's signedness. The early-return
// body (when there are no bounds), the `else` keyword before the unsigned
// branch, and the final return are elided in this excerpt.
306 Value clampScalarOrTensor(OpBuilder &builder, Location loc, Value input,
307 ArrayRef<OpFoldResult> inputShape,
308 QuantizedType quantizedType) {
// No explicit bounds on the storage type: nothing to clamp.
311 if (!quantizedType.hasStorageTypeBounds())
315 auto inputType = input.getType();
316 auto storageType = quantizedType.getStorageType();
// Materialize the bounds as scalars, then broadcast to the input's shape.
317 auto storageMinScalar = builder.create<arith::ConstantIntOp>(
318 loc, quantizedType.getStorageTypeMin(), storageType);
319 auto storageMaxScalar = builder.create<arith::ConstantIntOp>(
320 loc, quantizedType.getStorageTypeMax(), storageType);
321 auto storageMin = getScalarOrTensorConstant(builder, loc, storageMinScalar,
322 inputType, inputShape);
323 auto storageMax = getScalarOrTensorConstant(builder, loc, storageMaxScalar,
324 inputType, inputShape);
// clamp(x) = min(max(x, lo), hi), signed or unsigned per the quantized type.
327 if (quantizedType.isSigned()) {
328 input = builder.create<arith::MaxSIOp>(loc, input, storageMin);
329 input = builder.create<arith::MinSIOp>(loc, input, storageMax);
331 input = builder.create<arith::MaxUIOp>(loc, input, storageMin);
332 input = builder.create<arith::MinUIOp>(loc, input, storageMax);
// Converts a float scalar/tensor to an integer one: fptosi when `isSigned`,
// fptoui otherwise. The `if (isSigned)` guard line between the two returns is
// elided in this excerpt.
338 Value convertFloatToInteger(OpBuilder &builder, Location loc, Value input,
339 Type resultType,
bool isSigned) {
341 return builder.create<arith::FPToSIOp>(loc, resultType, input);
342 return builder.create<arith::FPToUIOp>(loc, resultType, input);
// Converts an integer scalar/tensor to a float one: sitofp when `isSigned`,
// uitofp otherwise. The `if (isSigned)` guard line between the two returns is
// elided in this excerpt.
346 Value convertIntegerToFloat(OpBuilder &builder, Location loc, Value input,
347 Type resultType,
bool isSigned) {
349 return builder.create<arith::SIToFPOp>(loc, resultType, input);
350 return builder.create<arith::UIToFPOp>(loc, resultType, input);
// Lowers quantization of a float value to arith/tensor ops:
//   stored = clamp(convert_to_int(input / scale + zeroPoint))
// where the zero-point addition happens in the expressed (float) domain and is
// presumably guarded by a non-zero-zero-point check elided in this excerpt
// (the `if` around the zeroPoint handling is not visible here — confirm).
357 Value quantizeValue(OpBuilder &builder, Location loc, Value input,
358 ArrayRef<OpFoldResult> inputShape, Value scale,
359 Value zeroPoint, QuantizedType quantizedType) {
// Broadcast scale to the input's scalar-or-tensor shape.
361 auto inputType = input.getType();
362 scale = getScalarOrTensorConstant(builder, loc, scale, inputType, inputShape);
// Scale down the expressed value: input / scale.
365 auto scaledValue = builder.create<arith::DivFOp>(loc, input, scale);
368 Value storedValueFloat = scaledValue;
// Broadcast the (integer) zero point, convert it to float, and add it.
371 zeroPoint = getScalarOrTensorConstant(builder, loc, zeroPoint, inputType,
375 zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, scale.getType(),
376 quantizedType.isSigned());
380 builder.create<arith::AddFOp>(loc, scaledValue, zeroPoint);
// Round-trip into the storage integer type, then clamp to its bounds.
384 auto storageScalarOrTensorType =
385 getScalarOrTensorType(quantizedType.getStorageType(), inputType);
386 auto storedValueInt = convertFloatToInteger(builder, loc, storedValueFloat,
387 storageScalarOrTensorType,
388 quantizedType.isSigned());
391 auto storedValueClamped = clampScalarOrTensor(builder, loc, storedValueInt,
392 inputShape, quantizedType);
393 return storedValueClamped;
// Lowers dequantization of a stored integer value to arith/tensor ops:
//   result = (convert_to_float(input) - zeroPoint) * scale
// The zero-point subtraction is presumably guarded by a non-zero-zero-point
// check elided in this excerpt, and the final `return result;` is also not
// visible here.
399 Value dequantizeValue(OpBuilder &builder, Location loc, Value input,
400 ArrayRef<OpFoldResult> inputShape, Value scale,
401 Value zeroPoint, QuantizedType quantizedType) {
// Broadcast scale to the input's scalar-or-tensor shape.
403 auto inputType = input.getType();
404 scale = getScalarOrTensorConstant(builder, loc, scale, inputType, inputShape);
// Convert the stored integer into the expressed float domain.
407 auto result = convertIntegerToFloat(builder, loc, input, scale.getType(),
408 quantizedType.isSigned());
// Broadcast, float-convert, and subtract the zero point.
413 zeroPoint = getScalarOrTensorConstant(builder, loc, zeroPoint, inputType,
417 zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, scale.getType(),
418 quantizedType.isSigned());
421 result = builder.create<arith::SubFOp>(loc, result, zeroPoint);
// Scale back up into the expressed value range.
425 result = builder.create<arith::MulFOp>(loc, result, scale);
// Dispatches a ranked conversion to quantizeValue or dequantizeValue based on
// whether `op` is a quant.qcast or quant.dcast. Any other op is a programming
// error. The trailing `quantizedType` argument lines of each call are elided
// in this excerpt.
449 Value convertRanked(OpBuilder &builder, Location loc, Operation *op,
450 Value input, ArrayRef<OpFoldResult> inputShape, Value scale,
451 Value zeroPoint, QuantizedType quantizedType) {
452 if (isa<QuantizeCastOp>(op))
453 return quantizeValue(builder, loc, input, inputShape, scale, zeroPoint,
455 if (isa<DequantizeCastOp>(op))
456 return dequantizeValue(builder, loc, input, inputShape, scale, zeroPoint,
458 llvm_unreachable(
"unexpected quant op");
// Converts a ranked (or scalar) value with a per-layer (uniform) quantized
// type: materializes the single scale and zero point as scalar constants and
// delegates to convertRanked. The `scaleAttr`/`zeroPointAttr` declarations
// and the trailing `quantizedType` argument of convertRanked are elided in
// this excerpt.
473 Value convertPerLayerRanked(OpBuilder &builder, Location loc, Operation *op,
474 Value input, UniformQuantizedType quantizedType) {
476 auto expressedType = quantizedType.getExpressedType();
477 auto storageType = quantizedType.getStorageType();
// Scalar scale constant in the expressed (float) type.
479 builder.getFloatAttr(expressedType, quantizedType.getScale());
480 auto scale = builder.create<arith::ConstantOp>(loc, expressedType, scaleAttr);
// Scalar zero-point constant in the storage (integer) type.
482 builder.getIntegerAttr(storageType, quantizedType.getZeroPoint());
484 builder.create<arith::ConstantOp>(loc, storageType, zeroPointAttr);
486 auto inputShape = getScalarOrTensorShape(builder, loc, input);
487 return convertRanked(builder, loc, op, input, inputShape, scale, zeroPoint,
// Per-layer conversion entry point that also handles unranked tensors:
// flatten to rank 1 first, convert, then restore the original shape. The
// `inputShape` declaration, the `if (isUnranked)` guards, and the final
// return are elided in this excerpt.
502 Value convertPerLayer(OpBuilder &builder, Location loc, Operation *op,
503 Value input, UniformQuantizedType quantizedType) {
505 bool isUnranked = isa<UnrankedTensorType>(input.getType());
508 std::tie(input, inputShape) = flattenUnrankedTensor(builder, loc, input);
511 auto result = convertPerLayerRanked(builder, loc, op, input, quantizedType);
515 result = restoreUnrankedTensorShape(builder, loc, result, inputShape);
// Converts a ranked tensor with a per-channel (per-axis) quantized type by
// emitting a linalg.generic that, for each element, applies the scalar
// quantize/dequantize with the scale/zero-point of its channel. The channel
// constants are indexed via an affine map projecting onto `channelAxis`.
// Several lines (the `input` parameter, `zeroPoints` declaration, `initShape`,
// `channelAxisAffineMap` declaration, the generic op's result types/outputs,
// and the final result extraction/return) are elided in this excerpt.
532 Value convertPerChannelRanked(OpBuilder &builder, Location loc, Operation *op,
534 UniformQuantizedPerAxisType quantizedType,
535 int64_t channelAxis) {
536 auto *context = builder.getContext();
538 auto inputType = cast<RankedTensorType>(input.getType());
539 auto inputRank = inputType.getRank();
// 1-D constants holding one scale / zero point per channel.
541 auto scales = materializePerChannelScales(builder, loc, quantizedType);
543 materializePerChannelZeroPoints(builder, loc, quantizedType);
// Output element type flips domains: float input -> integer storage
// (quantize), integer input -> float expressed (dequantize).
545 auto elementType = isa<FloatType>(inputType.getElementType())
546 ? quantizedType.getStorageType()
547 : quantizedType.getExpressedType();
549 Value init = builder.create<tensor::EmptyOp>(loc, initShape, elementType);
// Elementwise: all dimensions are parallel.
551 SmallVector<utils::IteratorType> iteratorTypes(inputRank,
552 utils::IteratorType::parallel);
// Map (d0..dN) -> (d_channelAxis): scales/zeroPoints are indexed by channel.
554 inputRank, 0, builder.getAffineDimExpr(channelAxis), context);
555 SmallVector<AffineMap> indexingMaps{
556 builder.getMultiDimIdentityMap(inputRank), channelAxisAffineMap,
557 channelAxisAffineMap, builder.getMultiDimIdentityMap(inputRank)};
558 auto result = builder
559 .create<linalg::GenericOp>(
562 ValueRange{input, scales, zeroPoints},
564 indexingMaps, iteratorTypes,
565 [&](OpBuilder &builder, Location loc, ValueRange args) {
// args = {input element, scale, zero point, init element}.
566 assert(args.size() == 4);
567 auto input = args[0];
568 auto scale = args[1];
569 auto zeroPoint = args[2];
// Scalar conversion inside the generic body; empty shape = scalar.
572 convertRanked(builder, loc, op, input, {}, scale,
573 zeroPoint, quantizedType);
575 builder.create<linalg::YieldOp>(loc, result);
// Per-channel conversion entry point that also handles unranked tensors:
// flatten around the quantized axis (so it sits at a fixed position with
// static size), convert, then restore the original shape. The `input`
// parameter, `inputShape` declaration, the `if (isUnranked)` guards, the
// adjusted channel axis passed to convertPerChannelRanked, and the final
// return are elided in this excerpt.
593 Value convertPerChannel(OpBuilder &builder, Location loc, Operation *op,
595 UniformQuantizedPerAxisType quantizedType) {
597 bool isUnranked = isa<UnrankedTensorType>(input.getType());
598 int64_t channelAxis = quantizedType.getQuantizedDimension();
// Axis size is statically known: one scale per channel.
599 int64_t channelAxisSize = (int64_t)quantizedType.getScales().size();
602 std::tie(input, inputShape) = flattenUnrankedTensorAroundAxis(
603 builder, loc, input, channelAxis, channelAxisSize);
608 auto result = convertPerChannelRanked(builder, loc, op, input, quantizedType,
613 result = restoreUnrankedTensorShape(builder, loc, result, inputShape);
// Converts a ranked tensor with a sub-channel (block-wise) quantized type via
// a linalg.generic. Scales/zero points are indexed by dividing each quantized
// dimension's index by its block size (floorDiv); non-quantized dimensions map
// to constant 0. Structure mirrors convertPerChannelRanked. Several lines
// (the `input` parameter, `zeroPoints`/`initShape` declarations, the generic
// op's result types/outputs, and the final result extraction/return) are
// elided in this excerpt.
629 Value convertSubChannel(OpBuilder &builder, Location loc, Operation *op,
631 UniformQuantizedSubChannelType quantizedType) {
632 auto *context = builder.getContext();
634 auto inputType = cast<RankedTensorType>(input.getType());
635 auto inputRank = inputType.getRank();
// Dense constants holding the block-wise scales / zero points.
637 auto scales = materializeSubChannelScales(builder, loc, quantizedType);
639 materializeSubChannelZeroPoints(builder, loc, quantizedType);
// Output element type flips domains: float input -> integer storage
// (quantize), integer input -> float expressed (dequantize).
641 auto elementType = isa<FloatType>(inputType.getElementType())
642 ? quantizedType.getStorageType()
643 : quantizedType.getExpressedType();
645 Value init = builder.create<tensor::EmptyOp>(loc, initShape, elementType);
// Elementwise: all dimensions are parallel.
647 SmallVector<utils::IteratorType> iteratorTypes(inputRank,
648 utils::IteratorType::parallel);
649 const SmallVector<std::pair<int32_t, int64_t>> &blockSizeInfo =
650 quantizedType.getBlockSizeInfo();
// Default every dimension to index 0; quantized dimensions are overridden
// below with d_i floordiv blockSize so each block shares one scale/zp.
651 SmallVector<AffineExpr> affineExprs(inputRank,
652 builder.getAffineConstantExpr(0));
653 for (
auto [quantizedDimension, blockSize] : blockSizeInfo) {
654 affineExprs[quantizedDimension] =
655 builder.getAffineDimExpr(quantizedDimension).floorDiv(blockSize);
657 auto affineMap =
AffineMap::get(inputRank, 0, affineExprs, context);
658 SmallVector<AffineMap> indexingMaps{
659 builder.getMultiDimIdentityMap(inputRank), affineMap, affineMap,
660 builder.getMultiDimIdentityMap(inputRank)};
661 auto result = builder
662 .create<linalg::GenericOp>(
665 ValueRange{input, scales, zeroPoints},
667 indexingMaps, iteratorTypes,
668 [&](OpBuilder &builder, Location loc, ValueRange args) {
// args = {input element, scale, zero point, init element}.
669 assert(args.size() == 4);
670 auto input = args[0];
671 auto scale = args[1];
672 auto zeroPoint = args[2];
// Scalar conversion inside the generic body; empty shape = scalar.
675 convertRanked(builder, loc, op, input, {}, scale,
676 zeroPoint, quantizedType);
678 builder.create<linalg::YieldOp>(loc, result);
// Top-level dispatch over the three supported quantized type kinds:
// per-layer (uniform), per-channel (per-axis), and sub-channel (block-wise).
// Any other quantized type is a programming error.
698 Value convertQuantized(OpBuilder &builder, Location loc, Operation *op,
699 Value input, Type quantizedType) {
700 if (
auto uniformQuantizedType = dyn_cast<UniformQuantizedType>(quantizedType))
701 return convertPerLayer(builder, loc, op, input, uniformQuantizedType);
703 if (
auto uniformQuantizedPerAxisType =
704 dyn_cast<UniformQuantizedPerAxisType>(quantizedType))
705 return convertPerChannel(builder, loc, op, input,
706 uniformQuantizedPerAxisType);
708 if (
auto uniformQuantizedSubChannelType =
709 dyn_cast<UniformQuantizedSubChannelType>(quantizedType))
710 return convertSubChannel(builder, loc, op, input,
711 uniformQuantizedSubChannelType);
713 llvm_unreachable(
"unexpected quantized type");
// Conversion pattern lowering quant.dcast: first a quant.scast strips the
// quantized element type down to its storage integer type, then
// convertQuantized emits the arith/linalg dequantization math. The
// `quantizedType` declaration and the `return success();` are elided in this
// excerpt.
717 struct DequantizeCastOpConversion
718 :
public OpConversionPattern<quant::DequantizeCastOp> {
722 matchAndRewrite(quant::DequantizeCastOp op, OpAdaptor adaptor,
723 ConversionPatternRewriter &rewriter)
const override {
724 auto loc = op.getLoc();
725 auto input = op.getInput();
// The input's scalar type is the quantized type being lowered.
727 cast<QuantizedType>(getScalarType(op.getInput().getType()));
// Reinterpret the quantized value as its raw storage integers.
730 auto storageScalarOrTensorType =
731 getScalarOrTensorType(quantizedType.getStorageType(), input.getType());
732 input = rewriter.create<quant::StorageCastOp>(
733 loc, storageScalarOrTensorType, input);
735 auto result = convertQuantized(rewriter, loc, op, input, quantizedType);
737 rewriter.replaceOp(op, result);
// Conversion pattern lowering quant.qcast: convertQuantized emits the
// arith/linalg quantization math on the float input, then a quant.scast
// reinterprets the resulting storage integers as the quantized result type.
// The `return success();` is elided in this excerpt.
743 struct QuantizeCastOpConversion
744 :
public OpConversionPattern<quant::QuantizeCastOp> {
748 matchAndRewrite(quant::QuantizeCastOp op, OpAdaptor adaptor,
749 ConversionPatternRewriter &rewriter)
const override {
750 auto loc = op.getLoc();
751 auto input = op.getInput();
// The result's scalar type is the quantized type being lowered to.
752 auto quantizedType = getScalarType(op.getResult().getType());
755 auto result = convertQuantized(rewriter, loc, op, input, quantizedType);
// Wrap the raw storage integers back into the quantized result type.
758 rewriter.replaceOpWithNewOp<quant::StorageCastOp>(
759 op, op.getResult().getType(), result);
// Pass driver: marks the quant dialect illegal (except quant.scast, which the
// patterns deliberately emit) and arith/linalg/shape/tensor legal, then
// applies the two conversion patterns. The ConversionTarget/RewritePatternSet
// declarations, the applyPartialConversion call, and failure handling are
// elided in this excerpt.
764 struct LowerQuantOps :
public impl::LowerQuantOpsBase<LowerQuantOps> {
765 void runOnOperation()
override {
// quant.scast stays legal: it is the boundary op the lowering produces.
770 target.addLegalOp<quant::StorageCastOp>();
771 target.addIllegalDialect<quant::QuantDialect>();
772 target.addLegalDialect<arith::ArithDialect, linalg::LinalgDialect,
773 shape::ShapeDialect, tensor::TensorDialect>();
784 patterns.add<DequantizeCastOpConversion, QuantizeCastOpConversion>(
static MLIRContext * getContext(OpFoldResult val)
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
OpConversionPattern(MLIRContext *context, PatternBenefit benefit=1)
void populateLowerQuantOpsPatterns(RewritePatternSet &patterns)
RankedTensorType getExtentTensorType(MLIRContext *ctx, int64_t rank=ShapedType::kDynamic)
Alias type for extent tensors.
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
detail::constant_int_predicate_matcher m_Zero()
Matches a constant scalar / vector splat / tensor splat integer zero.
const FrozenRewritePatternSet & patterns
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.