doxygen/UniformSupport_8h_source.html

 //===- UniformSupport.h - Support utilities for uniform quant ---*- C++ -*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #ifndef MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_

 #define MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_


 #include <utility>


 #include "mlir/Dialect/Quant/IR/QuantTypes.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/Types.h"

 #include "llvm/ADT/APFloat.h"

 #include "llvm/ADT/APInt.h"

 #include "llvm/ADT/APSInt.h"


 namespace mlir {

 namespace quant {


 /// Performs type conversion from an arbitrary input type to a type

 /// that is expressed by a QuantizedType.

 ///

 /// This handles cases where the inputType is a supported primitive type

 /// (i.e. f32, bf16, etc) or a vector/tensor type based on a supported

 /// elemental type.

 ///

 /// Since conversion often involves introspecting some attributes of the

 /// input type in order to determine how to represent it, this is a two step

 /// process.

 struct ExpressedToQuantizedConverter {

   /// Creates a converter for the given input type.

   static ExpressedToQuantizedConverter forInputType(Type inputType);


   /// Converts the inputType to be based on the given elemental type,

   /// returning the new type (or nullptr and emit an error on failure).

   Type convert(QuantizedType elementalType) const;


   /// Whether the conversion is legal.

   explicit operator bool() const { return (bool)expressedType; }


   /// The input type that is being converted from.

   /// This may be an elemental or composite type.

   const Type inputType;


   /// Supported, elemental expressed type (i.e. f32).

   /// Will be nullptr if conversion is not supported.

   const Type expressedType;

 };


 /// Reference implementation of converting between real numbers and values

 /// represented by a UniformQuantizedType.

 /// Note that this is not expected to be speedy and may be superseded eventually

 /// by a more optimal implementation.

 /// Also, the interface assumes that quantization is done per-layer and will

 /// need to be wider for various per-channel schemes. As such, this is a

 /// placeholder.

 class UniformQuantizedValueConverter {

 public:

   explicit UniformQuantizedValueConverter(UniformQuantizedType uniformType)

       : UniformQuantizedValueConverter(

             uniformType.getScale(),

             static_cast<double>(uniformType.getZeroPoint()),

             static_cast<double>(uniformType.getStorageTypeMin()),

             static_cast<double>(uniformType.getStorageTypeMax()),

             uniformType.getStorageTypeIntegralWidth(), uniformType.isSigned()) {

     assert(isa<FloatType>(uniformType.getExpressedType()));

     assert(uniformType.getStorageType().isSignlessInteger());

   }


   UniformQuantizedValueConverter(double scale, double zeroPoint,

                                  double clampMin, double clampMax,

                                  uint32_t storageBitWidth, bool isSigned)

       : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),

         clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),

         clampMinDouble(clampMin), clampMaxDouble(clampMax),

         storageBitWidth(storageBitWidth), isSigned(isSigned),

         roundMode(APFloat::rmNearestTiesToAway) {}


   UniformQuantizedValueConverter(double scale, double zeroPoint,

                                  const APFloat &clampMin,

                                  const APFloat &clampMax,

                                  uint32_t storageBitWidth, bool isSigned)

       : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),

         clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),

         clampMinDouble(clampMin.convertToDouble()),

         clampMaxDouble(clampMax.convertToDouble()),

         storageBitWidth(storageBitWidth), isSigned(isSigned),

         roundMode(APFloat::rmNearestTiesToAway) {}


   virtual APInt quantizeFloatToInt(APFloat expressedValue) const {

     // This function is a performance critical code path in quantization

     // since it runs for each single float parameter value.


     // Specialize f32->u8/i8 case to optimize performance.

     if (&expressedValue.getSemantics() == &APFloat::IEEEsingle() &&

         storageBitWidth == 8 &&

         roundMode == llvm::APFloatBase::rmNearestTiesToAway) {

       return quantizeF32ToInt8(expressedValue);

     }


     bool lossy;

     expressedValue.convert(scale.getSemantics(), roundMode, &lossy);

     // fixedpoint = clamp(clampMin, clampMax, (

     //   roundHalfToEven(expressed / scale) + zeroPoint))

     APFloat scaled = (expressedValue / scale);

     scaled.roundToIntegral(roundMode);

     scaled.add(zeroPoint, roundMode);

     APFloat fixedpoint = llvm::minimum(scaled, clampMax);

     fixedpoint = llvm::maximum(fixedpoint, clampMin);


     llvm::APSInt result(storageBitWidth, !isSigned);

     fixedpoint.convertToInteger(result, roundMode, &lossy);


     return std::move(result);

   }


   int64_t quantizeFloatToInt64(APFloat expressedValue) const {

     APInt qValue = quantizeFloatToInt(std::move(expressedValue));

     return isSigned ? qValue.getSExtValue() : qValue.getZExtValue();

   }


   virtual ~UniformQuantizedValueConverter() = default;


 private:

   // An optimized implementation to quantize f32 to i8/u8 with C++ native

   // arithmetic.

   virtual APInt quantizeF32ToInt8(APFloat expressedValue) const {

     assert(&expressedValue.getSemantics() == &APFloat::IEEEsingle());

     assert(storageBitWidth == 8);

     assert(roundMode == llvm::APFloatBase::rmNearestTiesToAway);


     const float realValue = expressedValue.convertToFloat();


     const double scaled = realValue / scaleDouble + zeroPointDouble;

     // Round to nearest integer with halfway cases rounded away from zero.

     const double scaledRounded = std::round(scaled);

     const double clamped =

         std::min(std::max(scaledRounded, clampMinDouble), clampMaxDouble);


     uint64_t signlessResult;

     if (isSigned) {

       int64_t clampedInt = static_cast<int8_t>(clamped);

       memcpy(&signlessResult, &clampedInt, sizeof(clampedInt));

     } else {

       signlessResult = static_cast<uint8_t>(clamped);

     }

     return APInt(storageBitWidth, signlessResult);

   }


   // Keep both APFloat and double versions of the quantization parameters

   // around since they will be used in generic and specialized arithmetic,

   // respectively.

   const APFloat scale;

   const APFloat zeroPoint;

   const APFloat clampMin;

   const APFloat clampMax;


   const double scaleDouble;

   const double zeroPointDouble;

   const double clampMinDouble;

   const double clampMaxDouble;


   const uint32_t storageBitWidth;

   const bool isSigned;

   const llvm::APFloat::roundingMode roundMode;

 };


 /// An utility class to quantize an attribute by the per-axis quantization

 /// parameters. The size of the quantization dim in the converted elements

 /// attribute should match the size of scales/zeroPoints vectors in the

 /// quantization parameters.

 class UniformQuantizedPerAxisValueConverter {

 public:

   explicit UniformQuantizedPerAxisValueConverter(

       UniformQuantizedPerAxisType uniformType)

       : scales(uniformType.getScales()),

         zeroPoints(uniformType.getZeroPoints()),

         clampMin(static_cast<double>(uniformType.getStorageTypeMin())),

         clampMax(static_cast<double>(uniformType.getStorageTypeMax())),

         storageBitWidth(uniformType.getStorageTypeIntegralWidth()),

         isSigned(uniformType.isSigned()),

         quantizationDim(uniformType.getQuantizedDimension()) {

     assert(isa<FloatType>(uniformType.getExpressedType()));

     assert(uniformType.getStorageType().isSignlessInteger());

     assert(scales.size() == zeroPoints.size());

   }


   /// Quantize an Attribute by the quantization parameters. Return nullptr if

   /// the conversion fails or the input array isn't an ElementsAttr.

   ElementsAttr convert(Attribute realValue);


 private:

   /// Quantize an DenseFPElementsAttr by the quantization parameters.

   DenseElementsAttr convert(DenseFPElementsAttr attr);


   /// Get a uniform converter for the index-th chunk along the quantizationDim.

   /// All the elements in this chunk is quantized by the returned converter.

   UniformQuantizedValueConverter getPerChunkConverter(int index) const {

     UniformQuantizedValueConverter converter(scales[index], zeroPoints[index],

                                              clampMin, clampMax,

                                              storageBitWidth, isSigned);

     return converter;

   }


   const ArrayRef<double> scales;

   const ArrayRef<int64_t> zeroPoints;

   const APFloat clampMin;

   const APFloat clampMax;

   const uint32_t storageBitWidth;

   const bool isSigned;

   int32_t quantizationDim;

 };


 } // namespace quant

 } // namespace mlir


 #endif // MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_

Types.h

max
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:213

min
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:206

QuantTypes.h

getZeroPoint
static FailureOr< int64_t > getZeroPoint(Value val, bool signExtend)
Definition: TosaOps.cpp:2183

llvm::ArrayRef
Definition: LLVM.h:48

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::DenseElementsAttr
An attribute that represents a reference to a dense vector or tensor object.
Definition: BuiltinAttributes.h:82

mlir::DenseFPElementsAttr
An attribute that represents a reference to a dense float vector or tensor object.
Definition: BuiltinAttributes.h:913

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::quant::QuantizedType
Base class for all quantized types known to this dialect.
Definition: QuantTypes.h:50

mlir::quant::UniformQuantizedPerAxisType
Represents per-axis (also known as per-channel quantization).
Definition: QuantTypes.h:322

mlir::quant::UniformQuantizedPerAxisValueConverter
An utility class to quantize an attribute by the per-axis quantization parameters.
Definition: UniformSupport.h:176

mlir::quant::UniformQuantizedPerAxisValueConverter::convert
ElementsAttr convert(Attribute realValue)
Quantize an Attribute by the quantization parameters.
Definition: UniformSupport.cpp:52

mlir::quant::UniformQuantizedPerAxisValueConverter::UniformQuantizedPerAxisValueConverter
UniformQuantizedPerAxisValueConverter(UniformQuantizedPerAxisType uniformType)
Definition: UniformSupport.h:178

mlir::quant::UniformQuantizedType
Represents a family of uniform, quantized types.
Definition: QuantTypes.h:262

mlir::quant::UniformQuantizedValueConverter
Reference implementation of converting between real numbers and values represented by a UniformQuanti...
Definition: UniformSupport.h:61

mlir::quant::UniformQuantizedValueConverter::~UniformQuantizedValueConverter
virtual ~UniformQuantizedValueConverter()=default

mlir::quant::UniformQuantizedValueConverter::UniformQuantizedValueConverter
UniformQuantizedValueConverter(double scale, double zeroPoint, const APFloat &clampMin, const APFloat &clampMax, uint32_t storageBitWidth, bool isSigned)
Definition: UniformSupport.h:83

mlir::quant::UniformQuantizedValueConverter::quantizeFloatToInt64
int64_t quantizeFloatToInt64(APFloat expressedValue) const
Definition: UniformSupport.h:121

mlir::quant::UniformQuantizedValueConverter::quantizeFloatToInt
virtual APInt quantizeFloatToInt(APFloat expressedValue) const
Definition: UniformSupport.h:94

mlir::quant::UniformQuantizedValueConverter::UniformQuantizedValueConverter
UniformQuantizedValueConverter(double scale, double zeroPoint, double clampMin, double clampMax, uint32_t storageBitWidth, bool isSigned)
Definition: UniformSupport.h:74

mlir::quant::UniformQuantizedValueConverter::UniformQuantizedValueConverter
UniformQuantizedValueConverter(UniformQuantizedType uniformType)
Definition: UniformSupport.h:63

BuiltinTypes.h

mlir::presburger::round
DynamicAPInt round(const Fraction &f)
Definition: Fraction.h:136

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::quant::ExpressedToQuantizedConverter
Performs type conversion from an arbitrary input type to a type that is expressed by a QuantizedType.
Definition: UniformSupport.h:34

mlir::quant::ExpressedToQuantizedConverter::forInputType
static ExpressedToQuantizedConverter forInputType(Type inputType)
Creates a converter for the given input type.
Definition: UniformSupport.cpp:21

mlir::quant::ExpressedToQuantizedConverter::inputType
const Type inputType
The input type that is being converted from.
Definition: UniformSupport.h:47

mlir::quant::ExpressedToQuantizedConverter::convert
Type convert(QuantizedType elementalType) const
Converts the inputType to be based on the given elemental type, returning the new type (or nullptr an...
Definition: UniformSupport.cpp:35

mlir::quant::ExpressedToQuantizedConverter::expressedType
const Type expressedType
Supported, elemental expressed type (i.e.
Definition: UniformSupport.h:51