doxygen/FakeQuantSupport_8cpp_source.html

 //===- FakeQuantSupport.cpp - Support utilities for FakeQuant ops ---------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/Quant/IR/QuantTypes.h"

 #include "mlir/Dialect/Quant/Utils/FakeQuantSupport.h"


 using namespace mlir;

 using namespace mlir::quant;


 // Selects the default integer storage type and the quantized value limits for
 // a FakeQuant bit width, using the hard-coded type mapping from TFLite:
 // widths of up to 8, 16 and 32 bits are stored in 8-, 16- and 32-bit integers
 // respectively, and the limits span the full signed or unsigned range of that
 // storage width. When `narrowRange` is set, the lower limit is raised by one.
 //
 // Returns true, leaving all outputs untouched, when `numBits` exceeds 32;
 // returns false after filling `storageType`, `qmin` and `qmax` otherwise.
 static bool getDefaultStorageParams(unsigned numBits, bool narrowRange,
                                     bool isSigned, MLIRContext *ctx,
                                     Type &storageType, int64_t &qmin,
                                     int64_t &qmax) {
   // Widths above 32 bits have no storage mapping.
   if (numBits > 32)
     return true;

   // Round the requested width up to the nearest supported storage width.
   const unsigned storageWidth = numBits <= 8 ? 8 : (numBits <= 16 ? 16 : 32);
   storageType = IntegerType::get(ctx, storageWidth);

   // Full range of the storage width, computed in 64 bits so that the unsigned
   // 32-bit maximum is representable.
   const int64_t one = 1;
   const int64_t lowest = isSigned ? -(one << (storageWidth - 1)) : 0;
   const int64_t highest = isSigned ? (one << (storageWidth - 1)) - 1
                                    : (one << storageWidth) - 1;

   // A narrow range drops the lowest quantized value.
   qmin = narrowRange ? lowest + 1 : lowest;
   qmax = highest;
   return false;
 }


 // Computes the quantization scale and an integral ("nudged") zero point that
 // map the real range [rmin, rmax] onto the quantized range [qmin, qmax].
 //
 // This is one specific nudging policy: if the real range does not contain 0.0
 // (0.0 < rmin < rmax or rmin < rmax < 0.0), the range is shifted so that it
 // does, while its width (rmax - rmin), and hence the scale, stays unchanged.
 // The zero point is derived from the shifted range. As a consequence, some
 // values that lie inside the original [rmin, rmax] range fall outside the
 // shifted range and are clamped during quantization.
 // TODO: we should nudge the scale as well, but that requires the
 // fake quant op used in the training to use the nudged scale as well.
 static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin,
                                        double rmax, double &scale,
                                        int64_t &nudgedZeroPoint) {
   const double qLow = static_cast<double>(qmin);
   const double qHigh = static_cast<double>(qmax);

   // The scale is the real-valued width of a single quantized step.
   scale = (rmax - rmin) / (qHigh - qLow);

   // The affine mapping can be solved for the zero point from either of the
   // two known (real value, quantized value) pairs: (rmin, qmin) or
   // (rmax, qmax). The arithmetic error of each candidate is roughly
   // machine_epsilon * (sum of absolute values of its terms), so pick the
   // candidate whose terms are smaller in magnitude.
   const double rminInSteps = rmin / scale;
   const double rmaxInSteps = rmax / scale;
   const double errorViaMin = std::abs(qLow) + std::abs(rminInSteps);
   const double errorViaMax = std::abs(qHigh) + std::abs(rmaxInSteps);
   const double realZeroPoint =
       errorViaMin < errorViaMax ? qLow - rminInSteps : qHigh - rmaxInSteps;

   // Snap the real-valued zero point to an integer inside [qmin, qmax].
   if (realZeroPoint < qLow)
     nudgedZeroPoint = qmin;
   else if (realZeroPoint > qHigh)
     nudgedZeroPoint = qmax;
   else
     nudgedZeroPoint = static_cast<int64_t>(std::round(realZeroPoint));

   // By construction, the nudged zero point should always be in range.
   assert(nudgedZeroPoint >= qmin);
   assert(nudgedZeroPoint <= qmax);
 }


 // Converts per-layer FakeQuant attributes to the corresponding
 // UniformQuantizedType.
 //
 // The storage type and quantized limits come from getDefaultStorageParams;
 // if it rejects `numBits`, an error is emitted at `loc` and a null type is
 // returned. When rmin and rmax differ by less than double epsilon, the scale
 // is fixed to 1.0 and the zero point to qmin; otherwise both are computed by
 // getNudgedScaleAndZeroPoint. The resulting type is built with getChecked.
 UniformQuantizedType
 mlir::quant::fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin,
                                   double rmax, bool narrowRange,
                                   Type expressedType, bool isSigned) {
   Type storageType;
   int64_t qmin;
   int64_t qmax;
   const bool unsupportedWidth = getDefaultStorageParams(
       numBits, narrowRange, isSigned, expressedType.getContext(), storageType,
       qmin, qmax);
   if (unsupportedWidth) {
     emitError(loc, "unsupported FakeQuant number of bits: ") << numBits;
     return nullptr;
   }

   // Start from the degenerate-range answer: min/max are close enough that the
   // tensor contents are treated as all 0.0s, so a scale of 1.0 with the zero
   // point at qmin quantizes to the zero point and dequantizes back to 0.0.
   double scale = 1.0;
   int64_t zeroPoint = qmin;

   // Any other range (the negated comparison also routes NaN widths here, as
   // the original branch order did) gets its scale and zero point nudged.
   const double rangeWidth = std::fabs(rmax - rmin);
   if (!(rangeWidth < std::numeric_limits<double>::epsilon()))
     getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, zeroPoint);

   const unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
   return UniformQuantizedType::getChecked(loc, flags, storageType,
                                           expressedType, scale, zeroPoint,
                                           qmin, qmax);
 }


 // Converts per-axis FakeQuant attributes to the corresponding
 // UniformQuantizedPerAxisType.
 //
 // `rmins` and `rmaxs` must have the same length; each index contributes one
 // scale and one zero point. On a length mismatch, or when
 // getDefaultStorageParams rejects `numBits`, an error is emitted at `loc` and
 // a null type is returned. For an index whose min and max differ by less than
 // double epsilon, the scale is 1.0 and the zero point is qmin; otherwise both
 // come from getNudgedScaleAndZeroPoint.
 UniformQuantizedPerAxisType mlir::quant::fakeQuantAttrsToType(
     Location loc, unsigned numBits, int32_t quantizedDimension,
     ArrayRef<double> rmins, ArrayRef<double> rmaxs, bool narrowRange,
     Type expressedType, bool isSigned) {
   size_t numAxes = rmins.size();
   if (rmaxs.size() != numAxes) {
     emitError(loc, "mismatched per-axis min and max size: ")
         << numAxes << " vs. " << rmaxs.size();
     return nullptr;
   }

   Type storageType;
   int64_t qmin;
   int64_t qmax;
   const bool unsupportedWidth = getDefaultStorageParams(
       numBits, narrowRange, isSigned, expressedType.getContext(), storageType,
       qmin, qmax);
   if (unsupportedWidth) {
     emitError(loc, "unsupported FakeQuant number of bits: ") << numBits;
     return nullptr;
   }

   SmallVector<double, 4> scales;
   SmallVector<int64_t, 4> zeroPoints;
   scales.reserve(numAxes);
   zeroPoints.reserve(numAxes);

   const double epsilon = std::numeric_limits<double>::epsilon();
   for (size_t i = 0; i < numAxes; ++i) {
     // Defaults cover the degenerate case where min and max coincide.
     double axisScale = 1.0;
     int64_t axisZeroPoint = qmin;

     // Every other range (NaN widths included, matching the original branch
     // order) has its scale and zero point computed by nudging.
     if (!(std::fabs(rmaxs[i] - rmins[i]) < epsilon))
       getNudgedScaleAndZeroPoint(qmin, qmax, rmins[i], rmaxs[i], axisScale,
                                  axisZeroPoint);

     scales.push_back(axisScale);
     zeroPoints.push_back(axisZeroPoint);
   }

   const unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
   return UniformQuantizedPerAxisType::getChecked(
       loc, flags, storageType, expressedType, scales, zeroPoints,
       quantizedDimension, qmin, qmax);
 }

getNudgedScaleAndZeroPoint
static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin, double rmax, double &scale, int64_t &nudgedZeroPoint)
Definition: FakeQuantSupport.cpp:66

getDefaultStorageParams
static bool getDefaultStorageParams(unsigned numBits, bool narrowRange, bool isSigned, MLIRContext *ctx, Type &storageType, int64_t &qmin, int64_t &qmax)
Definition: FakeQuantSupport.cpp:15

FakeQuantSupport.h

max
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:213

min
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:206

QuantTypes.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVector
Definition: LLVM.h:72

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
Definition: Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74

mlir::Type::getContext
MLIRContext * getContext() const
Return the MLIRContext in which this type was uniqued.
Definition: Types.cpp:35

mlir::quant::UniformQuantizedPerAxisType
Represents per-axis (also known as per-channel quantization).
Definition: QuantTypes.h:322

mlir::quant::UniformQuantizedPerAxisType::getChecked
static UniformQuantizedPerAxisType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:348

mlir::quant::UniformQuantizedType
Represents a family of uniform, quantized types.
Definition: QuantTypes.h:262

mlir::quant::UniformQuantizedType::getChecked
static UniformQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:292

mlir::presburger::round
DynamicAPInt round(const Fraction &f)
Definition: Fraction.h:136

mlir::presburger::abs
Fraction abs(const Fraction &f)
Definition: Fraction.h:107

mlir::quant::QuantizationFlags::Signed
@ Signed
Definition: QuantTypes.h:38

mlir::quant
Definition: Quant.h:25

mlir::quant::fakeQuantAttrsToType
UniformQuantizedType fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin, double rmax, bool narrowRange, Type expressedType, bool isSigned=false)
Converts per-layer FakeQuant attributes to the corresponding type.
Definition: FakeQuantSupport.cpp:108

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::emitError
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
Definition: Diagnostics.cpp:328

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509