17 Type &storageType, int64_t &qmin,
29 }
else if (numBits <= 16) {
38 }
else if (numBits <= 32) {
67 double rmax,
double &scale,
68 int64_t &nudgedZeroPoint) {
70 const double qminDouble = qmin;
71 const double qmaxDouble = qmax;
72 scale = (rmax - rmin) / (qmaxDouble - qminDouble);
81 const double zeroPointFromMin = qminDouble - rmin / scale;
82 const double zeroPointFromMinError =
84 const double zeroPointFromMax = qmaxDouble - rmax / scale;
85 const double zeroPointFromMaxError =
88 const double zeroPointDouble = (zeroPointFromMinError < zeroPointFromMaxError)
94 if (zeroPointDouble < qminDouble) {
95 nudgedZeroPoint = qmin;
96 }
else if (zeroPointDouble > qmaxDouble) {
97 nudgedZeroPoint = qmax;
99 nudgedZeroPoint =
round(zeroPointDouble);
103 assert(nudgedZeroPoint >= qmin);
104 assert(nudgedZeroPoint <= qmax);
109 double rmax,
bool narrowRange,
110 Type expressedType,
bool isSigned) {
118 return (
emitError(loc,
"unsupported FakeQuant number of bits: ") << numBits,
125 if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
127 loc, flags, storageType, expressedType, 1.0, qmin, qmin, qmax);
131 int64_t nudgedZeroPoint;
135 expressedType, scale, nudgedZeroPoint,
140 Location loc,
unsigned numBits, int32_t quantizedDimension,
142 Type expressedType,
bool isSigned) {
143 size_t axisSize = rmins.size();
144 if (axisSize != rmaxs.size()) {
145 return (
emitError(loc,
"mismatched per-axis min and max size: ")
146 << axisSize <<
" vs. " << rmaxs.size(),
156 return (
emitError(loc,
"unsupported FakeQuant number of bits: ") << numBits,
162 scales.reserve(axisSize);
163 zeroPoints.reserve(axisSize);
164 for (
size_t axis = 0; axis != axisSize; ++axis) {
165 double rmin = rmins[axis];
166 double rmax = rmaxs[axis];
167 if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
168 scales.push_back(1.0);
169 zeroPoints.push_back(qmin);
174 int64_t nudgedZeroPoint;
176 scales.push_back(scale);
177 zeroPoints.push_back(nudgedZeroPoint);
182 loc, flags, storageType, expressedType, scales, zeroPoints,
183 quantizedDimension, qmin, qmax);
static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin, double rmax, double &scale, int64_t &nudgedZeroPoint)
static bool getDefaultStorageParams(unsigned numBits, bool narrowRange, bool isSigned, MLIRContext *ctx, Type &storageType, int64_t &qmin, int64_t &qmax)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
MLIRContext is the top-level object for a collection of MLIR operations.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
MLIRContext * getContext() const
Return the MLIRContext in which this type was uniqued.
DynamicAPInt round(const Fraction &f)
Fraction abs(const Fraction &f)
UniformQuantizedType fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin, double rmax, bool narrowRange, Type expressedType, bool isSigned=false)
Converts per-layer FakeQuant attributes to the corresponding type.
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed; this helps unify some of the attribute construction code.