9 #ifndef MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_
10 #define MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_
17 #include "llvm/ADT/APFloat.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/APSInt.h"
65 uniformType.getScale(),
66 static_cast<double>(uniformType.getZeroPoint()),
67 static_cast<double>(uniformType.getStorageTypeMin()),
68 static_cast<double>(uniformType.getStorageTypeMax()),
69 uniformType.getStorageTypeIntegralWidth(), uniformType.isSigned()) {
70 assert(isa<FloatType>(uniformType.getExpressedType()));
71 assert(uniformType.getStorageType().isSignlessInteger());
75 double clampMin,
double clampMax,
76 uint32_t storageBitWidth,
bool isSigned)
77 : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),
78 clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),
79 clampMinDouble(clampMin), clampMaxDouble(clampMax),
80 storageBitWidth(storageBitWidth), isSigned(isSigned),
81 roundMode(APFloat::rmNearestTiesToAway) {}
84 const APFloat &clampMin,
85 const APFloat &clampMax,
86 uint32_t storageBitWidth,
bool isSigned)
87 : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),
88 clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),
89 clampMinDouble(clampMin.convertToDouble()),
90 clampMaxDouble(clampMax.convertToDouble()),
91 storageBitWidth(storageBitWidth), isSigned(isSigned),
92 roundMode(APFloat::rmNearestTiesToAway) {}
99 if (&expressedValue.getSemantics() == &APFloat::IEEEsingle() &&
100 storageBitWidth == 8 &&
101 roundMode == llvm::APFloatBase::rmNearestTiesToAway) {
102 return quantizeF32ToInt8(expressedValue);
106 expressedValue.convert(scale.getSemantics(), roundMode, &lossy);
109 APFloat scaled = (expressedValue / scale);
110 scaled.roundToIntegral(roundMode);
111 scaled.add(zeroPoint, roundMode);
112 APFloat fixedpoint = llvm::minimum(scaled, clampMax);
113 fixedpoint = llvm::maximum(fixedpoint, clampMin);
115 llvm::APSInt result(storageBitWidth, !isSigned);
116 fixedpoint.convertToInteger(result, roundMode, &lossy);
118 return std::move(result);
123 return isSigned ? qValue.getSExtValue() : qValue.getZExtValue();
131 virtual APInt quantizeF32ToInt8(APFloat expressedValue)
const {
132 assert(&expressedValue.getSemantics() == &APFloat::IEEEsingle());
133 assert(storageBitWidth == 8);
134 assert(roundMode == llvm::APFloatBase::rmNearestTiesToAway);
136 const float realValue = expressedValue.convertToFloat();
138 const double scaled = realValue / scaleDouble + zeroPointDouble;
140 const double scaledRounded =
std::round(scaled);
141 const double clamped =
144 uint64_t signlessResult;
146 int64_t clampedInt =
static_cast<int8_t
>(clamped);
147 memcpy(&signlessResult, &clampedInt,
sizeof(clampedInt));
149 signlessResult =
static_cast<uint8_t
>(clamped);
151 return APInt(storageBitWidth, signlessResult);
158 const APFloat zeroPoint;
159 const APFloat clampMin;
160 const APFloat clampMax;
162 const double scaleDouble;
163 const double zeroPointDouble;
164 const double clampMinDouble;
165 const double clampMaxDouble;
167 const uint32_t storageBitWidth;
169 const llvm::APFloat::roundingMode roundMode;
180 : scales(uniformType.getScales()),
181 zeroPoints(uniformType.getZeroPoints()),
182 clampMin(static_cast<double>(uniformType.getStorageTypeMin())),
183 clampMax(static_cast<double>(uniformType.getStorageTypeMax())),
184 storageBitWidth(uniformType.getStorageTypeIntegralWidth()),
185 isSigned(uniformType.isSigned()),
186 quantizationDim(uniformType.getQuantizedDimension()) {
187 assert(isa<FloatType>(uniformType.getExpressedType()));
188 assert(uniformType.getStorageType().isSignlessInteger());
189 assert(scales.size() == zeroPoints.size());
205 storageBitWidth, isSigned);
211 const APFloat clampMin;
212 const APFloat clampMax;
213 const uint32_t storageBitWidth;
215 int32_t quantizationDim;
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
An attribute that represents a reference to a dense vector or tensor object.
An attribute that represents a reference to a dense float vector or tensor object.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Base class for all quantized types known to this dialect.
DynamicAPInt round(const Fraction &f)
Include the generated interface declarations.
Performs type conversion from an arbitrary input type to a type that is expressed by a QuantizedType.
static ExpressedToQuantizedConverter forInputType(Type inputType)
Creates a converter for the given input type.
const Type inputType
The input type that is being converted from.
Type convert(QuantizedType elementalType) const
Converts the inputType to be based on the given elemental type, returning the new type (or nullptr and emit an error on failure).
const Type expressedType
Supported, elemental expressed type (e.g. f32).