9 #ifndef MLIR_DIALECT_QUANT_UNIFORMSUPPORT_H_ 10 #define MLIR_DIALECT_QUANT_UNIFORMSUPPORT_H_ 17 #include "llvm/ADT/APFloat.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/APSInt.h" 65 uniformType.getScale(),
66 static_cast<double>(uniformType.getZeroPoint()),
67 static_cast<double>(uniformType.getStorageTypeMin()),
68 static_cast<double>(uniformType.getStorageTypeMax()),
69 uniformType.getStorageTypeIntegralWidth(), uniformType.isSigned()) {
70 assert(uniformType.getExpressedType().isa<
FloatType>());
71 assert(uniformType.getStorageType().isSignlessInteger());
75 double clampMin,
double clampMax,
76 uint32_t storageBitWidth,
bool isSigned)
77 : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),
78 clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),
79 clampMinDouble(clampMin), clampMaxDouble(clampMax),
80 storageBitWidth(storageBitWidth), isSigned(isSigned),
81 roundMode(APFloat::rmNearestTiesToAway) {}
84 const APFloat &clampMin,
85 const APFloat &clampMax,
86 uint32_t storageBitWidth,
bool isSigned)
87 : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),
88 clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),
89 clampMinDouble(clampMin.convertToDouble()),
90 clampMaxDouble(clampMax.convertToDouble()),
91 storageBitWidth(storageBitWidth), isSigned(isSigned),
92 roundMode(APFloat::rmNearestTiesToAway) {}
99 if (&expressedValue.getSemantics() == &APFloat::IEEEsingle() &&
100 storageBitWidth == 8 &&
101 roundMode == llvm::APFloatBase::rmNearestTiesToAway) {
102 return quantizeF32ToInt8(expressedValue);
106 expressedValue.convert(scale.getSemantics(), roundMode, &lossy);
109 APFloat scaled = (expressedValue / scale);
110 scaled.roundToIntegral(roundMode);
111 scaled.add(zeroPoint, roundMode);
112 APFloat fixedpoint = llvm::minimum(scaled, clampMax);
113 fixedpoint = llvm::maximum(fixedpoint, clampMin);
115 llvm::APSInt result(storageBitWidth, !isSigned);
116 fixedpoint.convertToInteger(result, roundMode, &lossy);
118 return std::move(result);
122 APInt qValue = quantizeFloatToInt(std::move(expressedValue));
123 return isSigned ? qValue.getSExtValue() : qValue.getZExtValue();
131 virtual APInt quantizeF32ToInt8(APFloat expressedValue)
const {
132 assert(&expressedValue.getSemantics() == &APFloat::IEEEsingle());
133 assert(storageBitWidth == 8);
134 assert(roundMode == llvm::APFloatBase::rmNearestTiesToAway);
136 const float realValue = expressedValue.convertToFloat();
138 const double scaled = realValue / scaleDouble + zeroPointDouble;
140 const double scaledRounded = std::round(scaled);
141 const double clamped =
144 uint64_t signlessResult;
146 int64_t clampedInt =
static_cast<int8_t
>(clamped);
147 memcpy(&signlessResult, &clampedInt,
sizeof(clampedInt));
149 signlessResult =
static_cast<uint8_t
>(clamped);
151 return APInt(storageBitWidth, signlessResult);
158 const APFloat zeroPoint;
159 const APFloat clampMin;
160 const APFloat clampMax;
162 const double scaleDouble;
163 const double zeroPointDouble;
164 const double clampMinDouble;
165 const double clampMaxDouble;
167 const uint32_t storageBitWidth;
169 const llvm::APFloat::roundingMode roundMode;
180 : scales(uniformType.getScales()),
181 zeroPoints(uniformType.getZeroPoints()),
182 clampMin(static_cast<double>(uniformType.getStorageTypeMin())),
183 clampMax(static_cast<double>(uniformType.getStorageTypeMax())),
184 storageBitWidth(uniformType.getStorageTypeIntegralWidth()),
185 isSigned(uniformType.isSigned()),
186 quantizationDim(uniformType.getQuantizedDimension()) {
187 assert(uniformType.getExpressedType().isa<
FloatType>());
188 assert(uniformType.getStorageType().isSignlessInteger());
189 assert(scales.size() == zeroPoints.size());
205 storageBitWidth, isSigned);
211 const APFloat clampMin;
212 const APFloat clampMax;
213 const uint32_t storageBitWidth;
215 int32_t quantizationDim;
221 #endif // MLIR_DIALECT_QUANT_UNIFORMSUPPORT_H_
Include the generated interface declarations.
An attribute that represents a reference to a dense float vector or tensor object.
const Type expressedType
Supported, elemental expressed type (i.e. f32).
An attribute that represents a reference to a dense vector or tensor object.
Performs type conversion from an arbitrary input type to a type that is expressed by a QuantizedType.
Attributes are known-constant values of operations.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base class for all quantized types known to this dialect.
Type convert(QuantizedType elementalType) const
Converts the inputType to be based on the given elemental type, returning the new type (or nullptr and emit an error on failure).
const Type inputType
The input type that is being converted from.
static ExpressedToQuantizedConverter forInputType(Type inputType)
Creates a converter for the given input type.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)