26 const double mantissa =
std::frexp(scale, &shift);
27 auto shiftedM =
std::round(mantissa * (int64_t(1) << 15));
30 assert(shiftedM <= (int64_t(1) << 15) &&
31 "Shifted mantissa exceeds 16 signed bits");
33 if (shiftedM == (int64_t(1) << 15)) {
40 shift = (-shift) + 15;
43 "Shifted mantissa exceeds 32-bit signed output type");
45 multiplier =
static_cast<int32_t
>(shiftedM);
52 multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
64 const double mantissa =
std::frexp(scale, &shift);
65 auto shiftedM =
std::round(mantissa * (int64_t(1) << 31));
68 assert(shiftedM <= (int64_t(1) << 31) &&
69 "Shifted mantissa exceeds 32 signed bits");
70 if (shiftedM == (int64_t(1) << 31)) {
77 shift = (-shift) + 31;
80 "Shifted mantissa exceeds 32-bit signed output type");
82 multiplier =
static_cast<int32_t
>(shiftedM);
89 multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
96 int32_t &shift, int32_t scaleWidth) {
104 return (!(shift < 2));
110 return (!(shift < 2));
112 assert(0 &&
"Unsupported Tosa quantized_scale regime specified!");
// Expands to an llvm::dyn_cast of `inputType`'s element type to
// quant::UniformQuantizedType. `inputType` must provide getElementType();
// the argument is parenthesized so an arbitrary expression can be passed.
// As with any dyn_cast, the result is null when the element type is not a
// quant::UniformQuantizedType.
117 #define GET_UQTYPE(inputType) \
118 (llvm::dyn_cast<quant::UniformQuantizedType>((inputType).getElementType()))
// Expands to an llvm::dyn_cast of `inputType`'s element type to the
// quant::QuantizedType base class. `inputType` must provide getElementType();
// the argument is parenthesized so an arbitrary expression can be passed.
// As with any dyn_cast, the result is null when the element type is not a
// quant::QuantizedType.
119 #define GET_QTYPE(inputType) \
120 (llvm::dyn_cast<quant::QuantizedType>((inputType).getElementType()))
122 static std::optional<std::pair<std::int64_t, std::int64_t>>
125 auto inputType = dyn_cast<ShapedType>(input.
getType());
126 auto weightType = dyn_cast<ShapedType>(weight.
getType());
128 if (!inputType || !weightType)
132 auto weightPerTensorQType =
GET_UQTYPE(weightType);
133 auto weightPerAxisQType =
134 dyn_cast<quant::UniformQuantizedPerAxisType>(weightType.getElementType());
137 assert(!((
bool)weightPerTensorQType && (
bool)weightPerAxisQType) &&
138 "Weights must be either per-tensor or per-axis quantized");
141 assert(!((
bool)inputQType ^
142 ((
bool)weightPerTensorQType || (
bool)weightPerAxisQType)) &&
143 "Inputs and weights must be all quantized or all not quantized");
146 int64_t inputZp = inputQType.getZeroPoint();
147 int64_t weightZp = 0;
149 if (weightPerTensorQType) {
150 weightZp = weightPerTensorQType.getZeroPoint();
151 }
else if (weightPerAxisQType) {
152 weightZp = weightPerAxisQType.getZeroPoints().front();
155 return std::make_pair(inputZp, weightZp);
161 std::pair<Value, Value>
163 std::int64_t inputZp, weightZp;
168 if (mlir::isa<FloatType>(inputEType) && mlir::isa<FloatType>(weightEType)) {
173 if (!maybeZps.has_value())
176 inputZp = maybeZps->first;
177 weightZp = maybeZps->second;
180 auto maybeInputZpValue =
182 if (!maybeInputZpValue.has_value())
185 auto maybeWeightZpValue =
187 if (!maybeWeightZpValue.has_value())
190 return std::make_pair(*maybeInputZpValue, *maybeWeightZpValue);
197 ConvOpQuantizationAttr
202 if (!maybeZps.has_value())
205 return builder.
getAttr<tosa::ConvOpQuantizationAttr>(maybeZps->first,
213 MatMulOpQuantizationAttr
217 auto aType = dyn_cast<ShapedType>(a.
getType());
218 auto bType = dyn_cast<ShapedType>(b.
getType());
220 if (!aType || !bType)
227 assert(!((
bool)aQType ^ (
bool)bQType) &&
228 "Matmul operands must be all quantized or all not quantized");
231 return builder.
getAttr<tosa::MatMulOpQuantizationAttr>(
232 aQType.getZeroPoint(), bQType.getZeroPoint());
242 UnaryOpQuantizationAttr
244 Type outputRawType) {
246 auto inputType = dyn_cast<ShapedType>(input.
getType());
247 auto outputType = dyn_cast<ShapedType>(outputRawType);
249 if (!inputType || !outputType)
256 assert(!((
bool)inputQType ^ (
bool)outputQType) &&
257 "Unary inputs/outputs must be all quantized or all not quantized");
260 return builder.
getAttr<UnaryOpQuantizationAttr>(inputQType.getZeroPoint(),
261 outputQType.getZeroPoint());
272 auto inputType = dyn_cast<ShapedType>(input.
getType());
280 return builder.
getAttr<tosa::PadOpQuantizationAttr>(
281 inputQType.getZeroPoint());
292 auto inputType = dyn_cast<ShapedType>(input.
getType());
293 auto weightType = dyn_cast<ShapedType>(weight.
getType());
295 assert(inputType && weightType &&
296 "Could not extract input or weight tensors from Conv op");
299 auto weightQType =
GET_QTYPE(weightType);
301 assert(inputQType && weightQType &&
302 "Could not extract input or weight tensor types from Conv op");
304 unsigned inputBits = inputQType.getStorageTypeIntegralWidth();
305 unsigned weightBits = weightQType.getStorageTypeIntegralWidth();
307 auto outputShapedType = dyn_cast<ShapedType>(outputType);
308 assert(outputShapedType &&
309 "Could not extract output shape type from Conv op");
311 IntegerType accElementType;
312 if (inputBits == 16 && weightBits == 8)
316 auto accType = outputShapedType.clone(accElementType);
323 IntegerAttr quantBits,
int filterQuantDim,
324 bool isSigned,
BoolAttr narrowRange) {
331 auto minElems = dyn_cast<DenseFPElementsAttr>(minAttr);
332 auto maxElems = dyn_cast<DenseFPElementsAttr>(maxAttr);
337 if (minElems || maxElems) {
339 if (minElems.getNumElements() != maxElems.getNumElements())
341 min.reserve(minElems.getNumElements());
342 max.reserve(maxElems.getNumElements());
343 for (
auto i : minElems)
344 min.push_back(FloatAttr::getValueAsDouble(i));
345 for (
auto i : maxElems)
346 max.push_back(FloatAttr::getValueAsDouble(i));
348 auto minVal = dyn_cast<FloatAttr>(minAttr);
350 min.push_back(minVal.getValueAsDouble());
353 auto maxVal = dyn_cast<FloatAttr>(maxAttr);
355 max.push_back(maxVal.getValueAsDouble());
360 if (
min.size() ==
max.size()) {
361 if (
min.size() == 1) {
364 narrowRange.
getValue(), convfunc.expressedType, isSigned);
365 }
else if (
min.size() > 1) {
366 auto shape = dyn_cast<ShapedType>(inputDType);
369 if ((filterQuantDim) >= 0 && (shape.getRank() > filterQuantDim)) {
372 max[0], narrowRange.
getValue(), convfunc.expressedType, isSigned);
384 return convfunc.convert(retType);
391 IntegerAttr quantBits,
int filterQuantDim,
392 bool isSigned,
BoolAttr narrowRange) {
395 maxAttr, quantBits, filterQuantDim,
396 isSigned, narrowRange));
static std::pair< Value, Value > frexp(ImplicitLocOpBuilder &builder, Value arg, bool isPositive=false)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
#define GET_UQTYPE(inputType)
static std::optional< std::pair< std::int64_t, std::int64_t > > getConvZeroPoints(Value input, Value weight)
static void computeMultiplierAndShiftTosaScale16(double scale, int32_t &multiplier, int32_t &shift)
From a scale value, generates multiplier and shift values where mantissa is in [-1....
#define GET_QTYPE(inputType)
static void computeMultiplierAndShiftTosaScale32(double scale, int32_t &multiplier, int32_t &shift)
From a scale value, generates multiplier and shift values where mantissa is in [-1....
Attributes are known-constant values of operations.
Special case of IntegerAttr to represent boolean integers, i.e., signless i1 integers.
bool getValue() const
Return the boolean value of this attribute.
IntegerType getIntegerType(unsigned width)
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
This class helps build Operations.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Location getLoc() const
Return the location of this value.
Base class for all quantized types known to this dialect.
DynamicAPInt round(const Fraction &f)
UniformQuantizedType fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin, double rmax, bool narrowRange, Type expressedType, bool isSigned=false)
Converts per-layer FakeQuant attributes to the corresponding type.
ConvOpQuantizationAttr buildConvOpQuantizationAttr(OpBuilder &builder, Value input, Value weight)
Method to build ConvOpQuantizationAttr, called from ConvOpQuantInfoBuilder/TransConvOpQuantInfoBuilde...
TypeAttr buildQTypeAttrFromMinMax(OpBuilder builder, Type inputDType, Attribute minAttr, Attribute maxAttr, IntegerAttr quantBits, int filterQuantDim, bool isSigned, BoolAttr narrowRange)
Builds Tosa quantization attributes from min/max values.
Type buildConvOpResultTypeInfo(OpBuilder &builder, Type outputType, Value input, Value weight)
Constructs the ConvOp output type with the correct bit width based on the input/weight widths.
bool computeMultiplierAndShift(double scale, int32_t &multiplier, int32_t &shift, int32_t scaleWidth)
From a scale value, computes multiplier and shift values for 16 or 32-bit scale widths.
Type buildQTypeFromMinMax(OpBuilder builder, Type inputDType, Attribute minAttr, Attribute maxAttr, IntegerAttr quantBits, int filterQuantDim, bool isSigned, BoolAttr narrowRange)
Builds a Tosa quantized type from min/max values.
PadOpQuantizationAttr buildPadOpQuantizationAttr(OpBuilder &builder, Value input)
Builds PadOpQuantizationAttr; called from PadOpQuantInfoBuilder. inputZp: input zero point.
std::pair< Value, Value > createZPsAsConst(OpBuilder &builder, Value input, Value weight)
MatMulOpQuantizationAttr buildMatMulOpQuantizationAttr(OpBuilder &builder, Value a, Value b)
Builds MatMulOpQuantizationAttr, called from MatMulOpQuantInfoBuilder: aZp: input a zeropoint bZp: in...
std::optional< Value > createZeroPointTensor(OpBuilder &builder, Location loc, Type srcElemType, int64_t zp=0)
UnaryOpQuantizationAttr buildUnaryOpQuantizationAttr(OpBuilder &builder, Value input, Type outputRawType)
Builds UnaryOpQuantizationAttr UnaryOpQuantInfoBuilder: inputZp: input zeropoint outputZp: output zer...
Include the generated interface declarations.
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
static ExpressedToQuantizedConverter forInputType(Type inputType)
Creates a converter for the given input type.