MLIR 22.0.0git
QuantTypes.h
Go to the documentation of this file.
1//===- QuantTypes.h - Quantization Ops and Types ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
10#define MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
11
12#include "mlir/IR/Attributes.h"
13#include "mlir/IR/Builders.h"
15#include "mlir/IR/Dialect.h"
17#include "mlir/IR/Types.h"
18#include "llvm/Support/MathExtras.h"
19
20namespace mlir {
21namespace quant {
32
33/// Enumeration of bit-mapped flags related to quantized types.
36 // Indicates that the storage type should be interpreted as a signed
37 // integer. The default is to interpret it as an unsigned value.
38 Signed = 1,
39};
40} // namespace QuantizationFlags
41
42/// Base class for all quantized types known to this dialect.
43/// All quantized types have:
44/// - storageType: The (narrower) numeric type that is being used to
45/// approximate some expressed type.
46/// - expressedType: The type that is being approximated.
47///
48/// The base class provides generic support for manipulating the types based
49/// on these fields.
50class QuantizedType : public Type {
51public:
53 using Type::Type;
54
55 /// The maximum number of bits supported for storage types.
56 static constexpr unsigned MaxStorageBits = 32;
57
58 static LogicalResult
60 Type storageType, Type expressedType, int64_t storageTypeMin,
61 int64_t storageTypeMax);
62
63 /// Support method to enable LLVM-style type casting.
64 static bool classof(Type type);
65
66 /// Gets the minimum possible stored by a storageType. storageTypeMin must
67 /// be greater than or equal to this value.
69 unsigned integralWidth) {
70 if (isSigned) {
71 return llvm::minIntN(integralWidth);
72 }
73 return 0;
74 }
75
76 /// Gets the maximum possible stored by a storageType. storageTypeMax must
77 /// be less than or equal to this value.
79 unsigned integralWidth) {
80 if (isSigned) {
81 return llvm::maxIntN(integralWidth);
82 }
83 return llvm::maxUIntN(integralWidth);
84 }
85
86 /// Gets the original expressed type that this quantized type approximates.
87 /// Note that this presumes that the quantized type was always derived from
88 /// a floating point type, which in the broadest definition, is not true (i.e.
89 /// it could be some form of integral, fixed type or affine type in its own
90 /// right); however, at the high level, no examples of such usage are
91 /// presently known and the restriction serves some useful purposes (such as
92 /// always being able to reverse a transformation or measure error). In most
93 /// cases, this will be f32.
94 Type getExpressedType() const;
95
96 /// Gets the flags associated with this type. Typically a more specific
97 /// accessor is appropriate.
98 unsigned getFlags() const;
99
100 // Convenience helpers.
101 /// Whether the storage type should be interpreted as a signed quantity
102 /// (true) or an unsigned value (false).
103 bool isSigned() const {
104 return (getFlags() & QuantizationFlags::Signed) ==
106 }
107
108 /// Gets the underlying type used to store values. Note that this may
109 /// be signed or unsigned. Use the isSigned() accessor to differentiate.
110 Type getStorageType() const;
111
112 /// The minimum value that storageType can take.
114
115 /// The maximum value that storageType can take.
117
118 /// Return whether the storage type has explicit min or max boundaries
119 /// different from the minimum and maximum representable values.
120 bool hasStorageTypeBounds() const;
121
122 /// Gets the integral bit width that the underlying storage type can exactly
123 /// represent. For integral storage types, this will just be their width.
124 unsigned getStorageTypeIntegralWidth() const;
125
126 /// Returns whether the candidateExpressedType is a match for this
127 /// QuantizedType. This will be true if the candidate type is either a
128 /// primitive type or a container type whose element type equals this
129 /// QuantizedType's expressed type.
130 /// Examples of compatible candidateExpressedType:
131 /// !quant.uniform<i8:f32, 1.0> =~ f32
132 /// !quant.uniform<i8:f32, 1.0> =~ tensor<4xf32>
133 bool isCompatibleExpressedType(Type candidateExpressedType);
134
135 /// Returns the element type as a QuantizedType or nullptr if it is not
136 /// a quantized type. If the type is primitive, returns that. If it is a
137 /// container (vector/tensor), return the element type.
138 /// Examples:
139 /// !quant.uniform<i8:f32, 1.0> -> !quant.uniform<i8:f32, 1.0>
140 /// tensor<4x!quant.uniform<i8:f32, 1.0>> -> !quant.uniform<i8:f32, 1.0>
141 static QuantizedType getQuantizedElementType(Type primitiveOrContainerType);
142
143 /// Casts from a type based on the storageType to a corresponding type based
144 /// on this type (returns nullptr if the cast is not valid).
145 /// Examples:
146 /// `candidate type` -> `return type`
147 /// i8 -> !quant.uniform<i8:f32, 1.0>
148 /// tensor<4xi8> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
149 /// vector<4xi8> -> vector<4x!quant.uniform<i8:f32, 1.0>>
150 /// It is assumed above that this type's quantization is `<i8:f32, 1.0>`.
151 Type castFromStorageType(Type candidateType);
152
153 /// Casts from a type based on a QuantizedType to a corresponding type based
154 /// on the storageType (returns nullptr if the cast is not valid).
155 /// This is the inverse of castFromStorageType().
156 static Type castToStorageType(Type quantizedType);
157
158 /// Casts from a type based on the expressedType to a corresponding type based
159 /// on this type (returns nullptr if the cast is not valid).
160 /// Examples:
161 /// f32 -> !quant.uniform<i8:f32, 1.0>
162 /// tensor<4xf32> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
163 /// vector<4xf32> -> vector<4x!quant.uniform<i8:f32, 1.0>>
164 Type castFromExpressedType(Type candidateType);
165
166 /// Casts from a type based on QuantizedType to a corresponding type based
167 /// on the expressedType (returns nullptr if the cast is not valid).
168 /// This is the inverse of castFromExpressedType.
169 static Type castToExpressedType(Type quantizedType);
170
171 /// Casts from a type based on the expressedType to the equivalent type
172 /// based on storageType by way of this QuantizedType. Equivalent to:
173 /// QuantizedType::castToStorageType(castFromExpressedType(candidateType))
174 /// (but with validity checks).
175 /// Example (for this = !quant.uniform<i8:f32, 1.0>):
176 /// tensor<4xf32> -> tensor<4xi8>
177 Type castExpressedToStorageType(Type candidateType);
178
179private:
180 /// Hide the following methods inherited from `Type`. It is almost certainly
181 /// a bug to call them from a `QuantizedType` object. Users should call
182 /// `getStorageType` or `getExpressedType` to get the underlying types
183 /// they want to inspect.
184 using Type::isBF16;
185 using Type::isF16;
186 using Type::isF32;
187 using Type::isF64;
188 using Type::isIndex;
189 using Type::isInteger;
190};
191
192/// A quantized type that maps storage to/from expressed types in an
193/// unspecified way.
194///
195/// Typical syntax:
196/// quant.any<i8:f32>
197/// quant.any<i8>
198/// quant.any<i8<-16,15>>
199///
200/// Note that for the any type, the expressed type is optional.
202 : public Type::TypeBase<AnyQuantizedType, QuantizedType,
203 detail::AnyQuantizedTypeStorage> {
204public:
205 using Base::Base;
206 using Base::getChecked;
207
208 static constexpr StringLiteral name = "quant.any";
209
210 /// Gets an instance of the type with all parameters specified but not
211 /// checked.
212 static AnyQuantizedType get(unsigned flags, Type storageType,
213 Type expressedType, int64_t storageTypeMin,
214 int64_t storageTypeMax);
215
216 /// Gets an instance of the type with all specified parameters checked.
217 /// Returns a nullptr convertible type on failure.
218 static AnyQuantizedType
220 Type storageType, Type expressedType, int64_t storageTypeMin,
221 int64_t storageTypeMax);
222
223 /// Verifies construction invariants and issues errors/warnings.
224 static LogicalResult
226 Type storageType, Type expressedType, int64_t storageTypeMin,
227 int64_t storageTypeMax);
228};
229
230/// Represents a family of uniform, quantized types.
231///
232/// Each instance of this type expresses a mapping between real values (most
233/// often expressed in floating point f32) and quantized values (either fixed
234/// point or affine).
235///
236/// The relationship is:
237/// real_value = scale * (quantized_value - zero_point)
238///
239/// It is used as part of high level graph transformations that have the goal
240/// of re-expressing parts of a computation in terms of this common form for
241/// more efficient execution at runtime. In addition, it is designed to be
242/// expressive enough to facilitate lowering to precise types and operations
243/// in target hardware.
244///
245/// As a high-level type, focused on intermediate passes, this type holds
246/// opinions consistent with high-level usage. If lowering math kernels below
247/// the high level arithmetic ops (i.e. to LLVM IR or hardware specific
248/// instruction sets), it is expected that the information expressed here
249/// will be used to drive low level codegen and target specific type selection,
250/// but this type will likely be erased in the process.
251///
252/// Syntax synopsis:
253/// Per-layer, all parameters expressed:
254/// !quant<uniform[StorageType:ExpressedType]{Scale:ZeroPoint}>
255/// Per-layer, optional parameters omitted:
256/// !quant<uniform[StorageType]{Scale}>
257///
258/// StorageType: 'i'|'u' NumBits
259/// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
260/// Scale: A legal double value
261/// ZeroPoint: An integer value
263 : public Type::TypeBase<UniformQuantizedType, QuantizedType,
264 detail::UniformQuantizedTypeStorage> {
265public:
266 using Base::Base;
267 using Base::getChecked;
268
269 static constexpr StringLiteral name = "quant.uniform";
270
271 /// Gets an instance of the type with all parameters specified but not
272 /// checked.
273 static UniformQuantizedType get(unsigned flags, Type storageType,
274 Type expressedType, double scale,
275 int64_t zeroPoint, int64_t storageTypeMin,
276 int64_t storageTypeMax);
277
278 /// Gets an instance of the type with all specified parameters checked.
279 /// Returns a nullptr convertible type on failure.
282 Type storageType, Type expressedType, double scale,
283 int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax);
284
285 /// Verifies construction invariants and issues errors/warnings.
286 static LogicalResult
288 Type storageType, Type expressedType, double scale,
289 int64_t zeroPoint, int64_t storageTypeMin,
290 int64_t storageTypeMax);
291
292 /// Gets the scale term. The scale designates the difference between the real
293 /// values corresponding to consecutive quantized values differing by 1.
294 double getScale() const;
295
296 /// Gets the storage value corresponding to the real value 0 in the affine
297 /// equation.
298 int64_t getZeroPoint() const;
299
300 // Fixed point values are real numbers divided by a scale.
301 // Currently, only signed storage types are treated as fixed point.
302 // A fixed point value can be obtained from an affine value by subtracting
303 // the zeroPoint.
304 // In the future, this may be explicit versus implied by type and zeroPoint.
305 bool isFixedPoint() const { return isSigned() && getZeroPoint() == 0; }
306};
307
308/// Represents per-axis (also known as per-channel) quantization.
309///
310/// Syntax synopsis:
311/// Per-axis, all parameters expressed:
312/// !quant<uniform[StorageType:ExpressedType:QuantizedDim]{QuantParams}>
313/// Per-axis, optional parameters omitted:
314/// !quant<uniform[StorageType]{Scale}>
315///
316/// StorageType: 'i'|'u' NumBits
317/// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
318/// QuantizedDim: An integer value
319/// QuantParams: (Scale ':' ZeroPoint)+
320/// Scale: A legal double value
321/// ZeroPoint: An integer value
323 : public Type::TypeBase<UniformQuantizedPerAxisType, QuantizedType,
324 detail::UniformQuantizedPerAxisTypeStorage> {
325public:
326 using Base::Base;
327 using Base::getChecked;
328
329 static constexpr StringLiteral name = "quant.uniform_per_axis";
330
331 /// Gets an instance of the type with all parameters specified but not
332 /// checked.
334 get(unsigned flags, Type storageType, Type expressedType,
335 ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
336 int32_t quantizedDimension, int64_t storageTypeMin,
337 int64_t storageTypeMax);
338
339 /// Gets an instance of the type with all specified parameters checked.
340 /// Returns a nullptr convertible type on failure.
343 Type storageType, Type expressedType, ArrayRef<double> scales,
344 ArrayRef<int64_t> zeroPoints, int32_t quantizedDimension,
345 int64_t storageTypeMin, int64_t storageTypeMax);
346
347 /// Verifies construction invariants and issues errors/warnings.
348 static LogicalResult
350 Type storageType, Type expressedType,
351 ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
352 int32_t quantizedDimension, int64_t storageTypeMin,
353 int64_t storageTypeMax);
354
355 /// Gets the quantization scales. The scales designate the difference between
356 /// the real values corresponding to consecutive quantized values differing
357 /// by 1. The ith scale corresponds to the ith slice in the
358 /// quantized_dimension.
360
361 /// Gets the storage values corresponding to the real value 0 in the affine
362 /// equation. The ith zero point corresponds to the ith slice in the
363 /// quantized_dimension.
365
366 /// Specifies the dimension of the Tensor's shape that the scales and
367 /// zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
368 /// with quantization params:
369 /// scales=[1.0, 2.0, 3.0], zeroPoints=[1, 2, 3], quantizedDimension=1
370 /// will be quantized across the second dimension of t.
371 /// t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
372 /// t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
373 /// t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
374 int32_t getQuantizedDimension() const;
375
376 /// Fixed point values are real numbers divided by a scale.
377 /// Currently, only signed storage types are treated as fixed point.
378 /// A fixed point value can be obtained from an affine value by subtracting
379 /// the zeroPoint.
380 /// In the future, this may be explicit versus implied by type and zeroPoint.
381 bool isFixedPoint() const {
382 if (!isSigned())
383 return false;
384 return !llvm::is_contained(getZeroPoints(), 0);
385 }
386};
387
388/// Represents sub-channel (also known as blockwise) quantization.
389///
390/// Syntax synopsis:
391/// UniformQuantizedSubChannelType ::= '!quant.uniform' '<'
392/// storageType ('<' storageMin ':' storageMax '>')? ':'
393/// expressedType ':' BlockSizeInfo ',' ScaleZeroTensor '>'
394/// BlockSizeInfo: '{' '}' | '{' AxisBlock (',' AxisBlock)* '}'
395/// AxisBlock ::= AxisSpec ':' BlockSizeSpec
396/// ScaleZeroTensor ::= ScaleZeroDenseExp | ScaleZeroList
397/// ScaleZeroDenseExp ::= '{' ScaleZeroTensor (',' ScaleZeroTensor)* '}'
398/// ScaleZeroList ::= ScaleZero (',' ScaleZero)*
399/// ScaleZero ::= Scale (':' ZeroPoint)?
400///
401/// StorageType: 'i'|'u' NumBits
402/// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
403/// AxisSpec: An integer value
404/// BlockSizeSpec: An integer value
405/// Scale: An attribute (usually floating-point value)
406/// ZeroPoint: An attribute (usually integer value)
408 : public Type::TypeBase<UniformQuantizedSubChannelType, QuantizedType,
409 detail::UniformQuantizedSubChannelTypeStorage> {
410public:
411 using Base::Base;
412 using Base::getChecked;
413
414 static constexpr StringLiteral name = "quant.uniform_sub_channel";
415
416 /// Gets an instance of the type with all parameters specified but not
417 /// checked.
419 get(unsigned flags, Type storageType, Type expressedType,
420 DenseElementsAttr scales, DenseElementsAttr zeroPoints,
421 ArrayRef<int32_t> quantizedDimensions, ArrayRef<int64_t> blockSizes,
422 int64_t storageTypeMin, int64_t storageTypeMax);
423
424 /// Gets an instance of the type with all specified parameters checked.
425 /// Returns a nullptr convertible type on failure.
428 Type storageType, Type expressedType, DenseElementsAttr scales,
429 DenseElementsAttr zeroPoints,
430 ArrayRef<int32_t> quantizedDimensions,
431 ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
432 int64_t storageTypeMax);
433
434 /// Verifies construction invariants and issues errors/warnings.
435 static LogicalResult
437 Type storageType, Type expressedType,
438 DenseElementsAttr scales, DenseElementsAttr zeroPoints,
439 ArrayRef<int32_t> quantizedDimensions,
440 ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
441 int64_t storageTypeMax);
442
443 /// Gets the quantization scales. The scales are organized in a
444 /// multi-dimensional tensor. The size of each dimension in the scales tensor
445 /// is determined by the number of blocks along the corresponding dimension in
446 /// the quantized data tensor.
447 ///
448 /// For example, if the quantized data tensor has shape [X0, X1, ..., XR-1]
449 /// and the block sizes are [B0, B1, ..., BR-1], then the scales tensor will
450 /// have shape [X0/B0, X1/B1, ..., XR-1/BR-1].
451 ///
452 /// The scale value for a specific element in the quantized data tensor at
453 /// position [i0, i1, ..., iR-1] is determined by accessing the corresponding
454 /// element in the scales tensor at position [i0/B0, i1/B1, ..., iR-1/BR-1].
456
457 /// Gets the quantization zero-points. The zero-points are organized in a
458 /// multi-dimensional tensor. The size of each dimension in the zero-point
459 /// tensor is determined by the number of blocks along the corresponding
460 /// dimension in the quantized data tensor.
461 ///
462 /// For example, if the quantized data tensor has shape [X0, X1, ..., XR-1]
463 /// and the block sizes are [B0, B1, ..., BR-1], then the zero-point tensor
464 /// will have shape [X0/B0, X1/B1, ..., XR-1/BR-1].
465 ///
466 /// The zero-point value for a specific element in the quantized data tensor
467 /// at position [i0, i1, ..., iR-1] is determined by accessing the
468 /// corresponding element in the zero-point tensor at position [i0/B0, i1/B1,
469 /// ..., iR-1/BR-1].
471
472 /// Gets the quantized dimensions. Each element in the returned list
473 /// represents an axis of the quantized data tensor that has a specified block
474 /// size. The order of elements corresponds to the order of block sizes
475 /// returned by `getBlockSizes()`.
476 ///
477 /// It means that the data tensor is quantized along the `i`-th dimension in
478 /// the returned list using the `i`-th block size from `getBlockSizes()`.
479 ///
480 /// Note that the type expression does not have to specify the block size for
481 /// all axes in the data tensor. Any unspecified block size for an axis `i`
482 /// defaults to the tensor dimension size of that axis.
483 ///
484 /// For example, for a quantized type:
485 /// `tensor<8x4x2x!quant.uniform<i8:f32:{1:2, 0:8}, {{1.0, 2.0}, {3.0, 4.0}}>>`
486 ///
487 /// `getQuantizedDimensions()` returns [1, 0].
488 /// `getBlockSizes()` returns [2, 8].
489 ///
490 /// This indicates that:
491 /// * Axis 1 (second dimension) is quantized with a block size of 2.
492 /// * Axis 0 (first dimension) is quantized with a block size of 8.
493 /// Since axis 2 is not specified, it implicitly has a block size equal to
494 /// the size of the third dimension (which is 2 in this case).
496
497 /// Gets the block sizes for the quantized dimensions. The `i`-th element in
498 /// the returned list corresponds to the block size for the `i`-th dimension
499 /// in the list returned by `getQuantizedDimensions()`.
500 ///
501 /// See `getQuantizedDimensions()` for more details and examples.
503
504 /// Gets the block size information. This returns a list of pairs, where each
505 /// pair represents a quantized dimension and its corresponding block size.
506 ///
507 /// For example, for the type:
508 /// `tensor<8x4x!quant.uniform<i8:f32:{1:2, 0:8}, {{2.0, 3.0}}>>`
509 ///
510 /// This method returns:
511 /// `[(1, 2), (0, 8)]`
512 ///
513 /// This list indicates that axis 1 has a block size of 2, and axis 0 has a
514 /// block size of 8.
516};
517
518/// A quantized type that infers its range from given min/max values.
519///
520/// Typical syntax:
521/// quant.calibrated<f32<-0.922,0.981>>
523 : public Type::TypeBase<CalibratedQuantizedType, QuantizedType,
524 detail::CalibratedQuantizedTypeStorage> {
525public:
526 using Base::Base;
527 using Base::getChecked;
528
529 static constexpr StringLiteral name = "quant.calibrated";
530
531 /// Gets an instance of the type with all parameters specified but not
532 /// checked.
533 static CalibratedQuantizedType get(Type expressedType, double min,
534 double max);
535
536 /// Gets an instance of the type with all specified parameters checked.
537 /// Returns a nullptr convertible type on failure.
540 double min, double max);
541
542 /// Verifies construction invariants and issues errors/warnings.
543 static LogicalResult
545 Type expressedType, double min, double max);
546 double getMin() const;
547 double getMax() const;
548};
549
550} // namespace quant
551} // namespace mlir
552
553#endif // MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
An attribute that represents a reference to a dense vector or tensor object.
This class represents a diagnostic that is inflight and set to be reported.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isF64() const
Definition Types.cpp:41
bool isIndex() const
Definition Types.cpp:54
constexpr Type()=default
bool isF32() const
Definition Types.cpp:40
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition Types.cpp:56
detail::StorageUserBase< ConcreteType, BaseType, StorageType, detail::TypeUniquer, Traits... > TypeBase
Utility class for implementing types.
Definition Types.h:79
bool isF16() const
Definition Types.cpp:38
bool isBF16() const
Definition Types.cpp:37
StorageUserBase< ConcreteType, BaseType, StorageType, detail::TypeUniquer, Traits... > Base
A quantized type that maps storage to/from expressed types in an unspecified way.
Definition QuantTypes.h:203
static AnyQuantizedType get(unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
static constexpr StringLiteral name
Definition QuantTypes.h:208
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
static AnyQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
A quantized type that infers its range from given min/max values.
Definition QuantTypes.h:524
static constexpr StringLiteral name
Definition QuantTypes.h:529
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Verifies construction invariants and issues errors/warnings.
static CalibratedQuantizedType get(Type expressedType, double min, double max)
Gets an instance of the type with all parameters specified but not checked.
static CalibratedQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Gets an instance of the type with all specified parameters checked.
Base class for all quantized types known to this dialect.
Definition QuantTypes.h:50
Type getExpressedType() const
Gets the original expressed type that this quantized type approximates.
static constexpr unsigned MaxStorageBits
The maximum number of bits supported for storage types.
Definition QuantTypes.h:56
bool hasStorageTypeBounds() const
Return whether the storage type has explicit min or max boundaries different from the minimum and max...
static Type castToStorageType(Type quantizedType)
Casts from a type based on a QuantizedType to a corresponding type based on the storageType (returns ...
Type castExpressedToStorageType(Type candidateType)
Casts from a type based on the expressedType to the equivalent type based on storageType by way of th...
detail::QuantizedTypeStorage ImplType
Definition QuantTypes.h:52
static Type castToExpressedType(Type quantizedType)
Casts from a type based on QuantizedType to a corresponding type based on the expressedType (returns ...
bool isSigned() const
Whether the storage type should be interpreted as a signed quantity (true) or an unsigned value (fals...
Definition QuantTypes.h:103
constexpr Type()=default
static QuantizedType getQuantizedElementType(Type primitiveOrContainerType)
Returns the element type as a QuantizedType or nullptr if it is not a quantized type.
unsigned getFlags() const
Gets the flags associated with this type.
int64_t getStorageTypeMax() const
The maximum value that storageType can take.
static int64_t getDefaultMaximumForInteger(bool isSigned, unsigned integralWidth)
Gets the maximum possible stored by a storageType.
Definition QuantTypes.h:78
unsigned getStorageTypeIntegralWidth() const
Gets the integral bit width that the underlying storage type can exactly represent.
static bool classof(Type type)
Support method to enable LLVM-style type casting.
Type castFromStorageType(Type candidateType)
Casts from a type based on the storageType to a corresponding type based on this type (returns nullpt...
int64_t getStorageTypeMin() const
The minimum value that storageType can take.
static int64_t getDefaultMinimumForInteger(bool isSigned, unsigned integralWidth)
Gets the minimum possible stored by a storageType.
Definition QuantTypes.h:68
Type getStorageType() const
Gets the underlying type used for to store values.
Type castFromExpressedType(Type candidateType)
Casts from a type based on the expressedType to a corresponding type based on this type (returns null...
bool isCompatibleExpressedType(Type candidateExpressedType)
Returns whether the candidateExpressedType is a match for this QuantizedType.
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Represents per-axis (also known as per-channel quantization).
Definition QuantTypes.h:324
static constexpr StringLiteral name
Definition QuantTypes.h:329
static UniformQuantizedPerAxisType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
bool isFixedPoint() const
Fixed point values are real numbers divided by a scale.
Definition QuantTypes.h:381
static UniformQuantizedPerAxisType get(unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
int32_t getQuantizedDimension() const
Specifies the dimension of the Tensor's shape that the scales and zero_points correspond to.
ArrayRef< int64_t > getZeroPoints() const
Gets the storage values corresponding to the real value 0 in the affine equation.
ArrayRef< double > getScales() const
Gets the quantization scales.
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Represents sub-channel (also known as blockwise quantization).
Definition QuantTypes.h:409
static constexpr StringLiteral name
Definition QuantTypes.h:414
ArrayRef< int32_t > getQuantizedDimensions() const
Gets the quantized dimensions.
DenseElementsAttr getZeroPoints() const
Gets the quantization zero-points.
ArrayRef< int64_t > getBlockSizes() const
Gets the block sizes for the quantized dimensions.
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
const SmallVector< std::pair< int32_t, int64_t > > getBlockSizeInfo() const
Gets the block size information.
static UniformQuantizedSubChannelType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
static UniformQuantizedSubChannelType get(unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
DenseElementsAttr getScales() const
Gets the quantization scales.
Represents a family of uniform, quantized types.
Definition QuantTypes.h:264
double getScale() const
Gets the scale term.
int64_t getZeroPoint() const
Gets the storage value corresponding to the real value 0 in the affine equation.
static constexpr StringLiteral name
Definition QuantTypes.h:269
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
static UniformQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
static UniformQuantizedType get(unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Enumeration of bit-mapped flags related to quantized types.
Definition QuantTypes.h:34
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152