MLIR  21.0.0git
QuantTypes.h
Go to the documentation of this file.
1 //===- QuantTypes.h - Quantization Ops and Types ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
10 #define MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
11 
12 #include "mlir/IR/Attributes.h"
13 #include "mlir/IR/Builders.h"
14 #include "mlir/IR/BuiltinTypes.h"
15 #include "mlir/IR/Dialect.h"
16 #include "mlir/IR/OpDefinition.h"
17 #include "mlir/IR/Types.h"
18 #include "llvm/Support/MathExtras.h"
19 
20 namespace mlir {
21 namespace quant {
22 namespace detail {
23 
30 
31 } // namespace detail
32 
33 /// Enumeration of bit-mapped flags related to quantized types (tested via `&`).
34 namespace QuantizationFlags {
35 enum FlagValue {
36  // Set when the storage type should be interpreted as a signed integer.
37  // When the bit is clear (the default), storage is interpreted as unsigned.
38  Signed = 1,
39 };
40 } // namespace QuantizationFlags
41 
42 /// Base class for all quantized types known to this dialect.
43 /// All quantized types have:
44 /// - storageType: The (narrower) numeric type that is being used to
45 /// approximate some expressed type.
46 /// - expressedType: The type that is being approximated.
47 ///
48 /// The base class provides generic support for manipulating the types based
49 /// on these fields.
50 class QuantizedType : public Type {
51 public:
53  using Type::Type;
54 
55  /// The maximum number of bits supported for storage types.
56  static constexpr unsigned MaxStorageBits = 32;
57 
58  static LogicalResult
60  Type storageType, Type expressedType, int64_t storageTypeMin,
61  int64_t storageTypeMax);
62 
63  /// Support method to enable LLVM-style type casting.
64  static bool classof(Type type);
65 
66  /// Gets the minimum possible value stored by a storageType. storageTypeMin
67  /// must be greater than or equal to this value.
69  unsigned integralWidth) {
70  if (isSigned) {
71  return llvm::minIntN(integralWidth);
72  }
73  return 0;
74  }
75 
76  /// Gets the maximum possible value stored by a storageType. storageTypeMax
77  /// must be less than or equal to this value.
79  unsigned integralWidth) {
80  if (isSigned) {
81  return llvm::maxIntN(integralWidth);
82  }
83  return llvm::maxUIntN(integralWidth);
84  }
85 
86  /// Gets the original expressed type that this quantized type approximates.
87  /// Note that this presumes that the quantized type was always derived from
88  /// a floating point type, which in the broadest definition, is not true (i.e.
89  /// it could be some form of integral, fixed type or affine type in its own
90  /// right); however, at the high level, no examples of such usage are
91  /// presently known and the restriction serves some useful purposes (such as
92  /// always being able to reverse a transformation or measure error). In most
93  /// cases, this will be f32.
94  Type getExpressedType() const;
95 
96  /// Gets the flags associated with this type. Typically a more specific
97  /// accessor is appropriate.
98  unsigned getFlags() const;
99 
100  // Convenience helpers.
101  /// Whether the storage type should be interpreted as a signed quantity
102  /// (true) or an unsigned value (false).
103  bool isSigned() const {
104  return (getFlags() & QuantizationFlags::Signed) ==
106  }
107 
108  /// Gets the underlying type used to store values. Note that this may
109  /// be signed or unsigned. Use the isSigned() accessor to differentiate.
110  Type getStorageType() const;
111 
112  /// The minimum value that storageType can take.
113  int64_t getStorageTypeMin() const;
114 
115  /// The maximum value that storageType can take.
116  int64_t getStorageTypeMax() const;
117 
118  /// Return whether the storage type has explicit min or max boundaries
119  /// different from the minimum and maximum representable values.
120  bool hasStorageTypeBounds() const;
121 
122  /// Gets the integral bit width that the underlying storage type can exactly
123  /// represent. For integral storage types, this will just be their width.
124  unsigned getStorageTypeIntegralWidth() const;
125 
126  /// Returns whether the candidateExpressedType is a match for this
127  /// QuantizedType. This will be true if the candidate type is either a
128  /// primitive type or a container type whose element type equals this
129  /// QuantizedType's expressed type.
130  /// Examples of compatible candidateExpressedType:
131  /// !quant.uniform<i8:f32, 1.0> =~ f32
132  /// !quant.uniform<i8:f32, 1.0> =~ tensor<4xf32>
133  bool isCompatibleExpressedType(Type candidateExpressedType);
134 
135  /// Returns the element type as a QuantizedType or nullptr if it is not
136  /// a quantized type. If the type is primitive, returns that. If it is a
137  /// container (vector/tensor), return the element type.
138  /// Examples:
139  /// !quant.uniform<i8:f32, 1.0> -> !quant.uniform<i8:f32, 1.0>
140  /// tensor<4x!quant.uniform<i8:f32, 1.0>> -> !quant.uniform<i8:f32, 1.0>
141  static QuantizedType getQuantizedElementType(Type primitiveOrContainerType);
142 
143  /// Casts from a type based on the storageType to a corresponding type based
144  /// on this type (returns nullptr if the cast is not valid).
145  /// Examples:
146  /// i8 -> !quant.uniform<i8:f32, 1.0>
147  /// tensor<4xi8> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
148  /// vector<4xi8> -> vector<4x!quant.uniform<i8:f32, 1.0>>
149  Type castFromStorageType(Type candidateType);
150 
151  /// Casts from a type based on a QuantizedType to a corresponding type based
152  /// on the storageType (returns nullptr if the cast is not valid).
153  /// This is the inverse of castFromStorageType().
154  static Type castToStorageType(Type quantizedType);
155 
156  /// Casts from a type based on the expressedType to a corresponding type based
157  /// on this type (returns nullptr if the cast is not valid).
158  /// Examples:
159  /// f32 -> !quant.uniform<i8:f32, 1.0>
160  /// tensor<4xf32> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
161  /// vector<4xf32> -> vector<4x!quant.uniform<i8:f32, 1.0>>
162  Type castFromExpressedType(Type candidateType);
163 
164  /// Casts from a type based on QuantizedType to a corresponding type based
165  /// on the expressedType (returns nullptr if the cast is not valid).
166  /// This is the inverse of castFromExpressedType.
167  static Type castToExpressedType(Type quantizedType);
168 
169  /// Casts from a type based on the expressedType to the equivalent type
170  /// based on storageType by way of this QuantizedType. Equivalent to:
171  /// QuantizedType::castToStorageType(castFromExpressedType(candidateType))
172  /// (but with validity checks).
173  /// Example (for this = !quant.uniform<i8:f32, 1.0>):
174  /// tensor<4xf32> -> tensor<4xi8>
175  Type castExpressedToStorageType(Type candidateType);
176 
177 private:
178  /// Hide the following methods inherited from `Type`. It is almost certainly
179  /// a bug to call them from a `QuantizedType` object. Users should call
180  /// `getStorageType` or `getExpressedType` to get the underlying types
181  /// they want to inspect.
182  using Type::isBF16;
183  using Type::isF16;
184  using Type::isF32;
185  using Type::isF64;
186  using Type::isIndex;
187  using Type::isInteger;
188 };
189 
190 /// A quantized type that maps storage to/from expressed types in an
191 /// unspecified way.
192 ///
193 /// Typical syntax:
194 /// quant.any<i8:f32>
195 /// quant.any<i8>
196 /// quant.any<i8<-16,15>>
197 ///
198 /// Note that for the any type, the expressed type is optional.
200  : public Type::TypeBase<AnyQuantizedType, QuantizedType,
201  detail::AnyQuantizedTypeStorage> {
202 public:
203  using Base::Base;
204  using Base::getChecked;
205 
206  static constexpr StringLiteral name = "quant.any";
207 
208  /// Gets an instance of the type with all parameters specified but not
209  /// checked.
210  static AnyQuantizedType get(unsigned flags, Type storageType,
211  Type expressedType, int64_t storageTypeMin,
212  int64_t storageTypeMax);
213 
214  /// Gets an instance of the type with all specified parameters checked.
215  /// Returns a nullptr convertible type on failure.
216  static AnyQuantizedType
218  Type storageType, Type expressedType, int64_t storageTypeMin,
219  int64_t storageTypeMax);
220 
221  /// Verifies construction invariants and issues errors/warnings.
222  static LogicalResult
224  Type storageType, Type expressedType, int64_t storageTypeMin,
225  int64_t storageTypeMax);
226 };
227 
228 /// Represents a family of uniform, quantized types.
229 ///
230 /// Each instance of this type expresses a mapping between real values (most
231 /// often expressed in floating point f32) and quantized values (either fixed
232 /// point or affine).
233 ///
234 /// The relationship is:
235 /// real_value = scale * (quantized_value - zero_point)
236 ///
237 /// It is used as part of high level graph transformations that have the goal
238 /// of re-expressing parts of a computation in terms of this common form for
239 /// more efficient execution at runtime. In addition, it is designed to be
240 /// expressive enough to facilitate lowering to precise types and operations
241 /// in target hardware.
242 ///
243 /// As a high-level type, focused on intermediate passes, this type holds
244 /// opinions consistent with high-level usage. If lowering math kernels below
245 /// the high level arithmetic ops (i.e. to LLVM IR or hardware specific
246 /// instruction sets), it is expected that the information expressed here
247 /// will be used to drive low level codegen and target specific type selection,
248 /// but this type will likely be erased in the process.
249 ///
250 /// Syntax synopsis:
251 /// Per-layer, all parameters expressed:
252 /// !quant<uniform[StorageType:ExpressedType]{Scale:ZeroPoint}>
253 /// Per-layer, optional parameters omitted:
254 /// !quant<uniform[StorageType]{Scale}>
255 ///
256 /// StorageType: 'i'|'u' NumBits
257 /// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
258 /// Scale: A legal double value
259 /// ZeroPoint: An integer value
261  : public Type::TypeBase<UniformQuantizedType, QuantizedType,
262  detail::UniformQuantizedTypeStorage> {
263 public:
264  using Base::Base;
265  using Base::getChecked;
266 
267  static constexpr StringLiteral name = "quant.uniform";
268 
269  /// Gets an instance of the type with all parameters specified but not
270  /// checked.
271  static UniformQuantizedType get(unsigned flags, Type storageType,
272  Type expressedType, double scale,
273  int64_t zeroPoint, int64_t storageTypeMin,
274  int64_t storageTypeMax);
275 
276  /// Gets an instance of the type with all specified parameters checked.
277  /// Returns a nullptr convertible type on failure.
278  static UniformQuantizedType
280  Type storageType, Type expressedType, double scale,
281  int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax);
282 
283  /// Verifies construction invariants and issues errors/warnings.
284  static LogicalResult
286  Type storageType, Type expressedType, double scale,
287  int64_t zeroPoint, int64_t storageTypeMin,
288  int64_t storageTypeMax);
289 
290  /// Gets the scale term. The scale designates the difference between the real
291  /// values corresponding to consecutive quantized values differing by 1.
292  double getScale() const;
293 
294  /// Gets the storage value corresponding to the real value 0 in the affine
295  /// equation.
296  int64_t getZeroPoint() const;
297 
298  /// Returns true if this type is fixed point, i.e. the storage type is signed
299  /// and the zero point is 0. Fixed point values are real numbers divided by a
300  /// scale; a fixed point value can be obtained from an affine value by
301  /// subtracting the zeroPoint. Only signed storage types qualify currently.
302  /// In the future, this may be explicit versus implied by type and zeroPoint.
303  bool isFixedPoint() const { return isSigned() && getZeroPoint() == 0; }
304 };
305 
306 /// Represents per-axis (also known as per-channel) quantization.
307 ///
308 /// Syntax synopsis:
309 /// Per-axis, all parameters expressed:
310 /// !quant<uniform[StorageType:ExpressedType:QuantizedDim]{QuantParams}>
311 /// Per-axis, optional parameters omitted:
312 /// !quant<uniform[StorageType]{Scale}>
313 ///
314 /// StorageType: 'i'|'u' NumBits
315 /// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
316 /// QuantizedDim: An integer value
317 /// QuantParams: (Scale ':' ZeroPoint)+
318 /// Scale: A legal double value
319 /// ZeroPoint: An integer value
321  : public Type::TypeBase<UniformQuantizedPerAxisType, QuantizedType,
322  detail::UniformQuantizedPerAxisTypeStorage> {
323 public:
324  using Base::Base;
325  using Base::getChecked;
326 
327  static constexpr StringLiteral name = "quant.uniform_per_axis";
328 
329  /// Gets an instance of the type with all parameters specified but not
330  /// checked.
332  get(unsigned flags, Type storageType, Type expressedType,
333  ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
334  int32_t quantizedDimension, int64_t storageTypeMin,
335  int64_t storageTypeMax);
336 
337  /// Gets an instance of the type with all specified parameters checked.
338  /// Returns a nullptr convertible type on failure.
341  Type storageType, Type expressedType, ArrayRef<double> scales,
342  ArrayRef<int64_t> zeroPoints, int32_t quantizedDimension,
343  int64_t storageTypeMin, int64_t storageTypeMax);
344 
345  /// Verifies construction invariants and issues errors/warnings.
346  static LogicalResult
348  Type storageType, Type expressedType,
349  ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
350  int32_t quantizedDimension, int64_t storageTypeMin,
351  int64_t storageTypeMax);
352 
353  /// Gets the quantization scales. The scales designate the difference between
354  /// the real values corresponding to consecutive quantized values differing
355  /// by 1. The ith scale corresponds to the ith slice in the
356  /// quantized_dimension.
357  ArrayRef<double> getScales() const;
358 
359  /// Gets the storage values corresponding to the real value 0 in the affine
360  /// equation. The ith zero point corresponds to the ith slice in the
361  /// quantized_dimension.
363 
364  /// Specifies the dimension of the Tensor's shape that the scales and
365  /// zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
366  /// with quantization params:
367  /// scales=[1.0, 2.0, 3.0], zeroPoints=[1, 2, 3], quantizedDimension=1
368  /// will be quantized across the second dimension of t.
369  /// t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
370  /// t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
371  /// t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
372  int32_t getQuantizedDimension() const;
373 
374  /// Returns true if this type is fixed point: the storage type must be signed
375  /// and every per-axis zero point must be 0, matching the per-layer rule in
376  /// UniformQuantizedType::isFixedPoint(). Fixed point values are real numbers
377  /// divided by a scale; an affine value only equals its fixed point value when
378  /// its zeroPoint is 0. This may become explicit rather than implied in future.
379  bool isFixedPoint() const {
380  if (!isSigned())
381  return false;
382  return llvm::all_of(getZeroPoints(), [](int64_t zp) { return zp == 0; });
383  }
384 };
385 
386 /// Represents sub-channel (also known as blockwise) quantization.
387 ///
388 /// Syntax synopsis:
389 /// UniformQuantizedSubChannelType ::= '!quant.uniform' '<'
390 /// storageType ('<' storageMin ':' storageMax '>')? ':'
391 /// expressedType ':' BlockSizeInfo ',' ScaleZeroTensor '>'
392 /// BlockSizeInfo: '{' '}' | '{' AxisBlock (',' AxisBlock)* '}'
393 /// AxisBlock ::= AxisSpec ':' BlockSizeSpec
394 /// ScaleZeroTensor ::= ScaleZeroDenseExp | ScaleZeroList
395 /// ScaleZeroDenseExp ::= '{' ScaleZeroTensor (',' ScaleZeroTensor)* '}'
396 /// ScaleZeroList ::= ScaleZero (',' ScaleZero)*
397 /// ScaleZero ::= Scale (':' ZeroPoint)?
398 ///
399 /// StorageType: 'i'|'u' NumBits
400 /// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
401 /// AxisSpec: An integer value
402 /// BlockSizeSpec: An integer value
403 /// Scale: An attribute (usually floating-point value)
404 /// ZeroPoint: An attribute (usually integer value)
406  : public Type::TypeBase<UniformQuantizedSubChannelType, QuantizedType,
407  detail::UniformQuantizedSubChannelTypeStorage> {
408 public:
409  using Base::Base;
410  using Base::getChecked;
411 
412  static constexpr StringLiteral name = "quant.uniform_sub_channel";
413 
414  /// Gets an instance of the type with all parameters specified but not
415  /// checked.
417  get(unsigned flags, Type storageType, Type expressedType,
418  DenseElementsAttr scales, DenseElementsAttr zeroPoints,
419  ArrayRef<int32_t> quantizedDimensions, ArrayRef<int64_t> blockSizes,
420  int64_t storageTypeMin, int64_t storageTypeMax);
421 
422  /// Gets an instance of the type with all specified parameters checked.
423  /// Returns a nullptr convertible type on failure.
426  Type storageType, Type expressedType, DenseElementsAttr scales,
427  DenseElementsAttr zeroPoints,
428  ArrayRef<int32_t> quantizedDimensions,
429  ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
430  int64_t storageTypeMax);
431 
432  /// Verifies construction invariants and issues errors/warnings.
433  static LogicalResult
435  Type storageType, Type expressedType,
436  DenseElementsAttr scales, DenseElementsAttr zeroPoints,
437  ArrayRef<int32_t> quantizedDimensions,
438  ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
439  int64_t storageTypeMax);
440 
441  /// Gets the quantization scales. The scales are organized in a
442  /// multi-dimensional tensor. The size of each dimension in the scales tensor
443  /// is determined by the number of blocks along the corresponding dimension in
444  /// the quantized data tensor.
445  ///
446  /// For example, if the quantized data tensor has shape [X0, X1, ..., XR-1]
447  /// and the block sizes are [B0, B1, ..., BR-1], then the scales tensor will
448  /// have shape [X0/B0, X1/B1, ..., XR-1/BR-1].
449  ///
450  /// The scale value for a specific element in the quantized data tensor at
451  /// position [i0, i1, ..., iR-1] is determined by accessing the corresponding
452  /// element in the scales tensor at position [i0/B0, i1/B1, ..., iR-1/BR-1].
454 
455  /// Gets the quantization zero-points. The zero-points are organized in a
456  /// multi-dimensional tensor. The size of each dimension in the zero-point
457  /// tensor is determined by the number of blocks along the corresponding
458  /// dimension in the quantized data tensor.
459  ///
460  /// For example, if the quantized data tensor has shape [X0, X1, ..., XR-1]
461  /// and the block sizes are [B0, B1, ..., BR-1], then the zero-point tensor
462  /// will have shape [X0/B0, X1/B1, ..., XR-1/BR-1].
463  ///
464  /// The zero-point value for a specific element in the quantized data tensor
465  /// at position [i0, i1, ..., iR-1] is determined by accessing the
466  /// corresponding element in the zero-point tensor at position [i0/B0, i1/B1,
467  /// ..., iR-1/BR-1].
469 
470  /// Gets the quantized dimensions. Each element in the returned list
471  /// represents an axis of the quantized data tensor that has a specified block
472  /// size. The order of elements corresponds to the order of block sizes
473  /// returned by `getBlockSizes()`.
474  ///
475  /// It means that the data tensor is quantized along the `i`-th dimension in
476  /// the returned list using the `i`-th block size from `getBlockSizes()`.
477  ///
478  /// Note that the type expression does not have to specify the block size for
479  /// all axes in the data tensor. Any unspecified block size for an axis `i`
480  /// defaults to the tensor dimension size of that axis.
481  ///
482  /// For example, for a quantized type:
483  /// `tensor<8x4x2x!quant.uniform<i8:f32:{1:2, 0:8}, {{1.0, 2.0}, {3.0, 4.0}}>>`
484  ///
485  /// `getQuantizedDimensions()` returns [1, 0].
486  /// `getBlockSizes()` returns [2, 8].
487  ///
488  /// This indicates that:
489  /// * Axis 1 (second dimension) is quantized with a block size of 2.
490  /// * Axis 0 (first dimension) is quantized with a block size of 8.
491  /// Since axis 2 is not specified, it implicitly has a block size equal to
492  /// the size of the third dimension (which is 2 in this case).
494 
495  /// Gets the block sizes for the quantized dimensions. The `i`-th element in
496  /// the returned list corresponds to the block size for the `i`-th dimension
497  /// in the list returned by `getQuantizedDimensions()`.
498  ///
499  /// See `getQuantizedDimensions()` for more details and examples.
501 
502  /// Gets the block size information. This returns a list of pairs, where each
503  /// pair represents a quantized dimension and its corresponding block size.
504  ///
505  /// For example, for the type:
506  /// `tensor<8x4x!quant.uniform<i8:f32:{1:2, 0:8}, {{2.0, 3.0}}>>`
507  ///
508  /// This method returns:
509  /// `[(1, 2), (0, 8)]`
510  ///
511  /// This list indicates that axis 1 has a block size of 2, and axis 0 has a
512  /// block size of 8.
514 };
515 
516 /// A quantized type that infers its range from given min/max values.
517 ///
518 /// Typical syntax:
519 /// quant.calibrated<f32<-0.922,0.981>>
521  : public Type::TypeBase<CalibratedQuantizedType, QuantizedType,
522  detail::CalibratedQuantizedTypeStorage> {
523 public:
524  using Base::Base;
525  using Base::getChecked;
526 
527  static constexpr StringLiteral name = "quant.calibrated";
528 
529  /// Gets an instance of the type with all parameters specified but not
530  /// checked.
531  static CalibratedQuantizedType get(Type expressedType, double min,
532  double max);
533 
534  /// Gets an instance of the type with all specified parameters checked.
535  /// Returns a nullptr convertible type on failure.
538  double min, double max);
539 
540  /// Verifies construction invariants and issues errors/warnings.
541  static LogicalResult
543  Type expressedType, double min, double max);
544  double getMin() const;
545  double getMax() const;
546 };
547 
548 } // namespace quant
549 } // namespace mlir
550 
551 #endif // MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
An attribute that represents a reference to a dense vector or tensor object.
This class represents a diagnostic that is inflight and set to be reported.
Definition: Diagnostics.h:314
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isF64() const
Definition: Types.cpp:41
bool isIndex() const
Definition: Types.cpp:54
constexpr Type()=default
bool isF32() const
Definition: Types.cpp:40
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition: Types.cpp:56
bool isF16() const
Definition: Types.cpp:38
bool isBF16() const
Definition: Types.cpp:37
Utility class for implementing users of storage classes uniqued by a StorageUniquer.
A quantized type that maps storage to/from expressed types in an unspecified way.
Definition: QuantTypes.h:201
static AnyQuantizedType get(unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:245
static constexpr StringLiteral name
Definition: QuantTypes.h:206
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:264
static AnyQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:254
A quantized type that infers its range from given min/max values.
Definition: QuantTypes.h:522
static constexpr StringLiteral name
Definition: QuantTypes.h:527
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:542
static CalibratedQuantizedType get(Type expressedType, double min, double max)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:530
static CalibratedQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:535
Base class for all quantized types known to this dialect.
Definition: QuantTypes.h:50
Type getExpressedType() const
Gets the original expressed type that this quantized type approximates.
Definition: QuantTypes.cpp:109
static constexpr unsigned MaxStorageBits
The maximum number of bits supported for storage types.
Definition: QuantTypes.h:56
bool hasStorageTypeBounds() const
Return whether the storage type has explicit min or max boundaries different from the minimum and max...
Definition: QuantTypes.cpp:92
static Type castToStorageType(Type quantizedType)
Casts from a type based on a QuantizedType to a corresponding type based on the storageType (returns ...
Definition: QuantTypes.cpp:155
Type castExpressedToStorageType(Type candidateType)
Casts from a type based on the expressedType to the equivalent type based on storageType by way of th...
Definition: QuantTypes.cpp:237
static Type castToExpressedType(Type quantizedType)
Casts from a type based on QuantizedType to a corresponding type based on the expressedType (returns ...
Definition: QuantTypes.cpp:210
bool isSigned() const
Whether the storage type should be interpreted as a signed quantity (true) or an unsigned value (fals...
Definition: QuantTypes.h:103
static QuantizedType getQuantizedElementType(Type primitiveOrContainerType)
Returns the element type as a QuantizedType or nullptr if it is not a quantized type.
Definition: QuantTypes.cpp:122
unsigned getFlags() const
Gets the flags associated with this type.
Definition: QuantTypes.cpp:39
int64_t getStorageTypeMax() const
The maximum value that storageType can take.
Definition: QuantTypes.cpp:88
static int64_t getDefaultMaximumForInteger(bool isSigned, unsigned integralWidth)
Gets the maximum possible value stored by a storageType.
Definition: QuantTypes.h:78
unsigned getStorageTypeIntegralWidth() const
Gets the integral bit width that the underlying storage type can exactly represent.
Definition: QuantTypes.cpp:103
static bool classof(Type type)
Support method to enable LLVM-style type casting.
Definition: QuantTypes.cpp:43
Type castFromStorageType(Type candidateType)
Casts from a type based on the storageType to a corresponding type based on this type (returns nullpt...
Definition: QuantTypes.cpp:131
int64_t getStorageTypeMin() const
The minimum value that storageType can take.
Definition: QuantTypes.cpp:84
static int64_t getDefaultMinimumForInteger(bool isSigned, unsigned integralWidth)
Gets the minimum possible value stored by a storageType.
Definition: QuantTypes.h:68
Type getStorageType() const
Gets the underlying type used to store values.
Definition: QuantTypes.cpp:80
Type castFromExpressedType(Type candidateType)
Casts from a type based on the expressedType to a corresponding type based on this type (returns null...
Definition: QuantTypes.cpp:182
bool isCompatibleExpressedType(Type candidateExpressedType)
Returns whether the candidateExpressedType is a match for this QuantizedType.
Definition: QuantTypes.cpp:113
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Definition: QuantTypes.cpp:48
Represents per-axis (also known as per-channel) quantization.
Definition: QuantTypes.h:322
static constexpr StringLiteral name
Definition: QuantTypes.h:327
static UniformQuantizedPerAxisType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:348
bool isFixedPoint() const
Fixed point values are real numbers divided by a scale.
Definition: QuantTypes.h:379
static UniformQuantizedPerAxisType get(unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:338
int32_t getQuantizedDimension() const
Specifies the dimension of the Tensor's shape that the scales and zero_points correspond to.
Definition: QuantTypes.cpp:409
ArrayRef< int64_t > getZeroPoints() const
Gets the storage values corresponding to the real value 0 in the affine equation.
Definition: QuantTypes.cpp:405
ArrayRef< double > getScales() const
Gets the quantization scales.
Definition: QuantTypes.cpp:401
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:358
Represents sub-channel (also known as blockwise) quantization.
Definition: QuantTypes.h:407
static constexpr StringLiteral name
Definition: QuantTypes.h:412
ArrayRef< int32_t > getQuantizedDimensions() const
Gets the quantized dimensions.
Definition: QuantTypes.cpp:509
DenseElementsAttr getZeroPoints() const
Gets the quantization zero-points.
Definition: QuantTypes.cpp:504
ArrayRef< int64_t > getBlockSizes() const
Gets the block sizes for the quantized dimensions.
Definition: QuantTypes.cpp:513
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:435
const SmallVector< std::pair< int32_t, int64_t > > getBlockSizeInfo() const
Gets the block size information.
Definition: QuantTypes.cpp:518
static UniformQuantizedSubChannelType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:423
static UniformQuantizedSubChannelType get(unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:413
DenseElementsAttr getScales() const
Gets the quantization scales.
Definition: QuantTypes.cpp:500
Represents a family of uniform, quantized types.
Definition: QuantTypes.h:262
double getScale() const
Gets the scale term.
Definition: QuantTypes.cpp:332
int64_t getZeroPoint() const
Gets the storage value corresponding to the real value 0 in the affine equation.
Definition: QuantTypes.cpp:334
static constexpr StringLiteral name
Definition: QuantTypes.h:267
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:301
static UniformQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:292
static UniformQuantizedType get(unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:283
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.