MLIR  22.0.0git
QuantTypes.h
Go to the documentation of this file.
1 //===- QuantTypes.h - Quantization Ops and Types ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
10 #define MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
11 
12 #include "mlir/IR/Attributes.h"
13 #include "mlir/IR/Builders.h"
14 #include "mlir/IR/BuiltinTypes.h"
15 #include "mlir/IR/Dialect.h"
16 #include "mlir/IR/OpDefinition.h"
17 #include "mlir/IR/Types.h"
18 #include "llvm/Support/MathExtras.h"
19 
20 namespace mlir {
21 namespace quant {
22 namespace detail {
23 
30 
31 } // namespace detail
32 
33 /// Enumeration of bit-mapped flags related to quantized types.
34 namespace QuantizationFlags {
35 enum FlagValue {
36  /// Indicates that the storage type should be interpreted as a signed
37  /// integer. The default is to interpret it as an unsigned value.
38  Signed = 1,
39 };
40 } // namespace QuantizationFlags
41 
42 /// Base class for all quantized types known to this dialect.
43 /// All quantized types have:
44 /// - storageType: The (narrower) numeric type that is being used to
45 /// approximate some expressed type.
46 /// - expressedType: The type that is being approximated.
47 ///
48 /// The base class provides generic support for manipulating the types based
49 /// on these fields.
50 class QuantizedType : public Type {
51 public:
53  using Type::Type;
54 
55  /// The maximum number of bits supported for storage types.
56  static constexpr unsigned MaxStorageBits = 32;
57 
58  static LogicalResult
60  Type storageType, Type expressedType, int64_t storageTypeMin,
61  int64_t storageTypeMax);
62 
63  /// Support method to enable LLVM-style type casting.
64  static bool classof(Type type);
65 
66  /// Gets the minimum possible value stored by a storageType. storageTypeMin
67  /// must be greater than or equal to this value.
69  unsigned integralWidth) {
70  if (isSigned) {
71  return llvm::minIntN(integralWidth);
72  }
73  return 0;
74  }
75 
76  /// Gets the maximum possible value stored by a storageType. storageTypeMax
77  /// must be less than or equal to this value.
79  unsigned integralWidth) {
80  if (isSigned) {
81  return llvm::maxIntN(integralWidth);
82  }
83  return llvm::maxUIntN(integralWidth);
84  }
85 
86  /// Gets the original expressed type that this quantized type approximates.
87  /// Note that this presumes that the quantized type was always derived from
88  /// a floating point type, which in the broadest definition, is not true (i.e.
89  /// it could be some form of integral, fixed type or affine type in its own
90  /// right); however, at the high level, no examples of such usage are
91  /// presently known and the restriction serves some useful purposes (such as
92  /// always being able to reverse a transformation or measure error). In most
93  /// cases, this will be f32.
94  Type getExpressedType() const;
95 
96  /// Gets the flags associated with this type. Typically a more specific
97  /// accessor is appropriate.
98  unsigned getFlags() const;
99 
100  // Convenience helpers.
101  /// Whether the storage type should be interpreted as a signed quantity
102  /// (true) or an unsigned value (false).
103  bool isSigned() const {
104  return (getFlags() & QuantizationFlags::Signed) ==
106  }
107 
108  /// Gets the underlying type used to store values. Note that this may
109  /// be signed or unsigned. Use the isSigned() accessor to differentiate.
110  Type getStorageType() const;
111 
112  /// The minimum value that storageType can take.
113  int64_t getStorageTypeMin() const;
114 
115  /// The maximum value that storageType can take.
116  int64_t getStorageTypeMax() const;
117 
118  /// Return whether the storage type has explicit min or max boundaries
119  /// different from the minimum and maximum representable values.
120  bool hasStorageTypeBounds() const;
121 
122  /// Gets the integral bit width that the underlying storage type can exactly
123  /// represent. For integral storage types, this will just be their width.
124  unsigned getStorageTypeIntegralWidth() const;
125 
126  /// Returns whether the candidateExpressedType is a match for this
127  /// QuantizedType. This will be true if the candidate type is either a
128  /// primitive type or a container type whose element type equals this
129  /// QuantizedType's expressed type.
130  /// Examples of compatible candidateExpressedType:
131  /// !quant.uniform<i8:f32, 1.0> =~ f32
132  /// !quant.uniform<i8:f32, 1.0> =~ tensor<4xf32>
133  bool isCompatibleExpressedType(Type candidateExpressedType);
134 
135  /// Returns the element type as a QuantizedType or nullptr if it is not
136  /// a quantized type. If the type is primitive, returns that. If it is a
137  /// container (vector/tensor), return the element type.
138  /// Examples:
139  /// !quant.uniform<i8:f32, 1.0> -> !quant.uniform<i8:f32, 1.0>
140  /// tensor<4x!quant.uniform<i8:f32, 1.0>> -> !quant.uniform<i8:f32, 1.0>
141  static QuantizedType getQuantizedElementType(Type primitiveOrContainerType);
142 
143  /// Casts from a type based on the storageType to a corresponding type based
144  /// on this type (returns nullptr if the cast is not valid).
145  /// Examples:
146  /// `candidate type` -> `return type`
147  /// i8 -> !quant.uniform<i8:f32, 1.0>
148  /// tensor<4xi8> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
149  /// vector<4xi8> -> vector<4x!quant.uniform<i8:f32, 1.0>>
150  /// It is assumed above that this type's quantization is `<i8:f32, 1.0>`.
151  Type castFromStorageType(Type candidateType);
152 
153  /// Casts from a type based on a QuantizedType to a corresponding type based
154  /// on the storageType (returns nullptr if the cast is not valid).
155  /// This is the inverse of castFromStorageType().
156  static Type castToStorageType(Type quantizedType);
157 
158  /// Casts from a type based on the expressedType to a corresponding type based
159  /// on this type (returns nullptr if the cast is not valid).
160  /// Examples:
161  /// f32 -> !quant.uniform<i8:f32, 1.0>
162  /// tensor<4xf32> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
163  /// vector<4xf32> -> vector<4x!quant.uniform<i8:f32, 1.0>>
164  Type castFromExpressedType(Type candidateType);
165 
166  /// Casts from a type based on QuantizedType to a corresponding type based
167  /// on the expressedType (returns nullptr if the cast is not valid).
168  /// This is the inverse of castFromExpressedType.
169  static Type castToExpressedType(Type quantizedType);
170 
171  /// Casts from a type based on the expressedType to the equivalent type
172  /// based on storageType by way of this QuantizedType. Equivalent to:
173  /// QuantizedType::castToStorageType(castFromExpressedType(candidateType))
174  /// (but with validity checks).
175  /// Example (for this = !quant.uniform<i8:f32, 1.0>):
176  /// tensor<4xf32> -> tensor<4xi8>
177  Type castExpressedToStorageType(Type candidateType);
178 
179 private:
180  /// Hide the following methods inherited from `Type`. It is almost certainly
181  /// a bug to call them from a `QuantizedType` object. Users should call
182  /// `getStorageType` or `getExpressedType` to get the underlying types
183  /// they want to inspect.
184  using Type::isBF16;
185  using Type::isF16;
186  using Type::isF32;
187  using Type::isF64;
188  using Type::isIndex;
189  using Type::isInteger;
190 };
191 
192 /// A quantized type that maps storage to/from expressed types in an
193 /// unspecified way.
194 ///
195 /// Typical syntax:
196 /// quant.any<i8:f32>
197 /// quant.any<i8>
198 /// quant.any<i8<-16,15>>
199 ///
200 /// Note that for the any type, the expressed type is optional.
202  : public Type::TypeBase<AnyQuantizedType, QuantizedType,
203  detail::AnyQuantizedTypeStorage> {
204 public:
205  using Base::Base;
206  using Base::getChecked;
207 
208  static constexpr StringLiteral name = "quant.any";
209 
210  /// Gets an instance of the type with all parameters specified but not
211  /// checked.
212  static AnyQuantizedType get(unsigned flags, Type storageType,
213  Type expressedType, int64_t storageTypeMin,
214  int64_t storageTypeMax);
215 
216  /// Gets an instance of the type with all specified parameters checked.
217  /// Returns a nullptr convertible type on failure.
218  static AnyQuantizedType
220  Type storageType, Type expressedType, int64_t storageTypeMin,
221  int64_t storageTypeMax);
222 
223  /// Verifies construction invariants and issues errors/warnings.
224  static LogicalResult
226  Type storageType, Type expressedType, int64_t storageTypeMin,
227  int64_t storageTypeMax);
228 };
229 
230 /// Represents a family of uniform, quantized types.
231 ///
232 /// Each instance of this type expresses a mapping between real values (most
233 /// often expressed in floating point f32) and quantized values (either fixed
234 /// point or affine).
235 ///
236 /// The relationship is:
237 /// real_value = scale * (quantized_value - zero_point)
238 ///
239 /// It is used as part of high level graph transformations that have the goal
240 /// of re-expressing parts of a computation in terms of this common form for
241 /// more efficient execution at runtime. In addition, it is designed to be
242 /// expressive enough to facilitate lowering to precise types and operations
243 /// in target hardware.
244 ///
245 /// As a high-level type, focused on intermediate passes, this type holds
246 /// opinions consistent with high-level usage. If lowering math kernels below
247 /// the high level arithmetic ops (i.e. to LLVM IR or hardware specific
248 /// instruction sets), it is expected that the information expressed here
249 /// will be used to drive low level codegen and target specific type selection,
250 /// but this type will likely be erased in the process.
251 ///
252 /// Syntax synopsis:
253 /// Per-layer, all parameters expressed:
254 /// !quant<uniform[StorageType:ExpressedType]{Scale:ZeroPoint}>
255 /// Per-layer, optional parameters omitted:
256 /// !quant<uniform[StorageType]{Scale}>
257 ///
258 /// StorageType: 'i'|'u' NumBits
259 /// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
260 /// Scale: A legal double value
261 /// ZeroPoint: An integer value
263  : public Type::TypeBase<UniformQuantizedType, QuantizedType,
264  detail::UniformQuantizedTypeStorage> {
265 public:
266  using Base::Base;
267  using Base::getChecked;
268 
269  static constexpr StringLiteral name = "quant.uniform";
270 
271  /// Gets an instance of the type with all parameters specified but not
272  /// checked.
273  static UniformQuantizedType get(unsigned flags, Type storageType,
274  Type expressedType, double scale,
275  int64_t zeroPoint, int64_t storageTypeMin,
276  int64_t storageTypeMax);
277 
278  /// Gets an instance of the type with all specified parameters checked.
279  /// Returns a nullptr convertible type on failure.
280  static UniformQuantizedType
282  Type storageType, Type expressedType, double scale,
283  int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax);
284 
285  /// Verifies construction invariants and issues errors/warnings.
286  static LogicalResult
288  Type storageType, Type expressedType, double scale,
289  int64_t zeroPoint, int64_t storageTypeMin,
290  int64_t storageTypeMax);
291 
292  /// Gets the scale term. The scale designates the difference between the real
293  /// values corresponding to consecutive quantized values differing by 1.
294  double getScale() const;
295 
296  /// Gets the storage value corresponding to the real value 0 in the affine
297  /// equation.
298  int64_t getZeroPoint() const;
299 
300  /// Returns true if this type is currently treated as fixed point: signed
301  /// storage with a zero point of 0. Fixed point values are real numbers
302  /// divided by a scale; a fixed point value can be obtained from an affine
303  /// value by subtracting the zeroPoint.
304  /// In the future, this may be explicit versus implied by type and zeroPoint.
305  bool isFixedPoint() const { return isSigned() && getZeroPoint() == 0; }
306 };
307 
308 /// Represents per-axis (also known as per-channel) quantization.
309 ///
310 /// Syntax synopsis:
311 /// Per-axis, all parameters expressed:
312 /// !quant<uniform[StorageType:ExpressedType:QuantizedDim]{QuantParams}>
313 /// Per-axis, optional parameters omitted:
314 /// !quant<uniform[StorageType]{Scale}>
315 ///
316 /// StorageType: 'i'|'u' NumBits
317 /// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
318 /// QuantizedDim: An integer value
319 /// QuantParams: (Scale ':' ZeroPoint)+
320 /// Scale: A legal double value
321 /// ZeroPoint: An integer value
323  : public Type::TypeBase<UniformQuantizedPerAxisType, QuantizedType,
324  detail::UniformQuantizedPerAxisTypeStorage> {
325 public:
326  using Base::Base;
327  using Base::getChecked;
328 
329  static constexpr StringLiteral name = "quant.uniform_per_axis";
330 
331  /// Gets an instance of the type with all parameters specified but not
332  /// checked.
334  get(unsigned flags, Type storageType, Type expressedType,
335  ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
336  int32_t quantizedDimension, int64_t storageTypeMin,
337  int64_t storageTypeMax);
338 
339  /// Gets an instance of the type with all specified parameters checked.
340  /// Returns a nullptr convertible type on failure.
343  Type storageType, Type expressedType, ArrayRef<double> scales,
344  ArrayRef<int64_t> zeroPoints, int32_t quantizedDimension,
345  int64_t storageTypeMin, int64_t storageTypeMax);
346 
347  /// Verifies construction invariants and issues errors/warnings.
348  static LogicalResult
350  Type storageType, Type expressedType,
351  ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
352  int32_t quantizedDimension, int64_t storageTypeMin,
353  int64_t storageTypeMax);
354 
355  /// Gets the quantization scales. The scales designate the difference between
356  /// the real values corresponding to consecutive quantized values differing
357  /// by 1. The ith scale corresponds to the ith slice in the
358  /// quantized_dimension.
359  ArrayRef<double> getScales() const;
360 
361  /// Gets the storage values corresponding to the real value 0 in the affine
362  /// equation. The ith zero point corresponds to the ith slice in the
363  /// quantized_dimension.
365 
366  /// Specifies the dimension of the Tensor's shape that the scales and
367  /// zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
368  /// with quantization params:
369  /// scales=[1.0, 2.0, 3.0], zeroPoints=[1, 2, 3], quantizedDimension=1
370  /// will be quantized across the second dimension of t.
371  /// t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
372  /// t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
373  /// t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
374  int32_t getQuantizedDimension() const;
375 
376  /// Returns true if this type is fixed point: signed storage with every
377  /// per-axis zero point equal to 0. Fixed point values are real numbers
378  /// divided by a scale; a fixed point value can be obtained from an affine
379  /// value by subtracting the zeroPoint.
380  /// In the future, this may be explicit versus implied by type and zeroPoint.
381  bool isFixedPoint() const {
382  if (!isSigned())
383  return false;
384  return llvm::all_of(getZeroPoints(), [](int64_t zp) { return zp == 0; });
385  }
386 };
387 
388 /// Represents sub-channel (also known as blockwise) quantization.
389 ///
390 /// Syntax synopsis:
391 /// UniformQuantizedSubChannelType ::= '!quant.uniform' '<'
392 /// storageType ('<' storageMin ':' storageMax '>')? ':'
393 /// expressedType ':' BlockSizeInfo ',' ScaleZeroTensor '>'
394 /// BlockSizeInfo: '{' '}' | '{' AxisBlock (',' AxisBlock)* '}'
395 /// AxisBlock ::= AxisSpec ':' BlockSizeSpec
396 /// ScaleZeroTensor ::= ScaleZeroDenseExp | ScaleZeroList
397 /// ScaleZeroDenseExp ::= '{' ScaleZeroTensor (',' ScaleZeroTensor)* '}'
398 /// ScaleZeroList ::= ScaleZero (',' ScaleZero)*
399 /// ScaleZero ::= Scale (':' ZeroPoint)?
400 ///
401 /// StorageType: 'i'|'u' NumBits
402 /// ExpressedType: 'f16', 'f32', 'bf16', 'f64'
403 /// AxisSpec: An integer value
404 /// BlockSizeSpec: An integer value
405 /// Scale: An attribute (usually floating-point value)
406 /// ZeroPoint: An attribute (usually integer value)
408  : public Type::TypeBase<UniformQuantizedSubChannelType, QuantizedType,
409  detail::UniformQuantizedSubChannelTypeStorage> {
410 public:
411  using Base::Base;
412  using Base::getChecked;
413 
414  static constexpr StringLiteral name = "quant.uniform_sub_channel";
415 
416  /// Gets an instance of the type with all parameters specified but not
417  /// checked.
419  get(unsigned flags, Type storageType, Type expressedType,
420  DenseElementsAttr scales, DenseElementsAttr zeroPoints,
421  ArrayRef<int32_t> quantizedDimensions, ArrayRef<int64_t> blockSizes,
422  int64_t storageTypeMin, int64_t storageTypeMax);
423 
424  /// Gets an instance of the type with all specified parameters checked.
425  /// Returns a nullptr convertible type on failure.
428  Type storageType, Type expressedType, DenseElementsAttr scales,
429  DenseElementsAttr zeroPoints,
430  ArrayRef<int32_t> quantizedDimensions,
431  ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
432  int64_t storageTypeMax);
433 
434  /// Verifies construction invariants and issues errors/warnings.
435  static LogicalResult
437  Type storageType, Type expressedType,
438  DenseElementsAttr scales, DenseElementsAttr zeroPoints,
439  ArrayRef<int32_t> quantizedDimensions,
440  ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
441  int64_t storageTypeMax);
442 
443  /// Gets the quantization scales. The scales are organized in a
444  /// multi-dimensional tensor. The size of each dimension in the scales tensor
445  /// is determined by the number of blocks along the corresponding dimension in
446  /// the quantized data tensor.
447  ///
448  /// For example, if the quantized data tensor has shape [X0, X1, ..., XR-1]
449  /// and the block sizes are [B0, B1, ..., BR-1], then the scales tensor will
450  /// have shape [X0/B0, X1/B1, ..., XR-1/BR-1].
451  ///
452  /// The scale value for a specific element in the quantized data tensor at
453  /// position [i0, i1, ..., iR-1] is determined by accessing the corresponding
454  /// element in the scales tensor at position [i0/B0, i1/B1, ..., iR-1/BR-1].
456 
457  /// Gets the quantization zero-points. The zero-points are organized in a
458  /// multi-dimensional tensor. The size of each dimension in the zero-point
459  /// tensor is determined by the number of blocks along the corresponding
460  /// dimension in the quantized data tensor.
461  ///
462  /// For example, if the quantized data tensor has shape [X0, X1, ..., XR-1]
463  /// and the block sizes are [B0, B1, ..., BR-1], then the zero-point tensor
464  /// will have shape [X0/B0, X1/B1, ..., XR-1/BR-1].
465  ///
466  /// The zero-point value for a specific element in the quantized data tensor
467  /// at position [i0, i1, ..., iR-1] is determined by accessing the
468  /// corresponding element in the zero-point tensor at position [i0/B0, i1/B1,
469  /// ..., iR-1/BR-1].
471 
472  /// Gets the quantized dimensions. Each element in the returned list
473  /// represents an axis of the quantized data tensor that has a specified block
474  /// size. The order of elements corresponds to the order of block sizes
475  /// returned by `getBlockSizes()`.
476  ///
477  /// It means that the data tensor is quantized along the `i`-th dimension in
478  /// the returned list using the `i`-th block size from `getBlockSizes()`.
479  ///
480  /// Note that the type expression does not have to specify the block size for
481  /// all axes in the data tensor. Any unspecified block size for an axis `i`
482  /// defaults to the tensor dimension size of that axis.
483  ///
484  /// For example, for a quantized type:
485  /// `tensor<8x4x2x!quant.uniform<i8:f32:{1:2, 0:8}, {{1.0, 2.0}, {3.0, 4.0}}>>`
486  ///
487  /// `getQuantizedDimensions()` returns [1, 0].
488  /// `getBlockSizes()` returns [2, 8].
489  ///
490  /// This indicates that:
491  /// * Axis 1 (second dimension) is quantized with a block size of 2.
492  /// * Axis 0 (first dimension) is quantized with a block size of 8.
493  /// Since axis 2 is not specified, it implicitly has a block size equal to
494  /// the size of the third dimension (which is 2 in this case).
496 
497  /// Gets the block sizes for the quantized dimensions. The `i`-th element in
498  /// the returned list corresponds to the block size for the `i`-th dimension
499  /// in the list returned by `getQuantizedDimensions()`.
500  ///
501  /// See `getQuantizedDimensions()` for more details and examples.
503 
504  /// Gets the block size information. This returns a list of pairs, where each
505  /// pair represents a quantized dimension and its corresponding block size.
506  ///
507  /// For example, for the type:
508  /// `tensor<8x4x!quant.uniform<i8:f32:{1:2, 0:8}, {{2.0, 3.0}}>>`
509  ///
510  /// This method returns:
511  /// `[(1, 2), (0, 8)]`
512  ///
513  /// This list indicates that axis 1 has a block size of 2, and axis 0 has a
514  /// block size of 8.
516 };
517 
518 /// A quantized type that infers its range from given min/max values.
519 ///
520 /// Typical syntax:
521 /// quant.calibrated<f32<-0.922,0.981>>
523  : public Type::TypeBase<CalibratedQuantizedType, QuantizedType,
524  detail::CalibratedQuantizedTypeStorage> {
525 public:
526  using Base::Base;
527  using Base::getChecked;
528 
529  static constexpr StringLiteral name = "quant.calibrated";
530 
531  /// Gets an instance of the type with all parameters specified but not
532  /// checked.
533  static CalibratedQuantizedType get(Type expressedType, double min,
534  double max);
535 
536  /// Gets an instance of the type with all specified parameters checked.
537  /// Returns a nullptr convertible type on failure.
540  double min, double max);
541 
542  /// Verifies construction invariants and issues errors/warnings.
543  static LogicalResult
545  Type expressedType, double min, double max);
546  double getMin() const;
547  double getMax() const;
548 };
549 
550 } // namespace quant
551 } // namespace mlir
552 
553 #endif // MLIR_DIALECT_QUANT_IR_QUANTTYPES_H
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
An attribute that represents a reference to a dense vector or tensor object.
This class represents a diagnostic that is inflight and set to be reported.
Definition: Diagnostics.h:314
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isF64() const
Definition: Types.cpp:41
bool isIndex() const
Definition: Types.cpp:54
constexpr Type()=default
bool isF32() const
Definition: Types.cpp:40
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition: Types.cpp:56
bool isF16() const
Definition: Types.cpp:38
bool isBF16() const
Definition: Types.cpp:37
Utility class for implementing users of storage classes uniqued by a StorageUniquer.
A quantized type that maps storage to/from expressed types in an unspecified way.
Definition: QuantTypes.h:203
static AnyQuantizedType get(unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:242
static constexpr StringLiteral name
Definition: QuantTypes.h:208
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:261
static AnyQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:251
A quantized type that infers its range from given min/max values.
Definition: QuantTypes.h:524
static constexpr StringLiteral name
Definition: QuantTypes.h:529
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:539
static CalibratedQuantizedType get(Type expressedType, double min, double max)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:527
static CalibratedQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:532
Base class for all quantized types known to this dialect.
Definition: QuantTypes.h:50
Type getExpressedType() const
Gets the original expressed type that this quantized type approximates.
Definition: QuantTypes.cpp:106
static constexpr unsigned MaxStorageBits
The maximum number of bits supported for storage types.
Definition: QuantTypes.h:56
bool hasStorageTypeBounds() const
Return whether the storage type has explicit min or max boundaries different from the minimum and max...
Definition: QuantTypes.cpp:89
static Type castToStorageType(Type quantizedType)
Casts from a type based on a QuantizedType to a corresponding type based on the storageType (returns ...
Definition: QuantTypes.cpp:152
Type castExpressedToStorageType(Type candidateType)
Casts from a type based on the expressedType to the equivalent type based on storageType by way of th...
Definition: QuantTypes.cpp:234
static Type castToExpressedType(Type quantizedType)
Casts from a type based on QuantizedType to a corresponding type based on the expressedType (returns ...
Definition: QuantTypes.cpp:207
bool isSigned() const
Whether the storage type should be interpreted as a signed quantity (true) or an unsigned value (fals...
Definition: QuantTypes.h:103
static QuantizedType getQuantizedElementType(Type primitiveOrContainerType)
Returns the element type as a QuantizedType or nullptr if it is not a quantized type.
Definition: QuantTypes.cpp:119
unsigned getFlags() const
Gets the flags associated with this type.
Definition: QuantTypes.cpp:36
int64_t getStorageTypeMax() const
The maximum value that storageType can take.
Definition: QuantTypes.cpp:85
static int64_t getDefaultMaximumForInteger(bool isSigned, unsigned integralWidth)
Gets the maximum possible value stored by a storageType.
Definition: QuantTypes.h:78
unsigned getStorageTypeIntegralWidth() const
Gets the integral bit width that the underlying storage type can exactly represent.
Definition: QuantTypes.cpp:100
static bool classof(Type type)
Support method to enable LLVM-style type casting.
Definition: QuantTypes.cpp:40
Type castFromStorageType(Type candidateType)
Casts from a type based on the storageType to a corresponding type based on this type (returns nullpt...
Definition: QuantTypes.cpp:128
int64_t getStorageTypeMin() const
The minimum value that storageType can take.
Definition: QuantTypes.cpp:81
static int64_t getDefaultMinimumForInteger(bool isSigned, unsigned integralWidth)
Gets the minimum possible value stored by a storageType.
Definition: QuantTypes.h:68
Type getStorageType() const
Gets the underlying type used to store values.
Definition: QuantTypes.cpp:77
Type castFromExpressedType(Type candidateType)
Casts from a type based on the expressedType to a corresponding type based on this type (returns null...
Definition: QuantTypes.cpp:179
bool isCompatibleExpressedType(Type candidateExpressedType)
Returns whether the candidateExpressedType is a match for this QuantizedType.
Definition: QuantTypes.cpp:110
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Definition: QuantTypes.cpp:45
Represents per-axis (also known as per-channel) quantization.
Definition: QuantTypes.h:324
static constexpr StringLiteral name
Definition: QuantTypes.h:329
static UniformQuantizedPerAxisType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:345
bool isFixedPoint() const
Fixed point values are real numbers divided by a scale.
Definition: QuantTypes.h:381
static UniformQuantizedPerAxisType get(unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:335
int32_t getQuantizedDimension() const
Specifies the dimension of the Tensor's shape that the scales and zero_points correspond to.
Definition: QuantTypes.cpp:406
ArrayRef< int64_t > getZeroPoints() const
Gets the storage values corresponding to the real value 0 in the affine equation.
Definition: QuantTypes.cpp:402
ArrayRef< double > getScales() const
Gets the quantization scales.
Definition: QuantTypes.cpp:398
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:355
Represents sub-channel (also known as blockwise) quantization.
Definition: QuantTypes.h:409
static constexpr StringLiteral name
Definition: QuantTypes.h:414
ArrayRef< int32_t > getQuantizedDimensions() const
Gets the quantized dimensions.
Definition: QuantTypes.cpp:506
DenseElementsAttr getZeroPoints() const
Gets the quantization zero-points.
Definition: QuantTypes.cpp:501
ArrayRef< int64_t > getBlockSizes() const
Gets the block sizes for the quantized dimensions.
Definition: QuantTypes.cpp:510
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:432
const SmallVector< std::pair< int32_t, int64_t > > getBlockSizeInfo() const
Gets the block size information.
Definition: QuantTypes.cpp:515
static UniformQuantizedSubChannelType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:420
static UniformQuantizedSubChannelType get(unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:410
DenseElementsAttr getScales() const
Gets the quantization scales.
Definition: QuantTypes.cpp:497
Represents a family of uniform, quantized types.
Definition: QuantTypes.h:264
double getScale() const
Gets the scale term.
Definition: QuantTypes.cpp:329
int64_t getZeroPoint() const
Gets the storage value corresponding to the real value 0 in the affine equation.
Definition: QuantTypes.cpp:331
static constexpr StringLiteral name
Definition: QuantTypes.h:269
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:298
static UniformQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:289
static UniformQuantizedType get(unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:280
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.