doxygen/QuantTypes%5F8h%5Fsource.html

 //===- QuantTypes.h - Quantization Ops and Types ----------------*- C++ -*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #ifndef MLIR_DIALECT_QUANT_IR_QUANTTYPES_H

 #define MLIR_DIALECT_QUANT_IR_QUANTTYPES_H


 #include "mlir/IR/Attributes.h"

 #include "mlir/IR/Builders.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/Dialect.h"

 #include "mlir/IR/OpDefinition.h"

 #include "mlir/IR/Types.h"

 #include "llvm/Support/MathExtras.h"


 namespace mlir {

 namespace quant {

 namespace detail {

 // Storage classes backing the quantized types declared in this header. Only
 // the names are required here: they are used as `ImplType` / `TypeBase`
 // template arguments, so forward declarations are sufficient.

 // Storage for the common base type.
 struct QuantizedTypeStorage;

 // Storage for each concrete quantized type.
 struct AnyQuantizedTypeStorage;
 struct UniformQuantizedTypeStorage;
 struct UniformQuantizedPerAxisTypeStorage;
 struct UniformQuantizedSubChannelTypeStorage;
 struct CalibratedQuantizedTypeStorage;

 } // namespace detail


 /// Bit-mapped flags describing properties of a quantized type. Individual
 /// values are tested against an `unsigned` flags word with bitwise AND.
 namespace QuantizationFlags {
 enum FlagValue {
   /// When set, the storage type is interpreted as a signed integer. When
   /// clear (the default), it is interpreted as an unsigned value.
   Signed = 1 << 0,
 };
 } // namespace QuantizationFlags


 /// Common base of every quantized type known to this dialect.
 ///
 /// Each quantized type carries:
 ///   - storageType: the (narrower) numeric type used to approximate values
 ///     of some expressed type.
 ///   - expressedType: the type being approximated.
 ///
 /// Generic manipulation of types in terms of these fields is provided here.
 class QuantizedType : public Type {
 public:
   using ImplType = detail::QuantizedTypeStorage;
   using Type::Type;

   /// Upper limit on the number of bits a storage type may use.
   static constexpr unsigned MaxStorageBits = 32;

   static LogicalResult
   verifyInvariants(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
                    Type storageType, Type expressedType, int64_t storageTypeMin,
                    int64_t storageTypeMax);

   /// Hook that enables LLVM-style type casting.
   static bool classof(Type type);

   /// Returns the smallest value an integer storage type of `integralWidth`
   /// bits can hold: `llvm::minIntN(integralWidth)` when `isSigned`, otherwise
   /// 0. A type's storageTypeMin must be greater than or equal to this value.
   static int64_t getDefaultMinimumForInteger(bool isSigned,
                                              unsigned integralWidth) {
     return isSigned ? llvm::minIntN(integralWidth) : 0;
   }

   /// Returns the largest value an integer storage type of `integralWidth`
   /// bits can hold: `llvm::maxIntN(integralWidth)` when `isSigned`, otherwise
   /// `llvm::maxUIntN(integralWidth)`. A type's storageTypeMax must be less
   /// than or equal to this value.
   static int64_t getDefaultMaximumForInteger(bool isSigned,
                                              unsigned integralWidth) {
     if (!isSigned)
       return llvm::maxUIntN(integralWidth);
     return llvm::maxIntN(integralWidth);
   }

   /// Returns the original expressed type that this quantized type
   /// approximates.
   ///
   /// This presumes the quantized type was derived from a floating point type.
   /// In the broadest definition that is not necessarily true (the expressed
   /// type could itself be some integral, fixed or affine type), but no such
   /// usage is presently known, and the restriction is useful: it guarantees a
   /// transformation can always be reversed and its error measured. In most
   /// cases, this will be f32.
   Type getExpressedType() const;

   /// Returns the raw flags associated with this type. A more specific
   /// accessor (such as `isSigned()`) is typically the better choice.
   unsigned getFlags() const;

   // Convenience helpers.

   /// Returns true if the storage type should be interpreted as a signed
   /// quantity, false if it should be interpreted as unsigned.
   bool isSigned() const {
     const unsigned signedMask = QuantizationFlags::Signed;
     return (getFlags() & signedMask) == signedMask;
   }

   /// Returns the underlying type used to store values. It may be signed or
   /// unsigned; use `isSigned()` to tell the two apart.
   Type getStorageType() const;

   /// The minimum value that storageType can take.
   int64_t getStorageTypeMin() const;

   /// The maximum value that storageType can take.
   int64_t getStorageTypeMax() const;

   /// Returns whether the storage type has explicit min or max boundaries
   /// that differ from the minimum and maximum representable values.
   bool hasStorageTypeBounds() const;

   /// Returns the integral bit width that the underlying storage type can
   /// represent exactly. For integral storage types this is just their width.
   unsigned getStorageTypeIntegralWidth() const;

   /// Returns whether `candidateExpressedType` is a match for this
   /// QuantizedType, i.e. it is either a primitive type or a container type
   /// whose element type equals this QuantizedType's expressed type.
   /// Examples of compatible candidateExpressedType:
   ///   !quant.uniform<i8:f32, 1.0> =~ f32
   ///   !quant.uniform<i8:f32, 1.0> =~ tensor<4xf32>
   bool isCompatibleExpressedType(Type candidateExpressedType);

   /// Returns the element type as a QuantizedType, or nullptr if it is not a
   /// quantized type. A primitive type is returned as is; for a container
   /// (vector/tensor), its element type is returned.
   /// Examples:
   ///   !quant.uniform<i8:f32, 1.0> -> !quant.uniform<i8:f32, 1.0>
   ///   tensor<4x!quant.uniform<i8:f32, 1.0>> -> !quant.uniform<i8:f32, 1.0>
   static QuantizedType getQuantizedElementType(Type primitiveOrContainerType);

   /// Casts from a type based on the storageType to the corresponding type
   /// based on this type (returns nullptr if the cast is not valid).
   /// Examples, assuming this type's quantization is `<i8:f32, 1.0>`:
   ///  `candidate type` -> `return type`
   ///   i8 -> !quant.uniform<i8:f32, 1.0>
   ///   tensor<4xi8> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
   ///   vector<4xi8> -> vector<4x!quant.uniform<i8:f32, 1.0>>
   Type castFromStorageType(Type candidateType);

   /// Casts from a type based on a QuantizedType to the corresponding type
   /// based on the storageType (returns nullptr if the cast is not valid).
   /// This is the inverse of castFromStorageType().
   static Type castToStorageType(Type quantizedType);

   /// Casts from a type based on the expressedType to the corresponding type
   /// based on this type (returns nullptr if the cast is not valid).
   /// Examples:
   ///   f32 -> !quant.uniform<i8:f32, 1.0>
   ///   tensor<4xf32> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
   ///   vector<4xf32> -> vector<4x!quant.uniform<i8:f32, 1.0>>
   Type castFromExpressedType(Type candidateType);

   /// Casts from a type based on QuantizedType to the corresponding type based
   /// on the expressedType (returns nullptr if the cast is not valid).
   /// This is the inverse of castFromExpressedType().
   static Type castToExpressedType(Type quantizedType);

   /// Casts from a type based on the expressedType to the equivalent type
   /// based on storageType by way of this QuantizedType. Equivalent to:
   ///   QuantizedType::castToStorageType(castFromExpressedType(candidateType))
   /// (but with validity checks).
   /// Example (for this = !quant.uniform<i8:f32, 1.0>):
   ///   tensor<4xf32> -> tensor<4xi8>
   Type castExpressedToStorageType(Type candidateType);

 private:
   /// The following predicates inherited from `Type` are deliberately hidden:
   /// calling them on a `QuantizedType` object is almost certainly a bug. Use
   /// `getStorageType` or `getExpressedType` to obtain the underlying type
   /// that should actually be inspected.
   using Type::isBF16;
   using Type::isF16;
   using Type::isF32;
   using Type::isF64;
   using Type::isIndex;
   using Type::isInteger;
 };


 /// Quantized type whose mapping between storage and expressed values is
 /// left unspecified.
 ///
 /// Typical syntax:
 ///   quant.any<i8:f32>
 ///   quant.any<i8>
 ///   quant.any<i8<-16,15>>
 ///
 /// For the any type, the expressed type is optional.
 class AnyQuantizedType
     : public Type::TypeBase<AnyQuantizedType, QuantizedType,
                             detail::AnyQuantizedTypeStorage> {
 public:
   using Base::Base;
   using Base::getChecked;

   static constexpr StringLiteral name = "quant.any";

   /// Builds an instance from the given parameters without checking them.
   static AnyQuantizedType get(unsigned flags, Type storageType,
                               Type expressedType, int64_t storageTypeMin,
                               int64_t storageTypeMax);

   /// Builds an instance from the given parameters after checking them.
   /// On failure, returns a type that converts to nullptr.
   static AnyQuantizedType
   getChecked(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
              Type storageType, Type expressedType, int64_t storageTypeMin,
              int64_t storageTypeMax);

   /// Checks the construction invariants, reporting errors/warnings through
   /// `emitError`.
   static LogicalResult
   verifyInvariants(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
                    Type storageType, Type expressedType, int64_t storageTypeMin,
                    int64_t storageTypeMax);
 };


 /// A family of uniform, quantized types.
 ///
 /// An instance expresses a mapping between real values (most often expressed
 /// in floating point f32) and quantized values (either fixed point or
 /// affine), related by:
 ///     real_value = scale * (quantized_value - zero_point)
 ///
 /// The type is used by high level graph transformations whose goal is to
 /// re-express parts of a computation in this common form for more efficient
 /// execution at runtime. It is also designed to be expressive enough to
 /// facilitate lowering to precise types and operations in target hardware.
 ///
 /// Being a high-level type focused on intermediate passes, it holds opinions
 /// consistent with high-level usage. When math kernels are lowered below the
 /// high level arithmetic ops (i.e. to LLVM IR or hardware specific
 /// instruction sets), the information expressed here is expected to drive
 /// low level codegen and target specific type selection, while the type
 /// itself will likely be erased in the process.
 ///
 /// Syntax synopsis:
 ///   Per-layer, all parameters expressed:
 ///     !quant<uniform[StorageType:ExpressedType]{Scale:ZeroPoint}>
 ///   Per-layer, optional parameters omitted:
 ///     !quant<uniform[StorageType]{Scale}>
 ///
 ///   StorageType: 'i'|'u' NumBits
 ///   ExpressedType: 'f16', 'f32', 'bf16', 'f64'
 ///   Scale: A legal double value
 ///   ZeroPoint: An integer value
 class UniformQuantizedType
     : public Type::TypeBase<UniformQuantizedType, QuantizedType,
                             detail::UniformQuantizedTypeStorage> {
 public:
   using Base::Base;
   using Base::getChecked;

   static constexpr StringLiteral name = "quant.uniform";

   /// Builds an instance from the given parameters without checking them.
   static UniformQuantizedType get(unsigned flags, Type storageType,
                                   Type expressedType, double scale,
                                   int64_t zeroPoint, int64_t storageTypeMin,
                                   int64_t storageTypeMax);

   /// Builds an instance from the given parameters after checking them.
   /// On failure, returns a type that converts to nullptr.
   static UniformQuantizedType
   getChecked(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
              Type storageType, Type expressedType, double scale,
              int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax);

   /// Checks the construction invariants, reporting errors/warnings through
   /// `emitError`.
   static LogicalResult
   verifyInvariants(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
                    Type storageType, Type expressedType, double scale,
                    int64_t zeroPoint, int64_t storageTypeMin,
                    int64_t storageTypeMax);

   /// Returns the scale term: the difference between the real values that
   /// correspond to two consecutive quantized values (differing by 1).
   double getScale() const;

   /// Returns the storage value that corresponds to the real value 0 in the
   /// affine equation.
   int64_t getZeroPoint() const;

   /// Returns true if this type is treated as fixed point.
   ///
   /// Fixed point values are real numbers divided by a scale; a fixed point
   /// value can be obtained from an affine value by subtracting the zeroPoint.
   /// Currently only signed storage types with a zero point of 0 qualify. In
   /// the future, this may become explicit rather than implied by type and
   /// zeroPoint.
   bool isFixedPoint() const {
     if (!isSigned())
       return false;
     return getZeroPoint() == 0;
   }
 };


 /// Represents per-axis (also known as per-channel) quantization.
 ///
 /// Syntax synopsis:
 ///   Per-axis, all parameters expressed:
 ///     !quant<uniform[StorageType:ExpressedType:QuantizedDim]{QuantParams}>
 ///   Per-axis, optional parameters omitted:
 ///     !quant<uniform[StorageType]{Scale}>
 ///
 ///   StorageType: 'i'|'u' NumBits
 ///   ExpressedType: 'f16', 'f32', 'bf16', 'f64'
 ///   QuantizedDim: An integer value
 ///   QuantParams: (Scale ':' ZeroPoint)+
 ///   Scale: A legal double value
 ///   ZeroPoint: An integer value
 class UniformQuantizedPerAxisType
     : public Type::TypeBase<UniformQuantizedPerAxisType, QuantizedType,
                             detail::UniformQuantizedPerAxisTypeStorage> {
 public:
   using Base::Base;
   using Base::getChecked;

   static constexpr StringLiteral name = "quant.uniform_per_axis";

   /// Gets an instance of the type with all parameters specified but not
   /// checked.
   static UniformQuantizedPerAxisType
   get(unsigned flags, Type storageType, Type expressedType,
       ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
       int32_t quantizedDimension, int64_t storageTypeMin,
       int64_t storageTypeMax);

   /// Gets an instance of the type with all specified parameters checked.
   /// Returns a nullptr convertible type on failure.
   static UniformQuantizedPerAxisType
   getChecked(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
              Type storageType, Type expressedType, ArrayRef<double> scales,
              ArrayRef<int64_t> zeroPoints, int32_t quantizedDimension,
              int64_t storageTypeMin, int64_t storageTypeMax);

   /// Verifies construction invariants and issues errors/warnings.
   static LogicalResult
   verifyInvariants(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
                    Type storageType, Type expressedType,
                    ArrayRef<double> scales, ArrayRef<int64_t> zeroPoints,
                    int32_t quantizedDimension, int64_t storageTypeMin,
                    int64_t storageTypeMax);

   /// Gets the quantization scales. The scales designate the difference between
   /// the real values corresponding to consecutive quantized values differing
   /// by 1. The ith scale corresponds to the ith slice in the
   /// quantized_dimension.
   ArrayRef<double> getScales() const;

   /// Gets the storage values corresponding to the real value 0 in the affine
   /// equation. The ith zero point corresponds to the ith slice in the
   /// quantized_dimension.
   ArrayRef<int64_t> getZeroPoints() const;

   /// Specifies the dimension of the Tensor's shape that the scales and
   /// zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
   /// with quantization params:
   ///   scales=[1.0, 2.0, 3.0], zeroPoints=[1, 2, 3], quantizedDimension=1
   /// will be quantized across the second dimension of t.
   ///   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
   ///   t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
   ///   t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
   int32_t getQuantizedDimension() const;

   /// Returns true if this type is treated as fixed point.
   ///
   /// Fixed point values are real numbers divided by a scale. A fixed point
   /// value can be obtained from an affine value by subtracting the zeroPoint,
   /// so a type is fixed point only when there is nothing to subtract: the
   /// storage type must be signed and every per-axis zero point must be 0.
   /// This mirrors `UniformQuantizedType::isFixedPoint()`, which requires
   /// `getZeroPoint() == 0`.
   /// In the future, this may be explicit versus implied by type and zeroPoint.
   bool isFixedPoint() const {
     if (!isSigned())
       return false;
     // All slices must have a zero point of 0; a single non-zero zero point
     // makes the type affine rather than fixed point.
     return llvm::all_of(getZeroPoints(),
                         [](int64_t zeroPoint) { return zeroPoint == 0; });
   }
 };


 /// Represents sub-channel (also known as blockwise) quantization.
 ///
 /// Syntax synopsis:
 ///   UniformQuantizedSubChannelType ::= '!quant.uniform' '<'
 ///       storageType ('<' storageMin ':' storageMax '>')? ':'
 ///       expressedType ':' BlockSizeInfo ',' ScaleZeroTensor '>'
 ///   BlockSizeInfo: '{' '}' | '{' AxisBlock (',' AxisBlock)* '}'
 ///   AxisBlock ::= AxisSpec ':' BlockSizeSpec
 ///   ScaleZeroTensor ::= ScaleZeroDenseExp | ScaleZeroList
 ///   ScaleZeroDenseExp ::= '{' ScaleZeroTensor (',' ScaleZeroTensor)* '}'
 ///   ScaleZeroList  ::= ScaleZero (',' ScaleZero)*
 ///   ScaleZero ::= Scale (':' ZeroPoint)?
 ///
 ///   StorageType: 'i'|'u' NumBits
 ///   ExpressedType: 'f16', 'f32', 'bf16', 'f64'
 ///   AxisSpec: An integer value
 ///   BlockSizeSpec: An integer value
 ///   Scale: An attribute (usually floating-point value)
 ///   ZeroPoint: An attribute (usually integer value)
 class UniformQuantizedSubChannelType
     : public Type::TypeBase<UniformQuantizedSubChannelType, QuantizedType,
                             detail::UniformQuantizedSubChannelTypeStorage> {
 public:
   using Base::Base;
   using Base::getChecked;

   static constexpr StringLiteral name = "quant.uniform_sub_channel";

   /// Builds an instance from the given parameters without checking them.
   static UniformQuantizedSubChannelType
   get(unsigned flags, Type storageType, Type expressedType,
       DenseElementsAttr scales, DenseElementsAttr zeroPoints,
       ArrayRef<int32_t> quantizedDimensions, ArrayRef<int64_t> blockSizes,
       int64_t storageTypeMin, int64_t storageTypeMax);

   /// Builds an instance from the given parameters after checking them.
   /// On failure, returns a type that converts to nullptr.
   static UniformQuantizedSubChannelType
   getChecked(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
              Type storageType, Type expressedType, DenseElementsAttr scales,
              DenseElementsAttr zeroPoints,
              ArrayRef<int32_t> quantizedDimensions,
              ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
              int64_t storageTypeMax);

   /// Checks the construction invariants, reporting errors/warnings through
   /// `emitError`.
   static LogicalResult
   verifyInvariants(function_ref<InFlightDiagnostic()> emitError, unsigned flags,
                    Type storageType, Type expressedType,
                    DenseElementsAttr scales, DenseElementsAttr zeroPoints,
                    ArrayRef<int32_t> quantizedDimensions,
                    ArrayRef<int64_t> blockSizes, int64_t storageTypeMin,
                    int64_t storageTypeMax);

   /// Returns the quantization scales, organized as a multi-dimensional
   /// tensor. The size of each dimension of the scales tensor is the number of
   /// blocks along the corresponding dimension of the quantized data tensor.
   ///
   /// For a quantized data tensor of shape [X0, X1, ..., XR-1] and block sizes
   /// [B0, B1, ..., BR-1], the scales tensor has shape
   /// [X0/B0, X1/B1, ..., XR-1/BR-1].
   ///
   /// The scale applying to the data element at position [i0, i1, ..., iR-1]
   /// is the scales-tensor element at position [i0/B0, i1/B1, ..., iR-1/BR-1].
   DenseElementsAttr getScales() const;

   /// Returns the quantization zero-points, organized as a multi-dimensional
   /// tensor. The size of each dimension of the zero-point tensor is the number
   /// of blocks along the corresponding dimension of the quantized data tensor.
   ///
   /// For a quantized data tensor of shape [X0, X1, ..., XR-1] and block sizes
   /// [B0, B1, ..., BR-1], the zero-point tensor has shape
   /// [X0/B0, X1/B1, ..., XR-1/BR-1].
   ///
   /// The zero-point applying to the data element at position
   /// [i0, i1, ..., iR-1] is the zero-point-tensor element at position
   /// [i0/B0, i1/B1, ..., iR-1/BR-1].
   DenseElementsAttr getZeroPoints() const;

   /// Returns the quantized dimensions. Each element of the returned list is
   /// an axis of the quantized data tensor that has a specified block size, in
   /// the same order as the block sizes returned by `getBlockSizes()`: the data
   /// tensor is quantized along the `i`-th dimension of this list using the
   /// `i`-th block size from `getBlockSizes()`.
   ///
   /// The type expression does not have to specify a block size for every
   /// axis of the data tensor. An unspecified block size for an axis `i`
   /// defaults to the tensor dimension size of that axis.
   ///
   /// For example, for a quantized type:
   /// `tensor<8x4x2x!quant.uniform<i8:f32:{1:2, 0:8}, {{1.0, 2.0}, {3.0, 4.0}}>>`
   ///
   /// `getQuantizedDimensions()` returns [1, 0].
   /// `getBlockSizes()` returns [2, 8].
   ///
   /// This indicates that:
   ///  * Axis 1 (second dimension) is quantized with a block size of 2.
   ///  * Axis 0 (first dimension) is quantized with a block size of 8.
   ///  Since axis 2 is not specified, it implicitly has a block size equal to
   ///  the size of the third dimension (which is 2 in this case).
   ArrayRef<int32_t> getQuantizedDimensions() const;

   /// Returns the block sizes for the quantized dimensions. The `i`-th element
   /// of the returned list is the block size for the `i`-th dimension in the
   /// list returned by `getQuantizedDimensions()`.
   ///
   /// See `getQuantizedDimensions()` for more details and examples.
   ArrayRef<int64_t> getBlockSizes() const;

   /// Returns the block size information as a list of pairs, each pairing a
   /// quantized dimension with its corresponding block size.
   ///
   /// For example, for the type:
   ///  `tensor<8x4x!quant.uniform<i8:f32:{1:2, 0:8}, {{2.0, 3.0}}>>`
   ///
   /// this method returns:
   ///  `[(1, 2), (0, 8)]`
   ///
   /// meaning that axis 1 has a block size of 2 and axis 0 has a block size
   /// of 8.
   const SmallVector<std::pair<int32_t, int64_t>> getBlockSizeInfo() const;
 };


 /// Quantized type whose range is inferred from given min/max values.
 ///
 /// Typical syntax:
 ///   quant.calibrated<f32<-0.922,0.981>>
 class CalibratedQuantizedType
     : public Type::TypeBase<CalibratedQuantizedType, QuantizedType,
                             detail::CalibratedQuantizedTypeStorage> {
 public:
   using Base::Base;
   using Base::getChecked;

   static constexpr StringLiteral name = "quant.calibrated";

   /// Builds an instance from the given parameters without checking them.
   static CalibratedQuantizedType get(Type expressedType, double min,
                                      double max);

   /// Builds an instance from the given parameters after checking them.
   /// On failure, returns a type that converts to nullptr.
   static CalibratedQuantizedType
   getChecked(function_ref<InFlightDiagnostic()> emitError, Type expressedType,
              double min, double max);

   /// Checks the construction invariants, reporting errors/warnings through
   /// `emitError`.
   static LogicalResult
   verifyInvariants(function_ref<InFlightDiagnostic()> emitError,
                    Type expressedType, double min, double max);

   /// Accessor for the `min` value of the calibrated range.
   double getMin() const;

   /// Accessor for the `max` value of the calibrated range.
   double getMax() const;
 };


 } // namespace quant

 } // namespace mlir


 #endif // MLIR_DIALECT_QUANT_IR_QUANTTYPES_H

Attributes.h

Builders.h

Dialect.h

Types.h

OpDefinition.h

max
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:212

min
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:204

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVector
Definition: LLVM.h:72

llvm::function_ref
Definition: LLVM.h:90

mlir::DenseElementsAttr
An attribute that represents a reference to a dense vector or tensor object.
Definition: BuiltinAttributes.h:82

mlir::InFlightDiagnostic
This class represents a diagnostic that is inflight and set to be reported.
Definition: Diagnostics.h:314

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::Type::isF64
bool isF64() const
Definition: Types.cpp:41

mlir::Type::isIndex
bool isIndex() const
Definition: Types.cpp:54

mlir::Type::Type
constexpr Type()=default

mlir::Type::isF32
bool isF32() const
Definition: Types.cpp:40

mlir::Type::isInteger
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition: Types.cpp:56

mlir::Type::isF16
bool isF16() const
Definition: Types.cpp:38

mlir::Type::isBF16
bool isBF16() const
Definition: Types.cpp:37

mlir::detail::StorageUserBase
Utility class for implementing users of storage classes uniqued by a StorageUniquer.
Definition: StorageUniquerSupport.h:95

mlir::quant::AnyQuantizedType
A quantized type that maps storage to/from expressed types in an unspecified way.
Definition: QuantTypes.h:203

mlir::quant::AnyQuantizedType::get
static AnyQuantizedType get(unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:242

mlir::quant::AnyQuantizedType::name
static constexpr StringLiteral name
Definition: QuantTypes.h:208

mlir::quant::AnyQuantizedType::verifyInvariants
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:261

mlir::quant::AnyQuantizedType::getChecked
static AnyQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:251

mlir::quant::CalibratedQuantizedType
A quantized type that infers its range from given min/max values.
Definition: QuantTypes.h:524

mlir::quant::CalibratedQuantizedType::name
static constexpr StringLiteral name
Definition: QuantTypes.h:529

mlir::quant::CalibratedQuantizedType::getMax
double getMax() const
Definition: QuantTypes.cpp:555

mlir::quant::CalibratedQuantizedType::getMin
double getMin() const
Definition: QuantTypes.cpp:553

mlir::quant::CalibratedQuantizedType::verifyInvariants
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:539

mlir::quant::CalibratedQuantizedType::get
static CalibratedQuantizedType get(Type expressedType, double min, double max)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:527

mlir::quant::CalibratedQuantizedType::getChecked
static CalibratedQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, Type expressedType, double min, double max)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:532

mlir::quant::QuantizedType
Base class for all quantized types known to this dialect.
Definition: QuantTypes.h:50

mlir::quant::QuantizedType::getExpressedType
Type getExpressedType() const
Gets the original expressed type that this quantized type approximates.
Definition: QuantTypes.cpp:106

mlir::quant::QuantizedType::MaxStorageBits
static constexpr unsigned MaxStorageBits
The maximum number of bits supported for storage types.
Definition: QuantTypes.h:56

mlir::quant::QuantizedType::hasStorageTypeBounds
bool hasStorageTypeBounds() const
Return whether the storage type has explicit min or max boundaries different from the minimum and max...
Definition: QuantTypes.cpp:89

mlir::quant::QuantizedType::castToStorageType
static Type castToStorageType(Type quantizedType)
Casts from a type based on a QuantizedType to a corresponding type based on the storageType (returns ...
Definition: QuantTypes.cpp:152

mlir::quant::QuantizedType::castExpressedToStorageType
Type castExpressedToStorageType(Type candidateType)
Casts from a type based on the expressedType to the equivalent type based on storageType by way of th...
Definition: QuantTypes.cpp:234

mlir::quant::QuantizedType::castToExpressedType
static Type castToExpressedType(Type quantizedType)
Casts from a type based on QuantizedType to a corresponding type based on the expressedType (returns ...
Definition: QuantTypes.cpp:207

mlir::quant::QuantizedType::isSigned
bool isSigned() const
Whether the storage type should be interpreted as a signed quantity (true) or an unsigned value (fals...
Definition: QuantTypes.h:103

mlir::quant::QuantizedType::getQuantizedElementType
static QuantizedType getQuantizedElementType(Type primitiveOrContainerType)
Returns the element type as a QuantizedType or nullptr if it is not a quantized type.
Definition: QuantTypes.cpp:119

mlir::quant::QuantizedType::getFlags
unsigned getFlags() const
Gets the flags associated with this type.
Definition: QuantTypes.cpp:36

mlir::quant::QuantizedType::getStorageTypeMax
int64_t getStorageTypeMax() const
The maximum value that storageType can take.
Definition: QuantTypes.cpp:85

mlir::quant::QuantizedType::getDefaultMaximumForInteger
static int64_t getDefaultMaximumForInteger(bool isSigned, unsigned integralWidth)
Gets the maximum possible stored by a storageType.
Definition: QuantTypes.h:78

mlir::quant::QuantizedType::getStorageTypeIntegralWidth
unsigned getStorageTypeIntegralWidth() const
Gets the integral bit width that the underlying storage type can exactly represent.
Definition: QuantTypes.cpp:100

mlir::quant::QuantizedType::classof
static bool classof(Type type)
Support method to enable LLVM-style type casting.
Definition: QuantTypes.cpp:40

mlir::quant::QuantizedType::castFromStorageType
Type castFromStorageType(Type candidateType)
Casts from a type based on the storageType to a corresponding type based on this type (returns nullpt...
Definition: QuantTypes.cpp:128

mlir::quant::QuantizedType::getStorageTypeMin
int64_t getStorageTypeMin() const
The minimum value that storageType can take.
Definition: QuantTypes.cpp:81

mlir::quant::QuantizedType::getDefaultMinimumForInteger
static int64_t getDefaultMinimumForInteger(bool isSigned, unsigned integralWidth)
Gets the minimum possible value stored by a storageType.
Definition: QuantTypes.h:68

mlir::quant::QuantizedType::getStorageType
Type getStorageType() const
Gets the underlying type used to store values.
Definition: QuantTypes.cpp:77

mlir::quant::QuantizedType::castFromExpressedType
Type castFromExpressedType(Type candidateType)
Casts from a type based on the expressedType to a corresponding type based on this type (returns null...
Definition: QuantTypes.cpp:179

mlir::quant::QuantizedType::isCompatibleExpressedType
bool isCompatibleExpressedType(Type candidateExpressedType)
Returns whether the candidateExpressedType is a match for this QuantizedType.
Definition: QuantTypes.cpp:110

mlir::quant::QuantizedType::verifyInvariants
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, int64_t storageTypeMin, int64_t storageTypeMax)
Definition: QuantTypes.cpp:45

mlir::quant::UniformQuantizedPerAxisType
Represents per-axis (also known as per-channel) quantization.
Definition: QuantTypes.h:324

mlir::quant::UniformQuantizedPerAxisType::name
static constexpr StringLiteral name
Definition: QuantTypes.h:329

mlir::quant::UniformQuantizedPerAxisType::getChecked
static UniformQuantizedPerAxisType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:345

mlir::quant::UniformQuantizedPerAxisType::isFixedPoint
bool isFixedPoint() const
Fixed point values are real numbers divided by a scale.
Definition: QuantTypes.h:381

mlir::quant::UniformQuantizedPerAxisType::get
static UniformQuantizedPerAxisType get(unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:335

mlir::quant::UniformQuantizedPerAxisType::getQuantizedDimension
int32_t getQuantizedDimension() const
Specifies the dimension of the Tensor's shape that the scales and zero_points correspond to.
Definition: QuantTypes.cpp:406

mlir::quant::UniformQuantizedPerAxisType::getZeroPoints
ArrayRef< int64_t > getZeroPoints() const
Gets the storage values corresponding to the real value 0 in the affine equation.
Definition: QuantTypes.cpp:402

mlir::quant::UniformQuantizedPerAxisType::getScales
ArrayRef< double > getScales() const
Gets the quantization scales.
Definition: QuantTypes.cpp:398

mlir::quant::UniformQuantizedPerAxisType::verifyInvariants
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, ArrayRef< double > scales, ArrayRef< int64_t > zeroPoints, int32_t quantizedDimension, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:355

mlir::quant::UniformQuantizedSubChannelType
Represents sub-channel (also known as blockwise) quantization.
Definition: QuantTypes.h:409

mlir::quant::UniformQuantizedSubChannelType::name
static constexpr StringLiteral name
Definition: QuantTypes.h:414

mlir::quant::UniformQuantizedSubChannelType::getQuantizedDimensions
ArrayRef< int32_t > getQuantizedDimensions() const
Gets the quantized dimensions.
Definition: QuantTypes.cpp:506

mlir::quant::UniformQuantizedSubChannelType::getZeroPoints
DenseElementsAttr getZeroPoints() const
Gets the quantization zero-points.
Definition: QuantTypes.cpp:501

mlir::quant::UniformQuantizedSubChannelType::getBlockSizes
ArrayRef< int64_t > getBlockSizes() const
Gets the block sizes for the quantized dimensions.
Definition: QuantTypes.cpp:510

mlir::quant::UniformQuantizedSubChannelType::verifyInvariants
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:432

mlir::quant::UniformQuantizedSubChannelType::getBlockSizeInfo
const SmallVector< std::pair< int32_t, int64_t > > getBlockSizeInfo() const
Gets the block size information.
Definition: QuantTypes.cpp:515

mlir::quant::UniformQuantizedSubChannelType::getChecked
static UniformQuantizedSubChannelType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:420

mlir::quant::UniformQuantizedSubChannelType::get
static UniformQuantizedSubChannelType get(unsigned flags, Type storageType, Type expressedType, DenseElementsAttr scales, DenseElementsAttr zeroPoints, ArrayRef< int32_t > quantizedDimensions, ArrayRef< int64_t > blockSizes, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:410

mlir::quant::UniformQuantizedSubChannelType::getScales
DenseElementsAttr getScales() const
Gets the quantization scales.
Definition: QuantTypes.cpp:497

mlir::quant::UniformQuantizedType
Represents a family of uniform, quantized types.
Definition: QuantTypes.h:264

mlir::quant::UniformQuantizedType::getScale
double getScale() const
Gets the scale term.
Definition: QuantTypes.cpp:329

mlir::quant::UniformQuantizedType::isFixedPoint
bool isFixedPoint() const
Definition: QuantTypes.h:305

mlir::quant::UniformQuantizedType::getZeroPoint
int64_t getZeroPoint() const
Gets the storage value corresponding to the real value 0 in the affine equation.
Definition: QuantTypes.cpp:331

mlir::quant::UniformQuantizedType::name
static constexpr StringLiteral name
Definition: QuantTypes.h:269

mlir::quant::UniformQuantizedType::verifyInvariants
static LogicalResult verifyInvariants(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Verifies construction invariants and issues errors/warnings.
Definition: QuantTypes.cpp:298

mlir::quant::UniformQuantizedType::getChecked
static UniformQuantizedType getChecked(function_ref< InFlightDiagnostic()> emitError, unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all specified parameters checked.
Definition: QuantTypes.cpp:289

mlir::quant::UniformQuantizedType::get
static UniformQuantizedType get(unsigned flags, Type storageType, Type expressedType, double scale, int64_t zeroPoint, int64_t storageTypeMin, int64_t storageTypeMax)
Gets an instance of the type with all parameters specified but not checked.
Definition: QuantTypes.cpp:280

BuiltinTypes.h

mlir::quant::QuantizationFlags::FlagValue
FlagValue
Definition: QuantTypes.h:35

mlir::quant::QuantizationFlags::Signed
@ Signed
Definition: QuantTypes.h:38

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::emitError
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
Definition: Diagnostics.cpp:328

mlir::quant::detail::AnyQuantizedTypeStorage
Definition: TypeDetail.h:46

mlir::quant::detail::CalibratedQuantizedTypeStorage
Definition: TypeDetail.h:375

mlir::quant::detail::QuantizedTypeStorage
Definition: TypeDetail.h:24

mlir::quant::detail::UniformQuantizedPerAxisTypeStorage
Definition: TypeDetail.h:161

mlir::quant::detail::UniformQuantizedSubChannelTypeStorage
Definition: TypeDetail.h:256

mlir::quant::detail::UniformQuantizedTypeStorage
Definition: TypeDetail.h:96