16 #ifdef MLIR_FLOAT16_DEFINE_FUNCTIONS
// Bit-layout constants used by the float <-> half conversion routines below.
//
// Shift distance used to place an exponent value into the float32 exponent
// field (see e.g. `255 << kF32MantiBits` in float2half).
30 const uint32_t kF32MantiBits = 23;
// Shift distance between a float32 bit pattern and the matching half bit
// pattern; equals kF32MantiBits - 10.
31 const uint32_t kF32HalfMantiBitDiff = 13;
// Shift distance between the float32 sign bit (0x80000000) and the half sign
// bit (0x8000).
32 const uint32_t kF32HalfBitDiff = 16;
// Float32 pattern with exponent field 113 and a zero mantissa. float2half
// compares the input bits against it to select its denormMagic path.
33 const Float32Bits kF32Magic = {113 << kF32MantiBits};
// The value (127 - 15) positioned in the float32 exponent field; half2float
// adds it to the shifted half bits to adjust the exponent.
34 const uint32_t kF32HalfExpAdjust = (127 - 15) << kF32MantiBits;
// Converts a float to the 16-bit pattern of a half-precision value.
// NOTE(review): this listing omits several original lines of the function
// (for example the declaration of `f`, some closing braces and the final
// return), so the comments below describe only the statements that are visible.
38 uint16_t float2half(
float floatValue) {
// Float32 pattern with exponent field 255 and a zero mantissa.
39 const Float32Bits inf = {255 << kF32MantiBits};
// Threshold checked below: bit patterns at or above it produce the half
// Inf/NaN encodings.
40 const Float32Bits f16max = {(127 + 16) << kF32MantiBits};
// Magic value whose bit pattern is subtracted from f.u on the kF32Magic path.
// NOTE(review): the remainder of this initializer is not shown in this listing.
41 const Float32Bits denormMagic = {((127 - 15) + (kF32MantiBits - 10) + 1)
43 uint32_t signMask = 0x80000000u;
44 uint16_t halfValue =
static_cast<uint16_t
>(0x0u);
// Isolate the sign bit so it can be merged back into the result at the end.
47 uint32_t sign = f.u & signMask;
// At or above the threshold: emit quiet NaN (0x7e00) when the bits exceed the
// `inf` pattern, and infinity (0x7c00) otherwise.
// NOTE(review): these unsigned comparisons presumably rely on the sign bit
// having been cleared from f.u first; that step is not visible here - confirm.
50 if (f.u >= f16max.u) {
51 const uint32_t halfQnan = 0x7e00;
52 const uint32_t halfInf = 0x7c00;
54 halfValue = (f.u > inf.u) ? halfQnan : halfInf;
// Bit patterns below kF32Magic take the denormMagic path.
57 if (f.u < kF32Magic.u) {
// The low 16 bits of the difference from denormMagic become the result.
64 halfValue =
static_cast<uint16_t
>(f.u - denormMagic.u);
// Lowest bit that survives the shift down by kF32HalfMantiBitDiff.
// NOTE(review): the start of this statement is not shown in this listing.
67 (f.u >> kF32HalfMantiBitDiff) & 1;
// Shift out the low 13 bits and keep the low 16 bits of what remains.
75 halfValue =
static_cast<uint16_t
>(f.u >> kF32HalfMantiBitDiff);
// Move the saved sign from bit 31 down to bit 15 and merge it into the result.
79 halfValue |=
static_cast<uint16_t
>(sign >> kF32HalfBitDiff);
// Converts the 16-bit pattern of a half-precision value to a float.
// NOTE(review): this listing omits some original lines (the declaration of
// `f`, part of the `exp == 0` branch, closing braces and the return), so the
// comments below describe only the statements that are visible.
85 float half2float(uint16_t halfValue) {
// The 0x7c00 mask moved up by the same shift that is applied to the input
// bits, so it can be tested against f.u.
86 const uint32_t shiftedExp =
87 0x7c00 << kF32HalfMantiBitDiff;
// Input with bit 15 masked off, shifted up by kF32HalfMantiBitDiff.
91 static_cast<uint32_t
>((halfValue & 0x7fff) << kF32HalfMantiBitDiff)};
// Capture the masked exponent field before f.u is adjusted.
92 const uint32_t exp = shiftedExp & f.u;
// Add (127 - 15) to the exponent field.
93 f.u += kF32HalfExpAdjust;
// All masked exponent bits set: apply the adjustment a second time, which
// takes an exponent field of 31 to 255 (31 + 112 + 112).
96 if (exp == shiftedExp) {
98 f.u += kF32HalfExpAdjust;
99 }
else if (exp == 0) {
// Exponent field was zero: increase the float exponent field by one.
// NOTE(review): any follow-up statement in this branch is not shown here.
101 f.u += 1 << kF32MantiBits;
// Merge the sign back in, moved from bit 15 up to bit 31.
105 f.u |= (halfValue & 0x8000) << kF32HalfBitDiff;
// Shift distance between a float32 bit pattern and a bfloat16 bit pattern;
// used by float2bfloat (right shift) and bfloat2float (left shift).
109 const uint32_t kF32BfMantiBitDiff = 16;
// Converts a float to a 16-bit bfloat16 bit pattern.
// NOTE(review): the declaration of `bfloatBits` and the return statement are
// not shown in this listing.
113 uint16_t float2bfloat(
float floatValue) {
// NaN inputs map to a fixed pattern selected by the sign: 0xFFC0 when the
// sign bit is set, 0x7FC0 otherwise.
114 if (std::isnan(floatValue))
115 return std::signbit(floatValue) ? 0xFFC0 : 0x7FC0;
117 Float32Bits floatBits;
118 floatBits.f = floatValue;
// Add 0x7fff plus the lowest bit that will be kept, so that the truncation
// below rounds to nearest with ties going to the even result.
122 uint32_t lsb = (floatBits.u >> kF32BfMantiBitDiff) & 1;
123 uint32_t roundingBias = 0x7fff + lsb;
124 floatBits.u += roundingBias;
// Keep the upper 16 bits of the biased value.
125 bfloatBits =
static_cast<uint16_t
>(floatBits.u >> kF32BfMantiBitDiff);
// Converts a 16-bit bfloat16 bit pattern to a float by placing the 16 bits in
// the upper half of a float32 bit pattern; the lower 16 bits are zero.
// NOTE(review): the return statement is not shown in this listing.
131 float bfloat2float(uint16_t bfloatBits) {
132 Float32Bits floatBits;
133 floatBits.u =
static_cast<uint32_t
>(bfloatBits) << kF32BfMantiBitDiff;
// Constructs an f16 from a float, storing the result of float2half in `bits`.
139 f16::f16(
float f) : bits(float2half(f)) {}
// Constructs a bf16 from a float, storing the result of float2bfloat in `bits`.
141 bf16::bf16(
float f) : bits(float2bfloat(f)) {}
// Streams the value as a float: the stored bits are converted with half2float
// before being written to `os`.
// NOTE(review): the enclosing function header is not shown in this listing.
144 os << half2float(f.
bits);
// Streams the value as a float: the stored bits are converted with
// bfloat2float before being written to `os`.
// NOTE(review): the enclosing function header is not shown in this listing.
149 os << bfloat2float(d.
bits);
// Defines ATTR_WEAK as the GNU `weak` attribute when the compiler provides
// __has_attribute and reports support for `weak`, and the target is neither
// MinGW nor Cygwin.
// NOTE(review): the rest of the #if condition, the fallback definition and the
// matching #endif lines are not shown in this listing.
160 #ifdef __has_attribute
161 #if __has_attribute(weak) && !defined(__MINGW32__) && !defined(__CYGWIN__) && \
164 #define ATTR_WEAK __attribute__((__weak__))
// Selects BF16ABIType, the return type of the __trunc*fbf2 helpers below.
// Two alternatives are visible: `float` and `uint16_t`.
// NOTE(review): the preprocessor lines that tie each alias to a branch are not
// fully shown here; presumably `float` is used on x86-64 and `uint16_t`
// elsewhere - confirm against the complete file.
168 #if defined(__x86_64__) || defined(_M_X64)
175 using BF16ABIType = float;
178 using BF16ABIType = uint16_t;
// C-linkage helper, declared with ATTR_WEAK, that truncates a float to
// bfloat16 and returns it as BF16ABIType.
// NOTE(review): the declaration of `ret` and the return statement are not
// shown in this listing.
183 extern "C" BF16ABIType ATTR_WEAK __truncsfbf2(
float f) {
184 uint16_t bf = float2bfloat(f);
// Copy the 16 bfloat bits into `ret` bytewise; only sizeof(bf) bytes of `ret`
// are written by this call.
187 std::memcpy(&ret, &bf,
sizeof(bf));
// C-linkage helper, declared with ATTR_WEAK, that truncates a double to
// bfloat16. The value is first narrowed to float and then passed to
// __truncsfbf2, so it goes through two narrowing conversions.
193 extern "C" BF16ABIType ATTR_WEAK __truncdfbf2(
double d) {
196 return __truncsfbf2(
static_cast<float>(d));
// C-linkage entry point taking the raw 16-bit pattern of an f16 value.
// NOTE(review): the declaration of `f` and whatever statement outputs it are
// not shown in this listing; presumably `f` is an f16 that is then printed.
200 extern "C" void printF16(uint16_t bits) {
// Copy sizeof(f16) bytes from `bits` into `f`.
202 std::memcpy(&f, &bits,
sizeof(
f16));
// C-linkage entry point taking the raw 16-bit pattern of a bf16 value.
// NOTE(review): the declaration of `f` and whatever statement outputs it are
// not shown in this listing; presumably `f` is a bf16 that is then printed.
205 extern "C" void printBF16(uint16_t bits) {
// Copy sizeof(bf16) bytes from `bits` into `f`.
207 std::memcpy(&f, &bits,
sizeof(
bf16));
MLIR_FLOAT16_EXPORT void printBF16(uint16_t bits)
MLIR_FLOAT16_EXPORT void printF16(uint16_t bits)
bool operator==(const Fraction &x, const Fraction &y)
raw_ostream & operator<<(raw_ostream &os, const AliasResult &result)