Fix compiler runtime for _Float16 type

This commit is contained in:
Justine Tunney 2024-02-27 06:31:16 -08:00
parent 0ef36489c8
commit 64a9e6fe56
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
14 changed files with 797 additions and 53 deletions

42
libc/intrin/float16.c Normal file
View file

@ -0,0 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
/**
* @fileoverview fp16 compiler runtime
*/
#define asint(x) ((union pun){x}).i
#define isnan(x) (((x) & 0x7fff) > 0x7c00)
union pun {
_Float16 f;
unsigned short i;
};
int __eqhf2(_Float16 fx, _Float16 fy) {
int x = asint(fx);
int y = asint(fy);
return (x == y) & !isnan(x) & !isnan(y);
}
int __nehf2(_Float16 fx, _Float16 fy) {
int x = asint(fx);
int y = asint(fy);
return (x != y) & !isnan(x) & !isnan(y);
}

View file

@ -159,10 +159,10 @@ typedef double double_t;
#define fpclassify(x) \
__builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
#define signbit(x) \
(sizeof(x) == sizeof(double) ? __builtin_signbit(x) \
: sizeof(x) == sizeof(float) ? __builtin_signbitf(x) \
: __builtin_signbitl(x))
#define signbit(x) \
(sizeof(x) == sizeof(long double) ? __builtin_signbitl(x) \
: sizeof(x) == sizeof(float) ? __builtin_signbitf(x) \
: __builtin_signbit(x))
extern int signgam;

107
test/math/float16_test.c Normal file
View file

@ -0,0 +1,107 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
#define CHECK(x) \
if (!(x)) return __LINE__
#define FALSE(x) \
{ \
volatile bool x_ = x; \
if (x_) return __LINE__; \
}
#define TRUE(x) \
{ \
volatile bool x_ = x; \
if (!x_) return __LINE__; \
}
_Float16 identity(_Float16 x) {
return x;
}
_Float16 (*half)(_Float16) = identity;
int main() {
volatile float f;
volatile double d;
volatile _Float16 pi = 3.141;
// half → float → half
f = pi;
pi = f;
// half → float
float __extendhfsf2(_Float16);
CHECK(0.f == __extendhfsf2(0));
CHECK(3.140625f == __extendhfsf2(pi));
CHECK(3.140625f == pi);
// half → double → half
d = pi;
pi = d;
// half → double
double __extendhfdf2(_Float16);
CHECK(0. == __extendhfdf2(0));
CHECK(3.140625 == __extendhfdf2(pi));
// float → half
_Float16 __truncsfhf2(float);
CHECK(0 == (float)__truncsfhf2(0));
CHECK(pi == (float)__truncsfhf2(3.141f));
CHECK(3.140625f == (float)__truncsfhf2(3.141f));
// double → half
_Float16 __truncdfhf2(double);
CHECK(0 == (double)__truncdfhf2(0));
CHECK(3.140625 == (double)__truncdfhf2(3.141));
// specials
volatile _Float16 nan = NAN;
volatile _Float16 positive_infinity = +INFINITY;
volatile _Float16 negative_infinity = -INFINITY;
CHECK(isnan(nan));
CHECK(!isinf(pi));
CHECK(!isnan(pi));
CHECK(isinf(positive_infinity));
CHECK(isinf(negative_infinity));
CHECK(!isnan(positive_infinity));
CHECK(!isnan(negative_infinity));
CHECK(!signbit(pi));
CHECK(signbit(half(-pi)));
CHECK(!signbit(half(+0.)));
CHECK(signbit(half(-0.)));
// arithmetic
CHECK(half(-3) == -half(3));
CHECK(half(9) == half(3) * half(3));
CHECK(half(0) == half(pi) - half(pi));
CHECK(half(6.28125) == half(pi) + half(pi));
// comparisons
CHECK(half(3) > half(2));
CHECK(half(3) < half(4));
CHECK(half(3) <= half(3));
CHECK(half(3) >= half(3));
TRUE(half(NAN) != half(NAN));
FALSE(half(NAN) == half(NAN));
TRUE(half(3) != half(NAN));
FALSE(half(3) == half(NAN));
TRUE(half(NAN) != half(3));
FALSE(half(NAN) == half(3));
}

17
third_party/compiler_rt/extendhfdf2.c vendored Normal file
View file

@ -0,0 +1,17 @@
//===-- lib/extendhfdf2.c - half -> dubble conversion -------------*- C -*-===//
//
// The Cosmopolitan Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#define SRC_HALF
#define DST_DOUBLE
#include "third_party/compiler_rt/fp16_extend_impl.inc"
COMPILER_RT_ABI dst_t __extendhfdf2(src_t a) {
return __extendXfYf2__(a);
}

View file

@ -1,35 +1,27 @@
//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
__static_yoink("huge_compiler_rt_license");
#define SRC_HALF
#define DST_SINGLE
#include "third_party/compiler_rt/fp_extend_impl.inc"
#include "fp16_extend_impl.inc"
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
COMPILER_RT_ABI __attribute__((__noinline__)) float __extendhfsf2(uint16_t a) {
return __extendXfYf2__(a);
COMPILER_RT_ABI NOINLINE float __extendhfsf2(src_t a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
return __extendhfsf2(a);
}
COMPILER_RT_ABI float __gnu_h2f_ieee(src_t a) { return __extendhfsf2(a); }
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI float __aeabi_h2f(uint16_t a) {
return __extendhfsf2(a);
}
AEABI_RTABI float __aeabi_h2f(src_t a) { return __extendhfsf2(a); }
#else
AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2);
COMPILER_RT_ALIAS(__extendhfsf2, __aeabi_h2f)
#endif
#endif

170
third_party/compiler_rt/fp16_extend.inc vendored Normal file
View file

@ -0,0 +1,170 @@
//===-lib/fp_extend.h - low precision -> high precision conversion -*- C
//-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Set source and destination setting
//
//===----------------------------------------------------------------------===//
#ifndef FP_EXTEND_HEADER
#define FP_EXTEND_HEADER
#include "int_lib.h"
#if defined SRC_SINGLE
typedef float src_t;
typedef uint32_t src_rep_t;
#define SRC_REP_C UINT32_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 23;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 8;
#define src_rep_t_clz clzsi
#elif defined SRC_DOUBLE
typedef double src_t;
typedef uint64_t src_rep_t;
#define SRC_REP_C UINT64_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 52;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 11;
static inline int src_rep_t_clz_impl(src_rep_t a) {
#if defined __LP64__
return __builtin_clzl(a);
#else
if (a & REP_C(0xffffffff00000000))
return clzsi(a >> 32);
else
return 32 + clzsi(a & REP_C(0xffffffff));
#endif
}
#define src_rep_t_clz src_rep_t_clz_impl
#elif defined SRC_80
typedef xf_float src_t;
typedef __uint128_t src_rep_t;
#define SRC_REP_C (__uint128_t)
// sign bit, exponent and significand occupy the lower 80 bits.
static const int srcBits = 80;
static const int srcSigFracBits = 63;
// -1 accounts for the sign bit.
// -1 accounts for the explicitly stored integer bit.
// srcBits - srcSigFracBits - 1 - 1
static const int srcExpBits = 15;
#elif defined SRC_HALF
typedef _Float16 src_t;
typedef uint16_t src_rep_t;
#define SRC_REP_C UINT16_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 10;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 5;
static inline int src_rep_t_clz_impl(src_rep_t a) {
return __builtin_clz(a) - 16;
}
#define src_rep_t_clz src_rep_t_clz_impl
#else
#error Source should be half, single, or double precision!
#endif // end source precision
#if defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
#define DST_REP_C UINT32_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 23;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 8;
#elif defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
#define DST_REP_C UINT64_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 52;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 11;
#elif defined DST_QUAD
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
#define DST_REP_C (__uint128_t)
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 112;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 15;
#else
#error Destination should be single, double, or quad precision!
#endif // end destination precision
// End of specialization parameters.
// TODO: These helper routines should be placed into fp_lib.h
// Currently they depend on macros/constants defined above.
static inline src_rep_t extract_sign_from_src(src_rep_t x) {
const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
return (x & srcSignMask) >> (srcBits - 1);
}
static inline src_rep_t extract_exp_from_src(src_rep_t x) {
const int srcSigBits = srcBits - 1 - srcExpBits;
const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
return (x & srcExpMask) >> srcSigBits;
}
static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
return x & srcSigFracMask;
}
#ifdef src_rep_t_clz
static inline int clz_in_sig_frac(src_rep_t sigFrac) {
const int skip = 1 + srcExpBits;
return src_rep_t_clz(sigFrac) - skip;
}
#endif
static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
}
// Two helper routines for conversion to and from the representation of
// floating-point data as integer values follow.
static inline src_rep_t srcToRep(src_t x) {
const union {
src_t f;
src_rep_t i;
} rep = {.f = x};
return rep.i;
}
static inline dst_t dstFromRep(dst_rep_t x) {
const union {
dst_t f;
dst_rep_t i;
} rep = {.i = x};
return rep.f;
}
// End helper routines. Conversion implementation follows.
#endif // FP_EXTEND_HEADER

View file

@ -0,0 +1,108 @@
//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a fairly generic conversion from a narrower to a wider
// IEEE-754 floating-point type. The constants and types defined following the
// includes below parameterize the conversion.
//
// It does not support types that don't use the usual IEEE-754 interchange
// formats; specifically, some work would be needed to adapt it to
// (for example) the Intel 80-bit format or PowerPC double-double format.
//
// Note please, however, that this implementation is only intended to support
// *widening* operations; if you need to convert to a *narrower* floating-point
// type (e.g. double -> float), then this routine will not do what you want it
// to.
//
// It also requires that integer types at least as large as both formats
// are available on the target platform; this may pose a problem when trying
// to add support for quad on some 32-bit systems, for example. You also may
// run into trouble finding an appropriate CLZ function for wide source types;
// you will likely need to roll your own on some platforms.
//
// Finally, the following assumptions are made:
//
// 1. Floating-point types and integer types have the same endianness on the
// target platform.
//
// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
// significand field being set.
//
//===----------------------------------------------------------------------===//
#include "fp16_extend.inc"
// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
// format. In particular, for the source type srcSigFracBits may be not equal to
// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
// types.
static __inline dst_t __extendXfYf2__(src_t a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const int srcInfExp = (1 << srcExpBits) - 1;
const int srcExpBias = srcInfExp >> 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;
// Break a into a sign and representation of the absolute value.
const src_rep_t aRep = srcToRep(a);
const src_rep_t srcSign = extract_sign_from_src(aRep);
const src_rep_t srcExp = extract_exp_from_src(aRep);
const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
dst_rep_t dstSign = srcSign;
dst_rep_t dstExp;
dst_rep_t dstSigFrac;
if (srcExp >= 1 && srcExp < (src_rep_t)srcInfExp) {
// a is a normal number.
dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
}
else if (srcExp == srcInfExp) {
// a is NaN or infinity.
dstExp = dstInfExp;
dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
}
else if (srcSigFrac) {
// a is denormal.
if (srcExpBits == dstExpBits) {
// The exponent fields are identical and this is a denormal number, so all
// the non-significand bits are zero. In particular, this branch is always
// taken when we extend a denormal F80 to F128.
dstExp = 0;
dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
} else {
#ifndef src_rep_t_clz
// If src_rep_t_clz is not defined this branch must be unreachable.
__builtin_unreachable();
#else
// Renormalize the significand and clear the leading bit.
// For F80 -> F128 this codepath is unused.
const int scale = clz_in_sig_frac(srcSigFrac) + 1;
dstExp = dstExpBias - srcExpBias - scale + 1;
dstSigFrac = (dst_rep_t)srcSigFrac
<< (dstSigFracBits - srcSigFracBits + scale);
const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
dstSigFrac ^= dstMinNormal;
#endif
}
}
else {
// a is zero.
dstExp = 0;
dstSigFrac = 0;
}
const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
return dstFromRep(result);
}

154
third_party/compiler_rt/fp16_trunc.inc vendored Normal file
View file

@ -0,0 +1,154 @@
//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Set source and destination precision setting
//
//===----------------------------------------------------------------------===//
#ifndef FP_TRUNC_HEADER
#define FP_TRUNC_HEADER
#include "int_lib.h"
#if defined SRC_SINGLE
typedef float src_t;
typedef uint32_t src_rep_t;
#define SRC_REP_C UINT32_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 23;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 8;
#elif defined SRC_DOUBLE
typedef double src_t;
typedef uint64_t src_rep_t;
#define SRC_REP_C UINT64_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 52;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 11;
#elif defined SRC_QUAD
typedef tf_float src_t;
typedef __uint128_t src_rep_t;
#define SRC_REP_C (__uint128_t)
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 112;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 15;
#else
#error Source should be double precision or quad precision!
#endif // end source precision
#if defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
#define DST_REP_C UINT64_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 52;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 11;
#elif defined DST_80
typedef xf_float dst_t;
typedef __uint128_t dst_rep_t;
#define DST_REP_C (__uint128_t)
static const int dstBits = 80;
static const int dstSigFracBits = 63;
// -1 accounts for the sign bit.
// -1 accounts for the explicitly stored integer bit.
// dstBits - dstSigFracBits - 1 - 1
static const int dstExpBits = 15;
#elif defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
#define DST_REP_C UINT32_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 23;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 8;
#elif defined DST_HALF
typedef _Float16 dst_t;
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 10;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 5;
#elif defined DST_BFLOAT
typedef __bf16 dst_t;
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 7;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 8;
#else
#error Destination should be single precision or double precision!
#endif // end destination precision
// TODO: These helper routines should be placed into fp_lib.h
// Currently they depend on macros/constants defined above.
static inline src_rep_t extract_sign_from_src(src_rep_t x) {
const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
return (x & srcSignMask) >> (srcBits - 1);
}
static inline src_rep_t extract_exp_from_src(src_rep_t x) {
const int srcSigBits = srcBits - 1 - srcExpBits;
const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
return (x & srcExpMask) >> srcSigBits;
}
static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
return x & srcSigFracMask;
}
static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
// Set the explicit integer bit in F80 if present.
if (dstBits == 80 && exp) {
result |= (DST_REP_C(1) << dstSigFracBits);
}
return result;
}
// End of specialization parameters. Two helper routines for conversion to and
// from the representation of floating-point data as integer values follow.
static inline src_rep_t srcToRep(src_t x) {
const union {
src_t f;
src_rep_t i;
} rep = {.f = x};
return rep.i;
}
static inline dst_t dstFromRep(dst_rep_t x) {
const union {
dst_t f;
dst_rep_t i;
} rep = {.i = x};
return rep.f;
}
#endif // FP_TRUNC_HEADER

View file

@ -0,0 +1,155 @@
//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a fairly generic conversion from a wider to a narrower
// IEEE-754 floating-point type in the default (round to nearest, ties to even)
// rounding mode. The constants and types defined following the includes below
// parameterize the conversion.
//
// This routine can be trivially adapted to support conversions to
// half-precision or from quad-precision. It does not support types that don't
// use the usual IEEE-754 interchange formats; specifically, some work would be
// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
// double-double format.
//
// Note please, however, that this implementation is only intended to support
// *narrowing* operations; if you need to convert to a *wider* floating-point
// type (e.g. float -> double), then this routine will not do what you want it
// to.
//
// It also requires that integer types at least as large as both formats
// are available on the target platform; this may pose a problem when trying
// to add support for quad on some 32-bit systems, for example.
//
// Finally, the following assumptions are made:
//
// 1. Floating-point types and integer types have the same endianness on the
// target platform.
//
// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
// significand field being set.
//
//===----------------------------------------------------------------------===//
#include "fp16_trunc.inc"
// The destination type may use a usual IEEE-754 interchange format or Intel
// 80-bit format. In particular, for the destination type dstSigFracBits may be
// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
// standard types.
static __inline dst_t __truncXfYf2__(src_t a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const int srcInfExp = (1 << srcExpBits) - 1;
const int srcExpBias = srcInfExp >> 1;
const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits;
const src_rep_t roundMask =
(SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;
const src_rep_t halfway = SRC_REP_C(1)
<< (srcSigFracBits - dstSigFracBits - 1);
const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
const src_rep_t srcNaNCode = srcQNaN - 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;
const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);
const dst_rep_t dstNaNCode = dstQNaN - 1;
const src_rep_t aRep = srcToRep(a);
const src_rep_t srcSign = extract_sign_from_src(aRep);
const src_rep_t srcExp = extract_exp_from_src(aRep);
const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
dst_rep_t dstSign = srcSign;
dst_rep_t dstExp;
dst_rep_t dstSigFrac;
// Same size exponents and a's significand tail is 0.
// The significand can be truncated and the exponent can be copied over.
const int sigFracTailBits = srcSigFracBits - dstSigFracBits;
if (srcExpBits == dstExpBits &&
((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {
dstExp = srcExp;
dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
}
const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias;
if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) {
// The exponent of a is within the range of normal numbers in the
// destination format. We can convert by simply right-shifting with
// rounding and adjusting the exponent.
dstExp = dstExpCandidate;
dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
const src_rep_t roundBits = srcSigFrac & roundMask;
// Round to nearest.
if (roundBits > halfway)
dstSigFrac++;
// Tie to even.
else if (roundBits == halfway)
dstSigFrac += dstSigFrac & 1;
// Rounding has changed the exponent.
if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
dstExp += 1;
dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
}
} else if (srcExp == srcInfExp && srcSigFrac) {
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
dstExp = dstInfExp;
dstSigFrac = dstQNaN;
dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode;
} else if ((int)srcExp >= overflowExponent) {
dstExp = dstInfExp;
dstSigFrac = 0;
} else {
// a underflows on conversion to the destination type or is an exact
// zero. The result may be a denormal or zero. Extract the exponent
// to get the shift amount for the denormalization.
src_rep_t significand = srcSigFrac;
int shift = srcExpBias - dstExpBias - srcExp;
if (srcExp) {
// Set the implicit integer bit if the source is a normal number.
significand |= srcMinNormal;
shift += 1;
}
// Right shift by the denormalization amount with sticky.
if (shift > srcSigFracBits) {
dstExp = 0;
dstSigFrac = 0;
} else {
dstExp = 0;
const bool sticky = shift && ((significand << (srcBits - shift)) != 0);
src_rep_t denormalizedSignificand = significand >> shift | sticky;
dstSigFrac = denormalizedSignificand >> sigFracTailBits;
const src_rep_t roundBits = denormalizedSignificand & roundMask;
// Round to nearest
if (roundBits > halfway)
dstSigFrac++;
// Ties to even
else if (roundBits == halfway)
dstSigFrac += dstSigFrac & 1;
// Rounding has changed the exponent.
if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
dstExp += 1;
dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
}
}
}
return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
}

View file

@ -41,11 +41,21 @@ static __inline int src_rep_t_clz(src_rep_t a) {
}
#elif defined SRC_HALF
typedef uint16_t src_t;
#error use fp16_extend.inc
typedef _Float16 src_t;
typedef uint16_t src_rep_t;
#define SRC_REP_C UINT16_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigBits = 10;
#define src_rep_t_clz __builtin_clz
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 5;
static inline int src_rep_t_clz_impl(src_rep_t a) {
return __builtin_clz(a) - 16;
}
#define src_rep_t_clz src_rep_t_clz_impl
#else
#error Source should be half, single, or double precision!

View file

@ -51,6 +51,7 @@ typedef uint32_t dst_rep_t;
static const int dstSigBits = 23;
#elif defined DST_HALF
#error use fp16_trunc.inc
typedef uint16_t dst_t;
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C

View file

@ -46,10 +46,12 @@
#ifdef _MSC_VER
#define ALWAYS_INLINE __forceinline
#define NOINLINE __declspec(noinline)
#define NORETURN __declspec(noreturn)
#define UNUSED
#else
#define ALWAYS_INLINE __attribute__((__always_inline__))
#define NOINLINE __attribute__((__noinline__))
#define NORETURN __attribute__((__noreturn__))
#define UNUSED __attribute__((__unused__))
#endif

View file

@ -1,28 +1,21 @@
//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
__static_yoink("huge_compiler_rt_license");
#define SRC_DOUBLE
#define DST_HALF
#include "third_party/compiler_rt/fp_trunc_impl.inc"
#include "fp16_trunc_impl.inc"
COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
return __truncXfYf2__(a);
}
COMPILER_RT_ABI dst_t __truncdfhf2(double a) { return __truncXfYf2__(a); }
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI uint16_t __aeabi_d2h(double a) {
return __truncdfhf2(a);
}
AEABI_RTABI dst_t __aeabi_d2h(double a) { return __truncdfhf2(a); }
#else
AEABI_RTABI uint16_t __aeabi_d2h(double a) COMPILER_RT_ALIAS(__truncdfhf2);
COMPILER_RT_ALIAS(__truncdfhf2, __aeabi_d2h)
#endif
#endif

View file

@ -1,34 +1,27 @@
//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
__static_yoink("huge_compiler_rt_license");
#define SRC_SINGLE
#define DST_HALF
#include "third_party/compiler_rt/fp_trunc_impl.inc"
#include "fp16_trunc_impl.inc"
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
COMPILER_RT_ABI __attribute__((__noinline__)) uint16_t __truncsfhf2(float a) {
return __truncXfYf2__(a);
COMPILER_RT_ABI NOINLINE dst_t __truncsfhf2(float a) {
return __truncXfYf2__(a);
}
COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
return __truncsfhf2(a);
}
COMPILER_RT_ABI dst_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); }
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI uint16_t __aeabi_f2h(float a) {
return __truncsfhf2(a);
}
AEABI_RTABI dst_t __aeabi_f2h(float a) { return __truncsfhf2(a); }
#else
AEABI_RTABI uint16_t __aeabi_f2h(float a) COMPILER_RT_ALIAS(__truncsfhf2);
COMPILER_RT_ALIAS(__truncsfhf2, __aeabi_f2h)
#endif
#endif