Make quality improvements

- Write some more unit tests
- memcpy() on ARM is now faster
- Address the Musl complex math FIXME comments
- Some libm funcs like pow() now support setting errno
- Import the latest and greatest math functions from ARM
- Use more accurate atan2f() and log1pf() implementations
- atoi() and atol() will no longer saturate or clobber errno
This commit is contained in:
Justine Tunney 2024-02-25 14:57:28 -08:00
parent af8f2bd19f
commit 592f6ebc20
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
122 changed files with 6305 additions and 3859 deletions

View file

@ -206,7 +206,7 @@ endif
.UNVEIL += \
libc/integral \
libc/stdbool.h \
rwc:/dev/shm \
rwc:/dev/shm \
rx:.cosmocc \
rx:build/bootstrap \
r:build/portcosmo.h \
@ -297,6 +297,7 @@ include third_party/nsync/testing/BUILD.mk
include libc/testlib/BUILD.mk
include tool/viz/lib/BUILD.mk
include tool/args/BUILD.mk
include test/math/BUILD.mk
include test/posix/BUILD.mk
include test/libcxx/BUILD.mk
include test/tool/args/BUILD.mk

View file

@ -95,7 +95,6 @@ DEFAULT_CCFLAGS += \
DEFAULT_COPTS ?= \
-fno-ident \
-fno-common \
-fno-math-errno \
-fno-gnu-unique \
-fstrict-aliasing \
-fstrict-overflow \

View file

@ -16,44 +16,28 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/stdckdint.h"
#include "libc/str/str.h"
/**
* Decodes decimal integer from ASCII string.
* Turns string into int.
*
* atoi 10 22𝑐 7𝑛𝑠
* strtol 10 37𝑐 12𝑛𝑠
* strtoul 10 35𝑐 11𝑛𝑠
* wcstol 10 30𝑐 10𝑛𝑠
* wcstoul 10 30𝑐 10𝑛𝑠
* strtoimax 10 80𝑐 26𝑛𝑠
* strtoumax 10 78𝑐 25𝑛𝑠
* wcstoimax 10 77𝑐 25𝑛𝑠
* wcstoumax 10 76𝑐 25𝑛𝑠
* Decimal is the only radix supported. Leading whitespace (as specified
* by the isspace() function) is skipped over. Unlike strtol(), the atoi
* function has undefined behavior on error and it never changes `errno`
*
* @param s is a non-null nul-terminated string
* @param nptr is a non-null nul-terminated string
* @return the decoded signed saturated integer
* @raise ERANGE on overflow
*/
int atoi(const char *s) {
int atoi(const char *nptr) {
int x, c, d;
do c = *s++;
while (c == ' ' || c == '\t');
do c = *nptr++;
while (isspace(c));
d = c == '-' ? -1 : 1;
if (c == '-' || c == '+') c = *s++;
for (x = 0; isdigit(c); c = *s++) {
if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) {
errno = ERANGE;
if (d > 0) {
return INT_MAX;
} else {
return INT_MIN;
}
}
if (c == '-' || c == '+') c = *nptr++;
for (x = 0; isdigit(c); c = *nptr++) {
x *= 10;
x += (c - '0') * d;
}
return x;
}

View file

@ -16,34 +16,29 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/stdckdint.h"
#include "libc/str/str.h"
/**
* Decodes decimal integer from ASCII string.
* Turns string into long.
*
* @param s is a non-null nul-terminated string
* Decimal is the only radix supported. Leading whitespace (as specified
* by the isspace() function) is skipped over. Unlike strtol(), the atoi
* function has undefined behavior on error and it never changes `errno`
*
* @param nptr is a non-null nul-terminated string
* @return the decoded signed saturated integer
*/
long atol(const char *s) {
long atol(const char *nptr) {
long x;
int c, d;
do c = *s++;
while (c == ' ' || c == '\t');
do c = *nptr++;
while (isspace(c));
d = c == '-' ? -1 : 1;
if (c == '-' || c == '+') c = *s++;
for (x = 0; isdigit(c); c = *s++) {
if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) {
errno = ERANGE;
if (d > 0) {
return LONG_MAX;
} else {
return LONG_MIN;
}
}
if (c == '-' || c == '+') c = *nptr++;
for (x = 0; isdigit(c); c = *nptr++) {
x *= 10;
x += (c - '0') * d;
}
return x;
}

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -80,11 +80,12 @@ ENTRY (__memcpy_aarch64_simd)
PTR_ARG (1)
SIZE_ARG (2)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
b.hi L(copy_long)
add dstend, dstin, count
cmp count, 32
b.hi L(copy32_128)
nop
/* Small copies: 0..32 bytes. */
cmp count, 16
@ -95,6 +96,18 @@ ENTRY (__memcpy_aarch64_simd)
str B_q, [dstend, -16]
ret
.p2align 4
/* Medium copies: 33..128 bytes. */
L(copy32_128):
ldp A_q, B_q, [src]
ldp C_q, D_q, [srcend, -32]
cmp count, 64
b.hi L(copy128)
stp A_q, B_q, [dstin]
stp C_q, D_q, [dstend, -32]
ret
.p2align 4
/* Copy 8-15 bytes. */
L(copy16):
tbz count, 3, L(copy8)
@ -104,7 +117,6 @@ L(copy16):
str A_h, [dstend, -8]
ret
.p2align 3
/* Copy 4-7 bytes. */
L(copy8):
tbz count, 2, L(copy4)
@ -114,6 +126,19 @@ L(copy8):
str B_lw, [dstend, -4]
ret
/* Copy 65..128 bytes. */
L(copy128):
ldp E_q, F_q, [src, 32]
cmp count, 96
b.ls L(copy96)
ldp G_q, H_q, [srcend, -64]
stp G_q, H_q, [dstend, -64]
L(copy96):
stp A_q, B_q, [dstin]
stp E_q, F_q, [dstin, 32]
stp C_q, D_q, [dstend, -32]
ret
/* Copy 0..3 bytes using a branchless sequence. */
L(copy4):
cbz count, L(copy0)
@ -127,33 +152,11 @@ L(copy4):
L(copy0):
ret
.p2align 4
/* Medium copies: 33..128 bytes. */
L(copy32_128):
ldp A_q, B_q, [src]
ldp C_q, D_q, [srcend, -32]
cmp count, 64
b.hi L(copy128)
stp A_q, B_q, [dstin]
stp C_q, D_q, [dstend, -32]
ret
.p2align 4
/* Copy 65..128 bytes. */
L(copy128):
ldp E_q, F_q, [src, 32]
cmp count, 96
b.ls L(copy96)
ldp G_q, H_q, [srcend, -64]
stp G_q, H_q, [dstend, -64]
L(copy96):
stp A_q, B_q, [dstin]
stp E_q, F_q, [dstin, 32]
stp C_q, D_q, [dstend, -32]
ret
.p2align 3
/* Copy more than 128 bytes. */
L(copy_long):
add dstend, dstin, count
/* Use backwards copy if there is an overlap. */
sub tmp1, dstin, src
cmp tmp1, count
@ -190,6 +193,9 @@ L(copy64_from_end):
stp A_q, B_q, [dstend, -32]
ret
.p2align 4
nop
/* Large backwards copy for overlapping copies.
Copy 16 bytes and then align srcend to 16-byte alignment. */
L(copy_long_backwards):

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

2
libc/intrin/fbclibm.c Normal file
View file

@ -0,0 +1,2 @@
__notice(freebsd_complex_notice, "FreeBSD Complex Math (BSD-2 License)\n\
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>");

View file

@ -9,15 +9,47 @@
#define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */
#define M_LN2 0.69314718055994530942 /* logₑ2 */
#define M_LN10 2.30258509299404568402 /* logₑ10 */
#define M_PI 3.14159265358979323846 /* pi */
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#define M_PI_4 0.78539816339744830962 /* pi/4 */
#define M_1_PI 0.31830988618379067154 /* 1/pi */
#define M_2_PI 0.63661977236758134308 /* 2/pi */
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
#define M_PI 3.14159265358979323846 /* 𝜋 */
#define M_PI_2 1.57079632679489661923 /* 𝜋/2 */
#define M_PI_4 0.78539816339744830962 /* 𝜋/4 */
#define M_1_PI 0.31830988618379067154 /* 1/𝜋 */
#define M_2_PI 0.63661977236758134308 /* 2/𝜋 */
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(𝜋) */
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
#define M_Ef 2.7182818284590452354f /* 𝑒 */
#define M_LOG2Ef 1.4426950408889634074f /* log₂𝑒 */
#define M_LOG10Ef 0.43429448190325182765f /* log₁₀𝑒 */
#define M_LN2f 0.69314718055994530942f /* logₑ2 */
#define M_LN10f 2.30258509299404568402f /* logₑ10 */
#define M_PIf 3.14159265358979323846f /* 𝜋 */
#define M_PI_2f 1.57079632679489661923f /* 𝜋/2 */
#define M_PI_4f 0.78539816339744830962f /* 𝜋/4 */
#define M_1_PIf 0.31830988618379067154f /* 1/𝜋 */
#define M_2_PIf 0.63661977236758134308f /* 2/𝜋 */
#define M_2_SQRTPIf 1.12837916709551257390f /* 2/sqrt(𝜋) */
#define M_SQRT2f 1.41421356237309504880f /* sqrt(2) */
#define M_SQRT1_2f 0.70710678118654752440f /* 1/sqrt(2) */
#endif
#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
#define M_El 2.718281828459045235360287471352662498L /* 𝑒 */
#define M_LOG2El 1.442695040888963407359924681001892137L /* log₂𝑒 */
#define M_LOG10El 0.434294481903251827651128918916605082L /* log₁₀𝑒 */
#define M_LN2l 0.693147180559945309417232121458176568L /* logₑ2 */
#define M_LN10l 2.302585092994045684017991454684364208L /* logₑ10 */
#define M_PIl 3.141592653589793238462643383279502884L /* 𝜋 */
#define M_PI_2l 1.570796326794896619231321691639751442L /* 𝜋/2 */
#define M_PI_4l 0.785398163397448309615660845819875721L /* 𝜋/4 */
#define M_1_PIl 0.318309886183790671537767526745028724L /* 1/𝜋 */
#define M_2_PIl 0.636619772367581343075535053490057448L /* 2/𝜋 */
#define M_2_SQRTPIl 1.128379167095512573896158903121545172L /* 2/sqrt(𝜋) */
#define M_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */
#define M_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */
#endif
#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
#define DBL_DIG __DBL_DIG__
#define DBL_EPSILON __DBL_EPSILON__
@ -76,6 +108,27 @@
#define FP_ILOGB0 (-2147483647 - 1)
#define FP_ILOGBNAN (-2147483647 - 1)
#define MATH_ERRNO 1
#define MATH_ERREXCEPT 2
#ifdef __FAST_MATH__
#define math_errhandling 0
#elif defined(__NO_MATH_ERRNO__)
#define math_errhandling (MATH_ERREXCEPT)
#else
#define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)
#endif
#ifdef __FP_FAST_FMA
#define FP_FAST_FMA 1
#endif
#ifdef __FP_FAST_FMAF
#define FP_FAST_FMAF 1
#endif
#ifdef __FP_FAST_FMAL
#define FP_FAST_FMAL 1
#endif
COSMOPOLITAN_C_START_
#define NAN __builtin_nanf("")

View file

@ -54,6 +54,7 @@ o/$(MODE)/libc/tinymath/loglq.o: private \
$(LIBC_TINYMATH_A_OBJS): private \
CFLAGS += \
-fmath-errno \
-fsigned-zeros \
-ftrapping-math \
-frounding-math \

View file

@ -5,7 +5,7 @@ MIT OR Apache-2.0 WITH LLVM-exception
MIT License
-----------
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
double __math_divzero(uint32_t sign)
{
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
float __math_divzerof(uint32_t sign)
{
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
double __math_invalid(double x)
{
return (x - x) / (x - x);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
float __math_invalidf(float x)
{
return (x - x) / (x - x);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
dontinstrument double __math_oflow(uint32_t sign)
{
return __math_xflow(sign, 0x1p769);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
dontinstrument float __math_oflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p97f);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
dontinstrument double __math_uflow(uint32_t sign)
{
return __math_xflow(sign, 0x1p-767);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
dontinstrument float __math_uflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p-95f);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
dontinstrument double __math_xflow(uint32_t sign, double y)
{
return eval_as_double(fp_barrier(sign ? -y : y) * y);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
dontinstrument float __math_xflowf(uint32_t sign, float y)
{
return eval_as_float(fp_barrierf(sign ? -y : y) * y);
}

View file

@ -0,0 +1,509 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
#include "libc/math.h"
COSMOPOLITAN_C_START_
#define USE_GLIBC_ABI 1
/* If defined to 1, return correct results for special cases in non-nearest
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
This may be set to 0 if there is no fenv support or if math functions only
get called in round to nearest mode. */
#ifdef __ROUNDING_MATH__
#define WANT_ROUNDING 1
#else
#define WANT_ROUNDING 0
#endif
/* If defined to 1, set errno in math functions according to ISO C. Many math
libraries do not set errno, so this is 0 by default. It may need to be
set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
#ifdef __NO_MATH_ERRNO__
#define WANT_ERRNO 0
#else
#define WANT_ERRNO 1
#endif
/*------------------------------------------------------------------------------*/
/* optimized-routines/math/math_config.h */
#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
This may be set to 0 if there is no fenv support or if math functions only
get called in round to nearest mode. */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* If defined to 1, set errno in math functions according to ISO C. Many math
libraries do not set errno, so this is 0 by default. It may need to be
set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
# define WANT_ERRNO 0
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
/* Compiler can inline round as a single instruction. */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
# define HAVE_FAST_ROUND 1
# else
# define HAVE_FAST_ROUND 0
# endif
#endif
/* Compiler can inline lround, but not (long)round(x). */
#ifndef HAVE_FAST_LROUND
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
# define HAVE_FAST_LROUND 1
# else
# define HAVE_FAST_LROUND 0
# endif
#endif
/* Compiler can inline fma as a single instruction. */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
# define HAVE_FAST_FMA 1
# else
# define HAVE_FAST_FMA 0
# endif
#endif
/* Provide *_finite symbols and some of the glibc hidden symbols
so libmathlib can be used with binaries compiled against glibc
to interpose math functions with both static and dynamic linking. */
#ifndef USE_GLIBC_ABI
# if __GNUC__
# define USE_GLIBC_ABI 1
# else
# define USE_GLIBC_ABI 0
# endif
#endif
/* Optionally used extensions. */
#ifdef __GNUC__
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (x, 0)
# if __GNUC__ >= 9
# define attribute_copy(f) __attribute__ ((copy (f)))
# else
# define attribute_copy(f)
# endif
# define strong_alias(f, a) \
extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
# define hidden_alias(f, a) \
extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
/* Return ptr but hide its value from the compiler so accesses through it
cannot be optimized based on the contents. */
#define ptr_barrier(ptr) \
({ \
__typeof (ptr) __ptr = (ptr); \
__asm("" : "+r"(__ptr)); \
__ptr; \
})
#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
the semantics documented below. */
# define TOINT_INTRINSICS 1
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
static inline double_t
roundtoint (double_t x)
{
return round (x);
}
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representible in an
int32_t then the semantics is unspecified. */
static inline int32_t
converttoint (double_t x)
{
# if HAVE_FAST_LROUND
return lround (x);
# else
return (long) round (x);
# endif
}
#endif
static inline uint32_t
asuint (float f)
{
union
{
float f;
uint32_t i;
} u = {f};
return u.i;
}
static inline float
asfloat (uint32_t i)
{
union
{
uint32_t i;
float f;
} u = {i};
return u.f;
}
static inline uint64_t
asuint64 (double f)
{
union
{
double f;
uint64_t i;
} u = {f};
return u.i;
}
static inline double
asdouble (uint64_t i)
{
union
{
uint64_t i;
double f;
} u = {i};
return u.f;
}
#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif
static inline int
issignalingf_inline (float x)
{
uint32_t ix = asuint (x);
if (!IEEE_754_2008_SNAN)
return (ix & 0x7fc00000) == 0x7fc00000;
return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
}
static inline int
issignaling_inline (double x)
{
uint64_t ix = asuint64 (x);
if (!IEEE_754_2008_SNAN)
return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
}
#if __aarch64__ && __GNUC__
/* Prevent the optimization of a floating-point expression. */
static inline float
opt_barrier_float (float x)
{
__asm__ __volatile__ ("" : "+w" (x));
return x;
}
static inline double
opt_barrier_double (double x)
{
__asm__ __volatile__ ("" : "+w" (x));
return x;
}
/* Force the evaluation of a floating-point expression for its side-effect. */
static inline void
force_eval_float (float x)
{
__asm__ __volatile__ ("" : "+w" (x));
}
static inline void
force_eval_double (double x)
{
__asm__ __volatile__ ("" : "+w" (x));
}
#else
static inline float
opt_barrier_float (float x)
{
volatile float y = x;
return y;
}
static inline double
opt_barrier_double (double x)
{
volatile double y = x;
return y;
}
static inline void
force_eval_float (float x)
{
volatile float y UNUSED = x;
}
static inline void
force_eval_double (double x)
{
volatile double y UNUSED = x;
}
#endif
/* Evaluate an expression as the specified type, normally a type
cast should be enough, but compilers implement non-standard
excess-precision handling, so when FLT_EVAL_METHOD != 0 then
these functions may need to be customized. */
static inline float
eval_as_float (float x)
{
return x;
}
static inline double
eval_as_double (double x)
{
return x;
}
/* Error handling tail calls for special cases, with a sign argument.
The sign of the return value is set if the argument is non-zero. */
/* The result overflows. */
HIDDEN float __math_oflowf (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN float __math_uflowf (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN float __math_may_uflowf (uint32_t);
/* Division by zero. */
HIDDEN float __math_divzerof (uint32_t);
/* The result overflows. */
HIDDEN double __math_oflow (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN double __math_uflow (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN double __math_may_uflow (uint32_t);
/* Division by zero. */
HIDDEN double __math_divzero (uint32_t);
/* Error handling using input checking. */
/* Invalid input unless it is a quiet NaN. */
HIDDEN float __math_invalidf (float);
/* Invalid input unless it is a quiet NaN. */
HIDDEN double __math_invalid (double);
/* Invalid input unless it is a quiet NaN. */
HIDDEN long double __math_invalidl (long double);
/* Error handling using output checking, only for errno setting. */
/* Check if the result overflowed to infinity. */
HIDDEN double __math_check_oflow (double);
/* Check if the result underflowed to 0. */
HIDDEN double __math_check_uflow (double);
/* Check if the result overflowed to infinity. */
static inline double
check_oflow (double x)
{
return WANT_ERRNO ? __math_check_oflow (x) : x;
}
/* Check if the result underflowed to 0. */
static inline double
check_uflow (double x)
{
return WANT_ERRNO ? __math_check_uflow (x) : x;
}
/* Check if the result overflowed to infinity. */
HIDDEN float __math_check_oflowf (float);
/* Check if the result underflowed to 0. */
HIDDEN float __math_check_uflowf (float);
/* Check if the result overflowed to infinity. */
static inline float
check_oflowf (float x)
{
return WANT_ERRNO ? __math_check_oflowf (x) : x;
}
/* Check if the result underflowed to 0. */
static inline float
check_uflowf (float x)
{
return WANT_ERRNO ? __math_check_uflowf (x) : x;
}
/* Shared between expf, exp2f and powf. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern const struct exp2f_data
{
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data HIDDEN;
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
extern const struct logf_data
{
struct
{
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data HIDDEN;
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
extern const struct log2f_data
{
struct
{
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
double poly[LOG2F_POLY_ORDER];
} __log2f_data HIDDEN;
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
# define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
extern const struct powf_log2_data
{
struct
{
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data HIDDEN;
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
/* Use polynomial that is optimized for a wider input range. This may be
needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
#define EXP_POLY_WIDE 0
/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
#define EXP2_POLY_WIDE 0
/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
and !TOINT_INTRINSICS. */
#define EXP10_POLY_WIDE 0
extern const struct exp_data
{
double invln2N;
double invlog10_2N;
double shift;
double negln2hiN;
double negln2loN;
double neglog10_2hiN;
double neglog10_2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
double exp10_poly[5];
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data HIDDEN;
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern const struct log_data
{
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data HIDDEN;
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
extern const struct log2_data
{
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data HIDDEN;
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
extern const struct pow_log_data
{
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data HIDDEN;
extern const struct erff_data
{
float erff_poly_A[6];
float erff_poly_B[7];
} __erff_data HIDDEN;
#define ERF_POLY_A_ORDER 19
#define ERF_POLY_A_NCOEFFS 10
#define ERFC_POLY_C_NCOEFFS 16
#define ERFC_POLY_D_NCOEFFS 18
#define ERFC_POLY_E_NCOEFFS 14
#define ERFC_POLY_F_NCOEFFS 17
extern const struct erf_data
{
double erf_poly_A[ERF_POLY_A_NCOEFFS];
double erf_ratio_N_A[5];
double erf_ratio_D_A[5];
double erf_ratio_N_B[7];
double erf_ratio_D_B[6];
double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
} __erf_data HIDDEN;
#define V_EXP_TABLE_BITS 7
extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
#define V_LOG_TABLE_BITS 7
extern const struct v_log_data
{
struct
{
double invc, logc;
} table[1 << V_LOG_TABLE_BITS];
} __v_log_data HIDDEN;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ */

View file

@ -5,13 +5,6 @@
FreeBSD lib/msun/src/s_asinhl.c
Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
Copyright (c) 1992-2023 The FreeBSD Project.
Redistribution and use in source and binary forms, with or without
@ -35,12 +28,17 @@
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/math.h"
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("fdlibm_notice");
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

View file

@ -1,177 +1,120 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
FreeBSD lib/msun/src/s_asinhl.c
Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Copyright (c) 1992-2023 The FreeBSD Project.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/atanf_common.internal.h"
#include "libc/tinymath/internal.h"
__static_yoink("arm_optimized_routines_notice");
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
#define Pi (0x1.921fb6p+1f)
#define PiOver2 (0x1.921fb6p+0f)
#define PiOver4 (0x1.921fb6p-1f)
#define SignMask (0x80000000)
static volatile float
tiny = 1.0e-30;
static const float
zero = 0.0,
pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */
pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */
pi = 3.1415927410e+00; /* 0x40490fdb */
static volatile float
pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
/* We calculate atan2f by P(n/d), where n and d are similar to the input
arguments, and P is a polynomial. The polynomial may underflow.
POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
for which P underflows, and is used to special-case such inputs. */
#define POLY_UFLOW_BOUND 24
static inline int32_t
biased_exponent (float f)
{
uint32_t fi = asuint (f);
int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
if (UNLIKELY (ex == 0))
{
/* Subnormal case - we still need to get the exponent right for subnormal
numbers as division may take us back inside the normal range. */
return ex - __builtin_clz (fi << 9);
}
return ex;
}
/* Fast implementation of scalar atan2f. Largest observed error is
2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
want 0x1.964640p-1. */
/**
* Returns arc tangent of 𝑦/𝑥.
*/
float
atan2f (float y, float x)
atan2f(float y, float x)
{
uint32_t ix = asuint (x);
uint32_t iy = asuint (y);
float z;
int32_t k,m,hx,hy,ix,iy;
uint32_t sign_x = ix & SignMask;
uint32_t sign_y = iy & SignMask;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
GET_FLOAT_WORD(hy,y);
iy = hy&0x7fffffff;
if((ix>0x7f800000)||
(iy>0x7f800000)) /* x or y is NaN */
return nan_mix(x, y);
if(hx==0x3f800000) return atanf(y); /* x=1.0 */
m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */
uint32_t iax = ix & ~SignMask;
uint32_t iay = iy & ~SignMask;
/* x or y is NaN. */
if ((iax > 0x7f800000) || (iay > 0x7f800000))
return x + y;
/* m = 2 * sign(x) + sign(y). */
uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
/* The following follows glibc ieee754 implementation, except
that we do not use +-tiny shifts (non-nearest rounding mode). */
int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
/* Special case for (x, y) either on or very close to the x axis. Either y =
0, or y is tiny and x is huge (difference in exponents >=
POLY_UFLOW_BOUND). In the second case, we only want to use this special
case when x is negative (i.e. quadrants 2 or 3). */
if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
{
switch (m)
{
case 0:
case 1:
return y; /* atan(+-0,+anything)=+-0. */
case 2:
return Pi; /* atan(+0,-anything) = pi. */
case 3:
return -Pi; /* atan(-0,-anything) =-pi. */
}
}
/* Special case for (x, y) either on or very close to the y axis. Either x =
0, or x is tiny and y is huge (difference in exponents >=
POLY_UFLOW_BOUND). */
if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
return sign_y ? -PiOver2 : PiOver2;
/* x is INF. */
if (iax == 0x7f800000)
{
if (iay == 0x7f800000)
{
switch (m)
{
case 0:
return PiOver4; /* atan(+INF,+INF). */
case 1:
return -PiOver4; /* atan(-INF,+INF). */
case 2:
return 3.0f * PiOver4; /* atan(+INF,-INF). */
case 3:
return -3.0f * PiOver4; /* atan(-INF,-INF). */
/* when y = 0 */
if(iy==0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi+tiny;/* atan(+0,-anything) = pi */
case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
}
}
else
{
switch (m)
{
case 0:
return 0.0f; /* atan(+...,+INF). */
case 1:
return -0.0f; /* atan(-...,+INF). */
case 2:
return Pi; /* atan(+...,-INF). */
case 3:
return -Pi; /* atan(-...,-INF). */
/* when x = 0 */
if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
/* when x is INF */
if(ix==0x7f800000) {
if(iy==0x7f800000) {
switch(m) {
case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */
case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
case 2: return (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
}
} else {
switch(m) {
case 0: return zero ; /* atan(+...,+INF) */
case 1: return -zero ; /* atan(-...,+INF) */
case 2: return pi+tiny ; /* atan(+...,-INF) */
case 3: return -pi-tiny ; /* atan(-...,-INF) */
}
}
}
}
/* y is INF. */
if (iay == 0x7f800000)
return sign_y ? -PiOver2 : PiOver2;
/* when y is INF */
if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
uint32_t sign_xy = sign_x ^ sign_y;
float ax = asfloat (iax);
float ay = asfloat (iay);
bool pred_aygtax = (ay > ax);
/* Set up z for call to atanf. */
float n = pred_aygtax ? -ax : ay;
float d = pred_aygtax ? ay : ax;
float z = n / d;
float ret;
if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
{
/* If (x, y) is very close to x axis and x is positive, the polynomial
will underflow and evaluate to z. */
ret = z;
}
else
{
/* Work out the correct shift. */
float shift = sign_x ? -2.0f : 0.0f;
shift = pred_aygtax ? shift + 1.0f : shift;
shift *= PiOver2;
ret = eval_poly (z, z, shift);
}
/* Account for the sign of x and y. */
return asfloat (asuint (ret) ^ sign_xy);
/* compute y/x */
k = (iy-ix)>>23;
if(k > 26) { /* |y/x| > 2**26 */
z=pi_o_2+(float)0.5*pi_lo;
m&=1;
}
else if(k<-26&&hx<0) z=0.0; /* 0 > |y|/x > -2**-26 */
else z=atanf(fabsf(y/x)); /* safe to do y/x */
switch (m) {
case 0: return z ; /* atan(+,+) */
case 1: return -z ; /* atan(-,+) */
case 2: return pi-(z-pi_lo);/* atan(+,-) */
default: /* case 3 */
return (z-pi_lo)-pi;/* atan(-,-) */
}
}

View file

@ -1,54 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
#include "libc/tinymath/atan_data.internal.h"
#include "libc/tinymath/estrin_wrap.internal.h"
#include "libc/tinymath/horner.internal.h"
COSMOPOLITAN_C_START_
/*
* Double-precision polynomial evaluation function for scalar and vector atan(x)
* and atan2(y,x).
*
* Copyright (c) 2021-2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if WANT_VMATH
#define DBL_T float64x2_t
#define P(i) v_f64 (__atan_poly_data.poly[i])
#else
#define DBL_T double
#define P(i) __atan_poly_data.poly[i]
#endif
/* Polynomial used in fast atan(x) and atan2(y,x) implementations
The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
static inline DBL_T
eval_poly (DBL_T z, DBL_T az, DBL_T shift)
{
/* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
full scheme to avoid underflow in x^16. */
DBL_T z2 = z * z;
DBL_T x2 = z2 * z2;
DBL_T x4 = x2 * x2;
DBL_T x8 = x4 * x4;
DBL_T y
= FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
/* Finalize. y = shift + z + z^3 * P(z^2). */
y = FMA (y, z2 * az, az);
y = y + shift;
return y;
}
#undef DBL_T
#undef FMA
#undef P
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */

View file

@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
COSMOPOLITAN_C_START_
#define ATAN_POLY_NCOEFFS 20
extern const struct atan_poly_data {
double poly[ATAN_POLY_NCOEFFS];
} __atan_poly_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */

View file

@ -1,43 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
#include "libc/tinymath/atanf_data.internal.h"
#include "libc/tinymath/estrin_wrap.internal.h"
#include "libc/tinymath/hornerf.internal.h"
COSMOPOLITAN_C_START_
#if WANT_VMATH
#define FLT_T float32x4_t
#define P(i) v_f32 (__atanf_poly_data.poly[i])
#else
#define FLT_T float
#define P(i) __atanf_poly_data.poly[i]
#endif
/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations
The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
static inline FLT_T
eval_poly (FLT_T z, FLT_T az, FLT_T shift)
{
/* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
a standard implementation using z8 creates spurious underflow
in the very last fma (when z^8 is small enough).
Therefore, we split the last fma into a mul and and an fma.
Horner and single-level Estrin have higher errors that exceed
threshold. */
FLT_T z2 = z * z;
FLT_T z4 = z2 * z2;
/* Then assemble polynomial. */
FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P));
/* Finalize:
y = shift + z * P(z^2). */
return FMA (y, z2 * az, az) + shift;
}
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */

View file

@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
COSMOPOLITAN_C_START_
#define ATANF_POLY_NCOEFFS 8
extern const struct atanf_poly_data {
float poly[ATANF_POLY_NCOEFFS];
} __atanf_poly_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */

View file

@ -1,41 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
/* acos(z) = pi/2 - asin(z) */
double complex cacos(double complex z)
{
z = casin(z);
return CMPLX(M_PI_2 - creal(z), -cimag(z));
}

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME
float complex cacosf(float complex z)
{
z = casinf(z);
return CMPLXF((float)M_PI_2 - crealf(z), -cimagf(z));
}

View file

@ -1,44 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
/* acosh(z) = i acos(z) */
double complex cacosh(double complex z)
{
int zineg = signbit(cimag(z));
z = cacos(z);
if (zineg) return CMPLX(cimag(z), -creal(z));
else return CMPLX(-cimag(z), creal(z));
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(cacosh, cacoshl);
#endif

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
float complex cacoshf(float complex z)
{
int zineg = signbit(cimagf(z));
z = cacosf(z);
if (zineg) return CMPLXF(cimagf(z), -crealf(z));
else return CMPLXF(-cimagf(z), crealf(z));
}

View file

@ -1,45 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME
float complex casinf(float complex z)
{
float complex w;
float x, y;
x = crealf(z);
y = cimagf(z);
w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y);
float complex r = clogf(CMPLXF(-y, x) + csqrtf(w));
return CMPLXF(cimagf(r), -crealf(r));
}

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
/* asinh(z) = -i asin(i z) */
double complex casinh(double complex z)
{
z = casin(CMPLX(-cimag(z), creal(z)));
return CMPLX(cimag(z), -creal(z));
}

View file

@ -1,37 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
float complex casinhf(float complex z)
{
z = casinf(CMPLXF(-cimagf(z), crealf(z)));
return CMPLXF(cimagf(z), -crealf(z));
}

View file

@ -1,142 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
__static_yoink("openbsd_libm_notice");
/* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Complex circular arc tangent
*
*
* SYNOPSIS:
*
* double complex catan();
* double complex z, w;
*
* w = catan (z);
*
*
* DESCRIPTION:
*
* If
* z = x + iy,
*
* then
* 1 ( 2x )
* Re w = - arctan(-----------) + k PI
* 2 ( 2 2)
* (1 - x - y )
*
* ( 2 2)
* 1 (x + (y+1) )
* Im w = - log(------------)
* 4 ( 2 2)
* (x + (y-1) )
*
* Where k is an arbitrary integer.
*
* catan(z) = -i catanh(iz).
*
* ACCURACY:
*
* Relative error:
* arithmetic domain # trials peak rms
* DEC -10,+10 5900 1.3e-16 7.8e-18
* IEEE -10,+10 30000 2.3e-15 8.5e-17
* The check catan( ctan(z) ) = z, with |x| and |y| < PI/2,
* had peak relative error 1.5e-16, rms relative error
* 2.9e-17. See also clog().
*/
#define MAXNUM 1.0e308
static const double DP1 = 3.14159265160560607910E0;
static const double DP2 = 1.98418714791870343106E-9;
static const double DP3 = 1.14423774522196636802E-17;
static double _redupi(double x)
{
double t;
long i;
t = x/M_PI;
if (t >= 0.0)
t += 0.5;
else
t -= 0.5;
i = t; /* the multiple */
t = i;
t = ((x - t * DP1) - t * DP2) - t * DP3;
return t;
}
double complex catan(double complex z)
{
double complex w;
double a, t, x, x2, y;
x = creal(z);
y = cimag(z);
x2 = x * x;
a = 1.0 - x2 - (y * y);
t = 0.5 * atan2(2.0 * x, a);
w = _redupi(t);
t = y - 1.0;
a = x2 + (t * t);
t = y + 1.0;
a = (x2 + t * t)/a;
w = CMPLX(w, 0.25 * log(a));
return w;
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(catan, catanl);
#endif

View file

@ -1,135 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
__static_yoink("openbsd_libm_notice");
/* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Complex circular arc tangent
*
*
* SYNOPSIS:
*
* float complex catanf();
* float complex z, w;
*
* w = catanf( z );
*
*
* DESCRIPTION:
*
* If
* z = x + iy,
*
* then
* 1 ( 2x )
* Re w = - arctan(-----------) + k PI
* 2 ( 2 2)
* (1 - x - y )
*
* ( 2 2)
* 1 (x + (y+1) )
* Im w = - log(------------)
* 4 ( 2 2)
* (x + (y-1) )
*
* Where k is an arbitrary integer.
*
*
* ACCURACY:
*
* Relative error:
* arithmetic domain # trials peak rms
* IEEE -10,+10 30000 2.3e-6 5.2e-8
*/
#define MAXNUMF 1.0e38F
static const double DP1 = 3.140625;
static const double DP2 = 9.67502593994140625E-4;
static const double DP3 = 1.509957990978376432E-7;
static float _redupif(float xx)
{
float x, t;
long i;
x = xx;
t = x/(float)M_PI;
if (t >= 0.0f)
t += 0.5f;
else
t -= 0.5f;
i = t; /* the multiple */
t = i;
t = ((x - t * DP1) - t * DP2) - t * DP3;
return t;
}
float complex catanf(float complex z)
{
float complex w;
float a, t, x, x2, y;
x = crealf(z);
y = cimagf(z);
x2 = x * x;
a = 1.0f - x2 - (y * y);
t = 0.5f * atan2f(2.0f * x, a);
w = _redupif(t);
t = y - 1.0f;
a = x2 + (t * t);
t = y + 1.0f;
a = (x2 + (t * t))/a;
w = CMPLXF(w, 0.25f * logf(a));
return w;
}

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
/* atanh = -i atan(i z) */
double complex catanh(double complex z)
{
z = catan(CMPLX(-cimag(z), creal(z)));
return CMPLX(cimag(z), -creal(z));
}

View file

@ -1,37 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
float complex catanhf(float complex z)
{
z = catanf(CMPLXF(-cimagf(z), crealf(z)));
return CMPLXF(cimagf(z), -crealf(z));
}

651
libc/tinymath/catrig.c Normal file
View file

@ -0,0 +1,651 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
FreeBSD lib/msun/src/catrig.c
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_complex_notice");
#undef isinf
#define isinf(x) (fabs(x) == INFINITY)
#undef isnan
#define isnan(x) ((x) != (x))
#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0)
#undef signbit
#define signbit(x) (__builtin_signbit(x))
/* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */
static const double
A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better */
B_crossover = 0.6417, /* suggested by Hull et al */
FOUR_SQRT_MIN = 0x1p-509, /* >= 4 * sqrt(DBL_MIN) */
QUARTER_SQRT_MAX = 0x1p509, /* <= sqrt(DBL_MAX) / 4 */
m_e = 2.7182818284590452e0, /* 0x15bf0a8b145769.0p-51 */
m_ln2 = 6.9314718055994531e-1, /* 0x162e42fefa39ef.0p-53 */
pio2_hi = 1.5707963267948966e0, /* 0x1921fb54442d18.0p-52 */
RECIP_EPSILON = 1 / DBL_EPSILON,
SQRT_3_EPSILON = 2.5809568279517849e-8, /* 0x1bb67ae8584caa.0p-78 */
SQRT_6_EPSILON = 3.6500241499888571e-8, /* 0x13988e1409212e.0p-77 */
SQRT_MIN = 0x1p-511; /* >= sqrt(DBL_MIN) */
static const volatile double
pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */
static const volatile float
tiny = 0x1p-100;
static double complex clog_for_large_values(double complex z);
/*
* Testing indicates that all these functions are accurate up to 4 ULP.
* The functions casin(h) and cacos(h) are about 2.5 times slower than asinh.
* The functions catan(h) are a little under 2 times slower than atanh.
*
* The code for casinh, casin, cacos, and cacosh comes first. The code is
* rather complicated, and the four functions are highly interdependent.
*
* The code for catanh and catan comes at the end. It is much simpler than
* the other functions, and the code for these can be disconnected from the
* rest of the code.
*/
/*
* ================================
* | casinh, casin, cacos, cacosh |
* ================================
*/
/*
* The algorithm is very close to that in "Implementing the complex arcsine
* and arccosine functions using exception handling" by T. E. Hull, Thomas F.
* Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on
* Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335,
* http://dl.acm.org/citation.cfm?id=275324.
*
* Throughout we use the convention z = x + I*y.
*
* casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B)
* where
* A = (|z+I| + |z-I|) / 2
* B = (|z+I| - |z-I|) / 2 = y/A
*
* These formulas become numerically unstable:
* (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that
* is, Re(casinh(z)) is close to 0);
* (b) for Im(casinh(z)) when z is close to either of the intervals
* [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is
* close to PI/2).
*
* These numerical problems are overcome by defining
* f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2
* Then if A < A_crossover, we use
* log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1)))
* A-1 = f(x, 1+y) + f(x, 1-y)
* and if B > B_crossover, we use
* asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y)))
* A-y = f(x, y+1) + f(x, y-1)
* where without loss of generality we have assumed that x and y are
* non-negative.
*
* Much of the difficulty comes because the intermediate computations may
* produce overflows or underflows. This is dealt with in the paper by Hull
* et al by using exception handling. We do this by detecting when
* computations risk underflow or overflow. The hardest part is handling the
* underflows when computing f(a, b).
*
* Note that the function f(a, b) does not appear explicitly in the paper by
* Hull et al, but the idea may be found on pages 308 and 309. Introducing the
* function f(a, b) allows us to concentrate many of the clever tricks in this
* paper into one function.
*/
/*
* Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2.
* Pass hypot(a, b) as the third argument.
*/
static inline double
f(double a, double b, double hypot_a_b)
{
if (b < 0)
return ((hypot_a_b - b) / 2);
if (b == 0)
return (a / 2);
return (a * a / (hypot_a_b + b) / 2);
}
/*
* All the hard work is contained in this function.
* x and y are assumed positive or zero, and less than RECIP_EPSILON.
* Upon return:
* rx = Re(casinh(z)) = -Im(cacos(y + I*x)).
* B_is_usable is set to 1 if the value of B is usable.
* If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y.
* If returning sqrt_A2my2 has potential to result in an underflow, it is
* rescaled, and new_y is similarly rescaled.
*/
static inline void
do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B,
double *sqrt_A2my2, double *new_y)
{
double R, S, A; /* A, B, R, and S are as in Hull et al. */
double Am1, Amy; /* A-1, A-y. */
R = hypot(x, y + 1); /* |z+I| */
S = hypot(x, y - 1); /* |z-I| */
/* A = (|z+I| + |z-I|) / 2 */
A = (R + S) / 2;
/*
* Mathematically A >= 1. There is a small chance that this will not
* be so because of rounding errors. So we will make certain it is
* so.
*/
if (A < 1)
A = 1;
if (A < A_crossover) {
/*
* Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y).
* rx = log1p(Am1 + sqrt(Am1*(A+1)))
*/
if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) {
/*
* fp is of order x^2, and fm = x/2.
* A = 1 (inexactly).
*/
*rx = sqrt(x);
} else if (x >= DBL_EPSILON * fabs(y - 1)) {
/*
* Underflow will not occur because
* x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN
*/
Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
*rx = log1p(Am1 + sqrt(Am1 * (A + 1)));
} else if (y < 1) {
/*
* fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and
* A = 1 (inexactly).
*/
*rx = x / sqrt((1 - y) * (1 + y));
} else { /* if (y > 1) */
/*
* A-1 = y-1 (inexactly).
*/
*rx = log1p((y - 1) + sqrt((y - 1) * (y + 1)));
}
} else {
*rx = log(A + sqrt(A * A - 1));
}
*new_y = y;
if (y < FOUR_SQRT_MIN) {
/*
* Avoid a possible underflow caused by y/A. For casinh this
* would be legitimate, but will be picked up by invoking atan2
* later on. For cacos this would not be legitimate.
*/
*B_is_usable = 0;
*sqrt_A2my2 = A * (2 / DBL_EPSILON);
*new_y = y * (2 / DBL_EPSILON);
return;
}
/* B = (|z+I| - |z-I|) / 2 = y/A */
*B = y / A;
*B_is_usable = 1;
if (*B > B_crossover) {
*B_is_usable = 0;
/*
* Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1).
* sqrt_A2my2 = sqrt(Amy*(A+y))
*/
if (y == 1 && x < DBL_EPSILON / 128) {
/*
* fp is of order x^2, and fm = x/2.
* A = 1 (inexactly).
*/
*sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2);
} else if (x >= DBL_EPSILON * fabs(y - 1)) {
/*
* Underflow will not occur because
* x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN
* and
* x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN
*/
Amy = f(x, y + 1, R) + f(x, y - 1, S);
*sqrt_A2my2 = sqrt(Amy * (A + y));
} else if (y > 1) {
/*
* fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and
* A = y (inexactly).
*
* y < RECIP_EPSILON. So the following
* scaling should avoid any underflow problems.
*/
*sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y /
sqrt((y + 1) * (y - 1));
*new_y = y * (4 / DBL_EPSILON / DBL_EPSILON);
} else { /* if (y < 1) */
/*
* fm = 1-y >= DBL_EPSILON, fp is of order x^2, and
* A = 1 (inexactly).
*/
*sqrt_A2my2 = sqrt((1 - y) * (1 + y));
}
}
}
/*
* casinh(z) = z + O(z^3) as z -> 0
*
* casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity
* The above formula works for the imaginary part as well, because
* Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3)
* as z -> infinity, uniformly in y
*/
double complex
casinh(double complex z)
{
double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
int B_is_usable;
double complex w;
x = creal(z);
y = cimag(z);
ax = fabs(x);
ay = fabs(y);
if (isnan(x) || isnan(y)) {
/* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */
if (isinf(x))
return (CMPLX(x, y + y));
/* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */
if (isinf(y))
return (CMPLX(y, x + x));
/* casinh(NaN + I*0) = NaN + I*0 */
if (y == 0)
return (CMPLX(x + x, y));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
/* clog...() will raise inexact unless x or y is infinite. */
if (signbit(x) == 0)
w = clog_for_large_values(z) + m_ln2;
else
w = clog_for_large_values(-z) + m_ln2;
return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y)));
}
/* Avoid spuriously raising inexact for z = 0. */
if (x == 0 && y == 0)
return (z);
/* All remaining cases are inexact. */
raise_inexact();
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (z);
do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
if (B_is_usable)
ry = asin(B);
else
ry = atan2(new_y, sqrt_A2my2);
return (CMPLX(copysign(rx, x), copysign(ry, y)));
}
/*
* casin(z) = reverse(casinh(reverse(z)))
* where reverse(x + I*y) = y + I*x = I*conj(z).
*/
double complex
casin(double complex z)
{
double complex w = casinh(CMPLX(cimag(z), creal(z)));
return (CMPLX(cimag(w), creal(w)));
}
/*
* cacos(z) = PI/2 - casin(z)
* but do the computation carefully so cacos(z) is accurate when z is
* close to 1.
*
* cacos(z) = PI/2 - z + O(z^3) as z -> 0
*
* cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity
* The above formula works for the real part as well, because
* Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3)
* as z -> infinity, uniformly in y
*/
double complex
cacos(double complex z)
{
double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
int sx, sy;
int B_is_usable;
double complex w;
x = creal(z);
y = cimag(z);
sx = signbit(x);
sy = signbit(y);
ax = fabs(x);
ay = fabs(y);
if (isnan(x) || isnan(y)) {
/* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */
if (isinf(x))
return (CMPLX(y + y, -INFINITY));
/* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */
if (isinf(y))
return (CMPLX(x + x, -y));
/* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */
if (x == 0)
return (CMPLX(pio2_hi + pio2_lo, y + y));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
/* clog...() will raise inexact unless x or y is infinite. */
w = clog_for_large_values(z);
rx = fabs(cimag(w));
ry = creal(w) + m_ln2;
if (sy == 0)
ry = -ry;
return (CMPLX(rx, ry));
}
/* Avoid spuriously raising inexact for z = 1. */
if (x == 1 && y == 0)
return (CMPLX(0, -y));
/* All remaining cases are inexact. */
raise_inexact();
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (CMPLX(pio2_hi - (x - pio2_lo), -y));
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
if (B_is_usable) {
if (sx == 0)
rx = acos(B);
else
rx = acos(-B);
} else {
if (sx == 0)
rx = atan2(sqrt_A2mx2, new_x);
else
rx = atan2(sqrt_A2mx2, -new_x);
}
if (sy == 0)
ry = -ry;
return (CMPLX(rx, ry));
}
/*
* cacosh(z) = I*cacos(z) or -I*cacos(z)
* where the sign is chosen so Re(cacosh(z)) >= 0.
*/
double complex
cacosh(double complex z)
{
double complex w;
double rx, ry;
w = cacos(z);
rx = creal(w);
ry = cimag(w);
/* cacosh(NaN + I*NaN) = NaN + I*NaN */
if (isnan(rx) && isnan(ry))
return (CMPLX(ry, rx));
/* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */
/* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */
if (isnan(rx))
return (CMPLX(fabs(ry), rx));
/* cacosh(0 + I*NaN) = NaN + I*NaN */
if (isnan(ry))
return (CMPLX(ry, ry));
return (CMPLX(fabs(ry), copysign(rx, cimag(z))));
}
/*
* Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON.
*/
static double complex
clog_for_large_values(double complex z)
{
double x, y;
double ax, ay, t;
x = creal(z);
y = cimag(z);
ax = fabs(x);
ay = fabs(y);
if (ax < ay) {
t = ax;
ax = ay;
ay = t;
}
/*
* Avoid overflow in hypot() when x and y are both very large.
* Divide x and y by E, and then add 1 to the logarithm. This
* depends on E being larger than sqrt(2), since the return value of
* hypot cannot overflow if neither argument is greater in magnitude
* than 1/sqrt(2) of the maximum value of the return type. Likewise
* this determines the necessary threshold for using this method
* (however, actually use 1/2 instead as it is simpler).
*
* Dividing by E causes an insignificant loss of accuracy; however
* this method is still poor since it is uneccessarily slow.
*/
if (ax > DBL_MAX / 2)
return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x)));
/*
* Avoid overflow when x or y is large. Avoid underflow when x or
* y is small.
*/
if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
return (CMPLX(log(hypot(x, y)), atan2(y, x)));
return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x)));
}
/*
* =================
* | catanh, catan |
* =================
*/
/*
* sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow).
* Assumes x*x and y*y will not overflow.
* Assumes x and y are finite.
* Assumes y is non-negative.
* Assumes fabs(x) >= DBL_EPSILON.
*/
static inline double
sum_squares(double x, double y)
{
/* Avoid underflow when y is small. */
if (y < SQRT_MIN)
return (x * x);
return (x * x + y * y);
}
/*
* real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
* Assumes x and y are not NaN, and one of x and y is larger than
* RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use
* the code creal(1/z), because the imaginary part may produce an unwanted
* underflow.
* This is only called in a context where inexact is always raised before
* the call, so no effort is made to avoid or force inexact.
*/
static inline double
real_part_reciprocal(double x, double y)
{
double scale;
uint32_t hx, hy;
int32_t ix, iy;
/*
* This code is inspired by the C99 document n1124.pdf, Section G.5.1,
* example 2.
*/
GET_HIGH_WORD(hx, x);
ix = hx & 0x7ff00000;
GET_HIGH_WORD(hy, y);
iy = hy & 0x7ff00000;
#undef BIAS
#define BIAS (DBL_MAX_EXP - 1)
/* XXX more guard digits are useful iff there is extra precision. */
#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */
if (ix - iy >= CUTOFF << 20 || isinf(x))
return (1 / x); /* +-Inf -> +-0 is special */
if (iy - ix >= CUTOFF << 20)
return (x / y / y); /* should avoid double div, but hard */
if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
return (x / (x * x + y * y));
scale = 1;
SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */
x *= scale;
y *= scale;
return (x / (x * x + y * y) * scale);
}
/*
* catanh(z) = log((1+z)/(1-z)) / 2
* = log1p(4*x / |z-1|^2) / 4
* + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2
*
* catanh(z) = z + O(z^3) as z -> 0
*
* catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity
* The above formula works for the real part as well, because
* Re(catanh(z)) = x/|z|^2 + O(x/z^4)
* as z -> infinity, uniformly in x
*/
double complex
catanh(double complex z)
{
double x, y, ax, ay, rx, ry;
x = creal(z);
y = cimag(z);
ax = fabs(x);
ay = fabs(y);
/* This helps handle many cases. */
if (y == 0 && ax <= 1)
return (CMPLX(atanh(x), y));
/* To ensure the same accuracy as atan(), and to filter out z = 0. */
if (x == 0)
return (CMPLX(x, atan(y)));
if (isnan(x) || isnan(y)) {
/* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
if (isinf(x))
return (CMPLX(copysign(0, x), y + y));
/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
if (isinf(y))
return (CMPLX(copysign(0, x),
copysign(pio2_hi + pio2_lo, y)));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
return (CMPLX(real_part_reciprocal(x, y),
copysign(pio2_hi + pio2_lo, y)));
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
/*
* z = 0 was filtered out above. All other cases must raise
* inexact, but this is the only case that needs to do it
* explicitly.
*/
raise_inexact();
return (z);
}
if (ax == 1 && ay < DBL_EPSILON)
rx = (m_ln2 - log(ay)) / 2;
else
rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4;
if (ax == 1)
ry = atan2(2, -ay) / 2;
else if (ay < DBL_EPSILON)
ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2;
else
ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
return (CMPLX(copysign(rx, x), copysign(ry, y)));
}
/*
* catan(z) = reverse(catanh(reverse(z)))
* where reverse(x + I*y) = y + I*x = I*conj(z).
*/
double complex
catan(double complex z)
{
double complex w = catanh(CMPLX(cimag(z), creal(z)));
return (CMPLX(cimag(w), creal(w)));
}
#if LDBL_MANT_DIG == 53
__weak_reference(cacosh, cacoshl);
__weak_reference(cacos, cacosl);
__weak_reference(casinh, casinhl);
__weak_reference(casin, casinl);
__weak_reference(catanh, catanhl);
__weak_reference(catan, catanl);
#endif

377
libc/tinymath/catrigf.c Normal file
View file

@ -0,0 +1,377 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
FreeBSD lib/msun/src/catrigf.c
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_complex_notice");
#undef isinf
#define isinf(x) (fabsf(x) == INFINITY)
#undef isnan
#define isnan(x) ((x) != (x))
#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0)
#undef signbit
#define signbit(x) (__builtin_signbitf(x))
static const float
A_crossover = 10,
B_crossover = 0.6417,
FOUR_SQRT_MIN = 0x1p-61,
QUARTER_SQRT_MAX = 0x1p61,
m_e = 2.7182818285e0, /* 0xadf854.0p-22 */
m_ln2 = 6.9314718056e-1, /* 0xb17218.0p-24 */
pio2_hi = 1.5707962513e0, /* 0xc90fda.0p-23 */
RECIP_EPSILON = 1 / FLT_EPSILON,
SQRT_3_EPSILON = 5.9801995673e-4, /* 0x9cc471.0p-34 */
SQRT_6_EPSILON = 8.4572793338e-4, /* 0xddb3d7.0p-34 */
SQRT_MIN = 0x1p-63;
static const volatile float
pio2_lo = 7.5497899549e-8, /* 0xa22169.0p-47 */
tiny = 0x1p-100;
static float complex clog_for_large_values(float complex z);
static inline float
f(float a, float b, float hypot_a_b)
{
if (b < 0)
return ((hypot_a_b - b) / 2);
if (b == 0)
return (a / 2);
return (a * a / (hypot_a_b + b) / 2);
}
static inline void
do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B,
float *sqrt_A2my2, float *new_y)
{
float R, S, A;
float Am1, Amy;
R = hypotf(x, y + 1);
S = hypotf(x, y - 1);
A = (R + S) / 2;
if (A < 1)
A = 1;
if (A < A_crossover) {
if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) {
*rx = sqrtf(x);
} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
*rx = log1pf(Am1 + sqrtf(Am1 * (A + 1)));
} else if (y < 1) {
*rx = x / sqrtf((1 - y) * (1 + y));
} else {
*rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1)));
}
} else {
*rx = logf(A + sqrtf(A * A - 1));
}
*new_y = y;
if (y < FOUR_SQRT_MIN) {
*B_is_usable = 0;
*sqrt_A2my2 = A * (2 / FLT_EPSILON);
*new_y = y * (2 / FLT_EPSILON);
return;
}
*B = y / A;
*B_is_usable = 1;
if (*B > B_crossover) {
*B_is_usable = 0;
if (y == 1 && x < FLT_EPSILON / 128) {
*sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2);
} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
Amy = f(x, y + 1, R) + f(x, y - 1, S);
*sqrt_A2my2 = sqrtf(Amy * (A + y));
} else if (y > 1) {
*sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y /
sqrtf((y + 1) * (y - 1));
*new_y = y * (4 / FLT_EPSILON / FLT_EPSILON);
} else {
*sqrt_A2my2 = sqrtf((1 - y) * (1 + y));
}
}
}
float complex
casinhf(float complex z)
{
float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
int B_is_usable;
float complex w;
x = crealf(z);
y = cimagf(z);
ax = fabsf(x);
ay = fabsf(y);
if (isnan(x) || isnan(y)) {
if (isinf(x))
return (CMPLXF(x, y + y));
if (isinf(y))
return (CMPLXF(y, x + x));
if (y == 0)
return (CMPLXF(x + x, y));
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
if (signbit(x) == 0)
w = clog_for_large_values(z) + m_ln2;
else
w = clog_for_large_values(-z) + m_ln2;
return (CMPLXF(copysignf(crealf(w), x),
copysignf(cimagf(w), y)));
}
if (x == 0 && y == 0)
return (z);
raise_inexact();
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (z);
do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
if (B_is_usable)
ry = asinf(B);
else
ry = atan2f(new_y, sqrt_A2my2);
return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
}
float complex
casinf(float complex z)
{
float complex w = casinhf(CMPLXF(cimagf(z), crealf(z)));
return (CMPLXF(cimagf(w), crealf(w)));
}
float complex
cacosf(float complex z)
{
float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
int sx, sy;
int B_is_usable;
float complex w;
x = crealf(z);
y = cimagf(z);
sx = signbit(x);
sy = signbit(y);
ax = fabsf(x);
ay = fabsf(y);
if (isnan(x) || isnan(y)) {
if (isinf(x))
return (CMPLXF(y + y, -INFINITY));
if (isinf(y))
return (CMPLXF(x + x, -y));
if (x == 0)
return (CMPLXF(pio2_hi + pio2_lo, y + y));
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
w = clog_for_large_values(z);
rx = fabsf(cimagf(w));
ry = crealf(w) + m_ln2;
if (sy == 0)
ry = -ry;
return (CMPLXF(rx, ry));
}
if (x == 1 && y == 0)
return (CMPLXF(0, -y));
raise_inexact();
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (CMPLXF(pio2_hi - (x - pio2_lo), -y));
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
if (B_is_usable) {
if (sx == 0)
rx = acosf(B);
else
rx = acosf(-B);
} else {
if (sx == 0)
rx = atan2f(sqrt_A2mx2, new_x);
else
rx = atan2f(sqrt_A2mx2, -new_x);
}
if (sy == 0)
ry = -ry;
return (CMPLXF(rx, ry));
}
float complex
cacoshf(float complex z)
{
float complex w;
float rx, ry;
w = cacosf(z);
rx = crealf(w);
ry = cimagf(w);
if (isnan(rx) && isnan(ry))
return (CMPLXF(ry, rx));
if (isnan(rx))
return (CMPLXF(fabsf(ry), rx));
if (isnan(ry))
return (CMPLXF(ry, ry));
return (CMPLXF(fabsf(ry), copysignf(rx, cimagf(z))));
}
static float complex
clog_for_large_values(float complex z)
{
float x, y;
float ax, ay, t;
x = crealf(z);
y = cimagf(z);
ax = fabsf(x);
ay = fabsf(y);
if (ax < ay) {
t = ax;
ax = ay;
ay = t;
}
if (ax > FLT_MAX / 2)
return (CMPLXF(logf(hypotf(x / m_e, y / m_e)) + 1,
atan2f(y, x)));
if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
return (CMPLXF(logf(hypotf(x, y)), atan2f(y, x)));
return (CMPLXF(logf(ax * ax + ay * ay) / 2, atan2f(y, x)));
}
static inline float
sum_squares(float x, float y)
{
if (y < SQRT_MIN)
return (x * x);
return (x * x + y * y);
}
static inline float
real_part_reciprocal(float x, float y)
{
float scale;
uint32_t hx, hy;
int32_t ix, iy;
GET_FLOAT_WORD(hx, x);
ix = hx & 0x7f800000;
GET_FLOAT_WORD(hy, y);
iy = hy & 0x7f800000;
#undef BIAS
#define BIAS (FLT_MAX_EXP - 1)
#define CUTOFF (FLT_MANT_DIG / 2 + 1)
if (ix - iy >= CUTOFF << 23 || isinf(x))
return (1 / x);
if (iy - ix >= CUTOFF << 23)
return (x / y / y);
if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
return (x / (x * x + y * y));
SET_FLOAT_WORD(scale, 0x7f800000 - ix);
x *= scale;
y *= scale;
return (x / (x * x + y * y) * scale);
}
float complex
catanhf(float complex z)
{
float x, y, ax, ay, rx, ry;
x = crealf(z);
y = cimagf(z);
ax = fabsf(x);
ay = fabsf(y);
if (y == 0 && ax <= 1)
return (CMPLXF(atanhf(x), y));
if (x == 0)
return (CMPLXF(x, atanf(y)));
if (isnan(x) || isnan(y)) {
if (isinf(x))
return (CMPLXF(copysignf(0, x), y + y));
if (isinf(y))
return (CMPLXF(copysignf(0, x),
copysignf(pio2_hi + pio2_lo, y)));
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
return (CMPLXF(real_part_reciprocal(x, y),
copysignf(pio2_hi + pio2_lo, y)));
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
raise_inexact();
return (z);
}
if (ax == 1 && ay < FLT_EPSILON)
rx = (m_ln2 - logf(ay)) / 2;
else
rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4;
if (ax == 1)
ry = atan2f(2, -ay) / 2;
else if (ay < FLT_EPSILON)
ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2;
else
ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
}
float complex
catanf(float complex z)
{
float complex w = catanhf(CMPLXF(cimagf(z), crealf(z)));
return (CMPLXF(cimagf(w), crealf(w)));
}

View file

@ -78,7 +78,6 @@ __static_yoink("fdlibm_notice");
/**
* Returns cosine of 𝑥.
* @note should take ~5ns
*/
double cos(double x)
{

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,92 +25,63 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/kernel.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Small multiples of pi/2 rounded to double precision. */
static const double
c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
#include "libc/tinymath/sincosf.internal.h"
__static_yoink("arm_optimized_routines_notice");
/**
* Returns cosine of 𝑥.
* @note should take about 5ns
* Returns cosine of y.
*
* This is a fast cosf implementation. The worst-case ULP is 0.5607, and
* the maximum relative error is 0.5303 * 2^-23. A single-step range
* reduction is used for small values. Large inputs have their range
* reduced using fast integer arithmetic.
*
* @raise EDOM and FE_INVALID if y is an infinity
*/
float cosf(float x)
float
cosf (float y)
{
double y;
uint32_t ix;
unsigned n, sign;
double x = y;
double s;
int n;
const sincos_t *p = &__sincosf_table[0];
GET_FLOAT_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (abstop12 (y) < abstop12 (pio4f))
{
double x2 = x * x;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
/* raise inexact if x != 0 */
FORCE_EVAL(x + 0x1p120f);
return 1.0f;
}
return __cosdf(x);
}
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
else {
if (sign)
return __sindf(x + c1pio2);
else
return __sindf(c1pio2 - x);
}
}
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
return __cosdf(sign ? x+c4pio2 : x-c4pio2);
else {
if (sign)
return __sindf(-x - c3pio2);
else
return __sindf(x - c3pio2);
}
}
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
return 1.0f;
/* cos(Inf or NaN) is NaN */
if (ix >= 0x7f800000)
return x-x;
return sinf_poly (x, x2, p, 1);
}
else if (likely (abstop12 (y) < abstop12 (120.0f)))
{
x = reduce_fast (x, p, &n);
/* general argument reduction needed */
n = __rem_pio2f(x,&y);
switch (n&3) {
case 0: return __cosdf(y);
case 1: return __sindf(-y);
case 2: return -__cosdf(y);
default:
return __sindf(y);
}
/* Setup the signs for sin and cos. */
s = p->sign[n & 3];
if (n & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else if (abstop12 (y) < abstop12 (INFINITY))
{
uint32_t xi = asuint (y);
int sign = xi >> 31;
x = reduce_large (xi, &n);
/* Setup signs for sin and cos - include original sign. */
s = p->sign[(n + sign) & 3];
if ((n + sign) & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else
return __math_invalidf (y);
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,310 +25,247 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* double erf(double x)
* double erfc(double x)
* x
* 2 |\
* erf(x) = --------- | exp(-t*t)dt
* sqrt(pi) \|
* 0
*
* erfc(x) = 1-erf(x)
* Note that
* erf(-x) = -erf(x)
* erfc(-x) = 2 - erfc(x)
*
* Method:
* 1. For |x| in [0, 0.84375]
* erf(x) = x + x*R(x^2)
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
* where R = P/Q where P is an odd poly of degree 8 and
* Q is an odd poly of degree 10.
* -57.90
* | R - (erf(x)-x)/x | <= 2
*
*
* Remark. The formula is derived by noting
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
* and that
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
* is close to one. The interval is chosen because the fix
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
* near 0.6174), and by some experiment, 0.84375 is chosen to
* guarantee the error is less than one ulp for erf.
*
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
* c = 0.84506291151 rounded to single (24 bits)
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
* 1+(c+P1(s)/Q1(s)) if x < 0
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
* Remark: here we use the taylor series expansion at x=1.
* erf(1+s) = erf(1) + s*Poly(s)
* = 0.845.. + P1(s)/Q1(s)
* That is, we use rational approximation to approximate
* erf(1+s) - (c = (single)0.84506291151)
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
* where
* P1(s) = degree 6 poly in s
* Q1(s) = degree 6 poly in s
*
* 3. For x in [1.25,1/0.35(~2.857143)],
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
* erf(x) = 1 - erfc(x)
* where
* R1(z) = degree 7 poly in z, (z=1/x^2)
* S1(z) = degree 8 poly in z
*
* 4. For x in [1/0.35,28]
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
* = 2.0 - tiny (if x <= -6)
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
* erf(x) = sign(x)*(1.0 - tiny)
* where
* R2(z) = degree 6 poly in z, (z=1/x^2)
* S2(z) = degree 7 poly in z
*
* Note1:
* To compute exp(-x*x-0.5625+R/S), let s be a single
* precision number and s := x; then
* -x*x = -s*s + (s-x)*(s+x)
* exp(-x*x-0.5626+R/S) =
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
* Note2:
* Here 4 and 5 make use of the asymptotic series
* exp(-x*x)
* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
* x*sqrt(pi)
* We use rational approximation to approximate
* g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
* Here is the error bound for R1/S1 and R2/S2
* |R1/S1 - f(x)| < 2**(-62.57)
* |R2/S2 - f(x)| < 2**(-61.52)
*
* 5. For inf > x >= 28
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
* erfc(x) = tiny*tiny (raise underflow) if x > 0
* = 2 - tiny if x<0
*
* 7. Special case:
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
* erfc/erf(NaN) is NaN
*/
#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
#define C 0x1.b0ac16p-1
#define PA __erf_data.erf_poly_A
#define NA __erf_data.erf_ratio_N_A
#define DA __erf_data.erf_ratio_D_A
#define NB __erf_data.erf_ratio_N_B
#define DB __erf_data.erf_ratio_D_B
#define PC __erf_data.erfc_poly_C
#define PD __erf_data.erfc_poly_D
#define PE __erf_data.erfc_poly_E
#define PF __erf_data.erfc_poly_F
static const double
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
#define INSERT_WORDS(d,hi,lo) \
do { \
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
} while (0)
#define GET_HIGH_WORD(hi,d) \
do { \
(hi) = asuint64(d) >> 32; \
} while (0)
#define GET_LOW_WORD(lo,d) \
do { \
(lo) = (uint32_t)asuint64(d); \
} while (0)
#define SET_HIGH_WORD(d,hi) \
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
#define SET_LOW_WORD(d,lo) \
INSERT_WORDS(d, asuint64(d)>>32, lo)
static double erfc1(double x)
/* Top 32 bits of a double. */
static inline uint32_t
top32 (double x)
{
double_t s,P,Q;
s = fabs(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
}
static double erfc2(uint32_t ix, double x)
{
double_t s,R,S;
double z;
if (ix < 0x3ff40000) /* |x| < 1.25 */
return erfc1(x);
x = fabs(x);
s = 1/(x*x);
if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
} else { /* |x| > 1/.35 */
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
}
z = x;
SET_LOW_WORD(z,0);
return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x;
return asuint64 (x) >> 32;
}
/**
* Returns error function of 𝑥.
* Returns error function of x.
*
* Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.
*
* @raise ERANGE on underflow
*/
double erf(double x)
double
erf (double x)
{
double r,s,z,y;
uint32_t ix;
int sign;
/* Get top word and sign. */
uint32_t ix = top32 (x);
uint32_t ia = ix & 0x7fffffff;
uint32_t sign = ix >> 31;
GET_HIGH_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */
return 1-2*sign + 1/x;
/* Normalized and subnormal cases */
if (ia < 0x3feb0000)
{ /* a = |x| < 0.84375. */
if (ia < 0x3e300000)
{ /* a < 2^(-28). */
if (ia < 0x00800000)
{ /* a < 2^(-1015). */
double y = fma (TwoOverSqrtPiMinusOne, x, x);
return check_uflow (y);
}
return x + TwoOverSqrtPiMinusOne * x;
}
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
if (ix < 0x3e300000) { /* |x| < 2**-28 */
/* avoid underflow */
return 0.125*(8*x + efx8*x);
}
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
return x + x*y;
double x2 = x * x;
if (ia < 0x3fe00000)
{ /* a < 0.5 - Use polynomial approximation. */
double r1 = fma (x2, PA[1], PA[0]);
double r2 = fma (x2, PA[3], PA[2]);
double r3 = fma (x2, PA[5], PA[4]);
double r4 = fma (x2, PA[7], PA[6]);
double r5 = fma (x2, PA[9], PA[8]);
double x4 = x2 * x2;
double r = r5;
r = fma (x4, r, r4);
r = fma (x4, r, r3);
r = fma (x4, r, r2);
r = fma (x4, r, r1);
return fma (r, x, x); /* This fma is crucial for accuracy. */
}
if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
y = 1 - erfc2(ix,x);
else
y = 1 - 0x1p-1022;
return sign ? -y : y;
else
{ /* 0.5 <= a < 0.84375 - Use rational approximation. */
double x4, x8, r1n, r2n, r1d, r2d, r3d;
r1n = fma (x2, NA[1], NA[0]);
x4 = x2 * x2;
r2n = fma (x2, NA[3], NA[2]);
x8 = x4 * x4;
r1d = fma (x2, DA[0], 1.0);
r2d = fma (x2, DA[2], DA[1]);
r3d = fma (x2, DA[4], DA[3]);
double P = r1n + x4 * r2n + x8 * NA[4];
double Q = r1d + x4 * r2d + x8 * r3d;
return fma (P / Q, x, x);
}
}
else if (ia < 0x3ff40000)
{ /* 0.84375 <= |x| < 1.25. */
double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
double a = fabs (x) - 1.0;
r1n = fma (a, NB[1], NB[0]);
a2 = a * a;
r1d = fma (a, DB[0], 1.0);
a4 = a2 * a2;
r2n = fma (a, NB[3], NB[2]);
a6 = a4 * a2;
r2d = fma (a, DB[2], DB[1]);
r3n = fma (a, NB[5], NB[4]);
r3d = fma (a, DB[4], DB[3]);
r4n = NB[6];
r4d = DB[5];
double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
if (sign)
return -C - P / Q;
else
return C + P / Q;
}
else if (ia < 0x40000000)
{ /* 1.25 <= |x| < 2.0. */
double a = fabs (x);
a = a - 1.25;
double r1 = fma (a, PC[1], PC[0]);
double r2 = fma (a, PC[3], PC[2]);
double r3 = fma (a, PC[5], PC[4]);
double r4 = fma (a, PC[7], PC[6]);
double r5 = fma (a, PC[9], PC[8]);
double r6 = fma (a, PC[11], PC[10]);
double r7 = fma (a, PC[13], PC[12]);
double r8 = fma (a, PC[15], PC[14]);
double a2 = a * a;
double r = r8;
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x400a0000)
{ /* 2 <= |x| < 3.25. */
double a = fabs (x);
a = fma (0.5, a, -1.0);
double r1 = fma (a, PD[1], PD[0]);
double r2 = fma (a, PD[3], PD[2]);
double r3 = fma (a, PD[5], PD[4]);
double r4 = fma (a, PD[7], PD[6]);
double r5 = fma (a, PD[9], PD[8]);
double r6 = fma (a, PD[11], PD[10]);
double r7 = fma (a, PD[13], PD[12]);
double r8 = fma (a, PD[15], PD[14]);
double r9 = fma (a, PD[17], PD[16]);
double a2 = a * a;
double r = r9;
r = fma (a2, r, r8);
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x40100000)
{ /* 3.25 <= |x| < 4.0. */
double a = fabs (x);
a = a - 3.25;
double r1 = fma (a, PE[1], PE[0]);
double r2 = fma (a, PE[3], PE[2]);
double r3 = fma (a, PE[5], PE[4]);
double r4 = fma (a, PE[7], PE[6]);
double r5 = fma (a, PE[9], PE[8]);
double r6 = fma (a, PE[11], PE[10]);
double r7 = fma (a, PE[13], PE[12]);
double a2 = a * a;
double r = r7;
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x4017a000)
{ /* 4 <= |x| < 5.90625. */
double a = fabs (x);
a = fma (0.5, a, -2.0);
double r1 = fma (a, PF[1], PF[0]);
double r2 = fma (a, PF[3], PF[2]);
double r3 = fma (a, PF[5], PF[4]);
double r4 = fma (a, PF[7], PF[6]);
double r5 = fma (a, PF[9], PF[8]);
double r6 = fma (a, PF[11], PF[10]);
double r7 = fma (a, PF[13], PF[12]);
double r8 = fma (a, PF[15], PF[14]);
double r9 = PF[16];
double a2 = a * a;
double r = r9;
r = fma (a2, r, r8);
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else
{
/* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
if (unlikely (ia >= 0x7ff00000))
return (double) (1.0 - (sign << 1)) + 1.0 / x;
if (sign)
return -1.0;
else
return 1.0;
}
}
/**
* Returns complementary error function of 𝑥.
*/
double erfc(double x)
{
double r,s,z,y;
uint32_t ix;
int sign;
GET_HIGH_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000) {
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
return 2*sign + 1/x;
}
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
if (ix < 0x3c700000) /* |x| < 2**-56 */
return 1.0 - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3fd00000) { /* x < 1/4 */
return 1.0 - (x+x*y);
}
return 0.5 - (x - 0.5 + x*y);
}
if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022;
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#if LDBL_MANT_DIG == 53
__weak_reference(erf, erfl);
__weak_reference(erfc, erfcl);
#endif

105
libc/tinymath/erf_data.c Normal file
View file

@ -0,0 +1,105 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
Minimax approximation of erf
*/
const struct erf_data __erf_data = {
.erf_poly_A = {
#if ERF_POLY_A_NCOEFFS == 10
0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4,
-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11,
0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20,
-0x1.18c47fd143c5ep-23
#endif
},
/* Rational approximation on [0x1p-28, 0.84375] */
.erf_ratio_N_A = {
0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,
-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16
},
.erf_ratio_D_A = {
0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,
0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18
},
/* Rational approximation on [0.84375, 1.25] */
.erf_ratio_N_B = {
-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2,
0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5,
-0x1.1bf380a96073fp-9
},
.erf_ratio_D_B = {
0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4,
0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7
},
.erfc_poly_C = {
#if ERFC_POLY_C_NCOEFFS == 16
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */
0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2,
-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5,
-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8,
-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12,
0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16,
-0x1.578c9e375d37p-19
#endif
},
.erfc_poly_D = {
#if ERFC_POLY_D_NCOEFFS == 18
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */
0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3,
-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2,
-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2,
0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3,
-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3,
0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10
#endif
},
.erfc_poly_E = {
#if ERFC_POLY_E_NCOEFFS == 14
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */
0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14,
-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12,
0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14,
-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20,
-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23
#endif
},
.erfc_poly_F = {
#if ERFC_POLY_F_NCOEFFS == 17
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */
0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19,
-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14,
0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11,
-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11,
0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14,
-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19
#endif
}
};

279
libc/tinymath/erfc.c Normal file
View file

@ -0,0 +1,279 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Copyright (c) 1992-2024 The FreeBSD Project
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
/* double erf(double x)
* double erfc(double x)
* x
* 2 |\
* erf(x) = --------- | exp(-t*t)dt
* sqrt(pi) \|
* 0
*
* erfc(x) = 1-erf(x)
* Note that
* erf(-x) = -erf(x)
* erfc(-x) = 2 - erfc(x)
*
* Method:
* 1. For |x| in [0, 0.84375]
* erf(x) = x + x*R(x^2)
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
* where R = P/Q where P is an odd poly of degree 8 and
* Q is an odd poly of degree 10.
* -57.90
* | R - (erf(x)-x)/x | <= 2
*
*
* Remark. The formula is derived by noting
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
* and that
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
* is close to one. The interval is chosen because the fix
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
* near 0.6174), and by some experiment, 0.84375 is chosen to
* guarantee the error is less than one ulp for erf.
*
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
* c = 0.84506291151 rounded to single (24 bits)
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
* 1+(c+P1(s)/Q1(s)) if x < 0
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
* Remark: here we use the taylor series expansion at x=1.
* erf(1+s) = erf(1) + s*Poly(s)
* = 0.845.. + P1(s)/Q1(s)
* That is, we use rational approximation to approximate
* erf(1+s) - (c = (single)0.84506291151)
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
* where
* P1(s) = degree 6 poly in s
* Q1(s) = degree 6 poly in s
*
* 3. For x in [1.25,1/0.35(~2.857143)],
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
* erf(x) = 1 - erfc(x)
* where
* R1(z) = degree 7 poly in z, (z=1/x^2)
* S1(z) = degree 8 poly in z
*
* 4. For x in [1/0.35,28]
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
* = 2.0 - tiny (if x <= -6)
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
* erf(x) = sign(x)*(1.0 - tiny)
* where
* R2(z) = degree 6 poly in z, (z=1/x^2)
* S2(z) = degree 7 poly in z
*
* Note1:
* To compute exp(-x*x-0.5625+R/S), let s be a single
* precision number and s := x; then
* -x*x = -s*s + (s-x)*(s+x)
* exp(-x*x-0.5626+R/S) =
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
* Note2:
* Here 4 and 5 make use of the asymptotic series
* exp(-x*x)
* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
* x*sqrt(pi)
* We use rational approximation to approximate
* g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
* Here is the error bound for R1/S1 and R2/S2
* |R1/S1 - f(x)| < 2**(-62.57)
* |R2/S2 - f(x)| < 2**(-61.52)
*
* 5. For inf > x >= 28
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
* erfc(x) = tiny*tiny (raise underflow) if x > 0
* = 2 - tiny if x<0
*
* 7. Special case:
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
* erfc/erf(NaN) is NaN
*/
/* XXX Prevent compilers from erroneously constant folding: */
static const volatile double tiny= 1e-300;
static const double
half= 0.5,
one = 1,
two = 2,
/* c = (float)0.84506291151 */
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
* In the domain [0, 2**-28], only the first term in the power series
* expansion of erf(x) is used. The magnitude of the first neglected
* terms is less than 2**-84.
*/
efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
/**
* Returns complementary error function of x, i.e. 1.0 - erf(x).
*/
double
erfc(double x)
{
int32_t hx,ix;
double R,S,P,Q,s,y,z,r;
GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff;
if(ix>=0x7ff00000) { /* erfc(nan)=nan */
/* erfc(+-inf)=0,2 */
return (double)(((uint32_t)hx>>31)<<1)+one/x;
}
if(ix < 0x3feb0000) { /* |x|<0.84375 */
if(ix < 0x3c700000) /* |x|<2**-56 */
return one-x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if(hx < 0x3fd00000) { /* x<1/4 */
return one-(x+x*y);
} else {
r = x*y;
r += (x-half);
return half - r ;
}
}
if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
s = fabs(x)-one;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
if(hx>=0) {
z = one-erx; return z - P/Q;
} else {
z = erx+P/Q; return one+z;
}
}
if (ix < 0x403c0000) { /* |x|<28 */
x = fabs(x);
s = one/(x*x);
if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/
R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))));
S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
s*sa8)))))));
} else { /* |x| >= 1/.35 ~ 2.857143 */
if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))));
S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
}
z = x;
SET_LOW_WORD(z,0);
r = exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S);
if(hx>0) return r/x; else return two-r/x;
} else {
if(hx>0) return tiny*tiny; else return two-tiny;
}
}
#if LDBL_MANT_DIG == 53
__weak_reference(erfc, erfcl);
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2020 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,189 +25,99 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
#define A __erff_data.erff_poly_A
#define B __erff_data.erff_poly_B
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
static const float
erx = 8.4506291151e-01, /* 0x3f58560b */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.0270333290e+00, /* 0x3f8375d4 */
pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
pp1 = -3.2504209876e-01, /* 0xbea66beb */
pp2 = -2.8481749818e-02, /* 0xbce9528f */
pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
qq2 = 6.5022252500e-02, /* 0x3d852a63 */
qq3 = 5.0813062117e-03, /* 0x3ba68116 */
qq4 = 1.3249473704e-04, /* 0x390aee49 */
qq5 = -3.9602282413e-06, /* 0xb684e21a */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
pa1 = 4.1485610604e-01, /* 0x3ed46805 */
pa2 = -3.7220788002e-01, /* 0xbebe9208 */
pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
qa4 = 1.2617121637e-01, /* 0x3e013307 */
qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.8649440333e-03, /* 0xbc21a093 */
ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
ra2 = -1.0558626175e+01, /* 0xc128f022 */
ra3 = -6.2375331879e+01, /* 0xc2798057 */
ra4 = -1.6239666748e+02, /* 0xc322658c */
ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
ra6 = -8.1287437439e+01, /* 0xc2a2932b */
ra7 = -9.8143291473e+00, /* 0xc11d077e */
sa1 = 1.9651271820e+01, /* 0x419d35ce */
sa2 = 1.3765776062e+02, /* 0x4309a863 */
sa3 = 4.3456588745e+02, /* 0x43d9486f */
sa4 = 6.4538726807e+02, /* 0x442158c9 */
sa5 = 4.2900814819e+02, /* 0x43d6810b */
sa6 = 1.0863500214e+02, /* 0x42d9451f */
sa7 = 6.5702495575e+00, /* 0x40d23f7c */
sa8 = -6.0424413532e-02, /* 0xbd777f97 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.8649431020e-03, /* 0xbc21a092 */
rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
rb2 = -1.7757955551e+01, /* 0xc18e104b */
rb3 = -1.6063638306e+02, /* 0xc320a2ea */
rb4 = -6.3756646729e+02, /* 0xc41f6441 */
rb5 = -1.0250950928e+03, /* 0xc480230b */
rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
sb1 = 3.0338060379e+01, /* 0x41f2b459 */
sb2 = 3.2579251099e+02, /* 0x43a2e571 */
sb3 = 1.5367296143e+03, /* 0x44c01759 */
sb4 = 3.1998581543e+03, /* 0x4547fdbb */
sb5 = 2.5530502930e+03, /* 0x451f90ce */
sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
sb7 = -2.2440952301e+01; /* 0xc1b38712 */
static float erfc1(float x)
/* Top 12 bits of a float. */
static inline uint32_t
top12 (float x)
{
float_t s,P,Q;
s = fabsf(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
return asuint (x) >> 20;
}
static float erfc2(uint32_t ix, float x)
/* Efficient implementation of erff
using either a pure polynomial approximation or
the exponential of a polynomial.
Worst-case error is 1.09ulps at 0x1.c111acp-1. */
float
erff (float x)
{
float_t s,R,S;
float z;
float r, x2, u;
if (ix < 0x3fa00000) /* |x| < 1.25 */
return erfc1(x);
/* Get top word. */
uint32_t ix = asuint (x);
uint32_t sign = ix >> 31;
uint32_t ia12 = top12 (x) & 0x7ff;
x = fabsf(x);
s = 1/(x*x);
if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
} else { /* |x| >= 1/0.35 */
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
/* Limit of both intervals is 0.875 for performance reasons but coefficients
computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
from 0.94 to 1.1ulps. */
if (ia12 < 0x3f6)
{ /* a = |x| < 0.875. */
/* Tiny and subnormal cases. */
if (unlikely (ia12 < 0x318))
{ /* |x| < 2^(-28). */
if (unlikely (ia12 < 0x040))
{ /* |x| < 2^(-119). */
float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
return check_uflowf (y);
}
return x + TwoOverSqrtPiMinusOne * x;
}
ix = asuint(x);
z = asfloat(ix&0xffffe000);
return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x;
}
float erff(float x)
{
float r,s,z,y;
uint32_t ix;
int sign;
ix = asuint(x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */
return 1-2*sign + 1/x;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */
if (ix < 0x31800000) { /* |x| < 2**-28 */
/*avoid underflow */
return 0.125f*(8*x + efx8*x);
}
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
return x + x*y;
}
if (ix < 0x40c00000) /* |x| < 6 */
y = 1 - erfc2(ix,x);
else
y = 1 - 0x1p-120f;
return sign ? -y : y;
}
float erfcf(float x)
{
float r,s,z,y;
uint32_t ix;
int sign;
ix = asuint(x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) {
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
return 2*sign + 1/x;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */
if (ix < 0x23800000) /* |x| < 2**-56 */
return 1.0f - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3e800000) /* x < 1/4 */
return 1.0f - (x+x*y);
return 0.5f - (x - 0.5f + x*y);
}
if (ix < 0x41e00000) { /* |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f;
x2 = x * x;
/* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2). */
r = A[5];
r = fmaf (r, x2, A[4]);
r = fmaf (r, x2, A[3]);
r = fmaf (r, x2, A[2]);
r = fmaf (r, x2, A[1]);
r = fmaf (r, x2, A[0]);
r = fmaf (r, x, x);
}
else if (ia12 < 0x408)
{ /* |x| < 4.0 - Use a custom Estrin scheme. */
float a = fabsf (x);
/* Start with Estrin scheme on high order (small magnitude) coefficients. */
r = fmaf (B[6], a, B[5]);
u = fmaf (B[4], a, B[3]);
x2 = x * x;
r = fmaf (r, x2, u);
/* Then switch to pure Horner scheme. */
r = fmaf (r, a, B[2]);
r = fmaf (r, a, B[1]);
r = fmaf (r, a, B[0]);
r = fmaf (r, a, a);
/* Single precision exponential with ~0.5ulps,
ensures erff has max. rel. error
< 1ulp on [0.921875, 4.0],
< 1.1ulps on [0.875, 4.0]. */
r = expf (-r);
/* Explicit copysign (calling copysignf increases latency). */
if (sign)
r = -1.0f + r;
else
r = 1.0f - r;
}
else
{ /* |x| >= 4.0. */
/* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */
if (unlikely (ia12 >= 0x7f8))
return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
/* Explicit copysign (calling copysignf increases latency). */
if (sign)
r = -1.0f;
else
r = 1.0f;
}
return r;
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,12 +25,19 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/log1pf_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/* Polynomial coefficients generated using floating-point minimax
algorithm, see tools/log1pf.sollya for details. */
const struct log1pf_data __log1pf_data
= {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
-0x1.6f0d5ep-5f}};
/* Minimax approximation of erff. */
const struct erff_data __erff_data = {
.erff_poly_A = {
0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
-0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
},
.erff_poly_B = {
0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
0x1.222900p-16f
}
};

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Double-precision e^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
@ -48,6 +38,7 @@ __static_yoink("arm_optimized_routines_notice");
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
@ -56,114 +47,154 @@ __static_yoink("arm_optimized_routines_notice");
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
static inline uint32_t
top12 (double x)
{
return asuint64(x) >> 52;
return asuint64 (x) >> 52;
}
/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
If hastail is 0 then xtail is assumed to be 0 too. */
static inline double
exp_inline (double x, double xtail, int hastail)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
if (abstop >= top12 (INFINITY))
return 1.0 + x;
if (asuint64 (x) >> 63)
return __math_uflow (0);
else
return __math_oflow (0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
if (hastail)
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
/**
* Returns 𝑒^x.
*
* @raise ERANGE on overflow or underflow
*/
double exp(double x)
double
exp (double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (asuint64(x) >> 63)
return __math_uflow(0);
else
return __math_oflow(0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (UNLIKELY(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
return exp_inline (x, 0, 0);
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(exp, expl);
/* May be useful for implementing pow where more than double
precision input is needed. */
double
__exp_dd (double x, double xtail)
{
return exp_inline (x, xtail, 1);
}
#if USE_GLIBC_ABI
strong_alias (exp, __exp_finite)
hidden_alias (exp, __ieee754_exp)
hidden_alias (__exp_dd, __exp1)
# if LDBL_MANT_DIG == 53
long double expl (long double x) { return exp (x); }
# endif
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,33 +25,135 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
__static_yoink("musl_libc_notice");
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
#define N (1 << EXP_TABLE_BITS)
#define IndexMask (N - 1)
#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */
#define UFlowBound -0x1.5ep+8 /* -350. */
#define SmallTop 0x3c6 /* top12(0x1p-57). */
#define BigTop 0x407 /* top12(0x1p8). */
#define Thresh 0x41 /* BigTop - SmallTop. */
#define Shift __exp_data.shift
#define C(i) __exp_data.exp10_poly[i]
static double
special_case (uint64_t sbits, double_t tmp, uint64_t ki)
{
double_t scale, y;
if (ki - (1ull << 16) < 0x80000000)
{
/* The exponent of scale might have overflowed by 1. */
sbits -= 1ull << 52;
scale = asdouble (sbits);
y = 2 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* n < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t lo = scale - y + scale * tmp;
double_t hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (y);
}
/**
* Returns 10ˣ.
*
* The largest observed error is ~0.513 ULP.
*/
double exp10(double x)
double
exp10 (double x)
{
static const double p10[] = {
1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15
};
double n, y = modf(x, &n);
union {double f; uint64_t i;} u = {n};
/* fabs(n) < 16 without raising invalid on nan */
if ((u.i>>52 & 0x7ff) < 0x3ff+4) {
if (!y) return p10[(int)n+15];
y = exp2(3.32192809488736234787031942948939 * y);
return y * p10[(int)n+15];
}
return pow(10.0, x);
uint64_t ix = asuint64 (x);
uint32_t abstop = (ix >> 52) & 0x7ff;
if (unlikely (abstop - SmallTop >= Thresh))
{
if (abstop - SmallTop >= 0x80000000)
/* Avoid spurious underflow for tiny x.
Note: 0 is common input. */
return x + 1;
if (abstop == 0x7ff)
return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
if (x >= OFlowBound)
return __math_oflow (0);
if (x < UFlowBound)
return __math_uflow (0);
/* Large x is special-cased below. */
abstop = 0;
}
/* Reduce x: z = x * N / log10(2), k = round(z). */
double_t z = __exp_data.invlog10_2N * x;
double_t kd;
int64_t ki;
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#else
kd = eval_as_double (z + Shift);
kd -= Shift;
ki = kd;
#endif
/* r = x - k * log10(2), r in [-0.5, 0.5]. */
double_t r = x;
r = __exp_data.neglog10_2hiN * kd + r;
r = __exp_data.neglog10_2loN * kd + r;
/* exp10(x) = 2^(k/N) * 2^(r/N).
Approximate the two components separately. */
/* s = 2^(k/N), using lookup table. */
uint64_t e = ki << (52 - EXP_TABLE_BITS);
uint64_t i = (ki & IndexMask) * 2;
uint64_t u = __exp_data.tab[i + 1];
uint64_t sbits = u + e;
double_t tail = asdouble (__exp_data.tab[i]);
/* 2^(r/N) ~= 1 + r * Poly(r). */
double_t r2 = r * r;
double_t p = C (0) + r * C (1);
double_t y = C (2) + r * C (3);
y = y + r2 * C (4);
y = p + r2 * y;
y = tail + y * r;
if (unlikely (abstop == 0))
return special_case (sbits, y, ki);
/* Assemble components:
y = 2^(r/N) * 2^(k/N)
~= (y + 1) * s. */
double_t s = asdouble (sbits);
return eval_as_double (s * y + s);
}
__strong_reference(exp10, pow10);
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(exp10, exp10l);
__weak_reference(exp10, pow10l);
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Double-precision 2^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T __exp_data.tab
@ -46,6 +36,7 @@ __static_yoink("arm_optimized_routines_notice");
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]
#define C6 __exp_data.exp2_poly[5]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
@ -54,103 +45,121 @@ __static_yoink("arm_optimized_routines_notice");
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by 1. */
sbits -= 1ull << 52;
scale = asdouble(sbits);
y = 2 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by 1. */
sbits -= 1ull << 52;
scale = asdouble (sbits);
y = 2 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
static inline uint32_t
top12 (double x)
{
return asuint64(x) >> 52;
return asuint64 (x) >> 52;
}
/**
* Returns 2^𝑥.
*/
double exp2(double x)
double
exp2 (double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, r, r2, scale, tail, tmp;
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (!(asuint64(x) >> 63))
return __math_oflow(0);
else if (asuint64(x) >= asuint64(-1075.0))
return __math_uflow(0);
}
if (2 * asuint64(x) > 2 * asuint64(928.0))
/* Large x is special cased below. */
abstop = 0;
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
if (abstop >= top12 (INFINITY))
return 1.0 + x;
if (!(asuint64 (x) >> 63))
return __math_oflow (0);
else if (asuint64 (x) >= asuint64 (-1075.0))
return __math_uflow (0);
}
if (2 * asuint64 (x) > 2 * asuint64 (928.0))
/* Large x is special cased below. */
abstop = 0;
}
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double(x + Shift);
ki = asuint64(kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (UNLIKELY(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double (x + Shift);
ki = asuint64 (kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
#if EXP2_POLY_ORDER == 4
tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP2_POLY_ORDER == 5
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP2_POLY_ORDER == 6
tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(exp2, exp2l);
#if USE_GLIBC_ABI
strong_alias (exp2, __exp2_finite)
hidden_alias (exp2, __ieee754_exp2)
# if LDBL_MANT_DIG == 53
long double exp2l (long double x) { return exp2 (x); }
# endif
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
@ -53,48 +43,66 @@ Non-nearest ULP error: 1 (rounded ULP error)
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
static inline uint32_t top12(float x)
static inline uint32_t
top12 (float x)
{
return asuint(x) >> 20;
return asuint (x) >> 20;
}
/**
* Returns 2^𝑥.
*
* - ULP error: 0.502 (nearest rounding.)
* - Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
* - Wrong count: 168353 (all nearest rounding wrong results with fma.)
* - Non-nearest ULP error: 1 (rounded ULP error)
*/
float exp2f(float x)
float
exp2f (float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
uint32_t abstop;
uint64_t ki, t;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (UNLIKELY(abstop >= top12(128.0f))) {
/* |x| >= 128 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0.0f)
return __math_oflowf(0);
if (x <= -150.0f)
return __math_uflowf(0);
}
xd = (double_t) x;
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop >= top12 (128.0f)))
{
/* |x| >= 128 or x is nan. */
if (asuint (x) == asuint (-INFINITY))
return 0.0f;
if (abstop >= top12 (INFINITY))
return x + x;
if (x > 0.0f)
return __math_oflowf (0);
if (x <= -150.0f)
return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
if (x < -149.0f)
return __math_may_uflowf (0);
#endif
}
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N for int k. */
r = xd - kd;
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
kd = eval_as_double (xd + SHIFT);
ki = asuint64 (kd);
kd -= SHIFT; /* k/N for int k. */
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble (t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float (y);
}
#if USE_GLIBC_ABI
strong_alias (exp2f, __exp2f_finite)
hidden_alias (exp2f, __ieee754_exp2f)
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << EXP2F_TABLE_BITS)
const struct exp2f_data __exp2f_data = {
@ -42,6 +35,15 @@ const struct exp2f_data __exp2f_data = {
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab = {
#if N == 8
0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
#elif N == 16
0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
#elif N == 32
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
@ -50,14 +52,48 @@ const struct exp2f_data __exp2f_data = {
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
#elif N == 64
0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
#endif
},
.shift_scaled = 0x1.8p+52 / N,
.poly = {
#if N == 8
0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
#elif N == 16
0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
#elif N == 32
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
#elif N == 64
0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
#endif
},
.shift = 0x1.8p+52,
.invln2_scaled = 0x1.71547652b82fep+0 * N,
.poly_scaled = {
#if N == 8
0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
#elif N == 16
0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
#elif N == 32
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
#elif N == 64
0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
#endif
},
};

View file

@ -1,19 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
COSMOPOLITAN_C_START_
extern const struct exp2f_data {
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ */

File diff suppressed because it is too large Load diff

View file

@ -1,23 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
COSMOPOLITAN_C_START_
extern const struct exp_data {
double invln2N;
double shift;
double negln2hiN;
double negln2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
} __exp_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Single-precision e^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
@ -53,59 +43,79 @@ Non-nearest ULP error: 1 (rounded ULP error)
#define T __exp2f_data.tab
#define C __exp2f_data.poly_scaled
static inline uint32_t top12(float x)
static inline uint32_t
top12 (float x)
{
return asuint(x) >> 20;
return asuint (x) >> 20;
}
/**
* Returns 𝑒^x.
*
* - ULP error: 0.502 (nearest rounding.)
* - Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
* - Wrong count: 170635 (all nearest rounding wrong results with fma.)
* - Non-nearest ULP error: 1 (rounded ULP error)
*
* @raise ERANGE on overflow or underflow
*/
float expf(float x)
float
expf (float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
uint32_t abstop;
uint64_t ki, t;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (UNLIKELY(abstop >= top12(88.0f))) {
/* |x| >= 88 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
return __math_oflowf(0);
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
return __math_uflowf(0);
}
xd = (double_t) x;
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop >= top12 (88.0f)))
{
/* |x| >= 88 or x is nan. */
if (asuint (x) == asuint (-INFINITY))
return 0.0f;
if (abstop >= top12 (INFINITY))
return x + x;
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
return __math_oflowf (0);
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
return __math_may_uflowf (0);
#endif
}
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd;
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd;
/* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally ties-to-even rule is used, otherwise the magnitude of r
can be bigger which gives larger approximation error. */
/* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally nearest int is used, otherwise the magnitude of r can be
bigger which gives larger approximation error. */
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
kd = roundtoint (z);
ki = converttoint (z);
#else
# define SHIFT __exp2f_data.shift
kd = eval_as_double(z + SHIFT);
ki = asuint64(kd);
kd -= SHIFT;
kd = eval_as_double (z + SHIFT);
ki = asuint64 (kd);
kd -= SHIFT;
#endif
r = z - kd;
r = z - kd;
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble (t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float (y);
}
#if USE_GLIBC_ABI
strong_alias (expf, __expf_finite)
hidden_alias (expf, __ieee754_expf)
#endif

View file

@ -29,7 +29,6 @@
*/
#include "libc/math.h"
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

View file

@ -28,7 +28,6 @@
#include "libc/math.h"
__static_yoink("musl_libc_notice");
#if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
#define SPLIT (0x1p32 + 1)
#else

View file

@ -28,7 +28,11 @@
#include "libc/math.h"
__static_yoink("musl_libc_notice");
/**
* Returns euclidean distance.
*
* Max observed error is 1 ulp.
*/
float hypotf(float x, float y)
{
union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;

175
libc/tinymath/hypotf2.c Normal file
View file

@ -0,0 +1,175 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
FreeBSD lib/msun/src/e_hypotf.c
Copyright (c) 1992-2023 The FreeBSD Project.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
static const float one = 1.0, tiny=1.0e-30;
float
sqrtf2(float x)
{
float z;
int32_t sign = (int)0x80000000;
int32_t ix,s,q,m,t,i;
uint32_t r;
GET_FLOAT_WORD(ix,x);
/* take care of Inf and NaN */
if((ix&0x7f800000)==0x7f800000) {
return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
sqrt(-inf)=sNaN */
}
/* take care of zero */
if(ix<=0) {
if((ix&(~sign))==0) return x;/* sqrt(+-0) = +-0 */
else if(ix<0)
return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
}
/* normalize x */
m = (ix>>23);
if(m==0) { /* subnormal x */
for(i=0;(ix&0x00800000)==0;i++) ix<<=1;
m -= i-1;
}
m -= 127; /* unbias exponent */
ix = (ix&0x007fffff)|0x00800000;
if(m&1) /* odd m, double x to make it even */
ix += ix;
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix += ix;
q = s = 0; /* q = sqrt(x) */
r = 0x01000000; /* r = moving bit from right to left */
while(r!=0) {
t = s+r;
if(t<=ix) {
s = t+r;
ix -= t;
q += r;
}
ix += ix;
r>>=1;
}
/* use floating add to find out rounding direction */
if(ix!=0) {
z = one-tiny; /* trigger inexact flag */
if (z>=one) {
z = one+tiny;
if (z>one)
q += 2;
else
q += (q&1);
}
}
ix = (q>>1)+0x3f000000;
ix += ((uint32_t)m <<23);
SET_FLOAT_WORD(z,ix);
return z;
}
/**
* Returns euclidean distance.
*
* Error is less than 1 ULP.
*/
float
hypotf2(float x, float y)
{
float a,b,t1,t2,y1,y2,w;
int32_t j,k,ha,hb;
GET_FLOAT_WORD(ha,x);
ha &= 0x7fffffff;
GET_FLOAT_WORD(hb,y);
hb &= 0x7fffffff;
if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
a = fabsf(a);
b = fabsf(b);
if((ha-hb)>0xf000000) {return a+b;} /* x/y > 2**30 */
k=0;
if(ha > 0x58800000) { /* a>2**50 */
if(ha >= 0x7f800000) { /* Inf or NaN */
/* Use original arg order iff result is NaN; quieten sNaNs. */
w = fabsl(x+0.0L)-fabsf(y+0);
if(ha == 0x7f800000) w = a;
if(hb == 0x7f800000) w = b;
return w;
}
/* scale a and b by 2**-68 */
ha -= 0x22000000; hb -= 0x22000000; k += 68;
SET_FLOAT_WORD(a,ha);
SET_FLOAT_WORD(b,hb);
}
if(hb < 0x26800000) { /* b < 2**-50 */
if(hb <= 0x007fffff) { /* subnormal b or 0 */
if(hb==0) return a;
SET_FLOAT_WORD(t1,0x7e800000); /* t1=2^126 */
b *= t1;
a *= t1;
k -= 126;
} else { /* scale a and b by 2^68 */
ha += 0x22000000; /* a *= 2^68 */
hb += 0x22000000; /* b *= 2^68 */
k -= 68;
SET_FLOAT_WORD(a,ha);
SET_FLOAT_WORD(b,hb);
}
}
/* medium size a and b */
w = a-b;
if (w>b) {
SET_FLOAT_WORD(t1,ha&0xfffff000);
t2 = a-t1;
w = sqrtf2(t1*t1-(b*(-b)-t2*(a+t1)));
} else {
a = a+a;
SET_FLOAT_WORD(y1,hb&0xfffff000);
y2 = b - y1;
SET_FLOAT_WORD(t1,(ha+0x00800000)&0xfffff000);
t2 = a - t1;
w = sqrtf2(t1*y1-(w*(-w)-(t1*y2+t2*b)));
}
if(k!=0) {
SET_FLOAT_WORD(t1,(127+k)<<23);
return t1*w;
} else return w;
}

View file

@ -30,7 +30,6 @@
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
__static_yoink("musl_libc_notice");
#if LDBL_MANT_DIG == 64
#define SPLIT (0x1p32L+1)
#elif LDBL_MANT_DIG == 113

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Double-precision log(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define T __log_data.tab
#define T2 __log_data.tab2
#define B __log_data.poly1
@ -47,95 +37,151 @@ __static_yoink("arm_optimized_routines_notice");
#define N (1 << LOG_TABLE_BITS)
#define OFF 0x3fe6000000000000
/**
* Returns natural logarithm of 𝑥.
*/
double log(double x)
/* Top 16 bits of a double. */
static inline uint32_t
top16 (double x)
{
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = ix >> 48;
#define LO asuint64(1.0 - 0x1p-4)
#define HI asuint64(1.0 + 0x1.09p-4)
if (UNLIKELY(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
y = r3 *
(B[1] + r * B[2] + r2 * B[3] +
r3 * (B[4] + r * B[5] + r2 * B[6] +
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
/* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
return eval_as_double(y);
}
if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t)k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
y = lo + r2 * A[0] +
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
return eval_as_double(y);
return asuint64 (x) >> 48;
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(log, logl);
/**
* Returns natural logarithm of 𝑥.
*
* @raise EDOM and FE_INVALID if x is negative
* @raise ERANGE and FE_DIVBYZERO if x is zero
*/
double
log (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64 (x);
top = top16 (x);
#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1p-5)
# define HI asuint64 (1.0 + 0x1.1p-5)
#elif LOG_POLY1_ORDER == 12
# define LO asuint64 (1.0 - 0x1p-4)
# define HI asuint64 (1.0 + 0x1.09p-4)
#endif
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
#if LOG_POLY1_ORDER == 10
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 11
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2]
+ r2 * (B[3] + r * B[4] + r2 * B[5]
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 12
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6]
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
# if N <= 64
/* Worst-case error is around 0.532 ULP. */
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
# else
/* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
# endif
#endif
return eval_as_double (y);
}
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t) k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
#if LOG_POLY_ORDER == 6
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
#elif LOG_POLY_ORDER == 7
y = lo
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ r2 * r2 * (A[4] + r * A[5]))
+ hi;
#endif
return eval_as_double (y);
}
#if USE_GLIBC_ABI
strong_alias (log, __log_finite)
hidden_alias (log, __ieee754_log)
# if LDBL_MANT_DIG == 53
long double logl (long double x) { return log (x); }
# endif
#endif

View file

@ -29,11 +29,9 @@
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log2_data.internal.h"
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
/*
* ====================================================

View file

@ -2,74 +2,84 @@
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Copyright (c) 1992-2024 The FreeBSD Project
Copyright (c) 1993 Sun Microsystems, Inc.
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log_data.internal.h"
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* double log1p(double x)
* Return the natural logarithm of 1+x.
*
* Method :
* 1. Argument Reduction: find k and f such that
* 1+x = 2^k * (1+f),
* where sqrt(2)/2 < 1+f < sqrt(2) .
* 1+x = 2^k * (1+f),
* where sqrt(2)/2 < 1+f < sqrt(2) .
*
* Note. If k=0, then f=x is exact. However, if k!=0, then f
* may not be representable exactly. In that case, a correction
* term is need. Let u=1+x rounded. Let c = (1+x)-u, then
* log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
* and add back the correction term c/u.
* (Note: when x > 2**53, one can simply return log(x))
* may not be representable exactly. In that case, a correction
* term is need. Let u=1+x rounded. Let c = (1+x)-u, then
* log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
* and add back the correction term c/u.
* (Note: when x > 2**53, one can simply return log(x))
*
* 2. Approximation of log(1+f): See log.c
* 2. Approximation of log1p(f).
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
* = 2s + s*R
* We use a special Reme algorithm on [0,0.1716] to generate
* a polynomial of degree 14 to approximate R The maximum error
* of this polynomial approximation is bounded by 2**-58.45. In
* other words,
* 2 4 6 8 10 12 14
* R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
* (the values of Lp1 to Lp7 are listed in the program)
* and
* | 2 14 | -58.45
* | Lp1*s +...+Lp7*s - R(z) | <= 2
* | |
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
* In order to guarantee error in log below 1ulp, we compute log
* by
* log1p(f) = f - (hfsq - s*(hfsq+R)).
*
* 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c
* 3. Finally, log1p(x) = k*ln2 + log1p(f).
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
* Here ln2 is split into two floating point number:
* ln2_hi + ln2_lo,
* where n*ln2_hi is always exact for |n| < 2000.
*
* Special cases:
* log1p(x) is NaN with signal if x < -1 (including -INF) ;
* log1p(+INF) is +INF; log1p(-1) is -INF with signal;
* log1p(NaN) is that NaN with no signal.
* log1p(x) is NaN with signal if x < -1 (including -INF) ;
* log1p(+INF) is +INF; log1p(-1) is -INF with signal;
* log1p(NaN) is that NaN with no signal.
*
* Accuracy:
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
*
* Constants:
* The hexadecimal values are the intended ones for the following
@ -78,84 +88,110 @@ __static_yoink("fdlibm_notice");
* to produce the hexadecimal values shown.
*
* Note: Assuming log() return accurate answer, the following
* algorithm can be used to compute log1p(x) to within a few ULP:
* algorithm can be used to compute log1p(x) to within a few ULP:
*
* u = 1+x;
* if(u==1.0) return x ; else
* return log(u)*(x/(u-1.0));
* u = 1+x;
* if(u==1.0) return x ; else
* return log(u)*(x/(u-1.0));
*
* See HP-15C Advanced Functions Handbook, p.193.
* See HP-15C Advanced Functions Handbook, p.193.
*/
static const double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
static const double zero = 0.0;
static volatile double vzero = 0.0;
/**
* Returns log(𝟷+𝑥).
* Returns log(1 + x).
*/
double log1p(double x)
double
log1p(double x)
{
union {double f; uint64_t i;} u = {x};
double_t hfsq,f,c,s,z,R,w,t1,t2,dk;
uint32_t hx,hu;
int k;
double hfsq,f,c,s,z,R,u;
int32_t k,hx,hu,ax;
GET_HIGH_WORD(hx,x);
ax = hx&0x7fffffff;
hx = u.i>>32;
k = 1;
if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */
if (hx >= 0xbff00000) { /* x <= -1.0 */
if (x == -1)
return x/0.0; /* log1p(-1) = -inf */
return (x-x)/0.0; /* log1p(x<-1) = NaN */
}
if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */
/* underflow if subnormal */
if ((hx&0x7ff00000) == 0)
FORCE_EVAL((float)x);
return x;
}
if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
k = 0;
c = 0;
f = x;
}
} else if (hx >= 0x7ff00000)
return x;
if (k) {
u.f = 1 + x;
hu = u.i>>32;
hu += 0x3ff00000 - 0x3fe6a09e;
k = (int)(hu>>20) - 0x3ff;
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
if (k < 54) {
c = k >= 2 ? 1-(u.f-x) : x-(u.f-1);
c /= u.f;
} else
c = 0;
/* reduce u into [sqrt(2)/2, sqrt(2)] */
hu = (hu&0x000fffff) + 0x3fe6a09e;
u.i = (uint64_t)hu<<32 | (u.i&0xffffffff);
f = u.f - 1;
if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */
if(ax>=0x3ff00000) { /* x <= -1.0 */
if(x==-1.0) return -two54/vzero; /* log1p(-1)=+inf */
else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
}
if(ax<0x3e200000) { /* |x| < 2**-29 */
if(two54+x>zero /* raise inexact */
&&ax<0x3c900000) /* |x| < 2**-54 */
return x;
else
return x - x*x*0.5;
}
if(hx>0||hx<=((int32_t)0xbfd2bec4)) {
k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
}
hfsq = 0.5*f*f;
s = f/(2.0+f);
if (hx >= 0x7ff00000) return x+x;
if(k!=0) {
if(hx<0x43400000) {
STRICT_ASSIGN(double,u,1.0+x);
GET_HIGH_WORD(hu,u);
k = (hu>>20)-1023;
c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
c /= u;
} else {
u = x;
GET_HIGH_WORD(hu,u);
k = (hu>>20)-1023;
c = 0;
}
hu &= 0x000fffff;
/*
* The approximation to sqrt(2) used in thresholds is not
* critical. However, the ones used above must give less
* strict bounds than the one here so that the k==0 case is
* never reached from here, since here we have committed to
* using the correction term but don't use it if k==0.
*/
if(hu<0x6a09e) { /* u ~< sqrt(2) */
SET_HIGH_WORD(u,hu|0x3ff00000); /* normalize u */
} else {
k += 1;
SET_HIGH_WORD(u,hu|0x3fe00000); /* normalize u/2 */
hu = (0x00100000-hu)>>2;
}
f = u-1.0;
}
hfsq=0.5*f*f;
if(hu==0) { /* |f| < 2**-20 */
if(f==zero) {
if(k==0) {
return zero;
} else {
c += k*ln2_lo;
return k*ln2_hi+c;
}
}
R = hfsq*(1.0-0.66666666666666666*f);
if(k==0) return f-R; else
return k*ln2_hi-((R-(k*ln2_lo+c))-f);
}
s = f/(2.0+f);
z = s*s;
w = z*z;
t1 = w*(Lg2+w*(Lg4+w*Lg6));
t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
R = t2 + t1;
dk = k;
return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;
R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
if(k==0) return f-(hfsq-s*(hfsq+R)); else
return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#if (LDBL_MANT_DIG == 53)
__weak_reference(log1p, log1pl);
#endif

View file

@ -1,175 +1,133 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 1992-2024 The FreeBSD Project
Copyright (c) 1993 Sun Microsystems, Inc.
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log1pf_data.internal.h"
__static_yoink("arm_optimized_routines_notice");
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
#define Ln2 (0x1.62e43p-1f)
#define SignMask (0x80000000)
/* s_log1pf.c -- float version of s_log1p.c.
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/* Biased exponent of the largest float m for which m^8 underflows. */
#define M8UFLOW_BOUND_BEXP 112
/* Biased exponent of the largest float for which we just return x. */
#define TINY_BOUND_BEXP 103
static const float
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
two25 = 3.355443200e+07, /* 0x4c000000 */
Lp1 = 6.6666668653e-01, /* 3F2AAAAB */
Lp2 = 4.0000000596e-01, /* 3ECCCCCD */
Lp3 = 2.8571429849e-01, /* 3E924925 */
Lp4 = 2.2222198546e-01, /* 3E638E29 */
Lp5 = 1.8183572590e-01, /* 3E3A3325 */
Lp6 = 1.5313838422e-01, /* 3E1CD04F */
Lp7 = 1.4798198640e-01; /* 3E178897 */
#define C(i) __log1pf_data.coeffs[i]
static const float zero = 0.0;
static volatile float vzero = 0.0;
static inline float
eval_poly (float m, uint32_t e)
{
#ifdef LOG1PF_2U5
/* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
slightly modified Estrin scheme (no x^0 term, and x term is just x). */
float p_12 = fmaf (m, C (1), C (0));
float p_34 = fmaf (m, C (3), C (2));
float p_56 = fmaf (m, C (5), C (4));
float p_78 = fmaf (m, C (7), C (6));
float m2 = m * m;
float p_02 = fmaf (m2, p_12, m);
float p_36 = fmaf (m2, p_56, p_34);
float p_79 = fmaf (m2, C (8), p_78);
float m4 = m2 * m2;
float p_06 = fmaf (m4, p_36, p_02);
if (UNLIKELY (e < M8UFLOW_BOUND_BEXP))
return p_06;
float m8 = m4 * m4;
return fmaf (m8, p_79, p_06);
#elif defined(LOG1PF_1U3)
/* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
scheme. Our polynomial approximation for log1p has the form
x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
Hence approximation has the form m + m^2 * P(m)
where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
return fmaf (m, m * HORNER_8 (m, C), m);
#else
#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
#endif
}
static inline uint32_t
biased_exponent (uint32_t ix)
{
return (ix & 0x7f800000) >> 23;
}
/* log1pf approximation using polynomial on reduced interval. Worst-case error
when using Estrin is roughly 2.02 ULP:
log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
/**
* Returns log(1 + x).
*/
float
log1pf (float x)
log1pf(float x)
{
uint32_t ix = asuint (x);
uint32_t ia = ix & ~SignMask;
uint32_t ia12 = ia >> 20;
uint32_t e = biased_exponent (ix);
float hfsq,f,c,s,z,R,u;
int32_t k,hx,hu,ax;
/* Handle special cases first. */
if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
|| e <= TINY_BOUND_BEXP))
{
if (ix == 0xff800000)
{
/* x == -Inf => log1pf(x) = NaN. */
return NAN;
GET_FLOAT_WORD(hx,x);
ax = hx&0x7fffffff;
k = 1;
if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */
if(ax>=0x3f800000) { /* x <= -1.0 */
if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=+inf */
else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
}
if(ax<0x38000000) { /* |x| < 2**-15 */
if(two25+x>zero /* raise inexact */
&&ax<0x33800000) /* |x| < 2**-24 */
return x;
else
return x - x*x*(float)0.5;
}
if(hx>0||hx<=((int32_t)0xbe95f619)) {
k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
}
if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
{
/* |x| < TinyBound => log1p(x) = x.
x == Inf => log1pf(x) = Inf. */
return x;
if (hx >= 0x7f800000) return x+x;
if(k!=0) {
if(hx<0x5a000000) {
STRICT_ASSIGN(float,u,(float)1.0+x);
GET_FLOAT_WORD(hu,u);
k = (hu>>23)-127;
/* correction term */
c = (k>0)? (float)1.0-(u-x):x-(u-(float)1.0);
c /= u;
} else {
u = x;
GET_FLOAT_WORD(hu,u);
k = (hu>>23)-127;
c = 0;
}
hu &= 0x007fffff;
/*
* The approximation to sqrt(2) used in thresholds is not
* critical. However, the ones used above must give less
* strict bounds than the one here so that the k==0 case is
* never reached from here, since here we have committed to
* using the correction term but don't use it if k==0.
*/
if(hu<0x3504f4) { /* u < sqrt(2) */
SET_FLOAT_WORD(u,hu|0x3f800000);/* normalize u */
} else {
k += 1;
SET_FLOAT_WORD(u,hu|0x3f000000); /* normalize u/2 */
hu = (0x00800000-hu)>>2;
}
f = u-(float)1.0;
}
if (ix == 0xbf800000)
{
/* x == -1.0 => log1pf(x) = -Inf. */
return __math_divzerof (-1);
hfsq=(float)0.5*f*f;
if(hu==0) { /* |f| < 2**-20 */
if(f==zero) {
if(k==0) {
return zero;
} else {
c += k*ln2_lo;
return k*ln2_hi+c;
}
}
R = hfsq*((float)1.0-(float)0.66666666666666666*f);
if(k==0) return f-R; else
return k*ln2_hi-((R-(k*ln2_lo+c))-f);
}
if (ia12 >= 0x7f8)
{
/* x == +/-NaN => log1pf(x) = NaN. */
return __math_invalidf (asfloat (ia));
}
/* x < -1.0 => log1pf(x) = NaN. */
return __math_invalidf (x);
}
/* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
is in [-0.25, 0.5]):
log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
We approximate log1p(m) with a polynomial, then scale by
k*log(2). Instead of doing this directly, we use an intermediate
scale factor s = 4*k*log(2) to ensure the scale is representable
as a normalised fp32 number. */
if (ix <= 0x3f000000 || ia <= 0x3e800000)
{
/* If x is in [-0.25, 0.5] then we can shortcut all the logic
below, as k = 0 and m = x. All we need is to return the
polynomial. */
return eval_poly (x, e);
}
float m = x + 1.0f;
/* k is used scale the input. 0x3f400000 is chosen as we are trying to
reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
let k = sign * 2^p where sign = -1 if x < 0
1 otherwise
and p is a negative integer whose magnitude increases with the
magnitude of x. */
int k = (asuint (m) - 0x3f400000) & 0xff800000;
/* By using integer arithmetic, we obtain the necessary scaling by
subtracting the unbiased exponent of k from the exponent of x. */
float m_scale = asfloat (asuint (x) - k);
/* Scale up to ensure that the scale factor is representable as normalised
fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
float s = asfloat (asuint (4.0f) - k);
m_scale = m_scale + fmaf (0.25f, s, -1.0f);
float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
/* The scale factor to be applied back at the end - by multiplying float(k)
by 2^-23 we get the unbiased exponent of k. */
float scale_back = (float) k * 0x1.0p-23f;
/* Apply the scaling back. */
return fmaf (scale_back, Ln2, p);
s = f/((float)2.0+f);
z = s*s;
R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
if(k==0) return f-(hfsq-s*(hfsq+R)); else
return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
COSMOPOLITAN_C_START_
#define LOG1PF_2U5
#define V_LOG1PF_2U5
#define LOG1PF_NCOEFFS 9
extern const struct log1pf_data {
float coeffs[LOG1PF_NCOEFFS]; //
} __log1pf_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,20 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log2_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Double-precision log2(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define T __log2_data.tab
#define T2 __log2_data.tab2
#define B __log2_data.poly1
@ -49,110 +38,126 @@ __static_yoink("arm_optimized_routines_notice");
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t top16(double x)
static inline uint32_t
top16 (double x)
{
return asuint64(x) >> 48;
return asuint64 (x) >> 48;
}
/**
* Calculates log𝑥.
* Returns base 2 logarithm of x.
*/
double log2(double x)
double
log2 (double x)
{
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = top16(x);
#define LO asuint64(1.0 - 0x1.5b51p-5)
#define HI asuint64(1.0 + 0x1.6ab2p-5)
if (UNLIKELY(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
#if __FP_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
ix = asuint64 (x);
top = top16 (x);
#if LOG2_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1.5b51p-5)
# define HI asuint64 (1.0 + 0x1.6ab2p-5)
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
return eval_as_double(y);
}
if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
kd = (double_t)k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
#if HAVE_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
double_t rhi, rlo;
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
#if LOG2_POLY1_ORDER == 11
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
#endif
return eval_as_double (y);
}
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
kd = (double_t) k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif
/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;
/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;
/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
return eval_as_double(y);
/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
#if LOG2_POLY_ORDER == 7
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
#endif
return eval_as_double (y);
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(log2, log2l);
#if USE_GLIBC_ABI
strong_alias (log2, __log2_finite)
hidden_alias (log2, __ieee754_log2)
# if LDBL_MANT_DIG == 53
long double log2l (long double x) { return log2 (x); }
# endif
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,14 +25,8 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/log2_data.internal.h"
/*
* Data for log2.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
#define N (1 << LOG2_TABLE_BITS)
@ -41,6 +35,7 @@ const struct log2_data __log2_data = {
.invln2hi = 0x1.7154765200000p+0,
.invln2lo = 0x1.705fc2eefa200p-33,
.poly1 = {
#if LOG2_POLY1_ORDER == 11
// relative error: 0x1.2fad8188p-63
// in -0x1.5b51p-5 0x1.6ab2p-5
-0x1.71547652b82fep-1,
@ -53,8 +48,10 @@ const struct log2_data __log2_data = {
0x1.484d154f01b4ap-3,
-0x1.289e4a72c383cp-3,
0x1.0b32f285aee66p-3,
#endif
},
.poly = {
#if N == 64 && LOG2_POLY_ORDER == 7
// relative error: 0x1.a72c2bf8p-58
// abs error: 0x1.67a552c8p-66
// in -0x1.f45p-8 0x1.f45p-8
@ -64,6 +61,7 @@ const struct log2_data __log2_data = {
0x1.2776c50034c48p-2,
-0x1.ec7b328ea92bcp-3,
0x1.a6225e117f92ep-3,
#endif
},
/* Algorithm:
@ -92,6 +90,7 @@ single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
#if N == 64
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
@ -156,9 +155,11 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
#endif
},
#if !__FP_FAST_FMA
#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
@ -223,6 +224,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
# endif
},
#endif
#endif /* !HAVE_FAST_FMA */
};

View file

@ -1,26 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
COSMOPOLITAN_C_START_
extern const struct log2_data {
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG2_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,20 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log2f_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Single-precision log2 function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
LOG2F_TABLE_BITS = 4
LOG2F_POLY_ORDER = 4
@ -53,52 +42,65 @@ Relative error: 1.9 * 2^-26 (before rounding.)
#define OFF 0x3f330000
/**
* Calculates log𝑥.
* Returns base-2 logarithm of x.
*
* - ULP error: 0.752 (nearest rounding.)
* - Relative error: 1.9 * 2^-26 (before rounding.)
*/
float log2f(float x)
float
log2f (float x)
{
double_t z, r, r2, p, y, y0, invc, logc;
uint32_t ix, iz, top, tmp;
int k, i;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, p, y, y0, invc, logc;
uint32_t ix, iz, top, tmp;
int k, i;
ix = asuint(x);
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000))
return 0;
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof(1);
if (ix == 0x7f800000) /* log2(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
ix = asuint (x);
#if WANT_ROUNDING
/* Fix sign of zero with downward rounding when x==1. */
if (unlikely (ix == 0x3f800000))
return 0;
#endif
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
{
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof (1);
if (ix == 0x7f800000) /* log2(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf (x);
/* x is subnormal, normalize it. */
ix = asuint (x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)tmp >> 23; /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t) tmp >> 23; /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t) asfloat (iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t) k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
p = A[3] * r + y0;
y = y * r2 + p;
return eval_as_float(y);
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
p = A[3] * r + y0;
y = y * r2 + p;
return eval_as_float (y);
}
#if USE_GLIBC_ABI
strong_alias (log2f, __log2f_finite)
hidden_alias (log2f, __ieee754_log2f)
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/log2f_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data definition for log2f.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
const struct log2f_data __log2f_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },

View file

@ -1,17 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
COSMOPOLITAN_C_START_
extern const struct log2f_data {
struct {
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
double poly[LOG2F_POLY_ORDER];
} __log2f_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,22 +25,41 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/log_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << LOG_TABLE_BITS)
const struct log_data __log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly1 = {
#if LOG_POLY1_ORDER == 10
// relative error: 0x1.32eccc6p-62
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.55555555554e5p-2,
-0x1.0000000000af2p-2,
0x1.9999999bbe436p-3,
-0x1.55555537f9cdep-3,
0x1.24922fc8127cfp-3,
-0x1.0000b7d6bb612p-3,
0x1.c806ee1ddbcafp-4,
-0x1.972335a9c2d6ep-4,
#elif LOG_POLY1_ORDER == 11
// relative error: 0x1.52c8b708p-68
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.5555555555555p-2,
-0x1.ffffffffffea9p-3,
0x1.999999999c4d4p-3,
-0x1.55555557f5541p-3,
0x1.249248fbe33e4p-3,
-0x1.ffffc9a3c825bp-4,
0x1.c71e1f204435dp-4,
-0x1.9a7f26377d06ep-4,
0x1.71c30cf8f7364p-4,
#elif LOG_POLY1_ORDER == 12
// relative error: 0x1.c04d76cp-63
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
-0x1p-1,
@ -54,8 +73,20 @@ const struct log_data __log_data = {
-0x1.999eb43b068ffp-4,
0x1.78182f7afd085p-4,
-0x1.5521375d145cdp-4,
#endif
},
.poly = {
#if N == 64 && LOG_POLY_ORDER == 7
// relative error: 0x1.906eb8ap-58
// abs error: 0x1.d2cad5a8p-67
// in -0x1.fp-8 0x1.fp-8
-0x1.0000000000027p-1,
0x1.555555555556ap-2,
-0x1.fffffff0440bap-3,
0x1.99999991906c3p-3,
-0x1.555c8d7e8201ep-3,
0x1.24978c59151fap-3,
#elif N == 128 && LOG_POLY_ORDER == 6
// relative error: 0x1.926199e8p-56
// abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9
@ -64,6 +95,17 @@ const struct log_data __log_data = {
-0x1.fffffffeb459p-3,
0x1.999b324f10111p-3,
-0x1.55575e506c89fp-3,
#elif N == 128 && LOG_POLY_ORDER == 7
// relative error: 0x1.649fc4bp-64
// abs error: 0x1.c3b5769p-74
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.5555555555556p-2,
-0x1.fffffffea1a8p-3,
0x1.99999998e9139p-3,
-0x1.555776801b968p-3,
0x1.2493c29331a5cp-3,
#endif
},
/* Algorithm:
@ -92,6 +134,72 @@ a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
#if N == 64
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
#elif N == 128
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
@ -220,9 +328,76 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
#endif
},
#if !__FP_FAST_FMA
#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
# elif N == 128
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
@ -351,6 +526,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
},
#endif
},
#endif /* !HAVE_FAST_FMA */
};

View file

@ -1,26 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
COSMOPOLITAN_C_START_
extern const struct log_data {
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/logf_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Single-precision log function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
LOGF_TABLE_BITS = 4
LOGF_POLY_ORDER = 4
@ -53,50 +43,63 @@ Relative error: 1.957 * 2^-26 (before rounding.)
#define OFF 0x3f330000
/**
* Returns natural logarithm of 𝑥.
* Returns natural logarithm of x.
*
* - ULP error: 0.818 (nearest rounding.)
* - Relative error: 1.957 * 2^-26 (before rounding.)
*/
float logf(float x)
float
logf (float x)
{
double_t z, r, r2, y, y0, invc, logc;
uint32_t ix, iz, tmp;
int k, i;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, y, y0, invc, logc;
uint32_t ix, iz, tmp;
int k, i;
ix = asuint(x);
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000))
return 0;
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof(1);
if (ix == 0x7f800000) /* log(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
ix = asuint (x);
#if WANT_ROUNDING
/* Fix sign of zero with downward rounding when x==1. */
if (unlikely (ix == 0x3f800000))
return 0;
#endif
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
{
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof (1);
if (ix == 0x7f800000) /* log(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf (x);
/* x is subnormal, normalize it. */
ix = asuint (x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t)tmp >> 23; /* arithmetic shift */
iz = ix - (tmp & 0xff800000);
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t) tmp >> 23; /* arithmetic shift */
iz = ix - (tmp & 0xff800000);
invc = T[i].invc;
logc = T[i].logc;
z = (double_t) asfloat (iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
r = z * invc - 1;
y0 = logc + (double_t)k * Ln2;
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
r = z * invc - 1;
y0 = logc + (double_t) k * Ln2;
/* Pipelined polynomial evaluation to approximate log1p(r). */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
y = y * r2 + (y0 + r);
return eval_as_float(y);
/* Pipelined polynomial evaluation to approximate log1p(r). */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
y = y * r2 + (y0 + r);
return eval_as_float (y);
}
#if USE_GLIBC_ABI
strong_alias (logf, __logf_finite)
hidden_alias (logf, __ieee754_logf)
#endif

View file

@ -3,7 +3,7 @@
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/logf_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data definition for logf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
const struct logf_data __logf_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },

View file

@ -1,18 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
COSMOPOLITAN_C_START_
extern const struct logf_data {
struct {
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,16 +25,76 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/atan_data.internal.h"
#include "libc/errno.h"
#include "libc/tinymath/arm.internal.h"
const struct atan_poly_data __atan_poly_data = {
.poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
[2**-1022, 1.0]. See atan.sollya for details of how these were
generated. */
-0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
#if WANT_ERRNO
/* dontinline reduces code size and avoids making math functions non-leaf
when the error handling is inlined. */
dontinline static double
with_errno (double y, int e)
{
errno = e;
return y;
}
#else
#define with_errno(x, e) (x)
#endif
/* dontinline reduces code size. */
dontinline static double
xflow (uint32_t sign, double y)
{
y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
return with_errno (y, ERANGE);
}
double
__math_uflow (uint32_t sign)
{
return xflow (sign, 0x1p-767);
}
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
is valid even if the result is non-zero, but in the subnormal range. */
double
__math_may_uflow (uint32_t sign)
{
return xflow (sign, 0x1.8p-538);
}
#endif
double
__math_oflow (uint32_t sign)
{
return xflow (sign, 0x1p769);
}
double
__math_divzero (uint32_t sign)
{
double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
return with_errno (y, ERANGE);
}
dontinstrument double
__math_invalid (double x)
{
double y = (x - x) / (x - x);
return isnan (x) ? y : with_errno (y, EDOM);
}
/* Check result and set errno if necessary. */
dontinstrument double
__math_check_uflow (double y)
{
return y == 0.0 ? with_errno (y, ERANGE) : y;
}
dontinstrument double
__math_check_oflow (double y)
{
return isinf (y) ? with_errno (y, ERANGE) : y;
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,27 +25,76 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
#include "libc/errno.h"
#include "libc/tinymath/arm.internal.h"
// FIXME
/* asin(z) = -i log(i z + sqrt(1 - z*z)) */
double complex casin(double complex z)
#if WANT_ERRNO
/* dontinline reduces code size and avoids making math functions non-leaf
when the error handling is inlined. */
dontinline static float
with_errnof (float y, int e)
{
double complex w;
double x, y;
errno = e;
return y;
}
#else
#define with_errnof(x, e) (x)
#endif
x = creal(z);
y = cimag(z);
w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y);
double complex r = clog(CMPLX(-y, x) + csqrt(w));
return CMPLX(cimag(r), -creal(r));
/* dontinline reduces code size. */
dontinline static float
xflowf (uint32_t sign, float y)
{
y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
return with_errnof (y, ERANGE);
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(casin, casinl);
float
__math_uflowf (uint32_t sign)
{
return xflowf (sign, 0x1p-95f);
}
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
is valid even if the result is non-zero, but in the subnormal range. */
float
__math_may_uflowf (uint32_t sign)
{
return xflowf (sign, 0x1.4p-75f);
}
#endif
float
__math_oflowf (uint32_t sign)
{
return xflowf (sign, 0x1p97f);
}
float
__math_divzerof (uint32_t sign)
{
float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
return with_errnof (y, ERANGE);
}
dontinstrument float
__math_invalidf (float x)
{
float y = (x - x) / (x - x);
return isnan (x) ? y : with_errnof (y, EDOM);
}
/* Check result and set errno if necessary. */
dontinstrument float
__math_check_uflowf (float y)
{
return y == 0.0f ? with_errnof (y, ERANGE) : y;
}
dontinstrument float
__math_check_oflowf (float y)
{
return isinf (y) ? with_errnof (y, ERANGE) : y;
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,11 +25,25 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/atanf_data.internal.h"
#include "libc/errno.h"
#include "libc/tinymath/arm.internal.h"
/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
*/
const struct atanf_poly_data __atanf_poly_data = {
.poly = {/* See atanf.sollya for details of how these were generated. */
-0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
#if WANT_ERRNO
/* dontinline reduces code size and avoids making math functions non-leaf
when the error handling is inlined. */
dontinline static long double
with_errnol (long double y, int e)
{
errno = e;
return y;
}
#else
#define with_errnol(x, e) (x)
#endif
dontinstrument long double
__math_invalidl (long double x)
{
long double y = (x - x) / (x - x);
return isnan (x) ? y : with_errnol (y, EDOM);
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,20 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/pow_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Double-precision x^y function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
@ -53,79 +42,83 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
#define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
static inline uint32_t
top12 (double x)
{
return asuint64(x) >> 52;
return asuint64 (x) >> 52;
}
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */
static inline double_t log_inline(uint64_t ix, double_t *tail)
static inline double_t
log_inline (uint64_t ix, double_t *tail)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp;
int k, i;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
z = asdouble(iz);
kd = (double_t)k;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
z = asdouble (iz);
kd = (double_t) k;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc;
logc = T[i].logc;
logctail = T[i].logctail;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc;
logc = T[i].logc;
logctail = T[i].logctail;
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
#if __FP_FAST_FMA
r = __builtin_fma(z, invc, -1.0);
#if HAVE_FAST_FMA
r = fma (z, invc, -1.0);
#else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc;
r = rhi + rlo;
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc;
r = rhi + rlo;
#endif
/* k*Ln2 + log(c) + r. */
t1 = kd * Ln2hi + logc;
t2 = t1 + r;
lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r;
/* k*Ln2 + log(c) + r. */
t1 = kd * Ln2hi + logc;
t2 = t1 + r;
lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r;
/* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
#if __FP_FAST_FMA
hi = t2 + ar2;
lo3 = __builtin_fma(ar, r, -ar2);
lo4 = t2 - hi + ar2;
/* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
#if HAVE_FAST_FMA
hi = t2 + ar2;
lo3 = fma (ar, r, -ar2);
lo4 = t2 - hi + ar2;
#else
double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi;
hi = t2 + arhi2;
lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2;
double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi;
hi = t2 + arhi2;
lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2;
#endif
/* p = log1p(r) - r - A[0]*r*r. */
p = (ar3 * (A[1] + r * A[2] +
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
*tail = hi - y + lo;
return y;
/* p = log1p(r) - r - A[0]*r*r. */
#if POW_LOG_POLY_ORDER == 8
p = (ar3
* (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
#endif
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
*tail = hi - y + lo;
return y;
}
#undef N
@ -149,232 +142,268 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
forceinline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
/* Note: sbits is signed scale. */
scale = asdouble(sbits);
y = scale + scale * tmp;
if (fabs(y) < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0;
if (y < 0.0)
one = -1.0;
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
y = eval_as_double(hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble(sbits & 0x8000000000000000);
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
/* Note: sbits is signed scale. */
scale = asdouble (sbits);
y = scale + scale * tmp;
if (fabs (y) < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0;
if (y < 0.0)
one = -1.0;
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
y = eval_as_double (hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble (sbits & 0x8000000000000000);
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
forceinline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
static inline double
exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (UNLIKELY(abstop - top12(0x1p-54) >=
top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000) {
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one;
}
if (abstop >= top12(1024.0)) {
/* Note: inf and nan are already handled. */
if (asuint64(x) >> 63)
return __math_uflow(sign_bias);
else
return __math_oflow(sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
if (abstop - top12 (0x1p-54) >= 0x80000000)
{
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one;
}
if (abstop >= top12 (1024.0))
{
/* Note: inf and nan are already handled. */
if (asuint64 (x) >> 63)
return __math_uflow (sign_bias);
else
return __math_oflow (sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (UNLIKELY(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint64_t iy)
static inline int
checkint (uint64_t iy)
{
int e = iy >> 52 & 0x7ff;
if (e < 0x3ff)
return 0;
if (e > 0x3ff + 52)
return 2;
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
return 0;
if (iy & (1ULL << (0x3ff + 52 - e)))
return 1;
return 2;
int e = iy >> 52 & 0x7ff;
if (e < 0x3ff)
return 0;
if (e > 0x3ff + 52)
return 2;
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
return 0;
if (iy & (1ULL << (0x3ff + 52 - e)))
return 1;
return 2;
}
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
static inline int zeroinfnan(uint64_t i)
static inline int
zeroinfnan (uint64_t i)
{
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
}
/**
* Returns 𝑥^𝑦.
* @note should take ~18ns
*
* - Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
* - relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
* - ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*
* @raise ERANGE on overflow or underflow
* @raise EDOM if x is negative and y is a finite non-integer
*/
double pow(double x, double y)
double
pow (double x, double y)
{
uint32_t sign_bias = 0;
uint64_t ix, iy;
uint32_t topx, topy;
uint32_t sign_bias = 0;
uint64_t ix, iy;
uint32_t topx, topy;
ix = asuint64(x);
iy = asuint64(y);
topx = top12(x);
topy = top12(y);
if (UNLIKELY(topx - 0x001 >= 0x7ff - 0x001 ||
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
/* Special cases: (x < 0x1p-126 or inf or nan) or
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (UNLIKELY(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignaling_inline(x) ? x + y : 1.0;
if (ix == asuint64(1.0))
return issignaling_inline(y) ? x + y : 1.0;
if (2 * ix > 2 * asuint64(INFINITY) ||
2 * iy > 2 * asuint64(INFINITY))
return x + y;
if (2 * ix == 2 * asuint64(1.0))
return 1.0;
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (UNLIKELY(zeroinfnan(ix))) {
double_t x2 = x * x;
if (ix >> 63 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? fp_barrier(1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalid(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64(1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be) {
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64(1.0) ? 1.0 + y :
1.0 - y;
else
return 1.0;
}
return (ix > asuint64(1.0)) == (topy < 0x800) ?
__math_oflow(0) :
__math_uflow(0);
}
if (topx == 0) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint64(x * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
ix = asuint64 (x);
iy = asuint64 (y);
topx = top12 (x);
topy = top12 (y);
if (unlikely (topx - 0x001 >= 0x7ff - 0x001
|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
{
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
/* Special cases: (x < 0x1p-126 or inf or nan) or
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (unlikely (zeroinfnan (iy)))
{
if (2 * iy == 0)
return issignaling_inline (x) ? x + y : 1.0;
if (ix == asuint64 (1.0))
return issignaling_inline (y) ? x + y : 1.0;
if (2 * ix > 2 * asuint64 (INFINITY)
|| 2 * iy > 2 * asuint64 (INFINITY))
return x + y;
if (2 * ix == 2 * asuint64 (1.0))
return 1.0;
if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (unlikely (zeroinfnan (ix)))
{
double_t x2 = x * x;
if (ix >> 63 && checkint (iy) == 1)
{
x2 = -x2;
sign_bias = 1;
}
if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
return __math_divzero (sign_bias);
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalid (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
{
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64 (1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be)
{
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
else
return 1.0;
}
return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
: __math_uflow (0);
}
if (topx == 0)
{
/* Normalize subnormal x so exponent becomes negative. */
/* Without the barrier some versions of clang evalutate the mul
unconditionally causing spurious overflow exceptions. */
ix = asuint64 (opt_barrier_double (x) * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
}
double_t lo;
double_t hi = log_inline(ix, &lo);
double_t ehi, elo;
#if __FP_FAST_FMA
ehi = y * hi;
elo = y * lo + __builtin_fma(y, hi, -ehi);
double_t lo;
double_t hi = log_inline (ix, &lo);
double_t ehi, elo;
#if HAVE_FAST_FMA
ehi = y * hi;
elo = y * lo + fma (y, hi, -ehi);
#else
double_t yhi = asdouble(iy & -1ULL << 27);
double_t ylo = y - yhi;
double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
double_t llo = hi - lhi + lo;
ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
double_t yhi = asdouble (iy & -1ULL << 27);
double_t ylo = y - yhi;
double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
double_t llo = hi - lhi + lo;
ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif
return exp_inline(ehi, elo, sign_bias);
return exp_inline (ehi, elo, sign_bias);
}
__weak_reference(pow, __pow_finite);
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(pow, powl);
#if USE_GLIBC_ABI
strong_alias (pow, __pow_finite)
hidden_alias (pow, __ieee754_pow)
# if LDBL_MANT_DIG == 53
long double powl (long double x, long double y) { return pow (x, y); }
# endif
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,22 +25,16 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/pow_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data for the log part of pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << POW_LOG_TABLE_BITS)
const struct pow_log_data __pow_log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly = {
#if N == 128 && POW_LOG_POLY_ORDER == 8
// relative error: 0x1.11922ap-70
// in -0x1.6bp-8 0x1.6bp-8
// Coefficients are scaled to match the scaling during evaluation.
@ -51,6 +45,7 @@ const struct pow_log_data __pow_log_data = {
-0x1.555555529a47ap-3 * 4,
0x1.2495b9b4845e9p-3 * -8,
-0x1.0002b8b263fc3p-3 * -8,
#endif
},
/* Algorithm:
@ -75,6 +70,7 @@ the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
error and the interval for z is selected such that near x == 1, where log(x)
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
.tab = {
#if N == 128
#define A(a, b, c) {a, 0, b, c},
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
@ -204,5 +200,6 @@ A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
#endif
},
};

View file

@ -1,20 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
COSMOPOLITAN_C_START_
extern const struct pow_log_data {
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
struct {
double invc, pad, logc, logctail;
} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/powf_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5
@ -55,37 +45,39 @@ relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
/* Subnormal input is normalized so ix has negative biased exponent.
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
static inline double_t log2_inline(uint32_t ix)
static inline double_t
log2_inline (uint32_t ix)
{
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
uint32_t iz, top, tmp;
int k, i;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
uint32_t iz, top, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t) asfloat (iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t) k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[0] * r + A[1];
p = A[2] * r + A[3];
r4 = r2 * r2;
q = A[4] * r + y0;
q = p * r2 + q;
y = y * r4 + q;
return y;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[0] * r + A[1];
p = A[2] * r + A[3];
r4 = r2 * r2;
q = A[4] * r + y0;
q = p * r2 + q;
y = y * r4 + q;
return y;
}
#undef N
@ -97,124 +89,164 @@ static inline double_t log2_inline(uint32_t ix)
/* The output of log2 and thus the input of exp2 is either scaled by N
(in case of fast toint intrinsics) or not. The unscaled xd must be
in [-1021,1023], sign_bias sets the sign of the result. */
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
static inline float
exp2_inline (double_t xd, uint32_t sign_bias)
{
uint64_t ki, ski, t;
double_t kd, z, r, r2, y, s;
uint64_t ki, ski, t;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, y, s;
#if TOINT_INTRINSICS
#define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint(xd); /* k */
ki = converttoint(xd);
# define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint (xd); /* k */
ki = converttoint (xd);
#else
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N */
# define C __exp2f_data.poly
# define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
kd = eval_as_double (xd + SHIFT);
ki = asuint64 (kd);
kd -= SHIFT; /* k/N */
#endif
r = xd - kd;
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble (t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float (y);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint32_t iy)
static inline int
checkint (uint32_t iy)
{
int e = iy >> 23 & 0xff;
if (e < 0x7f)
return 0;
if (e > 0x7f + 23)
return 2;
if (iy & ((1 << (0x7f + 23 - e)) - 1))
return 0;
if (iy & (1 << (0x7f + 23 - e)))
return 1;
return 2;
int e = iy >> 23 & 0xff;
if (e < 0x7f)
return 0;
if (e > 0x7f + 23)
return 2;
if (iy & ((1 << (0x7f + 23 - e)) - 1))
return 0;
if (iy & (1 << (0x7f + 23 - e)))
return 1;
return 2;
}
static inline int zeroinfnan(uint32_t ix)
static inline int
zeroinfnan (uint32_t ix)
{
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
}
/**
* Returns 𝑥^𝑦.
* @note should take ~16ns
*
* - ULP error: 0.82 (~ 0.5 + relerr*2^24)
* - relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
* - relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
* - relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*
* @raise ERANGE on overflow or underflow
* @raise EDOM if x is negative and y is a finite non-integer
*/
float powf(float x, float y)
float
powf (float x, float y)
{
uint32_t sign_bias = 0;
uint32_t ix, iy;
uint32_t sign_bias = 0;
uint32_t ix, iy;
ix = asuint(x);
iy = asuint(y);
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
zeroinfnan(iy))) {
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (UNLIKELY(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignalingf_inline(x) ? x + y : 1.0f;
if (ix == 0x3f800000)
return issignalingf_inline(y) ? x + y : 1.0f;
if (2 * ix > 2u * 0x7f800000 ||
2 * iy > 2u * 0x7f800000)
return x + y;
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (UNLIKELY(zeroinfnan(ix))) {
float_t x2 = x * x;
if (ix & 0x80000000 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalidf(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint(x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
ix = asuint (x);
iy = asuint (y);
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
{
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (unlikely (zeroinfnan (iy)))
{
if (2 * iy == 0)
return issignalingf_inline (x) ? x + y : 1.0f;
if (ix == 0x3f800000)
return issignalingf_inline (y) ? x + y : 1.0f;
if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
return x + y;
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
double_t logx = log2_inline(ix);
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
if (UNLIKELY((asuint64(ylogx) >> 47 & 0xffff) >=
asuint64(126.0 * POWF_SCALE) >> 47)) {
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
return __math_oflowf(sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf(sign_bias);
if (unlikely (zeroinfnan (ix)))
{
float_t x2 = x * x;
if (ix & 0x80000000 && checkint (iy) == 1)
{
x2 = -x2;
sign_bias = 1;
}
#if WANT_ERRNO
if (2 * ix == 0 && iy & 0x80000000)
return __math_divzerof (sign_bias);
#endif
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
}
return exp2_inline(ylogx, sign_bias);
/* x and y are non-zero finite. */
if (ix & 0x80000000)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalidf (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000)
{
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint (x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline (ix);
double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */
if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
>= asuint64 (126.0 * POWF_SCALE) >> 47))
{
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
/* |x^y| > 0x1.ffffffp127. */
return __math_oflowf (sign_bias);
if (WANT_ROUNDING && WANT_ERRNO
&& ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
/* |x^y| > 0x1.fffffep127, check if we round away from 0. */
if ((!sign_bias
&& eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
|| (sign_bias
&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
!= -1.0f))
return __math_oflowf (sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf (sign_bias);
#if WANT_ERRNO_UFLOW
if (ylogx < -149.0 * POWF_SCALE)
return __math_may_uflowf (sign_bias);
#endif
}
return exp2_inline (ylogx, sign_bias);
}
__weak_reference(powf, __powf_finite);
#if USE_GLIBC_ABI
strong_alias (powf, __powf_finite)
hidden_alias (powf, __ieee754_powf)
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/powf_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data definition for powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
const struct powf_log2_data __powf_log2_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },

View file

@ -1,23 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
#define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
COSMOPOLITAN_C_START_
extern const struct powf_log2_data {
struct {
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ */

View file

@ -1,120 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c
/usr/src/lib/libm/src/ld128/e_powl.c
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
#include "libc/tinymath/freebsd.internal.h"
#ifdef __x86_64__
/**
* Returns 𝑥^𝑦.
* @note should take ~56ns
*/
long double powl(long double x, long double y) {
long double t, u;
if (!isunordered(x, y)) {
if (!isinf(y)) {
if (!isinf(x)) {
if (x) {
if (y) {
if (x < 0 && y != truncl(y)) {
#ifndef __NO_MATH_ERRNO__
errno = EDOM;
#endif
return NAN;
}
asm("fyl2x" : "=t"(u) : "0"(fabsl(x)), "u"(y) : "st(1)");
asm("fprem" : "=t"(t) : "0"(u), "u"(1.L));
asm("f2xm1" : "=t"(t) : "0"(t));
asm("fscale" : "=t"(t) : "0"(t + 1), "u"(u));
if (signbit(x)) {
if (y != truncl(y)) return -NAN;
if ((int64_t)y & 1) t = -t;
}
return t;
} else {
return 1;
}
} else if (y > 0) {
if (signbit(x) && y == truncl(y) && ((int64_t)y & 1)) {
return -0.;
} else {
return 0;
}
} else if (!y) {
return 1;
} else {
#ifndef __NO_MATH_ERRNO__
errno = ERANGE;
#endif
if (y == truncl(y) && ((int64_t)y & 1)) {
return copysignl(INFINITY, x);
} else {
return INFINITY;
}
}
} else if (signbit(x)) {
if (!y) return 1;
x = y < 0 ? 0 : INFINITY;
if (y == truncl(y) && ((int64_t)y & 1)) x = -x;
return x;
} else if (y < 0) {
return 0;
} else if (y > 0) {
return INFINITY;
} else {
return 1;
}
} else {
x = fabsl(x);
if (x < 1) return signbit(y) ? INFINITY : 0;
if (x > 1) return signbit(y) ? 0 : INFINITY;
return 1;
}
} else if (!y || x == 1) {
return 1;
} else {
return NAN;
}
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
__static_yoink("musl_libc_notice");
__static_yoink("openbsd_libm_notice");
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl.c
*
* Power function, long double precision
@ -606,35 +528,9 @@ static long double powil(long double x, int nn)
return y;
}
__weak_reference(powl, __powl_finite);
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
#include "libc/tinymath/freebsd.internal.h"
/*-
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl(x,y) return x**y
*
@ -1045,8 +941,6 @@ powl(long double x, long double y)
return s * z;
}
#endif /* __x86_64__ */
__weak_reference(powl, __powl_finite);
#endif /* long double is long */
#endif /* __x86_64__ */

View file

@ -30,7 +30,6 @@
#include "libc/tinymath/internal.h"
__static_yoink("musl_libc_notice");
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,15 +25,19 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/sincosf.internal.h"
__static_yoink("arm_optimized_routines_notice");
/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative
error is 0.5303 * 2^-23. A single-step range reduction is used for
small values. Large inputs have their range reduced using fast integer
arithmetic. */
/**
* Returns sine and cosine of y.
*
* This is a fast sincosf implementation. Worst-case ULP is 0.5607,
* maximum relative error is 0.5303 * 2^-23. A single-step range
* reduction is used for small values. Large inputs have their range
* reduced using fast integer arithmetic.
*
* @raise EDOM if y is an infinity
*/
void
sincosf (float y, float *sinp, float *cosp)
{
@ -46,11 +50,11 @@ sincosf (float y, float *sinp, float *cosp)
{
double x2 = x * x;
if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-12f)))
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
{
if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-126f)))
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
FORCE_EVAL (x2);
force_eval_float (x2);
*sinp = y;
*cosp = 1.0f;
return;
@ -70,7 +74,7 @@ sincosf (float y, float *sinp, float *cosp)
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
}
else if (LIKELY (abstop12 (y) < abstop12 (INFINITY)))
else if (likely (abstop12 (y) < abstop12 (INFINITY)))
{
uint32_t xi = asuint (y);
int sign = xi >> 31;

Some files were not shown because too many files have changed in this diff Show more