From 40b7da8422562ec169a6c59250c251cab5482180 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 22 Mar 2024 19:26:56 -0700 Subject: [PATCH] Speed up fmaf() on x86 --- libc/tinymath/fmaf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libc/tinymath/fmaf.c b/libc/tinymath/fmaf.c index 544301ca5..e48801748 100644 --- a/libc/tinymath/fmaf.c +++ b/libc/tinymath/fmaf.c @@ -100,6 +100,16 @@ float fmaf(float x, float y, float z) #else +#ifdef __x86_64__ + if (X86_HAVE(FMA)) { + asm("vfmadd132ss\t%1,%2,%0" : "+x"(x) : "x"(y), "x"(z)); + return x; + } else if (X86_HAVE(FMA4)) { + asm("vfmaddss\t%3,%2,%1,%0" : "=x"(x) : "x"(x), "x"(y), "x"(z)); + return x; + } +#endif + /* A double has more than twice as much precision than a float, so direct double-precision arithmetic suffices, except where double rounding occurs. */