Floating point parsing support for scanf family (#924)

This commit is contained in:
Matheus Moreira 2023-11-18 07:25:36 -03:00 committed by GitHub
parent 8caf1b48a9
commit 3ac473df3b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 331 additions and 4 deletions

31
examples/parsefloat.c Normal file
View file

@ -0,0 +1,31 @@
#include <stdio.h>
/**
 * Parses `str` with sscanf() using `scan_fmt` and prints one report line:
 * the quoted input, the parsed value rendered with `print_fmt`, and the
 * value returned by sscanf().
 *
 * `val` starts out as zero so the printed value is well defined even when
 * sscanf() stores nothing (a matching failure or end of input); otherwise
 * the printf() below would read an indeterminate object.
 */
#define PARSE_AND_PRINT(type, scan_fmt, print_fmt, str) \
  do { \
    type val = 0; \
    int ret = sscanf(str, scan_fmt, &val); \
    printf("\"%s\" => " print_fmt " = %d\n", str, val, ret); \
  } while (0)

/**
 * Feeds a fixed list of sample inputs through `%f` and prints each result.
 */
int main(void)
{
  // plain decimal notation
  PARSE_AND_PRINT(float, "%f", "%f", "0.3715");
  PARSE_AND_PRINT(float, "%f", "%f", ".3715");
  PARSE_AND_PRINT(float, "%f", "%f", "3715");
  PARSE_AND_PRINT(float, "%f", "%f", "111.11");
  PARSE_AND_PRINT(float, "%f", "%f", "-2.22");
  // not-a-number spellings, with and without sign / payload
  PARSE_AND_PRINT(float, "%f", "%f", "Nan");
  PARSE_AND_PRINT(float, "%f", "%f", "nAn(2)");
  PARSE_AND_PRINT(float, "%f", "%f", "-NAN(_asdfZXCV1234_)");
  PARSE_AND_PRINT(float, "%f", "%f", "-nan");
  PARSE_AND_PRINT(float, "%f", "%f", "+nan");
  // infinity spellings
  PARSE_AND_PRINT(float, "%f", "%f", "inF");
  PARSE_AND_PRINT(float, "%f", "%f", "iNfINiTy");
  PARSE_AND_PRINT(float, "%f", "%f", "+inf");
  PARSE_AND_PRINT(float, "%f", "%f", "-inf");
  // hexadecimal notation, trailing garbage, leading space, no number at all
  PARSE_AND_PRINT(float, "%f", "%f", "0X1.BC70A3D70A3D7P+6");
  PARSE_AND_PRINT(float, "%f", "%f", "1.18973e+4932zzz");
  PARSE_AND_PRINT(float, "%f", "%.10f", " -0.0000000123junk");
  PARSE_AND_PRINT(float, "%f", "%f", "junk");
  return 0;
}

View file

@ -27,6 +27,7 @@
#include "libc/str/tpdecodecb.internal.h"
#include "libc/str/utf16.h"
#include "libc/sysv/errfuns.h"
#include "third_party/gdtoa/gdtoa.h"
#define READ \
({ \
@ -35,6 +36,21 @@
c; \
})
// number of bytes by which the floating point scratch buffer is enlarged
#define FP_BUFFER_GROW 48

/**
 * Reads the next input character with READ and, unless it is -1, appends
 * it to the NUL-terminated scratch buffer `fpbuf`.
 *
 * Expands to a statement expression whose value is the character read.
 * It relies on these names being in scope at the point of use: `fpbuf`,
 * `fpbufsize`, `fpbufcur`, `items`, and the `Done` label.
 *
 * The buffer is enlarged by FP_BUFFER_GROW bytes whenever the character
 * plus its terminating NUL would not fit. If realloc() fails, the old
 * buffer is left untouched (so it can still be freed), `items` is set to
 * -1 and control jumps to `Done`, rather than writing through NULL.
 */
#define BUFFER \
  ({ \
    int c = READ; \
    if (c != -1) { \
      if (fpbufcur + 1 >= fpbufsize) { \
        void *fpbufgrown = realloc(fpbuf, fpbufsize + FP_BUFFER_GROW); \
        if (!fpbufgrown) { \
          items = -1; \
          goto Done; \
        } \
        fpbuf = fpbufgrown; \
        fpbufsize += FP_BUFFER_GROW; \
      } \
      fpbuf[fpbufcur++] = c; \
      fpbuf[fpbufcur] = '\0'; \
    } \
    c; \
  })
/**
* String / file / stream decoder.
*
@ -61,6 +77,9 @@ int __vcscanf(int callback(void *), //
struct FreeMe *next;
void *ptr;
} *freeme = NULL;
unsigned char *fpbuf = NULL;
size_t fpbufsize;
size_t fpbufcur;
const unsigned char *p = (const unsigned char *)fmt;
int *n_ptr;
int items = 0;
@ -85,8 +104,9 @@ int __vcscanf(int callback(void *), //
break;
case '%': {
uint128_t number;
void *buf;
unsigned char *buf;
size_t bufsize;
double fp;
unsigned width = 0;
unsigned char bits = 32;
unsigned char charbytes = sizeof(char);
@ -209,6 +229,27 @@ int __vcscanf(int callback(void *), //
base = 10;
}
goto DecodeNumber;
// All floating point conversion specifiers share one scanning path.
case 'a':
case 'A':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G': // floating point number
// Only two operand widths are accepted here; anything else is reported
// as a failure through items = -1.
// NOTE(review): the store step at GotFloatingPointNumber writes a float
// when charbytes == sizeof(char) and a double otherwise; presumably the
// 'l' length modifier is what sets charbytes to sizeof(wchar_t) -- confirm
// against the modifier parsing, which is not visible in this hunk.
if (!(charbytes == sizeof(char) || charbytes == sizeof(wchar_t))) {
items = -1;
goto Done;
}
// Skip whitespace in front of the number.
while (isspace(c)) {
c = READ;
}
// Start a fresh scratch buffer seeded with the current lookahead
// character; later characters are appended through BUFFER.
// NOTE(review): the malloc() result is used without a NULL check, and c is
// stored without checking for -1 (end of input) -- confirm whether either
// case needs handling here.
fpbufsize = FP_BUFFER_GROW;
fpbuf = malloc(fpbufsize);
fpbufcur = 0;
fpbuf[fpbufcur++] = c;
fpbuf[fpbufcur] = '\0';
goto ConsumeFloatingPointNumber;
default:
items = einval();
goto Done;
@ -294,6 +335,154 @@ int __vcscanf(int callback(void *), //
goto Done;
}
continue;
// Scans the text of one floating point number into fpbuf. On entry c is the
// first candidate character and is already stored in fpbuf. Every further
// character is pulled in with BUFFER, which also appends it to fpbuf.
ConsumeFloatingPointNumber:
// optional leading sign
if (c == '+' || c == '-') {
c = BUFFER;
}
bool hexadecimal = false;
if (c == '0') {
// A leading zero may introduce the "0x" / "0X" hexadecimal prefix.
c = BUFFER;
if (c == 'x' || c == 'X') {
c = BUFFER;
hexadecimal = true;
goto BufferFloatingPointNumber;
} else if (c == -1) {
// input ended right after the zero
goto GotFloatingPointNumber;
} else {
goto BufferFloatingPointNumber;
}
} else if (c == 'n' || c == 'N') {
// "nan", matched case-insensitively one letter at a time; any mismatch
// abandons the conversion by jumping to Done.
c = BUFFER;
if (c == 'a' || c == 'A') {
c = BUFFER;
if (c == 'n' || c == 'N') {
c = BUFFER;
if (c == '(') {
// Optional parenthesized sequence made of digits, ASCII letters and
// underscores, read up to ')' or the end of input.
// NOTE(review): the check runs before the loop condition, so an empty
// sequence "nan()" (or input ending right after '(') takes the
// goto Done below -- confirm this is intended.
c = BUFFER;
do {
bool isdigit = c >= '0' && c <= '9';
bool isletter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
if (!(c == '_' || isdigit || isletter)) {
goto Done;
}
} while ((c = BUFFER) != -1 && c != ')');
if (c == ')') {
// step past the closing parenthesis
c = BUFFER;
}
goto GotFloatingPointNumber;
} else {
goto GotFloatingPointNumber;
}
} else {
goto Done;
}
} else {
goto Done;
}
} else if (c == 'i' || c == 'I') {
// "inf" or "infinity", matched case-insensitively one letter at a time.
c = BUFFER;
if (c == 'n' || c == 'N') {
c = BUFFER;
if (c == 'f' || c == 'F') {
c = BUFFER;
if (c == 'i' || c == 'I') {
// Committed to the long spelling: the remaining letters "nity" must
// all follow, otherwise the conversion is abandoned.
c = BUFFER;
if (c == 'n' || c == 'N') {
c = BUFFER;
if (c == 'i' || c == 'I') {
c = BUFFER;
if (c == 't' || c == 'T') {
c = BUFFER;
if (c == 'y' || c == 'Y') {
c = BUFFER;
} else {
goto Done;
}
} else {
goto Done;
}
} else {
goto Done;
}
} else {
goto Done;
}
} else {
// Short spelling "inf": hand the lookahead character back to the input
// when an unget callback was supplied.
// NOTE(review): BUFFER has already appended that character to fpbuf.
if (c != -1 && unget) {
unget(c, arg);
}
goto GotFloatingPointNumber;
}
} else {
goto Done;
}
} else {
goto Done;
}
}
// Generic digit scanner: a small state machine that accepts characters
// for as long as they can still extend the number.
//   INTEGER    - digits before the radix point
//   FRACTIONAL - digits after the radix point
//   SIGN       - just after the exponent marker; one sign may follow
//   EXPONENT   - exponent digits
BufferFloatingPointNumber:
enum { INTEGER, FRACTIONAL, SIGN, EXPONENT } state = INTEGER;
do {
bool isdecdigit = c >= '0' && c <= '9';
bool ishexdigit = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
// NOTE(review): ',' is accepted as a radix point here, while the text is
// later converted with strtod() -- confirm that strtod() agrees in the
// locales this is expected to run under.
bool ispoint = c == '.' || c == ',';
bool isdecexp = c == 'e' || c == 'E';
bool ishexp = c == 'p' || c == 'P';
bool issign = c == '+' || c == '-';
switch (state) {
case INTEGER:
case FRACTIONAL:
// The digit test runs first, so in hexadecimal mode 'e' / 'E' count as
// digits and only 'p' / 'P' can start the exponent.
if (isdecdigit || (hexadecimal && ishexdigit)) {
goto Continue;
} else if (state == INTEGER && ispoint) {
state = FRACTIONAL;
goto Continue;
} else if (isdecexp || (hexadecimal && ishexp)) {
state = SIGN;
goto Continue;
} else {
goto Break;
}
case SIGN:
if (issign) {
state = EXPONENT;
goto Continue;
}
// no sign present: treat this character as the first exponent digit
state = EXPONENT;
// fallthrough
case EXPONENT:
// exponent digits are decimal, in hexadecimal mode as well
if (isdecdigit) {
goto Continue;
} else {
goto Break;
}
default:
goto Break;
}
Continue:
continue;
Break:
// c cannot extend the number: give it back to the input when an unget
// callback was supplied, then stop scanning.
if (c != -1 && unget) {
unget(c, arg);
}
break;
} while ((c = BUFFER) != -1);
// Convert the buffered text and, unless the result is being discarded,
// count the item and store it through the next variadic pointer argument.
GotFloatingPointNumber:
fp = strtod((char *)fpbuf, NULL);
if (!discard) {
++items;
void *out = va_arg(va, void *);
// charbytes selects the destination type: float when it equals
// sizeof(char), double otherwise.
if (charbytes == sizeof(char)) {
*(float *)out = (float)fp;
} else {
*(double *)out = (double)fp;
}
}
// Release the scratch buffer and reset its bookkeeping.
free(fpbuf);
fpbuf = NULL;
fpbufcur = fpbufsize = 0;
continue;
ReportConsumed:
n_ptr = va_arg(va, int *);
*n_ptr = consumed - 1; // minus lookahead
@ -322,7 +511,7 @@ int __vcscanf(int callback(void *), //
}
if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) {
if (charbytes == 1) {
((unsigned char *)buf)[j++] = (unsigned char)c;
buf[j++] = (unsigned char)c;
c = READ;
} else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) !=
-1) {
@ -344,7 +533,7 @@ int __vcscanf(int callback(void *), //
goto Done;
} else if (!rawmode && j < bufsize) {
if (charbytes == sizeof(char)) {
((unsigned char *)buf)[j] = '\0';
buf[j] = '\0';
} else if (charbytes == sizeof(char16_t)) {
((char16_t *)buf)[j] = u'\0';
} else if (charbytes == sizeof(wchar_t)) {
@ -356,8 +545,9 @@ int __vcscanf(int callback(void *), //
}
++items;
if (ismalloc) {
*va_arg(va, char **) = buf;
*va_arg(va, char **) = (void *) buf;
}
buf = NULL;
} else {
do {
if (isspace(c)) break;
@ -378,5 +568,6 @@ Done:
if (items == -1) free(entry->ptr);
free(entry);
}
if (fpbuf) free(fpbuf);
return items;
}

View file

@ -20,6 +20,7 @@
#include "libc/intrin/bits.h"
#include "libc/inttypes.h"
#include "libc/limits.h"
#include "libc/math.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/internal.h"
@ -327,6 +328,110 @@ TEST(sscanf, flexdecimal_hex) {
EXPECT_EQ(666, y);
}
// %f reads plain decimals: with a leading zero, with a bare fraction, and
// with no fraction at all.
TEST(sscanf, floating_point_simple) {
  const float sentinel = 666.666f;
  float leading_zero = sentinel;
  float bare_fraction = sentinel;
  float whole = sentinel;
  int matched = sscanf("0.3715 .3715 3715", "%f %f %f", &leading_zero,
                       &bare_fraction, &whole);
  EXPECT_EQ(3, matched);
  EXPECT_EQ(0.3715f, leading_zero);
  EXPECT_EQ(0.3715f, bare_fraction);
  EXPECT_EQ(3715.0f, whole);
}
// %lf reads plain decimals into doubles: with a leading zero, with a bare
// fraction, and with no fraction at all.
TEST(sscanf, floating_point_simple_double_precision) {
  const double sentinel = 666.666;
  double leading_zero = sentinel;
  double bare_fraction = sentinel;
  double whole = sentinel;
  int matched = sscanf("0.3715 .3715 3715", "%lf %lf %lf", &leading_zero,
                       &bare_fraction, &whole);
  EXPECT_EQ(3, matched);
  EXPECT_EQ(0.3715, leading_zero);
  EXPECT_EQ(0.3715, bare_fraction);
  EXPECT_EQ(3715.0, whole);
}
// Every spelling of NaN, signed or not, with or without a parenthesized
// suffix, must come back from %f as a NaN.
TEST(sscanf, floating_point_nan) {
  float bare[4] = {666.666f, 666.666f, 666.666f, 666.666f};
  float suffixed[2] = {666.666f, 666.666f};
  int k;
  EXPECT_EQ(4, sscanf("nan -NAN nAn NaN", "%f %f %f %f", &bare[0], &bare[1],
                      &bare[2], &bare[3]));
  EXPECT_EQ(2, sscanf("nan(2) -NaN(_ABCDzxcv1234_)", "%f %f", &suffixed[0],
                      &suffixed[1]));
  for (k = 0; k < 4; ++k) {
    EXPECT_TRUE(isnan(bare[k]));
  }
  for (k = 0; k < 2; ++k) {
    EXPECT_TRUE(isnan(suffixed[k]));
  }
}
// Every spelling of NaN, signed or not, with or without a parenthesized
// suffix, must come back from %lf as a NaN.
TEST(sscanf, floating_point_nan_double_precision) {
  double bare[4] = {666.666, 666.666, 666.666, 666.666};
  double suffixed[2] = {666.666, 666.666};
  int k;
  EXPECT_EQ(4, sscanf("nan -NAN nAn NaN", "%lf %lf %lf %lf", &bare[0],
                      &bare[1], &bare[2], &bare[3]));
  EXPECT_EQ(2, sscanf("nan(2) -NAN(_ABCDzxcv1234_)", "%lf %lf", &suffixed[0],
                      &suffixed[1]));
  for (k = 0; k < 4; ++k) {
    EXPECT_TRUE(isnan(bare[k]));
  }
  for (k = 0; k < 2; ++k) {
    EXPECT_TRUE(isnan(suffixed[k]));
  }
}
// Both the short ("inf") and the long ("infinity") spellings, in any letter
// case and with either sign, must come back from %f as an infinity.
TEST(sscanf, floating_point_infinity) {
  float brief[4] = {666.666f, 666.666f, 666.666f, 666.666f};
  float spelled[3] = {666.666f, 666.666f, 666.666f};
  int k;
  EXPECT_EQ(4, sscanf("inf +INF -iNf InF", "%f %f %f %f", &brief[0],
                      &brief[1], &brief[2], &brief[3]));
  EXPECT_EQ(3, sscanf("+infinity -INFINITY iNfInItY", "%f %f %f", &spelled[0],
                      &spelled[1], &spelled[2]));
  for (k = 0; k < 4; ++k) {
    EXPECT_TRUE(isinf(brief[k]));
  }
  for (k = 0; k < 3; ++k) {
    EXPECT_TRUE(isinf(spelled[k]));
  }
}
// Both the short ("inf") and the long ("infinity") spellings, in any letter
// case and with either sign, must come back from %lf as an infinity.
TEST(sscanf, floating_point_infinity_double_precision) {
  double brief[4] = {666.666, 666.666, 666.666, 666.666};
  double spelled[3] = {666.666, 666.666, 666.666};
  int k;
  EXPECT_EQ(4, sscanf("inf +INF -iNf InF", "%lf %lf %lf %lf", &brief[0],
                      &brief[1], &brief[2], &brief[3]));
  EXPECT_EQ(3, sscanf("+infinity -INFINITY iNfInItY", "%lf %lf %lf",
                      &spelled[0], &spelled[1], &spelled[2]));
  for (k = 0; k < 4; ++k) {
    EXPECT_TRUE(isinf(brief[k]));
  }
  for (k = 0; k < 3; ++k) {
    EXPECT_TRUE(isinf(spelled[k]));
  }
}
// Runs a set of sample inputs through %f: plain decimals, NaN and infinity
// spellings, hexadecimal notation, an out-of-range exponent, and numbers
// followed by (or replaced with) non-numeric text.
TEST(sscanf, floating_point_documentation_examples) {
  const float sentinel = 666.666f;
  float positive = sentinel, negative = sentinel;
  float nan_bare = sentinel, nan_suffixed = sentinel, infinite = sentinel;
  float hex = sentinel, too_big = sentinel, tiny = sentinel;
  float text_a = sentinel, text_b = sentinel;
  int matched;

  matched = sscanf("111.11 -2.22", "%f %f", &positive, &negative);
  EXPECT_EQ(2, matched);
  matched = sscanf("Nan nan(2) inF", "%f %f %f", &nan_bare, &nan_suffixed,
                   &infinite);
  EXPECT_EQ(3, matched);
  matched = sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
                   "%f %f %f %f %f", &hex, &too_big, &tiny, &text_a, &text_b);
  EXPECT_EQ(5, matched);

  EXPECT_EQ(111.11f, positive);
  EXPECT_EQ(-2.22f, negative);
  EXPECT_TRUE(isnan(nan_bare));
  EXPECT_TRUE(isnan(nan_suffixed));
  EXPECT_TRUE(isinf(infinite));
  EXPECT_EQ(0X1.BC70A3D70A3D7P+6f, hex);
  EXPECT_TRUE(isinf(too_big));
  EXPECT_EQ(-0.0000000123f, tiny);
  EXPECT_EQ(.0f, text_a);
  EXPECT_EQ(.0f, text_b);
}
// Runs a set of sample inputs through %lf: plain decimals, NaN and infinity
// spellings, hexadecimal notation, an out-of-range exponent, and numbers
// followed by (or replaced with) non-numeric text.
TEST(sscanf, floating_point_documentation_examples_double_precision) {
  const double sentinel = 666.666;
  double positive = sentinel, negative = sentinel;
  double nan_bare = sentinel, nan_suffixed = sentinel, infinite = sentinel;
  double hex = sentinel, too_big = sentinel, tiny = sentinel;
  double text_a = sentinel, text_b = sentinel;
  int matched;

  matched = sscanf("111.11 -2.22", "%lf %lf", &positive, &negative);
  EXPECT_EQ(2, matched);
  matched = sscanf("Nan nan(2) inF", "%lf %lf %lf", &nan_bare, &nan_suffixed,
                   &infinite);
  EXPECT_EQ(3, matched);
  matched = sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
                   "%lf %lf %lf %lf %lf", &hex, &too_big, &tiny, &text_a,
                   &text_b);
  EXPECT_EQ(5, matched);

  EXPECT_EQ(111.11, positive);
  EXPECT_EQ(-2.22, negative);
  EXPECT_TRUE(isnan(nan_bare));
  EXPECT_TRUE(isnan(nan_suffixed));
  EXPECT_TRUE(isinf(infinite));
  EXPECT_EQ(0X1.BC70A3D70A3D7P+6, hex);
  EXPECT_TRUE(isinf(too_big));
  EXPECT_EQ(-0.0000000123, tiny);
  EXPECT_EQ(.0, text_a);
  EXPECT_EQ(.0, text_b);
}
TEST(sscanf, luplus) {
long x = 666;
EXPECT_EQ(1, sscanf("+123", "%lu", &x));