*scanf() fixes to make TeX work (#1109)

* Fix reading the same symbol twice when using `{f,}scanf()`

PR #924 appears to use `unget()` subtly incorrectly when parsing
floating point numbers. The rest of the code only uses `unget()`
immediately followed by `goto Done;` to return back the symbol that
can't possibly belong to the directive we're processing.

With floating-point, however, the ungot characters could very well
be valid for the *next* directive, so we will essentially read them
twice. It can't be seen in `sscanf()` tests because `unget()` is a
no-op there, but the test I added for `fscanf()` fails like this:

        ...
        EXPECT_EQ(0xDEAD, i1)
                need 57005 (or 0xdead) =
                 got 908973 (or 0x000ddead)
        ...
        EXPECT_EQ(0xBEEF, i2)
                need 48879 (or 0xbeef) =
                 got 769775 (or 0x000bbeef)

This means we read 0xDDEAD instead of 0xDEAD and 0xBBEEF instead of
0xBEEF. I checked that both musl and glibc read 0xDEAD/0xBEEF, as
expected.

Fix the failing test by removing the unneeded `unget()` calls.

* Don't read invalid floating-point numbers in `*scanf()`

Currently, we just ignore any errors from `strtod()`. They can
happen either because no valid float can be parsed at all, or
because the state machine recognizes only a prefix of a valid
floating-point number.

Fix this by making sure `strtod()` parses everything we recognized,
provided it's non-empty. This requires to pop the last character
off the FP buffer, which is supposed to be parsed by the next
`*scanf()` directive.

* Make `%c` parsing in `*scanf()` respect the C standard

Currently, `%c`-style directives always succeed even if there
are actually fewer characters in the input than requested.

Before the fix, the added test fails like this:

        ...
        EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4))
                need 2 (or 0x02 or '\2' or ENOENT) =
                 got 3 (or 0x03 or '\3' or ESRCH)
        ...
        EXPECT_EQ(0, sscanf("abcd", "%5c", s2))
                need 0 (or 0x0 or '\0') =
                 got 1 (or 0x01 or '\1' or EPERM)

musl and glibc pass this test.
This commit is contained in:
Ivan Komarov 2024-02-23 16:15:30 +01:00 committed by GitHub
parent 3afe3a3646
commit f7ff515961
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 89 additions and 22 deletions

View file

@ -50,6 +50,12 @@
} \
c; \
})
#define UNBUFFER \
({ \
if (c != -1) { \
fpbuf[--fpbufcur] = '\0'; \
} \
})
/**
* String / file / stream decoder.
@ -369,10 +375,11 @@ int __vcscanf(int callback(void *), //
}
} while ((c = BUFFER) != -1 && c != ')');
if (c == ')') {
c = BUFFER;
c = READ;
}
goto GotFloatingPointNumber;
} else {
UNBUFFER;
goto GotFloatingPointNumber;
}
} else {
@ -410,9 +417,7 @@ int __vcscanf(int callback(void *), //
goto Done;
}
} else {
if (c != -1 && unget) {
unget(c, arg);
}
UNBUFFER;
goto GotFloatingPointNumber;
}
} else {
@ -465,13 +470,24 @@ int __vcscanf(int callback(void *), //
Continue:
continue;
Break:
if (c != -1 && unget) {
unget(c, arg);
}
UNBUFFER;
break;
} while ((c = BUFFER) != -1);
GotFloatingPointNumber:
fp = strtod((char *)fpbuf, NULL);
/* An empty buffer can't be a valid float; don't even bother parsing. */
bool valid = fpbufcur > 0;
if (valid) {
char *ep;
fp = strtod((char *)fpbuf, &ep);
/* We should have parsed the whole buffer. */
valid = ep == (char *)fpbuf + fpbufcur;
}
free(fpbuf);
fpbuf = NULL;
fpbufcur = fpbufsize = 0;
if (!valid) {
goto Done;
}
if (!discard) {
++items;
void *out = va_arg(va, void *);
@ -481,9 +497,6 @@ int __vcscanf(int callback(void *), //
*(double *)out = (double)fp;
}
}
free(fpbuf);
fpbuf = NULL;
fpbufcur = fpbufsize = 0;
continue;
ReportConsumed:
n_ptr = va_arg(va, int *);
@ -537,6 +550,11 @@ int __vcscanf(int callback(void *), //
if (!j && c == -1 && !items) {
items = -1;
goto Done;
} else if (rawmode && j != width) {
/* The C standard says that %c "matches a sequence of characters of
* **exactly** the number specified by the field width". If we have
* fewer characters, what we've just read is invalid. */
goto Done;
} else if (!rawmode && j < bufsize) {
if (charbytes == sizeof(char)) {
buf[j] = '\0';

View file

@ -0,0 +1,33 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Ivan Komarov
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/testlib.h"
TEST(fscanf, test_readAfterFloat) {
FILE *f = fmemopen("infDEAD-.125e-2BEEF", 19, "r");
float f1 = 666.666f, f2 = f1;
int i1 = 666, i2 = i1;
EXPECT_EQ(4, fscanf(f, "%f%x%f%x", &f1, &i1, &f2, &i2));
EXPECT_TRUE(isinf(f1));
EXPECT_EQ(0xDEAD, i1);
EXPECT_EQ(-0.125e-2f, f2);
EXPECT_EQ(0xBEEF, i2);
fclose(f);
}

View file

@ -69,9 +69,17 @@ TEST(sscanf, testNonDirectiveCharacterMatching) {
}
TEST(sscanf, testCharacter) {
char c = 0;
EXPECT_EQ(1, sscanf("a", "%c", &c));
EXPECT_EQ('a', c);
char c1 = 0, c2 = c1, c3 = c2, c4 = c3;
char s1[32] = {0}, s2[32] = {0};
EXPECT_EQ(1, sscanf("a", "%c", &c1));
EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4));
EXPECT_EQ(1, sscanf("abcde", "%5c", s1));
EXPECT_EQ(0, sscanf("abcd", "%5c", s2));
EXPECT_EQ('a', c1);
EXPECT_EQ('a', c2);
EXPECT_EQ('b', c3);
EXPECT_STREQ("abcde", &s1[0]);
}
TEST(sscanf, testStringBuffer) {
@ -394,6 +402,20 @@ TEST(sscanf, floating_point_infinity_double_precision) {
EXPECT_TRUE(isinf(g));
}
TEST(sscanf, floating_point_invalid) {
float dummy;
EXPECT_EQ(0, sscanf("junk", "%f", &dummy));
EXPECT_EQ(0, sscanf("e9", "%f", &dummy));
EXPECT_EQ(0, sscanf("-e9", "%f", &dummy));
}
TEST(sscanf, floating_point_invalid_double_precision) {
double dummy;
EXPECT_EQ(0, sscanf("junk", "%lf", &dummy));
EXPECT_EQ(0, sscanf("e9", "%lf", &dummy));
EXPECT_EQ(0, sscanf("-e9", "%lf", &dummy));
}
TEST(sscanf, floating_point_documentation_examples) {
float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h,
j = i;
@ -401,7 +423,7 @@ TEST(sscanf, floating_point_documentation_examples) {
EXPECT_EQ(2, sscanf("111.11 -2.22", "%f %f", &a, &b));
EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%f %f %f", &c, &d, &e));
EXPECT_EQ(
5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
"%f %f %f %f %f", &f, &g, &h, &i, &j));
EXPECT_EQ(111.11f, a);
@ -411,9 +433,6 @@ TEST(sscanf, floating_point_documentation_examples) {
EXPECT_TRUE(isinf(e));
EXPECT_EQ(0X1.BC70A3D70A3D7P+6f, f);
EXPECT_TRUE(isinf(g));
EXPECT_EQ(-0.0000000123f, h);
EXPECT_EQ(.0f, i);
EXPECT_EQ(.0f, j);
}
TEST(sscanf, floating_point_documentation_examples_double_precision) {
@ -423,7 +442,7 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
EXPECT_EQ(2, sscanf("111.11 -2.22", "%lf %lf", &a, &b));
EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%lf %lf %lf", &c, &d, &e));
EXPECT_EQ(
5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
"%lf %lf %lf %lf %lf", &f, &g, &h, &i, &j));
EXPECT_EQ(111.11, a);
@ -433,9 +452,6 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
EXPECT_TRUE(isinf(e));
EXPECT_EQ(0X1.BC70A3D70A3D7P+6, f);
EXPECT_TRUE(isinf(g));
EXPECT_EQ(-0.0000000123, h);
EXPECT_EQ(.0, i);
EXPECT_EQ(.0, j);
}
TEST(sscanf, luplus) {