Make stderr go faster

This change makes _IONBF (unbuffered) stdio handles go 20x faster for
certain kinds of formatting directives by being smarter about buffers.
This commit is contained in:
Justine Tunney 2023-08-11 11:56:35 -07:00
parent 2cbd09b4d4
commit 8fc778162e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
6 changed files with 70 additions and 38 deletions

View file

@ -17,7 +17,6 @@
MAKEFLAGS += --no-builtin-rules
MAKE_ZIPCOPY = $(COMPILE) -AZIPCOPY -wT$@ $(ZIPCOPY) $< $@
MAKE_PECHECK = $(COMPILE) -APECHECK -wT$@ $(PECHECK) $@
ifneq ($(ARCH), aarch64)
MAKE_OBJCOPY = $(COMPILE) -AOBJCOPY -T$@ $(OBJCOPY) -S -O binary $< $@ && $(MAKE_ZIPCOPY)
else

View file

@ -2,23 +2,23 @@
#define COSMOPOLITAN_LIBC_FMT_DIVMOD10_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
forceinline uint64_t DivMod10(uint64_t x, unsigned *r) {
forceinline uint64_t __divmod10(uint64_t __x, unsigned *__r) {
#if defined(__STRICT_ANSI__) || !defined(__GNUC__) || \
(defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__))
*r = x % 10;
return x / 10;
*__r = __x % 10;
return __x / 10;
#else
uint128_t dw;
unsigned long long hi, rm;
dw = x;
dw *= 0xcccccccccccccccdull;
hi = dw >> 64;
hi >>= 3;
rm = hi;
rm += rm << 2;
rm += rm;
*r = x - rm;
return hi;
uint128_t __dw;
unsigned long long __hi, __rm;
__dw = __x;
__dw *= 0xcccccccccccccccdull;
__hi = __dw >> 64;
__hi >>= 3;
__rm = __hi;
__rm += __rm << 2;
__rm += __rm;
*__r = __x - __rm;
return __hi;
#endif
}

View file

@ -432,7 +432,7 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt,
FormatUnsigned:
if (x && hash) sign = hash;
for (i = j = 0;;) {
x = DivMod10(x, &rem);
x = __divmod10(x, &rem);
z[i++ & 127] = '0' + rem;
if (pdot ? i >= prec : !x) break;
if (quot && ++j == 3) {

View file

@ -244,15 +244,15 @@ static int __fmt_ntoa2(int out(const char *, void *, size_t), void *arg,
unsigned len, count, digit;
char buf[BUFFER_SIZE];
len = 0;
/* we check for log2base != 3 because otherwise we'll print nothing for a
* value of 0 with precision 0 when # mandates that one be printed */
// we check for log2base!=3, since otherwise we'll print nothing for
// a value of 0 with precision 0 when # mandates that one be printed
if (!value && log2base != 3) flags &= ~FLAGS_HASH;
if (value || !(flags & FLAGS_PRECISION)) {
count = 0;
do {
if (!log2base) {
if (value <= UINT64_MAX) {
value = DivMod10(value, &digit);
value = __divmod10(value, &digit);
} else {
value = __udivmodti4(value, 10, &remainder);
digit = remainder;

View file

@ -18,28 +18,35 @@
*/
#include "libc/calls/calls.h"
#include "libc/fmt/fmt.h"
#include "libc/limits.h"
#include "libc/stdckdint.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/errfuns.h"
struct buf {
int n;
char p[512];
};
struct state {
FILE *f;
int n;
struct buf b;
};
static int vfprintfputchar(const char *s, struct state *t, size_t n) {
static int __vfprintf_flbuf(const char *s, struct state *t, size_t n) {
int rc;
if (n) {
if (n == 1 && *s != '\n' && t->f->beg < t->f->size &&
t->f->bufmode != _IONBF) {
if (n == 1 && *s != '\n' && t->f->beg < t->f->size) {
t->f->buf[t->f->beg++] = *s;
t->n += n;
rc = 0;
} else if (!fwrite_unlocked(s, 1, n, t->f)) {
rc = -1;
} else {
t->n += n;
} else if (fwrite_unlocked(s, 1, n, t->f)) {
rc = 0;
} else {
rc = -1;
}
if (ckd_add(&t->n, t->n, n)) {
rc = eoverflow();
}
} else {
rc = 0;
@ -47,15 +54,49 @@ static int vfprintfputchar(const char *s, struct state *t, size_t n) {
return rc;
}
/**
 * Output callback used by vfprintf_unlocked() for _IONBF streams.
 *
 * Bytes are staged in the small buffer `t->b` so the stream receives
 * one write per full buffer instead of one write per formatted chunk.
 * Whatever is still staged when formatting finishes is left in `t->b`
 * for the caller to flush.
 *
 * `t->n` counts only bytes that have already been handed to the
 * stream; the caller adds the leftover `t->b.n` after its final flush,
 * so staged bytes must not be counted here or they'd be counted twice.
 *
 * @param s is the chunk of formatted output to append
 * @param t is the formatting state (stream, byte count, staging buffer)
 * @param n is the number of bytes at `s`
 * @return 0 on success, or -1 if the write failed, or the result of
 *     eoverflow() if the byte count doesn't fit in an int
 */
static int __vfprintf_nbuf(const char *s, struct state *t, size_t n) {
  size_t i;
  for (i = 0; i < n; ++i) {
    t->b.p[t->b.n++] = s[i];
    if (t->b.n == sizeof(t->b.p)) {
      // flush the staging buffer itself; `s` only holds the current
      // chunk, which may be shorter than (and unrelated to) what has
      // been accumulated in `t->b.p`
      if (!fwrite_unlocked(t->b.p, 1, t->b.n, t->f)) {
        return -1;
      }
      if (ckd_add(&t->n, t->n, t->b.n)) {
        return eoverflow();
      }
      t->b.n = 0;
    }
  }
  return 0;
}
/**
* Formats and writes text to stream.
* @see printf() for further documentation
*/
int vfprintf_unlocked(FILE *f, const char *fmt, va_list va) {
int rc;
struct state st[1] = {{f, 0}};
if ((rc = __fmt(vfprintfputchar, st, fmt, va)) != -1) {
rc = st->n;
struct state st;
int (*out)(const char *, struct state *, size_t);
if (f->bufmode != _IONBF) {
out = __vfprintf_flbuf;
} else {
out = __vfprintf_nbuf;
}
st.f = f;
st.n = 0;
st.b.n = 0;
if ((rc = __fmt(out, &st, fmt, va)) != -1) {
if (!st.b.n) {
rc = st.n;
} else if (fwrite_unlocked(st.b.p, 1, st.b.n, st.f)) {
if (ckd_add(&rc, st.n, st.b.n)) {
rc = eoverflow();
}
} else {
rc = -1;
}
}
return rc;
}

View file

@ -1,9 +1,6 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
# qemu-user execve() is broken so we need to build/bootstrap/ commands
ifeq ($(ARCH), x86_64)
PKGS += TOOL_HELLO
TOOL_HELLO_FILES := $(wildcard tool/hello/*)
@ -81,8 +78,3 @@ o/$(MODE)/tool/hello/hello-pe.com: \
@$(COMPILE) -AELF2PE o/$(MODE)/tool/build/elf2pe.com -o $@ $<
$(TOOL_HELLO_OBJS): tool/hello/hello.mk
.PHONY: o/$(MODE)/tool/hello
o/$(MODE)/tool/hello: $(TOOL_HELLO_BINS)
endif