diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 3a7cc4ece..4aad63c52 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -48,7 +48,7 @@ "notpossible=", "thatispacked=", "dontthrow=", - "nocallback=", + "dontcallback=", "relegated=", "hidden=", "textstartup=", diff --git a/Makefile b/Makefile index 46e0b69d9..0f6122670 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,6 @@ XARGS ?= xargs -P4 -rs8000 DOT ?= dot CLANG = clang TMPDIR = o/tmp - AR = build/bootstrap/ar.com CP = build/bootstrap/cp.com RM = build/bootstrap/rm.com -f @@ -134,10 +133,10 @@ endif ifneq ($(findstring aarch64,$(MODE)),) ARCH = aarch64 -HOSTS ?= pi studio freebsdarm +HOSTS ?= pi pi5 studio freebsdarm else ARCH = x86_64 -HOSTS ?= freebsd rhel7 xnu win10 openbsd netbsd +HOSTS ?= freebsd rhel7 xnu win10 openbsd netbsd meatball nightmare endif ZIPOBJ_FLAGS += -a$(ARCH) @@ -209,7 +208,7 @@ endif libc/stdbool.h \ libc/disclaimer.inc \ rwc:/dev/shm \ - rx:cosmocc \ + rx:.cosmocc \ rx:build/bootstrap \ r:build/portcosmo.h \ /proc/stat \ @@ -481,6 +480,7 @@ COSMOPOLITAN_OBJECTS = \ LIBC_STR \ LIBC_SYSV \ LIBC_INTRIN \ + LIBC_NT_BCRYPTPRIMITIVES \ LIBC_NT_KERNEL32 \ LIBC_NEXGEN32E diff --git a/ape/ape-m1.c b/ape/ape-m1.c index 82c98feaa..1afd1edb0 100644 --- a/ape/ape-m1.c +++ b/ape/ape-m1.c @@ -39,7 +39,7 @@ /* maximum path size that cosmo can take */ #define PATHSIZE (PATH_MAX < 1024 ? PATH_MAX : 1024) #define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24) -#define SYSLIB_VERSION 8 +#define SYSLIB_VERSION 9 /* sync with libc/runtime/syslib.internal.h */ struct Syslib { int magic; @@ -96,11 +96,16 @@ struct Syslib { long (*sem_trywait)(int *); long (*getrlimit)(int, struct rlimit *); long (*setrlimit)(int, const struct rlimit *); - // v6 (2023-11-03) + /* v6 (2023-11-03) */ void *(*dlopen)(const char *, int); void *(*dlsym)(void *, const char *); int (*dlclose)(void *); char *(*dlerror)(void); + /* MANDATORY (cosmo runtime won't load if version < 8) + --------------------------------------------------- + OPTIONAL (cosmo lib should check __syslib->version) */ + /* v9 (2024-01-31) */ + int (*pthread_cpu_number_np)(size_t *); }; #define ELFCLASS32 1 @@ -660,9 +665,9 @@ __attribute__((__noreturn__)) static void Spawn(const char *exe, int fd, size = (p[i].p_vaddr & (pagesz - 1)) + p[i].p_filesz; if (prot1 & PROT_EXEC) { #ifdef SIP_DISABLED - // if sip is disabled then we can load the executable segments - // off the binary into memory without needing to copy anything - // which provides considerably better performance for building + /* if sip is disabled then we can load the executable segments + off the binary into memory without needing to copy anything + which provides considerably better performance for building */ rc = sys_mmap(addr, size, prot1, flags, fd, p[i].p_offset & -pagesz); if (rc < 0) { if (rc == -EPERM) { @@ -674,12 +679,12 @@ __attribute__((__noreturn__)) static void Spawn(const char *exe, int fd, } } #else - // the issue is that if sip is enabled then, attempting to map - // it with exec permission will cause xnu to phone home a hash - // of the entire file to apple intelligence as a one time cost - // which is literally minutes for executables holding big data - // since there's no public apple api for detecting sip we read - // as the default strategy which is slow but it works for both + /* the issue is that if sip is enabled then, attempting to map + it with exec permission will cause xnu to phone home a hash + of the entire file to apple intelligence as a one time cost + which is literally minutes for executables holding big data + since there's no public apple api for detecting sip we read + as the default strategy which is slow but it works for both */ rc = sys_mmap(addr, size, (prot1 = PROT_READ | PROT_WRITE), MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); if (rc < 0) Pexit(exe, rc, "prog mmap anon"); @@ -812,12 +817,10 @@ static const char *TryElf(struct ApeLoader *M, union ElfEhdrBuf *ebuf, } } - /* - * merge adjacent loads that are contiguous with equal protection, - * which prevents our program header overlap check from needlessly - * failing later on; it also shaves away a microsecond of latency, - * since every program header requires invoking at least 1 syscall - */ + /* merge adjacent loads that are contiguous with equal protection, + which prevents our program header overlap check from needlessly + failing later on; it also shaves away a microsecond of latency, + since every program header requires invoking at least 1 syscall */ for (i = 0; i + 1 < e->e_phnum;) { if (p[i].p_type == PT_LOAD && p[i + 1].p_type == PT_LOAD && ((p[i].p_flags & (PF_R | PF_W | PF_X)) == @@ -944,6 +947,7 @@ int main(int argc, char **argv, char **envp) { M->lib.dlsym = dlsym; M->lib.dlclose = dlclose; M->lib.dlerror = dlerror; + M->lib.pthread_cpu_number_np = pthread_cpu_number_np; /* getenv("_") is close enough to at_execfn */ execfn = 0; diff --git a/ape/ape.lds b/ape/ape.lds index 288d798d4..1a39e2dc1 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -282,12 +282,9 @@ SECTIONS { KEEP(*(SORT_BY_NAME(.init.*))) KEEP(*(.init)) KEEP(*(.initepilogue)) - KEEP(*(.pltprologue)) *(.plt) - KEEP(*(.pltepilogue)) - KEEP(*(.pltgotprologue)) *(.plt.got) - KEEP(*(.pltgotepilogue)) + *(.iplt) *(.text.startup .text.startup.*) *(.text.exit .text.exit.*) *(.text.unlikely .text.*_unlikely .text.unlikely.*) @@ -323,7 +320,7 @@ SECTIONS { /*BEGIN: Read Only Data */ - .rodata . : { + .rodata ALIGN(CONSTANT(COMMONPAGESIZE)) : { KEEP(*(.rodata.pytab.0)); KEEP(*(.rodata.pytab.1)); KEEP(*(.rodata.pytab.2)); @@ -398,13 +395,12 @@ SECTIONS { KEEP(*(SORT_BY_NAME(.sort.data.*))) . += . > 0 ? CODE_GRANULE : 0; - KEEP(*(.gotprologue)) + . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0); + __got_start = .; *(.got) - KEEP(*(.gotepilogue)) + __got_end = .; - KEEP(*(.gotpltprologue)) *(.got.plt) - KEEP(*(.gotpltepilogue)) . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0); __init_array_start = .; @@ -430,6 +426,8 @@ SECTIONS { KEEP(*(SORT_BY_NAME(.piro.data.sort.*))) KEEP(*(.piro.pad.data)) KEEP(*(.dataepilogue)) + *(.igot.plt) + . = ALIGN(. != 0 ? CONSTANT(COMMONPAGESIZE) : 0); /*END: NT FORK COPYING */ _edata = .; @@ -528,6 +526,7 @@ SECTIONS { #endif *(__patchable_function_entries) *(__mcount_loc) + *(.rela.dyn) *(.discard) *(.yoink) } diff --git a/ape/loader.c b/ape/loader.c index 87822315e..752d5ef6e 100644 --- a/ape/loader.c +++ b/ape/loader.c @@ -166,13 +166,6 @@ (unsigned long)(255 & (S)[1]) << 010 | \ (unsigned long)(255 & (S)[0]) << 000) -#define DEBUG(VAR) \ - { \ - char ibuf[19] = {0}; \ - Utox(ibuf, VAR); \ - Print(os, 2, ibuf, " " #VAR, "\n", 0l); \ - } - struct ElfEhdr { unsigned char e_ident[16]; unsigned short e_type; @@ -340,23 +333,6 @@ static char *GetEnv(char **p, const char *s) { return 0; } -static char *Utox(char p[19], unsigned long x) { - int i; - if (x) { - *p++ = '0'; - *p++ = 'x'; - i = (__builtin_clzl(x) ^ (sizeof(long) * 8 - 1)) + 1; - i = (i + 3) & -4; - do { - *p++ = "0123456789abcdef"[(x >> (i -= 4)) & 15]; - } while (i); - } else { - *p++ = '0'; - } - *p = 0; - return p; -} - static char *Utoa(char p[20], unsigned long x) { char t; unsigned long i, a, b; @@ -534,6 +510,53 @@ static long Print(int os, int fd, const char *s, ...) { return Write(fd, b, n, os); } +static long Printf(int os, int fd, const char *fmt, ...) { + int i; + char c; + int k = 0; + unsigned u; + char b[512]; + const char *s; + unsigned long d; + __builtin_va_list va; + __builtin_va_start(va, fmt); + for (;;) { + switch ((c = *fmt++)) { + case '\0': + __builtin_va_end(va); + return Write(fd, b, k, os); + case '%': + switch ((c = *fmt++)) { + case 's': + for (s = __builtin_va_arg(va, const char *); s && *s; ++s) { + if (k < 512) b[k++] = *s; + } + break; + case 'd': + d = __builtin_va_arg(va, unsigned long); + for (i = 16; i--;) { + u = (d >> (i * 4)) & 15; + if (u < 10) { + c = '0' + u; + } else { + u -= 10; + c = 'a' + u; + } + if (k < 512) b[k++] = c; + } + break; + default: + if (k < 512) b[k++] = c; + break; + } + break; + default: + if (k < 512) b[k++] = c; + break; + } + } +} + static void Perror(int os, const char *thing, long rc, const char *reason) { char ibuf[21]; ibuf[0] = 0; @@ -901,7 +924,7 @@ EXTERN_C __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, long *auxv, *ap, *endp, *sp2; char *p, *pe, *exe, *prog, **argv, **envp; - (void)Utox; + (void)Printf; /* detect freebsd */ if (SupportsXnu() && dl == XNU) { diff --git a/ape/sections.internal.h b/ape/sections.internal.h index 119c4990e..e99503b7e 100644 --- a/ape/sections.internal.h +++ b/ape/sections.internal.h @@ -18,10 +18,12 @@ extern unsigned char _tbss_end[] __attribute__((__weak__)); extern unsigned char _tls_align[] __attribute__((__weak__)); extern unsigned char __test_start[] __attribute__((__weak__)); extern unsigned char __ro[] __attribute__((__weak__)); -extern uint8_t __data_start[] __attribute__((__weak__)); -extern uint8_t __data_end[] __attribute__((__weak__)); -extern uint8_t __bss_start[] __attribute__((__weak__)); -extern uint8_t __bss_end[] __attribute__((__weak__)); +extern unsigned char __data_start[] __attribute__((__weak__)); +extern unsigned char __data_end[] __attribute__((__weak__)); +extern unsigned char __bss_start[] __attribute__((__weak__)); +extern unsigned char __bss_end[] __attribute__((__weak__)); +extern unsigned long __got_start[] __attribute__((__weak__)); +extern unsigned long __got_end[] __attribute__((__weak__)); extern unsigned char ape_phdrs[] __attribute__((__weak__)); COSMOPOLITAN_C_END_ diff --git a/build/bootstrap/ape.aarch64 b/build/bootstrap/ape.aarch64 index c95c86c7f..65fe2cc16 100755 Binary files a/build/bootstrap/ape.aarch64 and b/build/bootstrap/ape.aarch64 differ diff --git a/build/bootstrap/compile.com b/build/bootstrap/compile.com index 392c72017..9a213f0f0 100755 Binary files a/build/bootstrap/compile.com and b/build/bootstrap/compile.com differ diff --git a/build/bootstrap/fixupobj.com b/build/bootstrap/fixupobj.com index efc2ff872..fefcebd01 100755 Binary files a/build/bootstrap/fixupobj.com and b/build/bootstrap/fixupobj.com differ diff --git a/build/bootstrap/gcc-only-flags.txt b/build/bootstrap/gcc-only-flags.txt new file mode 100644 index 000000000..77b816a6b --- /dev/null +++ b/build/bootstrap/gcc-only-flags.txt @@ -0,0 +1,58 @@ +--nocompress-debug-sections +--noexecstack +-Wa,--nocompress-debug-sections +-Wa,--noexecstack +-Wa,-msse2avx +-Werror=maybe-uninitialized +-Wno-literal-suffix +-Wno-unused-but-set-variable +-Wunsafe-loop-optimizations +-fbranch-target-load-optimize +-fcx-limited-range +-fdelete-dead-exceptions +-femit-struct-debug-baseonly +-ffp-int-builtin-inexact +-finline-functions-called-once +-fipa-pta +-fivopts +-flimit-function-alignment +-fmerge-constants +-fmodulo-sched +-fmodulo-sched-allow-regmoves +-fno-align-jumps +-fno-align-labels +-fno-align-loops +-fno-code-hoisting +-fno-cx-limited-range +-fno-fp-int-builtin-inexact +-fno-gnu-unique +-fno-inline-functions-called-once +-fno-instrument-functions +-fno-schedule-insns2 +-fno-whole-program +-fopt-info-vec +-fopt-info-vec-missed +-freg-struct-return +-freschedule-modulo-scheduled-loops +-frounding-math +-fsched2-use-superblocks +-fschedule-insns +-fschedule-insns2 +-fshrink-wrap +-fshrink-wrap-separate +-fsignaling-nans +-fstack-clash-protection +-ftracer +-ftrapv +-ftree-loop-im +-ftree-loop-vectorize +-funsafe-loop-optimizations +-fversion-loops-for-strides +-fwhole-program +-gdescribe-dies +-gstabs +-mcall-ms2sysv-xlogues +-mdispatch-scheduler +-mfpmath=sse+387 +-mmitigate-rop +-mno-fentry diff --git a/build/bootstrap/package.com b/build/bootstrap/package.com index 015964e47..1c7db4131 100755 Binary files a/build/bootstrap/package.com and b/build/bootstrap/package.com differ diff --git a/build/definitions.mk b/build/definitions.mk index 8a023d2d9..6950d5667 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -54,7 +54,7 @@ # ifeq ($(LANDLOCKMAKE_VERSION),) -TMPSAFE = $(join $(TMPDIR),$(subst /,_,$@)).tmp +TMPSAFE = $(join $(TMPDIR)/,$(subst /,_,$@)).tmp else TMPSAFE = $(TMPDIR)/ endif @@ -93,9 +93,9 @@ DEFAULT_CCFLAGS += \ -frecord-gcc-switches DEFAULT_COPTS ?= \ - -fno-math-errno \ -fno-ident \ -fno-common \ + -fno-math-errno \ -fno-gnu-unique \ -fstrict-aliasing \ -fstrict-overflow \ @@ -138,6 +138,7 @@ MATHEMATICAL = \ DEFAULT_CPPFLAGS += \ -D_COSMO_SOURCE \ -DMODE='"$(MODE)"' \ + -Wno-unknown-pragmas \ -nostdinc \ -iquote. \ -isystem libc/isystem diff --git a/build/htags b/build/htags index 698a4de2e..95f264228 100755 --- a/build/htags +++ b/build/htags @@ -55,7 +55,7 @@ set -- --regex-c='/_Atomic(\([^)]*\))/\1/b' "$@" set -- --regex-c='/^\(\(hidden\|extern\|const\) \)*[_[:alpha:]][_[:alnum:]]*[ *][ *]*\([_[:alpha:]][_[:alnum:]]*[ *][ *]*\)*\([_[:alpha:]][_$[:alnum:]]*\)/\4/b' "$@" # ctags doesn't understand function prototypes, e.g. -# bool isheap(void *p) dontthrow nocallback; +# bool isheap(void *p) dontthrow dontcallback; set -- --regex-c='/^[_[:alpha:]][_[:alnum:]]*[ *][ *]*\([_[:alpha:]][_[:alnum:]]*[ *][ *]*\)*\([_[:alpha:]][_$[:alnum:]]*\)(.*/\2/b' "$@" # ctags doesn't understand function pointers, e.g. diff --git a/build/online.mk b/build/online.mk index 0d874e5e6..da1775bbe 100644 --- a/build/online.mk +++ b/build/online.mk @@ -23,7 +23,8 @@ # - tool/build/runitd.c .PRECIOUS: o/$(MODE)/%.com.ok -o/$(MODE)/%.com.ok: private .PLEDGE = stdio rpath wpath cpath proc fattr inet +o/$(MODE)/%.com.ok: private .PLEDGE = stdio rpath wpath cpath proc fattr inet dns +o/$(MODE)/%.com.ok: private .UNVEIL += r:/etc/resolv.conf o/$(MODE)/%.com.ok: \ o/$(MODE)/tool/build/runit.com \ o/$(MODE)/tool/build/runitd.com \ diff --git a/build/rules.mk b/build/rules.mk index 728df92fd..e8ca02541 100644 --- a/build/rules.mk +++ b/build/rules.mk @@ -39,6 +39,7 @@ o/$(MODE)/%.h: %.c o/$(MODE)/%.o: %.cc @$(COMPILE) -AOBJECTIFY.cxx $(OBJECTIFY.cxx) $(OUTPUT_OPTION) $< + @$(COMPILE) -AFIXUPOBJ -wT$@ $(FIXUPOBJ) $@ o/$(MODE)/%.o: %.cpp @$(COMPILE) -AOBJECTIFY.cxx $(OBJECTIFY.cxx) $(OUTPUT_OPTION) $< diff --git a/examples/BUILD.mk b/examples/BUILD.mk index e69b297b0..5b8a75156 100644 --- a/examples/BUILD.mk +++ b/examples/BUILD.mk @@ -85,6 +85,7 @@ EXAMPLES_DIRECTDEPS = \ THIRD_PARTY_MUSL \ THIRD_PARTY_NSYNC \ THIRD_PARTY_NSYNC_MEM \ + THIRD_PARTY_OPENMP \ THIRD_PARTY_SED \ THIRD_PARTY_STB \ THIRD_PARTY_TR \ @@ -153,6 +154,7 @@ o/$(MODE)/examples/picol.com.dbg: \ @$(APELINK) o/$(MODE)/usr/share/dict/words.zip.o: private ZIPOBJ_FLAGS += -C2 +o/$(MODE)/examples/wut.o: private COPTS += -fopenmp $(EXAMPLES_OBJS): examples/BUILD.mk diff --git a/examples/system.c b/examples/system.c index aecfcbfe5..070665191 100644 --- a/examples/system.c +++ b/examples/system.c @@ -7,6 +7,7 @@ │ • http://creativecommons.org/publicdomain/zero/1.0/ │ ╚─────────────────────────────────────────────────────────────────*/ #endif +#include "libc/calls/calls.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" diff --git a/examples/unbourne.c b/examples/unbourne.c index 6ad057a43..536af38b5 100644 --- a/examples/unbourne.c +++ b/examples/unbourne.c @@ -2569,8 +2569,7 @@ static int shlex() { case 'y': case 'z': p = buf; - while (buf++, is_in_name(*buf)) - ; + while (buf++, is_in_name(*buf)); yylval.name = stalloc(buf - p + 1); *(char *)mempcpy(yylval.name, p, buf - p) = 0; value = ARITH_VAR; @@ -2994,7 +2993,7 @@ static const char *updatepwd(const char *dir) { lim = (char *)stackblock() + 1; if (*dir != '/') { if (new[-1] != '/') USTPUTC('/', new); - if (new > lim &&*lim == '/') lim++; + if (new > lim && *lim == '/') lim++; } else { USTPUTC('/', new); cdcomppath++; @@ -6565,6 +6564,10 @@ struct job *makejob(union node *node, int nprocs) { return jp; } +#if defined(__GNUC__) && __GNUC__ >= 12 +#pragma GCC diagnostic ignored "-Wuse-after-free" +#endif + static struct job *growjobtab(void) { unsigned len; long offset; @@ -7446,8 +7449,7 @@ static int ulimitcmd(int argc, char **argv) { what = optc; } } - for (l = limits; l->option != what; l++) - ; + for (l = limits; l->option != what; l++); set = *argptr ? 1 : 0; if (set) { char *p = *argptr; @@ -7660,8 +7662,7 @@ static void setparam(char **argv) { char **newparam; char **ap; int nparam; - for (nparam = 0; argv[nparam]; nparam++) - ; + for (nparam = 0; argv[nparam]; nparam++); ap = newparam = ckmalloc((nparam + 1) * sizeof *ap); while (*argv) { *ap++ = savestr(*argv++); @@ -7701,8 +7702,7 @@ static int shiftcmd(int argc, char **argv) { if (shellparam.malloc) ckfree(*ap1); } ap2 = shellparam.p; - while ((*ap2++ = *ap1++) != NULL) - ; + while ((*ap2++ = *ap1++) != NULL); shellparam.optind = 1; shellparam.optoff = -1; INTON; @@ -8308,8 +8308,7 @@ static void parsefname(void) { if (heredoclist == NULL) heredoclist = here; else { - for (p = heredoclist; p->next; p = p->next) - ; + for (p = heredoclist; p->next; p = p->next); p->next = here; } } else if (n->type == NTOFD || n->type == NFROMFD) { @@ -8432,8 +8431,7 @@ static int xxreadtoken(void) { case '\t': continue; case '#': - while ((c = pgetc()) != '\n' && c != PEOF) - ; + while ((c = pgetc()) != '\n' && c != PEOF); pungetc(); continue; case '\n': @@ -8553,7 +8551,7 @@ static int readtoken1(int firstc, char const *syntax, char *eofmark, quotef = 0; bqlist = NULL; STARTSTACKSTR(out); -loop : { /* for each line, until end of word */ +loop: { /* for each line, until end of word */ CHECKEND(); /* set c to PEOF if at end of here document */ for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ @@ -8701,7 +8699,7 @@ endword: * is called, c is set to the first character of the next input line. If * we are at the end of the here document, this routine sets the c to PEOF. */ -checkend : { +checkend: { if (realeofmark(eofmark)) { int markloc; char *p; @@ -8742,7 +8740,7 @@ checkend : { * specifying the fd to be redirected. The variable "c" contains the * first character of the redirection operator. */ -parseredir : { +parseredir: { char fd = *out; union node *np; np = (union node *)stalloc(sizeof(struct nfile)); @@ -8798,7 +8796,7 @@ parseredir : { * Parse a substitution. At this point, we have read the dollar sign * and nothing else. */ -parsesub : { +parsesub: { int subtype; int typeloc; char *p; @@ -8910,7 +8908,7 @@ parsesub : { * list of commands (passed by reference), and savelen is the number of * characters on the top of the stack which must be preserved. */ -parsebackq : { +parsebackq: { struct nodelist **nlpp; union node *n; char *str; @@ -9002,7 +9000,7 @@ parsebackq : { /* * Parse an arithmetic expansion (indicate start of one and set state) */ -parsearith : { +parsearith: { synstack_push(&synstack, synstack->prev ?: alloca(sizeof(*synstack)), ARISYNTAX); synstack->dblquote = 1; diff --git a/libc/calls/BUILD.mk b/libc/calls/BUILD.mk index bbf134fc4..7f1611f9a 100644 --- a/libc/calls/BUILD.mk +++ b/libc/calls/BUILD.mk @@ -41,6 +41,7 @@ LIBC_CALLS_A_DIRECTDEPS = \ LIBC_INTRIN \ LIBC_NEXGEN32E \ LIBC_NT_ADVAPI32 \ + LIBC_NT_BCRYPTPRIMITIVES \ LIBC_NT_IPHLPAPI \ LIBC_NT_KERNEL32 \ LIBC_NT_NTDLL \ @@ -132,7 +133,8 @@ endif o/$(MODE)/libc/calls/pledge-linux.o: private \ CFLAGS += \ -Os \ - -fPIC + -fPIC \ + -ffreestanding # these assembly files are safe to build on aarch64 o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S diff --git a/libc/calls/calls.h b/libc/calls/calls.h index 44811fe1d..b9aaa5e3f 100644 --- a/libc/calls/calls.h +++ b/libc/calls/calls.h @@ -247,6 +247,8 @@ ssize_t tinyprint(int, const char *, ...) libcesque nullterminated(); void shm_path_np(const char *, char[hasatleast 78]) libcesque; #endif /* _COSMO_SOURCE */ +int system(const char *) libcesque; + int __wifstopped(int) libcesque pureconst; int __wifcontinued(int) libcesque pureconst; int __wifsignaled(int) libcesque pureconst; diff --git a/libc/calls/clock_gettime-nt.c b/libc/calls/clock_gettime-nt.c index 363da29cc..8ebef10b6 100644 --- a/libc/calls/clock_gettime-nt.c +++ b/libc/calls/clock_gettime-nt.c @@ -21,12 +21,17 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/fmt/wintime.internal.h" +#include "libc/nt/accounting.h" +#include "libc/nt/runtime.h" #include "libc/nt/synchronization.h" +#include "libc/nt/thread.h" -#define _CLOCK_REALTIME 0 -#define _CLOCK_MONOTONIC 1 -#define _CLOCK_REALTIME_COARSE 2 -#define _CLOCK_BOOTTIME 3 +#define _CLOCK_REALTIME 0 +#define _CLOCK_MONOTONIC 1 +#define _CLOCK_REALTIME_COARSE 2 +#define _CLOCK_BOOTTIME 3 +#define _CLOCK_PROCESS_CPUTIME_ID 4 +#define _CLOCK_THREAD_CPUTIME_ID 5 static struct { uint64_t base; @@ -35,7 +40,7 @@ static struct { textwindows int sys_clock_gettime_nt(int clock, struct timespec *ts) { uint64_t t; - struct NtFileTime ft; + struct NtFileTime ft, ftExit, ftUser, ftKernel, ftCreation; switch (clock) { case _CLOCK_REALTIME: if (ts) { @@ -61,6 +66,22 @@ textwindows int sys_clock_gettime_nt(int clock, struct timespec *ts) { *ts = timespec_frommillis(GetTickCount64()); } return 0; + case _CLOCK_PROCESS_CPUTIME_ID: + if (ts) { + GetProcessTimes(GetCurrentProcess(), &ftCreation, &ftExit, &ftKernel, + &ftUser); + *ts = WindowsDurationToTimeSpec(ReadFileTime(ftUser) + + ReadFileTime(ftKernel)); + } + return 0; + case _CLOCK_THREAD_CPUTIME_ID: + if (ts) { + GetThreadTimes(GetCurrentThread(), &ftCreation, &ftExit, &ftKernel, + &ftUser); + *ts = WindowsDurationToTimeSpec(ReadFileTime(ftUser) + + ReadFileTime(ftKernel)); + } + return 0; default: return -EINVAL; } diff --git a/libc/calls/clock_gettime.c b/libc/calls/clock_gettime.c index f57b716f3..c5f72fdd9 100644 --- a/libc/calls/clock_gettime.c +++ b/libc/calls/clock_gettime.c @@ -61,24 +61,13 @@ static int __clock_gettime_init(int clockid, struct timespec *ts) { /** * Returns nanosecond time. * - * @param clock can be one of: - * - `CLOCK_REALTIME`: universally supported - * - `CLOCK_REALTIME_FAST`: ditto but faster on freebsd - * - `CLOCK_REALTIME_PRECISE`: ditto but better on freebsd - * - `CLOCK_REALTIME_COARSE`: : like `CLOCK_REALTIME_FAST` w/ Linux 2.6.32+ - * - `CLOCK_MONOTONIC`: universally supported (except on XNU/NT w/o INVTSC) - * - `CLOCK_MONOTONIC_FAST`: ditto but faster on freebsd - * - `CLOCK_MONOTONIC_PRECISE`: ditto but better on freebsd - * - `CLOCK_MONOTONIC_COARSE`: : like `CLOCK_MONOTONIC_FAST` w/ Linux 2.6.32+ - * - `CLOCK_MONOTONIC_RAW`: is actually monotonic but needs Linux 2.6.28+ - * - `CLOCK_PROCESS_CPUTIME_ID`: linux and bsd (NetBSD permits OR'd PID) - * - `CLOCK_THREAD_CPUTIME_ID`: linux and bsd (NetBSD permits OR'd TID) - * - `CLOCK_MONOTONIC_COARSE`: linux, freebsd - * - `CLOCK_PROF`: linux and netbsd - * - `CLOCK_BOOTTIME`: linux and openbsd - * - `CLOCK_REALTIME_ALARM`: linux-only - * - `CLOCK_BOOTTIME_ALARM`: linux-only - * - `CLOCK_TAI`: linux-only + * @param clock supports the following values across OSes: + * - `CLOCK_REALTIME` + * - `CLOCK_MONOTONIC` + * - `CLOCK_REALTIME_COARSE` + * - `CLOCK_MONOTONIC_COARSE` + * - `CLOCK_THREAD_CPUTIME_ID` + * - `CLOCK_PROCESS_CPUTIME_ID` * @param ts is where the result is stored (or null to do clock check) * @return 0 on success, or -1 w/ errno * @raise EFAULT if `ts` points to invalid memory diff --git a/libc/calls/close.c b/libc/calls/close.c index 95068563e..7c8a35f50 100644 --- a/libc/calls/close.c +++ b/libc/calls/close.c @@ -93,7 +93,7 @@ static int close_impl(int fd) { */ int close(int fd) { int rc; - if (__isfdkind(fd, kFdZip)) { // XXX IsWindows()? + if (__isfdkind(fd, kFdZip)) { // XXX IsWindows()? BLOCK_SIGNALS; __fds_lock(); rc = close_impl(fd); diff --git a/libc/calls/getrandom.c b/libc/calls/getrandom.c index 65171f28c..7c9a7e2f2 100644 --- a/libc/calls/getrandom.c +++ b/libc/calls/getrandom.c @@ -103,7 +103,7 @@ static ssize_t GetDevUrandom(char *p, size_t n) { ssize_t __getrandom(void *p, size_t n, unsigned f) { ssize_t rc; if (IsWindows()) { - rc = RtlGenRandom(p, n) ? n : __winerr(); + rc = ProcessPrng(p, n) ? n : __winerr(); } else if (have_getrandom) { if (IsXnu() || IsOpenbsd()) { rc = GetRandomBsd(p, n, GetRandomEntropy); @@ -131,7 +131,7 @@ ssize_t __getrandom(void *p, size_t n, unsigned f) { * * This random number seed generator obtains information from: * - * - RtlGenRandom() on Windows + * - ProcessPrng() on Windows * - getentropy() on XNU and OpenBSD * - getrandom() on Linux, FreeBSD, and NetBSD * - sysctl(KERN_ARND) on older versions of FreeBSD and NetBSD diff --git a/libc/calls/metalfile.c b/libc/calls/metalfile.c index 20d16e56f..0832d52f5 100644 --- a/libc/calls/metalfile.c +++ b/libc/calls/metalfile.c @@ -75,7 +75,9 @@ textstartup void InitializeMetalFile(void) { memcpy(copied_base, (void *)(BANE + IMAGE_BASE_PHYSICAL), size); __ape_com_base = copied_base; __ape_com_size = size; - KINFOF("%s @ %p,+%#zx", APE_COM_NAME, copied_base, size); + // TODO(tkchia): LIBC_CALLS doesn't depend on LIBC_VGA so references + // to its functions need to be weak + // KINFOF("%s @ %p,+%#zx", APE_COM_NAME, copied_base, size); } } diff --git a/libc/calls/rdrand.c b/libc/calls/rdrand.c index 7e5ebfa0c..c499c3752 100644 --- a/libc/calls/rdrand.c +++ b/libc/calls/rdrand.c @@ -52,7 +52,7 @@ static dontinline uint64_t rdrand_failover(void) { * * If RDRAND isn't available (we check CPUID and we also disable it * automatically for microarchitectures where it's slow or buggy) then - * we try getrandom(), RtlGenRandom(), or sysctl(KERN_ARND). If those + * we try getrandom(), ProcessPrng(), or sysctl(KERN_ARND). If those * aren't available then we try /dev/urandom and if that fails, we try * getauxval(AT_RANDOM), and if not we finally use RDTSC and getpid(). * diff --git a/libc/calls/read-nt.c b/libc/calls/read-nt.c index c48b9778c..540855355 100644 --- a/libc/calls/read-nt.c +++ b/libc/calls/read-nt.c @@ -157,6 +157,8 @@ static textwindows struct Keystroke *NewKeystroke(void) { struct Keystroke *k = KEYSTROKE_CONTAINER(e); dll_remove(&__keystroke.free, &k->elem); --__keystroke.freekeys; + // TODO(jart): What's wrong with GCC 12.3? + asm("" : "+r"(k)); k->buflen = 0; return k; } diff --git a/libc/calls/sched_getcpu.c b/libc/calls/sched_getcpu.c index 761bba995..ac80be47a 100644 --- a/libc/calls/sched_getcpu.c +++ b/libc/calls/sched_getcpu.c @@ -19,15 +19,19 @@ #include "libc/calls/calls.h" #include "libc/calls/struct/cpuset.h" #include "libc/dce.h" +#include "libc/errno.h" #include "libc/nexgen32e/rdtscp.h" #include "libc/nexgen32e/x86feature.h" #include "libc/nt/struct/processornumber.h" #include "libc/nt/synchronization.h" +#include "libc/runtime/syslib.internal.h" +#include "libc/sysv/errfuns.h" int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache); /** * Returns ID of CPU on which thread is currently scheduled. + * @return cpu number on success, or -1 w/ errno */ int sched_getcpu(void) { if (X86_HAVE(RDTSCP)) { @@ -38,6 +42,19 @@ int sched_getcpu(void) { struct NtProcessorNumber pn; GetCurrentProcessorNumberEx(&pn); return 64 * pn.Group + pn.Number; + } else if (IsXnuSilicon()) { + if (__syslib->__version >= 9) { + size_t cpu; + errno_t err = __syslib->__pthread_cpu_number_np(&cpu); + if (!err) { + return cpu; + } else { + errno = err; + return -1; + } + } else { + return enosys(); + } } else { unsigned cpu = 0; int rc = sys_getcpu(&cpu, 0, 0); diff --git a/libc/calls/sig.c b/libc/calls/sig.c index 586e74f8a..936ed99d4 100644 --- a/libc/calls/sig.c +++ b/libc/calls/sig.c @@ -441,8 +441,7 @@ textwindows void __sig_generate(int sig, int sic) { // to unblock our sig once the wait operation is completed; when // that's the case we can cancel the thread's i/o to deliver sig if (atomic_load_explicit(&pt->pt_blocker, memory_order_acquire) && - !(atomic_load_explicit(&pt->pt_blkmask, memory_order_relaxed) & - (1ull << (sig - 1)))) { + !(pt->pt_blkmask & (1ull << (sig - 1)))) { _pthread_ref(pt); mark = pt; break; diff --git a/libc/calls/syscall_support-nt.internal.h b/libc/calls/syscall_support-nt.internal.h index 7d7bab43c..a002ef9e3 100644 --- a/libc/calls/syscall_support-nt.internal.h +++ b/libc/calls/syscall_support-nt.internal.h @@ -18,7 +18,7 @@ int sys_fcntl_nt_setfl(int, unsigned); int sys_pause_nt(void); int64_t __fix_enotdir(int64_t, char16_t *); int64_t __fix_enotdir3(int64_t, char16_t *, char16_t *); -int64_t __winerr(void) nocallback privileged; +int64_t __winerr(void) dontcallback privileged; int64_t ntreturn(uint32_t); void *GetProcAddressModule(const char *, const char *); void WinMainForked(void); diff --git a/libc/calls/ucontext.h b/libc/calls/ucontext.h index 4c755f3b0..a869ab0ad 100644 --- a/libc/calls/ucontext.h +++ b/libc/calls/ucontext.h @@ -130,7 +130,7 @@ typedef struct ucontext ucontext_t; int getcontext(ucontext_t *) dontthrow; int setcontext(const ucontext_t *) dontthrow; int swapcontext(ucontext_t *, const ucontext_t *) dontthrow returnstwice; -void makecontext(ucontext_t *, void (*)(), int, ...) dontthrow nocallback; +void makecontext(ucontext_t *, void *, int, ...) dontthrow dontcallback; void __sig_restore(const ucontext_t *) wontreturn; COSMOPOLITAN_C_END_ diff --git a/libc/calls/uname.c b/libc/calls/uname.c index e7d5905bb..2f03c15b4 100644 --- a/libc/calls/uname.c +++ b/libc/calls/uname.c @@ -82,15 +82,27 @@ static textwindows void GetNtName(char *name, int kind) { } static inline textwindows int GetNtMajorVersion(void) { +#ifdef __x86_64__ return NtGetPeb()->OSMajorVersion; +#else + return 0; +#endif } static inline textwindows int GetNtMinorVersion(void) { +#ifdef __x86_64__ return NtGetPeb()->OSMinorVersion; +#else + return 0; +#endif } static inline textwindows int GetNtBuildNumber(void) { +#ifdef __x86_64__ return NtGetPeb()->OSBuildNumber; +#else + return 0; +#endif } static textwindows void GetNtVersion(char *p) { diff --git a/libc/cosmo.h b/libc/cosmo.h index 35c1a47eb..c84b731eb 100644 --- a/libc/cosmo.h +++ b/libc/cosmo.h @@ -4,6 +4,7 @@ COSMOPOLITAN_C_START_ errno_t cosmo_once(_Atomic(uint32_t) *, void (*)(void)); int systemvpe(const char *, char *const[], char *const[]) libcesque; +char *GetProgramExecutableName(void); COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_LIBC_COSMO_H_ */ diff --git a/libc/integral/c.inc b/libc/integral/c.inc index 11f6a781d..b5870b7fa 100644 --- a/libc/integral/c.inc +++ b/libc/integral/c.inc @@ -159,7 +159,7 @@ typedef struct { #include "libc/integral/lp64arg.inc" #endif -#define libcesque dontthrow nocallback +#define libcesque dontthrow dontcallback #define memcpyesque libcesque #define strlenesque libcesque nosideeffect paramsnonnull() #define vallocesque \ @@ -364,14 +364,14 @@ typedef struct { #endif #endif -#ifndef nocallback +#ifndef dontcallback #if !defined(__STRICT_ANSI__) && \ (__has_attribute(__leaf__) || \ (!defined(__llvm__) && \ (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 406)) -#define nocallback __attribute__((__leaf__)) +#define dontcallback __attribute__((__leaf__)) #else -#define nocallback +#define dontcallback #endif #endif @@ -645,11 +645,23 @@ void abort(void) wontreturn; #define initarray _Section(".init_array") #endif -#pragma GCC diagnostic ignored "-Wformat" /* todo: patch gcc */ +#ifndef __llvm__ +#pragma GCC diagnostic ignored "-Wformat=0" /* todo: patch gcc */ +#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" +#pragma GCC diagnostic warning "-Wunknown-pragmas" +#else +#pragma GCC diagnostic ignored "-Wformat" +#pragma GCC diagnostic ignored "-Wconstant-logical-operand" /* what */ +#pragma GCC diagnostic ignored "-Wunknown-warning-option" +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +#pragma GCC diagnostic ignored "-Wstring-plus-int" /* features 4 losers */ +#pragma GCC diagnostic ignored "-Wkeyword-compat" /* c++ upgrade */ +#pragma GCC diagnostic ignored "-Wuser-defined-literals" /* reserved for me */ +#endif + #pragma GCC diagnostic ignored "-Wformat-extra-args" /* todo: patch gcc */ #pragma GCC diagnostic ignored "-Wunused-function" /* contradicts dce */ #pragma GCC diagnostic ignored "-Wunused-const-variable" /* sooo ridiculous */ -#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" #ifndef __cplusplus #pragma GCC diagnostic ignored "-Wold-style-definition" /* orwellian bullsh */ #endif @@ -745,5 +757,18 @@ void abort(void) wontreturn; #define __funline static inline #endif +#if defined(__x86_64__) && (defined(__GNUC__) || defined(__llvm__)) && \ + !defined(__chibicc__) && 0 /* TODO: enable with toolchain upgrade */ +#define __target_clones(x) __attribute__((__target_clones__(x ",default"))) +#else +#define __target_clones(x) +#endif + +#if !defined(TINY) && !defined(__AVX__) && 0 +#define __vex __target_clones("avx") +#else +#define __vex +#endif + #define MACHINE_CODE_ANALYSIS_BEGIN_ #define MACHINE_CODE_ANALYSIS_END_ diff --git a/libc/intrin/BUILD.mk b/libc/intrin/BUILD.mk index e4ce22629..609e05018 100644 --- a/libc/intrin/BUILD.mk +++ b/libc/intrin/BUILD.mk @@ -86,6 +86,11 @@ o/$(MODE)/libc/intrin/memmove.o: private \ CFLAGS += \ -fpie +o/$(MODE)/libc/intrin/x86.o: private \ + CFLAGS += \ + -ffreestanding \ + -fno-jump-tables + # these assembly files are safe to build on aarch64 o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< diff --git a/libc/runtime/fesetround.c b/libc/intrin/fesetround.c similarity index 100% rename from libc/runtime/fesetround.c rename to libc/intrin/fesetround.c diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index d281a82c2..cb4c87659 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -24,7 +24,6 @@ #include "libc/fmt/magnumstrs.internal.h" #include "libc/intrin/asmflag.h" #include "libc/intrin/atomic.h" -#include "libc/serialize.h" #include "libc/intrin/getenv.internal.h" #include "libc/intrin/likely.h" #include "libc/intrin/nomultics.internal.h" @@ -46,6 +45,7 @@ #include "libc/runtime/memtrack.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" +#include "libc/serialize.h" #include "libc/stdckdint.h" #include "libc/str/str.h" #include "libc/str/tab.internal.h" diff --git a/libc/intrin/memchr.c b/libc/intrin/memchr.c index aa4adbb0a..b29c6e67a 100644 --- a/libc/intrin/memchr.c +++ b/libc/intrin/memchr.c @@ -67,10 +67,9 @@ static inline const unsigned char *memchr_sse(const unsigned char *s, * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -void *memchr(const void *s, int c, size_t n) { +__vex void *memchr(const void *s, int c, size_t n) { #if defined(__x86_64__) && !defined(__chibicc__) const void *r; - if (IsAsan()) __asan_verify(s, n); r = memchr_sse(s, c, n); return (void *)r; #else diff --git a/libc/intrin/memmove.c b/libc/intrin/memmove.c index ff2b4ef62..f6687c4b5 100644 --- a/libc/intrin/memmove.c +++ b/libc/intrin/memmove.c @@ -86,7 +86,7 @@ typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16))); * @return dst * @asyncsignalsafe */ -void *memmove(void *dst, const void *src, size_t n) { +__vex void *memmove(void *dst, const void *src, size_t n) { char *d; size_t i; const char *s; diff --git a/libc/intrin/memrchr.c b/libc/intrin/memrchr.c index 3a8efe2c0..59f0d6686 100644 --- a/libc/intrin/memrchr.c +++ b/libc/intrin/memrchr.c @@ -67,7 +67,7 @@ static inline const unsigned char *memrchr_sse(const unsigned char *s, * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -void *memrchr(const void *s, int c, size_t n) { +__vex void *memrchr(const void *s, int c, size_t n) { #if defined(__x86_64__) && !defined(__chibicc__) const void *r; r = memrchr_sse(s, c, n); diff --git a/libc/intrin/mman.greg.c b/libc/intrin/mman.greg.c index 7641a4969..6c886df53 100644 --- a/libc/intrin/mman.greg.c +++ b/libc/intrin/mman.greg.c @@ -44,14 +44,16 @@ #include "libc/runtime/runtime.h" #ifdef __x86_64__ -#define INVERT(x) (BANE + PHYSICAL(x)) -#define NOPAGE ((uint64_t)-1) +#define INVERT(x) (BANE + PHYSICAL((uintptr_t)(x))) +#define NOPAGE ((uint64_t) - 1) -#define ABS64(x) \ - ({ \ - int64_t vAddr; \ - __asm__("movabs\t%1,%0" : "=r"(vAddr) : "i"(x)); \ - vAddr; \ +#define APE_STACK_VADDR \ + ({ \ + int64_t vAddr; \ + __asm__(".weak\tape_stack_vaddr\n\t" \ + "movabs\t$ape_stack_vaddr,%0" \ + : "=r"(vAddr)); \ + vAddr; \ }) struct ReclaimedPage { @@ -305,7 +307,6 @@ textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b, extern char ape_phdrs_end[] __attribute__((__weak__)); extern char ape_stack_pf[] __attribute__((__weak__)); extern char ape_stack_offset[] __attribute__((__weak__)); - extern char ape_stack_vaddr[] __attribute__((__weak__)); extern char ape_stack_filesz[] __attribute__((__weak__)); extern char ape_stack_memsz[] __attribute__((__weak__)); __setup_mman(mm, pml4t, top); @@ -318,7 +319,7 @@ textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b, .p_type = PT_LOAD, .p_flags = (uintptr_t)ape_stack_pf, .p_offset = (uintptr_t)ape_stack_offset, - .p_vaddr = ABS64(ape_stack_vaddr), + .p_vaddr = APE_STACK_VADDR, .p_filesz = (uintptr_t)ape_stack_filesz, .p_memsz = (uintptr_t)ape_stack_memsz, }); diff --git a/libc/intrin/reservefd.c b/libc/intrin/reservefd.c index ae96d63d6..447f7ff2e 100644 --- a/libc/intrin/reservefd.c +++ b/libc/intrin/reservefd.c @@ -27,8 +27,6 @@ #include "libc/str/str.h" #include "libc/sysv/consts/map.h" -static volatile size_t mapsize; - /** * Grows file descriptor array memory if needed. * diff --git a/libc/intrin/segmentation.h b/libc/intrin/segmentation.h deleted file mode 100644 index 845b9a41a..000000000 --- a/libc/intrin/segmentation.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_ -#define COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_ -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) -#ifdef _COSMO_SOURCE - -/** - * Reads scalar from memory, offset by segment. - * - * @return *(MEM) relative to segment - * @see pushpop() - */ -#define fs(MEM) __peek("fs", MEM) -#define gs(MEM) __peek("gs", MEM) - -#define __peek(SEGMENT, ADDRESS) \ - ({ \ - typeof(*(ADDRESS)) Pk; \ - asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \ - Pk; \ - }) - -#endif /* _COSMO_SOURCE */ -#endif /* __GNUC__ && !__STRICT_ANSI__ */ -#endif /* COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_ */ diff --git a/libc/intrin/stpcpy.c b/libc/intrin/stpcpy.c index b3b7dfed2..c7c15e8e2 100644 --- a/libc/intrin/stpcpy.c +++ b/libc/intrin/stpcpy.c @@ -33,7 +33,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @return pointer to nul byte * @asyncsignalsafe */ -char *stpcpy(char *d, const char *s) { +__vex char *stpcpy(char *d, const char *s) { size_t i = 0; #if defined(__x86_64__) && !defined(__chibicc__) for (; (uintptr_t)(s + i) & 15; ++i) { diff --git a/libc/intrin/strchr.c b/libc/intrin/strchr.c index 7eb19a7b1..ab7439239 100644 --- a/libc/intrin/strchr.c +++ b/libc/intrin/strchr.c @@ -94,7 +94,7 @@ static inline const char *strchr_x64(const char *p, uint64_t c) { * @asyncsignalsafe * @vforksafe */ -char *strchr(const char *s, int c) { +__vex char *strchr(const char *s, int c) { #if defined(__x86_64__) && !defined(__chibicc__) const char *r; if (X86_HAVE(SSE)) { diff --git a/libc/intrin/strchrnul.c b/libc/intrin/strchrnul.c index 5c10b129c..b61cebde7 100644 --- a/libc/intrin/strchrnul.c +++ b/libc/intrin/strchrnul.c @@ -92,7 +92,7 @@ static const char *strchrnul_x64(const char *p, uint64_t c) { * @return pointer to first instance of c, or pointer to * NUL terminator if c is not found */ -char *strchrnul(const char *s, int c) { +__vex char *strchrnul(const char *s, int c) { #if defined(__x86_64__) && !defined(__chibicc__) const char *r; if (X86_HAVE(SSE)) { diff --git a/libc/intrin/strcpy.c b/libc/intrin/strcpy.c index fda91111a..b08c271e1 100644 --- a/libc/intrin/strcpy.c +++ b/libc/intrin/strcpy.c @@ -32,7 +32,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @return original dest * @asyncsignalsafe */ -char *strcpy(char *d, const char *s) { +__vex char *strcpy(char *d, const char *s) { size_t i = 0; #if defined(__x86_64__) && !defined(__chibicc__) for (; (uintptr_t)(s + i) & 15; ++i) { diff --git a/libc/intrin/strlen.c b/libc/intrin/strlen.c index bc2a95056..89d85e9d2 100644 --- a/libc/intrin/strlen.c +++ b/libc/intrin/strlen.c @@ -37,7 +37,7 @@ size_t strlen(const char *s) { while (!m) m = __builtin_ia32_pmovmskb128(*++p == z); return (const char *)p + __builtin_ctzl(m) - s; #else -#define ONES ((word)-1 / 255) +#define ONES ((word) - 1 / 255) #define BANE (ONES * (255 / 2 + 1)) typedef unsigned long mayalias word; word w; @@ -56,5 +56,4 @@ size_t strlen(const char *s) { return (const char *)p + (__builtin_ctzl(w) >> 3) - s; #endif } - #endif /* __aarch64__ */ diff --git a/libc/intrin/x86.c b/libc/intrin/x86.c new file mode 100644 index 000000000..3a2d21c77 --- /dev/null +++ b/libc/intrin/x86.c @@ -0,0 +1,808 @@ +//===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is based on LLVM's lib/Support/Host.cpp. +// It implements the operating system Host concept and builtin +// __cpu_model for the compiler_rt library for x86. +// +//===----------------------------------------------------------------------===// +#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) +#include "libc/intrin/strace.internal.h" +#include "third_party/compiler_rt/cpu_model.h" + +enum VendorSignatures { + SIG_INTEL = 0x756e6547, // Genu + SIG_AMD = 0x68747541, // Auth +}; + +enum ProcessorVendors { + VENDOR_INTEL = 1, + VENDOR_AMD, + VENDOR_OTHER, + VENDOR_MAX +}; + +enum ProcessorTypes { + INTEL_BONNELL = 1, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + INTEL_KNM, + INTEL_GOLDMONT, + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + AMDFAM19H, + ZHAOXIN_FAM7H, + INTEL_SIERRAFOREST, + INTEL_GRANDRIDGE, + INTEL_CLEARWATERFOREST, + CPU_TYPE_MAX +}; + +enum ProcessorSubtypes { + INTEL_COREI7_NEHALEM = 1, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + INTEL_COREI7_CANNONLAKE, + INTEL_COREI7_ICELAKE_CLIENT, + INTEL_COREI7_ICELAKE_SERVER, + AMDFAM17H_ZNVER2, + INTEL_COREI7_CASCADELAKE, + INTEL_COREI7_TIGERLAKE, + INTEL_COREI7_COOPERLAKE, + INTEL_COREI7_SAPPHIRERAPIDS, + INTEL_COREI7_ALDERLAKE, + AMDFAM19H_ZNVER3, + INTEL_COREI7_ROCKETLAKE, + ZHAOXIN_FAM7H_LUJIAZUI, + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, + INTEL_COREI7_GRANITERAPIDS_D, + INTEL_COREI7_ARROWLAKE, + INTEL_COREI7_ARROWLAKE_S, + INTEL_COREI7_PANTHERLAKE, + CPU_SUBTYPE_MAX +}; + +enum ProcessorFeatures { + FEATURE_CMOV = 0, + FEATURE_MMX, + FEATURE_POPCNT, + FEATURE_SSE, + FEATURE_SSE2, + FEATURE_SSE3, + FEATURE_SSSE3, + FEATURE_SSE4_1, + FEATURE_SSE4_2, + FEATURE_AVX, + FEATURE_AVX2, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + FEATURE_AVX512VBMI2, + FEATURE_GFNI, + FEATURE_VPCLMULQDQ, + FEATURE_AVX512VNNI, + FEATURE_AVX512BITALG, + FEATURE_AVX512BF16, + FEATURE_AVX512VP2INTERSECT, + + FEATURE_CMPXCHG16B = 46, + FEATURE_F16C = 49, + FEATURE_LAHF_LM = 54, + FEATURE_LM, + FEATURE_WP, + FEATURE_LZCNT, + FEATURE_MOVBE, + + FEATURE_AVX512FP16 = 94, + FEATURE_X86_64_BASELINE, + FEATURE_X86_64_V2, + FEATURE_X86_64_V3, + FEATURE_X86_64_V4, + CPU_FEATURE_MAX +}; + +// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). +// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID +// support. Consequently, for i386, the presence of CPUID is checked first +// via the corresponding eflags bit. +static bool isCpuIdSupported(void) { + return true; +} + +// This code is copied from lib/Support/Host.cpp. +// Changes to either file should be mirrored in the other. + +/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in +/// the specified arguments. If we can't run cpuid on the host, return true. +static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, + unsigned *rECX, unsigned *rEDX) { + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? + __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +} + +/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return +/// the 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, + unsigned *rEAX, unsigned *rEBX, unsigned *rECX, + unsigned *rEDX) { + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? + __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +} + +// Read control register 0 (XCR0). Used to detect features such as AVX. +static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); + return false; +} + +static void detectX86FamilyModel(unsigned EAX, unsigned *Family, + unsigned *Model) { + *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (*Family == 6 || *Family == 0xf) { + if (*Family == 0xf) + // Examine extended family ID if family ID is F. + *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +static const char *getIntelProcessorTypeAndSubtype(unsigned Family, + unsigned Model, + const unsigned *Features, + unsigned *Type, + unsigned *Subtype) { +#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 + + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + + switch (Family) { + case 6: + switch (Model) { + case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile + // processor, Intel Core 2 Quad processor, Intel Core 2 Quad + // mobile processor, Intel Core 2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm + // process. + case 0x16: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + CPU = "core2"; + *Type = INTEL_CORE2; + break; + case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, + // model 17h. All processors are manufactured using the 45 + // nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + case 0x1d: // Intel Xeon processor MP. All processors are manufactured + // using the 45 nm process. + CPU = "penryn"; + *Type = INTEL_CORE2; + break; + case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. + // As found in a Summer 2010 model iMac. + case 0x1f: + case 0x2e: // Nehalem EX + CPU = "nehalem"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_NEHALEM; + break; + case 0x25: // Intel Core i7, laptop version. + case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. + case 0x2f: // Westmere EX + CPU = "westmere"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_WESTMERE; + break; + case 0x2a: // Intel Core i7 processor. All processors are manufactured + // using the 32 nm process. + case 0x2d: + CPU = "sandybridge"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SANDYBRIDGE; + break; + case 0x3a: + case 0x3e: // Ivy Bridge EP + CPU = "ivybridge"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_IVYBRIDGE; + break; + + // Haswell: + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + CPU = "haswell"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_HASWELL; + break; + + // Broadwell: + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: + CPU = "broadwell"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_BROADWELL; + break; + + // Skylake: + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop + case 0xa5: // Comet Lake-H/S + case 0xa6: // Comet Lake-U + CPU = "skylake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SKYLAKE; + break; + + // Rocketlake: + case 0xa7: + CPU = "rocketlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ROCKETLAKE; + break; + + // Skylake Xeon: + case 0x55: + *Type = INTEL_COREI7; + if (testFeature(FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + *Subtype = INTEL_COREI7_COOPERLAKE; + } else if (testFeature(FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + *Subtype = INTEL_COREI7_CASCADELAKE; + } else { + CPU = "skylake-avx512"; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; + } + break; + + // Cannonlake: + case 0x66: + CPU = "cannonlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_CANNONLAKE; + break; + + // Icelake: + case 0x7d: + case 0x7e: + CPU = "icelake-client"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ICELAKE_CLIENT; + break; + + // Tigerlake: + case 0x8c: + case 0x8d: + CPU = "tigerlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_TIGERLAKE; + break; + + // Alderlake: + case 0x97: + case 0x9a: + // Raptorlake: + case 0xb7: + case 0xba: + case 0xbf: + // Meteorlake: + case 0xaa: + case 0xac: + // Gracemont: + case 0xbe: + CPU = "alderlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ALDERLAKE; + break; + + // Arrowlake: + case 0xc5: + CPU = "arrowlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ARROWLAKE; + break; + + // Arrowlake S: + case 0xc6: + // Lunarlake: + case 0xbd: + CPU = "arrowlake-s"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ARROWLAKE_S; + break; + + // Pantherlake: + case 0xcc: + CPU = "pantherlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_PANTHERLAKE; + break; + + // Icelake Xeon: + case 0x6a: + case 0x6c: + CPU = "icelake-server"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ICELAKE_SERVER; + break; + + // Emerald Rapids: + case 0xcf: + // Sapphire Rapids: + case 0x8f: + CPU = "sapphirerapids"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SAPPHIRERAPIDS; + break; + + // Granite Rapids: + case 0xad: + CPU = "graniterapids"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_GRANITERAPIDS; + break; + + // Granite Rapids D: + case 0xae: + CPU = "graniterapids-d"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_GRANITERAPIDS_D; + break; + + case 0x1c: // Most 45 nm Intel Atom processors + case 0x26: // 45 nm Atom Lincroft + case 0x27: // 32 nm Atom Medfield + case 0x35: // 32 nm Atom Midview + case 0x36: // 32 nm Atom Midview + CPU = "bonnell"; + *Type = INTEL_BONNELL; + break; + + // Atom Silvermont codes from the Intel software optimization guide. + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + case 0x4c: // really airmont + CPU = "silvermont"; + *Type = INTEL_SILVERMONT; + break; + // Goldmont: + case 0x5c: // Apollo Lake + case 0x5f: // Denverton + CPU = "goldmont"; + *Type = INTEL_GOLDMONT; + break; // "goldmont" + case 0x7a: + CPU = "goldmont-plus"; + *Type = INTEL_GOLDMONT_PLUS; + break; + case 0x86: + case 0x8a: // Lakefield + case 0x96: // Elkhart Lake + case 0x9c: // Jasper Lake + CPU = "tremont"; + *Type = INTEL_TREMONT; + break; + + // Sierraforest: + case 0xaf: + CPU = "sierraforest"; + *Type = INTEL_SIERRAFOREST; + break; + + // Grandridge: + case 0xb6: + CPU = "grandridge"; + *Type = INTEL_GRANDRIDGE; + break; + + // Clearwaterforest: + case 0xdd: + CPU = "clearwaterforest"; + *Type = INTEL_COREI7; + *Subtype = INTEL_CLEARWATERFOREST; + break; + + case 0x57: + CPU = "knl"; + *Type = INTEL_KNL; + break; + + case 0x85: + CPU = "knm"; + *Type = INTEL_KNM; + break; + + default: // Unknown family 6 CPU. + break; + } + break; + default: + break; // Unknown. + } + + return CPU; +} + +static const char *getAMDProcessorTypeAndSubtype(unsigned Family, + unsigned Model, + const unsigned *Features, + unsigned *Type, + unsigned *Subtype) { + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + + switch (Family) { + case 16: + CPU = "amdfam10"; + *Type = AMDFAM10H; + switch (Model) { + case 2: + *Subtype = AMDFAM10H_BARCELONA; + break; + case 4: + *Subtype = AMDFAM10H_SHANGHAI; + break; + case 8: + *Subtype = AMDFAM10H_ISTANBUL; + break; + } + break; + case 20: + CPU = "btver1"; + *Type = AMD_BTVER1; + break; + case 21: + CPU = "bdver1"; + *Type = AMDFAM15H; + if (Model >= 0x60 && Model <= 0x7f) { + CPU = "bdver4"; + *Subtype = AMDFAM15H_BDVER4; + break; // 60h-7Fh: Excavator + } + if (Model >= 0x30 && Model <= 0x3f) { + CPU = "bdver3"; + *Subtype = AMDFAM15H_BDVER3; + break; // 30h-3Fh: Steamroller + } + if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + CPU = "bdver2"; + *Subtype = AMDFAM15H_BDVER2; + break; // 02h, 10h-1Fh: Piledriver + } + if (Model <= 0x0f) { + *Subtype = AMDFAM15H_BDVER1; + break; // 00h-0Fh: Bulldozer + } + break; + case 22: + CPU = "btver2"; + *Type = AMD_BTVER2; + break; + case 23: + CPU = "znver1"; + *Type = AMDFAM17H; + if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || + (Model >= 0x60 && Model <= 0x67) || + (Model >= 0x68 && Model <= 0x6f) || + (Model >= 0x70 && Model <= 0x7f) || + (Model >= 0x84 && Model <= 0x87) || + (Model >= 0x90 && Model <= 0x97) || + (Model >= 0x98 && Model <= 0x9f) || + (Model >= 0xa0 && Model <= 0xaf)) { + // Family 17h Models 30h-3Fh (Starship) Zen 2 + // Family 17h Models 47h (Cardinal) Zen 2 + // Family 17h Models 60h-67h (Renoir) Zen 2 + // Family 17h Models 68h-6Fh (Lucienne) Zen 2 + // Family 17h Models 70h-7Fh (Matisse) Zen 2 + // Family 17h Models 84h-87h (ProjectX) Zen 2 + // Family 17h Models 90h-97h (VanGogh) Zen 2 + // Family 17h Models 98h-9Fh (Mero) Zen 2 + // Family 17h Models A0h-AFh (Mendocino) Zen 2 + CPU = "znver2"; + *Subtype = AMDFAM17H_ZNVER2; + break; + } + if ((Model >= 0x10 && Model <= 0x1f) || + (Model >= 0x20 && Model <= 0x2f)) { + // Family 17h Models 10h-1Fh (Raven1) Zen + // Family 17h Models 10h-1Fh (Picasso) Zen+ + // Family 17h Models 20h-2Fh (Raven2 x86) Zen + *Subtype = AMDFAM17H_ZNVER1; + break; + } + break; + case 25: + CPU = "znver3"; + *Type = AMDFAM19H; + if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + (Model >= 0x30 && Model <= 0x3f) || + (Model >= 0x40 && Model <= 0x4f) || + (Model >= 0x50 && Model <= 0x5f)) { + // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 + // Family 19h Models 20h-2Fh (Vermeer) Zen 3 + // Family 19h Models 30h-3Fh (Badami) Zen 3 + // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ + // Family 19h Models 50h-5Fh (Cezanne) Zen 3 + *Subtype = AMDFAM19H_ZNVER3; + break; + } + if ((Model >= 0x10 && Model <= 0x1f) || + (Model >= 0x60 && Model <= 0x6f) || + (Model >= 0x70 && Model <= 0x77) || + (Model >= 0x78 && Model <= 0x7f) || + (Model >= 0xa0 && Model <= 0xaf)) { + // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 + // Family 19h Models 60h-6Fh (Raphael) Zen 4 + // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 + // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 + // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 + CPU = "znver4"; + *Subtype = AMDFAM19H_ZNVER4; + break; // "znver4" + } + break; // family 19h + default: + break; // Unknown AMD CPU. + } + + return CPU; +} + +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *Features) { + unsigned EAX = 0, EBX = 0; + +#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1) +#define setFeature(F) Features[F / 32] |= 1U << (F % 32) + + if ((EDX >> 15) & 1) setFeature(FEATURE_CMOV); + if ((EDX >> 23) & 1) setFeature(FEATURE_MMX); + if ((EDX >> 25) & 1) setFeature(FEATURE_SSE); + if ((EDX >> 26) & 1) setFeature(FEATURE_SSE2); + + if ((ECX >> 0) & 1) setFeature(FEATURE_SSE3); + if ((ECX >> 1) & 1) setFeature(FEATURE_PCLMUL); + if ((ECX >> 9) & 1) setFeature(FEATURE_SSSE3); + if ((ECX >> 12) & 1) setFeature(FEATURE_FMA); + if ((ECX >> 13) & 1) setFeature(FEATURE_CMPXCHG16B); + if ((ECX >> 19) & 1) setFeature(FEATURE_SSE4_1); + if ((ECX >> 20) & 1) setFeature(FEATURE_SSE4_2); + if ((ECX >> 22) & 1) setFeature(FEATURE_MOVBE); + if ((ECX >> 23) & 1) setFeature(FEATURE_POPCNT); + if ((ECX >> 25) & 1) setFeature(FEATURE_AES); + if ((ECX >> 29) & 1) setFeature(FEATURE_F16C); + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. + const unsigned AVXBits = (1 << 27) | (1 << 28); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); +#if defined(__APPLE__) + // Darwin lazily saves the AVX512 context on first use: trust that the OS will + // save the AVX512 context if we use AVX512 instructions, even the bit is not + // set right now. + bool HasAVX512Save = true; +#else + // AVX512 requires additional context to be saved by the OS. + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); +#endif + + if (HasAVX) setFeature(FEATURE_AVX); + + bool HasLeaf7 = + MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + if (HasLeaf7) { + if ((EBX >> 3) & 1) setFeature(FEATURE_BMI); + if (((EBX >> 5) & 1) && HasAVX) setFeature(FEATURE_AVX2); + if ((EBX >> 8) & 1) setFeature(FEATURE_BMI2); + if (HasAVX512Save) { + if ((EBX >> 16) & 1) setFeature(FEATURE_AVX512F); + if ((EBX >> 17) & 1) setFeature(FEATURE_AVX512DQ); + if ((EBX >> 21) & 1) setFeature(FEATURE_AVX512IFMA); + if ((EBX >> 26) & 1) setFeature(FEATURE_AVX512PF); + if ((EBX >> 27) & 1) setFeature(FEATURE_AVX512ER); + if ((EBX >> 28) & 1) setFeature(FEATURE_AVX512CD); + if ((EBX >> 30) & 1) setFeature(FEATURE_AVX512BW); + if ((EBX >> 31) & 1) setFeature(FEATURE_AVX512VL); + if ((ECX >> 1) & 1) setFeature(FEATURE_AVX512VBMI); + if ((ECX >> 6) & 1) setFeature(FEATURE_AVX512VBMI2); + if ((ECX >> 11) & 1) setFeature(FEATURE_AVX512VNNI); + if ((ECX >> 12) & 1) setFeature(FEATURE_AVX512BITALG); + if ((ECX >> 14) & 1) setFeature(FEATURE_AVX512VPOPCNTDQ); + if ((EDX >> 2) & 1) setFeature(FEATURE_AVX5124VNNIW); + if ((EDX >> 3) & 1) setFeature(FEATURE_AVX5124FMAPS); + if ((EDX >> 8) & 1) setFeature(FEATURE_AVX512VP2INTERSECT); + if ((EDX >> 23) & 1) setFeature(FEATURE_AVX512FP16); + } + if ((ECX >> 8) & 1) setFeature(FEATURE_GFNI); + if (((ECX >> 10) & 1) && HasAVX) setFeature(FEATURE_VPCLMULQDQ); + } + + // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't + // return all 0s for invalid subleaves so check the limit. + bool HasLeaf7Subleaf1 = + HasLeaf7 && EAX >= 1 && + !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BF16); + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1) { + if (ECX & 1) setFeature(FEATURE_LAHF_LM); + if ((ECX >> 5) & 1) setFeature(FEATURE_LZCNT); + if (((ECX >> 6) & 1)) setFeature(FEATURE_SSE4_A); + if (((ECX >> 11) & 1)) setFeature(FEATURE_XOP); + if (((ECX >> 16) & 1)) setFeature(FEATURE_FMA4); + if (((EDX >> 29) & 1)) setFeature(FEATURE_LM); + } + + if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) { + setFeature(FEATURE_X86_64_BASELINE); + if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) && + hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) { + setFeature(FEATURE_X86_64_V2); + if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) && + hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) && + hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) && + hasFeature(FEATURE_MOVBE)) { + setFeature(FEATURE_X86_64_V3); + if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) && + hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL)) + setFeature(FEATURE_X86_64_V4); + } + } + } + +#undef hasFeature +#undef setFeature +} + +int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; + +struct __processor_model { + unsigned int __cpu_vendor; + unsigned int __cpu_type; + unsigned int __cpu_subtype; + unsigned int __cpu_features[1]; +} __cpu_model = {0, 0, 0, {0}}; + +unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32]; + +// A constructor function that is sets __cpu_model and __cpu_features2 with +// the right values. This needs to run only once. This constructor is +// given the highest priority and it should run before constructors without +// the priority set. However, it still runs after ifunc initializers and +// needs to be called explicitly there. + +int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { + unsigned EAX, EBX, ECX, EDX; + unsigned MaxLeaf = 5; + unsigned Vendor; + unsigned Model, Family; + unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; + _Static_assert(sizeof(Features) / sizeof(Features[0]) == 4, ""); + _Static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, ""); + + // This function needs to run just once. + if (__cpu_model.__cpu_vendor) return 0; + + if (!isCpuIdSupported() || + getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { + __cpu_model.__cpu_vendor = VENDOR_OTHER; + return -1; + } + + getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + detectX86FamilyModel(EAX, &Family, &Model); + + // Find available features. + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); + + __cpu_model.__cpu_features[0] = Features[0]; + __cpu_features2[0] = Features[1]; + __cpu_features2[1] = Features[2]; + __cpu_features2[2] = Features[3]; + + if (Vendor == SIG_INTEL) { + // Get CPU type. + getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_INTEL; + } else if (Vendor == SIG_AMD) { + // Get CPU type. + getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_AMD; + } else { + __cpu_model.__cpu_vendor = VENDOR_OTHER; + } + + return 0; +} + +#endif // __x86_64__ && (gnuc || clang) diff --git a/libc/log/addr2linepath.c b/libc/log/addr2linepath.c index 5493645c2..89c09d18b 100644 --- a/libc/log/addr2linepath.c +++ b/libc/log/addr2linepath.c @@ -36,7 +36,7 @@ static struct { char buf[PATH_MAX]; } g_addr2line; -const void GetAddr2linePathInit(void) { +void GetAddr2linePathInit(void) { int e = errno; const char *path; if (!(path = getenv("ADDR2LINE"))) { diff --git a/libc/log/cxaprintexits.c b/libc/log/cxaprintexits.c index 08e7f2d4a..a9b6354f5 100644 --- a/libc/log/cxaprintexits.c +++ b/libc/log/cxaprintexits.c @@ -48,10 +48,10 @@ void __cxa_printexits(FILE *f, void *pred) { if (symbol) { snprintf(name, sizeof(name), "%s", symbol); } else { - snprintf(name, sizeof(name), "0x%016lx", b->p[i].fp); + snprintf(name, sizeof(name), "0x%016lx", (unsigned long)b->p[i].fp); } - fprintf(f, "%-22s 0x%016lx 0x%016lx\n", name, b->p[i].arg, - b->p[i].pred); + fprintf(f, "%-22s 0x%016lx 0x%016lx\n", name, + (unsigned long)b->p[i].arg, (unsigned long)b->p[i].pred); } } } while ((b = b->next)); diff --git a/libc/log/die.c b/libc/log/die.c index d2fcd7948..30458102c 100644 --- a/libc/log/die.c +++ b/libc/log/die.c @@ -20,6 +20,7 @@ #include "libc/errno.h" #include "libc/intrin/describebacktrace.internal.h" #include "libc/intrin/kprintf.h" +#include "libc/log/backtrace.internal.h" #include "libc/log/internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" @@ -51,5 +52,6 @@ relegated wontreturn void __die(void) { __nocolor ? "" : "\e[1;31m", program_invocation_short_name, host, getpid(), gettid(), __nocolor ? "" : "\e[0m", FindDebugBinary(), DescribeBacktrace(__builtin_frame_address(0))); + ShowBacktrace(2, __builtin_frame_address(0)); _Exit(77); } diff --git a/libc/log/gdbexec.c b/libc/log/gdbexec.c index 3e0e035ba..f8d47f14e 100644 --- a/libc/log/gdbexec.c +++ b/libc/log/gdbexec.c @@ -41,7 +41,7 @@ relegated int(gdbexec)(const char *cmd) { elf = "-q"; } bp = __builtin_frame_address(0); - sprintf(breakcmd, "%s *%#p", "break", bp->addr); + sprintf(breakcmd, "%s *%#lx", "break", (unsigned long)bp->addr); if (!(pid = vfork())) { execv(gdb, (char *const[]){ "gdb", diff --git a/libc/log/oncrash_amd64.c b/libc/log/oncrash_amd64.c index fe0949c62..06e155a15 100644 --- a/libc/log/oncrash_amd64.c +++ b/libc/log/oncrash_amd64.c @@ -252,16 +252,9 @@ static relegated void ShowCrashReport(int err, int sig, struct siginfo *si, } relegated void __oncrash(int sig, struct siginfo *si, void *arg) { - ucontext_t *ctx = arg; - int gdbpid, err; - err = errno; - if ((gdbpid = IsDebuggerPresent(true))) { - DebugBreak(); - } - if (!(gdbpid > 0 && (sig == SIGTRAP || sig == SIGQUIT))) { - __restore_tty(); - ShowCrashReport(err, sig, si, ctx); - } + int err = errno; + __restore_tty(); + ShowCrashReport(err, sig, si, arg); } #endif /* __x86_64__ */ diff --git a/libc/macros.internal.h b/libc/macros.internal.h index 396f2b749..ae6313384 100644 --- a/libc/macros.internal.h +++ b/libc/macros.internal.h @@ -21,8 +21,8 @@ #define TRUE 1 #define FALSE 0 -#define IS2POW(X) (!((X) & ((X)-1))) -#define ROUNDUP(X, K) (((X) + (K)-1) & -(K)) +#define IS2POW(X) (!((X) & ((X) - 1))) +#define ROUNDUP(X, K) (((X) + (K) - 1) & -(K)) #define ROUNDDOWN(X, K) ((X) & -(K)) #ifndef __ASSEMBLER__ #define ABS(X) ((X) >= 0 ? (X) : -(X)) @@ -40,7 +40,7 @@ #define STRINGIFY(A) __STRINGIFY(A) #define EQUIVALENT(X, Y) (__builtin_constant_p((X) == (Y)) && ((X) == (Y))) #define TYPE_BIT(type) (sizeof(type) * CHAR_BIT) -#define TYPE_SIGNED(type) (((type)-1) < 0) +#define TYPE_SIGNED(type) (((type) - 1) < 0) #define TYPE_INTEGRAL(type) (((type)0.5) != 0.5) #define ARRAYLEN(A) \ diff --git a/libc/mem/mergesort.c b/libc/mem/mergesort.c index 205a20ef4..70344f552 100644 --- a/libc/mem/mergesort.c +++ b/libc/mem/mergesort.c @@ -54,8 +54,8 @@ asm(".include \"libc/disclaimer.inc\""); * (The default is pairwise merging.) */ -static void setup(uint8_t *, uint8_t *, size_t, size_t, int (*)(), void *); -static void insertionsort(uint8_t *, size_t, size_t, int (*)(), void *); +static void setup(uint8_t *, uint8_t *, size_t, size_t, int (*)(const void *, const void *, void *), void *); +static void insertionsort(uint8_t *, size_t, size_t, int (*)(const void *, const void *, void *), void *); #define ISIZE sizeof(int) #define PSIZE sizeof(uint8_t *) diff --git a/libc/nexgen32e/rdtscp.h b/libc/nexgen32e/rdtscp.h index 9c3502faf..5bebc8735 100644 --- a/libc/nexgen32e/rdtscp.h +++ b/libc/nexgen32e/rdtscp.h @@ -5,7 +5,7 @@ #include "libc/nexgen32e/x86feature.h" COSMOPOLITAN_C_START_ -#define TSC_AUX_CORE(MSR) ((MSR)&0xfff) +#define TSC_AUX_CORE(MSR) ((MSR) & 0xfff) #define TSC_AUX_NODE(MSR) (((MSR) >> 12) & 0xfff) /** @@ -41,9 +41,10 @@ COSMOPOLITAN_C_START_ asm volatile("rdpid\t%0" : "=r"(Msr) : /* no inputs */ : "memory"); \ Ok = true; \ } else if (IsLinux()) { \ + char *p = (char *)0x7b; \ asm volatile(ZFLAG_ASM("lsl\t%2,%1") \ : ZFLAG_CONSTRAINT(Ok), "=r"(Msr) \ - : "r"(0x7b) \ + : "r"(p) \ : "memory"); \ } \ if (!Ok && X86_HAVE(RDTSCP)) { \ diff --git a/libc/nt/advapi32/SystemFunction036.S b/libc/nt/BCryptPrimitives/ProcessPrng.S similarity index 57% rename from libc/nt/advapi32/SystemFunction036.S rename to libc/nt/BCryptPrimitives/ProcessPrng.S index 4380c47e3..d7eec4fbb 100644 --- a/libc/nt/advapi32/SystemFunction036.S +++ b/libc/nt/BCryptPrimitives/ProcessPrng.S @@ -1,18 +1,18 @@ #include "libc/nt/codegen.h" -.imp advapi32,__imp_SystemFunction036,SystemFunction036 +.imp BCryptPrimitives,__imp_ProcessPrng,ProcessPrng .text.windows .ftrace1 -RtlGenRandom: +ProcessPrng: .ftrace2 #ifdef __x86_64__ push %rbp mov %rsp,%rbp - mov __imp_SystemFunction036(%rip),%rax + mov __imp_ProcessPrng(%rip),%rax jmp __sysv2nt #elif defined(__aarch64__) mov x0,#0 ret #endif - .endfn RtlGenRandom,globl + .endfn ProcessPrng,globl .previous diff --git a/libc/nt/BUILD.mk b/libc/nt/BUILD.mk index b49f2c41a..feaa79e24 100644 --- a/libc/nt/BUILD.mk +++ b/libc/nt/BUILD.mk @@ -297,6 +297,24 @@ $(LIBC_NT_PSAPI_A).pkg: \ #─────────────────────────────────────────────────────────────────────────────── +LIBC_NT_ARTIFACTS += LIBC_NT_BCRYPTPRIMITIVES_A +LIBC_NT_BCRYPTPRIMITIVES = $(LIBC_NT_BCRYPTPRIMITIVES_A_DEPS) $(LIBC_NT_BCRYPTPRIMITIVES_A) +LIBC_NT_BCRYPTPRIMITIVES_A = o/$(MODE)/libc/nt/BCryptPrimitives.a +LIBC_NT_BCRYPTPRIMITIVES_A_SRCS := $(wildcard libc/nt/BCryptPrimitives/*.S) +LIBC_NT_BCRYPTPRIMITIVES_A_OBJS = $(LIBC_NT_BCRYPTPRIMITIVES_A_SRCS:%.S=o/$(MODE)/%.o) +LIBC_NT_BCRYPTPRIMITIVES_A_CHECKS = $(LIBC_NT_BCRYPTPRIMITIVES_A).pkg +LIBC_NT_BCRYPTPRIMITIVES_A_DIRECTDEPS = LIBC_NT_KERNEL32 +LIBC_NT_BCRYPTPRIMITIVES_A_DEPS := $(call uniq,$(foreach x,$(LIBC_NT_BCRYPTPRIMITIVES_A_DIRECTDEPS),$($(x)))) +$(LIBC_NT_BCRYPTPRIMITIVES_A): \ + libc/nt/BCryptPrimitives/ \ + $(LIBC_NT_BCRYPTPRIMITIVES_A).pkg \ + $(LIBC_NT_BCRYPTPRIMITIVES_A_OBJS) +$(LIBC_NT_BCRYPTPRIMITIVES_A).pkg: \ + $(LIBC_NT_BCRYPTPRIMITIVES_A_OBJS) \ + $(foreach x,$(LIBC_NT_BCRYPTPRIMITIVES_A_DIRECTDEPS),$($(x)_A).pkg) + +#─────────────────────────────────────────────────────────────────────────────── + # let aarch64 compile these o/$(MODE)/libc/nt/%.o: libc/nt/%.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) $< diff --git a/libc/nt/master.sh b/libc/nt/master.sh index 11d9e4528..5a76231ed 100755 --- a/libc/nt/master.sh +++ b/libc/nt/master.sh @@ -362,7 +362,6 @@ imp 'RegisterEventSource' RegisterEventSourceW advapi32 2 imp 'ReportEvent' ReportEventW advapi32 9 imp 'ReportEventA' ReportEventA advapi32 9 imp 'RevertToSelf' RevertToSelf advapi32 0 -imp 'RtlGenRandom' SystemFunction036 advapi32 2 imp 'TraceSetInformation' TraceSetInformation advapi32 # Windows 7+ # USER32.DLL @@ -611,6 +610,11 @@ imp 'GetModuleBaseName' GetModuleBaseNameW psapi 4 imp 'GetProcessImageFileName' GetProcessImageFileNameW psapi 3 imp 'GetProcessMemoryInfo' GetProcessMemoryInfo psapi 3 +# BCryptPrimitives.dll +# +# Name Actual DLL Arity +imp 'ProcessPrng' ProcessPrng BCryptPrimitives 2 + # API-MS-Win-Core-Synch-l1-2-0.dll (Windows 8+) # # Name Actual DLL Arity diff --git a/libc/nt/runtime.h b/libc/nt/runtime.h index 5248816a3..953e77692 100644 --- a/libc/nt/runtime.h +++ b/libc/nt/runtime.h @@ -36,11 +36,11 @@ bool32 TerminateProcess(int64_t hProcess, uint32_t uExitCode); void TerminateThisProcess(uint32_t dwWaitStatus) wontreturn; void ExitProcess(uint32_t uExitCode) wontreturn; uint32_t GetLastError(void) nosideeffect; -bool32 CloseHandle(int64_t hObject) dontthrow nocallback; +bool32 CloseHandle(int64_t hObject) dontthrow dontcallback; intptr_t GetStdHandle(uint32_t nStdHandle) nosideeffect; bool32 SetStdHandle(uint32_t nStdHandle, int64_t hHandle); bool32 SetDefaultDllDirectories(unsigned dirflags); -bool32 RtlGenRandom(void *RandomBuffer, uint32_t RandomBufferLength); +bool32 ProcessPrng(void *RandomBuffer, uint32_t RandomBufferLength); uint32_t GetModuleFileName(int64_t hModule, char16_t *lpFilename, uint32_t nSize); diff --git a/libc/nt/struct/teb.h b/libc/nt/struct/teb.h index a232b4f43..ec997a83e 100644 --- a/libc/nt/struct/teb.h +++ b/libc/nt/struct/teb.h @@ -1,6 +1,5 @@ #ifndef COSMOPOLITAN_LIBC_NT_TEB_H_ #define COSMOPOLITAN_LIBC_NT_TEB_H_ -#include "libc/intrin/segmentation.h" #include "libc/nt/struct/peb.h" #if defined(__GNUC__) && !defined(__STRICT_ANSI__) @@ -8,19 +7,19 @@ * These macros address directly into NT's TEB a.k.a. TIB * Any function that does this needs the `dontasan` keyword */ -#define NtGetPeb() gs((struct NtPeb **)(0x60ULL)) -#define NtGetTeb() gs((void **)(0x30)) /* %gs:0 linear address */ -#define NtGetPid() gs((uint32_t *)(0x40)) /* GetCurrentProcessId() */ -#define NtGetTid() gs((uint32_t *)(0x48)) /* GetCurrentThreadId() */ -#define NtGetErr() gs((int *)(0x68)) -#define _NtGetSeh() gs((void **)(0x00)) -#define _NtGetStackHigh() gs((void **)(0x08)) -#define _NtGetStackLow() gs((void **)(0x10)) -#define _NtGetSubsystemTib() gs((void **)(0x18)) -#define _NtGetFib() gs((void **)(0x20)) -#define _NtGetEnv() gs((char16_t **)(0x38)) -#define _NtGetRpc() gs((void **)(0x50)) -#define _NtGetTls() gs((void **)(0x58)) /* cf. gs((long *)0x1480 + i0..64) */ +#define NtGetPeb() ((__seg_gs struct NtPeb *)0x60) +#define NtGetTeb() ((void *)*(__seg_gs uintptr_t *)0x30) +#define NtGetPid() (*(__seg_gs uint32_t *)0x40) +#define NtGetTid() (*(__seg_gs uint32_t *)0x48) +#define NtGetErr() (*(__seg_gs int *)0x68) +#define _NtGetSeh() ((void *)*(__seg_gs uintptr_t *)0x00) +#define _NtGetStackHigh() ((void *)*(__seg_gs uintptr_t *)0x08) +#define _NtGetStackLow() ((void *)*(__seg_gs uintptr_t *)0x10) +#define _NtGetSubsystemTib() ((void *)*(__seg_gs uintptr_t *)0x18) +#define _NtGetFib() ((void *)*(__seg_gs uintptr_t *)0x20) +#define _NtGetEnv() ((char16_t *)*(__seg_gs intptr_t *)0x38) +#define _NtGetRpc() ((void *)*(__seg_gs uintptr_t *)0x50) +#define _NtGetTls() ((void *)*(__seg_gs uintptr_t *)0x58) #endif /* __GNUC__ && !__STRICT_ANSI__ */ #endif /* COSMOPOLITAN_LIBC_NT_TEB_H_ */ diff --git a/libc/proc/fork-nt.c b/libc/proc/fork-nt.c index a59fecaaa..dac6300cb 100644 --- a/libc/proc/fork-nt.c +++ b/libc/proc/fork-nt.c @@ -78,7 +78,9 @@ static textwindows char16_t *ParseInt(char16_t *p, int64_t *x) { } static inline textwindows ssize_t ForkIo(int64_t h, char *p, size_t n, - bool32 (*f)()) { + bool32 (*f)(int64_t, void *, uint32_t, + uint32_t *, + struct NtOverlapped *)) { size_t i; uint32_t x; for (i = 0; i < n; i += x) { @@ -90,8 +92,10 @@ static inline textwindows ssize_t ForkIo(int64_t h, char *p, size_t n, } static dontinline textwindows bool ForkIo2(int64_t h, void *buf, size_t n, - bool32 (*fn)(), const char *sf, - bool ischild) { + bool32 (*fn)(int64_t, void *, + uint32_t, uint32_t *, + struct NtOverlapped *), + const char *sf, bool ischild) { ssize_t rc = ForkIo(h, buf, n, fn); if (ischild) __tls_enabled_set(false); // prevent tls crash in kprintf NTTRACE("%s(%ld, %p, %'zu) → %'zd% m", sf, h, buf, n, rc); @@ -100,9 +104,9 @@ static dontinline textwindows bool ForkIo2(int64_t h, void *buf, size_t n, static dontinline textwindows bool WriteAll(int64_t h, void *buf, size_t n) { bool ok; - ok = ForkIo2(h, buf, n, WriteFile, "WriteFile", false); + ok = ForkIo2(h, buf, n, (void *)WriteFile, "WriteFile", false); #ifndef NDEBUG - if (ok) ok = ForkIo2(h, &n, sizeof(n), WriteFile, "WriteFile", false); + if (ok) ok = ForkIo2(h, &n, sizeof(n), (void *)WriteFile, "WriteFile", false); #endif #if SYSDEBUG if (!ok) { diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index 85d3db9df..cb1d0733f 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -94,7 +94,7 @@ struct CloneArgs { void *arg; }; -int sys_set_tls(); +int sys_set_tls(uintptr_t, void *); int __stack_call(void *, int, long, long, int (*)(void *, int), void *); static struct CloneArgs *AllocateCloneArgs(char *stk, size_t stksz) { diff --git a/libc/runtime/cosmo2.c b/libc/runtime/cosmo2.c index 841c37d23..f01c63720 100644 --- a/libc/runtime/cosmo2.c +++ b/libc/runtime/cosmo2.c @@ -149,7 +149,7 @@ wontreturn textstartup void cosmo(long *sp, struct Syslib *m1, char *exename, } // check system call abi compatibility - if (IsXnu() && __syslib->__version < SYSLIB_VERSION) { + if (IsXnu() && __syslib->__version < SYSLIB_VERSION_MANDATORY) { sys_write(2, "need newer ape loader\n", 22); _Exit(127); } diff --git a/libc/runtime/efimain.greg.c b/libc/runtime/efimain.greg.c index 50d5dee82..7861ef0bc 100644 --- a/libc/runtime/efimain.greg.c +++ b/libc/runtime/efimain.greg.c @@ -30,6 +30,9 @@ #include "libc/runtime/runtime.h" #include "libc/str/str.h" +#pragma GCC diagnostic ignored "-Warray-bounds" +#pragma GCC diagnostic ignored "-Wstringop-overflow" + #ifdef __x86_64__ /* TODO: Why can't we change CR3? Could it really need PML5T? */ @@ -162,7 +165,7 @@ static void EfiInitAcpi(struct mman *mm, EFI_SYSTEM_TABLE *SystemTable) { * @see libc/dce.h */ __msabi EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, - EFI_SYSTEM_TABLE *SystemTable) { + EFI_SYSTEM_TABLE *SystemTable) { struct mman *mm; uint32_t DescVersion; uintptr_t i, j, MapSize; @@ -215,9 +218,8 @@ __msabi EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, &kEfiLoadedImageProtocol, &ImgInfo); CmdLine = (const char16_t *)ImgInfo->LoadOptions; if (!CmdLine || !CmdLine[0]) CmdLine = u"BOOTX64.EFI"; - Args = GetDosArgv(CmdLine, ArgBlock->ArgBlock, - sizeof(ArgBlock->ArgBlock), ArgBlock->Args, - ARRAYLEN(ArgBlock->Args)); + Args = GetDosArgv(CmdLine, ArgBlock->ArgBlock, sizeof(ArgBlock->ArgBlock), + ArgBlock->Args, ARRAYLEN(ArgBlock->Args)); /* * Gets information about our current video mode. Clears the screen. diff --git a/libc/runtime/getargmax.c b/libc/runtime/getargmax.c index 3a02203d2..37ce64c83 100644 --- a/libc/runtime/getargmax.c +++ b/libc/runtime/getargmax.c @@ -19,6 +19,8 @@ #include "libc/dce.h" #include "libc/macros.internal.h" #include "libc/runtime/runtime.h" +#include "libc/stdio/sysparam.h" +#include "libc/sysv/consts/_posix.h" #include "libc/sysv/consts/limits.h" #include "libc/sysv/consts/rlimit.h" @@ -29,6 +31,7 @@ * Returns expensive but more correct version of `ARG_MAX`. */ int __get_arg_max(void) { + int res; if (IsLinux()) { // You might think that just returning a constant 128KiB (ARG_MAX) // would make sense, as this guy did: @@ -57,10 +60,11 @@ int __get_arg_max(void) { // does. Right now (2019, Linux 5.3) that amounts to: uint64_t stacksz; stacksz = __get_rlimit(RLIMIT_STACK); - return MAX(MIN(stacksz / 4, 3 * (8 * 1024 * 1024) / 4), _ARG_MAX); + res = MAX(MIN(stacksz / 4, 3 * (8 * 1024 * 1024) / 4), _ARG_MAX); } else if (IsBsd()) { - return __get_sysctl(CTL_KERN, KERN_ARGMAX); + res = __get_sysctl(CTL_KERN, KERN_ARGMAX); } else { - return _ARG_MAX; + res = _ARG_MAX; } + return MAX(res, _POSIX_ARG_MAX); } diff --git a/libc/runtime/memtrack.internal.h b/libc/runtime/memtrack.internal.h index 543a16c22..6607a036a 100644 --- a/libc/runtime/memtrack.internal.h +++ b/libc/runtime/memtrack.internal.h @@ -164,9 +164,9 @@ forceinline pureconst bool OverlapsImageSpace(const void *p, size_t n) { const unsigned char *BegA, *EndA, *BegB, *EndB; if (n) { BegA = p; - EndA = BegA + (n - 1); + EndA = BegA + n; BegB = __executable_start; - EndB = _end - 1; + EndB = _end; return MAX(BegA, BegB) < MIN(EndA, EndB); } else { return 0; @@ -177,9 +177,9 @@ forceinline pureconst bool OverlapsShadowSpace(const void *p, size_t n) { intptr_t BegA, EndA, BegB, EndB; if (n) { BegA = (intptr_t)p; - EndA = BegA + (n - 1); + EndA = BegA + n; BegB = 0x7fff0000; - EndB = 0x10007fffffff; + EndB = 0x100080000000; return MAX(BegA, BegB) < MIN(EndA, EndB); } else { return 0; diff --git a/libc/runtime/runtime.h b/libc/runtime/runtime.h index ff26dde66..db0dfb253 100644 --- a/libc/runtime/runtime.h +++ b/libc/runtime/runtime.h @@ -54,7 +54,7 @@ char *getlogin(void); int getlogin_r(char *, size_t); int login_tty(int); int getpagesize(void); -int syncfs(int); +int syncfs(int) dontthrow; int vhangup(void); int getdtablesize(void); int sethostname(const char *, size_t); diff --git a/libc/runtime/set_tls.c b/libc/runtime/set_tls.c index 0dd8049c5..29972f484 100644 --- a/libc/runtime/set_tls.c +++ b/libc/runtime/set_tls.c @@ -29,7 +29,7 @@ #define AMD64_SET_FSBASE 129 #define AMD64_SET_GSBASE 131 -int sys_set_tls(); +int sys_set_tls(uintptr_t, void *); // we can't allow --ftrace here because cosmo_dlopen() calls this // function to fix the tls register, and ftrace needs it unbroken @@ -47,12 +47,12 @@ dontinstrument textstartup void __set_tls(struct CosmoTib *tib) { // netbsd has sysarch(X86_SET_FSBASE) but we can't use that because // signal handlers will cause it to be reset due to not setting the // _mc_tlsbase field in struct mcontext_netbsd. - sys_set_tls(tib); + sys_set_tls((uintptr_t)tib, 0); } else if (IsOpenbsd()) { - sys_set_tls(tib); + sys_set_tls((uintptr_t)tib, 0); } else if (IsXnu()) { // thread_fast_set_cthread_self has a weird ABI - sys_set_tls((intptr_t)tib - 0x30); + sys_set_tls((intptr_t)tib - 0x30, 0); } else { uint64_t val = (uint64_t)tib; asm volatile("wrmsr" diff --git a/libc/runtime/syslib.internal.h b/libc/runtime/syslib.internal.h index 01e129464..ec6d87fe5 100644 --- a/libc/runtime/syslib.internal.h +++ b/libc/runtime/syslib.internal.h @@ -10,8 +10,14 @@ COSMOPOLITAN_C_START_ * `-errno` convention, and hence should be wrapped with `_sysret()`. */ -#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24) -#define SYSLIB_VERSION 8 +#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24) + +#define SYSLIB_VERSION 9 /* sync with ape/ape-m1.c */ + +/* if this number increases, then everyone on macos arm will need to + reinstall ape loader in order to run newer ape binaries so please + don't do this if it's sufficient to just check __syslib->version. */ +#define SYSLIB_VERSION_MANDATORY 8 typedef uint64_t dispatch_time_t; typedef uint64_t dispatch_semaphore_t; @@ -69,11 +75,13 @@ struct Syslib { long (*__sem_trywait)(int *); long (*__getrlimit)(int, void *); long (*__setrlimit)(int, const void *); - // v6 (2023-11-03) + /* v6 (2023-11-03) */ void *(*__dlopen)(const char *, int); void *(*__dlsym)(void *, const char *); int (*__dlclose)(void *); char *(*__dlerror)(void); + /* v9 (2024-01-31) */ + int (*__pthread_cpu_number_np)(size_t *); }; extern struct Syslib *__syslib; diff --git a/libc/runtime/winargs.internal.h b/libc/runtime/winargs.internal.h index c123cd613..ccf0fc74d 100644 --- a/libc/runtime/winargs.internal.h +++ b/libc/runtime/winargs.internal.h @@ -4,12 +4,17 @@ COSMOPOLITAN_C_START_ struct WinArgs { - char *argv[8192]; - char *envp[512]; - intptr_t auxv[2][2]; - char argv0buf[256]; - char argblock[32767]; - char envblock[32767]; + union { + struct { + char *argv[8192]; + char *envp[512]; + intptr_t auxv[2][2]; + char argv0buf[256]; + char argblock[32767]; + char envblock[32767]; + }; + char16_t tmp16[257]; + }; } forcealign(16); COSMOPOLITAN_C_END_ diff --git a/libc/runtime/winmain.greg.c b/libc/runtime/winmain.greg.c index 118f7fbdf..3c0ae49ed 100644 --- a/libc/runtime/winmain.greg.c +++ b/libc/runtime/winmain.greg.c @@ -24,6 +24,7 @@ #include "libc/limits.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/rdtsc.h" +#include "libc/nt/accounting.h" #include "libc/nt/console.h" #include "libc/nt/enum/consolemodeflags.h" #include "libc/nt/enum/filemapflags.h" @@ -59,6 +60,7 @@ __msabi extern typeof(GetEnvironmentStrings) *const __imp_GetEnvironmentStringsW __msabi extern typeof(GetEnvironmentVariable) *const __imp_GetEnvironmentVariableW; __msabi extern typeof(GetFileAttributes) *const __imp_GetFileAttributesW; __msabi extern typeof(GetStdHandle) *const __imp_GetStdHandle; +__msabi extern typeof(GetUserName) *const __imp_GetUserNameW; __msabi extern typeof(MapViewOfFileEx) *const __imp_MapViewOfFileEx; __msabi extern typeof(SetConsoleCP) *const __imp_SetConsoleCP; __msabi extern typeof(SetConsoleMode) *const __imp_SetConsoleMode; @@ -142,6 +144,11 @@ static abi void DeduplicateStdioHandles(void) { } } +static bool32 HasEnvironmentVariable(const char16_t *name) { + char16_t buf[4]; + return __imp_GetEnvironmentVariableW(name, buf, ARRAYLEN(buf)); +} + // main function of windows init process // i.e. first process spawned that isn't forked static abi wontreturn void WinInit(const char16_t *cmdline) { @@ -168,12 +175,6 @@ static abi wontreturn void WinInit(const char16_t *cmdline) { } } - // avoid programs like emacs nagging the user to define this - char16_t var[8]; - if (!__imp_GetEnvironmentVariableW(u"TERM", var, 8)) { - __imp_SetEnvironmentVariableW(u"TERM", u"xterm-256color"); - } - // allocate memory for stack and argument block _mmi.p = _mmi.s; _mmi.n = ARRAYLEN(_mmi.s); @@ -200,6 +201,34 @@ static abi wontreturn void WinInit(const char16_t *cmdline) { struct WinArgs *wa = (struct WinArgs *)(stackaddr + (stacksize - sizeof(struct WinArgs))); + // define $TERM if it's not already present + // programs like emacs will stop the world and nag if it's not set + if (!HasEnvironmentVariable(u"TERM")) { + __imp_SetEnvironmentVariableW(u"TERM", u"xterm-256color"); + } + + // define $USER as GetUserName() if not set + // Windows doesn't define this environment variable by default + uint32_t vsize = ARRAYLEN(wa->tmp16); + if (!HasEnvironmentVariable(u"USER") && + __imp_GetUserNameW(&wa->tmp16, &vsize)) { + __imp_SetEnvironmentVariableW(u"USER", wa->tmp16); + } + + // define $HOME as $HOMEDRIVE$HOMEPATH if not set + // Windows doesn't define this environment variable by default + uint32_t vlen; + if (!HasEnvironmentVariable(u"HOME") && + (vlen = __imp_GetEnvironmentVariableW(u"HOMEDRIVE", wa->tmp16, + ARRAYLEN(wa->tmp16))) < + ARRAYLEN(wa->tmp16) && + (vlen += __imp_GetEnvironmentVariableW(u"HOMEPATH", wa->tmp16 + vlen, + ARRAYLEN(wa->tmp16) - vlen)) < + ARRAYLEN(wa->tmp16) && + vlen) { + __imp_SetEnvironmentVariableW(u"HOME", wa->tmp16); + } + // parse utf-16 command into utf-8 argv array in argument block int count = GetDosArgv(cmdline, wa->argblock, ARRAYLEN(wa->argblock), wa->argv, ARRAYLEN(wa->argv)); diff --git a/libc/stdbool.h b/libc/stdbool.h index 37173c92c..6e5002ee1 100644 --- a/libc/stdbool.h +++ b/libc/stdbool.h @@ -2,15 +2,22 @@ #define COSMOPOLITAN_LIBC_STDBOOL_H_ #ifndef __cplusplus -#if __STDC_VERSION__ + 0 >= 201112 + #define bool _Bool +#if defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L +#define true ((_Bool) + 1u) +#define false ((_Bool) + 0u) #else -#define bool unsigned char -#endif -#define true 1 +#define true 1 #define false 0 +#endif + +#else /* __cplusplus */ + +#define _Bool bool + #endif /* __cplusplus */ -#define __bool_true_false_are_defined +#define __bool_true_false_are_defined 1 #endif /* COSMOPOLITAN_LIBC_STDBOOL_H_ */ diff --git a/libc/stdio/fmt.c b/libc/stdio/fmt.c index f35116953..12de10d52 100644 --- a/libc/stdio/fmt.c +++ b/libc/stdio/fmt.c @@ -43,7 +43,6 @@ #include "libc/fmt/conv.h" #include "libc/fmt/divmod10.internal.h" #include "libc/fmt/itoa.h" -#include "libc/serialize.h" #include "libc/intrin/bsr.h" #include "libc/intrin/nomultics.internal.h" #include "libc/intrin/safemacros.internal.h" @@ -53,6 +52,7 @@ #include "libc/mem/mem.h" #include "libc/mem/reverse.internal.h" #include "libc/runtime/internal.h" +#include "libc/serialize.h" #include "libc/str/str.h" #include "libc/str/strwidth.h" #include "libc/str/tab.internal.h" @@ -800,7 +800,7 @@ int __fmt(void *fn, void *arg, const char *format, va_list va) { x = 0; lasterr = errno; - out = fn ? fn : __fmt_noop; + out = fn ? fn : (void *)__fmt_noop; while (*format) { if (*format != '%') { diff --git a/libc/stdio/ftw.c b/libc/stdio/ftw.c index 9be3bb03e..9440b75f8 100644 --- a/libc/stdio/ftw.c +++ b/libc/stdio/ftw.c @@ -50,5 +50,5 @@ int ftw(const char *dirpath, /* The following cast assumes that calling a function with one * argument more than it needs behaves as expected. This is * actually undefined, but works on all real-world machines. */ - return nftw(dirpath, (int (*)())fn, fd_limit, FTW_PHYS); + return nftw(dirpath, (void *)fn, fd_limit, FTW_PHYS); } diff --git a/libc/stdio/printargs.c b/libc/stdio/printargs.c index fa38e6ebf..eb7dbac63 100644 --- a/libc/stdio/printargs.c +++ b/libc/stdio/printargs.c @@ -84,6 +84,7 @@ static const char *FindNameById(const struct IdName *names, unsigned long id) { } static void PrintDependencies(const char *prologue) { +#ifdef __x86_64__ struct NtLinkedList *head = &NtGetPeb()->Ldr->InLoadOrderModuleList; struct NtLinkedList *ldr = head->Next; do { @@ -92,6 +93,7 @@ static void PrintDependencies(const char *prologue) { PRINT(" ☼ %.*!hs (%'zukb @ %p)", dll->FullDllName.Length, dll->FullDllName.Data, dll->SizeOfImage / 1024, dll->DllBase); } while ((ldr = ldr->Next) && ldr != head); +#endif } static void Print(const char *prologue) { @@ -624,6 +626,7 @@ textstartup void __printargs(const char *prologue) { if (GetConsoleMode(GetStdHandle(kNtStdErrorHandle), &cm)) PRINT(" %s", DescribeNtConsoleOutFlags(cm)); +#ifdef __x86_64__ PRINT(""); PRINT("TEB"); PRINT(" ☼ gs:0x%02x %s = %p", 0x00, "NtGetSeh()", _NtGetSeh()); @@ -640,6 +643,7 @@ textstartup void __printargs(const char *prologue) { PRINT(" ☼ gs:0x%02x %s = %p", 0x58, "NtGetTls()", _NtGetTls()); PRINT(" ☼ gs:0x%02x %s = %p", 0x60, "NtGetPeb()", NtGetPeb()); PRINT(" ☼ gs:0x%02x %s = %p", 0x68, "NtGetErr()", NtGetErr()); +#endif PRINT(""); PRINT("DEPENDENCIES"); diff --git a/libc/stdio/rdseed.c b/libc/stdio/rdseed.c index 2b88bb22f..4cde60469 100644 --- a/libc/stdio/rdseed.c +++ b/libc/stdio/rdseed.c @@ -27,7 +27,7 @@ * * If RDSEED isn't available, we'll try RDRAND (which we automatically * disable for microarchitectures where it's known to be slow or buggy). - * If RDRAND isn't available then we try getrandom(), RtlGenRandom(), or + * If RDRAND isn't available then we try getrandom(), ProcessPrng(), or * sysctl(KERN_ARND). If those aren't available then we try /dev/urandom * and if that fails, we use RDTSC and getpid(). * diff --git a/libc/stdio/rngset.c b/libc/stdio/rngset.c index 0c0823172..881eb9fa1 100644 --- a/libc/stdio/rngset.c +++ b/libc/stdio/rngset.c @@ -45,9 +45,6 @@ dontasan void *rngset(void *b, size_t n, uint64_t seed(void), size_t reseed) { size_t m; uint64_t x, t = 0; unsigned char *p = b; - if (IsAsan()) { - __asan_verify(b, n); - } if (!seed) { t = reseed; reseed = -1; diff --git a/libc/stdio/stdio.h b/libc/stdio/stdio.h index e8ec9473e..021aa7c3a 100644 --- a/libc/stdio/stdio.h +++ b/libc/stdio/stdio.h @@ -90,7 +90,7 @@ int fsetpos(FILE *, const fpos_t *) libcesque paramsnonnull(); FILE *tmpfile(void) libcesque __wur; char *tmpnam(char *) libcesque __wur; char *tmpnam_r(char *) libcesque __wur; -int system(const char *) libcesque; + FILE *popen(const char *, const char *) libcesque; /*───────────────────────────────────────────────────────────────────────────│─╗ diff --git a/libc/stdio/tmpnam.c b/libc/stdio/tmpnam.c index b15d42895..b0ab75b92 100644 --- a/libc/stdio/tmpnam.c +++ b/libc/stdio/tmpnam.c @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/stdio/rand.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" @@ -35,7 +34,6 @@ static char g_tmpnam[L_tmpnam]; * is only mutated on success */ char *tmpnam(char *buf) { - if (IsAsan()) __asan_verify(buf, L_tmpnam); char path[] = P_tmpdir "/tmpnam_XXXXXX"; for (int t = 0; t < 100; ++t) { int w = _rand64(); diff --git a/libc/str/BUILD.mk b/libc/str/BUILD.mk index 8a86f287a..1600b2f3a 100644 --- a/libc/str/BUILD.mk +++ b/libc/str/BUILD.mk @@ -88,7 +88,7 @@ o/$(MODE)/libc/str/windowstimetotimespec.o: private \ -O2 $(LIBC_STR_A_OBJS): private \ - COPTS += \ + CFLAGS += \ -fno-sanitize=all \ -Wframe-larger-than=4096 \ -Walloca-larger-than=4096 diff --git a/libc/str/djbsort.c b/libc/str/djbsort.c index abfc8f3a9..18299e517 100644 --- a/libc/str/djbsort.c +++ b/libc/str/djbsort.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/runtime.h" #include "libc/stdckdint.h" @@ -28,11 +27,6 @@ void djbsort_avx2(int32_t *, long); * D.J. Bernstein's outrageously fast integer sorting algorithm. */ void djbsort(int32_t *a, size_t n) { - size_t m; - if (IsAsan()) { - if (ckd_mul(&m, n, 4)) m = -1; - __asan_verify(a, m); - } if (n > 1) { #if defined(__x86_64__) && !defined(__chibicc__) if (X86_HAVE(AVX2)) { diff --git a/libc/str/isutf8.c b/libc/str/isutf8.c index ed0fb918e..ddca5e330 100644 --- a/libc/str/isutf8.c +++ b/libc/str/isutf8.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/intrin/likely.h" #include "libc/str/str.h" @@ -53,7 +52,6 @@ bool32 isutf8(const void *data, size_t size) { long c; const char *p, *e; if (size == -1) size = data ? strlen(data) : 0; - if (IsAsan()) __asan_verify(data, size); p = data; e = p + size; while (p < e) { diff --git a/libc/str/memmem.c b/libc/str/memmem.c index c9e43c8f0..51975a3ef 100644 --- a/libc/str/memmem.c +++ b/libc/str/memmem.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/intrin/likely.h" #include "libc/str/str.h" @@ -32,16 +31,14 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @param needlelen is its character count * @return pointer to first result or NULL if not found */ -void *memmem(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen) { +__vex void *memmem(const void *haystack, size_t haystacklen, const void *needle, + size_t needlelen) { #if defined(__x86_64__) && !defined(__chibicc__) char c; xmm_t n; const xmm_t *v; unsigned i, k, m; const char *p, *q, *e; - if (IsAsan()) __asan_verify(needle, needlelen); - if (IsAsan()) __asan_verify(haystack, haystacklen); if (!needlelen) return (void *)haystack; if (UNLIKELY(needlelen > haystacklen)) return 0; q = needle; diff --git a/libc/str/memrchr16.c b/libc/str/memrchr16.c index 517f4a9a0..15c61ba46 100644 --- a/libc/str/memrchr16.c +++ b/libc/str/memrchr16.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/limits.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" @@ -36,8 +35,8 @@ static inline const char16_t *memrchr16_pure(const char16_t *s, char16_t c, } #if defined(__x86_64__) && !defined(__chibicc__) -static inline const char16_t *memrchr16_sse(const char16_t *s, - char16_t c, size_t n) { +static inline const char16_t *memrchr16_sse(const char16_t *s, char16_t c, + size_t n) { size_t i; unsigned m; xmm_t v, t = {c, c, c, c, c, c, c, c}; @@ -67,11 +66,10 @@ static inline const char16_t *memrchr16_sse(const char16_t *s, * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -void *memrchr16(const void *s, int c, size_t n) { +__vex void *memrchr16(const void *s, int c, size_t n) { #if defined(__x86_64__) && !defined(__chibicc__) const void *r; if (!IsTiny() && X86_HAVE(SSE)) { - if (IsAsan()) __asan_verify(s, n * 2); r = memrchr16_sse(s, c, n); } else { r = memrchr16_pure(s, c, n); diff --git a/libc/str/rawmemchr.c b/libc/str/rawmemchr.c index a6f089f6b..d3b4a5523 100644 --- a/libc/str/rawmemchr.c +++ b/libc/str/rawmemchr.c @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" @@ -33,8 +32,7 @@ static inline const unsigned char *rawmemchr_pure(const unsigned char *s, #if defined(__x86_64__) && !defined(__chibicc__) typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); -static inline const char *rawmemchr_sse(const char *s, - unsigned char c) { +static inline const char *rawmemchr_sse(const char *s, unsigned char c) { unsigned k; unsigned m; const xmm_t *p; @@ -67,11 +65,10 @@ static inline uint64_t UncheckedAlignedRead64(const unsigned char *p) { * @param c is search byte which is masked with 255 * @return is pointer to first instance of c */ -void *rawmemchr(const void *s, int c) { +__vex void *rawmemchr(const void *s, int c) { #if defined(__x86_64__) && !defined(__chibicc__) const void *r; if (X86_HAVE(SSE)) { - if (IsAsan()) __asan_verify(s, 1); r = rawmemchr_sse(s, c); } else { r = rawmemchr_pure(s, c); diff --git a/libc/str/str.h b/libc/str/str.h index 18a91bd20..fdc97b244 100644 --- a/libc/str/str.h +++ b/libc/str/str.h @@ -167,7 +167,7 @@ wint_t towctrans(wint_t, wctrans_t) libcesque; int getsubopt(char **, char *const *, char **) libcesque paramsnonnull(); char *strsignal(int) returnsnonnull libcesque; -char *strerror(int) returnsnonnull dontthrow nocallback; +char *strerror(int) returnsnonnull dontthrow dontcallback; errno_t strerror_r(int, char *, size_t) libcesque; char *__xpg_strerror_r(int, char *, size_t) libcesque; diff --git a/libc/str/strcasecmp.c b/libc/str/strcasecmp.c index acc9d5a7a..edeb633fc 100644 --- a/libc/str/strcasecmp.c +++ b/libc/str/strcasecmp.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" #include "libc/str/tab.internal.h" @@ -34,8 +33,6 @@ int strcasecmp(const char *a, const char *b) { size_t i = 0; uint64_t v, w; if (a == b) return 0; - if (IsAsan()) __asan_verify_str(a); - if (IsAsan()) __asan_verify_str(b); if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) { for (; (uintptr_t)(a + i) & 7; ++i) { CheckEm: diff --git a/libc/str/strcasestr.c b/libc/str/strcasestr.c index 51bdcf138..cf46cb3f1 100644 --- a/libc/str/strcasestr.c +++ b/libc/str/strcasestr.c @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/str/tab.internal.h" typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); @@ -35,15 +34,13 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @asyncsignalsafe * @see strstr() */ -char *strcasestr(const char *haystack, const char *needle) { +__vex char *strcasestr(const char *haystack, const char *needle) { #if defined(__x86_64__) && !defined(__chibicc__) char c; size_t i; unsigned k, m; const xmm_t *p; xmm_t v, n1, n2, z = {0}; - if (IsAsan()) __asan_verify(needle, 1); - if (IsAsan()) __asan_verify(haystack, 1); if (haystack == needle || !*needle) return (char *)haystack; c = *needle; n1 = (xmm_t){c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; diff --git a/libc/str/strlen16.c b/libc/str/strlen16.c index 823d91d4d..cb9fe11c8 100644 --- a/libc/str/strlen16.c +++ b/libc/str/strlen16.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(16))); @@ -29,17 +28,15 @@ typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @return number of shorts (excluding NUL) * @asyncsignalsafe */ -size_t strlen16(const char16_t *s) { +__vex size_t strlen16(const char16_t *s) { #if defined(__x86_64__) && !defined(__chibicc__) size_t n; xmm_t z = {0}; unsigned m, k = (uintptr_t)s & 15; const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16); - if (IsAsan()) __asan_verify(s, 2); m = __builtin_ia32_pmovmskb128(*p == z) >> k << k; while (!m) m = __builtin_ia32_pmovmskb128(*++p == z); n = (const char16_t *)p + (__builtin_ctzl(m) >> 1) - s; - if (IsAsan()) __asan_verify(s, n * 2); return n; #else size_t n = 0; diff --git a/libc/str/strnlen_s.c b/libc/str/strnlen_s.c index 26595cd4a..c95d88b41 100644 --- a/libc/str/strnlen_s.c +++ b/libc/str/strnlen_s.c @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" static size_t strnlen_s_x64(const char *s, size_t n, size_t i) { @@ -48,7 +47,6 @@ static size_t strnlen_s_x64(const char *s, size_t n, size_t i) { size_t strnlen_s(const char *s, size_t n) { size_t i; if (!s) return 0; - if (IsAsan()) __asan_verify(s, n); for (i = 0; (uintptr_t)(s + i) & 7; ++i) { if (i == n || !s[i]) return i; } diff --git a/libc/str/strstr.c b/libc/str/strstr.c index 8cc041d02..b428851b0 100644 --- a/libc/str/strstr.c +++ b/libc/str/strstr.c @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); @@ -35,14 +34,12 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @see strcasestr() * @see memmem() */ -char *strstr(const char *haystack, const char *needle) { +__vex char *strstr(const char *haystack, const char *needle) { #if defined(__x86_64__) && !defined(__chibicc__) size_t i; unsigned k, m; const xmm_t *p; xmm_t v, n, z = {0}; - if (IsAsan()) __asan_verify(needle, 1); - if (IsAsan()) __asan_verify(haystack, 1); if (haystack == needle || !*needle) return (char *)haystack; n = (xmm_t){*needle, *needle, *needle, *needle, *needle, *needle, *needle, *needle, *needle, *needle, *needle, *needle, diff --git a/libc/str/wcslen.c b/libc/str/wcslen.c index 8cbd6ba35..9dbbfcbee 100644 --- a/libc/str/wcslen.c +++ b/libc/str/wcslen.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(16))); @@ -29,17 +28,15 @@ typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(16))); * @return number of wide characters (excluding NUL) * @asyncsignalsafe */ -size_t wcslen(const wchar_t *s) { +__vex size_t wcslen(const wchar_t *s) { #if defined(__x86_64__) && !defined(__chibicc__) size_t n; xmm_t z = {0}; unsigned m, k = (uintptr_t)s & 15; const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16); - if (IsAsan()) __asan_verify(s, 4); m = __builtin_ia32_pmovmskb128(*p == z) >> k << k; while (!m) m = __builtin_ia32_pmovmskb128(*++p == z); n = (const wchar_t *)p + (__builtin_ctzl(m) >> 2) - s; - if (IsAsan()) __asan_verify(s, n); return n; #else size_t n = 0; diff --git a/libc/str/wmemrchr.c b/libc/str/wmemrchr.c index 848e75ee5..acd2413a6 100644 --- a/libc/str/wmemrchr.c +++ b/libc/str/wmemrchr.c @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" -#include "libc/intrin/asan.internal.h" #include "libc/limits.h" #include "libc/nexgen32e/x86feature.h" #include "libc/stdckdint.h" @@ -38,7 +37,7 @@ static inline const wchar_t *wmemrchr_pure(const wchar_t *s, wchar_t c, #if defined(__x86_64__) && !defined(__chibicc__) static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c, - size_t n) { + size_t n) { size_t i; unsigned m; xmm_t v, t = {c, c, c, c}; @@ -68,16 +67,9 @@ static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c, * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -void *wmemrchr(const wchar_t *s, wchar_t c, size_t n) { +__vex void *wmemrchr(const wchar_t *s, wchar_t c, size_t n) { #if defined(__x86_64__) && !defined(__chibicc__) - size_t bytes; - const void *r; - if (IsAsan()) { - if (ckd_mul(&bytes, n, sizeof(wchar_t))) bytes = -1; - __asan_verify(s, bytes); - } - r = wmemrchr_sse(s, c, n); - return (void *)r; + return (void *)wmemrchr_sse(s, c, n); #else return (void *)wmemrchr_pure(s, c, n); #endif diff --git a/libc/sysv/consts.sh b/libc/sysv/consts.sh index 2a7a5cf22..1242f1e4a 100755 --- a/libc/sysv/consts.sh +++ b/libc/sysv/consts.sh @@ -234,7 +234,9 @@ syscon mmap MAP_INHERIT -1 -1 -1 -1 -1 -1 0x00000080 -1 # make syscon mmap MAP_HASSEMAPHORE 0 0 0x00000200 0x00000200 0x00000200 0 0x00000200 0 # does it matter on x86? syscon mmap MAP_NOSYNC 0 0 0 0 0x00000800 0 0 0 # flush to physical media only when necessary rather than gratuitously; be sure to use write() rather than ftruncate() with this! syscon mmap MAP_CONCEAL 0 0 0 0 0x00020000 0x00008000 0x00008000 0 # omit from core dumps; MAP_NOCORE on FreeBSD -syscon mmap MAP_JIT 0 0 0 0x00000800 0 0 0 0 # omit from core dumps; MAP_NOCORE on FreeBSD +syscon mmap MAP_JIT 0 0 0 0x00000800 0 0 0 0 # allocate region used for just-in-time compilation +syscon mmap MAP_NOCACHE 0 0 0x00000400 0x00000400 0 0 0 0 # don't cache pages for this mapping +syscon mmap MAP_NOEXTEND 0 0 0x00000100 0x00000100 0 0 0 0 # for MAP_FILE, don't change file size syscon compat MAP_NOCORE 0 0 0 0 0x00020000 0x00008000 0x00008000 0 # use MAP_CONCEAL syscon compat MAP_ANON 0x00000020 0x00000020 0x00001000 0x00001000 0x00001000 0x00001000 0x00001000 0x00000020 # bsd consensus; faked nt syscon compat MAP_EXECUTABLE 0x00001000 0x00001000 0 0 0 0 0 0 # ignored @@ -269,17 +271,18 @@ syscon madv MADV_WIPEONFORK 18 18 127 127 127 127 127 127 # T syscon madv MADV_KEEPONFORK 19 19 127 127 127 127 127 127 # TODO: add support ? syscon madv MADV_COLD 20 20 127 127 127 127 127 127 # TODO: add support ? syscon madv MADV_PAGEOUT 21 21 127 127 127 127 127 127 # TODO: add support ? -syscon madv MADV_POPULATE_READ 22 22 127 127 127 127 127 127 # TODO: add support ? -syscon madv MADV_POPULATE_WRITE 23 23 127 127 127 127 127 127 # TODO: add support ? -syscon madv MADV_DONTNEED_LOCKED 24 24 127 127 127 127 127 127 # TODO: add support ? +syscon madv MADV_POPULATE_READ 22 22 127 127 127 127 127 127 # TODO: add support ? +syscon madv MADV_POPULATE_WRITE 23 23 127 127 127 127 127 127 # TODO: add support ? +syscon madv MADV_DONTNEED_LOCKED 24 24 127 127 127 127 127 127 # TODO: add support ? syscon madv MADV_COLLAPSE 25 25 127 127 127 127 127 127 # TODO: add support ? syscon madv MADV_DOFORK 11 11 127 127 127 127 127 127 # TODO(jart): what is it? syscon madv MADV_DONTDUMP 16 16 127 127 127 127 127 127 # see MAP_CONCEAL in OpenBSD; TODO(jart): what is it? syscon madv MADV_DONTFORK 10 10 127 127 127 127 127 127 # TODO(jart): what is it? syscon madv MADV_HWPOISON 100 100 127 127 127 127 127 127 # TODO(jart): what is it? -syscon madv MADV_SOFT_OFFLINE 101 101 127 127 127 127 127 127 # TODO: add support ? +syscon madv MADV_SOFT_OFFLINE 101 101 127 127 127 127 127 127 # TODO: add support ? syscon madv MADV_REMOVE 9 9 127 127 127 127 127 127 # TODO(jart): what is it? syscon fadv POSIX_FADV_NOREUSE 5 5 127 127 5 127 5 127 # wut +syscon madv MADV_REMOVE 9 9 127 127 127 127 127 127 # TODO(jart): what is it? # mmap(), mprotect(), etc. # digital restrictions management for the people @@ -579,19 +582,19 @@ syscon clock CLOCK_REALTIME 0 0 0 0 0 0 0 0 # consensus syscon clock CLOCK_REALTIME_PRECISE 0 0 0 0 9 0 0 0 # syscon clock CLOCK_REALTIME_FAST 0 0 0 0 10 0 0 0 # syscon clock CLOCK_REALTIME_COARSE 5 5 0 0 10 0 0 2 # Linux 2.6.32+; bsd consensus; not available on RHEL5 -syscon clock CLOCK_MONOTONIC 1 1 1 6 4 3 3 1 # XNU/NT faked; could move backwards if NTP introduces negative leap second -syscon clock CLOCK_MONOTONIC_PRECISE 1 1 1 6 11 3 3 1 # -syscon clock CLOCK_MONOTONIC_FAST 1 1 1 6 12 3 3 1 # -syscon clock CLOCK_MONOTONIC_COARSE 6 6 1 6 12 3 3 1 # Linux 2.6.32+; bsd consensus; not available on RHEL5 -syscon clock CLOCK_MONOTONIC_RAW 4 4 127 4 127 127 127 127 # actually monotonic; not subject to NTP adjustments; Linux 2.6.28+; XNU/NT/FreeBSD/OpenBSD faked; not available on RHEL5 -syscon clock CLOCK_PROCESS_CPUTIME_ID 2 2 127 12 15 2 0x40000000 127 # NetBSD lets you bitwise a PID into clockid_t -syscon clock CLOCK_THREAD_CPUTIME_ID 3 3 127 16 14 4 0x20000000 127 # +syscon clock CLOCK_MONOTONIC 1 1 6 6 4 3 3 1 # XNU/NT faked; could move backwards if NTP introduces negative leap second +syscon clock CLOCK_MONOTONIC_PRECISE 1 1 6 6 11 3 3 1 # +syscon clock CLOCK_MONOTONIC_FAST 1 1 6 6 12 3 3 1 # +syscon clock CLOCK_MONOTONIC_COARSE 6 6 5 5 12 3 3 1 # Linux 2.6.32+; bsd consensus; not available on RHEL5 +syscon clock CLOCK_MONOTONIC_RAW 4 4 4 4 127 127 127 127 # actually monotonic; not subject to NTP adjustments; Linux 2.6.28+; XNU/NT/FreeBSD/OpenBSD faked; not available on RHEL5 +syscon clock CLOCK_PROCESS_CPUTIME_ID 2 2 12 12 15 2 0x40000000 4 # NetBSD lets you bitwise a PID into clockid_t +syscon clock CLOCK_THREAD_CPUTIME_ID 3 3 16 16 14 4 0x20000000 5 # syscon clock CLOCK_PROF 127 127 127 127 2 127 2 127 # syscon clock CLOCK_BOOTTIME 7 7 7 127 127 6 127 3 # syscon clock CLOCK_REALTIME_ALARM 8 8 127 127 127 127 127 127 # syscon clock CLOCK_BOOTTIME_ALARM 9 9 127 127 127 127 127 127 # syscon clock CLOCK_TAI 11 11 127 127 127 127 127 127 # -syscon clock CLOCK_UPTIME 127 127 127 127 5 5 127 127 # +syscon clock CLOCK_UPTIME 127 127 8 8 5 5 127 127 # syscon clock CLOCK_UPTIME_PRECISE 127 127 127 127 7 127 127 127 # syscon clock CLOCK_UPTIME_FAST 127 127 127 127 8 127 127 127 # syscon clock CLOCK_SECOND 127 127 127 127 13 127 127 127 # diff --git a/libc/sysv/consts/CLOCK_MONOTONIC.S b/libc/sysv/consts/CLOCK_MONOTONIC.S index d4cf1ff24..2275c6cf1 100644 --- a/libc/sysv/consts/CLOCK_MONOTONIC.S +++ b/libc/sysv/consts/CLOCK_MONOTONIC.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_MONOTONIC,1,1,1,6,4,3,3,1 +.syscon clock,CLOCK_MONOTONIC,1,1,6,6,4,3,3,1 diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S b/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S index 27762d022..225972c1d 100644 --- a/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S +++ b/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_MONOTONIC_COARSE,6,6,1,6,12,3,3,1 +.syscon clock,CLOCK_MONOTONIC_COARSE,6,6,5,5,12,3,3,1 diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S b/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S index 80bb43b66..0069c82cf 100644 --- a/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S +++ b/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_MONOTONIC_FAST,1,1,1,6,12,3,3,1 +.syscon clock,CLOCK_MONOTONIC_FAST,1,1,6,6,12,3,3,1 diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S b/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S index fdea24d20..e9e77f345 100644 --- a/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S +++ b/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_MONOTONIC_PRECISE,1,1,1,6,11,3,3,1 +.syscon clock,CLOCK_MONOTONIC_PRECISE,1,1,6,6,11,3,3,1 diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S b/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S index 5704b2138..1c158565e 100644 --- a/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S +++ b/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_MONOTONIC_RAW,4,4,127,4,127,127,127,127 +.syscon clock,CLOCK_MONOTONIC_RAW,4,4,4,4,127,127,127,127 diff --git a/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S b/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S index 2b8c354db..b4b39f501 100644 --- a/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S +++ b/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_PROCESS_CPUTIME_ID,2,2,127,12,15,2,0x40000000,127 +.syscon clock,CLOCK_PROCESS_CPUTIME_ID,2,2,12,12,15,2,0x40000000,4 diff --git a/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S b/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S index 7d5893688..3f3529ab7 100644 --- a/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S +++ b/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_THREAD_CPUTIME_ID,3,3,127,16,14,4,0x20000000,127 +.syscon clock,CLOCK_THREAD_CPUTIME_ID,3,3,16,16,14,4,0x20000000,5 diff --git a/libc/sysv/consts/CLOCK_UPTIME.S b/libc/sysv/consts/CLOCK_UPTIME.S index 15a0e414a..281eaa508 100644 --- a/libc/sysv/consts/CLOCK_UPTIME.S +++ b/libc/sysv/consts/CLOCK_UPTIME.S @@ -1,2 +1,2 @@ #include "libc/sysv/consts/syscon.internal.h" -.syscon clock,CLOCK_UPTIME,127,127,127,127,5,5,127,127 +.syscon clock,CLOCK_UPTIME,127,127,8,8,5,5,127,127 diff --git a/libc/sysv/consts/MAP_NOCACHE.S b/libc/sysv/consts/MAP_NOCACHE.S new file mode 100644 index 000000000..23ee0d40c --- /dev/null +++ b/libc/sysv/consts/MAP_NOCACHE.S @@ -0,0 +1,2 @@ +#include "libc/sysv/consts/syscon.internal.h" +.syscon mmap,MAP_NOCACHE,0,0,0x00000400,0x00000400,0,0,0,0 diff --git a/libc/sysv/consts/MAP_NOEXTEND.S b/libc/sysv/consts/MAP_NOEXTEND.S new file mode 100644 index 000000000..1d79cf695 --- /dev/null +++ b/libc/sysv/consts/MAP_NOEXTEND.S @@ -0,0 +1,2 @@ +#include "libc/sysv/consts/syscon.internal.h" +.syscon mmap,MAP_NOEXTEND,0,0,0x00000100,0x00000100,0,0,0,0 diff --git a/libc/sysv/consts/map.h b/libc/sysv/consts/map.h index 50657c9a8..04008f4cd 100644 --- a/libc/sysv/consts/map.h +++ b/libc/sysv/consts/map.h @@ -16,6 +16,8 @@ extern const int MAP_HASSEMAPHORE; extern const int MAP_INHERIT; extern const int MAP_JIT; extern const int MAP_LOCKED; +extern const int MAP_NOCACHE; +extern const int MAP_NOEXTEND; extern const int MAP_NONBLOCK; extern const int MAP_NORESERVE; extern const int MAP_NOSYNC; @@ -40,6 +42,7 @@ COSMOPOLITAN_C_END_ #define MAP_FIXED_NOREPLACE MAP_FIXED_NOREPLACE #define MAP_HASSEMAPHORE MAP_HASSEMAPHORE #define MAP_POPULATE MAP_POPULATE +#define MAP_NORESERVE MAP_NORESERVE #define MAP_ANON MAP_ANONYMOUS #define MAP_NOCORE MAP_CONCEAL diff --git a/libc/testlib/ezbenchcontrol.c b/libc/testlib/ezbenchcontrol.c index 0763a228f..f64263fe8 100644 --- a/libc/testlib/ezbenchcontrol.c +++ b/libc/testlib/ezbenchcontrol.c @@ -42,7 +42,7 @@ double __testlib_ezbenchcontrol(void) { } while (++Tries < 10 && (__testlib_getcore() != Core && __testlib_getinterrupts() > Interrupts)); if (Tries == 10) { - tinyprint(2, "warning: failed to accurately benchmark control\n"); + tinyprint(2, "warning: failed to accurately benchmark control\n", NULL); } strcpy(host, "unknown"); gethostname(host, 64); diff --git a/libc/testlib/showerror.c b/libc/testlib/showerror.c index 56238317d..713afbb67 100644 --- a/libc/testlib/showerror.c +++ b/libc/testlib/showerror.c @@ -101,7 +101,7 @@ static void testlib_showerror_(int line, // _weaken(kvprintf)(fmt, va); tinyprint(2, "\n", NULL); } else { - tinyprint(2, "\t[missing kvprintf]\n"); + tinyprint(2, "\t[missing kvprintf]\n", NULL); } } tinyprint(2, "\t", SUBTLE, strerror(e), RESET, "\n\t", SUBTLE, diff --git a/libc/testlib/testlib.h b/libc/testlib/testlib.h index e2dedd68f..50a0df919 100644 --- a/libc/testlib/testlib.h +++ b/libc/testlib/testlib.h @@ -167,22 +167,22 @@ void TearDownOnce(void); #define ASSERT_IN(NEEDLE, GOT) \ assertContains(FILIFU sizeof(*(NEEDLE)), NEEDLE, GOT, #GOT, true) -#define ASSERT_BINEQ(WANT, GOT) \ - _Generic((WANT)[0], char \ - : assertBinaryEquals_hex, default \ - : assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, true) -#define ASSERT_BINNE(NOPE, GOT) \ - _Generic((NOPE)[0], char \ - : assertBinaryNotEquals_hex, default \ - : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true) -#define ASSERT_BINEQN(WANT, GOT, N) \ - _Generic((WANT)[0], char \ - : assertBinaryEquals_hex, default \ - : assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, true) -#define ASSERT_BINNEN(NOPE, GOT, N) \ - _Generic((NOPE)[0], char \ - : assertBinaryNotEquals_hex, default \ - : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true) +#define ASSERT_BINEQ(WANT, GOT) \ + _Generic((WANT)[0], \ + char: assertBinaryEquals_hex, \ + default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, true) +#define ASSERT_BINNE(NOPE, GOT) \ + _Generic((NOPE)[0], \ + char: assertBinaryNotEquals_hex, \ + default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true) +#define ASSERT_BINEQN(WANT, GOT, N) \ + _Generic((WANT)[0], \ + char: assertBinaryEquals_hex, \ + default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, true) +#define ASSERT_BINNEN(NOPE, GOT, N) \ + _Generic((NOPE)[0], \ + char: assertBinaryNotEquals_hex, \ + default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true) #define ASSERT_FLOAT_EQ(WANT, GOT) \ assertLongDoubleEquals(FILIFU WANT, GOT, #GOT, true) @@ -243,22 +243,22 @@ void TearDownOnce(void); #define EXPECT_IN(NEEDLE, GOT) \ assertContains(FILIFU sizeof(*(NEEDLE)), NEEDLE, GOT, #GOT, false) -#define EXPECT_BINEQ(WANT, GOT) \ - _Generic((WANT)[0], char \ - : assertBinaryEquals_hex, default \ - : assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, false) -#define EXPECT_BINNE(NOPE, GOT) \ - _Generic((NOPE)[0], char \ - : assertBinaryNotEquals_hex, default \ - : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false) -#define EXPECT_BINEQN(WANT, GOT, N) \ - _Generic((WANT)[0], char \ - : assertBinaryEquals_hex, default \ - : assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, false) -#define EXPECT_BINNEN(NOPE, GOT, N) \ - _Generic((NOPE)[0], char \ - : assertBinaryNotEquals_hex, default \ - : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false) +#define EXPECT_BINEQ(WANT, GOT) \ + _Generic((WANT)[0], \ + char: assertBinaryEquals_hex, \ + default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, false) +#define EXPECT_BINNE(NOPE, GOT) \ + _Generic((NOPE)[0], \ + char: assertBinaryNotEquals_hex, \ + default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false) +#define EXPECT_BINEQN(WANT, GOT, N) \ + _Generic((WANT)[0], \ + char: assertBinaryEquals_hex, \ + default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, false) +#define EXPECT_BINNEN(NOPE, GOT, N) \ + _Generic((NOPE)[0], \ + char: assertBinaryNotEquals_hex, \ + default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false) #define EXPECT_FLOAT_EQ(WANT, GOT) \ assertLongDoubleEquals(FILIFU WANT, GOT, #GOT, false) diff --git a/libc/thread/makecontext.c b/libc/thread/makecontext.c index d3e93a85c..0108979f7 100644 --- a/libc/thread/makecontext.c +++ b/libc/thread/makecontext.c @@ -30,7 +30,8 @@ typedef double vect __attribute__((__vector_size__(16), __aligned__(16))); struct Gadget { - void (*func)(); + void (*func)(long, long, long, long, long, long, // + vect, vect, vect, vect, vect, vect); long longs[6]; vect vects[6]; }; @@ -89,7 +90,7 @@ static void runcontext(struct Gadget *call, ucontext_t *link) { * @param argc is effectively ignored (see notes above) * @see setcontext(), getcontext(), swapcontext() */ -void makecontext(ucontext_t *uc, void func(), int argc, ...) { +void makecontext(ucontext_t *uc, void *func, int argc, ...) { va_list va; long sp, sb; struct Gadget *call; diff --git a/libc/thread/tls.h b/libc/thread/tls.h index a9b689a3f..b5ef8b793 100644 --- a/libc/thread/tls.h +++ b/libc/thread/tls.h @@ -39,7 +39,7 @@ struct CosmoTib { void **tib_keys; void *tib_nsync; void *tib_todo[7]; -}; +} __attribute__((__aligned__(64))); extern int __threaded; extern char __tls_morphed; diff --git a/libc/tinymath/fma.c b/libc/tinymath/fma.c index 2eaf00cb0..72a0ba9a0 100644 --- a/libc/tinymath/fma.c +++ b/libc/tinymath/fma.c @@ -26,6 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +#include "libc/nexgen32e/x86feature.h" asm(".ident\t\"\\n\\n\ Musl libc (MIT License)\\n\ @@ -92,41 +93,51 @@ static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y) */ double fma(double x, double y, double z) { -#if defined(__x86_64__) && defined(__FMA__) && defined(__FAST_MATH__) +#if defined(__x86_64__) && defined(__FMA__) // Intel Haswell+ (c. 2013) // AMD Piledriver+ (c. 2011) asm("vfmadd132sd\t%1,%2,%0" : "+x"(x) : "x"(y), "x"(z)); return x; -#elif defined(__x86_64__) && defined(__FMA4__) && defined(__FAST_MATH__) +#elif defined(__x86_64__) && defined(__FMA4__) // AMD Bulldozer+ (c. 2011) asm("vfmaddsd\t%3,%2,%1,%0" : "=x"(x) : "x"(x), "x"(y), "x"(z)); return x; -#elif defined(__aarch64__) && defined(__FAST_MATH__) +#elif defined(__aarch64__) asm("fmadd\t%d0,%d1,%d2,%d3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); return x; -#elif defined(__powerpc64__) && defined(__FAST_MATH__) +#elif defined(__powerpc64__) asm("fmadd\t%0,%1,%2,%3" : "=d"(x) : "d"(x), "d"(y), "d"(z)); return x; -#elif defined(__riscv) && __riscv_flen >= 64 && defined(__FAST_MATH__) +#elif defined(__riscv) && __riscv_flen >= 64 asm("fmadd.d\t%0,%1,%2,%3" : "=f"(x) : "f"(x), "f"(y), "f"(z)); return x; -#elif defined(__s390x__) && defined(__FAST_MATH__) +#elif defined(__s390x__) asm("madbr\t%0,\t%1,\t%2" : "+f"(z) : "f"(x), "f"(y)); return z; #else -// #pragma STDC FENV_ACCESS ON +/* #pragma STDC FENV_ACCESS ON */ + +#ifdef __x86_64__ + if (X86_HAVE(FMA)) { + asm("vfmadd132sd\t%1,%2,%0" : "+x"(x) : "x"(y), "x"(z)); + return x; + } else if (X86_HAVE(FMA4)) { + asm("vfmaddsd\t%3,%2,%1,%0" : "=x"(x) : "x"(x), "x"(y), "x"(z)); + return x; + } +#endif /* normalize so top 10bits and last bit are 0 */ struct num nx, ny, nz; @@ -268,3 +279,7 @@ double fma(double x, double y, double z) #endif /* __x86_64__ */ } + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +__weak_reference(fma, fmal); +#endif diff --git a/libc/tinymath/fmaf.c b/libc/tinymath/fmaf.c index 7d651b650..c65a3d481 100644 --- a/libc/tinymath/fmaf.c +++ b/libc/tinymath/fmaf.c @@ -26,6 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +#include "libc/nexgen32e/x86feature.h" #include "libc/runtime/fenv.h" asm(".ident\t\"\\n\\n\ @@ -110,7 +111,7 @@ float fmaf(float x, float y, float z) so direct double-precision arithmetic suffices, except where double rounding occurs. */ - /* #pragma STDC FENV_ACCESS ON */ +/* #pragma STDC FENV_ACCESS ON */ double xy, result; union {double f; uint64_t i;} u; int e; diff --git a/libc/tinymath/fmal.c b/libc/tinymath/fmal.c new file mode 100644 index 000000000..cefdca2d6 --- /dev/null +++ b/libc/tinymath/fmal.c @@ -0,0 +1,287 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Copyright (c) 2004-2005 David Schultz │ +│ All rights reserved. │ +│ │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ +│ │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/math.h" +#include "libc/runtime/fenv.h" +#include "libc/tinymath/freebsd.internal.h" +#include "libc/tinymath/ldshape.internal.h" +#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 + +asm(".ident\t\"\\n\\n\ +FreeBSD libm (BSD-2 License)\\n\ +Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); +asm(".include \"libc/disclaimer.inc\""); +// clang-format off + +#if LDBL_MANT_DIG == 64 +#define LASTBIT(u) (u.i.m & 1) +#define SPLIT (0x1p32L + 1) +#elif LDBL_MANT_DIG == 113 +#define LASTBIT(u) (u.i.lo & 1) +#define SPLIT (0x1p57L + 1) +#endif + +/* + * A struct dd represents a floating-point number with twice the precision + * of a long double. We maintain the invariant that "hi" stores the high-order + * bits of the result. + */ +struct dd { + long double hi; + long double lo; +}; + +/* + * Compute a+b exactly, returning the exact result in a struct dd. We assume + * that both a and b are finite, but make no assumptions about their relative + * magnitudes. + */ +static inline struct dd dd_add(long double a, long double b) { + struct dd ret; + long double s; + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + return (ret); +} + +/* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline long double add_adjusted(long double a, long double b) { + struct dd sum; + union ldshape u; + sum = dd_add(a, b); + if (sum.lo != 0) { + u.f = sum.hi; + if (!LASTBIT(u)) sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline long double add_and_denormalize(long double a, long double b, + int scale) { + struct dd sum; + int bits_lost; + union ldshape u; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + u.f = sum.hi; + bits_lost = -u.i.se - scale + 1; + if ((bits_lost != 1) ^ LASTBIT(u)) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return scalbnl(sum.hi, scale); +} + +/* + * Compute a*b exactly, returning the exact result in a struct dd. We assume + * that both a and b are normalized, so no underflow or overflow will occur. + * The current rounding mode must be round-to-nearest. + */ +static inline struct dd dd_mul(long double a, long double b) { + struct dd ret; + long double ha, hb, la, lb, p, q; + + p = a * SPLIT; + ha = a - p; + ha += p; + la = a - ha; + + p = b * SPLIT; + hb = b - p; + hb += p; + lb = b - hb; + + p = ha * hb; + q = ha * lb + la * hb; + + ret.hi = p + q; + ret.lo = p - ret.hi + q + la * lb; + return (ret); +} + +/* + * Fused multiply-add: Compute x * y + z with a single rounding error. + * + * We use scaling to avoid overflow/underflow, along with the + * canonical precision-doubling technique adapted from: + * + * Dekker, T. A Floating-Point Technique for Extending the + * Available Precision. Numer. Math. 18, 224-242 (1971). + */ +long double fmal(long double x, long double y, long double z) { +/* #pragma STDC FENV_ACCESS ON */ + long double xs, ys, zs, adj; + struct dd xy, r; + int oround; + int ex, ey, ez; + int spread; + + /* + * Handle special cases. The order of operations and the particular + * return values here are crucial in handling special cases involving + * infinities, NaNs, overflows, and signed zeroes correctly. + */ + if (!isfinite(x) || !isfinite(y)) return x * y + z; + if (!isfinite(z)) return z; + if (x == 0.0 || y == 0.0) return x * y + z; + if (z == 0.0) return x * y; + + xs = frexpl(x, &ex); + ys = frexpl(y, &ey); + zs = frexpl(z, &ez); + oround = fegetround(); + spread = ex + ey - ez; + + /* + * If x * y and z are many orders of magnitude apart, the scaling + * will overflow, so we handle these cases specially. Rounding + * modes other than FE_TONEAREST are painful. + */ + if (spread < -LDBL_MANT_DIG) { +#ifdef FE_INEXACT + feraiseexcept(FE_INEXACT); +#endif +#ifdef FE_UNDERFLOW + if (!isnormal(z)) feraiseexcept(FE_UNDERFLOW); +#endif + switch (oround) { + default: /* FE_TONEAREST */ + return z; +#ifdef FE_TOWARDZERO + case FE_TOWARDZERO: + if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) + return z; + else + return nextafterl(z, 0); +#endif +#ifdef FE_DOWNWARD + case FE_DOWNWARD: + if ((x > 0.0) ^ (y < 0.0)) + return (z); + else + return nextafterl(z, -INFINITY); +#endif +#ifdef FE_UPWARD + case FE_UPWARD: + if ((x > 0.0) ^ (y < 0.0)) + return nextafterl(z, INFINITY); + else + return (z); +#endif + } + } + if (spread <= LDBL_MANT_DIG * 2) + zs = scalbnl(zs, -spread); + else + zs = copysignl(LDBL_MIN, zs); + + fesetround(FE_TONEAREST); + + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ + xy = dd_mul(xs, ys); + r = dd_add(xy.hi, zs); + + spread = ex + ey; + + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ + fesetround(oround); + volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ + return xy.hi + vzs + scalbnl(xy.lo, spread); + } + + if (oround != FE_TONEAREST) { + /* + * There is no need to worry about double rounding in directed + * rounding modes. + * But underflow may not be raised correctly, example in downward rounding: + * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L) + */ + long double ret; +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + int e = fetestexcept(FE_INEXACT); + feclearexcept(FE_INEXACT); +#endif + fesetround(oround); + adj = r.lo + xy.lo; + ret = scalbnl(r.hi + adj, spread); +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT)) + feraiseexcept(FE_UNDERFLOW); + else if (e) + feraiseexcept(FE_INEXACT); +#endif + return ret; + } + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogbl(r.hi) > -16383) + return scalbnl(r.hi + adj, spread); + else + return add_and_denormalize(r.hi, adj, spread); +} + +#endif diff --git a/libc/tinymath/ilogb.c b/libc/tinymath/ilogb.c index 429d17a27..fb327610d 100644 --- a/libc/tinymath/ilogb.c +++ b/libc/tinymath/ilogb.c @@ -40,7 +40,7 @@ asm(".include \"libc/disclaimer.inc\""); */ int ilogb(double x) { - // #pragma STDC FENV_ACCESS ON +/* #pragma STDC FENV_ACCESS ON */ union {double f; uint64_t i;} u = {x}; uint64_t i = u.i; int e = i>>52 & 0x7ff; diff --git a/libc/tinymath/ilogbf.c b/libc/tinymath/ilogbf.c index 0d91f0cbc..6e3297604 100644 --- a/libc/tinymath/ilogbf.c +++ b/libc/tinymath/ilogbf.c @@ -40,7 +40,7 @@ asm(".include \"libc/disclaimer.inc\""); */ int ilogbf(float x) { - // #pragma STDC FENV_ACCESS ON +/* #pragma STDC FENV_ACCESS ON */ union {float f; uint32_t i;} u = {x}; uint32_t i = u.i; int e = i>>23 & 0xff; diff --git a/libc/tinymath/nearbyint.c b/libc/tinymath/nearbyint.c index ee8ae70b3..c8b853ce5 100644 --- a/libc/tinymath/nearbyint.c +++ b/libc/tinymath/nearbyint.c @@ -29,7 +29,7 @@ double nearbyint(double x) { asm("fidbra\t%0,0,%1,4" : "=f"(x) : "f"(x)); #else #ifdef FE_INEXACT - // #pragma STDC FENV_ACCESS ON + /* #pragma STDC FENV_ACCESS ON */ int e; e = fetestexcept(FE_INEXACT); #endif diff --git a/libc/tinymath/nearbyintf.c b/libc/tinymath/nearbyintf.c index e9b50cf29..332e15a65 100644 --- a/libc/tinymath/nearbyintf.c +++ b/libc/tinymath/nearbyintf.c @@ -24,7 +24,7 @@ */ float nearbyintf(float x) { #ifdef FE_INEXACT - // #pragma STDC FENV_ACCESS ON + /* #pragma STDC FENV_ACCESS ON */ int e; e = fetestexcept(FE_INEXACT); #endif diff --git a/libc/tinymath/nearbyintl.c b/libc/tinymath/nearbyintl.c index 353b3d905..e020916ae 100644 --- a/libc/tinymath/nearbyintl.c +++ b/libc/tinymath/nearbyintl.c @@ -25,7 +25,7 @@ */ long double nearbyintl(long double x) { #ifdef FE_INEXACT - // #pragma STDC FENV_ACCESS ON + /* #pragma STDC FENV_ACCESS ON */ int e; e = fetestexcept(FE_INEXACT); #endif diff --git a/libc/tinymath/pow.c b/libc/tinymath/pow.c index 94195c97f..4e223fb54 100644 --- a/libc/tinymath/pow.c +++ b/libc/tinymath/pow.c @@ -154,7 +154,7 @@ static inline double_t log_inline(uint64_t ix, double_t *tail) a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +forceinline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) { double_t scale, y; @@ -196,7 +196,7 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ -static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) +forceinline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) { uint32_t abstop; uint64_t ki, idx, top, sbits; diff --git a/libc/tinymath/rempio2large.c b/libc/tinymath/rempio2large.c index cc6e643f5..4f2b2ad22 100644 --- a/libc/tinymath/rempio2large.c +++ b/libc/tinymath/rempio2large.c @@ -28,6 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" + asm(".ident\t\"\\n\\n\ fdlibm (fdlibm license)\\n\ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); diff --git a/libc/x/x.h b/libc/x/x.h index 1be0a30bf..39335fc91 100644 --- a/libc/x/x.h +++ b/libc/x/x.h @@ -40,39 +40,41 @@ COSMOPOLITAN_C_START_ int xwrite(int, const void *, uint64_t); void xdie(void) wontreturn; char *xdtoa(double) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xdtoaf(float) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xdtoal(long double) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; void *xmalloc(size_t) attributeallocsize((1)) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; void *xrealloc(void *, size_t) - attributeallocsize((2)) dontthrow nocallback __wur; + attributeallocsize((2)) dontthrow dontcallback __wur; void *xcalloc(size_t, size_t) attributeallocsize((1, 2)) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; void *xvalloc(size_t) attributeallocsize((1)) returnsaligned((65536)) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; void *xmemalign(size_t, size_t) attributeallocalign((1)) attributeallocsize((2)) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; -void *xmemalignzero(size_t, size_t) attributeallocalign((1)) attributeallocsize( - (2)) returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; +void *xmemalignzero(size_t, size_t) attributeallocalign((1)) + attributeallocsize((2)) + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xstrdup(const char *) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xstrndup(const char *, size_t) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xstrcat(const char *, ...) paramsnonnull((1)) nullterminated() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; #define xstrcat(...) (xstrcat)(__VA_ARGS__, NULL) char *xstrmul(const char *, size_t) paramsnonnull((1)) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xinet_ntop(int, const void *) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; void *xunbinga(size_t, const char16_t *) - attributeallocalign((1)) returnspointerwithnoaliases dontthrow - nocallback __wur returnsnonnull dontthrow nocallback __wur returnsnonnull; -void *xunbing(const char16_t *) returnspointerwithnoaliases dontthrow - nocallback __wur returnsnonnull dontthrow nocallback __wur returnsnonnull; + attributeallocalign((1)) returnspointerwithnoaliases dontthrow dontcallback + __wur returnsnonnull dontthrow dontcallback __wur returnsnonnull; +void *xunbing(const char16_t *) + returnspointerwithnoaliases dontthrow dontcallback __wur + returnsnonnull dontthrow dontcallback __wur returnsnonnull; char16_t *utf8to16(const char *, size_t, size_t *) __wur; char *utf16to8(const char16_t *, size_t, size_t *) __wur; wchar_t *utf8to32(const char *, size_t, size_t *) __wur; @@ -84,11 +86,11 @@ char *xstripexts(const char *) __wur; void *xload(_Atomic(void *) *, const void *, size_t, size_t); int rmrf(const char *); char *xbasename(const char *) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xdirname(const char *) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xjoinpaths(const char *, const char *) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; void xfixpath(void); void *xslurp(const char *, size_t *) paramsnonnull((1)) returnspointerwithnoaliases returnsaligned((4096)) __wur; diff --git a/libc/x/xasprintf.h b/libc/x/xasprintf.h index 7e4049bae..4d92a3db3 100644 --- a/libc/x/xasprintf.h +++ b/libc/x/xasprintf.h @@ -3,9 +3,9 @@ COSMOPOLITAN_C_START_ char *xasprintf(const char *, ...) paramsnonnull((1)) - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; char *xvasprintf(const char *, va_list) paramsnonnull() - returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull; + returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull; COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_LIBC_X_XASPRINTF_H_ */ diff --git a/test/libc/calls/madvise_test.c b/test/libc/calls/madvise_test.c index a5de04344..d1eb03b5e 100644 --- a/test/libc/calls/madvise_test.c +++ b/test/libc/calls/madvise_test.c @@ -20,6 +20,7 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/runtime/runtime.h" +#include "libc/sysv/consts/auxv.h" #include "libc/sysv/consts/madv.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" @@ -63,7 +64,9 @@ TEST(madvise, subPages) { char *p; ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0))); - ASSERT_SYS(0, 0, madvise(p + 4096, FRAMESIZE - 4096, MADV_WILLNEED)); + ASSERT_SYS(0, 0, + madvise(p + getauxval(AT_PAGESZ), FRAMESIZE - getauxval(AT_PAGESZ), + MADV_WILLNEED)); ASSERT_SYS(0, 0, munmap(p, FRAMESIZE)); } diff --git a/test/libc/intrin/kprintf_test.c b/test/libc/intrin/kprintf_test.c index a096c888d..f091d48b0 100644 --- a/test/libc/intrin/kprintf_test.c +++ b/test/libc/intrin/kprintf_test.c @@ -20,13 +20,13 @@ #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/errno.h" -#include "libc/serialize.h" #include "libc/limits.h" #include "libc/log/libfatal.internal.h" #include "libc/macros.internal.h" #include "libc/runtime/memtrack.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" +#include "libc/serialize.h" #include "libc/stdio/rand.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" diff --git a/test/libc/nexgen32e/gclongjmp_test.c b/test/libc/nexgen32e/gclongjmp_test.c index a31f4a786..c567dda51 100644 --- a/test/libc/nexgen32e/gclongjmp_test.c +++ b/test/libc/nexgen32e/gclongjmp_test.c @@ -96,6 +96,10 @@ TEST(gc, torture) { for (i = 0; i < n; ++i) EXPECT_SYS(0, 0, pthread_join(t[i], 0)); } +#if defined(__GNUC__) && __GNUC__ >= 12 +#pragma GCC diagnostic ignored "-Winfinite-recursion" +#endif + void crawl2(jmp_buf jb, const char *path) { if (!strcmp(path, "/") || !strcmp(path, ".")) gclongjmp(jb, 1); crawl2(jb, gc(xdirname(path))); diff --git a/test/libc/runtime/tls_test.c b/test/libc/runtime/tls_test.c index 66fbf9679..ef8f086cc 100644 --- a/test/libc/runtime/tls_test.c +++ b/test/libc/runtime/tls_test.c @@ -31,6 +31,8 @@ _Thread_local long y[1] = {40}; _Alignas(A) _Thread_local long a; dontubsan void *Worker(void *arg) { + ASSERT_EQ(A, _Alignof(a)); + ASSERT_EQ(0, (uintptr_t)&a & (_Alignof(a) - 1)); ASSERT_EQ(42, x + y[0] + z); ASSERT_EQ(0, (intptr_t)&a & (A - 1)); if (IsAsan()) { @@ -41,6 +43,7 @@ dontubsan void *Worker(void *arg) { TEST(tls, test) { ASSERT_EQ(A, _Alignof(a)); + ASSERT_EQ(0, (uintptr_t)&a & (_Alignof(a) - 1)); ASSERT_EQ(0, sizeof(struct CosmoTib) % A); ASSERT_EQ(0, (intptr_t)__get_tls() & (A - 1)); EXPECT_EQ(2, z); diff --git a/test/libc/runtime/zipos_test.c b/test/libc/runtime/zipos_test.c index c172a737d..621c3bbea 100644 --- a/test/libc/runtime/zipos_test.c +++ b/test/libc/runtime/zipos_test.c @@ -21,7 +21,6 @@ #include "libc/errno.h" #include "libc/limits.h" #include "libc/mem/gc.h" -#include "libc/mem/gc.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" #include "libc/runtime/zipos.internal.h" diff --git a/test/libc/x/xstrcat_test.c b/test/libc/x/xstrcat_test.c index 3821a60d6..733736de4 100644 --- a/test/libc/x/xstrcat_test.c +++ b/test/libc/x/xstrcat_test.c @@ -35,6 +35,10 @@ TEST(xstrcat, pointerAbuse) { EXPECT_STREQ("hi there\n", gc(xstrcat("hi", ' ', "there", '\n'))); } +#if defined(__GNUC__) && __GNUC__ >= 12 +#pragma GCC diagnostic ignored "-Wuse-after-free" +#endif + int hard_static(void) { char *b, *p; p = b = malloc(16); diff --git a/test/libcxx/BUILD.mk b/test/libcxx/BUILD.mk index c646ddd56..836e6da8c 100644 --- a/test/libcxx/BUILD.mk +++ b/test/libcxx/BUILD.mk @@ -14,9 +14,13 @@ TEST_LIBCXX_TESTS = $(TEST_LIBCXX_COMS:%=%.ok) TEST_LIBCXX_DIRECTDEPS = \ LIBC_CALLS \ LIBC_INTRIN \ + LIBC_LOG \ LIBC_NEXGEN32E \ LIBC_RUNTIME \ LIBC_STDIO \ + LIBC_SYSV \ + LIBC_THREAD \ + LIBC_TINYMATH \ THIRD_PARTY_LIBCXX \ THIRD_PARTY_OPENMP @@ -37,7 +41,8 @@ o/$(MODE)/test/libcxx/%.com.dbg: \ $(TEST_LIBCXX_OBJS): private CCFLAGS += -fexceptions -frtti -o/$(MODE)/test/libcxx/openmp_test.o: private CXXFLAGS += -fopenmp -O3 +o/$(MODE)/test/libcxx/openmp_test.o: private CXXFLAGS += -fopenmp +o/$(MODE)/test/libcxx/openmp_test.com.runs: private QUOTA += -C100 .PHONY: o/$(MODE)/test/libcxx o/$(MODE)/test/libcxx: \ diff --git a/test/libcxx/openmp_test.cc b/test/libcxx/openmp_test.cc index ecf5ef3c2..7a8e7782c 100644 --- a/test/libcxx/openmp_test.cc +++ b/test/libcxx/openmp_test.cc @@ -16,143 +16,330 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" -#include "libc/calls/struct/timespec.h" -#include "libc/fmt/itoa.h" -#include "libc/inttypes.h" -#include "libc/runtime/runtime.h" -#include "libc/stdio/stdio.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/clock.h" -#include "third_party/double-conversion/double-to-string.h" -#include "third_party/double-conversion/utils.h" -#include "third_party/openmp/omp.h" +#include +#include +#include +#include +#include +#include -#ifndef __FAST_MATH__ -#define FLAWLESS 0 -#else -#define FLAWLESS 1e-05 -#endif +#define PRECISION 2e-6 +#define LV1DCACHE 49152 +#define THRESHOLD 3000000 #if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__) -#define ITERATIONS 10 +#define ITERATIONS 5 #else #define ITERATIONS 1 #endif -// m×n → (m×n)ᵀ -template -void transpose(long m, long n, const T *A, long sa, T *B, long sb) { -#pragma omp parallel for collapse(2) - for (long i = 0; i < m; ++i) { +#define OPTIMIZED __attribute__((__optimize__("-O3,-ffast-math"))) +#define PORTABLE \ + __target_clones("arch=znver4," \ + "arch=znver3," \ + "arch=sapphirerapids," \ + "arch=alderlake," \ + "arch=rocketlake," \ + "arch=cooperlake," \ + "arch=tigerlake," \ + "arch=cascadelake," \ + "arch=skylake-avx512," \ + "arch=skylake," \ + "arch=znver1," \ + "arch=tremont," \ + "fma," \ + "avx") + +static bool is_self_testing; + +// m×n → n×m +template +void transpose(long m, long n, const TA *A, long lda, TB *B, long ldb) { +#pragma omp parallel for collapse(2) if (m * n > THRESHOLD) + for (long i = 0; i < m; ++i) for (long j = 0; j < n; ++j) { - B[sb * j + i] = A[sa * i + j]; + B[ldb * j + i] = A[lda * i + j]; } - } } // m×k * k×n → m×n -template -void matmul(long m, long n, long k, const T *A, long sa, const T *B, long sb, - T *C, long sc) { -#pragma omp parallel for collapse(2) - for (long i = 0; i < m; ++i) { +// k×m * k×n → m×n if aT +// m×k * n×k → m×n if bT +// k×m * n×k → m×n if aT and bT +template +void dgemm(bool aT, bool bT, long m, long n, long k, float alpha, const TA *A, + long lda, const TB *B, long ldb, float beta, TC *C, long ldc) { +#pragma omp parallel for collapse(2) if (m * n * k > THRESHOLD) + for (long i = 0; i < m; ++i) for (long j = 0; j < n; ++j) { - T sum = 0; - for (long l = 0; l < k; ++l) { - sum += A[sa * i + l] * B[sb * l + j]; - } - C[sc * i + j] = sum; + double sum = 0; + for (long l = 0; l < k; ++l) + sum = std::fma((aT ? A[lda * l + i] : A[lda * i + l]) * alpha, + (bT ? B[ldb * j + l] : B[ldb * l + j]), sum); + C[ldc * i + j] = beta * C[ldc * i + j] + sum; } - } } -template -void gemmk(long k, const T *A, long sa, const T *B, long sb, T *C, long sc) { - T S[BM][BN] = {0}; - for (long l = 0; l < k; ++l) { - for (long i = 0; i < BM; ++i) { - for (long j = 0; j < BN; ++j) { - S[i][j] += A[sa * l + i] * B[sb * l + j]; +template +struct Gemmlin { + public: + Gemmlin(bool aT, bool bT, float alpha, const TA *A, long lda, const TB *B, + long ldb, float beta, TC *C, long ldc) + : aT(aT), + bT(bT), + alpha(alpha), + A(A), + lda(lda), + B(B), + ldb(ldb), + beta(beta), + C(C), + ldc(ldc) { + } + + void gemm(long m, long n, long k) { + if (!m || !n) return; + for (long i = 0; i < m; ++i) + for (long j = 0; j < n; ++j) { + C[ldc * i + j] *= beta; + } + if (!k) return; + cub = sqrt(LV1DCACHE) / sqrt(sizeof(T) * 3); + mnpack(0, m, 0, n, 0, k); + } + + private: + void mnpack(long m0, long m, // + long n0, long n, // + long k0, long k) { + long mc = rounddown(std::min(m - m0, cub), 4); + long mp = m0 + (m - m0) / mc * mc; + long nc = rounddown(std::min(n - n0, cub), 4); + long np = n0 + (n - n0) / nc * nc; + long kc = rounddown(std::min(k - k0, cub), 4); + long kp = k0 + (k - k0) / kc * kc; + kpack(m0, mc, mp, n0, nc, np, k0, kc, k, kp); + if (m - mp) mnpack(mp, m, n0, np, k0, k); + if (n - np) mnpack(m0, mp, np, n, k0, k); + if (m - mp && n - np) mnpack(mp, m, np, n, k0, k); + } + + void kpack(long m0, long mc, long m, // + long n0, long nc, long n, // + long k0, long kc, long k, // + long kp) { + rpack(m0, mc, m, n0, nc, n, k0, kc, kp); + if (k - kp) rpack(m0, mc, m, n0, nc, n, kp, k - kp, k); + } + + void rpack(long m0, long mc, long m, // + long n0, long nc, long n, // + long k0, long kc, long k) { + if (!(mc % 4) && !(nc % 4)) + bgemm<4, 4>(m0, mc, m, n0, nc, n, k0, kc, k); + else + bgemm<1, 1>(m0, mc, m, n0, nc, n, k0, kc, k); + } + + template + void bgemm(long m0, long mc, long m, // + long n0, long nc, long n, // + long k0, long kc, long k) { + ops = (m - m0) * (n - n0) * (k - k0); + ml = (m - m0) / mc; + nl = (n - n0) / nc; + locks = new lock[ml * nl]; + there_will_be_blocks(m0, mc, m, n0, nc, n, k0, kc, k); + delete[] locks; + } + + template + void there_will_be_blocks(long m0, volatile long mc, long m, long n0, long nc, + long n, long k0, long kc, long k) { +#pragma omp parallel for collapse(2) if (ops > THRESHOLD && mc * kc > 16) + for (long ic = m0; ic < m; ic += mc) + for (long pc = k0; pc < k; pc += kc) + gizmo(m0, mc, ic, n0, nc, k0, kc, pc, n); + } + + template + PORTABLE OPTIMIZED void gizmo(long m0, long mc, long ic, long n0, long nc, + long k0, long kc, long pc, long n) { + T Ac[mc / mr][kc][mr]; + for (long i = 0; i < mc; ++i) + for (long j = 0; j < kc; ++j) + Ac[i / mr][j][i % mr] = alpha * (aT ? A[lda * (pc + j) + (ic + i)] + : A[lda * (ic + i) + (pc + j)]); + for (long jc = n0; jc < n; jc += nc) { + T Bc[nc / nr][nr][kc]; + for (long j = 0; j < nc; ++j) + for (long i = 0; i < kc; ++i) + Bc[j / nr][j % nr][i] = + bT ? B[ldb * (jc + j) + (pc + i)] : B[ldb * (pc + i) + (jc + j)]; + T Cc[nc / nr][mc / mr][nr][mr]; + memset(Cc, 0, nc * mc * sizeof(float)); + for (long jr = 0; jr < nc / nr; ++jr) + for (long ir = 0; ir < mc / mr; ++ir) + for (long pr = 0; pr < kc; ++pr) + for (long j = 0; j < nr; ++j) + for (long i = 0; i < mr; ++i) + Cc[jr][ir][j][i] += Ac[ir][pr][i] * Bc[jr][j][pr]; + const long lk = nl * ((ic - m0) / mc) + ((jc - n0) / nc); + locks[lk].acquire(); + for (long ir = 0; ir < mc; ir += mr) + for (long jr = 0; jr < nc; jr += nr) + for (long i = 0; i < mr; ++i) + for (long j = 0; j < nr; ++j) + C[ldc * (ic + ir + i) + (jc + jr + j)] += + Cc[jr / nr][ir / mr][j][i]; + locks[lk].release(); + } + } + + inline long rounddown(long x, long r) { + if (x < r) + return x; + else + return x & -r; + } + + class lock { + public: + lock() = default; + void acquire() { + while (lock_.exchange(true, std::memory_order_acquire)) { } } - } - for (long i = 0; i < BM; ++i) { - for (long j = 0; j < BN; ++j) { - C[sc * i + j] = S[i][j]; + void release() { + lock_.store(false, std::memory_order_release); } - } + + private: + std::atomic_bool lock_ = false; + }; + + bool aT; + bool bT; + float alpha; + const TA *A; + long lda; + const TB *B; + long ldb; + float beta; + TC *C; + long ldc; + long ops; + long nl; + long ml; + lock *locks; + long cub; +}; + +template +void sgemm(bool aT, bool bT, long m, long n, long k, float alpha, const TA *A, + long lda, const TB *B, long ldb, float beta, TC *C, long ldc) { + Gemmlin g{aT, bT, alpha, A, lda, B, ldb, beta, C, ldc}; + g.gemm(m, n, k); } -// (m×k)ᵀ * k×n → m×n -template -void gemm(long m, long n, long k, const T *A, long sa, const T *B, long sb, - T *C, long sc) { -#pragma omp parallel for collapse(2) - for (long i = 0; i < m; i += BM) { - for (long j = 0; j < n; j += BN) { - gemmk(k, A + i, sa, B + j, sb, C + sc * i + j, sc); - } +template +void show(FILE *f, long max, long m, long n, const TA *A, long lda, const TB *B, + long ldb) { + flockfile(f); + fprintf(f, " "); + for (long j = 0; j < n; ++j) { + fprintf(f, "%13ld", j); } -} - -template -void show(long m, long n, const T *A, long sa) { - long max = 4; - printf("{"); + fprintf(f, "\n"); for (long i = 0; i < m; ++i) { - if (i) { - if (i == max) { - printf(", ..."); + if (i == max) { + fprintf(f, "...\n"); + break; + } + fprintf(f, "%5ld ", i); + for (long j = 0; j < n; ++j) { + if (j == max) { + fprintf(f, " ..."); break; - } else { - printf(", "); + } + char ba[16], bb[16]; + sprintf(ba, "%13.7f", static_cast(A[lda * i + j])); + sprintf(bb, "%13.7f", static_cast(B[ldb * i + j])); + for (long k = 0; ba[k] && bb[k]; ++k) { + if (ba[k] != bb[k]) fputs_unlocked("\33[31m", f); + fputc_unlocked(ba[k], f); + if (ba[k] != bb[k]) fputs_unlocked("\33[0m", f); } } - printf("{"); - for (long j = 0; j < n; ++j) { - if (j) { - if (j == max) { - printf(", ..."); - break; - } else { - printf(", "); - } - } - printf("%g", static_cast(A[j + i * sa])); - } - printf("}"); + fprintf(f, "\n"); } - printf("}"); + funlockfile(f); } -template -double diff(long m, long n, const T *A, long sa, const T *B, long sb) { +inline unsigned long GetDoubleBits(double f) { + union { + double f; + unsigned long i; + } u; + u.f = f; + return u.i; +} + +inline bool IsNan(double x) { + return (GetDoubleBits(x) & (-1ull >> 1)) > (0x7ffull << 52); +} + +template +double diff(long m, long n, const TA *Want, long lda, const TB *Got, long ldb) { double s = 0; - for (long i = 0; i < m; ++i) { - for (long j = 0; j < n; ++j) { - s += fabs(A[sa * i + j] - B[sb * i + j]); - } - } - return s / m / n; + int got_nans = 0; + int want_nans = 0; + for (long i = 0; i < m; ++i) + for (long j = 0; j < n; ++j) + if (IsNan(Want[ldb * i + j])) + ++want_nans; + else if (IsNan(Got[ldb * i + j])) + ++got_nans; + else + s += std::fabs(Want[lda * i + j] - Got[ldb * i + j]); + if (got_nans) printf("WARNING: got %d NaNs!\n", got_nans); + if (want_nans) printf("WARNING: want array has %d NaNs!\n", want_nans); + return s / (m * n); } -template -void check(double tol, long m, long n, const T *A, long sa, const T *B, long sb, - const char *file, long line) { - double sad = diff(m, n, A, sa, B, sb); - if (sad > tol) { - printf("%s:%d: sad %g exceeds %g\n\twant ", file, line, sad, tol); - show(m, n, A, sa); - printf("\n\t got "); - show(m, n, B, sb); - printf("\n"); +template +void show_error(FILE *f, long max, long m, long n, const TA *A, long lda, + const TB *B, long ldb, const char *file, int line, double sad, + double tol) { + fprintf(f, "%s:%d: sad %.17g exceeds %g\nwant\n", file, line, sad, tol); + show(f, max, m, n, A, lda, B, ldb); + fprintf(f, "got\n"); + show(f, max, m, n, B, ldb, A, lda); + fprintf(f, "\n"); +} + +template +void check(double tol, long m, long n, const TA *A, long lda, const TB *B, + long ldb, const char *file, int line) { + double sad = diff(m, n, A, lda, B, ldb); + if (sad <= tol) { + if (!is_self_testing) { + printf(" %g error\n", sad); + } + } else { + show_error(stderr, 16, m, n, A, lda, B, ldb, file, line, sad, tol); + const char *path = "/tmp/openmp_test.log"; + FILE *f = fopen(path, "w"); + if (f) { + show_error(f, 10000, m, n, A, lda, B, ldb, file, line, sad, tol); + printf("see also %s\n", path); + } exit(1); } } -#define check(tol, m, n, A, sa, B, sb) \ - check(tol, m, n, A, sa, B, sb, __FILE__, __LINE__) +#define check(tol, m, n, A, lda, B, ldb) \ + check(tol, m, n, A, lda, B, ldb, __FILE__, __LINE__) long micros(void) { struct timespec ts; @@ -196,41 +383,91 @@ void fill(T *A, long n) { } } -void check_reference_gemm_is_ok(void) { - constexpr long m = 2; - constexpr long n = 2; - constexpr long k = 2; - float A[m][k] = {{1, 2}, {3, 4}}; - float B[k][n] = {{5, 6}, {7, 8}}; - float C[m][n] = {{666, 666}, {666, 666}}; - float G[m][n] = {{19, 22}, {43, 50}}; - bench(matmul(m, n, k, (float *)A, k, (float *)B, n, (float *)C, n)); - check(FLAWLESS, m, n, (float *)G, n, (float *)C, n); -} - -void check_transposed_blocking_gemm_is_ok(void) { - long m = 1024; - long k = 512; - long n = 80; +void test_gemm(long m, long n, long k) { float *A = new float[m * k]; + float *At = new float[k * m]; float *B = new float[k * n]; + float *Bt = new float[n * k]; float *C = new float[m * n]; - float *D = new float[m * n]; + float *GOLD = new float[m * n]; + float alpha = 1; + float beta = 0; fill(A, m * k); fill(B, k * n); - bench(matmul(m, n, k, A, k, B, n, C, n)); - float *At = new float[k * m]; - bench(transpose(m, k, A, k, At, m)); - bench((gemm<8, 4>(m, n, k, At, m, B, n, D, n))); - check(FLAWLESS, m, n, C, n, D, n); - delete[] At; - delete[] D; + dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n); + transpose(m, k, A, k, At, m); + transpose(k, n, B, n, Bt, k); + sgemm(0, 0, m, n, k, alpha, A, k, B, n, beta, C, n); + check(PRECISION, m, n, GOLD, n, C, n); + sgemm(1, 0, m, n, k, alpha, At, m, B, n, beta, C, n); + check(PRECISION, m, n, GOLD, n, C, n); + sgemm(0, 1, m, n, k, alpha, A, k, Bt, k, beta, C, n); + check(PRECISION, m, n, GOLD, n, C, n); + sgemm(1, 1, m, n, k, alpha, At, m, Bt, k, beta, C, n); + check(PRECISION, m, n, GOLD, n, C, n); + delete[] GOLD; delete[] C; + delete[] Bt; delete[] B; + delete[] At; + delete[] A; +} + +void check_gemm_works(void) { + static long kSizes[] = {1, 2, 3, 4, 5, 6, 7, 17, 31, 33, 63, 128, 129}; + is_self_testing = true; + long c = 0; + long N = sizeof(kSizes) / sizeof(kSizes[0]); + for (long i = 0; i < N; ++i) { + long m = kSizes[i]; + for (long j = 0; j < N; ++j) { + long n = kSizes[N - 1 - i]; + for (long k = 0; k < N; ++k) { + long K = kSizes[i]; + if (c++ % 13 == 0) { + printf("testing %2ld %2ld %2ld\r", m, n, K); + } + test_gemm(m, n, K); + } + } + } + printf("\r"); + is_self_testing = false; +} + +long m = 2333 / 3; +long k = 577 / 3; +long n = 713 / 3; + +void check_sgemm(void) { + float *A = new float[m * k]; + float *At = new float[k * m]; + float *B = new float[k * n]; + float *Bt = new float[n * k]; + float *C = new float[m * n]; + double *GOLD = new double[m * n]; + fill(A, m * k); + fill(B, k * n); + transpose(m, k, A, k, At, m); + transpose(k, n, B, n, Bt, k); + bench(dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n)); + bench(sgemm(0, 0, m, n, k, 1, A, k, B, n, 0, C, n)); + check(PRECISION, m, n, GOLD, n, C, n); + bench(sgemm(1, 0, m, n, k, 1, At, m, B, n, 0, C, n)); + check(PRECISION, m, n, GOLD, n, C, n); + bench(sgemm(0, 1, m, n, k, 1, A, k, Bt, k, 0, C, n)); + check(PRECISION, m, n, GOLD, n, C, n); + bench(sgemm(1, 1, m, n, k, 1, At, m, Bt, k, 0, C, n)); + check(PRECISION, m, n, GOLD, n, C, n); + delete[] GOLD; + delete[] C; + delete[] Bt; + delete[] B; + delete[] At; delete[] A; } int main(int argc, char *argv[]) { - check_reference_gemm_is_ok(); - check_transposed_blocking_gemm_is_ok(); + check_gemm_works(); + check_sgemm(); } diff --git a/third_party/awk/run.c b/third_party/awk/run.c index f31259db2..1ee408e4d 100644 --- a/third_party/awk/run.c +++ b/third_party/awk/run.c @@ -103,7 +103,8 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); - DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); + // [jart] use after free error + // DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); if (tbuf == NULL) { if (whatrtn) FATAL("out of memory in %s", whatrtn); diff --git a/third_party/bash/BUILD.mk b/third_party/bash/BUILD.mk index df6f7e10a..790dc452d 100644 --- a/third_party/bash/BUILD.mk +++ b/third_party/bash/BUILD.mk @@ -83,6 +83,7 @@ $(THIRD_PARTY_BASH_OBJS): private \ -Wno-nonnull-compare \ -Wno-unused-variable \ -Wno-missing-braces \ + -Wno-use-after-free \ -Wno-unused-label \ -Wno-unused-value \ -Wno-return-type \ diff --git a/third_party/chibicc/preprocess.c b/third_party/chibicc/preprocess.c index dd9780597..ff2dee27e 100644 --- a/third_party/chibicc/preprocess.c +++ b/third_party/chibicc/preprocess.c @@ -789,11 +789,12 @@ static Token *preprocess2(Token *tok) { char *path = xasprintf("%s/%s", dirname(tmp), filename); free(tmp); bool exists = fileexists(path); - free(path); if (exists) { tok = include_file(tok, path, start->next->next); + free(path); continue; } + free(path); } char *path = search_include_paths(filename); tok = include_file(tok, path ? path : filename, start->next->next); diff --git a/third_party/compiler_rt/cpu_model.h b/third_party/compiler_rt/cpu_model.h new file mode 100644 index 000000000..f5d2ba440 --- /dev/null +++ b/third_party/compiler_rt/cpu_model.h @@ -0,0 +1,33 @@ +//===-- cpu_model_common.c - Utilities for cpu model detection ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements common utilities for runtime cpu model detection. +// +//===----------------------------------------------------------------------===// + +#ifndef COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H +#define COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H + +#if __has_attribute(constructor) +#if __GNUC__ >= 9 +// Ordinarily init priorities below 101 are disallowed as they are reserved for +// the implementation. However, we are the implementation, so silence the +// diagnostic, since it doesn't apply to us. +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif +// We're choosing init priority 90 to force our constructors to run before any +// constructors in the end user application (starting at priority 101). This +// value matches the libgcc choice for the same functions. +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(90))) +#else +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE +#endif + +#endif diff --git a/third_party/hiredis/sds.c b/third_party/hiredis/sds.c index 7f2f5692d..1e7b108e9 100644 --- a/third_party/hiredis/sds.c +++ b/third_party/hiredis/sds.c @@ -64,6 +64,8 @@ #include "third_party/hiredis/sds.h" #include "third_party/hiredis/sdsalloc.h" +#pragma GCC diagnostic ignored "-Wstringop-overflow" + static inline int sdsHdrSize(char type) { switch(type&SDS_TYPE_MASK) { case SDS_TYPE_5: diff --git a/third_party/libcxx/BUILD.mk b/third_party/libcxx/BUILD.mk index b2005f133..b3b29eeb7 100644 --- a/third_party/libcxx/BUILD.mk +++ b/third_party/libcxx/BUILD.mk @@ -223,6 +223,7 @@ $(THIRD_PARTY_LIBCXX_A_OBJS): private \ -fdata-sections \ -fexceptions \ -frtti \ + -Wno-alloc-size-larger-than \ -DLIBCXX_BUILDING_LIBCXXABI THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x))) diff --git a/third_party/libcxx/__config b/third_party/libcxx/__config index 0194906ef..d0d2c4b0e 100644 --- a/third_party/libcxx/__config +++ b/third_party/libcxx/__config @@ -78,7 +78,7 @@ #if defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2 // Change short string representation so that string data starts at offset 0, // improving its alignment in some cases. -# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT +// # define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT // Fix deque iterator type in order to support incomplete types. # define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE // Fix undefined behavior in how std::list stores its linked nodes. @@ -332,7 +332,7 @@ #if (defined(__APPLE__) && !defined(__i386__) && !defined(__x86_64__) && \ (!defined(__arm__) || __ARM_ARCH_7K__ >= 2)) || \ defined(_LIBCPP_ALTERNATE_STRING_LAYOUT) -#define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT +// #define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT #endif #if __has_feature(cxx_alignas) diff --git a/third_party/libcxx/cassert b/third_party/libcxx/cassert index a3daa06a9..582a75434 100644 --- a/third_party/libcxx/cassert +++ b/third_party/libcxx/cassert @@ -17,7 +17,7 @@ Macros: */ #include "third_party/libcxx/__config" -#include "libc/assert.h" +#include "libc/isystem/assert.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cfenv b/third_party/libcxx/cfenv index bba3dcd31..a66bf926a 100644 --- a/third_party/libcxx/cfenv +++ b/third_party/libcxx/cfenv @@ -53,7 +53,7 @@ int feupdateenv(const fenv_t* envp); */ #include "third_party/libcxx/__config" -#include "libc/runtime/fenv.h" +#include "libc/isystem/fenv.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cfloat b/third_party/libcxx/cfloat index 9b5e73c3a..1886a4f96 100644 --- a/third_party/libcxx/cfloat +++ b/third_party/libcxx/cfloat @@ -70,8 +70,7 @@ Macros: */ #include "third_party/libcxx/__config" -#include "libc/math.h" -#include "libc/runtime/fenv.h" +#include "libc/isystem/float.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cinttypes b/third_party/libcxx/cinttypes index 61d1334dd..a57977fae 100644 --- a/third_party/libcxx/cinttypes +++ b/third_party/libcxx/cinttypes @@ -236,8 +236,7 @@ uintmax_t wcstoumax(const wchar_t* restrict nptr, wchar_t** restrict endptr, int #include "third_party/libcxx/__config" #include "third_party/libcxx/cstdint" -#include "libc/inttypes.h" -#include "libc/fmt/conv.h" +#include "libc/isystem/inttypes.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/ciso646 b/third_party/libcxx/ciso646 index 6749b74c0..1b442a032 100644 --- a/third_party/libcxx/ciso646 +++ b/third_party/libcxx/ciso646 @@ -21,4 +21,6 @@ #pragma GCC system_header #endif +#include "libc/isystem/iso646.h" + #endif // _LIBCPP_CISO646 diff --git a/third_party/libcxx/clocale b/third_party/libcxx/clocale index bad5ab57f..126c3521a 100644 --- a/third_party/libcxx/clocale +++ b/third_party/libcxx/clocale @@ -35,8 +35,7 @@ lconv* localeconv(); */ #include "third_party/libcxx/__config" -#include "libc/str/unicode.h" -#include "libc/str/locale.h" +#include "libc/isystem/locale.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/csetjmp b/third_party/libcxx/csetjmp index 9dc5f49bf..d9bfb5305 100644 --- a/third_party/libcxx/csetjmp +++ b/third_party/libcxx/csetjmp @@ -31,7 +31,7 @@ void longjmp(jmp_buf env, int val); */ #include "third_party/libcxx/__config" -#include "libc/runtime/runtime.h" +#include "libc/isystem/setjmp.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/csignal b/third_party/libcxx/csignal index 8530f6248..d6acd9032 100644 --- a/third_party/libcxx/csignal +++ b/third_party/libcxx/csignal @@ -40,12 +40,7 @@ int raise(int sig); */ #include "third_party/libcxx/__config" -#include "libc/calls/calls.h" -#include "libc/calls/struct/sigaction.h" -#include "libc/calls/struct/siginfo.h" -#include "libc/sysv/consts/sa.h" -#include "libc/sysv/consts/sig.h" -#include "libc/sysv/consts/sicode.h" +#include "libc/isystem/signal.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cstdarg b/third_party/libcxx/cstdarg index d3c9a734a..9564fd3a8 100644 --- a/third_party/libcxx/cstdarg +++ b/third_party/libcxx/cstdarg @@ -11,6 +11,7 @@ #define _LIBCPP_CSTDARG #include "third_party/libcxx/__config" +#include "libc/isystem/stdarg.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cstdbool b/third_party/libcxx/cstdbool index a32e40195..0ea403fd7 100644 --- a/third_party/libcxx/cstdbool +++ b/third_party/libcxx/cstdbool @@ -20,6 +20,7 @@ Macros: */ #include "third_party/libcxx/__config" +#include "libc/isystem/stdbool.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cstddef b/third_party/libcxx/cstddef index a20079083..f3672dba6 100644 --- a/third_party/libcxx/cstddef +++ b/third_party/libcxx/cstddef @@ -35,6 +35,7 @@ Types: #include "third_party/libcxx/__config" #include "third_party/libcxx/version" +#include "libc/isystem/stddef.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/cstdint b/third_party/libcxx/cstdint index 8ebad1acb..f1dffe83f 100644 --- a/third_party/libcxx/cstdint +++ b/third_party/libcxx/cstdint @@ -10,11 +10,7 @@ #ifndef _LIBCPP_CSTDINT #define _LIBCPP_CSTDINT -#include "libc/inttypes.h" -#include "libc/fmt/conv.h" -#include "libc/limits.h" -#include "libc/literal.h" -#include "libc/calls/weirdtypes.h" +#include "libc/isystem/stdint.h" #include "third_party/libcxx/__config" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/third_party/libcxx/cstdlib b/third_party/libcxx/cstdlib index e2c88b522..818ba2ccf 100644 --- a/third_party/libcxx/cstdlib +++ b/third_party/libcxx/cstdlib @@ -13,6 +13,7 @@ #include "third_party/libcxx/__config" #include "libc/str/str.h" #include "third_party/libcxx/stdlib.h" +#include "libc/isystem/stdlib.h" /* cstdlib synopsis diff --git a/third_party/libcxx/ctime b/third_party/libcxx/ctime index 4879de5bb..37f121b7e 100644 --- a/third_party/libcxx/ctime +++ b/third_party/libcxx/ctime @@ -11,15 +11,7 @@ #define _LIBCPP_CTIME #include "third_party/libcxx/__config" -#include "libc/calls/struct/timespec.h" -#include "libc/calls/struct/timeval.h" -#include "libc/sysv/consts/clock.h" -#include "libc/sysv/consts/sched.h" -#include "libc/sysv/consts/timer.h" -#include "libc/calls/weirdtypes.h" -#include "libc/time/struct/tm.h" -#include "libc/calls/calls.h" -#include "libc/time/time.h" +#include "libc/isystem/time.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/ctype.h b/third_party/libcxx/ctype.h index e6a95b2ca..b89d403ec 100644 --- a/third_party/libcxx/ctype.h +++ b/third_party/libcxx/ctype.h @@ -35,7 +35,7 @@ int toupper(int c); #pragma GCC system_header #endif -#include "libc/str/str.h" +#include "libc/isystem/ctype.h" #ifdef __cplusplus diff --git a/third_party/libcxx/errno.h b/third_party/libcxx/errno.h index c1210f888..1dfe334b6 100644 --- a/third_party/libcxx/errno.h +++ b/third_party/libcxx/errno.h @@ -28,7 +28,7 @@ Macros: #pragma GCC system_header #endif -#include "libc/errno.h" +#include "libc/isystem/errno.h" #ifdef __cplusplus diff --git a/third_party/libcxx/limits.h b/third_party/libcxx/limits.h index b78191a71..379e34fcf 100644 --- a/third_party/libcxx/limits.h +++ b/third_party/libcxx/limits.h @@ -43,22 +43,6 @@ Macros: #pragma GCC system_header #endif -#ifndef __GNUC__ -#include "libc/limits.h" -#else -// GCC header limits.h recursively includes itself through another header called -// syslimits.h for some reason. This setup breaks down if we directly -// #include_next GCC's limits.h (reasons not entirely clear to me). Therefore, -// we manually re-create the necessary include sequence below: - -// Get the system limits.h defines (force recurse into the next level) -#define _GCC_LIMITS_H_ -#define _GCC_NEXT_LIMITS_H -#include "libc/limits.h" - -// Get the ISO C defines -#undef _GCC_LIMITS_H_ -#include "libc/limits.h" -#endif // __GNUC__ +#include "libc/isystem/limits.h" #endif // _LIBCPP_LIMITS_H diff --git a/third_party/libcxx/locale.h b/third_party/libcxx/locale.h index bcf7c1a3f..dd61d9df2 100644 --- a/third_party/libcxx/locale.h +++ b/third_party/libcxx/locale.h @@ -39,7 +39,6 @@ Functions: #pragma GCC system_header #endif -#include "libc/str/locale.h" -#include "libc/str/unicode.h" +#include "libc/isystem/locale.h" #endif // _LIBCPP_LOCALE_H diff --git a/third_party/libcxx/math.h b/third_party/libcxx/math.h index 6bb8d562c..9171afc7c 100644 --- a/third_party/libcxx/math.h +++ b/third_party/libcxx/math.h @@ -19,7 +19,7 @@ #define _LIBCPP_STDLIB_INCLUDE_NEXT #include "third_party/libcxx/stdlib.h" -#include "libc/math.h" +#include "libc/isystem/math.h" #ifdef __cplusplus diff --git a/third_party/libcxx/stdio.h b/third_party/libcxx/stdio.h index c16c2d66e..79c965488 100644 --- a/third_party/libcxx/stdio.h +++ b/third_party/libcxx/stdio.h @@ -102,9 +102,7 @@ void perror(const char* s); #pragma GCC system_header #endif -#include "libc/calls/calls.h" -#include "libc/temp.h" -#include "libc/stdio/stdio.h" +#include "libc/isystem/stdio.h" #ifdef __cplusplus diff --git a/third_party/libcxx/stdlib.h b/third_party/libcxx/stdlib.h index a561dc78f..aea595b06 100644 --- a/third_party/libcxx/stdlib.h +++ b/third_party/libcxx/stdlib.h @@ -9,13 +9,7 @@ #if defined(__need_malloc_and_calloc) || defined(_LIBCPP_STDLIB_INCLUDE_NEXT) -#include "libc/stdio/rand.h" -#include "libc/mem/mem.h" -#include "libc/runtime/runtime.h" -#include "libc/runtime/runtime.h" -#include "libc/mem/alg.h" -#include "libc/stdio/stdio.h" -#include "libc/fmt/conv.h" +#include "libc/isystem/stdlib.h" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header diff --git a/third_party/libcxx/string b/third_party/libcxx/string index ea0b695ef..c8c822545 100644 --- a/third_party/libcxx/string +++ b/third_party/libcxx/string @@ -702,34 +702,41 @@ private: #ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT - struct __long - { - pointer __data_; + struct __long { + pointer __data_; size_type __size_; - size_type __cap_; + size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; + size_type __is_long_ : 1; }; -#ifdef _LIBCPP_BIG_ENDIAN - static const size_type __short_mask = 0x01; - static const size_type __long_mask = 0x1ul; -#else // _LIBCPP_BIG_ENDIAN - static const size_type __short_mask = 0x80; - static const size_type __long_mask = ~(size_type(~0) >> 1); -#endif // _LIBCPP_BIG_ENDIAN + enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 }; - enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ? - (sizeof(__long) - 1)/sizeof(value_type) : 2}; - - struct __short - { + struct __short { value_type __data_[__min_cap]; - struct - : __padding - { - unsigned char __size_; - }; + unsigned char __padding_[sizeof(value_type) - 1]; + unsigned char __size_ : 7; + unsigned char __is_long_ : 1; }; + // The __endian_factor is required because the field we use to store the size + // has one fewer bit than it would if it were not a bitfield. + // + // If the LSB is used to store the short-flag in the short string representation, + // we have to multiply the size by two when it is stored and divide it by two when + // it is loaded to make sure that we always store an even number. In the long string + // representation, we can ignore this because we can assume that we always allocate + // an even amount of value_types. + // + // If the MSB is used for the short-flag, the max_size() is numeric_limits::max() / 2. + // This does not impact the short string representation, since we never need the MSB + // for representing the size of a short string anyway. + +# ifdef _LIBCPP_BIG_ENDIAN + static const size_type __endian_factor = 2; +# else + static const size_type __endian_factor = 1; +# endif + #else struct __long diff --git a/third_party/libcxx/string.h b/third_party/libcxx/string.h index 5490cbc8d..abd6be931 100644 --- a/third_party/libcxx/string.h +++ b/third_party/libcxx/string.h @@ -16,7 +16,7 @@ #pragma GCC system_header #endif -#include "libc/str/str.h" +#include "libc/isystem/string.h" /* string.h synopsis diff --git a/third_party/libcxx/wchar.h b/third_party/libcxx/wchar.h index c5e80d9bd..17996b194 100644 --- a/third_party/libcxx/wchar.h +++ b/third_party/libcxx/wchar.h @@ -9,8 +9,7 @@ #ifndef _LIBCPP_WCHAR_H #define _LIBCPP_WCHAR_H -#include "libc/str/str.h" -#include "libc/time/time.h" +#include "libc/isystem/wchar.h" #include "third_party/libcxx/__config" #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/third_party/libcxx/wctype.h b/third_party/libcxx/wctype.h index 1ea8f5652..b256d58dd 100644 --- a/third_party/libcxx/wctype.h +++ b/third_party/libcxx/wctype.h @@ -50,8 +50,7 @@ wctrans_t wctrans(const char* property); #pragma GCC system_header #endif -#include "libc/str/str.h" -#include "libc/time/time.h" +#include "libc/isystem/wctype.h" #ifdef __cplusplus diff --git a/third_party/libcxxabi/test/BUILD.mk b/third_party/libcxxabi/test/BUILD.mk index 8bfd39d40..6bc0d5861 100644 --- a/third_party/libcxxabi/test/BUILD.mk +++ b/third_party/libcxxabi/test/BUILD.mk @@ -32,26 +32,27 @@ THIRD_PARTY_LIBCXXABI_TEST_SRCS = \ third_party/libcxxabi/test/cxa_thread_atexit_test.pass.cc \ third_party/libcxxabi/test/cxa_vec_new_overflow_PR41395.pass.cc \ third_party/libcxxabi/test/dynamic_cast.pass.cc \ + third_party/libcxxabi/test/dynamic_cast14.pass.cc \ third_party/libcxxabi/test/dynamic_cast3.pass.cc \ third_party/libcxxabi/test/dynamic_cast5.pass.cc \ - third_party/libcxxabi/test/dynamic_cast14.pass.cc \ third_party/libcxxabi/test/dynamic_cast_stress.pass.cc \ - third_party/libcxxabi/test/exception_object_alignment.pass.cc \ third_party/libcxxabi/test/exception_object_alignment.2.pass.cc \ + third_party/libcxxabi/test/exception_object_alignment.pass.cc \ third_party/libcxxabi/test/guard_test_basic.pass.cc \ - third_party/libcxxabi/test/incomplete_type.sh.cc \ + third_party/libcxxabi/test/incomplete_type_test.pass.cc \ + third_party/libcxxabi/test/incomplete_type_test.lib.cc \ third_party/libcxxabi/test/inherited_exception.pass.cc \ third_party/libcxxabi/test/test_aux_runtime.pass.cc \ third_party/libcxxabi/test/test_aux_runtime_op_array_new.pass.cc\ third_party/libcxxabi/test/test_demangle.pass.cc \ - third_party/libcxxabi/test/test_exception_address_alignment.pass.cc \ + third_party/libcxxabi/test/test_exception_address_alignment.pass.cc \ third_party/libcxxabi/test/test_exception_storage.pass.cc \ third_party/libcxxabi/test/test_fallback_malloc.pass.cc \ third_party/libcxxabi/test/test_guard.pass.cc \ third_party/libcxxabi/test/test_vector1.pass.cc \ third_party/libcxxabi/test/test_vector2.pass.cc \ - third_party/libcxxabi/test/thread_local_destruction_order.pass.cc \ third_party/libcxxabi/test/test_vector3.pass.cc \ + third_party/libcxxabi/test/thread_local_destruction_order.pass.cc \ third_party/libcxxabi/test/uncaught_exception.pass.cc \ third_party/libcxxabi/test/uncaught_exceptions.pass.cc \ third_party/libcxxabi/test/unittest_demangle.pass.cc \ @@ -69,33 +70,21 @@ THIRD_PARTY_LIBCXXABI_TEST_SRCS_TOOSLOW_COSMO = \ THIRD_PARTY_LIBCXXABI_TEST_SRCS_FAILING_GCC = \ third_party/libcxxabi/test/catch_array_01.pass.cc \ third_party/libcxxabi/test/catch_function_01.pass.cc \ - third_party/libcxxabi/test/catch_member_function_pointer_01.pass.cc \ + third_party/libcxxabi/test/catch_member_function_pointer_01.pass.cc \ third_party/libcxxabi/test/catch_member_function_pointer_02.pass.cc +THIRD_PARTY_LIBCXXABI_TEST_OBJS = \ + $(THIRD_PARTY_LIBCXXABI_TEST_SRCS:%.cc=o/$(MODE)/%.o) + +THIRD_PARTY_LIBCXXABI_TEST_COMS_SRCS = \ + $(filter-out %.pass.cc,$(THIRD_PARTY_LIBCXXABI_TEST_SRCS)) + THIRD_PARTY_LIBCXXABI_TEST_COMS = \ - $(THIRD_PARTY_LIBCXXABI_TEST_SRCS:%.cc=o/$(MODE)/%.com) + $(THIRD_PARTY_LIBCXXABI_TEST_COMS_SRCS:%.cc=o/$(MODE)/%.com) THIRD_PARTY_LIBCXXABI_TEST_TESTS = \ $(THIRD_PARTY_LIBCXXABI_TEST_COMS:%=%.ok) -THIRD_PARTY_LIBCXXABI_TEST_OBJS = \ - $(THIRD_PARTY_LIBCXXABI_TEST_SRCS:%.cc=o/$(MODE)/%.o) \ - o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.one.o \ - o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.two.o - -THIRD_PARTY_LIBCXXABI_TEST_OBJS_WNO_EXCEPTIONS = \ - o/$(MODE)/third_party/libcxxabi/test/catch_class_03.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/catch_class_04.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/catch_ptr.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/catch_ptr_02.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/inherited_exception.pass.o - -THIRD_PARTY_LIBCXXABI_TEST_OBJS_CPP14 = \ - o/$(MODE)/third_party/libcxxabi/test/unwind_02.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/unwind_03.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/unwind_04.pass.o \ - o/$(MODE)/third_party/libcxxabi/test/unwind_05.pass.o - THIRD_PARTY_LIBCXXABI_TEST_BINS = \ $(THIRD_PARTY_LIBCXXABI_TEST_COMS) \ $(THIRD_PARTY_LIBCXXABI_TEST_COMS:%=%.dbg) @@ -105,19 +94,29 @@ THIRD_PARTY_LIBCXXABI_TEST_CHECKS = \ $(THIRD_PARTY_LIBCXXABI_TEST_HDRS:%=o/$(MODE)/%.ok) THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS = \ - LIBC_NEXGEN32E \ + LIBC_CALLS \ + LIBC_INTRIN \ LIBC_LOG \ + LIBC_MEM \ + LIBC_NEXGEN32E \ + LIBC_PROC \ + LIBC_RUNTIME \ + LIBC_STDIO \ + LIBC_THREAD \ THIRD_PARTY_LIBCXX \ - THIRD_PARTY_LIBCXXABI + THIRD_PARTY_LIBCXXABI \ + THIRD_PARTY_LIBUNWIND THIRD_PARTY_LIBCXXABI_TEST_DEPS := \ $(call uniq,$(foreach x,$(THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS),$($(x)))) $(THIRD_PARTY_LIBCXXABI_TEST_A): \ - $(THIRD_PARTY_LIBCXXABI_TEST_A).pkg + $(THIRD_PARTY_LIBCXXABI_TEST_A).pkg \ + $(THIRD_PARTY_LIBCXXABI_TEST_OBJS) $(THIRD_PARTY_LIBCXXABI_TEST_A).pkg: \ - $(foreach x,$(THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS),$($(x)_A).pkg) + $(foreach x,$(THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS),$($(x)_A).pkg) \ + $(THIRD_PARTY_LIBCXXABI_TEST_OBJS) o/$(MODE)/third_party/libcxxabi/test/%.com.dbg: \ $(THIRD_PARTY_LIBCXXABI_TEST_DEPS) \ @@ -137,38 +136,27 @@ $(THIRD_PARTY_LIBCXXABI_TEST_OBJS): private \ -D_LIBCPP_BUILDING_LIBRARY \ -D_LIBCPP_CONSTINIT=__constinit -$(THIRD_PARTY_LIBCXXABI_TEST_OBJS_WNO_EXCEPTIONS): private \ - CXXFLAGS += \ - -Wno-exceptions - -$(THIRD_PARTY_LIBCXXABI_TEST_OBJS_CPP14): private \ - CXXFLAGS += \ - -std=gnu++14 - -o/$(MODE)/third_party/libcxxabi/test/guard_test_basic.pass.o: private \ - CXXFLAGS += \ - -Wno-invalid-memory-model - -o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.one.o: private \ - CXXFLAGS += \ - -Wno-unreachable-code -o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.two.o: private \ - CXXFLAGS += \ - -Wno-unreachable-code \ - -DTU_ONE -o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.com.dbg: \ - $(THIRD_PARTY_LIBCXXABI_TEST_DEPS) \ - $(THIRD_PARTY_LIBCXXABI_TEST_A) \ - o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.one.o \ - o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.two.o \ - $(THIRD_PARTY_LIBCXXABI_TEST_A).pkg \ - $(CRT) \ - $(APE_NO_MODIFY_SELF) - @$(APELINK) - $(THIRD_PARTY_LIBCXXABI_TEST_OBJS): private CONFIG_CPPFLAGS += -UNDEBUG o/$(MODE)/third_party/libcxxabi/test/catch_multi_level_pointer.pass.o: private COPTS += -O0 o/$(MODE)/third_party/libcxxabi/test/catch_multi_level_pointer.pass.o: private QUOTA += -C30 -M4000m +o/$(MODE)/third_party/libcxxabi/test/guard_test_basic.pass.o: private CXXFLAGS += -Wno-invalid-memory-model +o/$(MODE)/third_party/libcxxabi/test/incomplete_type_test.pass.o: private CXXFLAGS += -Wno-unreachable-code +o/$(MODE)/third_party/libcxxabi/test/incomplete_type_test.lib.o: private CXXFLAGS += -Wno-unreachable-code -DTU_ONE + +o/$(MODE)/third_party/libcxxabi/test/catch_class_03.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/catch_class_04.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/catch_ptr.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/catch_ptr_02.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/inherited_exception.pass.o: private \ + CXXFLAGS += \ + -Wno-exceptions + +o/$(MODE)/third_party/libcxxabi/test/unwind_02.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/unwind_03.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/unwind_04.pass.o \ +o/$(MODE)/third_party/libcxxabi/test/unwind_05.pass.o: private \ + CXXFLAGS += \ + -std=gnu++14 .PHONY: o/$(MODE)/third_party/libcxxabi/test o/$(MODE)/third_party/libcxxabi/test: \ diff --git a/third_party/libcxxabi/test/incomplete_type.sh.one.cc b/third_party/libcxxabi/test/incomplete_type.sh.one.cc deleted file mode 120000 index 100e3f363..000000000 --- a/third_party/libcxxabi/test/incomplete_type.sh.one.cc +++ /dev/null @@ -1 +0,0 @@ -incomplete_type.sh.cc \ No newline at end of file diff --git a/third_party/libcxxabi/test/incomplete_type.sh.two.cc b/third_party/libcxxabi/test/incomplete_type.sh.two.cc deleted file mode 120000 index 100e3f363..000000000 --- a/third_party/libcxxabi/test/incomplete_type.sh.two.cc +++ /dev/null @@ -1 +0,0 @@ -incomplete_type.sh.cc \ No newline at end of file diff --git a/third_party/libcxxabi/test/incomplete_type_test.lib.cc b/third_party/libcxxabi/test/incomplete_type_test.lib.cc new file mode 120000 index 000000000..a10bb894c --- /dev/null +++ b/third_party/libcxxabi/test/incomplete_type_test.lib.cc @@ -0,0 +1 @@ +incomplete_type_test.pass.cc \ No newline at end of file diff --git a/third_party/libcxxabi/test/incomplete_type.sh.cc b/third_party/libcxxabi/test/incomplete_type_test.pass.cc similarity index 100% rename from third_party/libcxxabi/test/incomplete_type.sh.cc rename to third_party/libcxxabi/test/incomplete_type_test.pass.cc diff --git a/third_party/musl/crypt_des.c b/third_party/musl/crypt_des.c index 87ae04bff..f301f8b22 100644 --- a/third_party/musl/crypt_des.c +++ b/third_party/musl/crypt_des.c @@ -32,6 +32,8 @@ #include "third_party/musl/crypt.internal.h" #include "third_party/musl/crypt_des.internal.h" +#pragma GCC diagnostic ignored "-Wstringop-overflow" + asm(".ident\t\"\\n\\n\ Musl libc (MIT License)\\n\ Copyright 2005-2014 Rich Felker, et. al.\""); diff --git a/third_party/musl/dn_comp.c b/third_party/musl/dn_comp.c index a6db07072..331d20f18 100644 --- a/third_party/musl/dn_comp.c +++ b/third_party/musl/dn_comp.c @@ -93,7 +93,7 @@ int dn_comp(const char *src, unsigned char **dnptrs, unsigned char **lastdnptr) { - int i, j, n, m=0, offset, bestlen=0, bestoff; + int i, j, n, m=0, offset, bestlen=0, bestoff=0; unsigned char lens[127]; unsigned char **p; const char *end; diff --git a/third_party/nsync/futex.c b/third_party/nsync/futex.c index de9a5889b..feb8c49ef 100644 --- a/third_party/nsync/futex.c +++ b/third_party/nsync/futex.c @@ -143,7 +143,7 @@ static int nsync_futex_polyfill_ (atomic_int *w, int expect, struct timespec *ab if (abstime && timespec_cmp (timespec_real (), *abstime) >= 0) { return -ETIMEDOUT; } - pthread_yield (); + pthread_yield_np (); } } @@ -373,7 +373,7 @@ int nsync_futex_wake_ (atomic_int *w, int count, char pshare) { } } else { Polyfill: - pthread_yield (); + pthread_yield_np (); rc = 0; } diff --git a/third_party/nsync/yield.c b/third_party/nsync/yield.c index 495391fdd..f15b29f6c 100644 --- a/third_party/nsync/yield.c +++ b/third_party/nsync/yield.c @@ -22,6 +22,6 @@ #include "third_party/nsync/common.internal.h" void nsync_yield_ (void) { - pthread_yield (); + pthread_yield_np (); STRACE ("nsync_yield_()"); } diff --git a/third_party/openmp/kmp_os.h b/third_party/openmp/kmp_os.h index 8ef3746ce..6ce5d511a 100644 --- a/third_party/openmp/kmp_os.h +++ b/third_party/openmp/kmp_os.h @@ -75,7 +75,7 @@ #error Unknown compiler #endif -#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_WASI +#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_WASI && !defined(__COSMOPOLITAN__) #define KMP_AFFINITY_SUPPORTED 1 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64 #define KMP_GROUP_AFFINITY 1 diff --git a/third_party/zstd/BUILD.mk b/third_party/zstd/BUILD.mk index 27edd513b..5aa69393c 100644 --- a/third_party/zstd/BUILD.mk +++ b/third_party/zstd/BUILD.mk @@ -147,6 +147,7 @@ $(THIRD_PARTY_ZSTD_A_CHECKS): private \ $(THIRD_PARTY_ZSTD_A_OBJS): private \ CFLAGS += \ -Wframe-larger-than=262144 \ + -Wno-array-bounds \ -Wno-comment $(THIRD_PARTY_ZSTD_A_OBJS): private \ diff --git a/tool/build/compile.c b/tool/build/compile.c index aee8f9e30..576177ef5 100644 --- a/tool/build/compile.c +++ b/tool/build/compile.c @@ -226,65 +226,6 @@ const char *const kSafeEnv[] = { "SYSTEMROOT", // needed by socket() }; -const char *const kGccOnlyFlags[] = { - "--nocompress-debug-sections", - "--noexecstack", - "-Wa,--nocompress-debug-sections", - "-Wa,--noexecstack", - "-Wa,-msse2avx", - "-Wno-unused-but-set-variable", - "-Wunsafe-loop-optimizations", - "-fbranch-target-load-optimize", - "-fcx-limited-range", - "-fdelete-dead-exceptions", - "-femit-struct-debug-baseonly", - "-ffp-int-builtin-inexact", - "-finline-functions-called-once", - "-fipa-pta", - "-fivopts", - "-flimit-function-alignment", - "-fmerge-constants", - "-fmodulo-sched", - "-fmodulo-sched-allow-regmoves", - "-fno-align-jumps", - "-fno-align-labels", - "-fno-align-loops", - "-fno-cx-limited-range", - "-fno-fp-int-builtin-inexact", - "-fno-gnu-unique", - "-fno-gnu-unique", - "-fno-inline-functions-called-once", - "-fno-instrument-functions", - "-fno-schedule-insns2", - "-fno-whole-program", - "-fopt-info-vec", - "-fopt-info-vec-missed", - "-freg-struct-return", - "-freschedule-modulo-scheduled-loops", - "-frounding-math", - "-fsched2-use-superblocks", - "-fschedule-insns", - "-fschedule-insns2", - "-fshrink-wrap", - "-fshrink-wrap-separate", - "-fsignaling-nans", - "-fstack-clash-protection", - "-ftracer", - "-ftrapv", - "-ftree-loop-im", - "-ftree-loop-vectorize", - "-funsafe-loop-optimizations", - "-fversion-loops-for-strides", - "-fwhole-program", - "-gdescribe-dies", - "-gstabs", - "-mcall-ms2sysv-xlogues", - "-mdispatch-scheduler", - "-mfpmath=sse+387", - "-mmitigate-rop", - "-mno-fentry", -}; - void OnAlrm(int sig) { ++gotalrm; } @@ -400,21 +341,38 @@ bool IsSafeEnv(const char *s) { return false; } -bool IsGccOnlyFlag(const char *s) { - int m, l, r, x; - l = 0; - r = ARRAYLEN(kGccOnlyFlags) - 1; - while (l <= r) { - m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) - x = strcmp(s, kGccOnlyFlags[m]); - if (x < 0) { - r = m - 1; - } else if (x > 0) { - l = m + 1; - } else { - return true; +char *Slurp(const char *path) { + int fd; + char *res = 0; + if ((fd = open(path, O_RDONLY)) != -1) { + ssize_t size; + if ((size = lseek(fd, 0, SEEK_END)) != -1) { + char *buf; + if ((buf = calloc(1, size + 1))) { + if (pread(fd, buf, size, 0) == size) { + res = buf; + } else { + free(buf); + } + } } + close(fd); } + return res; +} + +bool HasFlag(const char *flags, const char *s) { + char buf[256]; + size_t n = strlen(s); + if (!flags) return false; + if (n + 2 > sizeof(buf)) return false; + memcpy(buf, s, n); + buf[n] = '\n'; + buf[n + 1] = 0; + return !!strstr(flags, buf); +} + +bool IsGccOnlyFlag(const char *s) { if (s[0] == '-') { if (s[1] == 'f') { if (startswith(s, "-ffixed-")) return true; @@ -428,8 +386,25 @@ bool IsGccOnlyFlag(const char *s) { if (startswith(s, "-mstringop-strategy=")) return true; if (startswith(s, "-mpreferred-stack-boundary=")) return true; if (startswith(s, "-Wframe-larger-than=")) return true; + if (startswith(s, "-Walloca-larger-than=")) return true; } - return false; + static bool once; + static char *gcc_only_flags; + if (!once) { + gcc_only_flags = Slurp("build/bootstrap/gcc-only-flags.txt"); + once = true; + } + return HasFlag(gcc_only_flags, s); +} + +bool IsClangOnlyFlag(const char *s) { + static bool once; + static char *clang_only_flags; + if (!once) { + clang_only_flags = Slurp("build/bootstrap/clang-only-flags.txt"); + once = true; + } + return HasFlag(clang_only_flags, s); } bool FileExistsAndIsNewerThan(const char *filepath, const char *thanpath) { @@ -926,12 +901,12 @@ int main(int argc, char *argv[]) { } s = basename(strdup(cmd)); - if (strstr(s, "gcc") || strstr(s, "g++")) { - iscc = true; - isgcc = true; - } else if (strstr(s, "clang") || strstr(s, "clang++")) { + if (strstr(s, "clang") || strstr(s, "clang++")) { iscc = true; isclang = true; + } else if (strstr(s, "gcc") || strstr(s, "g++")) { + iscc = true; + isgcc = true; } else if (strstr(s, "ld.bfd")) { isbfd = true; } else if (strstr(s, "ar.com")) { @@ -990,6 +965,9 @@ int main(int argc, char *argv[]) { AddArg(argv[i]); continue; } + if (isgcc && IsClangOnlyFlag(argv[i])) { + continue; + } if (isclang && IsGccOnlyFlag(argv[i])) { continue; } @@ -1188,7 +1166,9 @@ int main(int argc, char *argv[]) { !strcmp(argv[i], "-O3"))) { /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97623 */ AddArg(argv[i]); - AddArg("-fno-code-hoisting"); + if (!isclang) { + AddArg("-fno-code-hoisting"); + } } else { AddArg(argv[i]); } diff --git a/libc/intrin/__clear_cache.c b/tool/build/findape.c similarity index 63% rename from libc/intrin/__clear_cache.c rename to tool/build/findape.c index 3d93adf45..88373b651 100644 --- a/libc/intrin/__clear_cache.c +++ b/tool/build/findape.c @@ -16,22 +16,49 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/runtime/runtime.h" +#include "libc/serialize.h" +#include "libc/stdio/ftw.h" +#include "libc/stdio/stdio.h" +#include "libc/sysv/consts/o.h" -void __clear_cache2(const void *base, const void *end) { -#ifdef __aarch64__ - int icache, dcache; - const char *p, *pe = end; - static unsigned int ctr_el0 = 0; - if (!ctr_el0) asm volatile("mrs\t%0,ctr_el0" : "=r"(ctr_el0)); - icache = 4 << (ctr_el0 & 15); - dcache = 4 << ((ctr_el0 >> 16) & 15); - for (p = (const char *)((uintptr_t)base & -dcache); p < pe; p += dcache) { - asm volatile("dc\tcvau,%0" : : "r"(p) : "memory"); +// finds ape executables +// usage: findelf PATH... + +static int OnFile(const char *fpath, const struct stat *st, int typeflag, + struct FTW *ftwbuf) { + if (typeflag == FTW_F) { + char buf[8] = {0}; + int fd = open(fpath, O_RDONLY); + if (fd != -1) { + pread(fd, buf, sizeof(buf), 0); + close(fd); + if (READ64LE(buf) == READ64LE("MZqFpD='") || + READ64LE(buf) == READ64LE("jartsr='") || + READ64LE(buf) == READ64LE("APEDBG='")) { + tinyprint(1, fpath, "\n", NULL); + } + } else { + perror(fpath); + } + } + return 0; +} + +static void HandleArg(const char *path) { + if (nftw(path, OnFile, 128, FTW_PHYS | FTW_DEPTH)) { + perror(path); + exit(1); + } +} + +int main(int argc, char *argv[]) { + if (argc <= 1) { + HandleArg("."); + } else { + for (int i = 1; i < argc; ++i) { + HandleArg(argv[i]); + } } - asm volatile("dsb\tish" ::: "memory"); - for (p = (const char *)((uintptr_t)base & -icache); p < pe; p += icache) { - asm volatile("ic\tivau,%0" : : "r"(p) : "memory"); - } - asm volatile("dsb\tish\nisb" ::: "memory"); -#endif } diff --git a/tool/build/findelf.c b/tool/build/findelf.c new file mode 100644 index 000000000..4b781eae4 --- /dev/null +++ b/tool/build/findelf.c @@ -0,0 +1,65 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/elf/def.h" +#include "libc/elf/struct/ehdr.h" +#include "libc/runtime/runtime.h" +#include "libc/serialize.h" +#include "libc/stdio/ftw.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/o.h" + +// finds elf executables +// usage: findelf PATH... + +static int OnFile(const char *fpath, const struct stat *st, int typeflag, + struct FTW *ftwbuf) { + if (typeflag == FTW_F && (st->st_mode & 0111)) { + Elf64_Ehdr ehdr = {0}; + int fd = open(fpath, O_RDONLY); + if (fd != -1) { + pread(fd, &ehdr, sizeof(ehdr), 0); + close(fd); + if (READ32LE(ehdr.e_ident) == READ32LE(ELFMAG) && ehdr.e_type != ET_REL) { + tinyprint(1, fpath, "\n", NULL); + } + } else { + perror(fpath); + } + } + return 0; +} + +static void HandleArg(const char *path) { + if (nftw(path, OnFile, 128, FTW_PHYS | FTW_DEPTH)) { + perror(path); + exit(1); + } +} + +int main(int argc, char *argv[]) { + if (argc <= 1) { + HandleArg("."); + } else { + for (int i = 1; i < argc; ++i) { + HandleArg(argv[i]); + } + } +} diff --git a/tool/build/findpe.c b/tool/build/findpe.c index 548ea288a..68e59dc1a 100644 --- a/tool/build/findpe.c +++ b/tool/build/findpe.c @@ -22,6 +22,9 @@ #include "libc/stdio/stdio.h" #include "libc/sysv/consts/o.h" +// finds portable executables (and actually portable executable) +// usage: findelf PATH... + static int OnFile(const char *fpath, const struct stat *st, int typeflag, struct FTW *ftwbuf) { if (typeflag == FTW_F) { diff --git a/tool/build/fixupobj.c b/tool/build/fixupobj.c index f57192c16..7cb391bce 100644 --- a/tool/build/fixupobj.c +++ b/tool/build/fixupobj.c @@ -29,13 +29,16 @@ #include "libc/errno.h" #include "libc/fmt/itoa.h" #include "libc/fmt/magnumstrs.internal.h" +#include "libc/intrin/kprintf.h" #include "libc/limits.h" #include "libc/log/log.h" #include "libc/macros.internal.h" #include "libc/mem/gc.h" #include "libc/runtime/runtime.h" #include "libc/serialize.h" +#include "libc/stdalign.internal.h" #include "libc/stdckdint.h" +#include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/msync.h" @@ -48,8 +51,10 @@ * @fileoverview GCC Codegen Fixer-Upper. */ -#define COSMO_TLS_REG 28 -#define MRS_TPIDR_EL0 0xd53bd040u +#define COSMO_TLS_REG 28 +#define MRS_TPIDR_EL0 0xd53bd040u +#define IFUNC_SECTION ".init.202.ifunc" + #define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST)) static int mode; @@ -365,6 +370,203 @@ static void RelinkZipFiles(void) { eocd = foot; } +// when __attribute__((__target_clones__(...))) is used, the compiler +// will generate multiple implementations of a function for different +// microarchitectures as well as a resolver function that tells which +// function is appropriate to call. however the compiler doesn't make +// code for the actual function. it also doesn't record where resolve +// functions are located in the binary so we've reverse eng'd it here +static void GenerateIfuncInit(void) { + char *name, *s; + long code_i = 0; + long relas_i = 0; + static char code[16384]; + static Elf64_Rela relas[1024]; + Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0); + if (!symtab_shdr) Die("symbol table section header not found"); + Elf64_Word symtab_shdr_index = + ((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize; + for (Elf64_Xword i = 0; i < symcount; ++i) { + if (syms[i].st_shndx == SHN_UNDEF) continue; + if (syms[i].st_shndx >= SHN_LORESERVE) continue; + if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC) continue; + if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name))) + Die("could not get symbol name of ifunc"); + static char resolver_name[65536]; + strlcpy(resolver_name, name, sizeof(resolver_name)); + if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >= + sizeof(resolver_name)) + Die("ifunc name too long"); + Elf64_Xword function_sym_index = i; + Elf64_Xword resolver_sym_index = -1; + for (Elf64_Xword i = 0; i < symcount; ++i) { + if (syms[i].st_shndx == SHN_UNDEF) continue; + if (syms[i].st_shndx >= SHN_LORESERVE) continue; + if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC) continue; + if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name))) continue; + if (strcmp(s, resolver_name)) continue; + resolver_sym_index = i; + break; + } + if (resolver_sym_index == -1) + // this can happen if a function with __target_clones() also has a + // __weak_reference() defined, in which case GCC shall only create + // one resolver function for the two of them so we can ignore this + // HOWEVER the GOT will still have an entry for each two functions + continue; + + // call the resolver (using cosmo's special .init abi) + static const char chunk1[] = { + 0x57, // push %rdi + 0x56, // push %rsi + 0xe8, 0x00, 0x00, 0x00, 0x00, // call f.resolver + }; + if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas)) + Die("too many ifuncs"); + memcpy(code + code_i, chunk1, sizeof(chunk1)); + relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32); + relas[relas_i].r_offset = code_i + 1 + 1 + 1; + relas[relas_i].r_addend = -4; + code_i += sizeof(chunk1); + relas_i += 1; + + // move the resolved function address into the GOT slot. it's very + // important that this happen, because the linker by default makes + // self-referencing PLT functions whose execution falls through oh + // no. we need to repeat this process for any aliases this defines + static const char chunk2[] = { + 0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00, // mov %rax,f@gotpcrel(%rip) + }; + for (Elf64_Xword i = 0; i < symcount; ++i) { + if (i == function_sym_index || + (ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC && + syms[i].st_shndx == syms[function_sym_index].st_shndx && + syms[i].st_value == syms[function_sym_index].st_value)) { + if (code_i + sizeof(chunk2) > sizeof(code) || + relas_i + 1 > ARRAYLEN(relas)) + Die("too many ifuncs"); + memcpy(code + code_i, chunk2, sizeof(chunk2)); + relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL); + relas[relas_i].r_offset = code_i + 3; + relas[relas_i].r_addend = -4; + code_i += sizeof(chunk2); + relas_i += 1; + } + } + + static const char chunk3[] = { + 0x5e, // pop %rsi + 0x5f, // pop %rdi + }; + if (code_i + sizeof(chunk3) > sizeof(code)) Die("too many ifuncs"); + memcpy(code + code_i, chunk3, sizeof(chunk3)); + code_i += sizeof(chunk3); + } + if (!code_i) return; + + // prepare to mutate elf + // remap file so it has more space + if (elf->e_shnum + 2 > 65535) Die("too many sections"); + size_t reserve_size = esize + 32 * 1024 * 1024; + if (ftruncate(fildes, reserve_size)) SysExit("ifunc ftruncate #1"); + elf = mmap((char *)elf, reserve_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fildes, 0); + if (elf == MAP_FAILED) SysExit("ifunc mmap"); + + // duplicate section name strings table to end of file + Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff + + elf->e_shstrndx * elf->e_shentsize); + memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset, + shdrstr_shdr->sh_size); + shdrstr_shdr->sh_offset = esize; + esize += shdrstr_shdr->sh_size; + + // append strings for the two sections we're creating + const char *code_section_name = IFUNC_SECTION; + Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size; + memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1); + shdrstr_shdr->sh_size += strlen(code_section_name) + 1; + esize += strlen(code_section_name) + 1; + const char *rela_section_name = ".rela" IFUNC_SECTION; + Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size; + memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1); + shdrstr_shdr->sh_size += strlen(rela_section_name) + 1; + esize += strlen(rela_section_name) + 1; + unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size); + ++esize; + + // duplicate section headers to end of file + esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr); + memcpy((char *)elf + esize, (char *)elf + elf->e_shoff, + elf->e_shnum * elf->e_shentsize); + elf->e_shoff = esize; + esize += elf->e_shnum * elf->e_shentsize; + unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize); + + // append code section header + Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize); + Elf64_Word code_shdr_index = elf->e_shnum++; + esize += elf->e_shentsize; + code_shdr->sh_name = code_section_name_offset; + code_shdr->sh_type = SHT_PROGBITS; + code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR; + code_shdr->sh_addr = 0; + code_shdr->sh_link = 0; + code_shdr->sh_info = 0; + code_shdr->sh_entsize = 1; + code_shdr->sh_addralign = 1; + code_shdr->sh_size = code_i; + + // append code's rela section header + Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize); + esize += elf->e_shentsize; + rela_shdr->sh_name = rela_section_name_offset; + rela_shdr->sh_type = SHT_RELA; + rela_shdr->sh_flags = SHF_INFO_LINK; + rela_shdr->sh_addr = 0; + rela_shdr->sh_info = code_shdr_index; + rela_shdr->sh_link = symtab_shdr_index; + rela_shdr->sh_entsize = sizeof(Elf64_Rela); + rela_shdr->sh_addralign = alignof(Elf64_Rela); + rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela); + elf->e_shnum++; + + // append relas + esize = (esize + 63) & -64; + rela_shdr->sh_offset = esize; + memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela)); + esize += relas_i * sizeof(Elf64_Rela); + unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size); + + // append code + esize = (esize + 63) & -64; + code_shdr->sh_offset = esize; + memcpy((char *)elf + esize, code, code_i); + esize += code_i; + unassert(esize == code_shdr->sh_offset + code_shdr->sh_size); + + if (ftruncate(fildes, esize)) SysExit("ifunc ftruncate #1"); +} + +// when __attribute__((__target_clones__(...))) is used, static binaries +// become poisoned with rela IFUNC relocations, which the linker refuses +// to remove. even if we objcopy the ape executable as binary the linker +// preserves its precious ifunc code and puts them before the executable +// header. the good news is that the linker actually does link correctly +// which means we can delete the broken rela sections in the elf binary. +static void PurgeIfuncSections(void) { + Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff); + for (Elf64_Word i = 0; i < elf->e_shnum; ++i) { + char *name; + if (shdrs[i].sh_type == SHT_RELA || + ((name = GetElfSectionName(elf, esize, shdrs + i)) && + !strcmp(name, ".init.202.ifunc"))) { + shdrs[i].sh_type = SHT_NULL; + shdrs[i].sh_flags &= ~SHF_ALLOC; + } + } +} + static void FixupObject(void) { if ((fildes = open(epath, mode)) == -1) { SysExit("open"); @@ -373,8 +575,8 @@ static void FixupObject(void) { SysExit("lseek"); } if (esize) { - if ((elf = mmap(0, esize, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0)) == - MAP_FAILED) { + if ((elf = mmap((void *)0x003210000000, esize, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fildes, 0)) == MAP_FAILED) { SysExit("mmap"); } if (!IsElf64Binary(elf, esize)) { @@ -393,6 +595,7 @@ static void FixupObject(void) { if (mode == O_RDWR) { if (elf->e_machine == EM_NEXGEN32E) { OptimizePatchableFunctionEntries(); + GenerateIfuncInit(); } else if (elf->e_machine == EM_AARCH64) { RewriteTlsCode(); if (elf->e_type != ET_REL) { @@ -400,6 +603,7 @@ static void FixupObject(void) { } } if (elf->e_type != ET_REL) { + PurgeIfuncSections(); RelinkZipFiles(); } if (msync(elf, esize, MS_ASYNC | MS_INVALIDATE)) { diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c index 91dd299de..3aa2a2b69 100644 --- a/tool/build/mkdeps.c +++ b/tool/build/mkdeps.c @@ -19,6 +19,7 @@ #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/errno.h" +#include "libc/fmt/itoa.h" #include "libc/fmt/libgen.h" #include "libc/fmt/magnumstrs.internal.h" #include "libc/intrin/kprintf.h" @@ -45,48 +46,48 @@ "copyright 2023 justine tunney\n" \ "https://github.com/jart/cosmopolitan\n" -#define MANUAL \ - " -r o// -o OUTPUT INPUT...\n" \ - "\n" \ - "DESCRIPTION\n" \ - "\n" \ - " Generates header file dependencies for your makefile\n" \ - "\n" \ - " This tool computes the transitive closure of included paths\n" \ - " for every source file in your repository. This program does\n" \ - " it orders of a magnitude faster than `gcc -M` on each file.\n" \ - "\n" \ - " Includes look like this:\n" \ - "\n" \ - " - #include \n" \ - " - #include \"samedir.h\"\n" \ - " - #include \"root/of/repository/foo.h\"\n" \ - " - .include \"asm/x86_64/foo.s\"\n" \ - "\n" \ - " Your generated make code looks like this:\n" \ - "\n" \ - " o//package/foo.o: \\\n" \ - " package/foo.c \\\n" \ - " package/foo.h \\\n" \ - " package/bar.h \\\n" \ - " libc/isystem/stdio.h\n" \ - " o//package/bar.o: \\\n" \ - " package/bar.c \\\n" \ - " package/bar.h\n" \ - "\n" \ - "FLAGS\n" \ - "\n" \ - " -h show usage\n" \ - " -o OUTPUT set output path\n" \ - " -g ROOT set generated path [default: o/]\n" \ - " -r ROOT set build output path, e.g. o/$(MODE)/\n" \ - " -S PATH isystem include path [default: libc/isystem/]\n" \ - " -s hermetically sealed mode [repeatable]\n" \ - "\n" \ - "ARGUMENTS\n" \ - "\n" \ - " OUTPUT shall be makefile code\n" \ - " INPUT should be source or @args.txt\n" \ +#define MANUAL \ + " -r o// -o OUTPUT INPUT...\n" \ + "\n" \ + "DESCRIPTION\n" \ + "\n" \ + " Generates header file dependencies for your makefile\n" \ + "\n" \ + " This tool computes the transitive closure of included paths\n" \ + " for every source file in your repository. This program does\n" \ + " it orders of a magnitude faster than `gcc -M` on each file.\n" \ + "\n" \ + " Includes look like this:\n" \ + "\n" \ + " - #include \n" \ + " - #include \"samedir.h\"\n" \ + " - #include \"root/of/repository/foo.h\"\n" \ + " - .include \"asm/x86_64/foo.s\"\n" \ + "\n" \ + " Your generated make code looks like this:\n" \ + "\n" \ + " o//package/foo.o: \\\n" \ + " package/foo.c \\\n" \ + " package/foo.h \\\n" \ + " package/bar.h \\\n" \ + " libc/isystem/stdio.h\n" \ + " o//package/bar.o: \\\n" \ + " package/bar.c \\\n" \ + " package/bar.h\n" \ + "\n" \ + "FLAGS\n" \ + "\n" \ + " -h show usage\n" \ + " -o OUTPUT set output path\n" \ + " -g ROOT set generated path [default: o/]\n" \ + " -r ROOT set build output path, e.g. o/$(MODE)/\n" \ + " -S PATH isystem include path [repeatable; default: libc/isystem/]\n" \ + " -s hermetically sealed mode [repeatable]\n" \ + "\n" \ + "ARGUMENTS\n" \ + "\n" \ + " OUTPUT shall be makefile code\n" \ + " INPUT should be source or @args.txt\n" \ "\n" #define Read32(s) (s[3] << 24 | s[2] << 16 | s[1] << 8 | s[0]) @@ -118,6 +119,11 @@ struct Edges { struct Edge *p; }; +struct Paths { + long n; + const char *p[64]; +}; + static const uint32_t kSourceExts[] = { EXT("s"), // assembly EXT("S"), // assembly with c preprocessor @@ -135,7 +141,7 @@ static const char *prog; static struct Edges edges; static struct Sauce *sauces; static struct Sources sources; -static const char *systempath; +static struct Paths systempaths; static const char *buildroot; static const char *genroot; static const char *outpath; @@ -329,7 +335,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize, // scan backwards for hash character for (;;) { if (q == map) { - return false; + return 0; } if (IsBlank(q[-1])) { --q; @@ -342,7 +348,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize, --q; break; } else { - return false; + return 0; } } @@ -359,7 +365,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize, if (q[-1] == '\n') { break; } else { - return false; + return 0; } } } @@ -378,7 +384,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize, ++q; break; } else { - return false; + return 0; } } @@ -395,7 +401,7 @@ static void LoadRelationships(int argc, char *argv[]) { struct GetArgs ga; int srcid, dependency; static char srcdirbuf[PATH_MAX]; - const char *p, *pe, *src, *path, *pathend, *srcdir; + const char *p, *pe, *src, *path, *pathend, *srcdir, *final; getargs_init(&ga, argv + optind); while ((src = getargs_next(&ga))) { CreateSourceId(src); @@ -433,51 +439,71 @@ static void LoadRelationships(int argc, char *argv[]) { DieSys(src); } for (p = map, pe = map + size; p < pe; ++p) { - char *bp; - char right; - char buf[PATH_MAX]; if (!(p = memmem(p, pe - p, "include ", 8))) break; if (!(path = FindIncludePath(map, size, p, is_assembly))) continue; - bp = buf; + // copy the specified include path + char right; if (path[-1] == '<') { - if (!systempath) continue; - bp = stpcpy(bp, systempath); + if (!systempaths.n) continue; right = '>'; } else { right = '"'; } - if ((pathend = memchr(path, right, pe - path))) { - const char *final; - char juf[PATH_MAX]; - if ((bp - buf) + (pathend - path) >= PATH_MAX) { - tinyprint(2, src, ": include path too long\n", NULL); - exit(1); - } - *(bp = mempcpy(bp, path, pathend - path)) = 0; - // let foo/bar.c say `#include "foo/hdr.h"` - dependency = GetSourceId((final = buf)); - // let foo/bar.c say `#include "hdr.h"` - if (dependency == -1 && right == '"' && !strchr(buf, '/')) { - if (!(final = __join_paths(juf, PATH_MAX, srcdir, buf))) { - DiePathTooLong(buf); + if (!(pathend = memchr(path, right, pe - path))) continue; + if (pathend - path >= PATH_MAX) { + tinyprint(2, src, ": uses really long include path\n", NULL); + exit(1); + } + char juf[PATH_MAX]; + char incpath[PATH_MAX]; + *(char *)mempcpy(incpath, path, pathend - path) = 0; + if (right == '>') { + // handle angle bracket includes + dependency = -1; + for (long i = 0; i < systempaths.n; ++i) { + if (!(final = + __join_paths(juf, PATH_MAX, systempaths.p[i], incpath))) { + DiePathTooLong(incpath); + } + if ((dependency = GetSourceId(final)) != -1) { + break; } - dependency = GetSourceId(final); } - if (dependency == -1) { - if (startswith(buf, genroot)) { - dependency = CreateSourceId(src); - } else if (!hermetic) { - continue; - } else if (hermetic == 1 && right == '>') { + if (dependency != -1) { + AppendEdge(&edges, dependency, srcid); + p = pathend + 1; + } else { + if (hermetic == 1) { // chances are the `#include ` is in some #ifdef // that'll never actually be executed; thus we ignore // since landlock make unveil() shall catch it anyway continue; + } + tinyprint(2, incpath, + ": system header not specified by the HDRS/SRCS/INCS " + "make variables defined by the hermetic mono repo\n", + NULL); + exit(1); + } + } else { + // handle double quote includes + // let foo/bar.c say `#include "foo/hdr.h"` + dependency = GetSourceId((final = incpath)); + // let foo/bar.c say `#include "hdr.h"` + if (dependency == -1 && !strchr(final, '/')) { + if (!(final = __join_paths(juf, PATH_MAX, srcdir, final))) { + DiePathTooLong(incpath); + } + dependency = GetSourceId(final); + } + if (dependency == -1) { + if (startswith(final, genroot)) { + dependency = CreateSourceId(src); } else { - tinyprint( - 2, final, - ": path not specified by HDRS/SRCS/INCS make variables\n", - NULL); + tinyprint(2, incpath, + ": path not specified by HDRS/SRCS/INCS make variables " + "(it was included by ", + src, ")\n", NULL); exit(1); } } @@ -498,6 +524,13 @@ static wontreturn void ShowUsage(int rc, int fd) { exit(rc); } +static void AddPath(struct Paths *paths, const char *path) { + if (paths->n == ARRAYLEN(paths->p)) { + Die("too many path arguments"); + } + paths->p[paths->n++] = path; +} + static void GetOpts(int argc, char *argv[]) { int opt; while ((opt = getopt(argc, argv, "hnsgS:o:r:")) != -1) { @@ -506,10 +539,7 @@ static void GetOpts(int argc, char *argv[]) { ++hermetic; break; case 'S': - if (systempath) { - Die("multiple system paths not supported yet"); - } - systempath = optarg; + AddPath(&systempaths, optarg); break; case 'o': if (outpath) { @@ -555,29 +585,33 @@ static void GetOpts(int argc, char *argv[]) { if (!startswith(buildroot, genroot)) { Die("build output path must start with generated output path"); } - if (!systempath && hermetic) { - systempath = "libc/isystem/"; + if (!systempaths.n && hermetic) { + AddPath(&systempaths, "third_party/libcxx/include/"); + AddPath(&systempaths, "libc/isystem/"); } - if (systempath && !hermetic) { + if (systempaths.n && !hermetic) { Die("system path can only be specified in hermetic mode"); } - if (systempath) { + long j = 0; + for (long i = 0; i < systempaths.n; ++i) { size_t n; struct stat st; - if (stat(systempath, &st)) { - DieSys(systempath); + const char *path = systempaths.p[i]; + if (!stat(path, &st)) { + systempaths.p[j++] = path; + if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + DieSys(path); + } } - if (!S_ISDIR(st.st_mode)) { - errno = ENOTDIR; - DieSys(systempath); + if ((n = strlen(path)) >= PATH_MAX) { + DiePathTooLong(path); } - if ((n = strlen(systempath)) >= PATH_MAX) { - DiePathTooLong(systempath); - } - if (!n || systempath[n - 1] != '/') { + if (!n || path[n - 1] != '/') { Die("system path must end with slash"); } } + systempaths.n = j; } static const char *StripExt(char pathbuf[hasatleast PATH_MAX], const char *s) { diff --git a/tool/build/objbincopy.c b/tool/build/objbincopy.c index a17b3fb63..ab3a906b5 100644 --- a/tool/build/objbincopy.c +++ b/tool/build/objbincopy.c @@ -201,7 +201,7 @@ static void ValidateMachoSection(const char *inpath, // Die(inpath, "don't bother with mach-o sections"); } namelen = strnlen(loadseg->name, sizeof(loadseg->name)); - if (!loadseg->name) { + if (!loadseg->name[0]) { Die(inpath, "mach-o load segment missing name"); } if (filesize || (loadseg->vaddr && loadseg->memsz)) { diff --git a/tool/build/package.c b/tool/build/package.c index df271c788..b52299eff 100644 --- a/tool/build/package.c +++ b/tool/build/package.c @@ -433,7 +433,8 @@ static void LoadSymbols(struct Package *pkg, uint32_t object) { symbol.type = ELF64_ST_TYPE(obj->syms[i].st_info); if (symbol.bind_ != STB_LOCAL && (symbol.type == STT_OBJECT || symbol.type == STT_FUNC || - symbol.type == STT_COMMON || symbol.type == STT_NOTYPE)) { + symbol.type == STT_COMMON || symbol.type == STT_NOTYPE || + symbol.type == STT_GNU_IFUNC)) { if (!(name = GetElfString(obj->elf, obj->size, obj->strs, obj->syms[i].st_name))) { Die("error", "elf overflow"); diff --git a/tool/build/runit.c b/tool/build/runit.c index 9dc1a897e..f94f735ea 100644 --- a/tool/build/runit.c +++ b/tool/build/runit.c @@ -151,17 +151,11 @@ void Connect(void) { struct timespec deadline; if ((rc = getaddrinfo(g_hostname, gc(xasprintf("%hu", g_runitdport)), &kResolvHints, &ai)) != 0) { - FATALF("%s:%hu: EAI_%s %m", g_hostname, g_runitdport, gai_strerror(rc)); + FATALF("%s:%hu: DNS lookup failed: %s", g_hostname, g_runitdport, + gai_strerror(rc)); __builtin_unreachable(); } ip4 = (const char *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr; - if (ispublicip(ai->ai_family, - &((struct sockaddr_in *)ai->ai_addr)->sin_addr)) { - FATALF("%s points to %hhu.%hhu.%hhu.%hhu" - " which isn't part of a local/private/testing subnet", - g_hostname, ip4[0], ip4[1], ip4[2], ip4[3]); - __builtin_unreachable(); - } DEBUGF("connecting to %d.%d.%d.%d port %d", ip4[0], ip4[1], ip4[2], ip4[3], ntohs(((struct sockaddr_in *)ai->ai_addr)->sin_port)); CHECK_NE(-1, diff --git a/tool/decode/elf.c b/tool/decode/elf.c index 314603fb4..2e59203d4 100644 --- a/tool/decode/elf.c +++ b/tool/decode/elf.c @@ -26,7 +26,6 @@ #include "libc/errno.h" #include "libc/fmt/conv.h" #include "libc/fmt/libgen.h" -#include "libc/serialize.h" #include "libc/intrin/safemacros.internal.h" #include "libc/log/check.h" #include "libc/log/log.h" @@ -34,6 +33,7 @@ #include "libc/mem/gc.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" +#include "libc/serialize.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/auxv.h" diff --git a/tool/emacs/c.lang b/tool/emacs/c.lang index a2805763a..99f7d7c08 100644 --- a/tool/emacs/c.lang +++ b/tool/emacs/c.lang @@ -125,7 +125,7 @@ Keywords={ "mayalias", "dontinstrument", "interruptfn", -"nocallback", +"dontcallback", "textstartup", "warnifused", "attributeallocsize", diff --git a/tool/emacs/cosmo-asm-mode.el b/tool/emacs/cosmo-asm-mode.el index 580fa0b01..70fd0edd2 100644 --- a/tool/emacs/cosmo-asm-mode.el +++ b/tool/emacs/cosmo-asm-mode.el @@ -67,7 +67,8 @@ "progbits" "nobits" "init_array" - "fini_array")]) + "fini_array" + "gnu_indirect_function")]) "\\>"]) "GNU Assembler section, relocation, macro param qualifiers.") diff --git a/tool/emacs/cosmo-c-keywords.el b/tool/emacs/cosmo-c-keywords.el index f3aa5b349..7fd7cdd30 100644 --- a/tool/emacs/cosmo-c-keywords.el +++ b/tool/emacs/cosmo-c-keywords.el @@ -91,7 +91,7 @@ "mayalias" "dontinstrument" "interruptfn" - "nocallback" + "dontcallback" "textstartup" "warnifused" "attributeallocsize" @@ -206,6 +206,9 @@ "__section__" "__sentinel__" "__simd__" + "__vex" + "__avx2" + "__target_clones" "__target_clones__" "__unused__" "__used__" diff --git a/tool/emacs/cosmo-platform-constants.el b/tool/emacs/cosmo-platform-constants.el index 6ba7d8014..65a84a434 100644 --- a/tool/emacs/cosmo-platform-constants.el +++ b/tool/emacs/cosmo-platform-constants.el @@ -71,7 +71,8 @@ "__SUPPORT_SNAN__" "__GCC_IEC_559_COMPLEX" "__NO_MATH_ERRNO__" - "__gnu__")) + "__gnu__" + "_OPENMP")) (cosmo '("__LINKER__")) diff --git a/tool/emacs/cosmo-stuff.el b/tool/emacs/cosmo-stuff.el index 22cd41d0a..c7ba003a3 100644 --- a/tool/emacs/cosmo-stuff.el +++ b/tool/emacs/cosmo-stuff.el @@ -215,7 +215,7 @@ (runs (format "o/$m/%s.com%s V=5 TESTARGS=-b" name runsuffix)) (buns (format "o/$m/test/%s_test.com%s V=5 TESTARGS=-b" name runsuffix))) (cond ((not (member ext '("c" "cc" "cpp" "s" "S" "rl" "f"))) - (format "m=%s; make -j12 MODE=$m o/$m/%s" + (format "m=%s; make -j32 MODE=$m o/$m/%s" mode (directory-file-name (or (file-name-directory @@ -226,7 +226,7 @@ (cosmo-join " && " `("m=%s; f=o/$m/%s.com" - ,(concat "make -j12 $f MODE=$m") + ,(concat "make -j32 $f MODE=$m") "scp $f $f.dbg win10:; ssh win10 ./%s.com")) mode name (file-name-nondirectory name))) ((eq kind 'run-xnu) @@ -234,19 +234,19 @@ (cosmo-join " && " `("m=%s; f=o/$m/%s.com" - ,(concat "make -j12 $f MODE=$m") + ,(concat "make -j32 $f MODE=$m") "scp $f $f.dbg xnu:" "ssh xnu ./%s.com")) mode name (file-name-nondirectory name))) ((and (equal suffix "") (cosmo-contains "_test." (buffer-file-name))) - (format "m=%s; make -j12 MODE=$m %s" + (format "m=%s; make -j32 MODE=$m %s" mode runs)) ((and (equal suffix "") (file-exists-p (format "%s" buddy))) (format (cosmo-join " && " - '("m=%s; n=%s; make -j12 o/$m/$n%s.o MODE=$m" + '("m=%s; n=%s; make -j32 o/$m/$n%s.o MODE=$m" ;; "bloat o/$m/%s.o | head" ;; "nm -C --size o/$m/%s.o | sort -r" "echo" @@ -258,11 +258,11 @@ (cosmo-join " && " `("m=%s; f=o/$m/%s.com" - ,(concat "make -j12 $f MODE=$m") + ,(concat "make -j32 $f MODE=$m") "build/run ./$f")) mode name)) ((eq kind 'test) - (format `"m=%s; f=o/$m/%s.com.ok && make -j12 $f MODE=$m" mode name)) + (format `"m=%s; f=o/$m/%s.com.ok && make -j32 $f MODE=$m" mode name)) ((and (file-regular-p this) (file-executable-p this)) (format "build/run ./%s" file)) @@ -271,7 +271,7 @@ (cosmo-join " && " `("m=%s; f=o/$m/%s%s.o" - ,(concat "make -j12 $f MODE=$m") + ,(concat "make -j32 $f MODE=$m") ;; "nm -C --size $f | sort -r" "echo" "size -A $f | grep '^[.T]' | grep -v 'debug\\|command.line\\|stack' | sort -rnk2" @@ -481,7 +481,7 @@ (error "don't know how to show assembly for non c/c++ source file")) (let* ((default-directory root) (compile-command - (format "make %s -j12 MODE=%s %s %s" + (format "make %s -j32 MODE=%s %s %s" (or extra-make-flags "") mode asm-gcc asm-clang))) (save-buffer) (set-visited-file-modtime (current-time)) @@ -641,11 +641,11 @@ (compile (format "sh -c %s" file))) ((eq major-mode 'lua-mode) (let* ((mode (cosmo--make-mode arg))) - (compile (format "make -j16 MODE=%s o/%s/tool/net/redbean.com && build/run o/%s/tool/net/redbean.com -i %s" mode mode mode file)))) + (compile (format "make -j32 MODE=%s o/%s/tool/net/redbean.com && build/run o/%s/tool/net/redbean.com -i %s" mode mode mode file)))) ((and (eq major-mode 'python-mode) (cosmo-startswith "third_party/python/Lib/test/" file)) (let ((mode (cosmo--make-mode arg))) - (compile (format "make -j12 MODE=%s PYHARNESSARGS=-vv PYTESTARGS=-v o/%s/%s.py.runs" + (compile (format "make -j32 MODE=%s PYHARNESSARGS=-vv PYTESTARGS=-v o/%s/%s.py.runs" mode mode (file-name-sans-extension file))))) ((eq major-mode 'python-mode) (compile (format "python.com %s" file))) @@ -692,8 +692,10 @@ (define-key lua-mode-map (kbd "C-c C-r") 'cosmo-run) (define-key python-mode-map (kbd "C-c C-r") 'cosmo-run) (define-key c-mode-map (kbd "C-c C-s") 'cosmo-run-test) + (define-key c++-mode-map (kbd "C-c C-s") 'cosmo-run-test) (define-key c-mode-map (kbd "C-c C-_") 'cosmo-run-win7) - (define-key c-mode-map (kbd "C-c C-_") 'cosmo-run-win10)) + (define-key c-mode-map (kbd "C-c C-_") 'cosmo-run-win10) + (define-key c++-mode-map (kbd "C-c C-_") 'cosmo-run-win10)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -939,7 +941,7 @@ "T[0-9][0-9]:[0-9][0-9]:[0-9][0-9]" ;; time "[+.][0-9][0-9][0-9][0-9][0-9][0-9]" ;; micros ":\\([^:]+\\)" ;; file - ":\\([0-9]+\\)")) ;; line + ":\\([0-9]+\\)")) ;; line 1 2)) (eval-after-load 'compile @@ -948,6 +950,20 @@ (cons 'cosmo cosmo-compilation-regexps)) (add-to-list 'compilation-error-regexp-alist 'cosmo))) +(defvar cosmo-gcc123-compilation-regexps + (list (cosmo-join + "" + '("inlined from '[^']*' at " + "\\([^:]+\\)" ;; file + ":\\([0-9]+\\)")) ;; line + 1 2)) + +(eval-after-load 'compile + '(progn + (add-to-list 'compilation-error-regexp-alist-alist + (cons 'cosmo cosmo-gcc123-compilation-regexps)) + (add-to-list 'compilation-error-regexp-alist 'cosmo))) + (provide 'cosmo-stuff) ;;; cosmo-stuff.el ends here diff --git a/tool/emacs/key.py b/tool/emacs/key.py index 6da461954..43a8b7c73 100644 --- a/tool/emacs/key.py +++ b/tool/emacs/key.py @@ -340,7 +340,7 @@ cosmo_kws = frozenset([ "mallocesque", "mayalias", "memcpyesque", - "nocallback", + "dontcallback", "nodebuginfo", "__wur", "dontinline", @@ -399,7 +399,7 @@ cosmo_kws = frozenset([ "mallocesque", "mayalias", "memcpyesque", - "nocallback", + "dontcallback", "nodebuginfo", "__wur", "dontinline", diff --git a/tool/viz/printpeb.c b/tool/viz/printpeb.c index 3d3ee1a61..7d9abbc43 100644 --- a/tool/viz/printpeb.c +++ b/tool/viz/printpeb.c @@ -188,7 +188,7 @@ dontasan void PrintTeb(void) { } void PrintPeb(void) { - struct NtPeb *peb = NtGetPeb(); + __seg_gs struct NtPeb *peb = NtGetPeb(); printf("\n\ ╔──────────────────────────────────────────────────────────────────────────────╗\n\ │ new technology § peb │\n\ @@ -327,8 +327,6 @@ void PrintPeb(void) { "pShimData", peb->pShimData); printf("0x%04x: %-40s = 0x%lx\n", offsetof(struct NtPeb, AppCompatInfo), "AppCompatInfo", peb->AppCompatInfo); - printf("0x%04x: %-40s = \"%s\"\n", offsetof(struct NtPeb, CSDVersion), - "CSDVersion", GetString(&peb->CSDVersion)); printf("0x%04x: %-40s = 0x%lx\n", offsetof(struct NtPeb, ActivationContextData), "ActivationContextData", peb->ActivationContextData);