From 69f4152f38c820e7fb5d6137616c98c642f5dd8f Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Mon, 18 Jul 2022 22:26:11 -0700 Subject: [PATCH] Always initialize thread local storage We had previously not enabled TLS in MODE=tiny in order to keep the smallest example programs (e.g. life.com) just 16kb in size. But it was error prone doing that, so now we just always enable it because this change uses hacks to ensure it won't increase life.com's size. This change also fixes a bug on NetBSD, where signal handlers would break thread local storage if SA_SIGINFO was being used. This looks like it might be a bug in NetBSD, but it's got a simple workaround. --- README.md | 2 +- examples/life.c | 4 +- libc/calls/netbsdtramp.S | 4 +- libc/calls/sigaction.c | 5 ++ libc/calls/strace.internal.h | 2 +- libc/calls/struct/ucontext-netbsd.internal.h | 1 - libc/intrin/gettid.greg.c | 4 +- libc/log/oncrash.c | 2 + libc/mem/unveil.c | 1 - libc/runtime/arch_prctl.c | 2 +- libc/runtime/clone.c | 23 ++----- libc/runtime/cosmo.S | 12 +--- libc/runtime/enable_tls.c | 68 +++++++++++++++++--- libc/runtime/ftracer.c | 1 - libc/runtime/internal.h | 2 +- libc/runtime/mapanon.c | 13 ++-- libc/runtime/morph.greg.c | 41 +++++++++++- libc/sysv/syscalls.sh | 2 +- libc/thread/ctor.S | 27 -------- libc/thread/self.c | 2 - libc/thread/sem.c | 2 - libc/thread/spawn.c | 2 - test/libc/mem/malloc_test.c | 6 ++ test/libc/runtime/arch_prctl_test.c | 2 + test/libc/runtime/munmap_test.c | 13 ++-- test/libc/stdio/fputc_test.c | 1 - test/libc/thread/pthread_key_create_test.c | 6 -- third_party/dlmalloc/README.cosmo | 6 +- third_party/dlmalloc/dlmalloc.c | 9 ++- third_party/dlmalloc/locks.inc | 5 +- third_party/dlmalloc/platform.inc | 10 +-- third_party/dlmalloc/vespene.c | 15 ++--- tool/plinko/plinko.c | 2 + 33 files changed, 174 insertions(+), 123 deletions(-) delete mode 100644 libc/thread/ctor.S diff --git a/README.md b/README.md index 15170db9c..dd9d19215 100644 --- a/README.md +++ b/README.md @@ -27,7 
+27,7 @@ wget https://justine.lol/cosmopolitan/cosmopolitan.zip unzip cosmopolitan.zip printf 'main() { printf("hello world\\n"); }\n' >hello.c gcc -g -Os -static -nostdlib -nostdinc -fno-pie -no-pie -mno-red-zone \ - -fno-omit-frame-pointer -pg -mnop-mcount \ + -fno-omit-frame-pointer -pg -mnop-mcount -mno-tls-direct-seg-refs \ -o hello.com.dbg hello.c -fuse-ld=bfd -Wl,-T,ape.lds \ -include cosmopolitan.h crt.o ape-no-modify-self.o cosmopolitan.a objcopy -S -O binary hello.com.dbg hello.com diff --git a/examples/life.c b/examples/life.c index 5e8dfe9c4..b61489df7 100644 --- a/examples/life.c +++ b/examples/life.c @@ -8,4 +8,6 @@ ╚─────────────────────────────────────────────────────────────────*/ #endif -int main() { return 42; } +int main() { + return 42; +} diff --git a/libc/calls/netbsdtramp.S b/libc/calls/netbsdtramp.S index e0493b16e..3f4699933 100644 --- a/libc/calls/netbsdtramp.S +++ b/libc/calls/netbsdtramp.S @@ -21,9 +21,9 @@ __restore_rt_netbsd: mov %r15,%rdi - mov $308,%rax # setcontext + mov $308,%eax # setcontext syscall - mov $-1,%edi + or $-1,%edi mov $1,%rax # exit syscall .endfn __restore_rt_netbsd,globl,hidden diff --git a/libc/calls/sigaction.c b/libc/calls/sigaction.c index 16d285750..4f8afaabc 100644 --- a/libc/calls/sigaction.c +++ b/libc/calls/sigaction.c @@ -205,6 +205,11 @@ static int __sigaction(int sig, const struct sigaction *act, arg4 = (int64_t)(intptr_t)oldact; /* from go code */ arg5 = 0; } else if (IsNetbsd()) { + /* int __sigaction_sigtramp(int signum, + const struct sigaction *nsa, + struct sigaction *osa, + const void *tramp, + int vers); */ if (ap) { arg4 = (int64_t)(intptr_t)&__restore_rt_netbsd; arg5 = 2; /* netbsd/lib/libc/arch/x86_64/sys/__sigtramp2.S */ diff --git a/libc/calls/strace.internal.h b/libc/calls/strace.internal.h index 16c29f56d..ee247e005 100644 --- a/libc/calls/strace.internal.h +++ b/libc/calls/strace.internal.h @@ -7,7 +7,7 @@ #include "libc/calls/struct/stat.h" #include "libc/runtime/runtime.h" -#define 
_KERNTRACE 0 /* not configurable w/ flag yet */ +#define _KERNTRACE 1 /* not configurable w/ flag yet */ #define _POLLTRACE 0 /* not configurable w/ flag yet */ #define _DATATRACE 1 /* not configurable w/ flag yet */ #define _NTTRACE 0 /* not configurable w/ flag yet */ diff --git a/libc/calls/struct/ucontext-netbsd.internal.h b/libc/calls/struct/ucontext-netbsd.internal.h index a0c60e247..8d297c174 100644 --- a/libc/calls/struct/ucontext-netbsd.internal.h +++ b/libc/calls/struct/ucontext-netbsd.internal.h @@ -13,7 +13,6 @@ COSMOPOLITAN_C_START_ #define _UC_TLSBASE 0x00080000 #define _UC_SETSTACK 0x00010000 #define _UC_CLRSTACK 0x00020000 -#define _UC_CLRSTACK 0x00020000 union sigval_netbsd { int32_t sival_int; diff --git a/libc/intrin/gettid.greg.c b/libc/intrin/gettid.greg.c index a6d99227e..f3b08f6e9 100644 --- a/libc/intrin/gettid.greg.c +++ b/libc/intrin/gettid.greg.c @@ -54,7 +54,9 @@ int gettid(void) { int tid; if (__tls_enabled) { tid = *(int *)(__get_tls() + 0x38); - if (tid > 0) return tid; + if (tid > 0) { + return tid; + } } return sys_gettid(); } diff --git a/libc/log/oncrash.c b/libc/log/oncrash.c index df6827ae2..b4cdae1db 100644 --- a/libc/log/oncrash.c +++ b/libc/log/oncrash.c @@ -35,6 +35,7 @@ #include "libc/log/log.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/stackframe.h" +#include "libc/nexgen32e/threaded.h" #include "libc/runtime/internal.h" #include "libc/runtime/pc.internal.h" #include "libc/runtime/runtime.h" @@ -283,6 +284,7 @@ relegated void __oncrash(int sig, struct siginfo *si, ucontext_t *ctx) { int gdbpid, err; static int sync; static bool notpossible; + __tls_enabled = false; STRACE("__oncrash rip %x", ctx->uc_mcontext.rip); --__ftrace; --__strace; diff --git a/libc/mem/unveil.c b/libc/mem/unveil.c index a2ffbb61b..b830a11e6 100644 --- a/libc/mem/unveil.c +++ b/libc/mem/unveil.c @@ -194,7 +194,6 @@ static int sys_unveil_linux(const char *path, const char *permissions) { */ int unveil(const char *path, const char 
*permissions) { int rc; - __enable_tls(); if (IsLinux()) { rc = sys_unveil_linux(path, permissions); } else { diff --git a/libc/runtime/arch_prctl.c b/libc/runtime/arch_prctl.c index 2c958c5fb..9c6b2be84 100644 --- a/libc/runtime/arch_prctl.c +++ b/libc/runtime/arch_prctl.c @@ -199,7 +199,7 @@ int arch_prctl(int code, int64_t addr) { case METAL: return arch_prctl_msr(code, addr); case FREEBSD: - /* claims support but it appears not */ + // TODO(jart): this should use sysarch() return arch_prctl_freebsd(code, addr); case OPENBSD: return arch_prctl_openbsd(code, addr); diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index b8e23638e..1c9a5eec4 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -565,21 +565,9 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz, * either terminated or has finished using its stack memory * * - `CLONE_SETTLS` is needed if you intend to specify the `tls` - * argument, which provides a fast-path solution for changing the - * appropriate TLS segment register within the child thread. The - * child thread may then obtain a reference to the TIB address you - * supplied, by calling __get_tls(). Your C library holds certain - * expectations about the layout of your Thread Information Block - * (TIB), which are all documented by __initialize_tls(). That - * function can be used to initialize the first positive 64 bytes - * of your TLS allocation, which is the memory Cosmopolitan Libc - * wants for itself (and negative addresses are reserved by the - * GNU Linker). Using this flag will transition the C runtime to a - * `__tls_enabled` state automatically. If you use TLS for just - * one thread, then you must be specify TLS for ALL THREADS. It's - * a good idea to do that since TLS can offer considerable (i.e. - * multiple orders of a magnitude) performance improvement for - * TID-dependent C library services, e.g. recursive mutexes. 
+ * argument, which after thread creation may be accessed using + * __get_tls(). Doing this means that `errno`, gettid(), etc. + * correctly work. Caveat emptor if you choose not to do this. * * @param arg is passed as an argument to `func` in the child thread * @param tls may be used to set the thread local storage segment; @@ -594,8 +582,9 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid, int rc; struct CloneArgs *wt; - if (flags & CLONE_SETTLS) __enable_tls(); - if (flags & CLONE_THREAD) __enable_threads(); + if (flags & CLONE_THREAD) { + __enable_threads(); + } if (!func) { rc = einval(); diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S index eba8547f3..954e2e263 100644 --- a/libc/runtime/cosmo.S +++ b/libc/runtime/cosmo.S @@ -76,22 +76,14 @@ cosmo: push %rbp ret .endfn cosmo,weak -#if !IsTiny() -// Enable TLS early if _Thread_local is used -// In MODE=tiny you may need to explicitly call __enable_tls() -// Otherwise this would bloat life.com from 16kb → 32kb D: +// Enables Thread Local Storage. .init.start 304,_init_tls - mov $_tls_content,%eax - test %eax,%eax - jz 1f push %rdi push %rsi call __enable_tls pop %rsi pop %rdi - jz 1f -1: .init.end 304,_init_tls -#endif + .init.end 304,_init_tls #if !IsTiny() // Creates deterministically addressed stack we can use diff --git a/libc/runtime/enable_tls.c b/libc/runtime/enable_tls.c index cfb74cf06..7bd2d3f24 100644 --- a/libc/runtime/enable_tls.c +++ b/libc/runtime/enable_tls.c @@ -16,12 +16,16 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" #include "libc/bits/bits.h" +#include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/calls/strace.internal.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" #include "libc/errno.h" +#include "libc/intrin/kprintf.h" +#include "libc/log/libfatal.internal.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/threaded.h" #include "libc/nt/thread.h" @@ -52,12 +56,35 @@ __msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc; extern unsigned char __tls_mov_nt_rax[]; extern unsigned char __tls_add_nt_rax[]; +_Alignas(long) static char __static_tls[5008]; /** * Enables thread local storage. + * + * This function is always called by the core runtime to guarantee TLS + * is always available to your program. You must build your code using + * -mno-tls-direct-seg-refs if you want to use _Thread_local. + * + * You can use __get_tls() to get the linear address of your tib. When + * accessing TLS via privileged code you must use __get_tls_privileged + * because we need code morphing to support The New Technology and XNU + * + * On XNU and The New Technology, this function imposes 1ms of latency + * during startup for larger binaries like Python. + * + * If you don't want TLS and you're sure you're not using it, then you + * can disable it as follows: + * + * int main() { + * __tls_enabled = false; + * // do stuff + * } + * + * This is useful if you want to wrestle back control of %fs using the + * arch_prctl() function. However, such programs might not be portable + * and your `errno` variable also won't be thread safe anymore. 
*/ privileged void __enable_tls(void) { - if (__tls_enabled) return; STRACE("__enable_tls()"); // allocate tls memory for main process @@ -74,20 +101,42 @@ privileged void __enable_tls(void) { size_t siz; cthread_t tib; char *mem, *tls; - siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE); - mem = _mapanon(siz); + siz = ROUNDUP(_TLSZ + _TIBZ, alignof(__static_tls)); + if (siz <= sizeof(__static_tls)) { + // if tls requirement is small then use the static tls block + // which helps avoid a system call for appes with little tls + // this is crucial to keeping life.com 16 kilobytes in size! + _Static_assert(alignof(__static_tls) >= alignof(cthread_t)); + mem = __static_tls; + } else { + // if this binary needs a hefty tls block then we'll bank on + // malloc() being linked, which links _mapanon(). otherwise + // if you exceed this, you need to STATIC_YOINK("_mapanon"). + // please note that it's probably too early to call calloc() + assert(weaken(_mapanon)); + siz = ROUNDUP(siz, FRAMESIZE); + mem = weaken(_mapanon)(siz); + assert(mem); + } tib = (cthread_t)(mem + siz - _TIBZ); tls = mem + siz - _TIBZ - _TLSZ; tib->self = tib; tib->self2 = tib; tib->err = __errno; - tib->tid = sys_gettid(); - memmove(tls, _tdata_start, _TLDZ); + if (IsLinux()) { + // gnu/systemd guarantees pid==tid for the main thread so we can + // avoid issuing a superfluous system call at startup in program + tib->tid = __pid; + } else { + tib->tid = sys_gettid(); + } + __repmovsb(tls, _tdata_start, _TLDZ); // ask the operating system to change the x86 segment register int ax, dx; if (IsWindows()) { __tls_index = __imp_TlsAlloc(); + assert(0 <= __tls_index && __tls_index < 64); asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); } else if (IsFreebsd()) { asm volatile("syscall" @@ -95,9 +144,12 @@ privileged void __enable_tls(void) { : "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib) : "rcx", "r11", "memory", "cc"); } else if (IsNetbsd()) { + // netbsd has sysarch(X86_SET_FSBASE) but we 
can't use that because + // signal handlers will cause it to be reset due to not setting the + // _mc_tlsbase field in struct mcontext_netbsd. asm volatile("syscall" : "=a"(ax), "=d"(dx) - : "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib) + : "0"(__NR__lwp_setprivate), "D"(tib) : "rcx", "r11", "memory", "cc"); } else if (IsXnu()) { asm volatile("syscall" @@ -179,7 +231,7 @@ privileged void __enable_tls(void) { } // we're checking for the following expression: - // 0144 == p[0] && // fs + // 0144 == p[0] && // %fs // 0110 == p[1] && // rex.w (64-bit operand size) // (0213 == p[2] || // mov reg/mem → reg (word-sized) // 0003 == p[2]) && // add reg/mem → reg (word-sized) @@ -195,7 +247,7 @@ privileged void __enable_tls(void) { !p[8]) { // now change the code - p[0] = 0145; // this changes gs segment to fs segment + p[0] = 0145; // change %fs to %gs p[5] = (dis & 0x000000ff) >> 000; // displacement p[6] = (dis & 0x0000ff00) >> 010; // displacement p[7] = (dis & 0x00ff0000) >> 020; // displacement diff --git a/libc/runtime/ftracer.c b/libc/runtime/ftracer.c index 2e63d542d..951bc9a51 100644 --- a/libc/runtime/ftracer.c +++ b/libc/runtime/ftracer.c @@ -92,7 +92,6 @@ privileged void ftracer(void) { textstartup int ftrace_install(void) { if (GetSymbolTable()) { - __enable_tls(); g_stackdigs = LengthInt64Thousands(GetStackSize()); return __hook(ftrace_hook, GetSymbolTable()); } else { diff --git a/libc/runtime/internal.h b/libc/runtime/internal.h index c0e20fc5c..f61d4d8fb 100644 --- a/libc/runtime/internal.h +++ b/libc/runtime/internal.h @@ -27,7 +27,7 @@ extern unsigned char _tls_size[]; extern unsigned char _tls_content[]; void _init(void) hidden; -void __enable_tls(void) hidden; +void __enable_tls(void); void __enable_threads(void) hidden; void __restorewintty(void) hidden; void *__cxa_finalize(void *) hidden; diff --git a/libc/runtime/mapanon.c b/libc/runtime/mapanon.c index 1291a5ec8..12be768d2 100644 --- a/libc/runtime/mapanon.c +++ b/libc/runtime/mapanon.c @@
-55,15 +55,16 @@ * * That is performed automatically for unit test executables. * - * @return memory map address on success, or null w/ errrno + * @return memory map address on success, or null w/ errno */ void *_mapanon(size_t size) { - /* asan runtime depends on this function */ void *m; m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (m == MAP_FAILED && weaken(__oom_hook)) { - weaken(__oom_hook)(size); - return 0; + if (m != MAP_FAILED) { + return m; } - return m; + if (errno == ENOMEM && weaken(__oom_hook)) { + weaken(__oom_hook)(size); + } + return 0; } diff --git a/libc/runtime/morph.greg.c b/libc/runtime/morph.greg.c index bda8dc526..59ad7ff2a 100644 --- a/libc/runtime/morph.greg.c +++ b/libc/runtime/morph.greg.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #define ShouldUseMsabiAttribute() 1 +#include "libc/assert.h" #include "libc/bits/asmflag.h" #include "libc/calls/internal.h" #include "libc/calls/strace.internal.h" @@ -58,10 +59,28 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot, * @return 0 on success, or -1 w/ errno */ privileged void __morph_begin(void) { + int ax; + bool cf; + intptr_t dx; sigset_t ss = {{-1, -1}}; STRACE("__morph_begin()"); if (!IsWindows()) { - sys_sigprocmask(SIG_BLOCK, &ss, &oldss); + if (!IsOpenbsd()) { + asm volatile("mov\t$8,%%r10d\n\t" + "syscall" + : "=a"(ax), "=d"(dx) + : "0"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(&ss), + "1"(&oldss) + : "rcx", "r10", "r11", "memory", "cc"); + assert(!ax); + } else { + asm volatile(CFLAG_ASM("syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx) + : "1"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(-1u) + : "rcx", "r11", "memory"); + oldss.__bits[0] = ax & 0xffffffff; + assert(!cf); + } } __morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE, kNtPageWritecopy); @@ -71,10 +90,28 @@ privileged void __morph_begin(void) { * 
Begins code morphing execuatble. */ privileged void __morph_end(void) { + int ax; + long dx; + bool cf; __morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_EXEC, kNtPageExecuteRead); if (!IsWindows()) { - sys_sigprocmask(SIG_SETMASK, &oldss, 0); + if (!IsOpenbsd()) { + asm volatile("mov\t$8,%%r10d\n\t" + "syscall" + : "=a"(ax), "=d"(dx) + : "0"(__NR_sigprocmask), "D"(SIG_SETMASK), "S"(&oldss), + "1"(0) + : "rcx", "r10", "r11", "memory", "cc"); + assert(!ax); + } else { + asm volatile(CFLAG_ASM("syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx) + : "1"(__NR_sigprocmask), "D"(SIG_SETMASK), + "S"(oldss.__bits[0]) + : "rcx", "r11", "memory"); + assert(!cf); + } } STRACE("__morph_end()"); } diff --git a/libc/sysv/syscalls.sh b/libc/sysv/syscalls.sh index a5e12a0ff..5a65c2fea 100755 --- a/libc/sysv/syscalls.sh +++ b/libc/sysv/syscalls.sh @@ -48,7 +48,7 @@ scall __sys_mmap 0x0c50c51dd20c5009 globl hidden # netbsd+openbsd:pad scall sys_msync 0x115100041204101a globl hidden scall sys_mprotect 0x04a04a04a204a00a globl hidden scall __sys_munmap 0x049049049204900b globl hidden -scall sys_sigaction 0x15402e1a0202e00d globl hidden # rt_sigaction on Lunix; it's complicated on NetBSD +scall sys_sigaction 0x15402e1a0202e00d globl hidden # rt_sigaction on Lunix; __sigaction_sigtramp() on NetBSD scall __sys_sigprocmask 0x125030154214900e globl hidden # a.k.a. rt_sigprocmask, openbsd:byvalue, a.k.a. pthread_sigmask scall sys_ioctl 0x0360360362036010 globl hidden scall sys_pread 0x0ad0ad1db2099011 globl hidden # a.k.a. 
pread64; netbsd+openbsd:pad diff --git a/libc/thread/ctor.S b/libc/thread/ctor.S deleted file mode 100644 index 8139b3d87..000000000 --- a/libc/thread/ctor.S +++ /dev/null @@ -1,27 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - - .init.start 400,_main_thread_ctor - push %rdi - push %rsi - call __enable_tls - pop %rsi - pop %rdi - .init.end 400,_main_thread_ctor diff --git a/libc/thread/self.c b/libc/thread/self.c index a4af248b9..a790a1ad7 100644 --- a/libc/thread/self.c +++ b/libc/thread/self.c @@ -20,8 +20,6 @@ #include "libc/nexgen32e/threaded.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * Returns thread descriptor of the current thread. 
*/ diff --git a/libc/thread/sem.c b/libc/thread/sem.c index 4187584ba..06b355984 100644 --- a/libc/thread/sem.c +++ b/libc/thread/sem.c @@ -20,8 +20,6 @@ #include "libc/calls/calls.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - #define CTHREAD_THREAD_VAL_BITS 32 static void Pause(int attempt) { diff --git a/libc/thread/spawn.c b/libc/thread/spawn.c index a144635a5..98788cc25 100644 --- a/libc/thread/spawn.c +++ b/libc/thread/spawn.c @@ -32,8 +32,6 @@ #include "libc/thread/spawn.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * @fileoverview Simple threading API * diff --git a/test/libc/mem/malloc_test.c b/test/libc/mem/malloc_test.c index 715617ae3..f336d1378 100644 --- a/test/libc/mem/malloc_test.c +++ b/test/libc/mem/malloc_test.c @@ -114,6 +114,12 @@ TEST(malloc, test) { for (i = 0; i < ARRAYLEN(fds); ++i) close(fds[i]); } +TEST(memalign, roundsUpAlignmentToTwoPower) { + char *volatile p = memalign(129, 1); + ASSERT_EQ(0, (intptr_t)p & 255); + free(p); +} + void *bulk[1024]; void BulkFreeBenchSetup(void) { diff --git a/test/libc/runtime/arch_prctl_test.c b/test/libc/runtime/arch_prctl_test.c index 01dc43760..14770aa47 100644 --- a/test/libc/runtime/arch_prctl_test.c +++ b/test/libc/runtime/arch_prctl_test.c @@ -19,9 +19,11 @@ #include "libc/bits/segmentation.h" #include "libc/calls/calls.h" #include "libc/dce.h" +#include "libc/nexgen32e/threaded.h" #include "libc/testlib/testlib.h" __attribute__((__constructor__)) static void init(void) { + __tls_enabled = false; pledge("stdio rpath", 0); errno = 0; } diff --git a/test/libc/runtime/munmap_test.c b/test/libc/runtime/munmap_test.c index a1309431b..01e541e2a 100644 --- a/test/libc/runtime/munmap_test.c +++ b/test/libc/runtime/munmap_test.c @@ -24,6 +24,7 @@ #include "libc/intrin/kprintf.h" #include "libc/runtime/memtrack.internal.h" #include "libc/runtime/runtime.h" +#include "libc/str/str.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" 
#include "libc/sysv/consts/prot.h" @@ -66,6 +67,12 @@ TEST(munmap, doesntExist_doesntCare) { } } +TEST(munmap, invalidParams) { + EXPECT_SYS(EINVAL, -1, munmap(0, 0)); + EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000000, 0)); + EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000001, FRAMESIZE)); +} + TEST(munmap, test) { char *p; ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, @@ -75,12 +82,6 @@ TEST(munmap, test) { EXPECT_FALSE(MemoryExists(p)); } -TEST(munmap, invalidParams) { - EXPECT_SYS(EINVAL, -1, munmap(0, 0)); - EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000000, 0)); - EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000001, FRAMESIZE)); -} - TEST(munmap, punchHoleInMemory) { char *p; ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE * 3, PROT_READ | PROT_WRITE, diff --git a/test/libc/stdio/fputc_test.c b/test/libc/stdio/fputc_test.c index 092e3139f..6062a362c 100644 --- a/test/libc/stdio/fputc_test.c +++ b/test/libc/stdio/fputc_test.c @@ -64,7 +64,6 @@ TEST(fgetc, testUnbuffered) { } BENCH(fputc, bench) { - __enable_tls(); __enable_threads(); FILE *f; ASSERT_NE(NULL, (f = fopen("/dev/null", "w"))); diff --git a/test/libc/thread/pthread_key_create_test.c b/test/libc/thread/pthread_key_create_test.c index 52ccbac30..15cdc4bd2 100644 --- a/test/libc/thread/pthread_key_create_test.c +++ b/test/libc/thread/pthread_key_create_test.c @@ -38,9 +38,3 @@ TEST(pthread_key_create, testRunsDtors_becauseNoLeakReport) { EXPECT_EQ(0, pthread_setspecific(key, x)); EXPECT_EQ(x, pthread_getspecific(key)); } - -__attribute__((__constructor__)) static void init(void) { - if (IsTiny()) { - __enable_tls(); - } -} diff --git a/third_party/dlmalloc/README.cosmo b/third_party/dlmalloc/README.cosmo index d364ed7b6..c3b83321b 100644 --- a/third_party/dlmalloc/README.cosmo +++ b/third_party/dlmalloc/README.cosmo @@ -9,5 +9,7 @@ LICENSE LOCAL CHANGES - - Introduce __oom_hook() - - Favor pause (rather than sched_yield) for spin locks + - Use faster two power roundup 
for memalign() + - Poison maps to integrate with Address Sanitizer + - Introduce __oom_hook() by using _mapanon() vs. mmap() + - Wrap locks with __threaded check to improve perf lots diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 827dd4867..c2d9cd467 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -5,7 +5,9 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/kprintf.h" +#include "libc/macros.internal.h" #include "libc/mem/mem.h" +#include "libc/nexgen32e/bsr.h" #include "libc/nexgen32e/rdtsc.h" #include "libc/rand/rand.h" #include "libc/runtime/runtime.h" @@ -917,11 +919,8 @@ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { void* mem = 0; if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ alignment = MIN_CHUNK_SIZE; - if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ - size_t a = MALLOC_ALIGNMENT << 1; - while (a < alignment) a <<= 1; - alignment = a; - } + /* alignment is 32+ bytes rounded up to nearest two power */ + alignment = 2ul << bsrl(MAX(MIN_CHUNK_SIZE, alignment) - 1); if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ MALLOC_FAILURE_ACTION; diff --git a/third_party/dlmalloc/locks.inc b/third_party/dlmalloc/locks.inc index 4c38271c0..0bf125f21 100644 --- a/third_party/dlmalloc/locks.inc +++ b/third_party/dlmalloc/locks.inc @@ -1,4 +1,5 @@ // clang-format off +#include "libc/calls/calls.h" #include "libc/nexgen32e/threaded.h" /* --------------------------- Lock preliminaries ------------------------ */ @@ -94,7 +95,7 @@ FORCEINLINE void x86_clear_lock(int* sl) { /* Plain spin locks use single word (embedded in malloc_states) */ static dontinline int spin_acquire_lock(int *sl) { while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { - __builtin_ia32_pause(); + sched_yield(); } return 0; } @@ -154,7 +155,7 @@ FORCEINLINE int 
recursive_acquire_lock(MLOCK_T *lk) { ++lk->c; return 0; } - __builtin_ia32_pause(); + sched_yield(); } } diff --git a/third_party/dlmalloc/platform.inc b/third_party/dlmalloc/platform.inc index 8b0cd9e71..d5c57893c 100644 --- a/third_party/dlmalloc/platform.inc +++ b/third_party/dlmalloc/platform.inc @@ -179,6 +179,8 @@ /* ======================================================================== To make a fully customizable malloc.h header file, cut everything +#include "libc/sysv/consts/map.h" +#include "libc/runtime/runtime.h" above this line, put into file malloc.h, edit to suit, and #include it on the next line, as well as in programs that use this malloc. ======================================================================== @@ -385,7 +387,7 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); /* MORECORE and MMAP must return MFAIL on failure */ -#define MFAIL ((void*)(MAX_SIZE_T)) +#define MFAIL NULL #define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ #if HAVE_MMAP @@ -398,7 +400,7 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); #endif /* MAP_ANON */ #ifdef MAP_ANONYMOUS #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#define MMAP_DEFAULT(s) _mapanon(s) #else /* MAP_ANONYMOUS */ /* Nearly all versions of mmap support MAP_ANONYMOUS, so the following @@ -408,8 +410,8 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ #define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? 
\ (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) + mmap_no(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap_no(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) #endif /* MAP_ANONYMOUS */ #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) diff --git a/third_party/dlmalloc/vespene.c b/third_party/dlmalloc/vespene.c index da84f24cc..c47850ad4 100644 --- a/third_party/dlmalloc/vespene.c +++ b/third_party/dlmalloc/vespene.c @@ -16,24 +16,21 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/weaken.h" -#include "libc/calls/calls.h" +#include "libc/dce.h" #include "libc/intrin/asan.internal.h" #include "libc/intrin/asancodes.h" -#include "libc/intrin/kprintf.h" #include "libc/runtime/runtime.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/prot.h" +#include "third_party/dlmalloc/vespene.internal.h" /** * Acquires more system memory for dlmalloc. 
+ * @return memory map address on success, or null w/ errno */ void *dlmalloc_requires_more_vespene_gas(size_t size) { char *p; - if ((p = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0)) != MAP_FAILED) { - if (weaken(__asan_poison)) { - weaken(__asan_poison)(p, size, kAsanHeapFree); + if ((p = _mapanon(size))) { + if (IsAsan()) { + __asan_poison(p, size, kAsanHeapFree); } } return p; diff --git a/tool/plinko/plinko.c b/tool/plinko/plinko.c index 90158369e..21823eeed 100644 --- a/tool/plinko/plinko.c +++ b/tool/plinko/plinko.c @@ -18,12 +18,14 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/kprintf.h" #include "libc/log/log.h" +#include "libc/nexgen32e/threaded.h" #include "libc/stdio/stdio.h" #include "tool/plinko/lib/plinko.h" STATIC_YOINK("__zipos_get"); int main(int argc, char *argv[]) { + __tls_enabled = false; Plinko(argc, argv); return 0; }