diff --git a/README.md b/README.md index 15170db9c..dd9d19215 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ wget https://justine.lol/cosmopolitan/cosmopolitan.zip unzip cosmopolitan.zip printf 'main() { printf("hello world\\n"); }\n' >hello.c gcc -g -Os -static -nostdlib -nostdinc -fno-pie -no-pie -mno-red-zone \ - -fno-omit-frame-pointer -pg -mnop-mcount \ + -fno-omit-frame-pointer -pg -mnop-mcount -mno-tls-direct-seg-refs \ -o hello.com.dbg hello.c -fuse-ld=bfd -Wl,-T,ape.lds \ -include cosmopolitan.h crt.o ape-no-modify-self.o cosmopolitan.a objcopy -S -O binary hello.com.dbg hello.com diff --git a/examples/life.c b/examples/life.c index 5e8dfe9c4..b61489df7 100644 --- a/examples/life.c +++ b/examples/life.c @@ -8,4 +8,6 @@ ╚─────────────────────────────────────────────────────────────────*/ #endif -int main() { return 42; } +int main() { + return 42; +} diff --git a/libc/calls/netbsdtramp.S b/libc/calls/netbsdtramp.S index e0493b16e..3f4699933 100644 --- a/libc/calls/netbsdtramp.S +++ b/libc/calls/netbsdtramp.S @@ -21,9 +21,9 @@ __restore_rt_netbsd: mov %r15,%rdi - mov $308,%rax # setcontext + mov $308,%eax # setcontext syscall - mov $-1,%edi + or $-1,%edi mov $1,%rax # exit syscall .endfn __restore_rt_netbsd,globl,hidden diff --git a/libc/calls/sigaction.c b/libc/calls/sigaction.c index 16d285750..4f8afaabc 100644 --- a/libc/calls/sigaction.c +++ b/libc/calls/sigaction.c @@ -205,6 +205,11 @@ static int __sigaction(int sig, const struct sigaction *act, arg4 = (int64_t)(intptr_t)oldact; /* from go code */ arg5 = 0; } else if (IsNetbsd()) { + /* int __sigaction_sigtramp(int signum, + const struct sigaction *nsa, + struct sigaction *osa, + const void *tramp, + int vers); */ if (ap) { arg4 = (int64_t)(intptr_t)&__restore_rt_netbsd; arg5 = 2; /* netbsd/lib/libc/arch/x86_64/sys/__sigtramp2.S */ diff --git a/libc/calls/strace.internal.h b/libc/calls/strace.internal.h index 16c29f56d..ee247e005 100644 --- a/libc/calls/strace.internal.h +++ 
b/libc/calls/strace.internal.h @@ -7,7 +7,7 @@ #include "libc/calls/struct/stat.h" #include "libc/runtime/runtime.h" -#define _KERNTRACE 0 /* not configurable w/ flag yet */ +#define _KERNTRACE 1 /* not configurable w/ flag yet */ #define _POLLTRACE 0 /* not configurable w/ flag yet */ #define _DATATRACE 1 /* not configurable w/ flag yet */ #define _NTTRACE 0 /* not configurable w/ flag yet */ diff --git a/libc/calls/struct/ucontext-netbsd.internal.h b/libc/calls/struct/ucontext-netbsd.internal.h index a0c60e247..8d297c174 100644 --- a/libc/calls/struct/ucontext-netbsd.internal.h +++ b/libc/calls/struct/ucontext-netbsd.internal.h @@ -13,7 +13,6 @@ COSMOPOLITAN_C_START_ #define _UC_TLSBASE 0x00080000 #define _UC_SETSTACK 0x00010000 #define _UC_CLRSTACK 0x00020000 -#define _UC_CLRSTACK 0x00020000 union sigval_netbsd { int32_t sival_int; diff --git a/libc/intrin/gettid.greg.c b/libc/intrin/gettid.greg.c index a6d99227e..f3b08f6e9 100644 --- a/libc/intrin/gettid.greg.c +++ b/libc/intrin/gettid.greg.c @@ -54,7 +54,9 @@ int gettid(void) { int tid; if (__tls_enabled) { tid = *(int *)(__get_tls() + 0x38); - if (tid > 0) return tid; + if (tid > 0) { + return tid; + } } return sys_gettid(); } diff --git a/libc/log/oncrash.c b/libc/log/oncrash.c index df6827ae2..b4cdae1db 100644 --- a/libc/log/oncrash.c +++ b/libc/log/oncrash.c @@ -35,6 +35,7 @@ #include "libc/log/log.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/stackframe.h" +#include "libc/nexgen32e/threaded.h" #include "libc/runtime/internal.h" #include "libc/runtime/pc.internal.h" #include "libc/runtime/runtime.h" @@ -283,6 +284,7 @@ relegated void __oncrash(int sig, struct siginfo *si, ucontext_t *ctx) { int gdbpid, err; static int sync; static bool notpossible; + __tls_enabled = false; STRACE("__oncrash rip %x", ctx->uc_mcontext.rip); --__ftrace; --__strace; diff --git a/libc/mem/unveil.c b/libc/mem/unveil.c index a2ffbb61b..b830a11e6 100644 --- a/libc/mem/unveil.c +++ b/libc/mem/unveil.c @@ -194,7 
+194,6 @@ static int sys_unveil_linux(const char *path, const char *permissions) { */ int unveil(const char *path, const char *permissions) { int rc; - __enable_tls(); if (IsLinux()) { rc = sys_unveil_linux(path, permissions); } else { diff --git a/libc/runtime/arch_prctl.c b/libc/runtime/arch_prctl.c index 2c958c5fb..9c6b2be84 100644 --- a/libc/runtime/arch_prctl.c +++ b/libc/runtime/arch_prctl.c @@ -199,7 +199,7 @@ int arch_prctl(int code, int64_t addr) { case METAL: return arch_prctl_msr(code, addr); case FREEBSD: - /* claims support but it appears not */ + // TODO(jart): this should use sysarch() return arch_prctl_freebsd(code, addr); case OPENBSD: return arch_prctl_openbsd(code, addr); diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index b8e23638e..1c9a5eec4 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -565,21 +565,9 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz, * either terminated or has finished using its stack memory * * - `CLONE_SETTLS` is needed if you intend to specify the `tls` - * argument, which provides a fast-path solution for changing the - * appropriate TLS segment register within the child thread. The - * child thread may then obtain a reference to the TIB address you - * supplied, by calling __get_tls(). Your C library holds certain - * expectations about the layout of your Thread Information Block - * (TIB), which are all documented by __initialize_tls(). That - * function can be used to initialize the first positive 64 bytes - * of your TLS allocation, which is the memory Cosmopolitan Libc - * wants for itself (and negative addresses are reserved by the - * GNU Linker). Using this flag will transition the C runtime to a - * `__tls_enabled` state automatically. If you use TLS for just - * one thread, then you must be specify TLS for ALL THREADS. It's - * a good idea to do that since TLS can offer considerable (i.e. 
- * multiple orders of a magnitude) performance improvement for - * TID-dependent C library services, e.g. recursive mutexes. + * argument, which after thread creation may be accessed using + * __get_tls(). Doing this means that `errno`, gettid(), etc. + * correctly work. Caveat emptor if you choose not to do this. * * @param arg is passed as an argument to `func` in the child thread * @param tls may be used to set the thread local storage segment; @@ -594,8 +582,9 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid, int rc; struct CloneArgs *wt; - if (flags & CLONE_SETTLS) __enable_tls(); - if (flags & CLONE_THREAD) __enable_threads(); + if (flags & CLONE_THREAD) { + __enable_threads(); + } if (!func) { rc = einval(); diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S index eba8547f3..954e2e263 100644 --- a/libc/runtime/cosmo.S +++ b/libc/runtime/cosmo.S @@ -76,22 +76,14 @@ cosmo: push %rbp ret .endfn cosmo,weak -#if !IsTiny() -// Enable TLS early if _Thread_local is used -// In MODE=tiny you may need to explicitly call __enable_tls() -// Otherwise this would bloat life.com from 16kb → 32kb D: +// Enables Thread Local Storage. .init.start 304,_init_tls - mov $_tls_content,%eax - test %eax,%eax - jz 1f push %rdi push %rsi call __enable_tls pop %rsi pop %rdi - jz 1f -1: .init.end 304,_init_tls -#endif + .init.end 304,_init_tls #if !IsTiny() // Creates deterministically addressed stack we can use diff --git a/libc/runtime/enable_tls.c b/libc/runtime/enable_tls.c index cfb74cf06..7bd2d3f24 100644 --- a/libc/runtime/enable_tls.c +++ b/libc/runtime/enable_tls.c @@ -16,12 +16,16 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" #include "libc/bits/bits.h" +#include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/calls/strace.internal.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" #include "libc/errno.h" +#include "libc/intrin/kprintf.h" +#include "libc/log/libfatal.internal.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/threaded.h" #include "libc/nt/thread.h" @@ -52,12 +56,35 @@ __msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc; extern unsigned char __tls_mov_nt_rax[]; extern unsigned char __tls_add_nt_rax[]; +_Alignas(long) static char __static_tls[5008]; /** * Enables thread local storage. + * + * This function is always called by the core runtime to guarantee TLS + * is always available to your program. You must build your code using + * -mno-tls-direct-seg-refs if you want to use _Thread_local. + * + * You can use __get_tls() to get the linear address of your tib. When + * accessing TLS via privileged code you must use __get_tls_privileged + * because we need code morphing to support The New Technology and XNU + * + * On XNU and The New Technology, this function imposes 1ms of latency + * during startup for larger binaries like Python. + * + * If you don't want TLS and you're sure you're not using it, then you + * can disable it as follows: + * + * int main() { + * __tls_enabled = false; + * // do stuff + * } + * + * This is useful if you want to wrestle back control of %fs using the + * arch_prctl() function. However, such programs might not be portable + * and your `errno` variable also won't be thread safe anymore. 
*/ privileged void __enable_tls(void) { - if (__tls_enabled) return; STRACE("__enable_tls()"); // allocate tls memory for main process @@ -74,20 +101,42 @@ privileged void __enable_tls(void) { size_t siz; cthread_t tib; char *mem, *tls; - siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE); - mem = _mapanon(siz); + siz = ROUNDUP(_TLSZ + _TIBZ, alignof(__static_tls)); + if (siz <= sizeof(__static_tls)) { + // if tls requirement is small then use the static tls block + // which helps avoid a system call for apps with little tls + // this is crucial to keeping life.com 16 kilobytes in size! + _Static_assert(alignof(__static_tls) >= alignof(cthread_t)); + mem = __static_tls; + } else { + // if this binary needs a hefty tls block then we'll bank on + // malloc() being linked, which links _mapanon(). otherwise + // if you exceed this, you need to STATIC_YOINK("_mapanon"). + // please note that it's probably too early to call calloc() + assert(weaken(_mapanon)); + siz = ROUNDUP(siz, FRAMESIZE); + mem = weaken(_mapanon)(siz); + assert(mem); + } tib = (cthread_t)(mem + siz - _TIBZ); tls = mem + siz - _TIBZ - _TLSZ; tib->self = tib; tib->self2 = tib; tib->err = __errno; - tib->tid = sys_gettid(); - memmove(tls, _tdata_start, _TLDZ); + if (IsLinux()) { + // gnu/systemd guarantees pid==tid for the main thread so we can + // avoid issuing a superfluous system call at startup in program + tib->tid = __pid; + } else { + tib->tid = sys_gettid(); + } + __repmovsb(tls, _tdata_start, _TLDZ); // ask the operating system to change the x86 segment register int ax, dx; if (IsWindows()) { __tls_index = __imp_TlsAlloc(); + assert(0 <= __tls_index && __tls_index < 64); asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); } else if (IsFreebsd()) { asm volatile("syscall" @@ -95,9 +144,12 @@ privileged void __enable_tls(void) { : "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib) : "rcx", "r11", "memory", "cc"); } else if (IsNetbsd()) { + // netbsd has sysarch(X86_SET_FSBASE) but we 
can't use that because + // signal handlers will cause it to be reset due to not setting the + // _mc_tlsbase field in struct mcontext_netbsd. asm volatile("syscall" : "=a"(ax), "=d"(dx) - : "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib) + : "0"(__NR__lwp_setprivate), "D"(tib) : "rcx", "r11", "memory", "cc"); } else if (IsXnu()) { asm volatile("syscall" @@ -179,7 +231,7 @@ privileged void __enable_tls(void) { } // we're checking for the following expression: - // 0144 == p[0] && // fs + // 0144 == p[0] && // %fs // 0110 == p[1] && // rex.w (64-bit operand size) // (0213 == p[2] || // mov reg/mem → reg (word-sized) // 0003 == p[2]) && // add reg/mem → reg (word-sized) @@ -195,7 +247,7 @@ privileged void __enable_tls(void) { !p[8]) { // now change the code - p[0] = 0145; // this changes gs segment to fs segment + p[0] = 0145; // change %fs to %gs p[5] = (dis & 0x000000ff) >> 000; // displacement p[6] = (dis & 0x0000ff00) >> 010; // displacement p[7] = (dis & 0x00ff0000) >> 020; // displacement diff --git a/libc/runtime/ftracer.c b/libc/runtime/ftracer.c index 2e63d542d..951bc9a51 100644 --- a/libc/runtime/ftracer.c +++ b/libc/runtime/ftracer.c @@ -92,7 +92,6 @@ privileged void ftracer(void) { textstartup int ftrace_install(void) { if (GetSymbolTable()) { - __enable_tls(); g_stackdigs = LengthInt64Thousands(GetStackSize()); return __hook(ftrace_hook, GetSymbolTable()); } else { diff --git a/libc/runtime/internal.h b/libc/runtime/internal.h index c0e20fc5c..f61d4d8fb 100644 --- a/libc/runtime/internal.h +++ b/libc/runtime/internal.h @@ -27,7 +27,7 @@ extern unsigned char _tls_size[]; extern unsigned char _tls_content[]; void _init(void) hidden; -void __enable_tls(void) hidden; +void __enable_tls(void); void __enable_threads(void) hidden; void __restorewintty(void) hidden; void *__cxa_finalize(void *) hidden; diff --git a/libc/runtime/mapanon.c b/libc/runtime/mapanon.c index 1291a5ec8..12be768d2 100644 --- a/libc/runtime/mapanon.c +++ b/libc/runtime/mapanon.c @@ 
-55,15 +55,16 @@ * * That is performed automatically for unit test executables. * - * @return memory map address on success, or null w/ errrno + * @return memory map address on success, or null w/ errno */ void *_mapanon(size_t size) { - /* asan runtime depends on this function */ void *m; m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (m == MAP_FAILED && weaken(__oom_hook)) { - weaken(__oom_hook)(size); - return 0; + if (m != MAP_FAILED) { + return m; } - return m; + if (errno == ENOMEM && weaken(__oom_hook)) { + weaken(__oom_hook)(size); + } + return 0; } diff --git a/libc/runtime/morph.greg.c b/libc/runtime/morph.greg.c index bda8dc526..59ad7ff2a 100644 --- a/libc/runtime/morph.greg.c +++ b/libc/runtime/morph.greg.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #define ShouldUseMsabiAttribute() 1 +#include "libc/assert.h" #include "libc/bits/asmflag.h" #include "libc/calls/internal.h" #include "libc/calls/strace.internal.h" @@ -58,10 +59,28 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot, * @return 0 on success, or -1 w/ errno */ privileged void __morph_begin(void) { + int ax; + bool cf; + intptr_t dx; sigset_t ss = {{-1, -1}}; STRACE("__morph_begin()"); if (!IsWindows()) { - sys_sigprocmask(SIG_BLOCK, &ss, &oldss); + if (!IsOpenbsd()) { + asm volatile("mov\t$8,%%r10d\n\t" + "syscall" + : "=a"(ax), "=d"(dx) + : "0"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(&ss), + "1"(&oldss) + : "rcx", "r10", "r11", "memory", "cc"); + assert(!ax); + } else { + asm volatile(CFLAG_ASM("syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx) + : "1"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(-1u) + : "rcx", "r11", "memory"); + oldss.__bits[0] = ax & 0xffffffff; + assert(!cf); + } } __morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE, kNtPageWritecopy); @@ -71,10 +90,28 @@ privileged void __morph_begin(void) { * 
Begins code morphing execuatble. */ privileged void __morph_end(void) { + int ax; + long dx; + bool cf; __morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_EXEC, kNtPageExecuteRead); if (!IsWindows()) { - sys_sigprocmask(SIG_SETMASK, &oldss, 0); + if (!IsOpenbsd()) { + asm volatile("mov\t$8,%%r10d\n\t" + "syscall" + : "=a"(ax), "=d"(dx) + : "0"(__NR_sigprocmask), "D"(SIG_SETMASK), "S"(&oldss), + "1"(0) + : "rcx", "r10", "r11", "memory", "cc"); + assert(!ax); + } else { + asm volatile(CFLAG_ASM("syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx) + : "1"(__NR_sigprocmask), "D"(SIG_SETMASK), + "S"(oldss.__bits[0]) + : "rcx", "r11", "memory"); + assert(!cf); + } } STRACE("__morph_end()"); } diff --git a/libc/sysv/syscalls.sh b/libc/sysv/syscalls.sh index a5e12a0ff..5a65c2fea 100755 --- a/libc/sysv/syscalls.sh +++ b/libc/sysv/syscalls.sh @@ -48,7 +48,7 @@ scall __sys_mmap 0x0c50c51dd20c5009 globl hidden # netbsd+openbsd:pad scall sys_msync 0x115100041204101a globl hidden scall sys_mprotect 0x04a04a04a204a00a globl hidden scall __sys_munmap 0x049049049204900b globl hidden -scall sys_sigaction 0x15402e1a0202e00d globl hidden # rt_sigaction on Lunix; it's complicated on NetBSD +scall sys_sigaction 0x15402e1a0202e00d globl hidden # rt_sigaction on Lunix; __sigaction_sigtramp() on NetBSD scall __sys_sigprocmask 0x125030154214900e globl hidden # a.k.a. rt_sigprocmask, openbsd:byvalue, a.k.a. pthread_sigmask scall sys_ioctl 0x0360360362036010 globl hidden scall sys_pread 0x0ad0ad1db2099011 globl hidden # a.k.a. 
pread64; netbsd+openbsd:pad diff --git a/libc/thread/ctor.S b/libc/thread/ctor.S deleted file mode 100644 index 8139b3d87..000000000 --- a/libc/thread/ctor.S +++ /dev/null @@ -1,27 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - - .init.start 400,_main_thread_ctor - push %rdi - push %rsi - call __enable_tls - pop %rsi - pop %rdi - .init.end 400,_main_thread_ctor diff --git a/libc/thread/self.c b/libc/thread/self.c index a4af248b9..a790a1ad7 100644 --- a/libc/thread/self.c +++ b/libc/thread/self.c @@ -20,8 +20,6 @@ #include "libc/nexgen32e/threaded.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * Returns thread descriptor of the current thread. 
*/ diff --git a/libc/thread/sem.c b/libc/thread/sem.c index 4187584ba..06b355984 100644 --- a/libc/thread/sem.c +++ b/libc/thread/sem.c @@ -20,8 +20,6 @@ #include "libc/calls/calls.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - #define CTHREAD_THREAD_VAL_BITS 32 static void Pause(int attempt) { diff --git a/libc/thread/spawn.c b/libc/thread/spawn.c index a144635a5..98788cc25 100644 --- a/libc/thread/spawn.c +++ b/libc/thread/spawn.c @@ -32,8 +32,6 @@ #include "libc/thread/spawn.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * @fileoverview Simple threading API * diff --git a/test/libc/mem/malloc_test.c b/test/libc/mem/malloc_test.c index 715617ae3..f336d1378 100644 --- a/test/libc/mem/malloc_test.c +++ b/test/libc/mem/malloc_test.c @@ -114,6 +114,12 @@ TEST(malloc, test) { for (i = 0; i < ARRAYLEN(fds); ++i) close(fds[i]); } +TEST(memalign, roundsUpAlignmentToTwoPower) { + char *volatile p = memalign(129, 1); + ASSERT_EQ(0, (intptr_t)p & 255); + free(p); +} + void *bulk[1024]; void BulkFreeBenchSetup(void) { diff --git a/test/libc/runtime/arch_prctl_test.c b/test/libc/runtime/arch_prctl_test.c index 01dc43760..14770aa47 100644 --- a/test/libc/runtime/arch_prctl_test.c +++ b/test/libc/runtime/arch_prctl_test.c @@ -19,9 +19,11 @@ #include "libc/bits/segmentation.h" #include "libc/calls/calls.h" #include "libc/dce.h" +#include "libc/nexgen32e/threaded.h" #include "libc/testlib/testlib.h" __attribute__((__constructor__)) static void init(void) { + __tls_enabled = false; pledge("stdio rpath", 0); errno = 0; } diff --git a/test/libc/runtime/munmap_test.c b/test/libc/runtime/munmap_test.c index a1309431b..01e541e2a 100644 --- a/test/libc/runtime/munmap_test.c +++ b/test/libc/runtime/munmap_test.c @@ -24,6 +24,7 @@ #include "libc/intrin/kprintf.h" #include "libc/runtime/memtrack.internal.h" #include "libc/runtime/runtime.h" +#include "libc/str/str.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" 
#include "libc/sysv/consts/prot.h" @@ -66,6 +67,12 @@ TEST(munmap, doesntExist_doesntCare) { } } +TEST(munmap, invalidParams) { + EXPECT_SYS(EINVAL, -1, munmap(0, 0)); + EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000000, 0)); + EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000001, FRAMESIZE)); +} + TEST(munmap, test) { char *p; ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, @@ -75,12 +82,6 @@ TEST(munmap, test) { EXPECT_FALSE(MemoryExists(p)); } -TEST(munmap, invalidParams) { - EXPECT_SYS(EINVAL, -1, munmap(0, 0)); - EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000000, 0)); - EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000001, FRAMESIZE)); -} - TEST(munmap, punchHoleInMemory) { char *p; ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE * 3, PROT_READ | PROT_WRITE, diff --git a/test/libc/stdio/fputc_test.c b/test/libc/stdio/fputc_test.c index 092e3139f..6062a362c 100644 --- a/test/libc/stdio/fputc_test.c +++ b/test/libc/stdio/fputc_test.c @@ -64,7 +64,6 @@ TEST(fgetc, testUnbuffered) { } BENCH(fputc, bench) { - __enable_tls(); __enable_threads(); FILE *f; ASSERT_NE(NULL, (f = fopen("/dev/null", "w"))); diff --git a/test/libc/thread/pthread_key_create_test.c b/test/libc/thread/pthread_key_create_test.c index 52ccbac30..15cdc4bd2 100644 --- a/test/libc/thread/pthread_key_create_test.c +++ b/test/libc/thread/pthread_key_create_test.c @@ -38,9 +38,3 @@ TEST(pthread_key_create, testRunsDtors_becauseNoLeakReport) { EXPECT_EQ(0, pthread_setspecific(key, x)); EXPECT_EQ(x, pthread_getspecific(key)); } - -__attribute__((__constructor__)) static void init(void) { - if (IsTiny()) { - __enable_tls(); - } -} diff --git a/third_party/dlmalloc/README.cosmo b/third_party/dlmalloc/README.cosmo index d364ed7b6..c3b83321b 100644 --- a/third_party/dlmalloc/README.cosmo +++ b/third_party/dlmalloc/README.cosmo @@ -9,5 +9,7 @@ LICENSE LOCAL CHANGES - - Introduce __oom_hook() - - Favor pause (rather than sched_yield) for spin locks + - Use faster two power roundup 
for memalign() + - Poison maps to integrate with Address Sanitizer + - Introduce __oom_hook() by using _mapanon() vs. mmap() + - Wrap locks with __threaded check to improve perf lots diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 827dd4867..c2d9cd467 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -5,7 +5,9 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/kprintf.h" +#include "libc/macros.internal.h" #include "libc/mem/mem.h" +#include "libc/nexgen32e/bsr.h" #include "libc/nexgen32e/rdtsc.h" #include "libc/rand/rand.h" #include "libc/runtime/runtime.h" @@ -917,11 +919,8 @@ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { void* mem = 0; if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ alignment = MIN_CHUNK_SIZE; - if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ - size_t a = MALLOC_ALIGNMENT << 1; - while (a < alignment) a <<= 1; - alignment = a; - } + /* alignment is 32+ bytes rounded up to nearest two power */ + alignment = 2ul << bsrl(MAX(MIN_CHUNK_SIZE, alignment) - 1); if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ MALLOC_FAILURE_ACTION; diff --git a/third_party/dlmalloc/locks.inc b/third_party/dlmalloc/locks.inc index 4c38271c0..0bf125f21 100644 --- a/third_party/dlmalloc/locks.inc +++ b/third_party/dlmalloc/locks.inc @@ -1,4 +1,5 @@ // clang-format off +#include "libc/calls/calls.h" #include "libc/nexgen32e/threaded.h" /* --------------------------- Lock preliminaries ------------------------ */ @@ -94,7 +95,7 @@ FORCEINLINE void x86_clear_lock(int* sl) { /* Plain spin locks use single word (embedded in malloc_states) */ static dontinline int spin_acquire_lock(int *sl) { while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { - __builtin_ia32_pause(); + sched_yield(); } return 0; } @@ -154,7 +155,7 @@ FORCEINLINE int 
recursive_acquire_lock(MLOCK_T *lk) { ++lk->c; return 0; } - __builtin_ia32_pause(); + sched_yield(); } } diff --git a/third_party/dlmalloc/platform.inc b/third_party/dlmalloc/platform.inc index 8b0cd9e71..d5c57893c 100644 --- a/third_party/dlmalloc/platform.inc +++ b/third_party/dlmalloc/platform.inc @@ -179,6 +179,8 @@ /* ======================================================================== To make a fully customizable malloc.h header file, cut everything +#include "libc/sysv/consts/map.h" +#include "libc/runtime/runtime.h" above this line, put into file malloc.h, edit to suit, and #include it on the next line, as well as in programs that use this malloc. ======================================================================== @@ -385,7 +387,7 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); /* MORECORE and MMAP must return MFAIL on failure */ -#define MFAIL ((void*)(MAX_SIZE_T)) +#define MFAIL NULL #define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ #if HAVE_MMAP @@ -398,7 +400,7 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); #endif /* MAP_ANON */ #ifdef MAP_ANONYMOUS #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#define MMAP_DEFAULT(s) _mapanon(s) #else /* MAP_ANONYMOUS */ /* Nearly all versions of mmap support MAP_ANONYMOUS, so the following @@ -408,8 +410,8 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ #define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? 
\ (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) + mmap_no(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap_no(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) #endif /* MAP_ANONYMOUS */ #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) diff --git a/third_party/dlmalloc/vespene.c b/third_party/dlmalloc/vespene.c index da84f24cc..c47850ad4 100644 --- a/third_party/dlmalloc/vespene.c +++ b/third_party/dlmalloc/vespene.c @@ -16,24 +16,21 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/weaken.h" -#include "libc/calls/calls.h" +#include "libc/dce.h" #include "libc/intrin/asan.internal.h" #include "libc/intrin/asancodes.h" -#include "libc/intrin/kprintf.h" #include "libc/runtime/runtime.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/prot.h" +#include "third_party/dlmalloc/vespene.internal.h" /** * Acquires more system memory for dlmalloc. 
+ * @return memory map address on success, or null w/ errno */ void *dlmalloc_requires_more_vespene_gas(size_t size) { char *p; - if ((p = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0)) != MAP_FAILED) { - if (weaken(__asan_poison)) { - weaken(__asan_poison)(p, size, kAsanHeapFree); + if ((p = _mapanon(size))) { + if (IsAsan()) { + __asan_poison(p, size, kAsanHeapFree); } } return p; diff --git a/tool/plinko/plinko.c b/tool/plinko/plinko.c index 90158369e..21823eeed 100644 --- a/tool/plinko/plinko.c +++ b/tool/plinko/plinko.c @@ -18,12 +18,14 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/kprintf.h" #include "libc/log/log.h" +#include "libc/nexgen32e/threaded.h" #include "libc/stdio/stdio.h" #include "tool/plinko/lib/plinko.h" STATIC_YOINK("__zipos_get"); int main(int argc, char *argv[]) { + __tls_enabled = false; Plinko(argc, argv); return 0; }