Always initialize thread local storage

We had previously not enabled TLS in MODE=tiny in order to keep the
smallest example programs (e.g. life.com) just 16kb in size. But it
was error prone doing that, so now we just always enable it because
this change uses hacks to ensure it won't increase life.com's size.

This change also fixes a bug on NetBSD, where signal handlers would
break thread local storage if SA_SIGINFO was being used. This looks
like it might be a bug in NetBSD, but it's got a simple workaround.
This commit is contained in:
Justine Tunney 2022-07-18 22:26:11 -07:00
parent 057e8f5b54
commit 69f4152f38
33 changed files with 174 additions and 123 deletions

View file

@ -27,7 +27,7 @@ wget https://justine.lol/cosmopolitan/cosmopolitan.zip
unzip cosmopolitan.zip unzip cosmopolitan.zip
printf 'main() { printf("hello world\\n"); }\n' >hello.c printf 'main() { printf("hello world\\n"); }\n' >hello.c
gcc -g -Os -static -nostdlib -nostdinc -fno-pie -no-pie -mno-red-zone \ gcc -g -Os -static -nostdlib -nostdinc -fno-pie -no-pie -mno-red-zone \
-fno-omit-frame-pointer -pg -mnop-mcount \ -fno-omit-frame-pointer -pg -mnop-mcount -mno-tls-direct-seg-refs \
-o hello.com.dbg hello.c -fuse-ld=bfd -Wl,-T,ape.lds \ -o hello.com.dbg hello.c -fuse-ld=bfd -Wl,-T,ape.lds \
-include cosmopolitan.h crt.o ape-no-modify-self.o cosmopolitan.a -include cosmopolitan.h crt.o ape-no-modify-self.o cosmopolitan.a
objcopy -S -O binary hello.com.dbg hello.com objcopy -S -O binary hello.com.dbg hello.com

View file

@ -8,4 +8,6 @@
*/ */
#endif #endif
int main() { return 42; } int main() {
return 42;
}

View file

@ -21,9 +21,9 @@
__restore_rt_netbsd: __restore_rt_netbsd:
mov %r15,%rdi mov %r15,%rdi
mov $308,%rax # setcontext mov $308,%eax # setcontext
syscall syscall
mov $-1,%edi or $-1,%edi
mov $1,%rax # exit mov $1,%rax # exit
syscall syscall
.endfn __restore_rt_netbsd,globl,hidden .endfn __restore_rt_netbsd,globl,hidden

View file

@ -205,6 +205,11 @@ static int __sigaction(int sig, const struct sigaction *act,
arg4 = (int64_t)(intptr_t)oldact; /* from go code */ arg4 = (int64_t)(intptr_t)oldact; /* from go code */
arg5 = 0; arg5 = 0;
} else if (IsNetbsd()) { } else if (IsNetbsd()) {
/* int __sigaction_sigtramp(int signum,
const struct sigaction *nsa,
struct sigaction *osa,
const void *tramp,
int vers); */
if (ap) { if (ap) {
arg4 = (int64_t)(intptr_t)&__restore_rt_netbsd; arg4 = (int64_t)(intptr_t)&__restore_rt_netbsd;
arg5 = 2; /* netbsd/lib/libc/arch/x86_64/sys/__sigtramp2.S */ arg5 = 2; /* netbsd/lib/libc/arch/x86_64/sys/__sigtramp2.S */

View file

@ -7,7 +7,7 @@
#include "libc/calls/struct/stat.h" #include "libc/calls/struct/stat.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#define _KERNTRACE 0 /* not configurable w/ flag yet */ #define _KERNTRACE 1 /* not configurable w/ flag yet */
#define _POLLTRACE 0 /* not configurable w/ flag yet */ #define _POLLTRACE 0 /* not configurable w/ flag yet */
#define _DATATRACE 1 /* not configurable w/ flag yet */ #define _DATATRACE 1 /* not configurable w/ flag yet */
#define _NTTRACE 0 /* not configurable w/ flag yet */ #define _NTTRACE 0 /* not configurable w/ flag yet */

View file

@ -13,7 +13,6 @@ COSMOPOLITAN_C_START_
#define _UC_TLSBASE 0x00080000 #define _UC_TLSBASE 0x00080000
#define _UC_SETSTACK 0x00010000 #define _UC_SETSTACK 0x00010000
#define _UC_CLRSTACK 0x00020000 #define _UC_CLRSTACK 0x00020000
#define _UC_CLRSTACK 0x00020000
union sigval_netbsd { union sigval_netbsd {
int32_t sival_int; int32_t sival_int;

View file

@ -54,7 +54,9 @@ int gettid(void) {
int tid; int tid;
if (__tls_enabled) { if (__tls_enabled) {
tid = *(int *)(__get_tls() + 0x38); tid = *(int *)(__get_tls() + 0x38);
if (tid > 0) return tid; if (tid > 0) {
return tid;
}
} }
return sys_gettid(); return sys_gettid();
} }

View file

@ -35,6 +35,7 @@
#include "libc/log/log.h" #include "libc/log/log.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
#include "libc/nexgen32e/stackframe.h" #include "libc/nexgen32e/stackframe.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/internal.h" #include "libc/runtime/internal.h"
#include "libc/runtime/pc.internal.h" #include "libc/runtime/pc.internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
@ -283,6 +284,7 @@ relegated void __oncrash(int sig, struct siginfo *si, ucontext_t *ctx) {
int gdbpid, err; int gdbpid, err;
static int sync; static int sync;
static bool notpossible; static bool notpossible;
__tls_enabled = false;
STRACE("__oncrash rip %x", ctx->uc_mcontext.rip); STRACE("__oncrash rip %x", ctx->uc_mcontext.rip);
--__ftrace; --__ftrace;
--__strace; --__strace;

View file

@ -194,7 +194,6 @@ static int sys_unveil_linux(const char *path, const char *permissions) {
*/ */
int unveil(const char *path, const char *permissions) { int unveil(const char *path, const char *permissions) {
int rc; int rc;
__enable_tls();
if (IsLinux()) { if (IsLinux()) {
rc = sys_unveil_linux(path, permissions); rc = sys_unveil_linux(path, permissions);
} else { } else {

View file

@ -199,7 +199,7 @@ int arch_prctl(int code, int64_t addr) {
case METAL: case METAL:
return arch_prctl_msr(code, addr); return arch_prctl_msr(code, addr);
case FREEBSD: case FREEBSD:
/* claims support but it appears not */ // TODO(jart): this should use sysarch()
return arch_prctl_freebsd(code, addr); return arch_prctl_freebsd(code, addr);
case OPENBSD: case OPENBSD:
return arch_prctl_openbsd(code, addr); return arch_prctl_openbsd(code, addr);

View file

@ -565,21 +565,9 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
* either terminated or has finished using its stack memory * either terminated or has finished using its stack memory
* *
* - `CLONE_SETTLS` is needed if you intend to specify the `tls` * - `CLONE_SETTLS` is needed if you intend to specify the `tls`
* argument, which provides a fast-path solution for changing the * argument, which after thread creation may be accessed using
* appropriate TLS segment register within the child thread. The * __get_tls(). Doing this means that `errno`, gettid(), etc.
* child thread may then obtain a reference to the TIB address you * correctly work. Caveat emptor if you choose not to do this.
* supplied, by calling __get_tls(). Your C library holds certain
* expectations about the layout of your Thread Information Block
* (TIB), which are all documented by __initialize_tls(). That
* function can be used to initialize the first positive 64 bytes
* of your TLS allocation, which is the memory Cosmopolitan Libc
* wants for itself (and negative addresses are reserved by the
* GNU Linker). Using this flag will transition the C runtime to a
* `__tls_enabled` state automatically. If you use TLS for just
* one thread, then you must be specify TLS for ALL THREADS. It's
* a good idea to do that since TLS can offer considerable (i.e.
* multiple orders of a magnitude) performance improvement for
* TID-dependent C library services, e.g. recursive mutexes.
* *
* @param arg is passed as an argument to `func` in the child thread * @param arg is passed as an argument to `func` in the child thread
* @param tls may be used to set the thread local storage segment; * @param tls may be used to set the thread local storage segment;
@ -594,8 +582,9 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
int rc; int rc;
struct CloneArgs *wt; struct CloneArgs *wt;
if (flags & CLONE_SETTLS) __enable_tls(); if (flags & CLONE_THREAD) {
if (flags & CLONE_THREAD) __enable_threads(); __enable_threads();
}
if (!func) { if (!func) {
rc = einval(); rc = einval();

View file

@ -76,22 +76,14 @@ cosmo: push %rbp
ret ret
.endfn cosmo,weak .endfn cosmo,weak
#if !IsTiny() // Enables Thread Local Storage.
// Enable TLS early if _Thread_local is used
// In MODE=tiny you may need to explicitly call __enable_tls()
// Otherwise this would bloat life.com from 16kb 32kb D:
.init.start 304,_init_tls .init.start 304,_init_tls
mov $_tls_content,%eax
test %eax,%eax
jz 1f
push %rdi push %rdi
push %rsi push %rsi
call __enable_tls call __enable_tls
pop %rsi pop %rsi
pop %rdi pop %rdi
jz 1f .init.end 304,_init_tls
1: .init.end 304,_init_tls
#endif
#if !IsTiny() #if !IsTiny()
// Creates deterministically addressed stack we can use // Creates deterministically addressed stack we can use

View file

@ -16,12 +16,16 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/assert.h"
#include "libc/bits/bits.h" #include "libc/bits/bits.h"
#include "libc/bits/weaken.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h" #include "libc/calls/strace.internal.h"
#include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/libfatal.internal.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h" #include "libc/nexgen32e/threaded.h"
#include "libc/nt/thread.h" #include "libc/nt/thread.h"
@ -52,12 +56,35 @@ __msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc;
extern unsigned char __tls_mov_nt_rax[]; extern unsigned char __tls_mov_nt_rax[];
extern unsigned char __tls_add_nt_rax[]; extern unsigned char __tls_add_nt_rax[];
_Alignas(long) static char __static_tls[5008];
/** /**
* Enables thread local storage. * Enables thread local storage.
*
* This function is always called by the core runtime to guarantee TLS
* is always available to your program. You must build your code using
* -mno-tls-direct-seg-refs if you want to use _Thread_local.
*
* You can use __get_tls() to get the linear address of your tib. When
* accessing TLS via privileged code you must use __get_tls_privileged
* because we need code morphing to support The New Technology and XNU
*
* On XNU and The New Technology, this function imposes 1ms of latency
* during startup for larger binaries like Python.
*
* If you don't want TLS and you're sure you're not using it, then you
* can disable it as follows:
*
* int main() {
* __tls_enabled = false;
* // do stuff
* }
*
* This is useful if you want to wrestle back control of %fs using the
* arch_prctl() function. However, such programs might not be portable
* and your `errno` variable also won't be thread safe anymore.
*/ */
privileged void __enable_tls(void) { privileged void __enable_tls(void) {
if (__tls_enabled) return;
STRACE("__enable_tls()"); STRACE("__enable_tls()");
// allocate tls memory for main process // allocate tls memory for main process
@ -74,20 +101,42 @@ privileged void __enable_tls(void) {
size_t siz; size_t siz;
cthread_t tib; cthread_t tib;
char *mem, *tls; char *mem, *tls;
siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE); siz = ROUNDUP(_TLSZ + _TIBZ, alignof(__static_tls));
mem = _mapanon(siz); if (siz <= sizeof(__static_tls)) {
// if tls requirement is small then use the static tls block
// which helps avoid a system call for apps with little tls
// this is crucial to keeping life.com 16 kilobytes in size!
_Static_assert(alignof(__static_tls) >= alignof(cthread_t));
mem = __static_tls;
} else {
// if this binary needs a hefty tls block then we'll bank on
// malloc() being linked, which links _mapanon(). otherwise
// if you exceed this, you need to STATIC_YOINK("_mapanon").
// please note that it's probably too early to call calloc()
assert(weaken(_mapanon));
siz = ROUNDUP(siz, FRAMESIZE);
mem = weaken(_mapanon)(siz);
assert(mem);
}
tib = (cthread_t)(mem + siz - _TIBZ); tib = (cthread_t)(mem + siz - _TIBZ);
tls = mem + siz - _TIBZ - _TLSZ; tls = mem + siz - _TIBZ - _TLSZ;
tib->self = tib; tib->self = tib;
tib->self2 = tib; tib->self2 = tib;
tib->err = __errno; tib->err = __errno;
tib->tid = sys_gettid(); if (IsLinux()) {
memmove(tls, _tdata_start, _TLDZ); // gnu/systemd guarantees pid==tid for the main thread so we can
// avoid issuing a superfluous system call at startup in program
tib->tid = __pid;
} else {
tib->tid = sys_gettid();
}
__repmovsb(tls, _tdata_start, _TLDZ);
// ask the operating system to change the x86 segment register // ask the operating system to change the x86 segment register
int ax, dx; int ax, dx;
if (IsWindows()) { if (IsWindows()) {
__tls_index = __imp_TlsAlloc(); __tls_index = __imp_TlsAlloc();
assert(0 <= __tls_index && __tls_index < 64);
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
} else if (IsFreebsd()) { } else if (IsFreebsd()) {
asm volatile("syscall" asm volatile("syscall"
@ -95,9 +144,12 @@ privileged void __enable_tls(void) {
: "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib) : "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib)
: "rcx", "r11", "memory", "cc"); : "rcx", "r11", "memory", "cc");
} else if (IsNetbsd()) { } else if (IsNetbsd()) {
// netbsd has sysarch(X86_SET_FSBASE) but we can't use that because
// signal handlers will cause it to be reset due to not setting the
// _mc_tlsbase field in struct mcontext_netbsd.
asm volatile("syscall" asm volatile("syscall"
: "=a"(ax), "=d"(dx) : "=a"(ax), "=d"(dx)
: "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib) : "0"(__NR__lwp_setprivate), "D"(tib)
: "rcx", "r11", "memory", "cc"); : "rcx", "r11", "memory", "cc");
} else if (IsXnu()) { } else if (IsXnu()) {
asm volatile("syscall" asm volatile("syscall"
@ -179,7 +231,7 @@ privileged void __enable_tls(void) {
} }
// we're checking for the following expression: // we're checking for the following expression:
// 0144 == p[0] && // fs // 0144 == p[0] && // %fs
// 0110 == p[1] && // rex.w (64-bit operand size) // 0110 == p[1] && // rex.w (64-bit operand size)
// (0213 == p[2] || // mov reg/mem → reg (word-sized) // (0213 == p[2] || // mov reg/mem → reg (word-sized)
// 0003 == p[2]) && // add reg/mem → reg (word-sized) // 0003 == p[2]) && // add reg/mem → reg (word-sized)
@ -195,7 +247,7 @@ privileged void __enable_tls(void) {
!p[8]) { !p[8]) {
// now change the code // now change the code
p[0] = 0145; // this changes gs segment to fs segment p[0] = 0145; // change %fs to %gs
p[5] = (dis & 0x000000ff) >> 000; // displacement p[5] = (dis & 0x000000ff) >> 000; // displacement
p[6] = (dis & 0x0000ff00) >> 010; // displacement p[6] = (dis & 0x0000ff00) >> 010; // displacement
p[7] = (dis & 0x00ff0000) >> 020; // displacement p[7] = (dis & 0x00ff0000) >> 020; // displacement

View file

@ -92,7 +92,6 @@ privileged void ftracer(void) {
textstartup int ftrace_install(void) { textstartup int ftrace_install(void) {
if (GetSymbolTable()) { if (GetSymbolTable()) {
__enable_tls();
g_stackdigs = LengthInt64Thousands(GetStackSize()); g_stackdigs = LengthInt64Thousands(GetStackSize());
return __hook(ftrace_hook, GetSymbolTable()); return __hook(ftrace_hook, GetSymbolTable());
} else { } else {

View file

@ -27,7 +27,7 @@ extern unsigned char _tls_size[];
extern unsigned char _tls_content[]; extern unsigned char _tls_content[];
void _init(void) hidden; void _init(void) hidden;
void __enable_tls(void) hidden; void __enable_tls(void);
void __enable_threads(void) hidden; void __enable_threads(void) hidden;
void __restorewintty(void) hidden; void __restorewintty(void) hidden;
void *__cxa_finalize(void *) hidden; void *__cxa_finalize(void *) hidden;

View file

@ -55,15 +55,16 @@
* *
* That is performed automatically for unit test executables. * That is performed automatically for unit test executables.
* *
* @return memory map address on success, or null w/ errrno * @return memory map address on success, or null w/ errno
*/ */
void *_mapanon(size_t size) { void *_mapanon(size_t size) {
/* asan runtime depends on this function */
void *m; void *m;
m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (m == MAP_FAILED && weaken(__oom_hook)) { if (m != MAP_FAILED) {
weaken(__oom_hook)(size); return m;
return 0;
} }
return m; if (errno == ENOMEM && weaken(__oom_hook)) {
weaken(__oom_hook)(size);
}
return 0;
} }

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#define ShouldUseMsabiAttribute() 1 #define ShouldUseMsabiAttribute() 1
#include "libc/assert.h"
#include "libc/bits/asmflag.h" #include "libc/bits/asmflag.h"
#include "libc/calls/internal.h" #include "libc/calls/internal.h"
#include "libc/calls/strace.internal.h" #include "libc/calls/strace.internal.h"
@ -58,10 +59,28 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot,
* @return 0 on success, or -1 w/ errno * @return 0 on success, or -1 w/ errno
*/ */
privileged void __morph_begin(void) { privileged void __morph_begin(void) {
int ax;
bool cf;
intptr_t dx;
sigset_t ss = {{-1, -1}}; sigset_t ss = {{-1, -1}};
STRACE("__morph_begin()"); STRACE("__morph_begin()");
if (!IsWindows()) { if (!IsWindows()) {
sys_sigprocmask(SIG_BLOCK, &ss, &oldss); if (!IsOpenbsd()) {
asm volatile("mov\t$8,%%r10d\n\t"
"syscall"
: "=a"(ax), "=d"(dx)
: "0"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(&ss),
"1"(&oldss)
: "rcx", "r10", "r11", "memory", "cc");
assert(!ax);
} else {
asm volatile(CFLAG_ASM("syscall")
: CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx)
: "1"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(-1u)
: "rcx", "r11", "memory");
oldss.__bits[0] = ax & 0xffffffff;
assert(!cf);
}
} }
__morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE, __morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE,
kNtPageWritecopy); kNtPageWritecopy);
@ -71,10 +90,28 @@ privileged void __morph_begin(void) {
* Ends code morphing executable. * Ends code morphing executable.
*/ */
privileged void __morph_end(void) { privileged void __morph_end(void) {
int ax;
long dx;
bool cf;
__morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_EXEC, __morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_EXEC,
kNtPageExecuteRead); kNtPageExecuteRead);
if (!IsWindows()) { if (!IsWindows()) {
sys_sigprocmask(SIG_SETMASK, &oldss, 0); if (!IsOpenbsd()) {
asm volatile("mov\t$8,%%r10d\n\t"
"syscall"
: "=a"(ax), "=d"(dx)
: "0"(__NR_sigprocmask), "D"(SIG_SETMASK), "S"(&oldss),
"1"(0)
: "rcx", "r10", "r11", "memory", "cc");
assert(!ax);
} else {
asm volatile(CFLAG_ASM("syscall")
: CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx)
: "1"(__NR_sigprocmask), "D"(SIG_SETMASK),
"S"(oldss.__bits[0])
: "rcx", "r11", "memory");
assert(!cf);
}
} }
STRACE("__morph_end()"); STRACE("__morph_end()");
} }

View file

@ -48,7 +48,7 @@ scall __sys_mmap 0x0c50c51dd20c5009 globl hidden # netbsd+openbsd:pad
scall sys_msync 0x115100041204101a globl hidden scall sys_msync 0x115100041204101a globl hidden
scall sys_mprotect 0x04a04a04a204a00a globl hidden scall sys_mprotect 0x04a04a04a204a00a globl hidden
scall __sys_munmap 0x049049049204900b globl hidden scall __sys_munmap 0x049049049204900b globl hidden
scall sys_sigaction 0x15402e1a0202e00d globl hidden # rt_sigaction on Lunix; it's complicated on NetBSD scall sys_sigaction 0x15402e1a0202e00d globl hidden # rt_sigaction on Lunix; __sigaction_sigtramp() on NetBSD
scall __sys_sigprocmask 0x125030154214900e globl hidden # a.k.a. rt_sigprocmask, openbsd:byvalue, a.k.a. pthread_sigmask scall __sys_sigprocmask 0x125030154214900e globl hidden # a.k.a. rt_sigprocmask, openbsd:byvalue, a.k.a. pthread_sigmask
scall sys_ioctl 0x0360360362036010 globl hidden scall sys_ioctl 0x0360360362036010 globl hidden
scall sys_pread 0x0ad0ad1db2099011 globl hidden # a.k.a. pread64; netbsd+openbsd:pad scall sys_pread 0x0ad0ad1db2099011 globl hidden # a.k.a. pread64; netbsd+openbsd:pad

View file

@ -1,27 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.init.start 400,_main_thread_ctor
push %rdi
push %rsi
call __enable_tls
pop %rsi
pop %rdi
.init.end 400,_main_thread_ctor

View file

@ -20,8 +20,6 @@
#include "libc/nexgen32e/threaded.h" #include "libc/nexgen32e/threaded.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
/** /**
* Returns thread descriptor of the current thread. * Returns thread descriptor of the current thread.
*/ */

View file

@ -20,8 +20,6 @@
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
#define CTHREAD_THREAD_VAL_BITS 32 #define CTHREAD_THREAD_VAL_BITS 32
static void Pause(int attempt) { static void Pause(int attempt) {

View file

@ -32,8 +32,6 @@
#include "libc/thread/spawn.h" #include "libc/thread/spawn.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
/** /**
* @fileoverview Simple threading API * @fileoverview Simple threading API
* *

View file

@ -114,6 +114,12 @@ TEST(malloc, test) {
for (i = 0; i < ARRAYLEN(fds); ++i) close(fds[i]); for (i = 0; i < ARRAYLEN(fds); ++i) close(fds[i]);
} }
TEST(memalign, roundsUpAlignmentToTwoPower) {
char *volatile p = memalign(129, 1);
ASSERT_EQ(0, (intptr_t)p & 255);
free(p);
}
void *bulk[1024]; void *bulk[1024];
void BulkFreeBenchSetup(void) { void BulkFreeBenchSetup(void) {

View file

@ -19,9 +19,11 @@
#include "libc/bits/segmentation.h" #include "libc/bits/segmentation.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
__attribute__((__constructor__)) static void init(void) { __attribute__((__constructor__)) static void init(void) {
__tls_enabled = false;
pledge("stdio rpath", 0); pledge("stdio rpath", 0);
errno = 0; errno = 0;
} }

View file

@ -24,6 +24,7 @@
#include "libc/intrin/kprintf.h" #include "libc/intrin/kprintf.h"
#include "libc/runtime/memtrack.internal.h" #include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h" #include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
@ -66,6 +67,12 @@ TEST(munmap, doesntExist_doesntCare) {
} }
} }
TEST(munmap, invalidParams) {
EXPECT_SYS(EINVAL, -1, munmap(0, 0));
EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000000, 0));
EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000001, FRAMESIZE));
}
TEST(munmap, test) { TEST(munmap, test) {
char *p; char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE,
@ -75,12 +82,6 @@ TEST(munmap, test) {
EXPECT_FALSE(MemoryExists(p)); EXPECT_FALSE(MemoryExists(p));
} }
TEST(munmap, invalidParams) {
EXPECT_SYS(EINVAL, -1, munmap(0, 0));
EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000000, 0));
EXPECT_SYS(EINVAL, -1, munmap((void *)0x100080000001, FRAMESIZE));
}
TEST(munmap, punchHoleInMemory) { TEST(munmap, punchHoleInMemory) {
char *p; char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE * 3, PROT_READ | PROT_WRITE, ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE * 3, PROT_READ | PROT_WRITE,

View file

@ -64,7 +64,6 @@ TEST(fgetc, testUnbuffered) {
} }
BENCH(fputc, bench) { BENCH(fputc, bench) {
__enable_tls();
__enable_threads(); __enable_threads();
FILE *f; FILE *f;
ASSERT_NE(NULL, (f = fopen("/dev/null", "w"))); ASSERT_NE(NULL, (f = fopen("/dev/null", "w")));

View file

@ -38,9 +38,3 @@ TEST(pthread_key_create, testRunsDtors_becauseNoLeakReport) {
EXPECT_EQ(0, pthread_setspecific(key, x)); EXPECT_EQ(0, pthread_setspecific(key, x));
EXPECT_EQ(x, pthread_getspecific(key)); EXPECT_EQ(x, pthread_getspecific(key));
} }
__attribute__((__constructor__)) static void init(void) {
if (IsTiny()) {
__enable_tls();
}
}

View file

@ -9,5 +9,7 @@ LICENSE
LOCAL CHANGES LOCAL CHANGES
- Introduce __oom_hook() - Use faster two power roundup for memalign()
- Favor pause (rather than sched_yield) for spin locks - Poison maps to integrate with Address Sanitizer
- Introduce __oom_hook() by using _mapanon() vs. mmap()
- Wrap locks with __threaded check to improve perf lots

View file

@ -5,7 +5,9 @@
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/kprintf.h" #include "libc/intrin/kprintf.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/rdtsc.h" #include "libc/nexgen32e/rdtsc.h"
#include "libc/rand/rand.h" #include "libc/rand/rand.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
@ -917,11 +919,8 @@ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
void* mem = 0; void* mem = 0;
if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
alignment = MIN_CHUNK_SIZE; alignment = MIN_CHUNK_SIZE;
if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ /* alignment is 32+ bytes rounded up to nearest two power */
size_t a = MALLOC_ALIGNMENT << 1; alignment = 2ul << bsrl(MAX(MIN_CHUNK_SIZE, alignment) - 1);
while (a < alignment) a <<= 1;
alignment = a;
}
if (bytes >= MAX_REQUEST - alignment) { if (bytes >= MAX_REQUEST - alignment) {
if (m != 0) { /* Test isn't needed but avoids compiler warning */ if (m != 0) { /* Test isn't needed but avoids compiler warning */
MALLOC_FAILURE_ACTION; MALLOC_FAILURE_ACTION;

View file

@ -1,4 +1,5 @@
// clang-format off // clang-format off
#include "libc/calls/calls.h"
#include "libc/nexgen32e/threaded.h" #include "libc/nexgen32e/threaded.h"
/* --------------------------- Lock preliminaries ------------------------ */ /* --------------------------- Lock preliminaries ------------------------ */
@ -94,7 +95,7 @@ FORCEINLINE void x86_clear_lock(int* sl) {
/* Plain spin locks use single word (embedded in malloc_states) */ /* Plain spin locks use single word (embedded in malloc_states) */
static dontinline int spin_acquire_lock(int *sl) { static dontinline int spin_acquire_lock(int *sl) {
while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) {
__builtin_ia32_pause(); sched_yield();
} }
return 0; return 0;
} }
@ -154,7 +155,7 @@ FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) {
++lk->c; ++lk->c;
return 0; return 0;
} }
__builtin_ia32_pause(); sched_yield();
} }
} }

View file

@ -179,6 +179,8 @@
/* /*
======================================================================== ========================================================================
To make a fully customizable malloc.h header file, cut everything To make a fully customizable malloc.h header file, cut everything
#include "libc/sysv/consts/map.h"
#include "libc/runtime/runtime.h"
above this line, put into file malloc.h, edit to suit, and #include it above this line, put into file malloc.h, edit to suit, and #include it
on the next line, as well as in programs that use this malloc. on the next line, as well as in programs that use this malloc.
======================================================================== ========================================================================
@ -385,7 +387,7 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
/* MORECORE and MMAP must return MFAIL on failure */ /* MORECORE and MMAP must return MFAIL on failure */
#define MFAIL ((void*)(MAX_SIZE_T)) #define MFAIL NULL
#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ #define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
#if HAVE_MMAP #if HAVE_MMAP
@ -398,7 +400,7 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
#endif /* MAP_ANON */ #endif /* MAP_ANON */
#ifdef MAP_ANONYMOUS #ifdef MAP_ANONYMOUS
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) #define MMAP_DEFAULT(s) _mapanon(s)
#else /* MAP_ANONYMOUS */ #else /* MAP_ANONYMOUS */
/* /*
Nearly all versions of mmap support MAP_ANONYMOUS, so the following Nearly all versions of mmap support MAP_ANONYMOUS, so the following
@ -408,8 +410,8 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ #define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
(dev_zero_fd = open("/dev/zero", O_RDWR), \ (dev_zero_fd = open("/dev/zero", O_RDWR), \
mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ mmap_no(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) mmap_no(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
#endif /* MAP_ANONYMOUS */ #endif /* MAP_ANONYMOUS */
#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)

View file

@ -16,24 +16,21 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/bits/weaken.h" #include "libc/dce.h"
#include "libc/calls/calls.h"
#include "libc/intrin/asan.internal.h" #include "libc/intrin/asan.internal.h"
#include "libc/intrin/asancodes.h" #include "libc/intrin/asancodes.h"
#include "libc/intrin/kprintf.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/sysv/consts/map.h" #include "third_party/dlmalloc/vespene.internal.h"
#include "libc/sysv/consts/prot.h"
/** /**
* Acquires more system memory for dlmalloc. * Acquires more system memory for dlmalloc.
* @return memory map address on success, or null w/ errno
*/ */
void *dlmalloc_requires_more_vespene_gas(size_t size) { void *dlmalloc_requires_more_vespene_gas(size_t size) {
char *p; char *p;
if ((p = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, if ((p = _mapanon(size))) {
-1, 0)) != MAP_FAILED) { if (IsAsan()) {
if (weaken(__asan_poison)) { __asan_poison(p, size, kAsanHeapFree);
weaken(__asan_poison)(p, size, kAsanHeapFree);
} }
} }
return p; return p;

View file

@ -18,12 +18,14 @@
*/ */
#include "libc/intrin/kprintf.h" #include "libc/intrin/kprintf.h"
#include "libc/log/log.h" #include "libc/log/log.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/stdio/stdio.h" #include "libc/stdio/stdio.h"
#include "tool/plinko/lib/plinko.h" #include "tool/plinko/lib/plinko.h"
STATIC_YOINK("__zipos_get"); STATIC_YOINK("__zipos_get");
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
__tls_enabled = false;
Plinko(argc, argv); Plinko(argc, argv);
return 0; return 0;
} }