Reduce mandatory stack rss by 256kb

This commit is contained in:
Justine Tunney 2023-09-07 04:30:44 -07:00
parent 0e087143fd
commit b592716d1c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
15 changed files with 98 additions and 125 deletions

View file

@ -280,6 +280,8 @@ SECTIONS {
ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000;
ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024;
ape_stack_align = DEFINED(ape_stack_align) ? ape_stack_align : 16;
ape_stack_round = -ape_stack_align;
_tls_size = _tbss_end - _tdata_start;
_tdata_size = _tdata_end - _tdata_start;

View file

@ -585,7 +585,8 @@ ape_stack_vaddr = DEFINED(ape_stack_vaddr) ? ape_stack_vaddr : 0x700000000000;
ape_stack_paddr = ape_ram_paddr + ape_ram_filesz;
ape_stack_filesz = 0;
ape_stack_memsz = DEFINED(ape_stack_memsz) ? ape_stack_memsz : 8 * 1024 * 1024;
ape_stack_align = 16;
ape_stack_align = DEFINED(ape_stack_align) ? ape_stack_align : 16;
ape_stack_round = -ape_stack_align;
ape_note_offset = ape_cod_offset + (ape_note - ape_cod_vaddr);
ape_note_filesz = ape_note_end - ape_note;

View file

@ -68,14 +68,12 @@ _start:
mov %rsp,__oldstack(%rip)
mov %rdx,__envp(%rip)
// setup backtraces
// setup stack
xor %ebp,%ebp
and $ape_stack_round,%rsp
// make process stack (8mb) follow thread stack (256kb) alignment
and $-(256*1024),%rsp
#if SupportsWindows()
// make win32 imps noop
#if SupportsWindows() && !IsTiny()
// make win32 imps crash
.weak ape_idata_iat
.weak ape_idata_iatend
.weak __oops_win32
@ -122,9 +120,12 @@ _start:
// this is the first argument to cosmo() below
mov x0,sp
// make process stack (8mb) conform to thread stack (256kb) alignment
mov x1,sp
and sp,x1,-(256*1024)
// setup the stack
mov x29,#0
mov x30,#0
ldr x1,=ape_stack_round
and x1,x0,x1
mov sp,x1
// second arg shall be struct Syslib passed by ape-m1.c
// used to talk to apple's authoritarian libraries

View file

@ -38,7 +38,6 @@ void __enable_tls(void);
void *__cxa_finalize(void *);
void __stack_chk_fail(void) wontreturn relegated;
void __stack_chk_fail_local(void) wontreturn relegated;
void _jmpstack(void *, void *, ...) wontreturn;
long _setstack(void *, void *, ...);
int GetDosArgv(const char16_t *, char *, size_t, char **, size_t);
int GetDosEnviron(const char16_t *, char *, size_t, char **, size_t);

View file

@ -1,37 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Switches stack.
//
// Installs a new stack pointer and then calls a function on it,
// forwarding up to four register arguments. Control is not expected
// to come back to the caller.
//
// @param rdi is new rsp, passed as malloc(size) + size
// @param rsi is function to call in new stack space
// @param rdx,rcx,r8,r9 get passed as args to rsi
// @noreturn
_jmpstack:
// adopt the caller-supplied address as the stack pointer
mov %rdi,%rsp
// stash the target in rax, since rsi is overwritten two lines below
mov %rsi,%rax
// slide the forwarded arguments down by two register slots so the
// target receives them in rdi, rsi, rdx, and rcx respectively
mov %rdx,%rdi
mov %rcx,%rsi
mov %r8,%rdx
mov %r9,%rcx
// zero the frame pointer before entering the target
// NOTE(review): presumably so frame-pointer walks end here -- confirm
xor %ebp,%ebp
call *%rax
// NOTE(review): .unreachable and .endfn are project macros; presumably
// they flag a return as a bug and close a hidden global symbol -- see
// libc/macros.internal.h to confirm
.unreachable
.endfn _jmpstack,globl,hidden

View file

@ -18,46 +18,18 @@
#define GetGuardSize() 16384
/**
* Tunes APE stack maximum size.
* Align APE main thread stack at startup.
*
* The bottom-most page will be protected to ensure your stack does not
* magically grow beyond this value. It's possible to detect stack
* overflows, by calling `ShowCrashReports()`. Your stack size must be a
* power of two; the linker will check this.
* You need this in your main program module:
*
* If you want to know how much stack your programs needs, then
* STATIC_STACK_ALIGN(GetStackSize());
*
* __static_yoink("stack_usage_logging");
*
* will install an atexit() handler that appends to `o/$MODE/stack.log`
*
* @see libc/sysv/systemfive.S
* @see ape/ape.lds
* If you want to use GetStackAddr() and HaveStackMemory() safely on
* your main thread in your process. It causes crt.S to waste a tiny
* amount of memory to ensure those macros go extremely fast.
*/
#define STATIC_STACK_SIZE(BYTES) \
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
/**
* Tunes APE stack virtual address.
*
* This value must be aligned according to your stack size, and that's
* checked by your linker script. This defaults to `0x700000000000` so
*
* 1. It's easy to see how close you are to the bottom
* 2. The linker script error is unlikely to happen
*
* This macro will be respected, with two exceptions
*
* 1. In MODE=tiny the operating system provided stack is used instead
* 2. Windows 7 doesn't support 64-bit addresses, so we'll instead use
* `0x10000000 - GetStackSize()` as the stack address
*
* @see libc/sysv/systemfive.S
* @see libc/nt/winmain.greg.c
* @see ape/ape.lds
*/
#define STATIC_STACK_ADDR(ADDR) \
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
#define STATIC_STACK_ALIGN(BYTES) \
_STACK_SYMBOL("ape_stack_align", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
/**
* Makes program stack executable if declared, e.g.
@ -77,9 +49,9 @@
#define STATIC_EXEC_STACK() _STACK_SYMBOL("ape_stack_pf", "7")
#define _STACK_STRINGIFY(ADDR) #ADDR
#define _STACK_SYMBOL(NAME, VALUE) \
asm(".equ\t" NAME "," VALUE "\n\t" \
".globl\t" NAME)
#define _STACK_SYMBOL(NAME, VALUE) \
__asm__(".equ\t" NAME "," VALUE "\n\t" \
".globl\t" NAME)
#ifdef __SANITIZE_ADDRESS__
#define _STACK_EXTRA "*2"
@ -95,29 +67,71 @@ extern char ape_stack_memsz[] __attribute__((__weak__));
extern char ape_stack_align[] __attribute__((__weak__));
/**
* Returns address of bottom of stack.
* Returns address of bottom of current stack.
*
* This takes into consideration threads and sigaltstack. This is
* implemented as a fast pure expression, since we're able to make the
* assumption that stack sizes are powers of two and aligned. This is
* thanks to (1) the linker script checking the statically chosen sizes,
* and (2) the mmap() address picker choosing aligned addresses when
* the provided size is a power of two.
* This always works on threads. If you want it to work on the main
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
* which will burn O(256kb) of memory to ensure thread invariants.
*/
#define GetStackAddr() \
(((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize())
#define GetStaticStackSize() ((uintptr_t)ape_stack_memsz)
/**
* Returns true if at least `n` bytes of stack are available.
*
* This always works on threads. If you want it to work on the main
* process too, then you'll need STATIC_STACK_ALIGN(GetStackSize())
* which will burn O(256kb) of memory to ensure thread invariants,
* which make this check exceedingly fast.
*/
#define HaveStackMemory(n) \
((intptr_t)__builtin_frame_address(0) >= \
GetStackAddr() + GetGuardSize() + (n))
/**
 * Touches a large stack allocation so the stack grows to cover it.
 *
 * Starting from the last byte of the region and stepping downward in
 * 4096 byte strides, one zero byte is stored per stride. This matters
 * on systems whose guard pages only cover a small window: Windows, for
 * example, can create stacks that aren't fully committed, leaving just
 * 4096 bytes of grows-down guard pages made by portable executable.
 * Call this after any alloca() bigger than that, so that overflows are
 * still detected and the stack is extended safely.
 *
 * @param p is the lowest address of the allocation
 * @param n is the allocation size in bytes; nothing is written if it
 *     isn't positive
 */
__funline void CheckLargeStackAllocation(void *p, ssize_t n) {
  char *region = (char *)p;
  ssize_t remaining = n;
  while (remaining > 0) {
    region[remaining - 1] = 0;  // poke one byte in this 4096 byte window
    remaining -= 4096;
  }
}
void *NewCosmoStack(void) vallocesque;
int FreeCosmoStack(void *) libcesque;
/**
* Tunes stack size of main thread on Windows.
*
* On UNIX systems use `RLIMIT_STACK` to tune the main thread size.
*/
#define STATIC_STACK_SIZE(BYTES) \
_STACK_SYMBOL("ape_stack_memsz", _STACK_STRINGIFY(BYTES) _STACK_EXTRA)
/**
* Tunes main thread stack address on Windows.
*/
#define STATIC_STACK_ADDR(ADDR) \
_STACK_SYMBOL("ape_stack_vaddr", _STACK_STRINGIFY(ADDR))
#ifdef __x86_64__
/**
* Returns preferred bottom address of stack.
* Returns preferred bottom address of main thread stack.
*
* This is the stack address of the main process. The only time that
* isn't guaranteed to be the case is in MODE=tiny, since it doesn't
* link the code for stack creation at startup. This generally isn't
* problematic, since MODE=tiny doesn't use any of the runtime codes
* which want the stack to be cheaply knowable, e.g. ftrace, kprintf
* On UNIX systems we favor the system provided stack, so this only
* really applies to Windows. It's configurable at link time. It is
* needed because polyfilling fork requires that we know precisely
* where the stack memory begins and ends.
*/
#define GetStaticStackAddr(ADDEND) \
({ \
@ -132,25 +146,6 @@ extern char ape_stack_align[] __attribute__((__weak__));
#define GetStaticStackAddr(ADDEND) (GetStackAddr() + ADDEND)
#endif
/**
* Returns true if at least `n` bytes of stack are available.
*/
#define HaveStackMemory(n) \
((intptr_t)__builtin_frame_address(0) >= \
GetStackAddr() + GetGuardSize() + (n))
/**
 * Extends stack memory by poking large allocations.
 *
 * A zero byte is stored at the final byte of the region and then at
 * every 4096 bytes below it, for as long as the remaining length is
 * positive.
 *
 * @param p is the lowest address of the allocation
 * @param n is the allocation size in bytes
 */
forceinline void CheckLargeStackAllocation(void *p, ssize_t n) {
  char *bytes = (char *)p;
  ssize_t left = n;
  while (left > 0) {
    bytes[left - 1] = 0;
    left -= 4096;
  }
}
void *NewCosmoStack(void) vallocesque;
int FreeCosmoStack(void *) libcesque;
COSMOPOLITAN_C_END_
#endif /* GNU ELF */
#endif /* _COSMO_SOURCE */

View file

@ -27,6 +27,10 @@
#include "libc/str/str.h"
#include "libc/sysv/consts/o.h"
// TODO(jart): Delete?
STATIC_STACK_ALIGN(GetStackSize());
static char stacklog[1024];
dontasan size_t GetStackUsage(char *s, size_t n) {

View file

@ -77,7 +77,10 @@ __msabi extern typeof(SetStdHandle) *const __imp_SetStdHandle;
__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
// clang-format on
extern void cosmo(int, char **, char **, long (*)[2]) wontreturn;
void cosmo(int, char **, char **, long (*)[2]) wontreturn;
void __switch_stacks(int, char **, char **, long (*)[2],
void (*)(int, char **, char **, long (*)[2]),
intptr_t) wontreturn;
static const signed char kNtStdio[3] = {
(signed char)kNtStdInputHandle,
@ -211,8 +214,8 @@ __msabi static textwindows wontreturn void WinInit(const char16_t *cmdline) {
__envp = &wa->envp[0];
// handover control to cosmopolitan runtime
_jmpstack((char *)(stackaddr + (stacksize - sizeof(struct WinArgs))), cosmo,
count, wa->argv, wa->envp, wa->auxv);
__switch_stacks(count, wa->argv, wa->envp, wa->auxv, cosmo,
stackaddr + (stacksize - sizeof(struct WinArgs)));
}
__msabi textwindows int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,

View file

@ -112,6 +112,7 @@ long double
coshl(long double x)
{
long double hi,lo,x2,x4;
(void)x4;
#if LDBL_MANT_DIG == 113
double dx2;
#endif

View file

@ -108,6 +108,7 @@ long double
sinhl(long double x)
{
long double hi,lo,x2,x4;
(void)x4;
#if LDBL_MANT_DIG == 113
double dx2;
#endif

View file

@ -135,6 +135,7 @@ long double
tanhl(long double x)
{
long double hi,lo,s,x2,x4,z;
(void)x4;
#if LDBL_MANT_DIG == 113
double dx2;
#endif

View file

@ -57,7 +57,7 @@ Lua 5.4.3 (MIT License)\\n\
Copyright 19942021 Lua.org, PUC-Rio.\"");
asm(".include \"libc/disclaimer.inc\"");
STATIC_STACK_SIZE(0x80000);
STATIC_STACK_ALIGN(GetStackSize());
#if !defined(LUA_PROGNAME)
#define LUA_PROGNAME "lua"

View file

@ -50,7 +50,6 @@ Lua 5.4.3 (MIT License)\\n\
Copyright 19942021 Lua.org, PUC-Rio.\"");
asm(".include \"libc/disclaimer.inc\"");
static void PrintFunction(const Proto* f, int full);
#define luaU_print PrintFunction

View file

@ -4,11 +4,14 @@
Python 3
https://docs.python.org/3/license.html │
*/
#include "libc/runtime/stack.h"
#include "third_party/python/Include/yoink.h"
#include "third_party/python/runpythonmodule.h"
#include "tool/args/args.h"
// clang-format off
STATIC_STACK_ALIGN(GetStackSize());
PYTHON_YOINK("xed");
PYTHON_YOINK("xterm");

View file

@ -142,7 +142,7 @@
#pragma GCC diagnostic ignored "-Wunused-variable"
STATIC_STACK_SIZE(0x80000);
STATIC_STACK_ALIGN(GetStackSize());
__static_yoink("zipos");