Keep argv[0], add COSMOPOLITAN_PROGRAM_EXECUTABLE (#980)

* Introduce env.com

Handy tool for debugging environment issues.

* Inject path as COSMOPOLITAN_PROGRAM_EXECUTABLE

`argv[0]` was previously being used as a communication channel between
the loader and the binary, giving the binary its full path for use e.g.
in `GetProgramExecutableName`. But `argv[0]` is not a good channel for
this; much of what made 2a3813c6 so gross is due to that.

This change fixes the issue by preserving `argv[0]` and establishing a
new communication channel: `COSMOPOLITAN_PROGRAM_EXECUTABLE`.

The M1 loader will always set this as the first variable. Linux should
soon follow. On the other side, `GetProgramExecutableName` checks that
variable first. If it sees it, it trusts it as-is.

A lot of the churn in `ape/ape-m1.c` in this change is actually backing
out hacks introduced in 2a3813c6; the best comparison is:

    git diff 2a3813c6^..
This commit is contained in:
Jōshin 2023-12-04 15:45:46 -05:00 committed by GitHub
parent 2a3813c6cf
commit ed8fadea37
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 168 additions and 38 deletions

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include <assert.h>
#include <dispatch/dispatch.h>
#include <dlfcn.h>
#include <errno.h>
@ -35,6 +36,10 @@
#include <unistd.h>
#define pagesz 16384
#define VARNAME "COSMOPOLITAN_PROGRAM_EXECUTABLE="
#define VARSIZE (sizeof(VARNAME) - 1)
/* maximum path size that cosmo can take */
#define PATHSIZE (PATH_MAX < 1024 ? PATH_MAX : 1024)
#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
#define SYSLIB_VERSION 8
@ -198,8 +203,11 @@ struct PathSearcher {
unsigned long namelen;
const char *name;
const char *syspath;
char path[1024];
char varname[VARSIZE];
char path[PATHSIZE];
};
_Static_assert(offsetof(struct PathSearcher, varname) + VARSIZE ==
offsetof(struct PathSearcher, path), "struct layout");
struct ApeLoader {
struct PathSearcher ps;
@ -313,7 +321,14 @@ __attribute__((__noreturn__)) static void Pexit(const char *c, int failed,
}
static char AccessCommand(struct PathSearcher *ps, unsigned long pathlen) {
if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) return 0;
if (!pathlen && *ps->name != '/') {
if (!getcwd(ps->path, sizeof(ps->path) - 1 - ps->namelen)) {
Pexit("getcwd", -errno, "failed");
}
pathlen = strlen(ps->path);
} else if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) {
return 0;
}
if (pathlen && ps->path[pathlen - 1] != '/') ps->path[pathlen++] = '/';
memmove(ps->path + pathlen, ps->name, ps->namelen);
ps->path[pathlen + ps->namelen] = 0;
@ -884,8 +899,9 @@ int main(int argc, char **argv, char **envp) {
struct ApeLoader *M;
long *sp, *sp2, *auxv;
union ElfEhdrBuf *ebuf;
int c, islogin, n, fd, rc;
char *p, *pe, *dash_l, *exe, *prog, *shell, *execfn;
int c, n, fd, rc;
char *p, *pe, *exe, *prog, *shell, *execfn;
char **varpos;
/* allocate loader memory in program's arg block */
n = sizeof(struct ApeLoader);
@ -947,24 +963,15 @@ int main(int argc, char **argv, char **envp) {
M->lib.dlclose = dlclose;
M->lib.dlerror = dlerror;
/* there is a common convention of shells being told that they
are login shells via the OS prepending a - to their argv[0].
the APE system doesn't like it when argv[0] is not the full
path of the binary. to rectify this, the loader puts a "-l"
flag in argv[1] and ignores the dash. */
if ((islogin = argc > 0 && *argv[0] == '-' && (shell = GetEnv(envp, "SHELL"))
&& !StrCmp(argv[0] + 1, BaseName(shell)))) {
execfn = shell;
dash_l = __builtin_alloca(3);
memmove(dash_l, "-l", 3);
} else {
execfn = argc > 0 ? argv[0] : 0;
}
/* getenv("_") is close enough to at_execfn */
execfn = argc > 0 ? argv[0] : 0;
varpos = 0;
for (i = 0; envp[i]; ++i) {
if (envp[i][0] == '_' && envp[i][1] == '=') {
execfn = envp[i] + 2;
} else if (!memcmp(VARNAME, envp[i], VARSIZE)) {
assert(!varpos);
varpos = envp + i;
}
}
@ -975,24 +982,22 @@ int main(int argc, char **argv, char **envp) {
/* create new bottom of stack for spawned program
system v abi aligns this on a 16-byte boundary
grows down the alloc by poking the guard pages */
n = (auxv - sp + islogin + AUXV_WORDS + 1) * sizeof(long);
n = (auxv - sp + !varpos + AUXV_WORDS + 1) * sizeof(long);
sp2 = (long *)__builtin_alloca(n);
if ((long)sp2 & 15) ++sp2;
for (; n > 0; n -= pagesz) {
((char *)sp2)[n - 1] = 0;
}
if (islogin) {
memmove(sp2, sp, 2 * sizeof(long));
*((char **)sp2 + 2) = dash_l;
memmove(sp2 + 3, sp + 2, (auxv - sp - 2) * sizeof(long));
++argc;
sp2[0] = argc;
} else {
memmove(sp2, sp, (auxv - sp) * sizeof(long));
}
memmove(sp2, sp, (auxv - sp) * sizeof(long));
argv = (char **)(sp2 + 1);
envp = (char **)(sp2 + 1 + argc + 1);
if (varpos) {
varpos = (char **)((long *)varpos - sp + sp2);
} else {
varpos = envp + i++;
*(envp + i) = 0;
}
auxv = (long *)(envp + i + 1);
sp = sp2;
@ -1008,13 +1013,14 @@ int main(int argc, char **argv, char **envp) {
but it will if you say:
ln -sf /usr/local/bin/ape /opt/cosmos/bin/bash.ape
and then use #!/opt/cosmos/bin/bash.ape instead. */
prog = (char *)sp[1];
if (*argv[0] == '-' && (shell = GetEnv(envp, "SHELL")) &&
!StrCmp(argv[0] + 1, BaseName(shell))) {
execfn = prog = shell;
} else {
prog = (char *)sp[1];
}
argc = sp[0];
argv = (char **)(sp + 1);
if (islogin) {
++argv[0];
prog = shell;
}
} else if ((M->ps.literally = argc >= 3 && !StrCmp(argv[1], "-"))) {
/* if the first argument is a hyphen then we give the user the
power to change argv[0] or omit it entirely. most operating
@ -1056,11 +1062,11 @@ int main(int argc, char **argv, char **envp) {
}
pe = ebuf->buf + rc;
/* resolve argv[0] to reflect path search */
if (argc > 0 && ((*prog != '/' && *exe == '/' && !StrCmp(prog, argv[0])) ||
M->ps.indirect || !StrCmp(BaseName(prog), argv[0]))) {
argv[0] = exe;
}
/* inject program executable as first environment variable,
swapping the old first variable for it. */
memmove(M->ps.varname, VARNAME, VARSIZE);
*varpos = *envp;
*envp = M->ps.varname;
/* generate some hard random data */
if ((rc = sys_getentropy(M->rando, sizeof(M->rando))) < 0) {

10
examples/env.c Normal file
View file

@ -0,0 +1,10 @@
#include "libc/stdio/stdio.h"
#include "libc/runtime/runtime.h"
/**
 * Prints argv[0] followed by every environment variable.
 *
 * The first output line is argv[0]; each entry of `environ` then
 * follows on its own line, prefixed by a single space.
 *
 * @param argc number of entries in argv
 * @param argv argument vector; argv[0] is absent when argc is 0
 * @return 0 always
 */
int main(int argc, char* argv[]) {
  // a program can be exec'd with an empty argv, in which case argv[0]
  // is NULL and handing it to %s would be undefined behavior
  printf("%s\n", argc > 0 && argv[0] ? argv[0] : "(null)");
  for (char **p = environ; *p; ++p) {
    printf(" %s\n", *p);
  }
  return 0;
}

View file

@ -36,6 +36,8 @@
#define KERN_PROC 14
#define KERN_PROC_PATHNAME_FREEBSD 12
#define KERN_PROC_PATHNAME_NETBSD 5
#define VARNAME "COSMOPOLITAN_PROGRAM_EXECUTABLE="
#define VARSIZE (sizeof(VARNAME) - 1)
static struct {
atomic_uint once;
@ -77,6 +79,13 @@ static inline void InitProgramExecutableNameImpl(void) {
goto CopyString;
}
/* new-style loader supplies the full program path as the first
environment variable; if it is defined, trust it as-is. */
if (*__envp && !strncmp(*__envp, VARNAME, VARSIZE)) {
strlcpy(g_prog.u.buf, *__envp + VARSIZE, sizeof(g_prog.u.buf));
return;
}
// if argv[0] exists then turn it into an absolute path. we also try
// adding a .com suffix since the ape auto-appends it when resolving
if ((q = __argv[0])) {
@ -146,6 +155,7 @@ static inline void InitProgramExecutableNameImpl(void) {
}
}
*p = 0;
return;
}
// if we don't even have that then empty the string

View file

@ -0,0 +1,104 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/limits.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/subprocess.h"
#include "libc/testlib/testlib.h"
/* result of GetProgramExecutableName(), assigned in SetUp() */
static char *self;
/* set to true in SetUpOnce() when COSMOPOLITAN_PROGRAM_EXECUTABLE is not
   in the environment; tests that check it return early when it is set */
static bool skipcosmotests;
/* Caches the program executable name in `self` for use by the tests. */
void SetUp(void) {
  char *exe = GetProgramExecutableName();
  self = exe;
}
/**
 * One-time test fixture setup.
 *
 * If COSMOPOLITAN_PROGRAM_EXECUTABLE is not present in the environment,
 * a warning is written to stderr and `skipcosmotests` is set so that the
 * tests which consult that flag return early. Afterwards the testlib
 * temporary-directory setup/teardown is enabled.
 */
void SetUpOnce(void) {
  if (!getenv("COSMOPOLITAN_PROGRAM_EXECUTABLE")) {
    // no failing call precedes this message, so an errno-derived suffix
    // would only append an unrelated error string to the warning
    fprintf(stderr, "warning: old ape loader detected; skipping some tests\n");
    skipcosmotests = true;
  }
  testlib_enable_tmp_setup_teardown();
}
/* Constructor hook for the re-executed copy of this binary: when argv[1]
   is "Child", compare argv[2] against GetProgramExecutableName() and exit
   with status 0 instead of continuing. Any other invocation returns
   without doing anything. */
__attribute__((__constructor__)) static void Child(int argc, char *argv[]) {
  if (argc < 2) return;
  if (strcmp(argv[1], "Child")) return;
  ASSERT_EQ(3, argc);
  EXPECT_STREQ(argv[2], GetProgramExecutableName());
  exit(0);
}
/* The cached executable name must start with '/' and end with the
   build-relative path of this test binary. */
TEST(GetProgramExecutableName, ofThisFile) {
EXPECT_EQ('/', *self);
EXPECT_TRUE(
endswith(self, "test/libc/calls/getprogramexecutablename_test.com"));
}
/* Re-executes this binary with argv[0] set to its own path and an empty
   environment; the Child constructor performs the comparison and the
   subprocess is expected to exit with status 0. */
TEST(GetProgramExecutableName, nullEnv) {
  char *child_argv[] = {self, "Child", self, 0};
  char *child_envp[] = {0};
  SPAWN(fork);
  execve(self, child_argv, child_envp);
  EXITS(0);
}
/* Re-executes this binary with argv[0] set to "hello" (unrelated to the
   real path) and an empty environment; the Child constructor performs the
   comparison and the subprocess is expected to exit with status 0.
   The suite name is spelled GetProgramExecutableName so this case is
   grouped with the other tests in this file. */
TEST(GetProgramExecutableName, weirdArgv0NullEnv) {
  SPAWN(fork);
  execve(self, (char *[]){"hello", "Child", self, 0}, (char *[]){ 0 });
  EXITS(0);
}
/* Re-executes this binary with argv[0] set to "hello" and an environment
   whose only entry is COSMOPOLITAN_PROGRAM_EXECUTABLE=<self>. Does nothing
   when skipcosmotests is set. */
TEST(GetProgramExecutableName, weirdArgv0CosmoVar) {
  if (skipcosmotests) return;
  char buf[32 + PATH_MAX];
  char *tail = stpcpy(buf, "COSMOPOLITAN_PROGRAM_EXECUTABLE=");
  stpcpy(tail, self);
  char *child_argv[] = {"hello", "Child", self, 0};
  char *child_envp[] = {buf, 0};
  SPAWN(fork);
  execve(self, child_argv, child_envp);
  EXITS(0);
}
/* Re-executes this binary with argv[0] set to "hello" and an environment
   whose only entry is COSMOPOLITAN_PROGRAM_EXECUTABLE=hi, a value that is
   not the real path. Does nothing when skipcosmotests is set. */
TEST(GetProgramExecutableName, weirdArgv0WrongCosmoVar) {
  if (skipcosmotests) return;
  char *bad = "COSMOPOLITAN_PROGRAM_EXECUTABLE=hi";
  char *child_argv[] = {"hello", "Child", self, 0};
  char *child_envp[] = {bad, 0};
  SPAWN(fork);
  execve(self, child_argv, child_envp);
  EXITS(0);
}
/* Copies this binary to a file named "test" in the current directory,
   builds the absolute path of that copy, and executes it with argv[0] set
   to "hello" and an empty environment. The copy's Child constructor is
   handed the new absolute path as argv[2] for its comparison. */
TEST(GetProgramExecutableName, MovedSelf) {
  char buf[BUFSIZ];
  ssize_t rc;
  ASSERT_SYS(0, 3, open(GetProgramExecutableName(), O_RDONLY));
  ASSERT_SYS(0, 4, creat("test", 0755));
  /* stream the executable from fd 3 into the new file on fd 4 */
  for (;;) {
    rc = read(3, buf, BUFSIZ);
    if (rc <= 0) break;
    ASSERT_SYS(0, rc, write(4, buf, rc));
  }
  ASSERT_EQ(0, rc);
  ASSERT_SYS(0, 0, close(4));
  ASSERT_SYS(0, 0, close(3));
  /* the smaller size passed to getcwd leaves room to append "/test" */
  ASSERT_NE(NULL, getcwd(buf, BUFSIZ - 5));
  strcat(buf, "/test");
  char *child_argv[] = {"hello", "Child", buf, 0};
  char *child_envp[] = {0};
  SPAWN(fork);
  execve(buf, child_argv, child_envp);
  EXITS(0);
}