Embed cocmd.com interpreter for system() / popen()

This change lets you use system() in an easier and more portable way.
The problem with the call in the past has always been that the Bourne
shell and cmd.exe on Windows have less than nothing in common, so pretty
much the only command system() could be used for across platforms was
maybe echo. cmd.exe is also a security liability due to its escaping
rules.

Since cocmd.com implements 85% of what we need from the Bourne shell, in
a really tiny way, it makes perfect sense for it to be embedded in these
functions. We get a huge performance boost too.

Fixes #644
This commit is contained in:
Justine Tunney 2022-10-02 15:29:57 -07:00
parent daca5499b9
commit 950a1b310b
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
9 changed files with 313 additions and 320 deletions

View file

@ -1,21 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/paths.h"
const char *_PATH_BSHELL = "/bin/sh";

View file

@ -1,17 +1,10 @@
#ifndef COSMOPOLITAN_LIBC_PATHS_H_
#define COSMOPOLITAN_LIBC_PATHS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
extern const char *_PATH_BSHELL;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#define _PATH_DEFPATH "/usr/local/bin:/bin:/usr/bin"
#define _PATH_STDPATH "/bin:/usr/bin:/sbin:/usr/sbin"
#define _PATH_BSHELL _PATH_BSHELL
#define _PATH_BSHELL "/bin/sh"
#define _PATH_CONSOLE "/dev/console"
#define _PATH_DEVNULL "/dev/null"
#define _PATH_KLOG "/proc/kmsg"

270
libc/stdio/cocmd.c Normal file
View file

@ -0,0 +1,270 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/o.h"
/**
* @fileoverview Cosmopolitan Command Interpreter
*
* This is a lightweight command interpreter that was written for GNU
* Make and is also embedded in system() and popen(). It has just enough
* shell script language support for our build config.
*/
// Tokenizer states: which quoting context the read cursor is inside.
#define STATE_SHELL 0  // unquoted text
#define STATE_STR 1    // inside '...'
#define STATE_QUO 2    // inside "..."
// Read cursor into the command string; advanced by Tokenize().
char *p;
// Write cursor into argbuf; decoded words are appended here.
char *q;
// Number of words currently stored in args.
size_t n;
// The whole command string, kept for error messages.
char *cmd;
// Argument vector handed to execv(); kept NULL-terminated by cocmd().
char *args[8192];
// Program name used as the prefix of diagnostics.
const char *prog;
// Backing storage for the decoded words that args points into.
char argbuf[ARG_MAX];
// Indexed by byte value; true means the byte is rejected when it appears
// outside quotes.
bool unsupported[256];
/**
 * Writes each string argument to file descriptor 2, in order.
 *
 * The first string is always written; the remaining arguments are
 * consumed until a null pointer is found.
 *
 * @param s is the first string, followed by more strings and a
 *     terminating null pointer
 */
void Write(const char *s, ...) {
  va_list ap;
  const char *piece = s;
  va_start(ap, s);
  for (;;) {
    write(2, piece, strlen(piece));
    piece = va_arg(ap, const char *);
    if (!piece) break;
  }
  va_end(ap);
}
/**
 * Reports a rejected shell character and terminates with status 4.
 *
 * The diagnostic shows the program name, the offending byte both as a
 * character and as an octal number, and the full command string.
 *
 * @param c is the byte the tokenizer refused to interpret
 */
wontreturn void UnsupportedSyntax(unsigned char c) {
  char cbuf[2];
  char ibuf[13];
  cbuf[0] = c;
  cbuf[1] = 0;
  FormatOctal32(ibuf, c, true);
  // Write() fetches its terminator with va_arg(va, const char *), so the
  // sentinel has to be a pointer rather than a bare int 0.
  Write(prog, ": unsupported shell syntax '", cbuf, "' (", ibuf, "): ", cmd,
        "\n", (const char *)0);
  exit(4);
}
/**
 * Reports a failed system call on stderr and terminates.
 *
 * @param rc is the exit status to terminate with
 * @param call is the name of the call that failed
 * @param thing is printed as the message prefix, e.g. the path or
 *     program the call was operating on
 */
wontreturn void SysExit(int rc, const char *call, const char *thing) {
  int err;
  char ibuf[12];
  const char *estr;
  err = errno;  // capture first, before any later call can overwrite it
  FormatInt32(ibuf, err);
  estr = _strerdoc(err);
  if (!estr) estr = "EUNKNOWN";  // no description available for this code
  // Write() fetches its terminator with va_arg(va, const char *), so the
  // sentinel has to be a pointer rather than a bare int 0.
  Write(thing, ": ", call, "() failed: ", estr, " (", ibuf, ")\n",
        (const char *)0);
  exit(rc);
}
/**
 * Redirects file descriptor `fd` to the file at `path`.
 *
 * The file is opened before `fd` is touched. That way a failed
 * redirection of stderr can still report its own error, and the file
 * ends up on `fd` even when some lower-numbered descriptor happens to
 * be free. Exits with status 7 on failure.
 *
 * @param path is the file to open (created with mode 0644 if the flags
 *     ask for creation)
 * @param fd is the descriptor number to redirect
 * @param flags are the open() flags
 */
void Open(const char *path, int fd, int flags) {
  int tmp;
  if ((tmp = open(path, flags, 0644)) == -1) {
    SysExit(7, "open", path);
  }
  if (tmp != fd) {
    if (dup2(tmp, fd) == -1) {
      SysExit(7, "dup2", path);
    }
    close(tmp);
  }
}
wontreturn void Exec(void) {
const char *s;
if (!n) {
Write(prog, ": error: too few args\n", 0);
exit(5);
}
execv(args[0], args);
SysExit(127, "execve", args[0]);
}
/**
 * Launches the words collected so far as the left side of a pipeline.
 *
 * A pipe is created and a child is spawned with vfork(). The child
 * points its stdout at the write end and executes the current `args`.
 * The parent points its stdin at the read end and resets `n` so the
 * words that follow start a fresh argument list. Exits with status 8 or
 * 9 when the pipe or the child cannot be created.
 */
void Pipe(void) {
  int fds[2];
  int child;
  if (pipe2(fds, O_CLOEXEC) != 0) {
    SysExit(8, "pipe2", prog);
  }
  child = vfork();
  if (child == -1) {
    SysExit(9, "vfork", prog);
  }
  if (child == 0) {
    dup2(fds[1], 1);
    Exec();
  }
  dup2(fds[0], 0);
  n = 0;
}
char *Tokenize(void) {
char *r;
int c, t;
while (*p == ' ' || *p == '\t' || *p == '\n' ||
(p[0] == '\\' && p[1] == '\n')) {
++p;
}
if (!*p) return 0;
t = STATE_SHELL;
for (r = q;; ++p) {
switch (t) {
case STATE_SHELL:
if (unsupported[*p & 255]) {
UnsupportedSyntax(*p);
}
if (!*p || *p == ' ' || *p == '\t') {
*q++ = 0;
return r;
} else if (*p == '"') {
t = STATE_QUO;
} else if (*p == '\'') {
t = STATE_STR;
} else if (*p == '\\') {
if (!p[1]) UnsupportedSyntax(*p);
*q++ = *++p;
} else if (*p == '|') {
if (q > r) {
*q = 0;
return r;
} else {
Pipe();
++p;
}
} else {
*q++ = *p;
}
break;
case STATE_STR:
if (!*p) {
Write("cmd: error: unterminated single string\n", 0);
exit(6);
}
if (*p == '\'') {
t = STATE_SHELL;
} else {
*q++ = *p;
}
break;
case STATE_QUO:
if (!*p) {
Write("cmd: error: unterminated quoted string\n", 0);
exit(6);
}
if (*p == '"') {
t = STATE_SHELL;
} else if (p[0] == '\\') {
switch ((c = *++p)) {
case 0:
UnsupportedSyntax('\\');
case '\n':
break;
case '$':
case '`':
case '"':
*q++ = c;
break;
default:
*q++ = '\\';
*q++ = c;
break;
}
} else {
*q++ = *p;
}
break;
default:
unreachable;
}
}
}
int cocmd(int argc, char *argv[]) {
char *arg;
size_t i, j;
prog = argc > 0 ? argv[0] : "cocmd.com";
for (i = 1; i < 32; ++i) {
unsupported[i] = true;
}
unsupported['\t'] = false;
unsupported[0177] = true;
unsupported['~'] = true;
unsupported['`'] = true;
unsupported['#'] = true;
unsupported['*'] = true;
unsupported['('] = true;
unsupported[')'] = true;
unsupported['['] = true;
unsupported[']'] = true;
unsupported['{'] = true;
unsupported['}'] = true;
unsupported[';'] = true;
unsupported['?'] = true;
unsupported['!'] = true;
if (argc != 3) {
Write(prog, ": error: wrong number of args\n", 0);
exit(10);
}
if (strcmp(argv[1], "-c")) {
Write(prog, ": error: argv[1] should -c\n", 0);
exit(11);
}
p = cmd = argv[2];
if (strlen(cmd) >= ARG_MAX) {
Write(prog, ": error: cmd too long: ", cmd, "\n", 0);
exit(12);
}
n = 0;
q = argbuf;
while ((arg = Tokenize())) {
if (n + 1 < ARRAYLEN(args)) {
if (isdigit(arg[0]) && arg[1] == '>' && arg[2] == '&' &&
isdigit(arg[3])) {
dup2(arg[3] - '0', arg[0] - '0');
} else if (arg[0] == '>' && arg[1] == '&' && isdigit(arg[2])) {
dup2(arg[2] - '0', 1);
} else if (isdigit(arg[0]) && arg[1] == '>' && arg[2] == '>') {
Open(arg + 3, arg[0] - '0', O_WRONLY | O_CREAT | O_APPEND);
} else if (arg[0] == '>' && arg[1] == '>') {
Open(arg + 2, 1, O_WRONLY | O_CREAT | O_APPEND);
} else if (isdigit(arg[0]) && arg[1] == '>') {
Open(arg + 2, arg[0] - '0', O_WRONLY | O_CREAT | O_TRUNC);
} else if (arg[0] == '>') {
Open(arg + 1, 1, O_WRONLY | O_CREAT | O_TRUNC);
} else if (arg[0] == '<') {
Open(arg + 1, 0, O_RDONLY);
} else {
args[n++] = arg;
args[n] = 0;
}
} else {
Write(prog, ": error: too many args\n", 0);
exit(13);
}
}
Exec();
}

View file

@ -0,0 +1,10 @@
#ifndef COSMOPOLITAN_LIBC_STDIO_COCMD_INTERNAL_H_
#define COSMOPOLITAN_LIBC_STDIO_COCMD_INTERNAL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * Runs the embedded command interpreter.
 *
 * Takes main()-style arguments of the form `{prog, "-c", command}`.
 */
int cocmd(int, char **);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STDIO_COCMD_INTERNAL_H_ */

View file

@ -29,6 +29,16 @@
/**
* Spawns subprocess and returns pipe stream.
*
* This embeds the cocmd.com shell interpreter which supports a limited
* subset of the bourne shell that's significantly faster:
*
* - pipelines
* - single quotes
* - double quotes
* - input redirection, e.g. `<path`
* - output redirection, e.g. `>path`, `>>append`, `2>err.txt`, `2>&1`
*
* @see pclose()
*/
FILE *popen(const char *cmdline, const char *mode) {

View file

@ -32,10 +32,14 @@
/**
* Launches program with system command interpreter.
*
* Warning: Caution is very much advised on Windows where this function
* currently delegates to CMD.EXE, which has notoriously mysterious and
* insecure escaping rules. Much better idea is to not use this at all,
* favoring instead explicit execve() invocations without using shells.
* This embeds the cocmd.com shell interpreter which supports a limited
* subset of the bourne shell that's significantly faster:
*
* - pipelines
* - single quotes
* - double quotes
* - input redirection, e.g. `<path`
* - output redirection, e.g. `>path`, `>>append`, `2>err.txt`, `2>&1`
*
* @param cmdline is an interpreted Turing-complete command
* @return -1 if child process couldn't be created, otherwise a wait
@ -45,11 +49,7 @@ int system(const char *cmdline) {
int pid, wstatus;
sigset_t chldmask, savemask;
struct sigaction ignore, saveint, savequit;
if (!cmdline) {
if (IsWindows()) return 1;
if (!access(_PATH_BSHELL, X_OK)) return 1;
return 0;
}
if (!cmdline) return 1;
ignore.sa_flags = 0;
ignore.sa_handler = SIG_IGN;
sigemptyset(&ignore.sa_mask);

View file

@ -16,33 +16,11 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/macros.internal.h"
#include "libc/paths.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/errfuns.h"
#include "libc/stdio/cocmd.internal.h"
#include "libc/stdio/stdio.h"
// Support code for system() and popen().

/**
 * Runs `cmdline` in the calling process using the embedded cocmd
 * interpreter, as if by `cocmd.com -c cmdline`.
 *
 * This does not return to the caller: the process either becomes the
 * requested program or exits with the interpreter's status.
 *
 * @param cmdline is the command to interpret
 */
int systemexec(const char *cmdline) {
  // cocmd() takes a main()-style argv; it only reads the command string,
  // so dropping the const qualifier here is safe
  char *argv[] = {"cocmd.com", "-c", (char *)cmdline, 0};
  _Exit(cocmd(3, argv));
}

View file

@ -23,6 +23,7 @@
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
@ -31,11 +32,8 @@ char testlib_enable_tmp_setup_teardown;
TEST(system, testStdoutRedirect) {
int ws;
testlib_extract("/zip/echo.com", "echo.com", 0755);
testlib_extract("/zip/cocmd.com", "cocmd.com", 0755);
setenv("PATH", ".", true); // avoid / vs. \ until cocmd.com is ready
_PATH_BSHELL = "cocmd.com"; // cmd.exe shall still be used on windows
ASSERT_TRUE(system(0));
ws = system("echo.com hello >hello.txt");
ws = system("./echo.com hello >hello.txt");
ASSERT_TRUE(WIFEXITED(ws));
ASSERT_EQ(0, WEXITSTATUS(ws));
EXPECT_STREQ("hello\n", _gc(xslurp("hello.txt", 0)));
@ -44,12 +42,14 @@ TEST(system, testStdoutRedirect) {
// Redirecting stdout to a double-quoted path that contains a space must
// create exactly that file.
TEST(system, testStdoutRedirect_withSpacesInFilename) {
  int ws;
  testlib_extract("/zip/echo.com", "echo.com", 0755);
  ASSERT_TRUE(system(0));
  ws = system("./echo.com hello >\"hello there.txt\"");
  ASSERT_TRUE(WIFEXITED(ws));
  ASSERT_EQ(0, WEXITSTATUS(ws));
  EXPECT_STREQ("hello\n", _gc(xslurp("hello there.txt", 0)));
}
// Benchmarks one system() call that runs the extracted echo.com with its
// stdout redirected to /dev/null.
BENCH(system, bench) {
  testlib_extract("/zip/echo.com", "echo.com", 0755);
  EZBENCH2("system", donothing, system("./echo.com hi >/dev/null"));
}

View file

@ -16,255 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/o.h"
/**
* @fileoverview Cosmopolitan Command Interpreter
*
* This is a lightweight command interpreter for GNU Make. It has just
* enough shell script language support to support our build config.
*/
#define STATE_SHELL 0
#define STATE_STR 1
#define STATE_QUO 2
char *p;
char *q;
size_t n;
char *cmd;
char *args[8192];
const char *prog;
char argbuf[ARG_MAX];
bool unsupported[256];
void Write(const char *s, ...) {
va_list va;
va_start(va, s);
do {
write(2, s, strlen(s));
} while ((s = va_arg(va, const char *)));
va_end(va);
}
wontreturn void UnsupportedSyntax(unsigned char c) {
char cbuf[2];
char ibuf[13];
cbuf[0] = c;
cbuf[1] = 0;
FormatOctal32(ibuf, c, true);
Write(prog, ": unsupported shell syntax '", cbuf, "' (", ibuf, "): ", cmd,
"\n", 0);
exit(4);
}
wontreturn void SysExit(int rc, const char *call, const char *thing) {
int err;
char ibuf[12];
const char *estr;
err = errno;
FormatInt32(ibuf, err);
estr = _strerdoc(err);
if (!estr) estr = "EUNKNOWN";
Write(thing, ": ", call, "() failed: ", estr, " (", ibuf, ")\n", 0);
exit(rc);
}
void Open(const char *path, int fd, int flags) {
const char *err;
close(fd);
if (open(path, flags, 0644) == -1) {
SysExit(7, "open", path);
}
}
wontreturn void Exec(void) {
const char *s;
if (!n) {
Write(prog, ": error: too few args\n", 0);
exit(5);
}
execv(args[0], args);
SysExit(127, "execve", args[0]);
}
void Pipe(void) {
int pid, pfds[2];
if (pipe2(pfds, O_CLOEXEC)) {
SysExit(8, "pipe2", prog);
}
if ((pid = vfork()) == -1) {
SysExit(9, "vfork", prog);
}
if (!pid) {
dup2(pfds[1], 1);
Exec();
}
dup2(pfds[0], 0);
n = 0;
}
char *Tokenize(void) {
char *r;
int c, t;
while (*p == ' ' || *p == '\t' || *p == '\n' ||
(p[0] == '\\' && p[1] == '\n')) {
++p;
}
if (!*p) return 0;
t = STATE_SHELL;
for (r = q;; ++p) {
switch (t) {
case STATE_SHELL:
if (unsupported[*p & 255]) {
UnsupportedSyntax(*p);
}
if (!*p || *p == ' ' || *p == '\t') {
*q++ = 0;
return r;
} else if (*p == '"') {
t = STATE_QUO;
} else if (*p == '\'') {
t = STATE_STR;
} else if (*p == '\\') {
if (!p[1]) UnsupportedSyntax(*p);
*q++ = *++p;
} else if (*p == '|') {
if (q > r) {
*q = 0;
return r;
} else {
Pipe();
++p;
}
} else {
*q++ = *p;
}
break;
case STATE_STR:
if (!*p) {
Write("cmd: error: unterminated single string\n", 0);
exit(6);
}
if (*p == '\'') {
t = STATE_SHELL;
} else {
*q++ = *p;
}
break;
case STATE_QUO:
if (!*p) {
Write("cmd: error: unterminated quoted string\n", 0);
exit(6);
}
if (*p == '"') {
t = STATE_SHELL;
} else if (p[0] == '\\') {
switch ((c = *++p)) {
case 0:
UnsupportedSyntax('\\');
case '\n':
break;
case '$':
case '`':
case '"':
*q++ = c;
break;
default:
*q++ = '\\';
*q++ = c;
break;
}
} else {
*q++ = *p;
}
break;
default:
unreachable;
}
}
}
#include "libc/stdio/cocmd.internal.h"
/**
 * Entry point of the standalone cocmd.com program.
 *
 * The interpreter itself lives in cocmd(), so this only forwards the
 * command-line arguments to it.
 */
int main(int argc, char *argv[]) {
  return cocmd(argc, argv);
}