Have execve() escape double quotes in cmd.exe's preferred style

This makes it possible for us to use system() and popen() with paths
that redirect to filenames that contain spaces, e.g.

    system("echo.com hello >\"hello there.txt\"")

It's difficult to solve this problem, because WIN32 only allows passing
one single argument when launching programs and each program is allowed
to tokenize that however it wants. Most software follows the convention
of cmd.exe which is poorly documented and positively byzantine.

In the future we're going to solve this by not using cmd.exe at all and
instead embedding the cocmd.com interpreter into the system() function.
In the meantime, our documentation has been updated to help recalibrate
any expectation the user might hold regarding the security of using the
Windows command interpreter.

Fixes #644
This commit is contained in:
Justine Tunney 2022-10-02 08:43:25 -07:00
parent acd8900071
commit 3f3cb0650b
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
15 changed files with 223 additions and 95 deletions

View file

@ -35,6 +35,12 @@
/**
* Replaces current process with program.
*
* Warning: Our implementation of argument escaping on Windows hasn't
* been security reviewed for optimal handling of malicious arguments
* when interoperating with commonly used software. We make an effort
* to follow the same conventions as other FOSS software, e.g. PuTTY,
* however each WIN32 app is permitted to tokenize args how it wants.
*
* @param program will not be PATH searched, see commandv()
* @param argv[0] is the name of the program to run
* @param argv[1,n-2] optionally specify program arguments

View file

@ -34,8 +34,15 @@
static bool NeedsQuotes(const char *s) {
if (!*s) return true;
do {
if (*s == ' ' || *s == '\t') {
return true;
switch (*s) {
case '"':
case ' ':
case '\t':
case '\v':
case '\n':
return true;
default:
break;
}
} while (*s++);
return false;
@ -45,19 +52,21 @@ static inline int IsAlpha(int c) {
return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
}
/**
* Converts System V argv to Windows-style command line.
*
* Escaping is performed and it's designed to round-trip with
* GetDosArgv() or GetDosArgv(). This function does NOT escape
* command interpreter syntax, e.g. $VAR (sh), %VAR% (cmd).
*
* @param cmdline is output buffer
* @param prog is used as argv[0]
* @param argv is an a NULL-terminated array of UTF-8 strings
* @return freshly allocated lpCommandLine or NULL w/ errno
* @see libc/runtime/dosargv.c
*/
// Converts System V argv to Windows-style command line.
//
// Escaping is performed and it's designed to round-trip with
// GetDosArgv() or GetDosArgv(). This function does NOT escape
// command interpreter syntax, e.g. $VAR (sh), %VAR% (cmd).
//
// TODO(jart): this needs fuzzing and security review
//
// @param cmdline is output buffer
// @param prog is frontloaded as argv[0]
// @param argv is an a NULL-terminated array of UTF-8 strings
// @return 0 on success, or -1 w/ errno
// @raise E2BIG if everything is too huge
// @see "Everyone quotes command line arguments the wrong way" MSDN
// @see libc/runtime/getdosargv.c
textwindows int mkntcmdline(char16_t cmdline[ARG_MAX / 2], const char *prog,
char *const argv[]) {
char *arg;
@ -102,8 +111,8 @@ textwindows int mkntcmdline(char16_t cmdline[ARG_MAX / 2], const char *prog,
} else {
// turn stuff like `less /c/...`
// into `less c:/...`
// turn stuff like `more <\\\"/c/...\\\"`
// into `more <\\\"c:/...\\\"`
// turn stuff like `more <"/c/..."`
// into `more <"c:/..."`
if (k > 3 && IsAlpha(cmdline[k - 1]) &&
(cmdline[k - 2] == '/' || cmdline[k - 2] == '\\') &&
(cmdline[k - 3] == '"' || cmdline[k - 3] == ' ')) {
@ -115,11 +124,8 @@ textwindows int mkntcmdline(char16_t cmdline[ARG_MAX / 2], const char *prog,
if (x == '\\') {
++slashes;
} else if (x == '"') {
for (s = 0; s < slashes * 2; ++s) {
APPEND(u'\\');
}
slashes = 0;
APPEND(u'\\');
APPEND(u'"');
APPEND(u'"');
APPEND(u'"');
} else {
for (s = 0; s < slashes; ++s) {

View file

@ -17,8 +17,8 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/ntmagicpaths.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/macros.internal.h"
#include "libc/nt/systeminfo.h"
#include "libc/str/oldutf16.internal.h"
@ -173,5 +173,24 @@ textwindows int __mkntpath2(const char *path,
p[j] = 0;
n = j;
return x + m + n;
// our path is now stored at `path16` with length `n`
n = x + m + n;
// To avoid toil like this:
//
// CMD.EXE was started with the above path as the current directory.
// UNC paths are not supported. Defaulting to Windows directory.
// Access is denied.
//
// Remove \\?\ prefix if we're within 260 character limit.
if (n > 4 && n < 260 && //
path16[0] == '\\' && //
path16[1] == '\\' && //
path16[2] == '?' && //
path16[3] == '\\') {
memmove(path16, path16 + 4, (n - 4 + 1) * sizeof(char16_t));
n -= 4;
}
return n;
}

View file

@ -0,0 +1,21 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/paths.h"
const char *_PATH_BSHELL = "/bin/sh";

View file

@ -1,30 +1,37 @@
#ifndef COSMOPOLITAN_LIBC_PATHS_H_
#define COSMOPOLITAN_LIBC_PATHS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
extern const char *_PATH_BSHELL;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#define _PATH_DEFPATH "/usr/local/bin:/bin:/usr/bin"
#define _PATH_STDPATH "/bin:/usr/bin:/sbin:/usr/sbin"
#define _PATH_BSHELL "/bin/sh"
#define _PATH_CONSOLE "/dev/console"
#define _PATH_DEVNULL "/dev/null"
#define _PATH_KLOG "/proc/kmsg"
#define _PATH_LASTLOG "/var/log/lastlog"
#define _PATH_MAILDIR "/var/mail"
#define _PATH_MAN "/usr/share/man"
#define _PATH_MNTTAB "/etc/fstab"
#define _PATH_MOUNTED "/etc/mtab"
#define _PATH_NOLOGIN "/etc/nologin"
#define _PATH_BSHELL _PATH_BSHELL
#define _PATH_CONSOLE "/dev/console"
#define _PATH_DEVNULL "/dev/null"
#define _PATH_KLOG "/proc/kmsg"
#define _PATH_LASTLOG "/var/log/lastlog"
#define _PATH_MAILDIR "/var/mail"
#define _PATH_MAN "/usr/share/man"
#define _PATH_MNTTAB "/etc/fstab"
#define _PATH_MOUNTED "/etc/mtab"
#define _PATH_NOLOGIN "/etc/nologin"
#define _PATH_SENDMAIL "/usr/sbin/sendmail"
#define _PATH_SHADOW "/etc/shadow"
#define _PATH_SHELLS "/etc/shells"
#define _PATH_TTY "/dev/tty"
#define _PATH_UTMP "/dev/null/utmp"
#define _PATH_VI "/usr/bin/vi"
#define _PATH_WTMP "/dev/null/wtmp"
#define _PATH_SHADOW "/etc/shadow"
#define _PATH_SHELLS "/etc/shells"
#define _PATH_TTY "/dev/tty"
#define _PATH_UTMP "/dev/null/utmp"
#define _PATH_VI "/usr/bin/vi"
#define _PATH_WTMP "/dev/null/wtmp"
#define _PATH_DEV "/dev/"
#define _PATH_TMP "/tmp/"
#define _PATH_VARDB "/var/lib/misc/"
#define _PATH_DEV "/dev/"
#define _PATH_TMP "/tmp/"
#define _PATH_VARDB "/var/lib/misc/"
#define _PATH_VARRUN "/var/run/"
#define _PATH_VARTMP "/var/tmp/"

View file

@ -66,25 +66,23 @@ static textwindows noasan int Count(int c, struct DosArgv *st) {
return n;
}
/**
* Tokenizes and transcodes Windows NT CLI args, thus avoiding
* CommandLineToArgv() schlepping in forty megs of dependencies.
*
* @param s is the command line string provided by the executive
* @param buf is where we'll store double-NUL-terminated decoded args
* @param size is how many bytes are available in buf
* @param argv is where we'll store the decoded arg pointer array, which
* is guaranteed to be NULL-terminated if max>0
* @param max specifies the item capacity of argv, or 0 to do scanning
* @return number of args written, excluding the NULL-terminator; or,
* if the output buffer wasn't passed, or was too short, then the
* number of args that *would* have been written is returned; and
* there are currently no failure conditions that would have this
* return -1 since it doesn't do system calls
* @see test/libc/dosarg_test.c
* @see libc/runtime/ntspawn.c
* @note kudos to Simon Tatham for figuring out quoting behavior
*/
// Tokenizes and transcodes Windows NT CLI args, thus avoiding
// CommandLineToArgv() schlepping in forty megs of dependencies.
//
// @param s is the command line string provided by the executive
// @param buf is where we'll store double-NUL-terminated decoded args
// @param size is how many bytes are available in buf
// @param argv is where we'll store the decoded arg pointer array, which
// is guaranteed to be NULL-terminated if max>0
// @param max specifies the item capacity of argv, or 0 to do scanning
// @return number of args written, excluding the NULL-terminator; or,
// if the output buffer wasn't passed, or was too short, then the
// number of args that *would* have been written is returned; and
// there are currently no failure conditions that would have this
// return -1 since it doesn't do system calls
// @see test/libc/dosarg_test.c
// @see libc/runtime/ntspawn.c
// @note kudos to Simon Tatham for figuring out quoting behavior
textwindows noasan int GetDosArgv(const char16_t *cmdline, char *buf,
size_t size, char **argv, size_t max) {
bool inquote;

View file

@ -104,16 +104,14 @@ textwindows noinstrument noasan void FixPath(char *path) {
}
}
/**
* Transcodes NT environment variable block from UTF-16 to UTF-8.
*
* @param env is a double NUL-terminated block of key=values
* @param buf is the new environment which gets double-nul'd
* @param size is the byte capacity of buf
* @param envp stores NULL-terminated string pointer list (optional)
* @param max is the pointer count capacity of envp
* @return number of variables decoded, excluding NULL-terminator
*/
// Transcodes NT environment variable block from UTF-16 to UTF-8.
//
// @param env is a double NUL-terminated block of key=values
// @param buf is the new environment which gets double-nul'd
// @param size is the byte capacity of buf
// @param envp stores NULL-terminated string pointer list (optional)
// @param max is the pointer count capacity of envp
// @return number of variables decoded, excluding NULL-terminator
textwindows noasan noinstrument int GetDosEnviron(const char16_t *env,
char *buf, size_t size,
char **envp, size_t max) {

View file

@ -32,6 +32,11 @@
/**
* Launches program with system command interpreter.
*
* Warning: Caution is very much advised on Windows where this function
* currently delegates to CMD.EXE, which has notoriously mysterious and
* insecure escaping rules. Much better idea is to not use this at all,
* favoring instead explicit execve() invocations without using shells.
*
* @param cmdline is an interpreted Turing-complete command
* @return -1 if child process couldn't be created, otherwise a wait
* status that can be accessed using macros like WEXITSTATUS(s)

View file

@ -24,10 +24,8 @@
#include "libc/str/str.h"
#include "libc/sysv/errfuns.h"
/**
* Executes system command replacing current process.
* @vforksafe
*/
// Support code for system() and popen().
// TODO(jart): embed cocmd instead of using /bin/sh and cmd.exe
int systemexec(const char *cmdline) {
size_t n, m;
char *a, *b, *argv[4], comspec[PATH_MAX];

View file

@ -18,8 +18,8 @@
*/
#include "libc/calls/ntspawn.h"
#include "libc/errno.h"
#include "libc/mem/mem.h"
#include "libc/mem/gc.internal.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "libc/testlib/testlib.h"
@ -61,12 +61,6 @@ TEST(mkntcmdline, justSlash) {
EXPECT_STREQ(u"\\", cmdline);
}
TEST(mkntcmdline, basicQuoting) {
char *argv[] = {"a\"b c", "d", NULL};
EXPECT_NE(-1, mkntcmdline(cmdline, argv[0], argv));
EXPECT_STREQ(u"\"a\\\"b c\" d" /* "a\"b c" d */, cmdline);
}
TEST(mkntcmdline, testUnicode) {
char *argv1[] = {
gc(strdup("(╯°□°)╯")),
@ -82,12 +76,12 @@ TEST(mkntcmdline, fixAsBestAsWeCanForNow1) {
char *argv1[] = {
"/C/WINDOWS/system32/cmd.exe",
"/C",
"more < \"/C/Users/jart/AppData/Local/Temp/tmplquaa_d6\"",
"more <\"/C/Users/jart/AppData/Local/Temp/tmplquaa_d6\"",
NULL,
};
EXPECT_NE(-1, mkntcmdline(cmdline, argv1[0], argv1));
EXPECT_STREQ(u"C:\\WINDOWS\\system32\\cmd.exe /C \"more < "
u"\\\"C:/Users/jart/AppData/Local/Temp/tmplquaa_d6\\\"\"",
EXPECT_STREQ(u"C:\\WINDOWS\\system32\\cmd.exe /C \"more <"
u"\"\"\"C:/Users/jart/AppData/Local/Temp/tmplquaa_d6\"\"\"\"",
cmdline);
}

View file

@ -47,11 +47,4 @@ TEST(mkntpath, testUnicode) {
TEST(mkntpath, testRemoveDoubleSlash) {
EXPECT_EQ(21, __mkntpath("C:\\Users\\jart\\\\.config", p));
EXPECT_STREQ(u"C:\\Users\\jart\\.config", p);
EXPECT_EQ(8, __mkntpath("\\\\?\\doge", p));
EXPECT_STREQ(u"\\\\?\\doge", p);
}
TEST(mkntpath, testJustC) {
EXPECT_EQ(7, __mkntpath("/C", p));
EXPECT_STREQ(u"\\\\?\\C:\\", p);
}

View file

@ -169,3 +169,18 @@ TEST(GetDosArgv, waqQuoting2) {
free(argv);
free(buf);
}
TEST(GetDosArgv, cmdToil) {
size_t max = 4;
size_t size = ARG_MAX / 2;
char *buf = malloc(size * sizeof(char));
char **argv = malloc(max * sizeof(char *));
EXPECT_EQ(3, GetDosArgv(u"cmd.exe /C \"echo hi >\"\"\"foo bar.txt\"\"\"\"",
buf, size, argv, max));
EXPECT_STREQ("cmd.exe", argv[0]);
EXPECT_STREQ("/C", argv[1]);
EXPECT_STREQ("echo hi >\"foo bar.txt\"", argv[2]);
EXPECT_EQ(NULL, argv[3]);
free(argv);
free(buf);
}

View file

@ -0,0 +1,55 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/mem/copyfd.internal.h"
#include "libc/mem/gc.h"
#include "libc/paths.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
char testlib_enable_tmp_setup_teardown;
TEST(system, testStdoutRedirect) {
int ws;
testlib_extract("/zip/echo.com", "echo.com", 0755);
testlib_extract("/zip/cocmd.com", "cocmd.com", 0755);
setenv("PATH", ".", true); // avoid / vs. \ until cocmd.com is ready
_PATH_BSHELL = "cocmd.com"; // cmd.exe shall still be used on windows
ASSERT_TRUE(system(0));
ws = system("echo.com hello >hello.txt");
ASSERT_TRUE(WIFEXITED(ws));
ASSERT_EQ(0, WEXITSTATUS(ws));
EXPECT_STREQ("hello\n", _gc(xslurp("hello.txt", 0)));
}
TEST(system, testStdoutRedirect_withSpacesInFilename) {
int ws;
testlib_extract("/zip/echo.com", "echo.com", 0755);
testlib_extract("/zip/cocmd.com", "cocmd.com", 0755);
setenv("PATH", ".", true); // avoid / vs. \ until cocmd.com is ready
_PATH_BSHELL = "cocmd.com"; // cmd.exe shall still be used on windows
ASSERT_TRUE(system(0));
ws = system("echo.com hello >\"hello there.txt\"");
ASSERT_TRUE(WIFEXITED(ws));
ASSERT_EQ(0, WEXITSTATUS(ws));
EXPECT_STREQ("hello\n", _gc(xslurp("hello there.txt", 0)));
}

View file

@ -63,9 +63,20 @@ o/$(MODE)/test/libc/stdio/%.com.dbg: \
$(APE_NO_MODIFY_SELF)
@$(APELINK)
o/$(MODE)/test/libc/stdio/system_test.com.dbg: \
$(TEST_LIBC_STDIO_DEPS) \
o/$(MODE)/test/libc/stdio/system_test.o \
o/$(MODE)/test/libc/stdio/stdio.pkg \
o/$(MODE)/tool/build/echo.com.zip.o \
o/$(MODE)/tool/build/cocmd.com.zip.o \
$(LIBC_TESTMAIN) \
$(CRT) \
$(APE_NO_MODIFY_SELF)
@$(APELINK)
$(TEST_LIBC_STDIO_OBJS): private \
DEFAULT_CCFLAGS += \
-fno-builtin
DEFAULT_CCFLAGS += \
-fno-builtin
.PHONY: o/$(MODE)/test/libc/stdio
o/$(MODE)/test/libc/stdio: \

View file

@ -111,6 +111,8 @@ o/$(MODE)/tool/build/chmod.zip.o \
o/$(MODE)/tool/build/cp.zip.o \
o/$(MODE)/tool/build/mv.zip.o \
o/$(MODE)/tool/build/echo.zip.o \
o/$(MODE)/tool/build/echo.com.zip.o \
o/$(MODE)/tool/build/cocmd.com.zip.o \
o/$(MODE)/tool/build/gzip.zip.o \
o/$(MODE)/tool/build/printf.zip.o \
o/$(MODE)/tool/build/dd.zip.o: private \