cosmopolitan/tool/build/objbincopy.c
Jōshin 6e6fc38935
Apply clang-format update to repo (#1154)
Commit bc6c183 introduced a bunch of discrepancies between what files
look like in the repo and what clang-format says they should look like.
However, there were already a few discrepancies prior to that. Most of
these discrepancies seemed to be unintentional, but a few of them were
load-bearing (e.g., a #include that violated header ordering needing
something to have been #defined by a 'later' #include.)

I opted to take what I hope is a relatively smooth-brained approach: I
reverted the .clang-format change, ran clang-format on the whole repo,
reapplied the .clang-format change, reran clang-format again, and then
reverted the commit that contained the first run. Thus the full effect
of this PR should only be to apply the changed formatting rules to the
repo, and from skimming the results, this seems to be the case.

My work can be checked by applying the short, manual commits, and then
rerunning the command listed in the autogenerated commits (those whose
messages I have prefixed auto:) and seeing if your results agree.

It might be that the other diffs should be fixed at some point but I'm
leaving that aside for now.

fd '\.c(c|pp)?$' --print0| xargs -0 clang-format -i
2024-04-25 10:38:00 -07:00

442 lines
16 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2023 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/elf/def.h"
#include "libc/elf/elf.h"
#include "libc/elf/struct/ehdr.h"
#include "libc/intrin/kprintf.h"
#include "libc/macho.internal.h"
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "third_party/getopt/getopt.internal.h"
// Version and copyright banner. ShowUsage() prints this text first,
// ahead of the usage synopsis and the manual.
#define VERSION \
"objbincopy v1.0\n" \
"copyright 2023 justine tunney\n" \
"https://github.com/jart/cosmopolitan\n"
// Usage manual. ShowUsage() prints the program name immediately before
// this text, so the first line completes the synopsis line, i.e.
// "PROG -o OUTPUT INPUT".
#define MANUAL \
" -o OUTPUT INPUT\n" \
"\n" \
"DESCRIPTION\n" \
"\n" \
" Fast `objcopy -SO binary` that doesn't insert bloat.\n" \
"\n" \
" This program is for times where the unix linker is being\n" \
" used to create executables, that define their own custom\n" \
" executable headers. The ld program outputs such programs\n" \
" as an executable wrapped inside an executable. Normally\n" \
" the only way to get it out is using `objcopy -SO binary`\n" \
" except that it has the undesirable impact of adding lots\n" \
" of bloat to the output file, in order to make its layout\n" \
" the same as the virtual memory layout. That's useful for\n" \
" things like naive firmware loaders but isnt a great idea\n" \
" when our goal is to generate files like ELF and PE which\n" \
" support loading segments, from overlapping file regions.\n" \
" Therefore, this program performs a naive objcopy of your\n" \
" ELF PT_LOAD segments without considering virtual layout.\n" \
"\n" \
"FLAGS\n" \
"\n" \
" -h show usage\n" \
" -o OUTPUT set output path\n" \
" -m create Mach-O executable\n" \
" -f coerce EI_OSABI to FreeBSD\n" \
"\n" \
"ARGUMENTS\n" \
"\n" \
" OUTPUT where to save the unwrapped executable\n" \
" INPUT is an elf executable made by the unix linker\n" \
"\n"
// Debug logging. When MODE_DBG is defined the arguments are passed to
// kprintf() with "DEBUG: " pasted onto the front of the format, which
// means the first argument must be a string literal. Otherwise the
// macro expands to a no-op expression and its arguments vanish.
#ifdef MODE_DBG
#define DEBUG(...) kprintf("DEBUG: " __VA_ARGS__)
#else
#define DEBUG(...) (void)0
#endif
// Evaluates to true when the char array `buf`, which need not be NUL
// terminated, holds exactly the string `str`.
//
// `buf` must be an actual array rather than a pointer, because
// sizeof(buf) is what bounds the length scan. Both arguments are
// evaluated more than once, so they must be free of side effects.
#define IsStaticStringEqual(buf, str) \
(strnlen(buf, sizeof(buf)) == strlen(str) && !memcmp(buf, str, strlen(str)))
// file descriptor of the output file; created by main(), written by Write()
static int outfd;
// set by the -m flag: overwrite start of output with .macho section content
static bool want_macho;
// program name used in diagnostics; argv[0] or "objbincopy" if that's null
static const char *prog;
// set by the -f flag: change EI_OSABI of the elf header to ELFOSABI_FREEBSD
static bool want_freebsd;
// argument of the -o flag; GetOpts() dies if it was never supplied
static const char *outpath;
// Prints "thing: reason" followed by a newline to file descriptor 2 and
// then exits the process with status 1.
//
// @param thing is what the message is about, e.g. a path or program name
// @param reason is a human readable description of what went wrong
static wontreturn void Die(const char *thing, const char *reason) {
tinyprint(2, thing, ": ", reason, "\n", NULL);
exit(1);
}
// Reports the current errno using perror(), with `thing` as the message
// prefix, and then exits the process with status 1.
//
// @param thing is what the failed system call was operating on
static wontreturn void DieSys(const char *thing) {
perror(thing);
exit(1);
}
// Prints the version banner, a usage synopsis built from the program
// name, and the manual, then exits.
//
// @param rc is the exit status to terminate with
// @param fd is the file descriptor the text is written to
static wontreturn void ShowUsage(int rc, int fd) {
tinyprint(fd, VERSION, "\nUSAGE\n\n ", prog, MANUAL, NULL);
exit(rc);
}
// Parses command line flags into the file-level option variables.
//
// Recognized flags are -h, -m, -f, and -o OUTPUT. The -h flag prints
// usage to file descriptor 1 and exits 0; any flag getopt() doesn't
// recognize prints usage to file descriptor 2 and exits 1. After the
// flags are consumed, the process dies unless an output path was given
// and at least one non-flag argument remains.
//
// @param argc is the argument count handed to main()
// @param argv is the argument vector handed to main()
static void GetOpts(int argc, char *argv[]) {
  int flag;
  for (;;) {
    flag = getopt(argc, argv, "hmfo:");
    if (flag == -1) {
      break;
    }
    if (flag == 'o') {
      outpath = optarg;
    } else if (flag == 'f') {
      want_freebsd = true;
    } else if (flag == 'm') {
      want_macho = true;
    } else if (flag == 'h') {
      ShowUsage(0, 1);
    } else {
      ShowUsage(1, 2);
    }
  }
  // the output path is checked before the positional arguments
  if (outpath == NULL) {
    Die(prog, "need output path");
  }
  if (argc == optind) {
    Die(prog, "missing input argument");
  }
}
// Translates ELF program header permission bits into memory protection
// bits.
//
// @param flags is a mask that may contain PF_R, PF_W, and PF_X
// @return PROT_NONE combined with PROT_READ, PROT_WRITE, and PROT_EXEC
//     for each of the corresponding PF_R, PF_W, and PF_X bits set
static int PhdrFlagsToProt(Elf64_Word flags) {
  return PROT_NONE |                          //
         ((flags & PF_R) ? PROT_READ : 0) |   //
         ((flags & PF_W) ? PROT_WRITE : 0) |  //
         ((flags & PF_X) ? PROT_EXEC : 0);
}
// Writes all `size` bytes at `data` to the output file descriptor.
//
// write() is called repeatedly, each time resuming after the bytes the
// previous call accepted, until nothing is left. If write() returns -1
// the process dies with a system error message naming the output path.
//
// @param data points to the bytes to write
// @param size is the number of bytes to write
static void Write(const void *data, size_t size) {
  const char *cursor = data;
  const char *const limit = cursor + size;
  while (cursor < limit) {
    ssize_t wrote = write(outfd, cursor, limit - cursor);
    if (wrote == -1) {
      DieSys(outpath);
    }
    cursor += (size_t)wrote;
  }
}
// Checks the content of the .macho section, dying with a diagnostic on
// the first problem found.
//
// apple imposes very strict requirements which forbid creativity to the
// greatest possible extent. this routine is designed to help us know if
// something we've built won't be accepted by the xnu kernel.
//
// The only load commands accepted are MAC_LC_SEGMENT_64, MAC_LC_UUID,
// and MAC_LC_UNIXTHREAD. A MAC_LC_UUID whose sixteen payload bytes are
// all zero gets filled in place with random bytes.
//
// @param inpath is the input path, used as the prefix of diagnostics
// @param elf is the elf header of the input image
// @param shdr is the .macho section header; if it's null, this function
//     returns immediately without checking anything
// @param macho is the content of the .macho section
// @param filesize is the size of the file the load segments refer to,
//     or zero to skip the checks that concern file offsets
static void ValidateMachoSection(const char *inpath,         //
                                 Elf64_Ehdr *elf,            //
                                 Elf64_Shdr *shdr,           //
                                 struct MachoHeader *macho,  //
                                 Elf64_Off filesize) {
  int i;
  char *end;
  size_t remaining;
  bool found_uuid;
  bool found_segment;
  uint64_t lastvaddr;
  uint64_t lastoffset;
  bool found_pagezero;
  bool found_unixthread;
  struct MachoLoadCommand *cmd;
  if (!shdr)
    return;
  if (elf->e_machine != EM_NEXGEN32E) {
    Die(inpath, ".macho section only supported for ELF x86_64");
  }
  if (!macho)
    Die(inpath, "corrupted .macho section content");
  if (shdr->sh_size < sizeof(struct MachoHeader)) {
    Die(inpath, ".macho section too small for mach-o header");
  }
  if (macho->magic != 0xFEEDFACE + 1) {
    Die(inpath, ".macho header magic wasn't 0xFEEDFACE+1");
  }
  if (macho->arch != MAC_CPU_NEXGEN32E) {
    Die(inpath, "mach-o arch wasn't MAC_CPU_NEXGEN32E");
  }
  if (shdr->sh_size != sizeof(struct MachoHeader) + macho->loadsize) {
    Die(inpath, ".macho section size not equal to sizeof(header) + loadsize");
  }
  lastvaddr = 0;
  lastoffset = 0;
  found_uuid = false;
  found_segment = false;
  found_pagezero = false;
  found_unixthread = false;
  end = (char *)(macho + 1) + macho->loadsize;
  cmd = (struct MachoLoadCommand *)(macho + 1);
  for (i = 0; i < macho->loadcount; ++i) {
    // cmd never advances past end (each step is bounded below), so the
    // byte count that remains can be computed without overflow; sizes
    // are compared rather than pointers so that an absurd cmd->size
    // can't produce an out of range pointer
    remaining = (size_t)(end - (char *)cmd);
    if (remaining < sizeof(struct MachoLoadCommand) ||
        cmd->size > remaining) {
      Die(inpath, "mach-o load commands overflowed loadsize");
    }
    if (cmd->size < sizeof(struct MachoLoadCommand)) {
      Die(inpath, "mach-o load command size smaller than its own header");
    }
    if (cmd->command == MAC_LC_SEGMENT_64) {
      size_t namelen;
      struct MachoLoadSegment *loadseg;
      // the fields read below must lie within this command
      if (cmd->size < sizeof(struct MachoLoadSegment)) {
        Die(inpath, "MAC_LC_SEGMENT_64 size too small");
      }
      loadseg = (struct MachoLoadSegment *)cmd;
      if (loadseg->sectioncount) {
        Die(inpath, "don't bother with mach-o sections");
      }
      namelen = strnlen(loadseg->name, sizeof(loadseg->name));
      if (!loadseg->name[0]) {
        Die(inpath, "mach-o load segment missing name");
      }
      if (filesize || (loadseg->vaddr && loadseg->memsz)) {
        if (loadseg->vaddr < lastvaddr) {
          Die(inpath,
              "the virtual memory regions defined by mach-o load segment "
              "commands aren't allowed to overlap and must be specified "
              "monotonically");
        }
        if (loadseg->vaddr + loadseg->memsz < loadseg->vaddr) {
          Die(inpath, "mach-o segment memsz overflows");
        }
        if (loadseg->filesz > loadseg->memsz) {
          Die(inpath, "mach-o segment filesz exceeds memsz");
        }
        lastvaddr = loadseg->vaddr + loadseg->memsz;
        if (loadseg->vaddr & 4095) {
          Die(inpath, "mach-o segment vaddr must be page aligned");
        }
      }
      if (filesize) {
        if (loadseg->offset < lastoffset) {
          Die(inpath,
              "the file segments defined by mach-o load segment commands "
              "aren't allowed to overlap and must be specified monotonically");
        }
        if (loadseg->filesz > filesize) {
          Die(inpath, "mach-o segment filesz exceeds file size");
        }
        if (loadseg->offset + loadseg->filesz < loadseg->offset) {
          Die(inpath, "mach-o segment offset + filesz overflows");
        }
        if (loadseg->offset + loadseg->filesz > filesize) {
          Die(inpath, "mach-o segment overlaps end of file");
        }
        lastoffset = loadseg->offset + loadseg->filesz;
      }
      if (namelen == strlen("__PAGEZERO") &&
          !memcmp(loadseg->name, "__PAGEZERO", namelen)) {
        found_pagezero = true;
        if (i != 0) {
          Die(inpath, "mach-o __PAGEZERO must be first load command");
        }
      } else {
        if (!found_segment) {
          found_segment = true;
          if (loadseg->offset) {
            Die(inpath, "the first mach-o load segment (that isn't page zero) "
                        "must begin loading the executable from offset zero");
          }
        }
      }
    } else if (cmd->command == MAC_LC_UUID) {
      uint64_t *uuid;
      found_uuid = true;
      if (cmd->size != sizeof(*cmd) + 16) {
        Die(inpath, "MAC_LC_UUID size wrong");
      }
      uuid = (uint64_t *)(cmd + 1);
      // an all-zero uuid is treated as a request to generate one
      if (!uuid[0] && !uuid[1]) {
        uuid[0] = _rand64();
        uuid[1] = _rand64();
      }
    } else if (cmd->command == MAC_LC_UNIXTHREAD) {
      uint64_t *registers;
      struct MachoLoadThreadCommand *thread;
      if (cmd->size != sizeof(struct MachoLoadThreadCommand) + 21 * 8) {
        Die(inpath, "MAC_LC_UNIXTHREAD size should be 4+4+4+4+21*8");
      }
      thread = (struct MachoLoadThreadCommand *)cmd;
      if (thread->flavor != MAC_THREAD_NEXGEN32E) {
        Die(inpath, "MAC_LC_UNIXTHREAD flavor should be MAC_THREAD_NEXGEN32E");
      }
      if (thread->count != 21 * 8 / 4) {
        Die(inpath, "MAC_LC_UNIXTHREAD count should be 21*8/4");
      }
      registers = (uint64_t *)(thread + 1);
      if (!registers[16]) {
        Die(inpath, "MAC_LC_UNIXTHREAD doesn't specify RIP register");
      }
      found_unixthread = true;
    } else {
      Die(inpath, "unsupported mach-o load command");
    }
    cmd = (struct MachoLoadCommand *)((char *)cmd + cmd->size);
  }
  if (!found_uuid) {
    Die(inpath, "mach-o missing MAC_LC_UUID");
  }
  if (!found_unixthread) {
    Die(inpath, "mach-o missing MAC_LC_UNIXTHREAD");
  }
  if (!found_pagezero) {
    Die(inpath, "mach-o missing __PAGEZERO load segment command");
  }
  // loadcount commands must account for every byte of loadsize
  if ((char *)cmd != end) {
    Die(inpath, "mach-o loadsize greater than load commands");
  }
}
// Advances through mach-o load commands until it finds a segment
// command that isn't named __PAGEZERO.
//
// Each command visited, including the one returned, is consumed: `*load`
// is moved `size` bytes forward to the following command and `*count`
// is decremented. No bounds checking is performed here.
//
// @param load points to the cursor over the load commands, which is
//     updated in place
// @param count points to the number of commands left at the cursor,
//     which is updated in place
// @return next MAC_LC_SEGMENT_64 command not named __PAGEZERO, or null
//     if the commands ran out first
static struct MachoLoadSegment *GetNextMachoLoadSegment(
    struct MachoLoadCommand **load, int *count) {
  for (; *count;) {
    struct MachoLoadCommand *current = *load;
    struct MachoLoadSegment *candidate;
    --*count;
    *load = (struct MachoLoadCommand *)((char *)current + current->size);
    if (current->command != MAC_LC_SEGMENT_64) {
      continue;
    }
    candidate = (struct MachoLoadSegment *)current;
    if (IsStaticStringEqual(candidate->name, "__PAGEZERO")) {
      continue;
    }
    return candidate;
  }
  return 0;
}
// Copies the file content covered by the program headers of an elf
// image to the output file.
//
// The elf header is edited in memory beforehand: the section header
// table fields are zeroed, and EI_OSABI is set to FreeBSD if -f was
// passed. The output is the image from offset zero through the highest
// file offset covered by any program header with a nonzero p_filesz.
//
// If the image has a .macho section, then each PT_LOAD program header
// with a nonzero p_filesz has its address, size, offset, and protection
// copied into the next MAC_LC_SEGMENT_64 load command that isn't named
// __PAGEZERO. If -m was passed, the .macho section content is then
// written over the start of the output.
//
// @param inpath is the input path, used as the prefix of diagnostics
// @param elf is a writable mapping of the input file
// @param esize is the byte length of that mapping
static void HandleElf(const char *inpath, Elf64_Ehdr *elf, size_t esize) {
  char *secstrs;
  int i, loadcount;
  Elf64_Off maxoff;
  Elf64_Off segend;
  Elf64_Phdr *phdr;
  Elf64_Shdr *macho_shdr;
  struct MachoHeader *macho;
  struct MachoLoadCommand *loadcommand;
  struct MachoLoadSegment *loadsegment;
  if (elf->e_type != ET_EXEC && elf->e_type != ET_DYN) {
    Die(inpath, "elf binary isn't an executable");
  }
  if (!(secstrs = GetElfSectionNameStringTable(elf, esize))) {
    Die(inpath, "elf section name string table not found");
  }
  // the .macho section is optional, so everything derived from it must
  // tolerate its absence
  macho_shdr = FindElfSectionByName(elf, esize, secstrs, ".macho");
  macho = macho_shdr ? GetElfSectionAddress(elf, esize, macho_shdr) : NULL;
  // ValidateMachoSection(inpath, elf, macho_shdr, macho, 0);
  loadcommand = NULL;
  loadcount = 0;
  if (macho) {
    // the header must fit in the section before its fields are read
    if (macho_shdr->sh_size < sizeof(struct MachoHeader)) {
      Die(inpath, ".macho section too small for mach-o header");
    }
    loadcommand = (struct MachoLoadCommand *)(macho + 1);
    loadcount = macho->loadcount;
  }
  if (want_freebsd) {
    elf->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
  }
  elf->e_shoff = 0;
  elf->e_shnum = 0;
  elf->e_shstrndx = 0;
  elf->e_shentsize = 0;
  for (maxoff = i = 0; i < elf->e_phnum; ++i) {
    phdr = GetElfProgramHeaderAddress(elf, esize, i);
    if (!phdr)
      Die(inpath, "corrupted elf header");
    if (phdr->p_type == PT_INTERP)
      Die(inpath, "PT_INTERP isn't supported");
    if (phdr->p_type == PT_DYNAMIC)
      Die(inpath, "PT_DYNAMIC isn't supported");
    if (!phdr->p_filesz)
      continue;
    // maxoff bytes get read from the mapping below, so no segment may
    // claim file content beyond what was actually mapped
    segend = phdr->p_offset + phdr->p_filesz;
    if (segend < phdr->p_offset || segend > esize) {
      Die(inpath, "elf segment extends past end of file");
    }
    maxoff = MAX(maxoff, segend);
    if (macho && phdr->p_type == PT_LOAD) {
      if (!(loadsegment = GetNextMachoLoadSegment(&loadcommand, &loadcount))) {
        Die(inpath, "there must exist a MAC_LC_SEGMENT_64 for every PT_LOAD "
                    "when the .macho section is defined");
      }
      loadsegment->vaddr = phdr->p_vaddr;
      loadsegment->memsz = phdr->p_memsz;
      loadsegment->offset = phdr->p_offset;
      loadsegment->filesz = phdr->p_filesz;
      loadsegment->initprot |= PhdrFlagsToProt(phdr->p_flags);
      // execute-only segments are additionally made readable
      if (loadsegment->initprot == PROT_EXEC) {
        loadsegment->initprot |= PROT_READ;
      }
      loadsegment->maxprot |= loadsegment->initprot;
    }
  }
  // ValidateMachoSection(inpath, elf, macho_shdr, macho, maxoff);
  Write((char *)elf, maxoff);
  if (want_macho) {
    if (!macho_shdr || !macho) {
      Die(inpath, "requested Mach-O output but .macho section not found");
    }
    // rewind the output, so failures here are about the output path
    if (lseek(outfd, 0, SEEK_SET)) {
      DieSys(outpath);
    }
    // TODO(jart): Add a check that ensures we aren't overwriting
    //             anything except ELF headers and the old mach-o
    Write((char *)elf + macho_shdr->sh_offset, macho_shdr->sh_size);
  }
}
// Opens one input file, maps it into memory, and processes it as an
// elf64 binary.
//
// The file size is found by seeking to the end. A zero length file is
// closed without being mapped or processed. The mapping is private and
// writable, so changes made to it aren't carried through to the input
// file. Failed system calls terminate the process, as does content that
// isn't recognized as an elf64 binary.
//
// @param inpath is the path of the input file
static void HandleInput(const char *inpath) {
  int fd;
  void *image;
  ssize_t length;
  fd = open(inpath, O_RDONLY);
  if (fd == -1) {
    DieSys(inpath);
  }
  length = lseek(fd, 0, SEEK_END);
  if (length == -1) {
    DieSys(inpath);
  }
  if (length) {
    image = mmap(0, length, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (image == MAP_FAILED) {
      DieSys(inpath);
    }
    if (!IsElf64Binary(image, length)) {
      Die(prog, "not an elf64 binary");
    }
    HandleElf(inpath, image, length);
    if (munmap(image, length)) {
      DieSys(inpath);
    }
  }
  if (close(fd)) {
    DieSys(inpath);
  }
}
// Program entry point.
//
// Parses the flags, creates the output file with mode 0755, processes
// each remaining argument as an input file in order, and finally closes
// the output file. Every failure along the way terminates the process
// with a diagnostic.
int main(int argc, char *argv[]) {
  int i;
  // fall back to a fixed name if the caller supplied no argv[0]
  prog = argv[0] ? argv[0] : "objbincopy";
  GetOpts(argc, argv);
  outfd = creat(outpath, 0755);
  if (outfd == -1) {
    DieSys(outpath);
  }
  i = optind;
  while (i < argc) {
    HandleInput(argv[i]);
    ++i;
  }
  if (close(outfd)) {
    DieSys(outpath);
  }
  return 0;
}