Add APE support for 32-bit programs

This is an experimental proof of concept demo:

    make -j8 o//examples/i386.i386.com
    o//examples/i386.i386.com --32
    echo $?  # <-- prints main32's argc parameter

Please note there's no libc support at the moment. This change is just
for fun and shouldn't be interpreted as intent to support. Having this
project on the whole support i386 would be tough because the calling
conventions are so different. There really isn't much in the way of low
hanging fruit opportunities to do something like NOP out REX prefixes
and call it a day. It'd likely need disjoint builds or possibly better
yet really cleverly crafted code generation in //third_party/chibicc!

Another challenge is that Linux changed its SYSCALL ordinals when it
migrated from i386 to x86_64. Linux used to use the same magic numbers
as everyone else for functions like exit/write/read/etc. BSDs on the
other hand didn't pointlessly renumber things. So we'd need to find a
way to define the ordinals in libc/sysv/syscalls.sh twice for Linux. The
same goes for other ISAs too. Especially MIPS. For the Linux Kernel
alone, syscall magic numbers and data structure layouts are totally
different, and that's likely the case for other ISAs on other operating
systems too. Probably because ISA code historically got contributed to
open source by the companies that made the chips. Ulrich Drepper wrote
an amusing essay on the subject some years back.
This commit is contained in:
Justine Tunney 2021-02-20 15:16:27 -08:00
parent e345b42d78
commit face6b61d5
11 changed files with 190 additions and 2 deletions

View file

@ -487,6 +487,36 @@ apesh: .ascii "'\n#'\"\n" # sixth edition shebang
.ascii "el"
#endif
.ascii "if exec 7<> \"$o\"; then\n"
// Writes 32-bit ELF header if --32 is the first argument
// This should be changed to check $(uname -m) on ENOEXEC
//
// The printf below writes the Elf32_Ehdr to fd 7, which the enclosing
// shell code opened on "$o". Offsets in the trailing comments are hex.
// The fields add up to 52 bytes, which is what e_ehsize must report.
.ascii "if [ x\"$1\" = x--32 ]; then\n"
.ascii "printf '"
.ascii "\\177ELF" # 0x0: ELF
.ascii "\\1" # 4: legacy mode
.ascii "\\1" # 5: little endian
.ascii "\\1" # 6: elf v1.o
.ascii "\\011" # 7: FreeBSD
.ascii "\\0" # 8: os/abi ver.
.ascii "\\0\\0\\0" # 9: padding 3/7
.ascii "\\0\\0\\0\\0" # padding 4/7
.ascii "\\2\\0" # 10: εxεcµταblε
.ascii "\\3\\0" # 12: i386
.ascii "\\1\\0\\0\\0" # 14: elf v1.o
.shstub ape_elf32_entry,4 # 18: e_entry
.shstub ape_elf32_phoff,4 # 1c: e_phoff
.shstub ape_elf32_shoff,4 # 20: e_shoff
.ascii "\\0\\0\\0\\0" # 24: e_flags
.ascii "\\064\\0" # 28: e_ehsize (52 = size of Elf32_Ehdr)
.ascii "\\040\\0" # 2a: e_phentsize (32 = size of Elf32_Phdr)
.shstub ape_elf32_phnum,2 # 2c: e_phnum
.ascii "\\0\\0" # 2e: e_shentsize
.shstub ape_elf32_shnum,2 # 30: e_shnum
.shstub ape_elf32_shstrndx,2 # 32: e_shstrndx
.ascii "' >&7\n"
.ascii "else\n"
// Standard 64-bit ELF Header
.ascii "printf '"
.ascii "\\177ELF" # 0x0: ELF
.ascii "\\2" # 4: long mode
@ -510,6 +540,8 @@ apesh: .ascii "'\n#'\"\n" # sixth edition shebang
.shstub ape_elf_shnum,2 # 3c: e_shnum
.shstub ape_elf_shstrndx,2 # 3e: e_shstrndx
.ascii "' >&7\n"
.ascii "fi\n"
.ascii "exec 7<&-\n"
.ascii "fi\n"
.ascii "exec \"$0\" \"$@\"\n" # etxtbsy tail recursion

View file

@ -207,6 +207,9 @@ SECTIONS {
HIDDEN(ape_phdrs = .);
KEEP(*(.elf.phdrs))
HIDDEN(ape_phdrs_end = .);
HIDDEN(ape_phdrs32 = .);
KEEP(*(.elf.phdrs32))
HIDDEN(ape_phdrs32_end = .);
/* OpenBSD */
. = ALIGN(__SIZEOF_POINTER__);
@ -246,7 +249,7 @@ SECTIONS {
/*END: realmode addressability guarantee */
/* Normal Code */
*(.start)
KEEP(*(.start))
KEEP(*(.initprologue))
KEEP(*(SORT_BY_NAME(.init.*)))
KEEP(*(.init))
@ -445,6 +448,13 @@ PFSTUB4(ape_elf_phnum, (ape_phdrs_end - ape_phdrs) / 56);
PFSTUB4(ape_elf_shnum, 0);
PFSTUB4(ape_elf_shstrndx, 0);
/* Values for the 32-bit ELF header; the apesh --32 branch reads them
   through its .shstub ape_elf32_* directives. */
PFSTUB4(ape_elf32_entry, _start32);
PFSTUB4(ape_elf32_phoff, RVA(ape_phdrs32));
PFSTUB4(ape_elf32_shoff, 0);
/* 32 is the size of one .elf.phdrs32 entry and matches the e_phentsize
   (octal 040) that the 32-bit header advertises. */
PFSTUB4(ape_elf32_phnum, (ape_phdrs32_end - ape_phdrs32) / 32);
PFSTUB4(ape_elf32_shnum, 0);
PFSTUB4(ape_elf32_shstrndx, 0);
HIDDEN(__privileged_addr = ROUNDDOWN(__privileged_start, PAGESIZE));
HIDDEN(__privileged_size = (ROUNDUP(__privileged_end, PAGESIZE) -
ROUNDDOWN(__privileged_start, PAGESIZE)));

Binary file not shown.

Binary file not shown.

View file

@ -299,6 +299,11 @@ OBJECTIFY.greg.c = \
-fno-sanitize=all \
-c
# 32-bit flavor of OBJECTIFY.c: adds -m32 and has gcc run its subcommands
# through build/i386ify.sh via -wrapper.
OBJECTIFY.i386.c = $(OBJECTIFY.c) -m32 -wrapper build/i386ify.sh
# Strict-conformance flavors: each one pins a language standard (-ansi,
# -std=c99, -std=c11) and adds -Wextra -Werror -pedantic-errors on top of
# $(OBJECTIFY.c.flags).
OBJECTIFY.ansi.c = $(CC) $(OBJECTIFY.c.flags) -ansi -Wextra -Werror -pedantic-errors -c
OBJECTIFY.c99.c = $(CC) $(OBJECTIFY.c.flags) -std=c99 -Wextra -Werror -pedantic-errors -c
OBJECTIFY.c11.c = $(CC) $(OBJECTIFY.c.flags) -std=c11 -Wextra -Werror -pedantic-errors -c

35
build/i386ify.sh Executable file
View file

@ -0,0 +1,35 @@
#!/bin/sh
#
# SYNOPSIS
#
# gcc -m32 -wrapper build/i386ify.sh -c -o foo.o foo.c
#
# OVERVIEW
#
# Compiles 32-bit code inside 64-bit ELF objects.
# Only the assembler invocation is rewritten; any other tool that gcc runs
# through this wrapper is executed with its arguments untouched.
if [ "${1##*/}" = as ]; then
  rebuilt=
  for arg; do
    # The loop's word list was expanded before the first iteration, so it
    # is safe to empty "$@" here and rebuild it one argument at a time.
    if [ -z "$rebuilt" ]; then
      set --
      rebuilt=1
    fi
    # Drop --32 so the assembler keeps producing 64-bit ELF objects.
    if [ "$arg" = "--32" ]; then
      continue
    fi
    if [ "${arg##*.}" = s ]; then
      # Prepend a reference to _start32 (in .yoink) plus a .code32
      # directive, then swap the new file in. Stop on any failure so the
      # assembler is never handed a truncated or half-written input.
      {
        printf "\t.section .yoink\n" &&
        printf "\tnopl\t_start32\n" &&
        printf "\t.previous\n" &&
        printf "\t.code32\n" &&
        cat "$arg"
      } >"$arg".tmp || {
        rm -f "$arg".tmp
        exit 1
      }
      mv -f "$arg".tmp "$arg" || exit 1
    fi
    set -- "$@" "$arg"
  done
fi
exec "$@"

View file

@ -64,6 +64,7 @@ o/$(MODE)/%.lds: %.lds ; @$(COMPILE) -APREPROCESS $(PREPROCESS.lds)
# Header checks: each header is passed alone to the compiler, once as C
# (.h.ok) and once as C++ (.h.okk).
o/$(MODE)/%.h.ok: %.h ; @$(COMPILE) -ACHECK.h $(COMPILE.c) -x c -g0 -o $@ $<
o/$(MODE)/%.h.okk: %.h ; @$(COMPILE) -ACHECK.h $(COMPILE.cxx) -x c++ -g0 -o $@ $<
# Flavored object rules: the source suffix (.greg.c, .i386.c, .ansi.c)
# selects which OBJECTIFY.* command line builds the object. A .greg.c
# source can produce either %.o or %.greg.o, and a plain .c source can be
# built as %.ansi.o.
o/$(MODE)/%.o: %.greg.c ; @$(COMPILE) -AOBJECTIFY.greg $(OBJECTIFY.greg.c) $(OUTPUT_OPTION) $<
o/$(MODE)/%.i386.o: %.i386.c ; @$(COMPILE) -AOBJECTIFY.i386 $(OBJECTIFY.i386.c) $(OUTPUT_OPTION) $<
o/$(MODE)/%.greg.o: %.greg.c ; @$(COMPILE) -AOBJECTIFY.greg $(OBJECTIFY.greg.c) $(OUTPUT_OPTION) $<
o/$(MODE)/%.ansi.o: %.ansi.c ; @$(COMPILE) -AOBJECTIFY.ansi $(OBJECTIFY.ansi.c) $(OUTPUT_OPTION) $<
o/$(MODE)/%.ansi.o: %.c ; @$(COMPILE) -AOBJECTIFY.ansi $(OBJECTIFY.ansi.c) $(OUTPUT_OPTION) $<

19
examples/i386.i386.c Normal file
View file

@ -0,0 +1,19 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to this file,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "libc/str/str.h"
/* Ten bytes of value 4. Not referenced by main32 at present. */
char kTenFour[10] = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4};

/**
 * Entry point invoked by the 32-bit crt (_start32) in place of main().
 *
 * This demo only hands its argument count back to the caller. The crt
 * forwards the return value to _exit(), so it shows up as the exit
 * status of the process. No libc function is called here.
 *
 * @param argc is the argument count
 * @param argv is unused
 * @return argc
 */
int main32(int argc, char *argv[]) {
  (void)argv;
  return argc;
}

View file

@ -23,5 +23,6 @@ bool IsElf64Binary(const Elf64_Ehdr *elf, size_t mapsize) {
if (mapsize < sizeof(Elf64_Ehdr)) return false;
if (memcmp(elf->e_ident, ELFMAG, 4)) return false;
return (elf->e_ident[EI_CLASS] == ELFCLASSNONE ||
elf->e_ident[EI_CLASS] == ELFCLASS64);
elf->e_ident[EI_CLASS] == ELFCLASS64 ||
elf->e_ident[EI_CLASS] == ELFCLASS32);
}

View file

@ -33,6 +33,7 @@
// @param rsi:rdi is 128-bit unsigned 𝑥 value
// @return eax number in range [0,128) or undef if 𝑥 is 0
// @see also treasure trove of nearly identical functions
// @mode legacy,long
bsrmax: .leafprologue
.profilable
bsr %rsi,%rax

84
libc/runtime/crt32.S Normal file
View file

@ -0,0 +1,84 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "ape/macros.internal.h"
#include "libc/dce.h"
#include "libc/elf/def.h"
#include "libc/macros.h"
.section .start,"ax",@progbits
.code32
// @fileoverview 80386 APE UNIX CRT
//
// This module is yoinked by build/i386ify.sh when GNU make
// builds source codes that have the .i386.c file extension
// thereby avoiding some 386 bloat when it isn't being used
//
// TODO: Verify we can't use 64-bit ELF with i386 e_machine
//	Entry point named by e_entry in the 32-bit ELF header.
//
//	On entry the stack holds argc at (%esp) with the argv pointer
//	array starting right behind it at 4(%esp). Both are pushed again
//	as ordinary stack arguments so that main32(argc, argv) receives
//	a pointer to the vector rather than the vector's first element.
//	Whatever main32 returns becomes the process exit status.
_start32:
	.weak	main32
	mov	(%esp),%eax		# argc
	lea	4(%esp),%ecx		# address of argv[0]
	push	%ecx			# 2nd argument: argv
	push	%eax			# 1st argument: argc
	call	main32
	mov	%eax,%ebx		# exit status in ebx
	pushpop	1,%eax			# _exit() ordinal
	push	%ebx			# hybrid calling convention
	push	%eax			# should work on Linux+BSDs
	int	$0x80
	ud2				# trap if the system call returns
	.endfn	_start32,globl,hidden
// 32-bit program header table, collected by the linker script between
// ape_phdrs32 and ape_phdrs32_end. Each entry is eight 32-bit fields in
// the order labelled on the PT_GNU_STACK entry below: p_type, p_offset,
// p_vaddr, p_paddr, p_filesz, p_memsz, p_flags, p_align. Note p_flags
// comes second to last here, unlike the 64-bit layout where it follows
// p_type.
.section .elf.phdrs32,"a",@progbits
.long PT_LOAD # p_type: text segment
.stub ape_rom_offset,long
.stub ape_rom_vaddr,long
.stub ape_rom_paddr,long
.stub ape_rom_filesz,long
.stub ape_rom_memsz,long
.long PF_R|PF_X # p_flags (after p_memsz in the 32-bit layout)
.stub ape_rom_align,long
.long PT_LOAD # p_type: data segment
.stub ape_ram_offset,long
.stub ape_ram_vaddr,long
.stub ape_ram_paddr,long
.stub ape_ram_filesz,long
.stub ape_ram_memsz,long
.long PF_R|PF_W # p_flags
.stub ape_ram_align,long
#if SupportsLinux()
// Without this entry Linux ignores mprotect() and returns 0
// It has nothing to do with the stack, which is still exec
.long PT_GNU_STACK # p_type
.long 0 # p_offset
.long 0 # p_vaddr
.long 0 # p_paddr
.long 0 # p_filesz
.long 0 # p_memsz
.long PF_R|PF_W # p_flags
.long 16 # p_align
#endif
#if SupportsOpenbsd() || SupportsNetbsd()
.long PT_NOTE # p_type: notes
.stub ape_note_offset,long
.stub ape_note_vaddr,long
.stub ape_note_paddr,long
.stub ape_note_filesz,long
.stub ape_note_memsz,long
.long PF_R # p_flags
.stub ape_note_align,long
#endif
.previous