cosmopolitan/third_party/chibicc/file.c
Justine Tunney 8bfd56b59e
Rename _bsr/_bsf to bsr/bsf
Now that these functions are behind _COSMO_SOURCE there's no reason for
having the ugly underscore anymore. To use these functions, you need to
pass -mcosmo to cosmocc.
2024-03-04 17:33:26 -08:00

177 lines
4.8 KiB
C

#include "libc/intrin/bsf.h"
#include "third_party/chibicc/chibicc.h"
// Slurps contents of file.
char *read_file(const char *path) {
char *p;
FILE *fp;
int buflen, nread, end, n;
if (!strcmp(path, "-")) {
fp = stdin;
} else {
fp = fopen(path, "r");
if (!fp) return NULL;
}
buflen = 4096;
nread = 0;
p = calloc(1, buflen);
for (;;) {
end = buflen - 2;
n = fread(p + nread, 1, end - nread, fp);
if (n == 0) break;
nread += n;
if (nread == end) {
buflen *= 2;
p = realloc(p, buflen);
}
}
if (fp != stdin) fclose(fp);
if (nread > 0 && p[nread - 1] == '\\') {
p[nread - 1] = '\n';
} else if (nread == 0 || p[nread - 1] != '\n') {
p[nread++] = '\n';
}
p[nread] = '\0';
return p;
}
char *skip_bom(char *p) {
// UTF-8 texts may start with a 3-byte "BOM" marker sequence.
// If exists, just skip them because they are useless bytes.
// (It is actually not recommended to add BOM markers to UTF-8
// texts, but it's not uncommon particularly on Windows.)
if (!memcmp(p, "\357\273\277", 3)) p += 3;
return p;
}
// Replaces \r or \r\n with \n.
void canonicalize_newline(char *p) {
char *q = p;
for (;;) {
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__) // :'(
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
if (!((uintptr_t)p & 15)) {
xmm_t v;
unsigned m;
xmm_t z = {0};
xmm_t s = {'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'};
xmm_t t = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
for (;;) {
v = *(const xmm_t *)p;
m = __builtin_ia32_pmovmskb128((v == z) | (v == s) | (v == t));
if (!m) {
*(xmm_u *)q = v;
p += 16;
q += 16;
} else {
m = bsf(m);
memmove(q, p, m);
p += m;
q += m;
break;
}
}
}
#endif
if (p[0]) {
if (p[0] == '\r' && p[1] == '\n') {
p += 2;
*q++ = '\n';
} else if (p[0] == '\r') {
p += 1;
*q++ = '\n';
} else {
*q++ = *p++;
}
} else {
break;
}
}
*q = '\0';
}
// Removes backslashes followed by a newline.
void remove_backslash_newline(char *p) {
int i = 0, j = 0;
// We want to keep the number of newline characters so that
// the logical line number matches the physical one.
// This counter maintain the number of newlines we have removed.
int n = 0;
bool instring = false;
for (;;) {
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__) // :'(
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
if (!((uintptr_t)(p + i) & 15)) {
xmm_t v;
unsigned m;
xmm_t A = {0};
xmm_t B = {'/', '/', '/', '/', '/', '/', '/', '/',
'/', '/', '/', '/', '/', '/', '/', '/'};
xmm_t C = {'"', '"', '"', '"', '"', '"', '"', '"',
'"', '"', '"', '"', '"', '"', '"', '"'};
xmm_t D = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'};
xmm_t E = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
for (;;) {
v = *(const xmm_t *)(p + i);
m = __builtin_ia32_pmovmskb128((v == A) | (v == B) | (v == C) |
(v == D) | (v == E));
if (!m) {
*(xmm_u *)(p + j) = v;
i += 16;
j += 16;
} else {
m = bsf(m);
memmove(p + j, p + i, m);
i += m;
j += m;
break;
}
}
}
#endif
if (p[i]) {
if (instring) {
if (p[i] == '"' && p[i - 1] != '\\') {
instring = false;
}
} else {
if (p[i] == '"') {
instring = true;
} else if (p[i] == '/' && p[i + 1] == '*') {
p[j++] = p[i++];
p[j++] = p[i++];
while (p[i]) {
if (p[i] == '*' && p[i + 1] == '/') {
p[j++] = p[i++];
p[j++] = p[i++];
break;
} else {
p[j++] = p[i++];
}
}
continue;
}
}
if (p[i] == '\\' && p[i + 1] == '\n') {
i += 2;
n++;
} else if (p[i] == '\n') {
p[j++] = p[i++];
for (; n > 0; n--) p[j++] = '\n';
} else {
p[j++] = p[i++];
}
} else {
break;
}
}
for (; n > 0; n--) p[j++] = '\n';
p[j] = '\0';
}