From 1bc48bc8e4c212c73c3f64b3ed6179c4171463a7 Mon Sep 17 00:00:00 2001
From: mataha
Date: Sat, 23 Dec 2023 06:39:27 +0100
Subject: [PATCH] Update stb (#885)

This commit and, by extension, PR attempts to update `stb` in the most
straightforward way possible as well as include fixes from main repo's
unmerged PRs for cases rearing their ugly heads during everyday usage:

 - stb#1299: stb_rect_pack: Make rect_height_compare a stable sort
 - stb#1402: stb_image: Fix "unused invalid_chunk" with STBI_FAILURE_USERMSG
 - stb#1404: stb_image: Fix gif two_back memory address
 - stb#1420: stb_image: Improve error reporting if file operations fail
   within *_from_file functions
 - stb#1445: stb_vorbis: Few static analyzers fixes
 - stb#1487: stb_vorbis: Fix residue classdata bounding for
   f->temp_memory_required
 - stb#1490: stb_vorbis: Fix broken clamp in
   codebook_decode_deinterleave_repeat
 - stb#1496: stb_image: Fix pnm only build
 - stb#1497: stb_image: Fix memory leaks if stbi__convert failed
 - stb#1498: stb_vorbis: Fix memory leaks in stb_vorbis
 - stb#1499: stb_vorbis: Minor change to prevent the undefined behavior -
   left shift of a negative value
 - stb#1500: stb_vorbis: Fix signed integer overflow

Includes additional small fixes that I felt didn't warrant a separate PR.
--- dsp/core/core.h | 5 +- dsp/core/dct.c | 79 +- third_party/stb/README.cosmo | 29 +- third_party/stb/README.txt | 143 ++-- third_party/stb/stb_image.c | 1014 +++++++++++++++---------- third_party/stb/stb_image.h | 15 +- third_party/stb/stb_image_resize.c | 30 +- third_party/stb/stb_image_write.c | 621 ++++++++++----- third_party/stb/stb_image_write.h | 1 - third_party/stb/stb_image_write_png.c | 379 --------- third_party/stb/stb_rect_pack.c | 45 +- third_party/stb/stb_truetype.c | 4 +- third_party/stb/stb_vorbis.c | 358 +++++---- third_party/stb/stb_vorbis.h | 15 + tool/viz/derasterize.c | 11 +- tool/viz/memzoom.c | 23 +- tool/viz/od16.c | 19 +- tool/viz/printansi.c | 15 +- tool/viz/printimage.c | 17 +- tool/viz/printvideo.c | 19 +- 20 files changed, 1560 insertions(+), 1282 deletions(-) delete mode 100644 third_party/stb/stb_image_write_png.c diff --git a/dsp/core/core.h b/dsp/core/core.h index 714f3a392..eadf040f9 100644 --- a/dsp/core/core.h +++ b/dsp/core/core.h @@ -9,8 +9,9 @@ int mulaw(int); int unmulaw(int); void *double2byte(long, const void *, double, double) vallocesque; void *byte2double(long, const void *, double, double) vallocesque; -void *dct(float[8][8], float, float, float, float, float); -void *dctjpeg(float[8][8]); +void *dct(float[restrict hasatleast 8][8], unsigned, + float, float, float, float, float); +void *dctjpeg(float[restrict hasatleast 8][8], unsigned); double det3(const double[3][3]) nosideeffect; void *inv3(double[restrict 3][3], const double[restrict 3][3], double); void *matmul3(double[restrict 3][3], const double[3][3], const double[3][3]); diff --git a/dsp/core/dct.c b/dsp/core/dct.c index 506c96f86..cae19d596 100644 --- a/dsp/core/dct.c +++ b/dsp/core/dct.c @@ -18,40 +18,40 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/core/core.h" -#define DCT(A, B, C, D, E, F, G, H, T, C0, C1, C2, C3, C4) \ - do { \ - T z1, z2, z3, z4, z5, z11, z13; \ - T t0, t1, t2, t3, t4, t5, t6, t7, t10, 
t11, t12, t13; \ - t0 = A + H; \ - t7 = A - H; \ - t1 = B + G; \ - t6 = B - G; \ - t2 = C + F; \ - t5 = C - F; \ - t3 = D + E; \ - t4 = D - E; \ - t10 = t0 + t3; \ - t13 = t0 - t3; \ - t11 = t1 + t2; \ - t12 = t1 - t2; \ - A = t10 + t11; \ - E = t10 - t11; \ - z1 = (t12 + t13) * C0; \ - C = t13 + z1; \ - G = t13 - z1; \ - t10 = t4 + t5; \ - t11 = t5 + t6; \ - t12 = t6 + t7; \ - z5 = (t10 - t12) * C1; \ - z2 = t10 * C2 + z5; \ - z4 = t12 * C3 + z5; \ - z3 = t11 * C4; \ - z11 = t7 + z3; \ - z13 = t7 - z3; \ - F = z13 + z2; \ - D = z13 - z2; \ - B = z11 + z4; \ - H = z11 - z4; \ +#define DCT(A, B, C, D, E, F, G, H, T, C0, C1, C2, C3, C4) \ + do { \ + T z1, z2, z3, z4, z5, z11, z13; \ + T t0, t1, t2, t3, t4, t5, t6, t7, t10, t11, t12, t13; \ + t0 = A + H; \ + t7 = A - H; \ + t1 = B + G; \ + t6 = B - G; \ + t2 = C + F; \ + t5 = C - F; \ + t3 = D + E; \ + t4 = D - E; \ + t10 = t0 + t3; \ + t13 = t0 - t3; \ + t11 = t1 + t2; \ + t12 = t1 - t2; \ + A = t10 + t11; \ + E = t10 - t11; \ + z1 = (t12 + t13) * C0; \ + C = t13 + z1; \ + G = t13 - z1; \ + t10 = t4 + t5; \ + t11 = t5 + t6; \ + t12 = t6 + t7; \ + z5 = (t10 - t12) * C1; \ + z2 = t10 * C2 + z5; \ + z4 = t12 * C3 + z5; \ + z3 = t11 * C4; \ + z11 = t7 + z3; \ + z13 = t7 - z3; \ + F = z13 + z2; \ + D = z13 - z2; \ + B = z11 + z4; \ + H = z11 - z4; \ } while (0) /** @@ -65,20 +65,21 @@ * * @cost ~100ns */ -void *dct(float M[8][8], float c0, float c1, float c2, float c3, float c4) { +void *dct(float M[restrict hasatleast 8][8], unsigned stride, + float c0, float c1, float c2, float c3, float c4) { unsigned y, x; - for (y = 0; y < 8; ++y) { + for (y = 0; y < stride * 8; y += stride) { DCT(M[y][0], M[y][1], M[y][2], M[y][3], M[y][4], M[y][5], M[y][6], M[y][7], float, c0, c1, c2, c3, c4); } - for (x = 0; x < 8; ++x) { + for (x = 0; x < stride * 8; x += stride) { DCT(M[0][x], M[1][x], M[2][x], M[3][x], M[4][x], M[5][x], M[6][x], M[7][x], float, c0, c1, c2, c3, c4); } return M; } -void *dctjpeg(float M[8][8]) { - return dct(M, 
.707106781f, .382683433f, .541196100f, 1.306562965f, +void *dctjpeg(float M[restrict hasatleast 8][8], unsigned stride) { + return dct(M, stride, .707106781f, .382683433f, .541196100f, 1.306562965f, .707106781f); } diff --git a/third_party/stb/README.cosmo b/third_party/stb/README.cosmo index ca3b785c9..b2c7717ad 100644 --- a/third_party/stb/README.cosmo +++ b/third_party/stb/README.cosmo @@ -5,8 +5,8 @@ LOCAL CHANGES - Removed undefined behavior - Removed BMP [endian code made it 100x slower than PNG/JPEG] - Removed PIC [never heard of it] - - Removed TGA [consider imaagemagick convert command] - - Removed PSD [consider imaagemagick convert command] + - Removed TGA [consider imagemagick convert command] + - Removed PSD [consider imagemagick convert command] - Removed HDR [mine eyes and wikipedia agree stb gamma math is off] - Patched PNG loading edge case - Fixed code C standard says is undefined @@ -14,10 +14,25 @@ LOCAL CHANGES - Removed unnecessary ifdefs - Removed MSVC torture code -SYNCHRONIZATION POINT +SYNCHRONIZATION POINT (`--date=format:"%a %b %d %H:%M:%S %Y %z"`) - commit f67165c2bb2af3060ecae7d20d6f731173485ad0 - Author: Sean Barrett - Date: Mon Oct 28 09:30:02 2019 -0700 + commit 5736b15f7ea0ffb08dd38af21067c314d6a3aae9 + Author: Sean Barrett + Date: Sun Jan 29 10:46:04 2023 -0800 - Update README.md + re-add perlin noise again + +ADDITIONAL CHANGES/FIXES: + + - https://github.com/nothings/stb/pull/1299 + - https://github.com/nothings/stb/pull/1402 + - https://github.com/nothings/stb/pull/1404 + - https://github.com/nothings/stb/pull/1420 + - https://github.com/nothings/stb/pull/1445 + - https://github.com/nothings/stb/pull/1487 + - https://github.com/nothings/stb/pull/1490 + - https://github.com/nothings/stb/pull/1496 + - https://github.com/nothings/stb/pull/1497 + - https://github.com/nothings/stb/pull/1498 + - https://github.com/nothings/stb/pull/1499 + - https://github.com/nothings/stb/pull/1500 diff --git a/third_party/stb/README.txt 
b/third_party/stb/README.txt index 4c6915271..5c2434237 100644 --- a/third_party/stb/README.txt +++ b/third_party/stb/README.txt @@ -1,13 +1,12 @@ -/* - * stb_image - v2.23 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.29 - public domain image loader - http://nothings.org/stb * no warranty implied; use at your own risk * * [heavily modified by justine tunney] * * JPEG baseline & progressive (12 bpc/arithmetic not supported, same - * as stock IJG lib) PNG 1/2/4/8/16-bit-per-channel + * as stock IJG lib) + * PNG 1/2/4/8/16-bit-per-channel * GIF (*comp always reports as 4-channel) - * HDR (radiance rgbE format) * PNM (PPM and PGM binary only) * * Animated GIF still needs a proper API, but here's one way to do it: @@ -18,45 +17,53 @@ * * ============================ Contributors ========================= * - * Image formats Extensions, features - * Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) - * Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) - * Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) - * Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) - * Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) - * Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) - * Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) - * github:urraka (animated gif) Junggon Kim (PNM comments) - * Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) - * socks-the-fox (16-bit PNG) - * Jeremy Sawicki (ImageNet JPGs) - * Mikhail Morozov (1-bit BMP) - * Optimizations & bugfixes Anael Seghezzi (is-16-bit query) - * Fabian "ryg" Giesen - * Arseny Kapoulkine - * John-Mark Allen + * Image formats Extensions, features + * Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + * Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + * Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + * Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + * Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + * Thatcher 
Ulrich (psd) Nicolas Guillemot (vertical flip) + * Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + * github:urraka (animated gif) Junggon Kim (PNM comments) + * Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + * socks-the-fox (16-bit PNG) + * Optimizations & bugfixes Jeremy Sawicki (ImageNet JPGs) + * Fabian "ryg" Giesen Mikhail Morozov (1-bit BMP) + * Arseny Kapoulkine Anael Seghezzi (is-16-bit query) + * John-Mark Allen Simon Breuss (16-bit PNM) * Carmelo J Fdez-Aguera * * Bug & warning fixes - * Marc LeBlanc David Woo Guillaume George Martins Mozeiko - * Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan - * Dave Moore Roy Eltham Hayaki Saito Nathan Reed - * Won Chun Luke Graham Johan Duparc Nick Verigakis - * the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh - * Janez Zemva John Bartholomew Michal Cichon github:romigrou - * Jonathan Blow Ken Hamada Tero Hanninen github:svdijk - * Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar - * Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex - * Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 - * Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw - * Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus - * Julian Raschke Gregory Mullen Baldur Karlsson - * github:poppolopoppo Christian Floisand Kevin Schmidt JR Smith - * github:darealshinji Blazej Dariusz Roszkowski github:Michaelangel007 - */ - -/* - * DOCUMENTATION + * Marc LeBlanc Laurent Gomila JR Smith + * Christpher Lloyd Sergio Gonzalez Matvey Cherevko + * Phil Jordan Ryamond Barbiero Zack Middleton + * Hayaki Saito Engin Manap + * Luke Graham Dale Weiler Martins Mozeiko + * Thomas Ruf Neil Bickford Blazej Dariusz Roszkowski + * Janez Zemva Gregory Mullen Roy Eltham + * Jonathan Blow Kevin Schmidt + * Eugene Golushkov Brad Weinberger the Horde3D community + * Aruelien Pocheville Alexander Veselov github:rlyeh + * Cass Everitt [reserved] github:romigrou + * Paul Du Bois 
github:svdijk + * Philipp Wiesemann Guillaume George github:snagar + * Josh Tobin Joseph Thomson github:Zelex + * Julian Raschke Dave Moore github:grim210 + * Baldur Karlsson Won Chun github:sammyhw + * Nick Verigakis github:phprus + * Luca Sas github:poppolopoppo + * Ryan C. Gordon Michal Cichon github:darealshinji + * David Woo Tero Hanninen github:Michaelangel007 + * Jerry Jansson Cort Stratton github:mosra + * Thibault Reuille [reserved] + * Nathan Reed [reserved] + * Johan Duparc Aldo Culquicondor + * Ronny Chevalier Oriol Ferrer Jacko Dirks + * John Bartholomew Matthew Gregan + * Ken Hamada Christian Floisand + * + * ============================ Documentation ========================= * * Limitations: * - no 12-bit-per-channel JPEG @@ -70,14 +77,15 @@ * // ... x = width, y = height, n = # 8-bit components per pixel ... * // ... replace '0' with '1'..'4' to force that many components per pixel * // ... but 'n' will always be the number that it would have been if you - * said 0 stbi_image_free(data) + * // ... said 0 + * stbi_image_free(data); * * Standard parameters: * int *x -- outputs image width in pixels * int *y -- outputs image height in pixels * int *channels_in_file -- outputs # of image components in image file * int desired_channels -- if non-zero, # of image components requested in - * result + * result * * The return value from an image loader is an 'unsigned char *' which points * to the pixel data, or NULL on an allocation failure or if the image is @@ -110,6 +118,32 @@ * * Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. * + * To query the width, height and component count of an image without having to + * decode the full file, you can use the stbi_info family of functions: + * + * int x,y,n,ok; + * ok = stbi_info(filename, &x, &y, &n); + * // returns ok=1 and sets x, y, n if image is a supported format, + * // 0 otherwise. 
+ * + * Note that stb_image pervasively uses ints in its public API for sizes, + * including sizes of memory buffers. This is now part of the API and thus + * hard to change without causing breakage. As a result, the various image + * loaders all have certain limits on image size; these differ somewhat + * by format but generally boil down to either just under 2GB or just under + * 1GB. When the decoded image would be larger than this, stb_image decoding + * will fail. + * + * Additionally, stb_image will reject image files that have any of their + * dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, + * which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, + * the only way to have an image with such dimensions load correctly + * is for it to have a rather extreme aspect ratio. Either way, the + * assumption here is that such larger images are likely to be malformed + * or malicious. If you do need to load an image with individual dimensions + * larger than that, and it still fits in the overall size limit, you can + * #define STBI_MAX_DIMENSIONS on your own to be something larger. + * * =========================================================================== * * I/O callbacks @@ -163,11 +197,10 @@ * * iPhone PNG support: * - * By default we convert iphone-formatted PNGs back to RGB, even though - * they are internally encoded differently. You can disable this conversion - * by calling stbi_convert_iphone_png_to_rgb(0), in which case - * you will always just get the native iphone "format" through (which - * is BGR stored in RGB). + * We optionally support converting iPhone-formatted PNGs (which store + * premultiplied BGRA) back to RGB, even though they're internally encoded + * differently. To enable this conversion, call + * stbi_convert_iphone_png_to_rgb(1). 
* * Call stbi_set_unpremultiply_on_load(1) as well to force a divide per * pixel to remove any premultiplied alpha *only* if the image file explicitly @@ -191,9 +224,18 @@ * - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still * want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB * + * - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater + * than that size (in either width or height) without further processing. + * This is to let programs in the wild set an upper bound to prevent + * denial-of-service attacks on untrusted data, as one could generate a + * valid image of gigantic dimensions and force stb_image to allocate a + * huge block of memory and spend disproportionate time decoding it. By + * default this is set to (1 << 24), which is 16777216, but that's still + * very big. + * */ -/* stb_image_resize - v0.96 - public domain image resizing +/* stb_image_resize - v0.97 - public domain image resizing * by Jorge L Rodriguez (@VinoBS) - 2014 * http://github.com/nothings/stb * @@ -214,9 +256,7 @@ * output_pixels, out_w, out_h, 0, * num_channels , alpha_chan , 0, STBIR_EDGE_CLAMP) * // WRAP/REFLECT/ZERO - */ - -/* + * * DOCUMENTATION * * SRGB & FLOATING POINT REPRESENTATION @@ -348,6 +388,7 @@ * Nathan Reed: warning fixes * * REVISIONS + * 0.97 (2020-02-02) fixed warning * 0.96 (2019-03-04) fixed warnings * 0.95 (2017-07-23) fixed warnings * 0.94 (2017-03-18) fixed warnings diff --git a/third_party/stb/stb_image.c b/third_party/stb/stb_image.c index 97b560a33..6852c3adc 100644 --- a/third_party/stb/stb_image.c +++ b/third_party/stb/stb_image.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or 
distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -51,15 +51,12 @@ http://nothings.org/stb\""); #define idct_block_kernel stbi__idct_block #endif -#define ROL(w, k) ((w) << (k) | (w) >> (sizeof(w) * CHAR_BIT - (k))) +#define ROL(w, k) (((w) << (k)) | ((w) >> (-(k) & (sizeof(w) * CHAR_BIT - 1)))) -#ifndef STBI_REALLOC_SIZED -#define STBI_REALLOC_SIZED(p, oldsz, newsz) realloc(p, newsz) +#ifndef STBI_MAX_DIMENSIONS +#define STBI_MAX_DIMENSIONS (1 << 24) #endif -typedef unsigned char stbi_uc; -typedef unsigned short stbi_us; - // stbi__context structure is our basic context used by all images, so it // contains all the IO context, plus some basic image information typedef struct { @@ -70,6 +67,7 @@ typedef struct { int read_from_callbacks; int buflen; unsigned char buffer_start[128]; + int callback_already_read; unsigned char *img_buffer, *img_buffer_end; unsigned char *img_buffer_original, *img_buffer_original_end; } stbi__context; @@ -83,6 +81,7 @@ static void stbi__start_mem(stbi__context *s, unsigned char const *buffer, int len) { s->io.read = NULL; s->read_from_callbacks = 0; + s->callback_already_read = 0; s->img_buffer = s->img_buffer_original = (unsigned char *)buffer; s->img_buffer_end = s->img_buffer_original_end = (unsigned char *)buffer + len; @@ -95,7 +94,8 @@ static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, s->io_user_data = user; s->buflen = sizeof(s->buffer_start); s->read_from_callbacks = 1; - s->img_buffer_original = s->buffer_start; + s->callback_already_read = 0; + s->img_buffer = s->img_buffer_original = s->buffer_start; stbi__refill_buffer(s); s->img_buffer_original_end = s->img_buffer_end; } @@ -105,11 +105,16 @@ static int stbi__stdio_read(void *user, char *data, int size) { } static void stbi__stdio_skip(void *user, int n) { + int ch; fseek(user, n, SEEK_CUR); + ch = fgetc(user); + if (ch != EOF) { + ungetc(ch, user); + } } static int stbi__stdio_eof(void *user) 
{ - return feof(user); + return feof(user) || ferror(user); } static stbi_io_callbacks stbi__stdio_callbacks = { @@ -168,8 +173,8 @@ const char *stbi_failure_reason(void) { static int stbi__err(const char *specific_details, const char *general_details) { - /* DebugBreak(); */ - /* WARNF("%s: %s", general_details, specific_details); */ + // DebugBreak(); + // WARNF("%s: %s", general_details, specific_details); stbi__g_failure_reason = general_details; return 0; } @@ -204,17 +209,27 @@ static int stbi__mul2sizes_valid(int a, int b) { return a <= INT_MAX / b; } -// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +// returns 1 if "a * b + add" has no negative terms/factors +// and doesn't overflow static int stbi__mad2sizes_valid(int a, int b, int add) { return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a * b, add); } -// returns 1 if "a*b*c + add" has no negaive terms/factors and doesn't overflow +// returns 1 if "a * b * c + add" has no negative terms/factors +// and doesn't overflow static int stbi__mad3sizes_valid(int a, int b, int c, int add) { return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && stbi__addsizes_valid(a * b * c, add); } +// returns 1 if "a * b * c * d + add" has no negative terms/factors +// and doesn't overflow +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) { + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && + stbi__mul2sizes_valid(a * b * c, d) && + stbi__addsizes_valid(a * b * c * d, add); +} + // mallocs with size overflow checking static void *stbi__malloc_mad2(int a, int b, int add) { if (!stbi__mad2sizes_valid(a, b, add)) return NULL; @@ -226,6 +241,44 @@ static void *stbi__malloc_mad3(int a, int b, int c, int add) { return xmalloc(a * b * c + add); } +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) { + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return xmalloc(a * b * c * d + add); +} + +// returns 1 
if the sum of two signed ints is valid +// (between -2^31 and 2^31-1 inclusive), 0 on overflow. +static int stbi__addints_valid(int a, int b) { + if ((a >= 0) != (b >= 0)) { + // a and b have different signs, so no overflow + return 1; + } + if (a < 0 && b < 0) { + // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. + return a >= INT_MIN - b; + } + return a <= INT_MAX - b; +} + +// returns 1 if the product of two ints fits in a signed short, +// 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) { + if (b == 0 || b == -1) { + // multiplication by 0 is always 0; + // check for -1 so SHRT_MIN / b doesn't overflow + return 1; + } + if ((a >= 0) == (b >= 0)) { + // product is positive, so similar to mul2sizes_valid + return a <= SHRT_MAX / b; + } + if (b < 0) { + // same as a * b >= SHRT_MIN + return a <= SHRT_MIN / b; + } + return a >= SHRT_MIN / b; +} + #define stbi__errpf(x, y) \ ({ \ stbi__err(x, y); \ @@ -247,22 +300,22 @@ void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { } static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, - int req_comp, stbi__result_info *ri, int bpc) { + int req_comp, stbi__result_info *ri) { bzero(ri, sizeof(*ri)); ri->bits_per_channel = 8; ri->num_channels = 0; -#ifndef STBI_NO_JPEG - if (stbi__jpeg_test(s)) return stbi__jpeg_load(s, x, y, comp, req_comp, ri); -#endif -#ifndef STBI_NO_PNG + + // test the formats with a very explicit header first (at least a FOURCC + // or distinctive magic number first) if (stbi__png_test(s)) return stbi__png_load(s, x, y, comp, req_comp, ri); -#endif -#ifndef STBI_NO_GIF if (stbi__gif_test(s)) return stbi__gif_load(s, x, y, comp, req_comp, ri); -#endif -#ifndef STBI_NO_PNM + + // then the formats that can end up attempting to load with just 1 or 2 + // bytes matching expectations; these are prone to false positives, so + // try them later + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s, x, y, comp, req_comp, ri); if 
(stbi__pnm_test(s)) return stbi__pnm_load(s, x, y, comp, req_comp, ri); -#endif + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); } @@ -335,12 +388,18 @@ static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int req_comp) { void *result; stbi__result_info ri; - result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + result = stbi__load_main(s, x, y, comp, req_comp, &ri); if (result == NULL) return NULL; + assert(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); if (ri.bits_per_channel != 8) { - assert(ri.bits_per_channel == 16); - result = - stbi__convert_16_to_8(result, *x, *y, req_comp == 0 ? *comp : req_comp); + // https://github.com/nothings/stb/pull/1497 + unsigned char *converted = stbi__convert_16_to_8( + (uint16_t *)result, *x, *y, req_comp == 0 ? *comp : req_comp); + if (converted == NULL) { + free(result); + return NULL; + } + result = converted; ri.bits_per_channel = 8; } // @TODO: move stbi__convert_format to here @@ -354,13 +413,20 @@ static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, static uint16_t *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) { + void *result; stbi__result_info ri; - void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + result = stbi__load_main(s, x, y, comp, req_comp, &ri); if (result == NULL) return NULL; + assert(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); if (ri.bits_per_channel != 16) { - assert(ri.bits_per_channel == 8); - result = stbi__convert_8_to_16((unsigned char *)result, *x, *y, - req_comp == 0 ? *comp : req_comp); + // https://github.com/nothings/stb/pull/1497 + uint16_t *converted = stbi__convert_8_to_16( + (unsigned char *)result, *x, *y, req_comp == 0 ? 
*comp : req_comp); + if (converted == NULL) { + free(result); + return NULL; + } + result = converted; ri.bits_per_channel = 16; } // @TODO: move stbi__convert_format16 to here @@ -396,7 +462,14 @@ unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp); if (result) { // need to 'unget' all the characters in the IO buffer - fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR); + // https://github.com/nothings/stb/pull/1420 + if (fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR)) { + // fseek() failed; we can no longer maintain the file cursor position + // guarantee of this function, so return null. + free(result); + return stbi__errpuc("bad file", + "fseek() failed; seek position unreliable"); + } } return result; } @@ -409,7 +482,14 @@ uint16_t *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp); if (result) { // need to 'unget' all the characters in the IO buffer - fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR); + // https://github.com/nothings/stb/pull/1420 + if (fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR)) { + // fseek() failed; we can no longer maintain the file cursor position + // guarantee of this function, so return null. + free(result); + return (uint16_t *)stbi__errpuc( + "bad file", "fseek() failed; seek position unreliable"); + } } return result; } @@ -476,6 +556,7 @@ enum { STBI__SCAN_load = 0, STBI__SCAN_type, STBI__SCAN_header }; static void stbi__refill_buffer(stbi__context *s) { int n = (s->io.read)(s->io_user_data, (char *)s->buffer_start, s->buflen); + s->callback_already_read += (int)(s->img_buffer - s->img_buffer_original); if (n == 0) { // at end of file, treat same as if from memory, but need to handle case // where s->img_buffer isn't pointing to safe memory, e.g. 
0-byte file @@ -510,6 +591,7 @@ forceinline int stbi__at_eof(stbi__context *s) { } static void stbi__skip(stbi__context *s, int n) { + if (n == 0) return; // already there! if (n < 0) { s->img_buffer = s->img_buffer_end; return; @@ -569,7 +651,7 @@ static uint32_t stbi__get32be(stbi__context *s) { // generic converter from built-in img_n to req_comp // individual types do this automatically as much as possible (e.g. jpeg // does all cases internally since it needs to colorspace convert anyway, -// and it never has alpha, so very few cases ). png can automatically +// and it never has alpha, so very few cases). png can automatically // interleave an alpha=255 channel, but falls back to this for other cases // // assume data buffer is malloced, so malloc a new one and free that one @@ -658,6 +740,9 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, break; default: assert(0); + free(data); + free(good); + return stbi__errpuc("unsupported", "Unsupported format conversion"); } #undef STBI__CASE } @@ -751,6 +836,10 @@ static uint16_t *stbi__convert_format16(uint16_t *data, int img_n, int req_comp, break; default: assert(0); + free(data); + free(good); + return (uint16_t *)stbi__errpuc("unsupported", + "Unsupported format conversion"); } #undef STBI__CASE } @@ -849,8 +938,14 @@ static int stbi__build_huffman(stbi__huffman *h, int *count) { int i, j, k = 0; unsigned int code; // build size list for each symbol (from JPEG spec) - for (i = 0; i < 16; ++i) - for (j = 0; j < count[i]; ++j) h->size[k++] = (unsigned char)(i + 1); + for (i = 0; i < 16; ++i) { + for (j = 0; j < count[i]; ++j) { + h->size[k++] = (unsigned char)(i + 1); + if (k >= 257) { + return stbi__err("bad size list", "Corrupt JPEG"); + } + } + } h->size[k] = 0; // compute actual symbols (from jpeg spec) @@ -962,8 +1057,10 @@ forceinline int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) { for (k = FAST_BITS + 1;; ++k) if (temp < h->maxcode[k]) break; if (k == 17) { + 
WARNF("j->code_bits: %d", j->code_bits); // error! code not found j->code_bits -= 16; + WARNF("Symbol: %d", k); return -1; } @@ -971,6 +1068,10 @@ forceinline int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) { // convert the huffman code to the symbol id c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + if (c < 0 || c >= 256) { + // symbol id out of bounds! + return -1; + } assert((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); @@ -980,7 +1081,7 @@ forceinline int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) { return h->values[c]; } -// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); - sgn = (int32_t)j->code_buffer >> 31; // sign bit is always in MSB + if (j->code_bits < n) { + // ran out of bits from stream, return 0s intead of continuing + return 0; + } + // sign bit is always in MSB; + // 0 if MSB clear (positive), 1 if MSB set (negative) + sgn = j->code_buffer >> 31; k = ROL(j->code_buffer, n); - assert(n >= 0 && n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask))); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; j->code_bits -= n; - return k + (stbi__jbias[n] & ~sgn); + return k + (stbi__jbias[n] & (sgn - 1)); } // get some unsigned bits forceinline int stbi__jpeg_get_bits(stbi__jpeg *j, int n) { unsigned int k; if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + if (j->code_bits < n) { + // ran out of bits from stream, return 0s intead of continuing + return 0; + } k = ROL(j->code_buffer, n); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; @@ -1015,6 +1124,10 @@ forceinline int stbi__jpeg_get_bits(stbi__jpeg *j, int n) { forceinline int stbi__jpeg_get_bit(stbi__jpeg *j) { unsigned int k; if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + if (j->code_bits < 1) { + // ran out of bits from stream, return 0s intead of continuing + return 0; + } k = j->code_buffer; j->code_buffer <<= 1; --j->code_bits; @@ -1039,12 +1152,18 @@ static int 
stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], int diff, dc, k, t, c, r, s, rs; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); t = stbi__jpeg_huff_decode(j, hdc); - if (t < 0) return stbi__err("bad huffman code", "Corrupt JPEG"); + if (t < 0 || t > 15) return stbi__err("bad huffman code", "Corrupt JPEG"); // 0 all the ac values now so we can do it 32-bits at a time bzero(data, 64 * sizeof(data[0])); diff = t ? stbi__extend_receive(j, t) : 0; + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) { + return stbi__err("bad delta", "Corrupt JPEG"); + } dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; + if (!stbi__mul2shorts_valid(dc, dequant[0])) { + return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + } data[0] = (short)(dc * dequant[0]); // decode AC components, see JPEG spec k = 1; @@ -1055,6 +1174,10 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length + if (s > j->code_bits) { + return stbi__err("bad huffman code", + "Combined length longer than code bits available"); + } j->code_buffer <<= s; j->code_bits -= s; // decode into unzigzag'd location @@ -1082,7 +1205,6 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) { int t; - short s; int diff, dc; if (j->spec_end != 0) { return stbi__err("can't merge dc and ac", "Corrupt JPEG"); @@ -1092,15 +1214,22 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], // first scan for DC coefficient, must be first bzero(data, 64 * sizeof(data[0])); // 0 all the ac values now t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0 || t > 15) { + return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + } diff = t ? 
stbi__extend_receive(j, t) : 0; + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) { + return stbi__err("bad delta", "Corrupt JPEG"); + } dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; - s = dc; - s *= 1u << j->succ_low; - data[0] = s; /* (short)(dc << j->succ_low); */ + if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) { + return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + } + data[0] = (short)(dc * (1u << j->succ_low)); } else { // refinement scan for DC coefficient - if (stbi__jpeg_get_bit(j)) data[0] += (short)(1 << j->succ_low); + if (stbi__jpeg_get_bit(j)) data[0] += (short)(1u << j->succ_low); } return 1; } @@ -1129,10 +1258,14 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length + if (s > j->code_bits) { + return stbi__err("bad huffman code", + "Combined length longer than code bits available"); + } j->code_buffer <<= s; j->code_bits -= s; zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (r / 256) * (1u << shift); + data[zig] = (short)((r >> 8) * (1u << shift)); } else { rs = stbi__jpeg_huff_decode(j, hac); if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG"); @@ -1149,15 +1282,13 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], } else { k += r; zig = stbi__jpeg_dezigzag[k++]; - data[zig] = stbi__extend_receive(j, s) * (1u << shift); + data[zig] = (short)(stbi__extend_receive(j, s) * (1u << shift)); } } } while (k <= j->spec_end); } else { // refinement scan for these AC coefficients - - bit = (short)(1 << j->succ_low); - + bit = (short)(1u << j->succ_low); if (j->eob_run) { --j->eob_run; for (k = j->spec_start; k <= j->spec_end; ++k) { @@ -1273,9 +1404,10 @@ forceinline unsigned char stbi__clamp(int x) { t1 += p2 + p4; \ t0 += p1 + p3; -static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) { +static void stbi__idct_block(unsigned char *out, int out_stride, + 
short data[64]) { int i, val[64], *v = val; - stbi_uc *o; + unsigned char *o; short *d = data; // columns @@ -1338,7 +1470,8 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) { // sse2 integer IDCT. not the fastest possible implementation but it // produces bit-identical results to the generic C version so it's // fully "transparent". -static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { +static void stbi__idct_simd(unsigned char *out, int out_stride, + short data[64]) { // This is constructed to match our regular (generic) integer IDCT exactly. __m128i row0, row1, row2, row3, row4, row5, row6, row7; __m128i tmp; @@ -1540,7 +1673,8 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { // NEON integer IDCT. should produce bit-identical // results to the generic C version. -static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { +static void stbi__idct_simd(unsigned char *out, int out_stride, + short data[64]) { int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); @@ -2025,6 +2159,10 @@ static int stbi__process_marker(stbi__jpeg *z, int m) { sizes[i] = stbi__get8(z->s); n += sizes[i]; } + if (n > 256) { + // Loop over i < n would write past end of values! 
+ return stbi__err("bad DHT header", "Corrupt JPEG"); + } L -= 17; if (tc == 0) { if (!stbi__build_huffman(z->huff_dc + th, sizes)) return 0; @@ -2058,7 +2196,7 @@ static int stbi__process_marker(stbi__jpeg *z, int m) { for (i = 0; i < 5; ++i) if (stbi__get8(z->s) != tag[i]) ok = 0; L -= 5; - if (ok) z->jfif = 1; + if (ok) z->jfif = m; } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'}; int ok = 1; @@ -2150,24 +2288,36 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) { stbi__context *s = z->s; int Lf, p, i, q, h_max = 1, v_max = 1, c; Lf = stbi__get16be(s); - if (Lf < 11) return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG + if (Lf < 11) { + // JPEG + return stbi__err("bad SOF len", "Corrupt JPEG"); + } p = stbi__get8(s); - if (p != 8) - return stbi__err("only 8-bit", - "JPEG format not supported: 8-bit only"); // JPEG baseline + if (p != 8) { + // JPEG baseline + return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only"); + } s->img_y = stbi__get16be(s); - if (s->img_y == 0) - return stbi__err( - "no header height", - "JPEG format not supported: delayed height"); // Legal, but we don't - // handle it--but neither - // does IJG + if (s->img_y == 0) { + // Legal, but we don't handle it--but neither does IJG + return stbi__err("no header height", + "JPEG format not supported: delayed height"); + } s->img_x = stbi__get16be(s); - if (s->img_x == 0) - return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires + if (s->img_x == 0) { + // JPEG requires + return stbi__err("0 width", "Corrupt JPEG"); + } + if (s->img_y > STBI_MAX_DIMENSIONS) { + return stbi__err("too large", "Very large image (corrupt?)"); + } + if (s->img_x > STBI_MAX_DIMENSIONS) { + return stbi__err("too large", "Very large image (corrupt?)"); + } c = stbi__get8(s); - if (c != 3 && c != 1 && c != 4) + if (c != 3 && c != 1 && c != 4) { return stbi__err("bad component count", "Corrupt JPEG"); + } 
s->img_n = c; for (i = 0; i < c; ++i) { z->img_comp[i].data = NULL; @@ -2202,6 +2352,18 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) { if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; } + // check that plane subsampling factors are integer ratios; + // our resamplers can't deal with fractional ratios + // and I've never seen a non-corrupted JPEG file actually use them + for (i = 0; i < s->img_n; ++i) { + if (h_max % z->img_comp[i].h != 0) { + return stbi__err("bad H", "Corrupt JPEG"); + } + if (v_max % z->img_comp[i].v != 0) { + return stbi__err("bad V", "Corrupt JPEG"); + } + } + // compute interleaved mcu info z->img_h_max = h_max; z->img_v_max = v_max; @@ -2272,6 +2434,27 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) { return 1; } +static unsigned char stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { + // some JPEGs have junk at end, skip over it but if we find what looks + // like a valid marker, resume there + while (!stbi__at_eof(j->s)) { + unsigned char x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker + if (stbi__at_eof(j->s)) return STBI__MARKER_none; + x = stbi__get8(j->s); + if (x != 0x00 && x != 0xff) { + // not a stuffed zero or lead-in to another marker, looks + // like an actual marker, return it + return x; + } + // stuffed zero has x=0 now which ends the loop, meaning we go + // back to regular scan loop. + // repeated 0xff keeps trying to read the next byte of the marker. 
+ } + } + return STBI__MARKER_none; +} + // decode image to YCbCr format static int stbi__decode_jpeg_image(stbi__jpeg *j) { int m; @@ -2287,26 +2470,22 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j) { if (!stbi__process_scan_header(j)) return 0; if (!stbi__parse_entropy_coded_data(j)) return 0; if (j->marker == STBI__MARKER_none) { - // handle 0s at the end of image data from IP Kamera 9060 - while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - if (x == 255) { - j->marker = stbi__get8(j->s); - break; - } - } + j->marker = stbi__skip_jpeg_junk_at_end(j); // if we reach eof without hitting a marker, stbi__get_marker() below // will fail and we'll eventually return 0 } + m = stbi__get_marker(j); + if (STBI__RESTART(m)) m = stbi__get_marker(j); } else if (stbi__DNL(m)) { int Ld = stbi__get16be(j->s); uint32_t NL = stbi__get16be(j->s); if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + m = stbi__get_marker(j); } else { - if (!stbi__process_marker(j, m)) return 0; + if (!stbi__process_marker(j, m)) return 1; + m = stbi__get_marker(j); } - m = stbi__get_marker(j); } if (j->progressive) stbi__jpeg_finish(j); return 1; @@ -2471,9 +2650,10 @@ static unsigned char *stbi__resample_row_nearest(unsigned char *out, // this is a reduced-precision calculation of YCbCr-to-RGB introduced // to make sure the code produces the same results in both SIMD and scalar #define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8) -static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, - const stbi_uc *pcb, const stbi_uc *pcr, - int count, int step) { +static void stbi__YCbCr_to_RGB_row(unsigned char *out, const unsigned char *y, + const unsigned char *pcb, + const unsigned char *pcr, int count, + int step) { int i; for (i = 0; i < count; ++i) { int y_fixed = (y[i] << 20) + (1 << 19); // rounding @@ -2505,18 +2685,19 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const 
stbi_uc *y, else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; + out[0] = (unsigned char)r; + out[1] = (unsigned char)g; + out[2] = (unsigned char)b; out[3] = 255; out += step; } } #if defined(STBI_SSE2) || defined(STBI_NEON) -static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, - stbi_uc const *pcb, stbi_uc const *pcr, - int count, int step) { +static void stbi__YCbCr_to_RGB_simd(unsigned char *out, unsigned char const *y, + unsigned char const *pcb, + unsigned char const *pcr, int count, + int step) { int i = 0; #ifdef STBI_SSE2 @@ -2656,9 +2837,9 @@ static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; + out[0] = (unsigned char)r; + out[1] = (unsigned char)g; + out[2] = (unsigned char)b; out[3] = 255; out += step; } @@ -2722,6 +2903,13 @@ static unsigned char *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, decode_n = z->s->img_n; } + // nothing to do if no components requested; check this now to avoid + // accessing uninitialized coutput[0] later + if (decode_n <= 0) { + stbi__cleanup_jpeg(z); + return NULL; + } + // resample and color-convert { int k; @@ -2876,7 +3064,10 @@ static dontinline void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { unsigned char *result; - stbi__jpeg *j = (stbi__jpeg *)malloc(sizeof(stbi__jpeg)); + stbi__jpeg *j; + j = malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__errpuc("outofmem", "Out of memory"); + bzero(j, sizeof(stbi__jpeg)); j->s = s; stbi__setup_jpeg(j); result = load_jpeg_image(j, x, y, comp, req_comp); @@ -2888,6 +3079,8 @@ static int stbi__jpeg_test(stbi__context *s) { int r; stbi__jpeg *j; j = malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__err("outofmem", "Out of memory"); + bzero(j, sizeof(stbi__jpeg)); j->s = s; stbi__setup_jpeg(j); r = stbi__decode_jpeg_header(j, STBI__SCAN_type); @@ -2910,6 +3103,8 @@ 
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) { static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) { int result; stbi__jpeg *j = (stbi__jpeg *)(malloc(sizeof(stbi__jpeg))); + if (!j) return stbi__err("outofmem", "Out of memory"); + bzero(j, sizeof(stbi__jpeg)); j->s = s; result = stbi__jpeg_info_raw(j, x, y, comp); free(j); @@ -2926,6 +3121,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) { // fast-way is faster to check than jpeg huffman, but slow way is slower #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) +#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet // zlib-style huffman encoding // (jpegs packs from left, zlib from right, so can't share code) @@ -2934,8 +3130,8 @@ typedef struct { uint16_t firstcode[16]; int maxcode[17]; uint16_t firstsymbol[16]; - unsigned char size[288]; - uint16_t value[288]; + unsigned char size[STBI__ZNSYMS]; + uint16_t value[STBI__ZNSYMS]; } stbi__zhuffman; static uint32_t ReverseBits32(uint32_t x) { @@ -3010,6 +3206,7 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, typedef struct { unsigned char *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; uint32_t code_buffer; char *zout; char *zout_start; @@ -3018,14 +3215,20 @@ typedef struct { stbi__zhuffman z_length, z_distance; } stbi__zbuf; +forceinline int stbi__zeof(stbi__zbuf *z) { + return (z->zbuffer >= z->zbuffer_end); +} + forceinline unsigned char stbi__zget8(stbi__zbuf *z) { - if (z->zbuffer >= z->zbuffer_end) return 0; - return *z->zbuffer++; + return stbi__zeof(z) ? 0 : *z->zbuffer++; } static void stbi__fill_bits(stbi__zbuf *z) { do { - assert(z->code_buffer < (1u << z->num_bits)); + if (z->code_buffer >= (1u << z->num_bits)) { + z->zbuffer = z->zbuffer_end; // treat this as EOF so we fail. 
+ return; + } z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits; z->num_bits += 8; } while (z->num_bits <= 24); @@ -3047,10 +3250,17 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) { k = stbi__bit_reverse(a->code_buffer, 16); for (s = STBI__ZFAST_BITS + 1;; ++s) if (k < z->maxcode[s]) break; - if (s == 16) return -1; // invalid code! + if (s >= 16) return -1; // invalid code! // code size is s, so: b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s]; - assert(z->size[b] == s); + if (b >= STBI__ZNSYMS) { + // some data was corrupt somewhere! + return -1; + } + if (z->size[b] != s) { + // was originally an assert, but report failure instead. + return -1; + } a->code_buffer >>= s; a->num_bits -= s; return z->value[b]; @@ -3058,7 +3268,23 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) { forceinline int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) { int b, s; - if (a->num_bits < 16) stbi__fill_bits(a); + if (a->num_bits < 16) { + if (stbi__zeof(a)) { + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding bits + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. 
+ return -1; + } + } else { + stbi__fill_bits(a); + } + } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { s = b >> 9; @@ -3071,13 +3297,19 @@ forceinline int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) { static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) { char *q; - int cur, limit, old_limit; + unsigned int cur, limit; z->zout = zout; if (!z->z_expandable) return stbi__err("output buffer limit", "Corrupt PNG"); - cur = (int)(z->zout - z->zout_start); - limit = old_limit = (int)(z->zout_end - z->zout_start); - while (cur + n > limit) limit *= 2; - q = (char *)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + cur = (unsigned int)(z->zout - z->zout_start); + limit = (unsigned)(z->zout_end - z->zout_start); + if (UINT_MAX - cur < (unsigned)n) { + return stbi__err("outofmem", "Out of memory"); + } + while (cur + n > limit) { + if (limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); + limit *= 2; + } + q = (char *)realloc(z->zout_start, limit); if (q == NULL) return stbi__err("outofmem", "Out of memory"); z->zout_start = q; z->zout = q + cur; @@ -3122,19 +3354,36 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) { int len, dist; if (z == 256) { a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our + // bit buffer so the decoder can just do its speculative decoding. But + // if we actually consumed any of those bits (which is the case when + // num_bits < 16), the stream actually read past the end so it is + // malformed. 
+ return stbi__err("unexpected end", "Corrupt PNG"); + } return 1; } + if (z >= 286) { + // per DEFLATE, length codes 286 and 287 + // must not appear in compressed data + return stbi__err("bad huffman code", "Corrupt PNG"); + } z -= 257; len = stbi__zlength_base[z]; if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); z = stbi__zhuffman_decode(a, &a->z_distance); - if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG"); + if (z < 0 || z >= 30) { + // per DEFLATE, distance codes 30 and 31 + // must not appear in compressed data + return stbi__err("bad huffman code", "Corrupt PNG"); + } dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist", "Corrupt PNG"); - if (zout + len > a->zout_end) { + if (len > a->zout_end - zout) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -3184,11 +3433,12 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a) { c = stbi__zreceive(a, 2) + 3; if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); fill = lencodes[n - 1]; - } else if (c == 17) + } else if (c == 17) { c = stbi__zreceive(a, 3) + 3; - else { - assert(c == 18); + } else if (c == 18) { c = stbi__zreceive(a, 7) + 11; + } else { + return stbi__err("bad codelengths", "Corrupt PNG"); } if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); memset(lencodes + n, fill, c); @@ -3213,7 +3463,7 @@ static int stbi__parse_uncompressed_block(stbi__zbuf *a) { a->code_buffer >>= 8; a->num_bits -= 8; } - assert(a->num_bits == 0); + if (a->num_bits < 0) return stbi__err("zlib corrupt", "Corrupt PNG"); // now fill header the normal way while (k < 4) header[k++] = stbi__zget8(a); len = header[1] * 256 + header[0]; @@ -3234,6 +3484,8 @@ static int stbi__parse_zlib_header(stbi__zbuf *a) { int cm = cmf & 15; /* int cinfo = cmf >> 4; */ int flg = stbi__zget8(a); + if (stbi__zeof(a)) + return stbi__err("bad zlib 
header", "Corrupt PNG"); // zlib spec if ((cmf * 256 + flg) % 31 != 0) return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec if (flg & 32) @@ -3246,7 +3498,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a) { return 1; } -static const unsigned char stbi__zdefault_length[288] = { +static const unsigned char stbi__zdefault_length[STBI__ZNSYMS] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -3282,6 +3534,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) { if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; + a->hit_zeof_once = 0; do { final = stbi__zreceive(a, 1); type = stbi__zreceive(a, 2); @@ -3292,7 +3545,8 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) { } else { if (type == 1) { // use fixed code lengths - if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288)) + if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, + STBI__ZNSYMS)) return 0; if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; @@ -3443,31 +3697,57 @@ enum { STBI__F_up = 2, STBI__F_avg = 3, STBI__F_paeth = 4, - // synthetic filters used for first scanline to avoid needing a dummy row of - // 0s - STBI__F_avg_first, - STBI__F_paeth_first + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first }; static int stbi__de_iphone_flag = 0; static int stbi__unpremultiply_on_load = 0; -static unsigned char first_row_filter[5] = {STBI__F_none, STBI__F_sub, - STBI__F_none, STBI__F_avg_first, - STBI__F_paeth_first}; +static unsigned char first_row_filter[5] = { + STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first, + STBI__F_sub // Paeth with b = c = 0 turns out to be equivalent to sub +}; static int stbi__paeth(int a, int b, int c) { - int p = a + b - c; - int 
pa = abs(p - a); - int pb = abs(p - b); - int pc = abs(p - c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; + // This formulation looks very different from the reference in the PNG spec, + // but is actually equivalent and has favorable data dependencies and admits + // straightforward generation of branch-free code, which helps performance + // significantly. + int thresh = c * 3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? hi : t0; + return t1; } static const unsigned char stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0, 0, 0, 0x01}; +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(unsigned char *dest, + unsigned char *src, uint32_t x, + int img_n) { + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i = x - 1; i >= 0; --i) { + dest[i * 2 + 1] = 255; + dest[i * 2 + 0] = src[i]; + } + } else { + assert(img_n == 3); + for (i = x - 1; i >= 0; --i) { + dest[i * 4 + 3] = 255; + dest[i * 4 + 2] = src[i * 3 + 2]; + dest[i * 4 + 1] = src[i * 3 + 1]; + dest[i * 4 + 0] = src[i * 3 + 0]; + } + } +} + // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, unsigned char *raw, uint32_t raw_len, int out_n, uint32_t x, @@ -3476,6 +3756,8 @@ static int stbi__create_png_image_raw(stbi__png *a, unsigned char *raw, stbi__context *s = a->s; uint32_t i, j, stride = x * out_n * bytes; uint32_t img_len, img_width_bytes; + unsigned char *filler_buf; + int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -3487,9 +3769,13 @@ static int stbi__create_png_image_raw(stbi__png *a, unsigned char *raw, a->out = stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + // note: error exits here don't need to clean up a->out individually, + // 
stbi__do_png always does on error. if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) + return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on @@ -3498,260 +3784,143 @@ static int stbi__create_png_image_raw(stbi__png *a, unsigned char *raw, // always. if (raw_len < img_len) return stbi__err("not enough pixels", "Corrupt PNG"); + // Allocate two scan lines worth of filter workspace buffer. + filler_buf = stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filler_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; + } + for (j = 0; j < y; ++j) { - unsigned char *cur = a->out + stride * j; - unsigned char *prior; + // cur/prior filter buffers alternate + unsigned char *cur = filler_buf + (j & 1) * img_width_bytes; + unsigned char *prior = filler_buf + (~j & 1) * img_width_bytes; + unsigned char *dest = a->out + stride * j; + int nk = width * filter_bytes; int filter = *raw++; - if (filter > 4) return stbi__err("invalid filter", "Corrupt PNG"); - - if (depth < 8) { - assert(img_width_bytes <= x); - cur += - x * out_n - img_width_bytes; // store output to the rightmost img_len - // bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter", "Corrupt PNG"); + break; } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' - // computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // handle first byte explicitly - for (k = 0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none: - cur[k] = raw[k]; - break; - 
case STBI__F_sub: - cur[k] = raw[k]; - break; - case STBI__F_up: - cur[k] = STBI__BYTECAST(raw[k] + prior[k]); - break; - case STBI__F_avg: + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1)); - break; - case STBI__F_paeth: - cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0)); - break; - case STBI__F_avg_first: - cur[k] = raw[k]; - break; - case STBI__F_paeth_first: - cur[k] = raw[k]; - break; - } - } - - if (depth == 8) { - if (img_n != out_n) cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes + 1] = 255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; - } - - // this is a little gross, so that we don't switch per-pixel or - // per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1) * filter_bytes; -#define STBI__CASE(f) \ - case f: \ - for (k = 0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. 
- case STBI__F_none: - memcpy(cur, raw, nk); - break; - STBI__CASE(STBI__F_sub) { - cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); - } - break; - STBI__CASE(STBI__F_up) { - cur[k] = STBI__BYTECAST(raw[k] + prior[k]); - } - break; - STBI__CASE(STBI__F_avg) { - cur[k] = STBI__BYTECAST(raw[k] + - ((prior[k] + cur[k - filter_bytes]) >> 1)); - } - break; - STBI__CASE(STBI__F_paeth) { - cur[k] = STBI__BYTECAST(raw[k] + - stbi__paeth(cur[k - filter_bytes], prior[k], - prior[k - filter_bytes])); - } - break; - STBI__CASE(STBI__F_avg_first) { - cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); - } - break; - STBI__CASE(STBI__F_paeth_first) { - cur[k] = STBI__BYTECAST(raw[k] + - stbi__paeth(cur[k - filter_bytes], 0, 0)); - } - break; - } -#undef STBI__CASE - raw += nk; - } else { - assert(img_n + 1 == out_n); -#define STBI__CASE(f) \ - case f: \ - for (i = x - 1; i >= 1; --i, cur[filter_bytes] = 255, raw += filter_bytes, \ - cur += output_bytes, prior += output_bytes) \ - for (k = 0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { - cur[k] = raw[k]; - } + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + + ((prior[k] + cur[k - filter_bytes]) >> 1)); break; - STBI__CASE(STBI__F_sub) { - cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); - } - break; - STBI__CASE(STBI__F_up) { + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + // prior[k] == stbi__paeth(0, prior[k], 0) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); - } - break; - STBI__CASE(STBI__F_avg) { + for (k = filter_bytes; k < nk; ++k) cur[k] = STBI__BYTECAST(raw[k] + - ((prior[k] + cur[k - output_bytes]) >> 1)); - } + stbi__paeth(cur[k - filter_bytes], prior[k], + prior[k - filter_bytes])); break; - STBI__CASE(STBI__F_paeth) { - cur[k] = STBI__BYTECAST(raw[k] + - stbi__paeth(cur[k - output_bytes], prior[k], - prior[k - output_bytes])); - } + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + 
cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); break; - STBI__CASE(STBI__F_avg_first) { - cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); - } - break; - STBI__CASE(STBI__F_paeth_first) { - cur[k] = - STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0)); - } - break; - } -#undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. - if (depth == 16) { - cur = a->out + stride * j; // start at the beginning of the row again - for (i = 0; i < x; ++i, cur += output_bytes) { - cur[filter_bytes + 1] = 255; - } - } } - } - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j = 0; j < y; ++j) { - unsigned char *cur = a->out + stride * j; - unsigned char *in = a->out + stride * j + x * out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common - // 8-bit path optimal at minimal cost for 1/2/4-bit png guarante byte - // alignment, if width is not multiple of 8/4/2 we'll decode dummy - // trailing data that will be skipped in the later loop + raw += nk; + + // expand decoded bits in cur to dest, also adding an extra alpha channel + // if desired + if (depth < 8) { unsigned char scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + unsigned char *in = cur; + unsigned char *out = dest; + unsigned char inb = 0; + uint32_t nsmp = x * img_n; - // note that the final byte might overshoot and write more data than - // desired. we can allocate enough data that this never writes out of - // memory, but it could also overwrite the next scanline. can it - // overwrite non-empty data on the next scanline? yes, consider - // 1-pixel-wide scanlines with 1-bit-per-pixel. 
so we need to explicitly - // clamp the final ones - + // expand bits to bytes first if (depth == 4) { - for (k = x * img_n; k >= 2; k -= 2, ++in) { - *cur++ = scale * ((*in >> 4)); - *cur++ = scale * ((*in) & 0x0f); + for (i = 0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; } - if (k > 0) *cur++ = scale * ((*in >> 4)); } else if (depth == 2) { - for (k = x * img_n; k >= 4; k -= 4, ++in) { - *cur++ = scale * ((*in >> 6)); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in) & 0x03); + for (i = 0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; } - if (k > 0) *cur++ = scale * ((*in >> 6)); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k = x * img_n; k >= 8; k -= 8, ++in) { - *cur++ = scale * ((*in >> 7)); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in) & 0x01); + } else { + assert(depth == 1); + for (i = 0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; } - if (k > 0) *cur++ = scale * ((*in >> 7)); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride * j; + + // insert alpha=255 values if desired + if (img_n != out_n) stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x * 
img_n); + else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + // TODO TYPES + uint16_t *dest16 = (uint16_t *)dest; + uint32_t nsmp = x * img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + assert(img_n + 1 == out_n); if (img_n == 1) { - for (q = x - 1; q >= 0; --q) { - cur[q * 2 + 1] = 255; - cur[q * 2 + 0] = cur[q]; + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; } } else { assert(img_n == 3); - for (q = x - 1; q >= 0; --q) { - cur[q * 4 + 3] = 255; - cur[q * 4 + 2] = cur[q * 3 + 2]; - cur[q * 4 + 1] = cur[q * 3 + 1]; - cur[q * 4 + 0] = cur[q * 3 + 0]; + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; } } } } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. 
- unsigned char *cur = a->out; - uint16_t *cur16 = (uint16_t *)cur; - - for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) { - *cur16 = (cur[0] << 8) | cur[1]; - } } + free(filler_buf); + if (!all_ok) return 0; + return 1; } @@ -3767,6 +3936,7 @@ static int stbi__create_png_image(stbi__png *a, unsigned char *image_data, a->s->img_x, a->s->img_y, depth, color); // de-interlacing final = stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + if (!final) return stbi__err("outofmem", "Out of memory"); for (p = 0; p < 7; ++p) { int xorig[] = {0, 4, 0, 2, 0, 1, 0}; int yorig[] = {0, 0, 4, 0, 2, 0, 1}; @@ -3954,10 +4124,10 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { first = 0; if (c.length != 13) return stbi__err("bad IHDR len", "Corrupt PNG"); s->img_x = stbi__get32be(s); - if (s->img_x > (1 << 24)) - return stbi__err("too large", "Very large image (corrupt?)"); s->img_y = stbi__get32be(s); - if (s->img_y > (1 << 24)) + if (s->img_y > STBI_MAX_DIMENSIONS) + return stbi__err("too large", "Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large", "Very large image (corrupt?)"); z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && @@ -3985,15 +4155,14 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); - if (scan == STBI__SCAN_header) return 1; } else { // if paletted, then pal_n is our final components, and // img_n is # components to decompress/filter. 
s->img_n = 1; if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large", "Corrupt PNG"); - // if SCAN_header, have to scan to see if we have a tRNS } + // even with SCAN_header, have to scan to see if we have a tRNS break; } @@ -4031,6 +4200,12 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { if (c.length != (uint32_t)s->img_n * 2) return stbi__err("bad tRNS len", "Corrupt PNG"); has_trans = 1; + // non-paletted with tRNS = constant alpha. + // if header-scanning, we can stop now. + if (scan == STBI__SCAN_header) { + ++s->img_n; + return 1; + } if (z->depth == 16) { for (k = 0; k < s->img_n; ++k) tc16[k] = (uint16_t)stbi__get16be(s); // copy the values as-is @@ -4048,17 +4223,19 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (pal_img_n && !pal_len) return stbi__err("no PLTE", "Corrupt PNG"); if (scan == STBI__SCAN_header) { - s->img_n = pal_img_n; + // header scan definitely stops at first IDAT + if (pal_img_n) s->img_n = pal_img_n; return 1; } + if (c.length > (1u << 30)) + return stbi__err("IDAT size limit", + "IDAT section larger than 2^30 bytes"); if ((int)(ioff + c.length) < (int)ioff) return 0; if (ioff + c.length > idata_limit) { - uint32_t idata_limit_old = idata_limit; unsigned char *p; if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; while (ioff + c.length > idata_limit) idata_limit *= 2; - (void)idata_limit_old; - p = STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); + p = realloc(z->idata, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); z->idata = p; } @@ -4114,7 +4291,8 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { } free(z->expanded); z->expanded = NULL; - stbi__get32be(s); /* nothings/stb#835 */ + // end of PNG chunk, read and skip CRC + stbi__get32be(s); return 1; } @@ -4122,7 +4300,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { // if critical, fail if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if ((c.type & (1 << 29)) == 0) { -#ifndef STBI_NO_FAILURE_STRINGS +#if !defined(STBI_NO_FAILURE_STRINGS) && !defined(STBI_FAILURE_USERMSG) // not threadsafe static char invalid_chunk[] = "XXXX PNG chunk not known"; invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); @@ -4147,10 +4325,13 @@ static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { - if (p->depth < 8) + if (p->depth <= 8) ri->bits_per_channel = 8; + else if (p->depth == 16) + ri->bits_per_channel = 16; else - ri->bits_per_channel = p->depth; + return stbi__errpuc("bad bits_per_channel", + "PNG not supported: unsupported color depth"); result = p->out; p->out = NULL; if (req_comp && req_comp != p->s->img_out_n) { @@ -4297,6 +4478,10 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, g->bgindex = stbi__get8(s); g->ratio = stbi__get8(s); g->transparent = -1; + if (g->w > STBI_MAX_DIMENSIONS) + return stbi__err("too large", "Very large image (corrupt?)"); + if (g->h > STBI_MAX_DIMENSIONS) + return stbi__err("too large", "Very large image (corrupt?)"); if (comp != 0) { *comp = 4; // can't actually tell whether it's 3 or 4 until 
we parse the // comments @@ -4310,6 +4495,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) { stbi__gif *g = (stbi__gif *)malloc(sizeof(stbi__gif)); + if (!g) return stbi__err("outofmem", "Out of memory"); if (!stbi__gif_header(s, g, comp, 1)) { free(g); stbi__rewind(s); @@ -4458,7 +4644,7 @@ static unsigned char *stbi__gif_load_next(stbi__context *s, stbi__gif *g, if (!g->out || !g->background || !g->history) return stbi__errpuc("outofmem", "Out of memory"); - // image is treated as "transparent" at the start - ie, nothing overwrites + // image is treated as "transparent" at the start - i.e. nothing overwrites // the current background; background colour is only used for pixels that // are not rendered first frame, after that "background" color refers to // the color that was there the previous frame. @@ -4469,7 +4655,7 @@ static unsigned char *stbi__gif_load_next(stbi__context *s, stbi__gif *g, pcount); // pixels that were affected previous frame first_frame = 1; } else { - // second frame - how do we dispoase of the previous one? + // second frame - how do we dispose of the previous one? dispose = (g->eflags & 0x1C) >> 2; pcount = g->w * g->h; @@ -4492,10 +4678,10 @@ static unsigned char *stbi__gif_load_next(stbi__context *s, stbi__gif *g, } } } else { - // This is a non-disposal case eithe way, so just + // This is a non-disposal case either way, so just // leave the pixels as is, and they will become the new background // 1: do not dispose - // 0: not specified. + // 0: not specified. 
} // background is what out is after the undoing of the previou frame; @@ -4622,6 +4808,16 @@ static unsigned char *stbi__gif_load_next(stbi__context *s, stbi__gif *g, } } +static void *stbi__load_gif_main_outofmem(stbi__gif *g, unsigned char *out, + int **delays) { + free(g->out); + free(g->history); + free(g->background); + if (out) free(out); + if (delays && *delays) free(*delays); + return stbi__errpuc("outofmem", "Out of memory"); +} + static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) { if (stbi__gif_test(s)) { @@ -4644,21 +4840,29 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, ++layers; stride = g->w * g->h * 4; if (out) { - out = (unsigned char *)realloc(out, layers * stride); - if (!out) abort(); + void *tmp = (unsigned char *)realloc(out, layers * stride); + if (!tmp) + return stbi__load_gif_main_outofmem(g, out, delays); + else { + out = (unsigned char *)tmp; + } if (delays) { - *delays = (int *)realloc(*delays, sizeof(int) * layers); - if (!*delays) abort(); + int *new_delays = (int *)realloc(*delays, sizeof(int) * layers); + if (!new_delays) + return stbi__load_gif_main_outofmem(g, out, delays); + *delays = new_delays; } } else { out = malloc(layers * stride); + if (!out) return stbi__load_gif_main_outofmem(g, out, delays); if (delays) { *delays = malloc(layers * sizeof(int)); + if (!*delays) return stbi__load_gif_main_outofmem(g, out, delays); } } memcpy(out + ((layers - 1) * stride), u, stride); if (layers >= 2) { - two_back = out - 2 * stride; + two_back = out + ((layers - 2) * stride); } if (delays) { (*delays)[layers - 1U] = g->delay; @@ -4720,7 +4924,6 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) { // Known limitations: // Does not support comments in the header section // Does not support ASCII image data (formats P2 and P3) -// Does not support 16-bit-per-channel static int stbi__pnm_test(stbi__context *s) { char p, 
t; @@ -4737,20 +4940,37 @@ static dontinline void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { unsigned char *out; - if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, - (int *)&s->img_n)) { - return 0; + ri->bits_per_channel = + stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); + if (ri->bits_per_channel == 0) return 0; + if (s->img_y > STBI_MAX_DIMENSIONS) { + return stbi__errpuc("too large", "Very large image (corrupt?)"); + } + if (s->img_x > STBI_MAX_DIMENSIONS) { + return stbi__errpuc("too large", "Very large image (corrupt?)"); } *x = s->img_x; *y = s->img_y; if (comp) *comp = s->img_n; - if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) { + if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, + ri->bits_per_channel / 8, 0)) { return stbi__errpuc("too large", "PNM too large"); } - out = stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); - stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + out = stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, + ri->bits_per_channel / 8, 0); + if (!stbi__getn( + s, out, + s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { + free(out); + return stbi__errpuc("bad PNM", "PNM file truncated"); + } if (req_comp && req_comp != s->img_n) { - out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (ri->bits_per_channel == 16) { + out = (unsigned char *)stbi__convert_format16( + (uint16_t *)out, s->img_n, req_comp, s->img_x, s->img_y); + } else { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + } if (out == NULL) return out; // stbi__convert_format frees input on failure } return out; @@ -4779,6 +4999,12 @@ static int stbi__pnm_getinteger(stbi__context *s, char *c) { while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { value = value * 10 + (*c - '0'); *c = (char)stbi__get8(s); + // TODO INT_MAX + if ((value > 214748364) || (value == 214748364 && *c > '7')) { + return 
stbi__err( + "integer parse overflow", + "Parsing an integer in the PPM header overflowed a 32-bit int"); + } } return value; } @@ -4802,15 +5028,30 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) { c = (char)stbi__get8(s); stbi__pnm_skip_whitespace(s, &c); *x = stbi__pnm_getinteger(s, &c); // read width + if (*x == 0) { + return stbi__err("invalid_width", + "PPM image header had zero or overflowing width"); + } stbi__pnm_skip_whitespace(s, &c); *y = stbi__pnm_getinteger(s, &c); // read height + if (*y == 0) { + return stbi__err("invalid height", + "PPM image header had zero or overflowing height"); + } stbi__pnm_skip_whitespace(s, &c); maxv = stbi__pnm_getinteger(s, &c); // read max value - if (maxv > 255) - return stbi__err("max value > 255", "PPM image not 8-bit"); - else { - return 1; - } + if (maxv > 65535) + return stbi__err("max value > 65535", + "PPM image supports only 8-bit and 16-bit images"); + else if (maxv > 255) + return 16; + else + return 8; +} + +static int stbi__pnm_is16(stbi__context *s) { + if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) return 1; + return 0; } static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) { @@ -4831,7 +5072,12 @@ static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) { } static int stbi__is_16_main(stbi__context *s) { +#ifndef STBI_NO_PNG if (stbi__png_is16(s)) return 1; +#endif +#ifndef STBI_NO_PNM + if (stbi__pnm_is16(s)) return 1; +#endif return 0; } @@ -4848,9 +5094,10 @@ int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) { int r; stbi__context s; long pos = ftell(f); + if (pos < 0) return stbi__err("bad file", "ftell() failed"); stbi__start_file(&s, f); r = stbi__info_main(&s, x, y, comp); - fseek(f, pos, SEEK_SET); + if (fseek(f, pos, SEEK_SET)) return stbi__err("bad file", "fseek() failed"); return r; } @@ -4867,9 +5114,10 @@ int stbi_is_16_bit_from_file(FILE *f) { int r; stbi__context s; long pos = ftell(f); + if (pos < 0) return 
stbi__err("bad file", "ftell() failed"); stbi__start_file(&s, f); r = stbi__is_16_main(&s); - fseek(f, pos, SEEK_SET); + if (fseek(f, pos, SEEK_SET)) return stbi__err("bad file", "fseek() failed"); return r; } diff --git a/third_party/stb/stb_image.h b/third_party/stb/stb_image.h index 9f3076887..558506908 100644 --- a/third_party/stb/stb_image.h +++ b/third_party/stb/stb_image.h @@ -13,12 +13,14 @@ enum { struct FILE; typedef struct { - int (*read)(void *user, char *data, - int size); // fill 'data' with 'size' bytes. return number of - // bytes actually read - void (*skip)(void *user, int n); // skip the next 'n' bytes, or 'unget' the - // last -n bytes if negative - int (*eof)(void *user); // returns nonzero if we are at end of file/data + // fill 'data' with 'size' bytes. return number of bytes actually read + int (*read)(void *user, char *data, int size); + + // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + void (*skip)(void *user, int n); + + // returns nonzero if we are at end of file/data + int (*eof)(void *user); } stbi_io_callbacks; // @@ -63,7 +65,6 @@ unsigned short *stbi_load_from_file_16(struct FILE *f, int *x, int *y, int desired_channels); // get a VERY brief reason for failure -// NOT THREADSAFE const char *stbi_failure_reason(void); // free the loaded image -- this is just free() diff --git a/third_party/stb/stb_image_resize.c b/third_party/stb/stb_image_resize.c index 55d1b8351..24cb373e5 100644 --- a/third_party/stb/stb_image_resize.c +++ b/third_party/stb/stb_image_resize.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby 
granted, provided that the │ @@ -655,9 +655,14 @@ static void stbir__calculate_coefficients_upsample( total_filter += coefficient_group[i]; } - STBIR_ASSERT(stbir__filter_info_table[filter].kernel( - (float)(in_last_pixel + 1) + 0.5f - in_center_of_out, - 1 / scale) == 0); + // NOTE(fg): Not actually true in general, nor is there any reason to expect + // it should be. It would be true in exact math but is at best approximately + // true in floating-point math, and it would not make sense to try and put + // actual bounds on this here because it depends on the image aspect ratio + // which can get pretty extreme. + // STBIR_ASSERT(stbir__filter_info_table[filter].kernel( + // (float)(in_last_pixel + 1) + 0.5f - in_center_of_out, + // 1 / scale) == 0); STBIR_ASSERT(total_filter > 0.9); STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off. @@ -701,9 +706,14 @@ static void stbir__calculate_coefficients_downsample( stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio; } - STBIR_ASSERT(stbir__filter_info_table[filter].kernel( - (float)(out_last_pixel + 1) + 0.5f - out_center_of_in, - scale_ratio) == 0); + // NOTE(fg): Not actually true in general, nor is there any reason to expect + // it should be. It would be true in exact math but is at best approximately + // true in floating-point math, and it would not make sense to try and put + // actual bounds on this here because it depends on the image aspect ratio + // which can get pretty extreme. 
+ // STBIR_ASSERT(stbir__filter_info_table[filter].kernel( + // (float)(out_last_pixel + 1) + 0.5f - out_center_of_in, + // scale_ratio) == 0); for (i = out_last_pixel - out_first_pixel; i >= 0; i--) { if (coefficient_group[i]) break; @@ -851,7 +861,7 @@ static float* stbir__get_decode_buffer(stbir__info* stbir_info) { } #define STBIR__DECODE(type, colorspace) \ - ((type) * (STBIR_MAX_COLORSPACES) + (colorspace)) + ((int)(type) * (STBIR_MAX_COLORSPACES) + (int)(colorspace)) static void stbir__decode_scanline(stbir__info* stbir_info, int n) { int c; @@ -1199,7 +1209,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int out_pixel_index = k * 1; float coefficient = horizontal_coefficients[coefficient_group + k - n0]; - STBIR_ASSERT(coefficient != 0); output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; } @@ -1220,7 +1229,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int out_pixel_index = k * 2; float coefficient = horizontal_coefficients[coefficient_group + k - n0]; - STBIR_ASSERT(coefficient != 0); output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; output_buffer[out_pixel_index + 1] += @@ -1243,7 +1251,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int out_pixel_index = k * 3; float coefficient = horizontal_coefficients[coefficient_group + k - n0]; - STBIR_ASSERT(coefficient != 0); output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; output_buffer[out_pixel_index + 1] += @@ -1268,7 +1275,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int out_pixel_index = k * 4; float coefficient = horizontal_coefficients[coefficient_group + k - n0]; - STBIR_ASSERT(coefficient != 0); output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; output_buffer[out_pixel_index + 1] += diff --git a/third_party/stb/stb_image_write.c 
b/third_party/stb/stb_image_write.c index 63187511d..1f984bc78 100644 --- a/third_party/stb/stb_image_write.c +++ b/third_party/stb/stb_image_write.c @@ -1,123 +1,21 @@ -/* stb_image_write - v1.13 - public domain - http://nothings.org/stb - * writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 - * no warranty implied; use at your own risk - * - * ABOUT: - * - * This file is a library for writing images to stdio or a callback. - * - * The PNG output is not optimal; it is 20-50% larger than the file - * written by a decent optimizing implementation; though providing a - * custom zlib compress function (see STBIW_ZLIB_COMPRESS) can - * mitigate that. This library is designed for source code - * compactness and simplicity, not optimal image file size or - * run-time performance. - * - * USAGE: - * - * There are five functions, one for each image file format: - * - * stbi_write_png - * stbi_write_bmp - * stbi_write_tga - * stbi_write_jpg - * stbi_write_hdr - * - * stbi_flip_vertically_on_write - * - * There are also five equivalent functions that use an arbitrary - * write function. You are expected to open/close your - * file-equivalent before and after calling these: - * - * stbi_write_png_to_func - * stbi_write_bmp_to_func - * stbi_write_tga_to_func - * stbi_write_hdr_to_func - * stbi_write_jpg_to_func - * - * where the callback is: - * void stbi_write_func(void *context, void *data, int size); - * - * You can configure it with these: - * stbi_write_tga_with_rle - * stbi_write_png_compression_level - * stbi_write_force_png_filter - * - * Each function returns 0 on failure and non-0 on success. - * - * The functions create an image file defined by the parameters. The - * image is a rectangle of pixels stored from left-to-right, - * top-to-bottom. Each pixel contains 'comp' channels of data stored - * interleaved with 8-bits per channel, in the following order: 1=Y, - * 2=YA, 3=RGB, 4=RGBA. (Y is monochrome color.) 
The rectangle is 'w' - * pixels wide and 'h' pixels tall. The *data pointer points to the - * first byte of the top-left-most pixel. For PNG, "stride_in_bytes" - * is the distance in bytes from the first byte of a row of pixels to - * the first byte of the next row of pixels. - * - * PNG creates output files with the same number of components as the - * input. The BMP format expands Y to RGB in the file format and does - * not output alpha. - * - * PNG supports writing rectangles of data even when the bytes - * storing rows of data are not consecutive in memory (e.g. - * sub-rectangles of a larger image), by supplying the stride between - * the beginning of adjacent rows. The other formats do not. (Thus - * you cannot write a native-format BMP through the BMP writer, both - * because it is in BGR order and because it may have padding at the - * end of the line.) - * - * PNG allows you to set the deflate compression level by setting the - * global variable 'stbi_write_png_compression_level' (it defaults to - * 8). - * - * HDR expects linear float data. Since the format is always 32-bit - * rgb(e) data, alpha (if provided) is discarded, and for monochrome - * data it is replicated across all three channels. - * - * TGA supports RLE or non-RLE compressed data. To use - * non-RLE-compressed data, set the global variable - * 'stbi_write_tga_with_rle' to 0. - * - * JPEG does ignore alpha channels in input data; quality is between - * 1 and 100. Higher quality looks better but results in a bigger - * image. JPEG baseline (no JPEG progressive). 
- * - * CREDITS: - * - * - * Sean Barrett - PNG/BMP/TGA - * Baldur Karlsson - HDR - * Jean-Sebastien Guay - TGA monochrome - * Tim Kelsey - misc enhancements - * Alan Hickman - TGA RLE - * Emmanuel Julien - initial file IO callback implementation - * Jon Olick - original jo_jpeg.cpp code - * Daniel Gibson - integrate JPEG, allow external zlib - * Aarni Koskela - allow choosing PNG filter - * - * bugfixes: - * github:Chribba - * Guillaume Chereau - * github:jry2 - * github:romigrou - * Sergio Gonzalez - * Jonas Karlsson - * Filip Wasil - * Thatcher Ulrich - * github:poppolopoppo - * Patrick Boettcher - * github:xeekworx - * Cap Petschulat - * Simon Rodriguez - * Ivan Tikhonov - * github:ignotion - * Adam Schackart - * - * LICENSE - * - * Public Domain (www.unlicense.org) - */ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/stb/stb_image_write.h" #include "dsp/core/core.h" #include "libc/assert.h" @@ -131,16 +29,32 @@ #include "libc/str/str.h" #include "third_party/zlib/zlib.h" +asm(".ident\t\"\\n\\n\ +stb_image_write (Public Domain)\\n\ +Credit: Sean Barrett, et al.\\n\ +http://nothings.org/stb\""); + #define STBIW_UCHAR(x) (unsigned char)((x)&0xff) -#define STBIW_REALLOC_SIZED(p, oldsz, newsz) realloc(p, newsz) + +#define stbiw__wpng4(o, a, b, c, d) \ + ((o)[0] = STBIW_UCHAR(a), (o)[1] = STBIW_UCHAR(b), (o)[2] = STBIW_UCHAR(c), \ + (o)[3] = STBIW_UCHAR(d), (o) += 4) +#define stbiw__wp32(data, v) \ + stbiw__wpng4(data, (v) >> 24, (v) >> 16, (v) >> 8, (v)); +#define stbiw__wptag(data, s) stbiw__wpng4(data, s[0], s[1], s[2], s[3]) typedef struct { stbi_write_func *func; void *context; + unsigned char buffer[64]; + int buf_used; } stbi__write_context; -int stbi__flip_vertically_on_write = 0; +int stbi_write_png_compression_level = 4; int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; + +static int stbi__flip_vertically_on_write = 0; void stbi_flip_vertically_on_write(int flag) { stbi__flip_vertically_on_write = flag; @@ -168,9 +82,6 @@ static void stbi__end_write_file(stbi__write_context *s) { fclose((FILE *)s->context); } -typedef unsigned int stbiw_uint32; -typedef int stb_image_write_test[sizeof(stbiw_uint32) == 4 ? 1 : -1]; - static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) { while (*fmt) { switch (*fmt++) { @@ -190,7 +101,7 @@ static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) { break; } case '4': { - stbiw_uint32 x = va_arg(v, int); + unsigned int x = va_arg(v, int); unsigned char b[4]; b[0] = STBIW_UCHAR(x); b[1] = STBIW_UCHAR(x >> 8); @@ -212,17 +123,31 @@ static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) 
{ va_end(v); } +static void stbiw__write_flush(stbi__write_context *s) { + if (s->buf_used) { + s->func(s->context, &s->buffer, s->buf_used); + s->buf_used = 0; + } +} + static void stbiw__putc(stbi__write_context *s, unsigned char c) { s->func(s->context, &c, 1); } +static void stbiw__write1(stbi__write_context *s, unsigned char a) { + if ((size_t)s->buf_used + 1 > sizeof(s->buffer)) stbiw__write_flush(s); + s->buffer[s->buf_used++] = a; +} + static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) { - unsigned char arr[3]; - arr[0] = a; - arr[1] = b; - arr[2] = c; - s->func(s->context, arr, 3); + int n; + if ((size_t)s->buf_used + 3 > sizeof(s->buffer)) stbiw__write_flush(s); + n = s->buf_used; + s->buf_used = n + 3; + s->buffer[n + 0] = a; + s->buffer[n + 1] = b; + s->buffer[n + 2] = c; } static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, @@ -231,7 +156,7 @@ static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, unsigned char bg[3] = {255, 0, 255}, px[3]; int k; - if (write_alpha < 0) s->func(s->context, &d[comp - 1], 1); + if (write_alpha < 0) stbiw__write1(s, d[comp - 1]); switch (comp) { case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as @@ -240,7 +165,7 @@ static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, if (expand_mono) stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp else - s->func(s->context, d, 1); // monochrome TGA + stbiw__write1(s, d[0]); // monochrome TGA break; case 4: if (!write_alpha) { @@ -254,14 +179,14 @@ static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); break; } - if (write_alpha > 0) s->func(s->context, &d[comp - 1], 1); + if (write_alpha > 0) stbiw__write1(s, d[comp - 1]); } static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int 
scanline_pad, int expand_mono) { - stbiw_uint32 zero = 0; + unsigned int zero = 0; int i, j, j_end; if (y <= 0) return; if (stbi__flip_vertically_on_write) vdir *= -1; @@ -277,6 +202,7 @@ static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, unsigned char *d = (unsigned char *)data + (j * x + i) * comp; stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); } + stbiw__write_flush(s); s->func(s->context, &zero, scanline_pad); } } @@ -299,25 +225,41 @@ static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) { - int pad = (-x * 3) & 3; - return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void *)data, 0, pad, - "11 4 22 4" - "4 44 22 444444", - 'B', 'M', 14 + 40 + (x * 3 + pad) * y, 0, 0, - 14 + 40, // file header - 40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header + if (comp != 4) { + // write RGB bitmap + int pad; + pad = (-x * 3) & 3; + return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void *)data, 0, pad, + "11 4 22 4" + "4 44 22 444444", + 'B', 'M', 14 + 40 + (x * 3 + pad) * y, 0, 0, + 14 + 40, // file header + 40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header + } else { + // RGBA bitmaps need a v4 header + // use BI_BITFIELDS mode with 32bpp and alpha mask + // (straight BI_RGB with alpha mask doesn't work in most readers) + return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void *)data, 1, 0, + "11 4 22 4" + "4 44 22 444444 4444 4 444 444 444 444", + 'B', 'M', 14 + 108 + x * y * 4, 0, 0, + 14 + 108, // file header + 108, x, y, 1, 32, 3, 0, 0, 0, 0, 0, 0xff0000, 0xff00, + 0xff, 0xff000000u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0); // bitmap V4 header + } } int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) { - stbi__write_context s; + stbi__write_context s = {0}; stbi__start_write_callbacks(&s, func, context); return stbi_write_bmp_core(&s, x, y, 
comp, data); } int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) { - stbi__write_context s; + stbi__write_context s = {0}; if (stbi__start_write_file(&s, filename)) { int r = stbi_write_bmp_core(&s, x, y, comp, data); stbi__end_write_file(&s); @@ -393,31 +335,32 @@ static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, if (diff) { unsigned char header = STBIW_UCHAR(len - 1); - s->func(s->context, &header, 1); + stbiw__write1(s, header); for (k = 0; k < len; ++k) { stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); } } else { unsigned char header = STBIW_UCHAR(len - 129); - s->func(s->context, &header, 1); + stbiw__write1(s, header); stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); } } } + stbiw__write_flush(s); } return 1; } int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) { - stbi__write_context s; + stbi__write_context s = {0}; stbi__start_write_callbacks(&s, func, context); return stbi_write_tga_core(&s, x, y, comp, (void *)data); } int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) { - stbi__write_context s; + stbi__write_context s = {0}; if (stbi__start_write_file(&s, filename)) { int r = stbi_write_tga_core(&s, x, y, comp, (void *)data); stbi__end_write_file(&s); @@ -426,6 +369,250 @@ int stbi_write_tga(char const *filename, int x, int y, int comp, return 0; } +/* + * PNG writer + */ + +static unsigned char *stbi_zlib_compress(unsigned char *data, int size, + int *out_len, int quality) { + unsigned long newsize; + unsigned char *newdata, *trimdata; + assert(0 <= size && size <= INT_MAX); + if ((newdata = malloc((newsize = compressBound(size)))) && + compress2(newdata, &newsize, data, size, + stbi_write_png_compression_level) == Z_OK) { + *out_len = newsize; + if ((trimdata = realloc(newdata, newsize))) { + return trimdata; + } else { + return newdata; + } + } + free(newdata); + return NULL; +} + 
+static void stbiw__wpcrc(unsigned char **data, int len) { + unsigned int crc = crc32(0, *data - len - 4, len + 4); + stbiw__wp32(*data, crc); +} + +forceinline unsigned char stbiw__paeth(int a, int b, int c) { + int p = a + b - c, pa = abs(p - a), pb = abs(p - b), pc = abs(p - c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(const unsigned char *pixels, + int stride_bytes, int width, int height, + int y, int n, int filter_type, + signed char *line_buffer) { + int mapping[] = {0, 1, 2, 3, 4}; + int firstmap[] = {0, 1, 0, 5, 6}; + const unsigned char *z; + int *mymap, i, type, signed_stride; + + mymap = (y != 0) ? mapping : firstmap; + type = mymap[filter_type]; + z = pixels + + stride_bytes * (stbi__flip_vertically_on_write ? height - 1 - y : y); + signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + + if (type == 0) { + memcpy(line_buffer, z, width * n); + return; + } + + for (i = 0; i < n; ++i) { + switch (type) { + case 1: + line_buffer[i] = z[i]; + break; + case 2: + line_buffer[i] = z[i] - z[i - signed_stride]; + break; + case 3: + line_buffer[i] = z[i] - (z[i - signed_stride] >> 1); + break; + case 4: + line_buffer[i] = + (signed char)(z[i] - stbiw__paeth(0, z[i - signed_stride], 0)); + break; + case 5: + line_buffer[i] = z[i]; + break; + case 6: + line_buffer[i] = z[i]; + break; + } + } + + switch (type) { + case 1: + for (i = n; i < width * n; ++i) { + line_buffer[i] = z[i] - z[i - n]; + } + break; + case 2: + for (i = n; i < width * n; ++i) { + line_buffer[i] = z[i] - z[i - signed_stride]; + } + break; + case 3: + for (i = n; i < width * n; ++i) { + line_buffer[i] = z[i] - ((z[i - n] + z[i - signed_stride]) >> 1); + } + break; + case 4: + for (i = n; i < width * n; ++i) { + line_buffer[i] = z[i] - stbiw__paeth(z[i - n], z[i - 
signed_stride], + z[i - signed_stride - n]); + } + break; + case 5: + for (i = n; i < width * n; ++i) { + line_buffer[i] = z[i] - (z[i - n] >> 1); + } + break; + case 6: + for (i = n; i < width * n; ++i) { + line_buffer[i] = z[i] - stbiw__paeth(z[i - n], 0, 0); + } + break; + } +} + +unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, + int stride_bytes, int x, int y, int n, + int *out_len) { + int force_filter = stbi_write_force_png_filter; + int ctype[5] = {-1, 0, 4, 2, 6}; + unsigned char sig[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + unsigned char *out, *o, *filt, *zlib; + signed char *line_buffer; + int j, zlen; + + if (stride_bytes == 0) stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = malloc((x * n + 1) * y); + if (!filt) return 0; + line_buffer = malloc(x * n); + if (!line_buffer) { + free(filt); + return 0; + } + for (j = 0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, + line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, + line_buffer); + + // Estimate the entropy of the line using this filter; the less, the + // better. 
+ est = 0; + for (i = 0; i < x * n; ++i) { + est += abs((signed char)line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us + // the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, + line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer + // contains the data + filt[j * (x * n + 1)] = (unsigned char)filter_type; + memmove(filt + j * (x * n + 1) + 1, line_buffer, x * n); + } + free(line_buffer); + zlib = stbi_zlib_compress(filt, y * (x * n + 1), &zlen, + stbi_write_png_compression_level); + free(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = malloc(8 + 12 + 13 + 12 + zlen + 12); + if (!out) return 0; + *out_len = 8 + 12 + 13 + 12 + zlen + 12; + + o = out; + memmove(o, sig, 8); + o += 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o, 13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + memmove(o, zlib, zlen); + o += zlen; + free(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o, 0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o, 0); + + assert(o == out + *out_len); + + return out; +} + +int stbi_write_png(const char *filename, int x, int y, int comp, + const void *data, int stride_bytes) { + int len; + FILE *f; + unsigned char *png; + png = stbi_write_png_to_mem(data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + f = fopen(filename, "wb"); + if (!f) { + free(png); + return 0; + } + fwrite(png, 1, len, f); + fclose(f); + free(png); + return 1; +} + +int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, + int comp, const void *data, int stride_bytes) { + int len; + 
unsigned char *png; + png = stbi_write_png_to_mem((const unsigned char *)data, stride_bytes, x, y, + comp, &len); + if (png == NULL) return 0; + func(context, png, len); + free(png); + return 1; +} + /* JPEG writer * * This is based on Jon Olick's jo_jpeg.cpp: @@ -472,24 +659,25 @@ static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { } static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, - int *bitCnt, float *CDU, float *fdtbl, int DC, + int *bitCnt, float *CDU, unsigned du_stride, + float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) { const unsigned short EOB[2] = {HTAC[0x00][0], HTAC[0x00][1]}; const unsigned short M16zeroes[2] = {HTAC[0xF0][0], HTAC[0xF0][1]}; - unsigned i, diff, end0pos; + unsigned i, j, diff, end0pos, x, y; int DU[64]; - dctjpeg((void *)CDU); + dctjpeg((void *)CDU, du_stride / 8); // Quantize/descale/zigzag the coefficients - for (i = 0; i < 64; ++i) { - float v = CDU[i] * fdtbl[i]; - DU[stbiw__jpg_ZigZag[i]] = v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f); - // DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + - // 0.5f)); ceilf() and floorf() are C99, not C89, but I /think/ they're not - // needed here anyway? - /* DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? v - 0.5f : v + 0.5f); */ + for (j = 0, y = 0; y < 8; ++y) { + for (x = 0; x < 8; ++x, ++j) { + float v; + i = y * du_stride + x; + v = CDU[i] * fdtbl[j]; + DU[stbiw__jpg_ZigZag[j]] = v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f); + } } // Encode DC @@ -709,7 +897,7 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f}; - int row, col, i, k; + int row, col, i, k, subsample; float fdtbl_Y[64], fdtbl_UV[64]; unsigned char YTable[64], UVTable[64]; @@ -718,6 +906,7 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, } quality = quality ? 
quality : 97; + subsample = quality <= 97 ? 1 : 0; quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; quality = quality < 50 ? 5000 / quality : 200 - quality * 2; @@ -758,7 +947,7 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, STBIW_UCHAR(width), 3, 1, - 0x11, + (unsigned char)(subsample ? 0x22 : 0x11), 0, 2, 0x11, @@ -802,42 +991,92 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, // Encode 8x8 macroblocks { static const unsigned short fillBits[] = {0x7F, 7}; - const unsigned char *imageData = (const unsigned char *)data; int DCY = 0, DCU = 0, DCV = 0; int bitBuf = 0, bitCnt = 0; // comp == 2 is grey+alpha (alpha is ignored) int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + const unsigned char *dataR = (const unsigned char *)data; + const unsigned char *dataG = dataR + ofsG; + const unsigned char *dataB = dataR + ofsB; int x, y, pos; - for (y = 0; y < height; y += 8) { - for (x = 0; x < width; x += 8) { - float YDU[64], UDU[64], VDU[64]; - for (row = y, pos = 0; row < y + 8; ++row) { - // row >= height => use last input row - int clamped_row = (row < height) ? row : height - 1; - int base_p = - (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) - : clamped_row) * - width * comp; - for (col = x; col < x + 8; ++col, ++pos) { - float r, g, b; - // if col >= width => use pixel from last input column - int p = base_p + ((col < width) ? col : (width - 1)) * comp; + if (subsample) { + for (y = 0; y < height; y += 16) { + for (x = 0; x < width; x += 16) { + float Y[256], U[256], V[256]; + for (row = y, pos = 0; row < y + 16; ++row) { + // row >= height => use last input row + int clamped_row = (row < height) ? row : height - 1; + int base_p = + (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) + : clamped_row) * + width * comp; + for (col = x; col < x + 16; ++col, ++pos) { + // if col >= width => use pixel from last input column + int p = base_p + ((col < width) ? 
col : (width - 1)) * comp; + float r = dataR[p], g = dataG[p], b = dataB[p]; + Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128; + U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b; + V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b; + } + } - r = imageData[p + 0]; - g = imageData[p + ofsG]; - b = imageData[p + ofsB]; - YDU[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128; - UDU[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b; - VDU[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b; + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 0, 16, fdtbl_Y, + DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 8, 16, fdtbl_Y, + DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 128, 16, fdtbl_Y, + DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 136, 16, fdtbl_Y, + DCY, YDC_HT, YAC_HT); + + // subsample U,V + { + float subU[64], subV[64]; + int yy, xx; + for (yy = 0, pos = 0; yy < 8; ++yy) { + for (xx = 0; xx < 8; ++xx, ++pos) { + int j = yy * 32 + xx * 2; + subU[pos] = + (U[j + 0] + U[j + 1] + U[j + 16] + U[j + 17]) * 0.25f; + subV[pos] = + (V[j + 0] + V[j + 1] + V[j + 16] + V[j + 17]) * 0.25f; + } + } + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, + DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, + DCV, UVDC_HT, UVAC_HT); } } + } + } else { + for (y = 0; y < height; y += 8) { + for (x = 0; x < width; x += 8) { + float Y[64], U[64], V[64]; + for (row = y, pos = 0; row < y + 8; ++row) { + // row >= height => use last input row + int clamped_row = (row < height) ? row : height - 1; + int base_p = + (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) + : clamped_row) * + width * comp; + for (col = x; col < x + 8; ++col, ++pos) { + // if col >= width => use pixel from last input column + int p = base_p + ((col < width) ? 
col : (width - 1)) * comp; + float r = dataR[p], g = dataG[p], b = dataB[p]; + Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128; + U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b; + V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b; + } + } - DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, - YDC_HT, YAC_HT); - DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, - UVDC_HT, UVAC_HT); - DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, - UVDC_HT, UVAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, + YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, + UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, + UVDC_HT, UVAC_HT); + } } } @@ -854,14 +1093,14 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) { - stbi__write_context s; + stbi__write_context s = {0}; stbi__start_write_callbacks(&s, func, context); return stbi_write_jpg_core(&s, x, y, comp, (void *)data, quality); } int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) { - stbi__write_context s; + stbi__write_context s = {0}; if (stbi__start_write_file(&s, filename)) { int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); stbi__end_write_file(&s); @@ -1026,14 +1265,14 @@ static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) { - stbi__write_context s; + stbi__write_context s = {0}; stbi__start_write_callbacks(&s, func, context); return stbi_write_hdr_core(&s, x, y, comp, (float *)data); } int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) { - stbi__write_context s; + 
stbi__write_context s = {0}; if (stbi__start_write_file(&s, filename)) { int r = stbi_write_hdr_core(&s, x, y, comp, (float *)data); stbi__end_write_file(&s); diff --git a/third_party/stb/stb_image_write.h b/third_party/stb/stb_image_write.h index 2b21f4f15..f1ad8e167 100644 --- a/third_party/stb/stb_image_write.h +++ b/third_party/stb/stb_image_write.h @@ -3,7 +3,6 @@ COSMOPOLITAN_C_START_ extern int stbi_write_png_compression_level; -extern int stbi__flip_vertically_on_write; extern int stbi_write_tga_with_rle; extern int stbi_write_force_png_filter; diff --git a/third_party/stb/stb_image_write_png.c b/third_party/stb/stb_image_write_png.c deleted file mode 100644 index 9ee926f58..000000000 --- a/third_party/stb/stb_image_write_png.c +++ /dev/null @@ -1,379 +0,0 @@ -/* stb_image_write - v1.13 - public domain - http://nothings.org/stb - * writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 - * no warranty implied; use at your own risk - * - * ABOUT: - * - * This file is a library for writing images to stdio or a callback. - * - * The PNG output is not optimal; it is 20-50% larger than the file - * written by a decent optimizing implementation; though providing a - * custom zlib compress function (see STBIW_ZLIB_COMPRESS) can - * mitigate that. This library is designed for source code - * compactness and simplicity, not optimal image file size or - * run-time performance. - * - * USAGE: - * - * There are five functions, one for each image file format: - * - * stbi_write_png - * stbi_write_bmp - * stbi_write_tga - * stbi_write_jpg - * stbi_write_hdr - * - * stbi_flip_vertically_on_write - * - * There are also five equivalent functions that use an arbitrary - * write function. 
You are expected to open/close your - * file-equivalent before and after calling these: - * - * stbi_write_png_to_func - * stbi_write_bmp_to_func - * stbi_write_tga_to_func - * stbi_write_hdr_to_func - * stbi_write_jpg_to_func - * - * where the callback is: - * void stbi_write_func(void *context, void *data, int size); - * - * You can configure it with these: - * stbi_write_tga_with_rle - * stbi_write_png_compression_level - * stbi_write_force_png_filter - * - * Each function returns 0 on failure and non-0 on success. - * - * The functions create an image file defined by the parameters. The - * image is a rectangle of pixels stored from left-to-right, - * top-to-bottom. Each pixel contains 'comp' channels of data stored - * interleaved with 8-bits per channel, in the following order: 1=Y, - * 2=YA, 3=RGB, 4=RGBA. (Y is monochrome color.) The rectangle is 'w' - * pixels wide and 'h' pixels tall. The *data pointer points to the - * first byte of the top-left-most pixel. For PNG, "stride_in_bytes" - * is the distance in bytes from the first byte of a row of pixels to - * the first byte of the next row of pixels. - * - * PNG creates output files with the same number of components as the - * input. The BMP format expands Y to RGB in the file format and does - * not output alpha. - * - * PNG supports writing rectangles of data even when the bytes - * storing rows of data are not consecutive in memory (e.g. - * sub-rectangles of a larger image), by supplying the stride between - * the beginning of adjacent rows. The other formats do not. (Thus - * you cannot write a native-format BMP through the BMP writer, both - * because it is in BGR order and because it may have padding at the - * end of the line.) - * - * PNG allows you to set the deflate compression level by setting the - * global variable 'stbi_write_png_compression_level' (it defaults to - * 8). - * - * HDR expects linear float data. 
Since the format is always 32-bit - * rgb(e) data, alpha (if provided) is discarded, and for monochrome - * data it is replicated across all three channels. - * - * TGA supports RLE or non-RLE compressed data. To use - * non-RLE-compressed data, set the global variable - * 'stbi_write_tga_with_rle' to 0. - * - * JPEG does ignore alpha channels in input data; quality is between - * 1 and 100. Higher quality looks better but results in a bigger - * image. JPEG baseline (no JPEG progressive). - * - * CREDITS: - * - * - * Sean Barrett - PNG/BMP/TGA - * Baldur Karlsson - HDR - * Jean-Sebastien Guay - TGA monochrome - * Tim Kelsey - misc enhancements - * Alan Hickman - TGA RLE - * Emmanuel Julien - initial file IO callback implementation - * Jon Olick - original jo_jpeg.cpp code - * Daniel Gibson - integrate JPEG, allow external zlib - * Aarni Koskela - allow choosing PNG filter - * - * bugfixes: - * github:Chribba - * Guillaume Chereau - * github:jry2 - * github:romigrou - * Sergio Gonzalez - * Jonas Karlsson - * Filip Wasil - * Thatcher Ulrich - * github:poppolopoppo - * Patrick Boettcher - * github:xeekworx - * Cap Petschulat - * Simon Rodriguez - * Ivan Tikhonov - * github:ignotion - * Adam Schackart - * - * LICENSE - * - * Public Domain (www.unlicense.org) - */ -#include "libc/assert.h" -#include "libc/fmt/conv.h" -#include "libc/limits.h" -#include "libc/mem/mem.h" -#include "libc/stdio/stdio.h" -#include "libc/str/str.h" -#include "third_party/stb/stb_image_write.h" -#include "third_party/zlib/zlib.h" - -#define STBIW_UCHAR(x) (unsigned char)((x)&0xff) -#define stbiw__wpng4(o, a, b, c, d) \ - ((o)[0] = STBIW_UCHAR(a), (o)[1] = STBIW_UCHAR(b), (o)[2] = STBIW_UCHAR(c), \ - (o)[3] = STBIW_UCHAR(d), (o) += 4) -#define stbiw__wp32(data, v) \ - stbiw__wpng4(data, (v) >> 24, (v) >> 16, (v) >> 8, (v)); -#define stbiw__wptag(data, s) stbiw__wpng4(data, s[0], s[1], s[2], s[3]) - -int stbi_write_png_compression_level = 4; -int stbi_write_force_png_filter = -1; - -static 
unsigned char *stbi_zlib_compress(unsigned char *data, int size, - int *out_len, int quality) { - unsigned long newsize; - unsigned char *newdata, *trimdata; - assert(0 <= size && size <= INT_MAX); - if ((newdata = malloc((newsize = compressBound(size)))) && - compress2(newdata, &newsize, data, size, - stbi_write_png_compression_level) == Z_OK) { - *out_len = newsize; - if ((trimdata = realloc(newdata, newsize))) { - return trimdata; - } else { - return newdata; - } - } - free(newdata); - return NULL; -} - -static void stbiw__wpcrc(unsigned char **data, int len) { - unsigned int crc = crc32(0, *data - len - 4, len + 4); - stbiw__wp32(*data, crc); -} - -forceinline unsigned char stbiw__paeth(int a, int b, int c) { - int p = a + b - c, pa = abs(p - a), pb = abs(p - b), pc = abs(p - c); - if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); - if (pb <= pc) return STBIW_UCHAR(b); - return STBIW_UCHAR(c); -} - -// @OPTIMIZE: provide an option that always forces left-predict or paeth predict -static void stbiw__encode_png_line(const unsigned char *pixels, - int stride_bytes, int width, int height, - int y, int n, int filter_type, - signed char *line_buffer) { - int mapping[] = {0, 1, 2, 3, 4}; - int firstmap[] = {0, 1, 0, 5, 6}; - const unsigned char *z; - int *mymap, i, type, signed_stride; - - mymap = (y != 0) ? mapping : firstmap; - type = mymap[filter_type]; - z = pixels + - stride_bytes * (stbi__flip_vertically_on_write ? height - 1 - y : y); - signed_stride = stbi__flip_vertically_on_write ? 
-stride_bytes : stride_bytes; - - if (type == 0) { - memcpy(line_buffer, z, width * n); - return; - } - - for (i = 0; i < n; ++i) { - switch (type) { - case 1: - line_buffer[i] = z[i]; - break; - case 2: - line_buffer[i] = z[i] - z[i - signed_stride]; - break; - case 3: - line_buffer[i] = z[i] - (z[i - signed_stride] >> 1); - break; - case 4: - line_buffer[i] = - (signed char)(z[i] - stbiw__paeth(0, z[i - signed_stride], 0)); - break; - case 5: - line_buffer[i] = z[i]; - break; - case 6: - line_buffer[i] = z[i]; - break; - } - } - - switch (type) { - case 1: - for (i = n; i < width * n; ++i) { - line_buffer[i] = z[i] - z[i - n]; - } - break; - case 2: - for (i = n; i < width * n; ++i) { - line_buffer[i] = z[i] - z[i - signed_stride]; - } - break; - case 3: - for (i = n; i < width * n; ++i) { - line_buffer[i] = z[i] - ((z[i - n] + z[i - signed_stride]) >> 1); - } - break; - case 4: - for (i = n; i < width * n; ++i) { - line_buffer[i] = z[i] - stbiw__paeth(z[i - n], z[i - signed_stride], - z[i - signed_stride - n]); - } - break; - case 5: - for (i = n; i < width * n; ++i) { - line_buffer[i] = z[i] - (z[i - n] >> 1); - } - break; - case 6: - for (i = n; i < width * n; ++i) { - line_buffer[i] = z[i] - stbiw__paeth(z[i - n], 0, 0); - } - break; - } -} - -unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, - int stride_bytes, int x, int y, int n, - int *out_len) { - int force_filter = stbi_write_force_png_filter; - int ctype[5] = {-1, 0, 4, 2, 6}; - unsigned char sig[8] = {137, 80, 78, 71, 13, 10, 26, 10}; - unsigned char *out, *o, *filt, *zlib; - signed char *line_buffer; - int j, zlen; - - if (stride_bytes == 0) stride_bytes = x * n; - - if (force_filter >= 5) { - force_filter = -1; - } - - filt = malloc((x * n + 1) * y); - if (!filt) return 0; - line_buffer = malloc(x * n); - if (!line_buffer) { - free(filt); - return 0; - } - for (j = 0; j < y; ++j) { - int filter_type; - if (force_filter > -1) { - filter_type = force_filter; - 
stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, - line_buffer); - } else { // Estimate the best filter by running through all of them: - int best_filter = 0, best_filter_val = 0x7fffffff, est, i; - for (filter_type = 0; filter_type < 5; filter_type++) { - stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, - line_buffer); - - // Estimate the entropy of the line using this filter; the less, the - // better. - est = 0; - for (i = 0; i < x * n; ++i) { - est += abs((signed char)line_buffer[i]); - } - if (est < best_filter_val) { - best_filter_val = est; - best_filter = filter_type; - } - } - if (filter_type != best_filter) { // If the last iteration already got us - // the best filter, don't redo it - stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, - line_buffer); - filter_type = best_filter; - } - } - // when we get here, filter_type contains the filter type, and line_buffer - // contains the data - filt[j * (x * n + 1)] = (unsigned char)filter_type; - memmove(filt + j * (x * n + 1) + 1, line_buffer, x * n); - } - free(line_buffer); - zlib = stbi_zlib_compress(filt, y * (x * n + 1), &zlen, - stbi_write_png_compression_level); - free(filt); - if (!zlib) return 0; - - // each tag requires 12 bytes of overhead - out = malloc(8 + 12 + 13 + 12 + zlen + 12); - if (!out) return 0; - *out_len = 8 + 12 + 13 + 12 + zlen + 12; - - o = out; - memmove(o, sig, 8); - o += 8; - stbiw__wp32(o, 13); // header length - stbiw__wptag(o, "IHDR"); - stbiw__wp32(o, x); - stbiw__wp32(o, y); - *o++ = 8; - *o++ = STBIW_UCHAR(ctype[n]); - *o++ = 0; - *o++ = 0; - *o++ = 0; - stbiw__wpcrc(&o, 13); - - stbiw__wp32(o, zlen); - stbiw__wptag(o, "IDAT"); - memmove(o, zlib, zlen); - o += zlen; - free(zlib); - stbiw__wpcrc(&o, zlen); - - stbiw__wp32(o, 0); - stbiw__wptag(o, "IEND"); - stbiw__wpcrc(&o, 0); - - assert(o == out + *out_len); - - return out; -} - -int stbi_write_png(const char *filename, int x, int y, int comp, - const void *data, 
int stride_bytes) { - int len; - FILE *f; - unsigned char *png; - png = stbi_write_png_to_mem(data, stride_bytes, x, y, comp, &len); - if (png == NULL) return 0; - f = fopen(filename, "wb"); - if (!f) { - free(png); - return 0; - } - fwrite(png, 1, len, f); - fclose(f); - free(png); - return 1; -} - -int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, - int comp, const void *data, int stride_bytes) { - int len; - unsigned char *png; - png = stbi_write_png_to_mem((const unsigned char *)data, stride_bytes, x, y, - comp, &len); - if (png == NULL) return 0; - func(context, png, len); - free(png); - return 1; -} diff --git a/third_party/stb/stb_rect_pack.c b/third_party/stb/stb_rect_pack.c index 052a2c3f1..65fab7afa 100644 --- a/third_party/stb/stb_rect_pack.c +++ b/third_party/stb/stb_rect_pack.c @@ -1,29 +1,20 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:3;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=3 sts=3 sw=3 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ -│ stb_truetype │ -│ Copyright 2017 Sean Barrett │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. 
│ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ │ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/stb/stb_rect_pack.h" #include "libc/assert.h" @@ -41,8 +32,6 @@ asm(".include \"libc/disclaimer.inc\""); // Useful for e.g. packing rectangular textures into an atlas. // Does not do rotation. // -// in the file that you want to have the implementation. -// // Not necessarily the awesomest packing method, but better than // the totally naive one in stb_truetype (which is primarily what // this is meant to replace). @@ -390,7 +379,11 @@ static int rect_height_compare(const void *a, const void *b) return -1; if (p->h < q->h) return 1; - return (p->w > q->w) ? 
-1 : (p->w < q->w); + if (p->w > q->w) + return -1; + if (p->w < q->w) + return 1; + return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed); } static int rect_original_order(const void *a, const void *b) diff --git a/third_party/stb/stb_truetype.c b/third_party/stb/stb_truetype.c index 742b03469..a59346df3 100644 --- a/third_party/stb/stb_truetype.c +++ b/third_party/stb/stb_truetype.c @@ -1,5 +1,5 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:3;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=3 sts=3 sw=3 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ │ │ │ stb_truetype │ diff --git a/third_party/stb/stb_vorbis.c b/third_party/stb/stb_vorbis.c index 88124b77d..3b3da48f3 100644 --- a/third_party/stb/stb_vorbis.c +++ b/third_party/stb/stb_vorbis.c @@ -32,6 +32,7 @@ // manxorist@github saga musix github:infatum // Timur Gagiev Maxwell Koo // + #include "third_party/stb/stb_vorbis.h" #include "libc/assert.h" #include "libc/calls/calls.h" @@ -45,6 +46,11 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" +asm(".ident\t\"\\n\\n\ +stb_vorbis (Public Domain)\\n\ +Credit: Sean Barrett, et al.\\n\ +http://nothings.org/stb\""); + // STB_VORBIS_NO_PUSHDATA_API // does not compile the code for the various stb_vorbis_*_pushdata() // functions @@ -343,6 +349,10 @@ struct stb_vorbis { unsigned int temp_memory_required; unsigned int setup_temp_memory_required; + char *vendor; + int comment_list_length; + char **comment_list; + // input config #ifndef STB_VORBIS_NO_STDIO FILE *f; @@ -358,8 +368,11 @@ struct stb_vorbis { uint8 push_mode; + // the page to seek to when seeking to start, may be zero uint32 first_audio_page_offset; + // p_first is the page on which the first audio packet ends + // (but not necessarily the page on which it starts) ProbedPage p_first, 
p_last; // memory management @@ -493,7 +506,7 @@ static dontinline void *make_block_array(void *mem, int count, int size) { } static dontinline void *setup_malloc(vorb *f, int sz) { - sz = (sz + 3) & ~3; + sz = (sz + 7) & ~7; // round up to nearest 8 for alignment of future allocs. f->setup_memory_required += sz; if (f->alloc.alloc_buffer) { void *p = (char *)f->alloc.alloc_buffer + f->setup_offset; @@ -510,7 +523,7 @@ static dontinline void setup_free(vorb *f, void *p) { } static dontinline void *setup_temp_malloc(vorb *f, int sz) { - sz = (sz + 3) & ~3; + sz = (sz + 7) & ~7; // round up to nearest 8 for alignment of future allocs. if (f->alloc.alloc_buffer) { if (f->temp_offset - sz < f->setup_offset) return NULL; f->temp_offset -= sz; @@ -521,7 +534,7 @@ static dontinline void *setup_temp_malloc(vorb *f, int sz) { static dontinline void setup_temp_free(vorb *f, void *p, int sz) { if (f->alloc.alloc_buffer) { - f->temp_offset += (sz + 3) & ~3; + f->temp_offset += (sz + 7) & ~7; return; } free(p); @@ -593,7 +606,7 @@ static float float32_unpack(uint32 x) { uint32 sign = x & 0x80000000; uint32 exp = (x & 0x7fe00000) >> 21; double res = sign ? 
-(double)mantissa : (double)mantissa; - return (float)ldexp((float)res, exp - 788); + return (float)ldexp((float)res, (int)exp - 788); } // zlib & jpeg huffman tables assume that the output symbols @@ -636,6 +649,8 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) { assert(c->sorted_entries == 0); return TRUE; } + // no error return required, code reading lens checks this + assert(len[k] < 32); // add to the list add_entry(c, 0, k, m++, len[k], values); // add all available leaves @@ -648,6 +663,8 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) { uint32 res; int z = len[i], y; if (z == NO_CODE) continue; + // no error return required, code reading lens checks this + assert(z < 32); // find lowest available leaf (should always be earliest, // which is what the specification calls for) // note that this property, and the fact we can never have @@ -659,12 +676,10 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) { return FALSE; } res = available[z]; - assert(z >= 0 && z < 32); available[z] = 0; add_entry(c, ReverseBits32(res), i, m++, len[i], values); // propagate availability up the tree if (z != len[i]) { - assert(len[i] >= 0 && len[i] < 32); for (y = len[i]; y > z; --y) { assert(available[y] == 0); available[y] = res + (1 << (32 - y)); @@ -991,6 +1006,9 @@ static int capture_pattern(vorb *f) { static int start_page_no_capturepattern(vorb *f) { uint32 loc0, loc1, n; + if (f->first_decode && !IS_PUSH_MODE(f)) { + f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4; + } // stream structure version if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version); // header flag @@ -1027,14 +1045,12 @@ static int start_page_no_capturepattern(vorb *f) { } if (f->first_decode) { int i, len; - ProbedPage p; len = 0; for (i = 0; i < f->segment_count; ++i) len += f->segments[i]; len += 27 + f->segment_count; - p.page_start = f->first_audio_page_offset; - p.page_end = 
p.page_start + len; - p.last_decoded_sample = loc0; - f->p_first = p; + + f->p_first.page_end = f->p_first.page_start + len; + f->p_first.last_decoded_sample = loc0; } f->next_seg = 0; return TRUE; @@ -1124,6 +1140,15 @@ static int get8_packet(vorb *f) { return x; } +static int get32_packet(vorb *f) { + uint32 x; + x = get8_packet(f); + x += (uint32)get8_packet(f) << 8; + x += (uint32)get8_packet(f) << 16; + x += (uint32)get8_packet(f) << 24; + return x; +} + static void flush_packet(vorb *f) { while (get8_packet_raw(f) != EOP) ; @@ -1153,7 +1178,7 @@ static uint32 get_bits(vorb *f, int n) { f->valid_bits += 8; } } - if (f->valid_bits < 0) return 0; + assert(f->valid_bits >= n); z = f->acc & ((1 << n) - 1); f->acc >>= n; f->valid_bits -= n; @@ -1225,7 +1250,7 @@ static int codebook_decode_scalar_raw(vorb *f, Codebook *c) { assert(!c->sparse); for (i = 0; i < c->entries; ++i) { if (c->codeword_lengths[i] == NO_CODE) continue; - if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i]) - 1))) { + if (c->codewords[i] == (f->acc & ((1u << c->codeword_lengths[i]) - 1))) { if (f->valid_bits >= c->codeword_lengths[i]) { f->acc >>= c->codeword_lengths[i]; f->valid_bits -= c->codeword_lengths[i]; @@ -1414,7 +1439,8 @@ static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), // and the length we'll be using (effective) if (c_inter + p_inter * ch + effective > len * ch) { - effective = len * ch - (p_inter * ch - c_inter); + // https://github.com/nothings/stb/pull/1490 + effective = len * ch - (p_inter * ch + c_inter); } #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK @@ -1717,49 +1743,7 @@ static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, ++class_set; #endif } - } else if (ch == 1) { - while (pcount < part_read) { - int z = r->begin + pcount * r->part_size; - int c_inter = 0, p_inter = z; - if (pass_ == 0) { - Codebook *c = f->codebooks + r->classbook; - int q; - 
DECODE(q, f, c); - if (q == EOP) goto done; -#ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - part_classdata[0][class_set] = r->classdata[q]; -#else - for (i = classwords - 1; i >= 0; --i) { - classifications[0][i + pcount] = q % r->classifications; - q /= r->classifications; - } -#endif - } - for (i = 0; i < classwords && pcount < part_read; ++i, ++pcount) { - int z = r->begin + pcount * r->part_size; -#ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - int c = part_classdata[0][class_set][i]; -#else - int c = classifications[0][pcount]; -#endif - int b = r->residue_books[c][pass_]; - if (b >= 0) { - Codebook *book = f->codebooks + b; - if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, - ch, &c_inter, &p_inter, - n, r->part_size)) - goto done; - } else { - z += r->part_size; - c_inter = 0; - p_inter = z; - } - } -#ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - ++class_set; -#endif - } - } else { + } else if (ch > 2) { while (pcount < part_read) { int z = r->begin + pcount * r->part_size; int c_inter = z % ch, p_inter = z / ch; @@ -2165,34 +2149,33 @@ static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, while (z > base) { float k00, k11; + float l00, l11; k00 = z[-0] - z[-8]; k11 = z[-1] - z[-9]; + l00 = z[-2] - z[-10]; + l11 = z[-3] - z[-11]; z[-0] = z[-0] + z[-8]; z[-1] = z[-1] + z[-9]; - z[-8] = k00; - z[-9] = k11; - - k00 = z[-2] - z[-10]; - k11 = z[-3] - z[-11]; z[-2] = z[-2] + z[-10]; z[-3] = z[-3] + z[-11]; - z[-10] = (k00 + k11) * A2; - z[-11] = (k11 - k00) * A2; + z[-8] = k00; + z[-9] = k11; + z[-10] = (l00 + l11) * A2; + z[-11] = (l11 - l00) * A2; - k00 = z[-12] - z[-4]; // reverse to avoid a unary negation + k00 = z[-4] - z[-12]; k11 = z[-5] - z[-13]; + l00 = z[-6] - z[-14]; + l11 = z[-7] - z[-15]; z[-4] = z[-4] + z[-12]; z[-5] = z[-5] + z[-13]; - z[-12] = k11; - z[-13] = k00; - - k00 = z[-14] - z[-6]; // reverse to avoid a unary negation - k11 = z[-7] - z[-15]; z[-6] = z[-6] + z[-14]; z[-7] = z[-7] + z[-15]; - z[-14] = (k00 + k11) * A2; 
- z[-15] = (k00 - k11) * A2; + z[-12] = k11; + z[-13] = -k00; + z[-14] = (l11 - l00) * A2; + z[-15] = (l00 + l11) * -A2; iter_54(z); iter_54(z - 8); @@ -2630,7 +2613,8 @@ void inverse_mdct_naive(float *buffer, int n) #endif static float *get_window(vorb *f, int len) { - len <<= 1; + // https://github.com/nothings/stb/pull/1499 + len = (unsigned int)len << 1; if (len == f->blocksize_0) return f->window[0]; if (len == f->blocksize_1) return f->window[1]; return NULL; @@ -2755,8 +2739,8 @@ static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int right_end, int *p_left) { Mapping *map; int i, j, k, n, n2; - int zero_channel[256]; - int really_zero_channel[256]; + int zero_channel[256] = {0}; + int really_zero_channel[256] = {0}; // WINDOWING @@ -2959,7 +2943,9 @@ static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, // this isn't to spec, but spec would require us to read ahead // and decode the size of all current frames--could be done, // but presumably it's not a commonly used feature - f->current_loc = -n2; // start of first frame is positioned for discard + f->current_loc = 0u - n2; // start of first frame is positioned for discard + // (NB this is an intentional unsigned + // overflow wrap-around) // we might have to discard samples "from" the next frame too, // if we're lapping a large block then a small at the start? f->discard_samples_deferred = n - right_end; @@ -3089,7 +3075,7 @@ static int vorbis_pump_first_frame(stb_vorbis *f) { } #ifndef STB_VORBIS_NO_PUSHDATA_API -static int is_whole_packet_present(stb_vorbis *f, int end_page) { +static int is_whole_packet_present(stb_vorbis *f) { // make sure that we have the packet available before continuing... // this requires a full ogg parse, but we know we can fetch from f->stream @@ -3109,8 +3095,6 @@ static int is_whole_packet_present(stb_vorbis *f, int end_page) { break; } // either this continues, or it ends it... 
- if (end_page) - if (s < f->segment_count - 1) return error(f, VORBIS_invalid_stream); if (s == f->segment_count) s = -1; // set 'crosses page' flag if (p > f->stream_end) return error(f, VORBIS_need_more_data); first = FALSE; @@ -3144,8 +3128,6 @@ static int is_whole_packet_present(stb_vorbis *f, int end_page) { p += q[s]; if (q[s] < 255) break; } - if (end_page) - if (s < n - 1) return error(f, VORBIS_invalid_stream); if (s == n) s = -1; // set 'crosses page' flag if (p > f->stream_end) return error(f, VORBIS_need_more_data); first = FALSE; @@ -3160,6 +3142,7 @@ static int start_decoder(vorb *f) { int longest_floorlist = 0; // first page, first packet + f->first_decode = TRUE; if (!start_page(f)) return FALSE; // validate page flag @@ -3218,6 +3201,50 @@ static int start_decoder(vorb *f) { if (!start_page(f)) return FALSE; if (!start_packet(f)) return FALSE; + + if (!next_segment(f)) return FALSE; + + if (get8_packet(f) != VORBIS_packet_comment) + return error(f, VORBIS_invalid_setup); + + for (i = 0; i < 6; ++i) header[i] = get8_packet(f); + + if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup); + // file vendor + len = get32_packet(f); + f->vendor = (char *)setup_malloc(f, sizeof(char) * (len + 1)); + if (f->vendor == NULL) return error(f, VORBIS_outofmem); + for (i = 0; i < len; ++i) { + f->vendor[i] = get8_packet(f); + } + f->vendor[len] = (char)'\0'; + // user comments + f->comment_list_length = get32_packet(f); + f->comment_list = NULL; + if (f->comment_list_length > 0) { + f->comment_list = + (char **)setup_malloc(f, sizeof(char *) * (f->comment_list_length)); + if (f->comment_list == NULL) return error(f, VORBIS_outofmem); + } + + for (i = 0; i < f->comment_list_length; ++i) { + len = get32_packet(f); + f->comment_list[i] = (char *)setup_malloc(f, sizeof(char) * (len + 1)); + if (f->comment_list[i] == NULL) return error(f, VORBIS_outofmem); + + for (j = 0; j < len; ++j) { + f->comment_list[i][j] = get8_packet(f); + } + 
f->comment_list[i][len] = (char)'\0'; + } + + // framing_flag + x = get8_packet(f); + if (!(x & 1)) return error(f, VORBIS_invalid_setup); + + skip(f, f->bytes_in_seg); + f->bytes_in_seg = 0; + do { len = next_segment(f); skip(f, len); @@ -3229,7 +3256,7 @@ static int start_decoder(vorb *f) { #ifndef STB_VORBIS_NO_PUSHDATA_API if (IS_PUSH_MODE(f)) { - if (!is_whole_packet_present(f, TRUE)) { + if (!is_whole_packet_present(f)) { // convert error in ogg header to write type if (f->error == VORBIS_invalid_stream) f->error = VORBIS_invalid_setup; return FALSE; @@ -3302,7 +3329,10 @@ static int start_decoder(vorb *f) { if (present) { lengths[j] = get_bits(f, 5) + 1; ++total; - if (lengths[j] == 32) return error(f, VORBIS_invalid_setup); + if (lengths[j] == 32) { + if (c->sparse) setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_invalid_setup); + } } else { lengths[j] = NO_CODE; } @@ -3315,7 +3345,10 @@ static int start_decoder(vorb *f) { f->setup_temp_memory_required = c->entries; c->codeword_lengths = (uint8 *)setup_malloc(f, c->entries); - if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem); + if (c->codeword_lengths == NULL) { + setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_outofmem); + } memcpy(c->codeword_lengths, lengths, c->entries); setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been @@ -3349,13 +3382,22 @@ static int start_decoder(vorb *f) { unsigned int size; if (c->sorted_entries) { c->codeword_lengths = (uint8 *)setup_malloc(f, c->sorted_entries); - if (!c->codeword_lengths) return error(f, VORBIS_outofmem); + if (!c->codeword_lengths) { + setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_outofmem); + } c->codewords = (uint32 *)setup_temp_malloc( f, sizeof(*c->codewords) * c->sorted_entries); - if (!c->codewords) return error(f, VORBIS_outofmem); + if (!c->codewords) { + setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_outofmem); + } values = 
(uint32 *)setup_temp_malloc(f, sizeof(*values) * c->sorted_entries); - if (!values) return error(f, VORBIS_outofmem); + if (!values) { + setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_outofmem); + } } size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries; @@ -3364,7 +3406,10 @@ static int start_decoder(vorb *f) { } if (!compute_codewords(c, lengths, c->entries, values)) { - if (c->sparse) setup_temp_free(f, values, 0); + if (c->sparse) { + setup_temp_free(f, values, 0); + setup_temp_free(f, lengths, c->entries); + } return error(f, VORBIS_invalid_setup); } @@ -3372,12 +3417,18 @@ static int start_decoder(vorb *f) { // allocate an extra slot for sentinels c->sorted_codewords = (uint32 *)setup_malloc( f, sizeof(*c->sorted_codewords) * (c->sorted_entries + 1)); - if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem); + if (c->sorted_codewords == NULL) { + if (c->sparse) setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_outofmem); + } // allocate an extra slot at the front so that c->sorted_values[-1] is // defined so that we can catch that case without an extra if c->sorted_values = (int *)setup_malloc( f, sizeof(*c->sorted_values) * (c->sorted_entries + 1)); - if (c->sorted_values == NULL) return error(f, VORBIS_outofmem); + if (c->sorted_values == NULL) { + if (c->sparse) setup_temp_free(f, lengths, c->entries); + return error(f, VORBIS_outofmem); + } ++c->sorted_values; c->sorted_values[-1] = -1; compute_sorted_huffman(c, lengths, values); @@ -3446,8 +3497,7 @@ static int start_decoder(vorb *f) { unsigned int div = 1; for (k = 0; k < c->dimensions; ++k) { int off = (z / div) % c->lookup_values; - float val = mults[off]; - val = mults[off] * c->delta_value + c->minimum_value + last; + float val = mults[off] * c->delta_value + c->minimum_value + last; c->multiplicands[j * c->dimensions + k] = val; if (c->sequence_p) last = val; if (k + 1 < c->dimensions) { @@ -3532,7 +3582,7 @@ static int 
start_decoder(vorb *f) { return error(f, VORBIS_invalid_setup); } for (k = 0; k < 1 << g->class_subclasses[j]; ++k) { - g->subclass_books[j][k] = get_bits(f, 8) - 1; + g->subclass_books[j][k] = (int16)get_bits(f, 8) - 1; if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); } @@ -3560,7 +3610,7 @@ static int start_decoder(vorb *f) { for (j = 0; j < g->values; ++j) g->sorted_order[j] = (uint8)p[j].id; // precompute the neighbors for (j = 2; j < g->values; ++j) { - int low, hi; + int low = 0, hi = 0; neighbors(g->Xlist, j, &low, &hi); g->neighbors[j][0] = low; g->neighbors[j][1] = hi; @@ -3738,7 +3788,9 @@ static int start_decoder(vorb *f) { int i, max_part_read = 0; for (i = 0; i < f->residue_count; ++i) { Residue *r = f->residue_config + i; - unsigned int actual_size = f->blocksize_1 / 2; + unsigned int rtype = f->residue_types[i]; + unsigned int actual_size = + rtype == 2 ? f->blocksize_1 : f->blocksize_1 / 2; unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size; unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size; @@ -3761,8 +3813,6 @@ static int start_decoder(vorb *f) { f->temp_memory_required = imdct_mem; } - f->first_decode = TRUE; - if (f->alloc.alloc_buffer) { assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes); // check if there's enough temp memory so we don't error later @@ -3771,13 +3821,30 @@ static int start_decoder(vorb *f) { return error(f, VORBIS_outofmem); } - f->first_audio_page_offset = stb_vorbis_get_file_offset(f); + // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point + // to a page without PAGEFLAG_continued_packet, so this either points + // to the first page, or the page after the end of the headers. It might + // be cleaner to point to a page in the middle of the headers, when that's + // the page where the first audio packet starts, but we'd have to also + // correctly skip the end of any continued packet in stb_vorbis_seek_start. 
+ if (f->next_seg == -1) { + f->first_audio_page_offset = stb_vorbis_get_file_offset(f); + } else { + f->first_audio_page_offset = 0; + } return TRUE; } static void vorbis_deinit(stb_vorbis *p) { int i, j; + + setup_free(p, p->vendor); + for (i = 0; i < p->comment_list_length; ++i) { + setup_free(p, p->comment_list[i]); + } + setup_free(p, p->comment_list); + if (p->residue_config) { for (i = 0; i < p->residue_count; ++i) { Residue *r = p->residue_config + i; @@ -3840,8 +3907,7 @@ static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z) { memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start if (z) { p->alloc = *z; - p->alloc.alloc_buffer_length_in_bytes = - (p->alloc.alloc_buffer_length_in_bytes + 3) & ~3; + p->alloc.alloc_buffer_length_in_bytes &= ~7; p->temp_offset = p->alloc.alloc_buffer_length_in_bytes; } p->eof = 0; @@ -3873,6 +3939,14 @@ stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f) { return d; } +stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f) { + stb_vorbis_comment d; + d.vendor = f->vendor; + d.comment_list_length = f->comment_list_length; + d.comment_list = f->comment_list; + return d; +} + int stb_vorbis_get_error(stb_vorbis *f) { int e = f->error; f->error = VORBIS__no_error; @@ -4007,7 +4081,7 @@ int stb_vorbis_decode_frame_pushdata( f->error = VORBIS__no_error; // check that we have the entire packet in memory - if (!is_whole_packet_present(f, FALSE)) { + if (!is_whole_packet_present(f)) { *samples = 0; return 0; } @@ -4069,6 +4143,7 @@ stb_vorbis *stb_vorbis_open_pushdata( *error = VORBIS_need_more_data; else *error = p.error; + vorbis_deinit(&p); return NULL; } f = vorbis_alloc(&p); @@ -4121,7 +4196,7 @@ static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) { if (f->eof) return 0; if (header[4] != 0) goto invalid; goal = header[22] + (header[23] << 8) + (header[24] << 16) + - (header[25] << 24); + ((uint32)header[25] << 24); for (i = 22; i < 26; ++i) header[i] = 0; crc = 0; for (i = 0; i < 
27; ++i) crc = crc32_update(crc, header[i]); @@ -4232,8 +4307,8 @@ static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset) { static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) { ProbedPage left, right, mid; int i, start_seg_with_known_loc, end_pos, page_start; - uint32 delta, stream_length, padding; - double offset, bytes_per_sample; + uint32 delta, stream_length, padding, last_sample_limit; + double offset = 0.0, bytes_per_sample = 0.0; int probe = 0; bytes_per_sample = 2; /* TODO(jart): ???? */ @@ -4249,9 +4324,9 @@ static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) { // indicates should be the granule position (give or take one)). padding = ((f->blocksize_1 - f->blocksize_0) >> 2); if (sample_number < padding) - sample_number = 0; + last_sample_limit = 0; else - sample_number -= padding; + last_sample_limit = sample_number - padding; left = f->p_first; while (left.last_decoded_sample == ~0U) { @@ -4264,8 +4339,11 @@ static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) { assert(right.last_decoded_sample != ~0U); // starting from the start is handled differently - if (sample_number <= left.last_decoded_sample) { - if (stb_vorbis_seek_start(f)) return 1; + if (last_sample_limit <= left.last_decoded_sample) { + if (stb_vorbis_seek_start(f)) { + if (f->current_loc > sample_number) return error(f, VORBIS_seek_failed); + return 1; + } return 0; } @@ -4284,10 +4362,10 @@ static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) { bytes_per_sample = data_bytes / right.last_decoded_sample; offset = left.page_start + - bytes_per_sample * (sample_number - left.last_decoded_sample); + bytes_per_sample * (last_sample_limit - left.last_decoded_sample); } else { // second probe (try to bound the other side) - double error = ((double)sample_number - mid.last_decoded_sample) * + double error = ((double)last_sample_limit - mid.last_decoded_sample) * bytes_per_sample; if (error >= 0 && error < 8000) 
error = 8000; if (error < 0 && error > -8000) error = -8000; @@ -4318,13 +4396,15 @@ static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) { } // if we've just found the last page again then we're in a tricky file, - // and we're close enough. - if (mid.page_start == right.page_start) break; - - if (sample_number < mid.last_decoded_sample) - right = mid; - else - left = mid; + // and we're close enough (if it wasn't an interpolation probe). + if (mid.page_start == right.page_start) { + if (probe >= 2 || delta <= 65536) break; + } else { + if (last_sample_limit < mid.last_decoded_sample) + right = mid; + else + left = mid; + } ++probe; } @@ -4437,8 +4517,8 @@ int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number) { flush_packet(f); } } - // the next frame will start with the sample - assert(f->current_loc == sample_number); + // the next frame should start with the sample + if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed); return 1; } @@ -4514,7 +4594,8 @@ unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f) { // set. whoops! break; } - previous_safe = last_page_loc + 1; + // NOTE: not used after this point, but note for debugging + // previous_safe = last_page_loc + 1; last_page_loc = stb_vorbis_get_file_offset(f); } @@ -4618,7 +4699,10 @@ stb_vorbis *stb_vorbis_open_filename(const char *filename, int *error, stb_vorbis *stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc) { stb_vorbis *f, p; - if (data == NULL) return NULL; + if (!data) { + if (error) *error = VORBIS_unexpected_eof; + return NULL; + } vorbis_init(&p, alloc); p.stream = (uint8 *)data; p.stream_end = (uint8 *)data + len; @@ -4684,18 +4768,18 @@ static void copy_samples(short *dest, float *src, int len) { for (i = 0; i < len; ++i) { FASTDEF(temp); int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i], 15); - if ((unsigned int)(v + 32768) > 65535) v = v < 0 ? 
-32768 : 32767; + if (((unsigned int)v + 32768) > 65535) v = v < 0 ? -32768 : 32767; dest[i] = v; } } static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len) { -#define BUFFER_SIZE 32 - float buffer[BUFFER_SIZE]; - int i, j, o, n = BUFFER_SIZE; +#define STB_BUFFER_SIZE 32 + float buffer[STB_BUFFER_SIZE]; + int i, j, o, n = STB_BUFFER_SIZE; check_endianness(); - for (o = 0; o < len; o += BUFFER_SIZE) { + for (o = 0; o < len; o += STB_BUFFER_SIZE) { memset(buffer, 0, sizeof(buffer)); if (o + n > len) n = len - o; for (j = 0; j < num_c; ++j) { @@ -4706,20 +4790,21 @@ static void compute_samples(int mask, short *output, int num_c, float **data, for (i = 0; i < n; ++i) { FASTDEF(temp); int v = FAST_SCALED_FLOAT_TO_INT(temp, buffer[i], 15); - if ((unsigned int)(v + 32768) > 65535) v = v < 0 ? -32768 : 32767; + if (((unsigned int)v + 32768) > 65535) v = v < 0 ? -32768 : 32767; output[o + i] = v; } } +#undef STB_BUFFER_SIZE } static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len) { -#define BUFFER_SIZE 32 - float buffer[BUFFER_SIZE]; - int i, j, o, n = BUFFER_SIZE >> 1; +#define STB_BUFFER_SIZE 32 + float buffer[STB_BUFFER_SIZE]; + int i, j, o, n = STB_BUFFER_SIZE >> 1; // o is the offset in the source data check_endianness(); - for (o = 0; o < len; o += (BUFFER_SIZE >> 1)) { + for (o = 0; o < len; o += (STB_BUFFER_SIZE >> 1)) { // o2 is the offset in the output data int o2 = o << 1; memset(buffer, 0, sizeof(buffer)); @@ -4744,10 +4829,11 @@ static void compute_stereo_samples(short *output, int num_c, float **data, for (i = 0; i < (n << 1); ++i) { FASTDEF(temp); int v = FAST_SCALED_FLOAT_TO_INT(temp, buffer[i], 15); - if ((unsigned int)(v + 32768) > 65535) v = v < 0 ? -32768 : 32767; + if (((unsigned int)v + 32768) > 65535) v = v < 0 ? 
-32768 : 32767; output[o2 + i] = v; } } +#undef STB_BUFFER_SIZE } static void convert_samples_short(int buf_c, short **buffer, int b_offset, @@ -4771,7 +4857,7 @@ static void convert_samples_short(int buf_c, short **buffer, int b_offset, int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples) { - float **output; + float **output = NULL; int len = stb_vorbis_get_frame_float(f, NULL, &output); if (len > num_samples) len = num_samples; if (len) convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len); @@ -4796,7 +4882,7 @@ static void convert_channels_short_interleaved(int buf_c, short *buffer, float f = data[i][d_offset + j]; int v = FAST_SCALED_FLOAT_TO_INT(temp, f, 15); // data[i][d_offset+j],15); - if ((unsigned int)(v + 32768) > 65535) v = v < 0 ? -32768 : 32767; + if (((unsigned int)v + 32768) > 65535) v = v < 0 ? -32768 : 32767; *buffer++ = v; } for (; i < buf_c; ++i) *buffer++ = 0; @@ -4824,8 +4910,6 @@ int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, float **outputs; int len = num_shorts / channels; int n = 0; - int z = f->channels; - if (z > channels) z = channels; while (n < len) { int k = f->channel_buffer_end - f->channel_buffer_start; if (n + k >= len) k = len - n; @@ -4846,8 +4930,6 @@ int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len) { float **outputs; int n = 0; - int z = f->channels; - if (z > channels) z = channels; while (n < len) { int k = f->channel_buffer_end - f->channel_buffer_start; if (n + k >= len) k = len - n; diff --git a/third_party/stb/stb_vorbis.h b/third_party/stb/stb_vorbis.h index 758e251b4..4510b0188 100644 --- a/third_party/stb/stb_vorbis.h +++ b/third_party/stb/stb_vorbis.h @@ -43,9 +43,18 @@ typedef struct { int max_frame_size; } stb_vorbis_info; +typedef struct { + char *vendor; + int comment_list_length; + char **comment_list; +} stb_vorbis_comment; + // get general information about the file stb_vorbis_info 
stb_vorbis_get_info(stb_vorbis *f); +// get ogg comments +stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f); + // get the last error detected (clears it, too) int stb_vorbis_get_error(stb_vorbis *f); @@ -119,6 +128,12 @@ int stb_vorbis_decode_frame_pushdata( // channel. In other words, (*output)[0][0] contains the first sample from // the first channel, and (*output)[1][0] contains the first sample from // the second channel. +// +// *output points into stb_vorbis's internal output buffer storage; these +// buffers are owned by stb_vorbis and application code should not free +// them or modify their contents. They are transient and will be overwritten +// once you ask for more data to get decoded, so be sure to grab any data +// you need before then. void stb_vorbis_flush_pushdata(stb_vorbis *f); // inform stb_vorbis that your next datablock will not be contiguous with diff --git a/tool/viz/derasterize.c b/tool/viz/derasterize.c index 27c44c6ed..9f43b398d 100644 --- a/tool/viz/derasterize.c +++ b/tool/viz/derasterize.c @@ -551,8 +551,8 @@ static int ParseNumberOption(const char *arg) { return x; } -static void PrintUsage(int rc, FILE *f) { - fputs(HELPTEXT, f); +static void PrintUsage(int rc, int fd) { + tinyprint(fd, HELPTEXT, NULL); exit(rc); } @@ -573,9 +573,12 @@ static void GetOpts(int argc, char *argv[]) { break; case '?': case 'H': - PrintUsage(EXIT_SUCCESS, stdout); default: - PrintUsage(EX_USAGE, stderr); + if (opt == optopt) { + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); + } else { + PrintUsage(EX_USAGE, STDERR_FILENO); + } } } } diff --git a/tool/viz/memzoom.c b/tool/viz/memzoom.c index 63c98dced..a0bcf9297 100644 --- a/tool/viz/memzoom.c +++ b/tool/viz/memzoom.c @@ -45,6 +45,7 @@ #include "libc/str/unicode.h" #include "libc/sysv/consts/ex.h" #include "libc/sysv/consts/exit.h" +#include "libc/sysv/consts/fileno.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/poll.h" @@ -63,7 +64,7 @@ DESCRIPTION\n\ \n\ 
FLAGS\n\ \n\ - -h help\n\ + -h or -? help\n\ -z zoom\n\ -m morton ordering\n\ -H hilbert ordering\n\ @@ -887,10 +888,8 @@ static void MemZoom(void) { } while (!(action & INTERRUPTED)); } -static wontreturn void PrintUsage(int rc) { - Write("SYNOPSIS\n\n "); - Write(program_invocation_name); - Write(USAGE); +static wontreturn void PrintUsage(int rc, int fd) { + tinyprint(fd, "SYNOPSIS\n\n ", program_invocation_name, USAGE, NULL); exit(rc); } @@ -898,7 +897,7 @@ static void GetOpts(int argc, char *argv[]) { int opt; char *p; fps = 10; - while ((opt = getopt(argc, argv, "hzHNWf:p:")) != -1) { + while ((opt = getopt(argc, argv, "?hmzHNWf:p:")) != -1) { switch (opt) { case 'z': ++zoom; @@ -927,9 +926,13 @@ static void GetOpts(int argc, char *argv[]) { } break; case 'h': - PrintUsage(EXIT_SUCCESS); + case '?': default: - PrintUsage(EX_USAGE); + if (opt == optopt) { + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); + } else { + PrintUsage(EX_USAGE, STDERR_FILENO); + } } } if (pid) { @@ -941,10 +944,10 @@ static void GetOpts(int argc, char *argv[]) { stpcpy(p, "/maps"); } else { if (optind == argc) { - PrintUsage(EX_USAGE); + PrintUsage(EX_USAGE, STDERR_FILENO); } if (!memccpy(path, argv[optind], '\0', sizeof(path))) { - PrintUsage(EX_USAGE); + PrintUsage(EX_USAGE, STDERR_FILENO); } } } diff --git a/tool/viz/od16.c b/tool/viz/od16.c index 16e221fc6..9fabccc9d 100644 --- a/tool/viz/od16.c +++ b/tool/viz/od16.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" #include "libc/errno.h" #include "libc/fmt/conv.h" #include "libc/log/check.h" @@ -25,6 +26,7 @@ #include "libc/str/str.h" #include "libc/sysv/consts/ex.h" #include "libc/sysv/consts/exit.h" +#include "libc/sysv/consts/fileno.h" #include "third_party/getopt/getopt.internal.h" #define USAGE \ @@ -36,17 +38,15 @@ Flags:\n\ -c INT\n\ -w INT width (aka cols) [default 8]\n\ -o PATH output path [default -]\n\ - -h shows this information\n\ + -h or -? shows this information\n\ \n" static long width_; static FILE *in_, *out_; static char *inpath_, *outpath_; -void PrintUsage(int rc, FILE *f) { - fputs("Usage: ", f); - fputs(program_invocation_name, f); - fputs(USAGE, f); +void PrintUsage(int rc, int fd) { + tinyprint(fd, "Usage: ", program_invocation_name, USAGE, NULL); exit(rc); } @@ -63,11 +63,14 @@ void GetOpts(int *argc, char *argv[]) { case 'w': width_ = strtol(optarg, NULL, 0); break; - case '?': case 'h': - PrintUsage(EXIT_SUCCESS, stdout); + case '?': default: - PrintUsage(EX_USAGE, stderr); + if (opt == optopt) { + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); + } else { + PrintUsage(EX_USAGE, STDERR_FILENO); + } } } if (optind == *argc) { diff --git a/tool/viz/printansi.c b/tool/viz/printansi.c index 6e98e6014..810d8bb79 100644 --- a/tool/viz/printansi.c +++ b/tool/viz/printansi.c @@ -71,8 +71,8 @@ static struct Flags { enum TtyQuantizationAlgorithm quant; } g_flags; -static wontreturn void PrintUsage(int rc, FILE *f) { - fprintf(f, "Usage: %s%s", program_invocation_name, "\ +static wontreturn void PrintUsage(int rc, int fd) { + tinyprint(fd, "Usage: ", program_invocation_name, "\ [FLAGS] [PATH]\n\ \n\ FLAGS\n\ @@ -86,7 +86,7 @@ EXAMPLES\n\ \n\ printansi.com -w80 -h40 logo.png\n\ \n\ -\n"); +\n", NULL); exit(rc); } @@ -107,7 +107,7 @@ static void GetOpts(int *argc, char *argv[]) { g_flags.blocks = IsWindows() ? 
kTtyBlocksCp437 : kTtyBlocksUnicode; if (*argc == 2 && (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-help") == 0)) { - PrintUsage(EXIT_SUCCESS, stdout); + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); } while ((opt = getopt(*argc, argv, "?ivpfrtxads234o:w:h:")) != -1) { switch (opt) { @@ -162,9 +162,12 @@ static void GetOpts(int *argc, char *argv[]) { ++__log_level; break; case '?': - PrintUsage(EXIT_SUCCESS, stdout); default: - PrintUsage(EX_USAGE, stderr); + if (opt == optopt) { + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); + } else { + PrintUsage(EX_USAGE, STDERR_FILENO); + } } } if (optind == *argc) { diff --git a/tool/viz/printimage.c b/tool/viz/printimage.c index e82c39b57..7c2d35c67 100644 --- a/tool/viz/printimage.c +++ b/tool/viz/printimage.c @@ -66,8 +66,8 @@ static struct Flags { struct winsize g_winsize; -static wontreturn void PrintUsage(int rc, FILE *f) { - fprintf(f, "Usage: %s%s", program_invocation_name, "\ +static wontreturn void PrintUsage(int rc, int fd) { + tinyprint(fd, "Usage: ", program_invocation_name, "\ [FLAGS] [PATH]\n\ \n\ FLAGS\n\ @@ -94,7 +94,7 @@ FLAGS\n\ EXAMPLES\n\ \n\ printimage.com -sxd lemurs.jpg # 256-color dither unsharp\n\ -\n"); +\n", NULL); exit(rc); } @@ -114,7 +114,7 @@ static void GetOpts(int *argc, char *argv[]) { g_flags.blocks = IsWindows() ? 
kTtyBlocksCp437 : kTtyBlocksUnicode; if (*argc == 2 && (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-help") == 0)) { - PrintUsage(EXIT_SUCCESS, stdout); + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); } while ((opt = getopt(*argc, argv, "?vpmfirtxads234o:w:h:")) != -1) { switch (opt) { @@ -170,9 +170,12 @@ static void GetOpts(int *argc, char *argv[]) { ++__log_level; break; case '?': - PrintUsage(EXIT_SUCCESS, stdout); default: - PrintUsage(EX_USAGE, stderr); + if (opt == optopt) { + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); + } else { + PrintUsage(EX_USAGE, STDERR_FILENO); + } } } g_winsize.ws_col = 80; @@ -435,7 +438,7 @@ int main(int argc, char *argv[]) { int i; ShowCrashReports(); GetOpts(&argc, argv); - if (optind == argc) PrintUsage(0, stdout); + if (optind == argc) PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); stbi_set_unpremultiply_on_load(true); for (i = optind; i < argc; ++i) { WithImageFile(argv[i], ProcessImage); diff --git a/tool/viz/printvideo.c b/tool/viz/printvideo.c index ccff3df9f..727afad59 100644 --- a/tool/viz/printvideo.c +++ b/tool/viz/printvideo.c @@ -123,7 +123,7 @@ Flags & Keyboard Shortcuts:\n\ -v increases verbosity [flag]\n\ -L PATH redirects stderr to path [flag]\n\ -y yes to interactive prompts [flag]\n\ - -h shows this information [flag]\n\ + -h or -? 
shows this information [flag]\n\ UP/DOWN adjust volume [keyboard]\n\ CTRL+L redraw [keyboard]\n\ CTRL+Z suspend [keyboard]\n\ @@ -1374,10 +1374,8 @@ static bool CanPlayAudio(void) { } } -static void PrintUsage(int rc, FILE *f) { - fputs("Usage: ", f); - fputs(program_invocation_name, f); - fputs(USAGE, f); +static void PrintUsage(int rc, int fd) { + tinyprint(fd, "Usage: ", program_invocation_name, USAGE, NULL); exit(rc); } @@ -1399,12 +1397,15 @@ static void GetOpts(int argc, char *argv[]) { case 'Y': yonly_ = true; break; - case '?': case 'h': - PrintUsage(EXIT_SUCCESS, stdout); + case '?': default: if (!ProcessOptKey(opt)) { - PrintUsage(EX_USAGE, stderr); + if (opt == optopt) { + PrintUsage(EXIT_SUCCESS, STDOUT_FILENO); + } else { + PrintUsage(EX_USAGE, STDERR_FILENO); + } } } } @@ -1562,7 +1563,7 @@ int main(int argc, char *argv[]) { fullclear_ = true; GetOpts(argc, argv); if (!tuned_) PickDefaults(); - if (optind == argc) PrintUsage(EX_USAGE, stderr); + if (optind == argc) PrintUsage(EX_USAGE, STDERR_FILENO); patharg_ = argv[optind]; s = commandvenv("SOX", "sox"); sox_ = s ? strdup(s) : 0;