cosmopolitan/third_party/python/Modules/_decimal/libmpdec/io.c
2022-09-13 04:14:55 -07:00

1541 lines
44 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright (c) 2008-2016 Stefan Krah. All rights reserved. │
│ │
│ Redistribution and use in source and binary forms, with or without │
│ modification, are permitted provided that the following conditions │
│ are met: │
│ │
│ 1. Redistributions of source code must retain the above copyright │
│ notice, this list of conditions and the following disclaimer. │
│ │
│ 2. Redistributions in binary form must reproduce the above copyright │
│ notice, this list of conditions and the following disclaimer in │
│ the documentation and/or other materials provided with the │
│ distribution. │
│ │
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND │
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR │
│ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS │
│ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, │
│ OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT │
│ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR │
│ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, │
│ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE │
│ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, │
│ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/errno.h"
#include "libc/fmt/fmt.h"
#include "libc/str/locale.h"
#include "libc/str/unicode.h"
#include "third_party/python/Modules/_decimal/libmpdec/bits.h"
#include "third_party/python/Modules/_decimal/libmpdec/constants.h"
#include "third_party/python/Modules/_decimal/libmpdec/io.h"
#include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
#include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
/* clang-format off */
#if __GNUC__ >= 11
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
#endif
asm(".ident\t\"\\n\\n\
libmpdec (BSD-2)\\n\
Copyright 2008-2016 Stefan Krah\"");
asm(".include \"libc/disclaimer.inc\"");
/* This file contains functions for decimal <-> string conversions, including
PEP-3101 formatting for numeric types. */
/* Disable warning that is part of -Wextra since gcc 7.0. */
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && __GNUC__ >= 7
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif
/*
 * Locale-independent, case-insensitive prefix comparison.
 *
 * tolower() and strcasecmp() cannot be used here because their results
 * depend on the locale. For example, in tr_TR.utf8:
 *
 *     tolower((unsigned char)'I') == 'I'
 *
 * 'l' is the lowercase pattern and 'u' is its exact uppercase version.
 * 'n' is the number of positions to compare: strlen(l), or strlen(l)+1 if
 * the terminating NUL should take part in the comparison as well.
 *
 * Return 1 if each of the first n characters of 's' equals the character
 * at the same position in 'l' or in 'u', 0 otherwise.
 */
static inline int
_mpd_strneq(const char *s, const char *l, const char *u, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        if (s[i] != l[i] && s[i] != u[i]) {
            return 0;
        }
    }

    return 1;
}
/*
 * Convert the exponent digits in 's' with mpd_strtossize() (base 10).
 *
 * errno is cleared before the conversion. If the conversion itself left
 * errno untouched but 's' is empty or is followed by trailing characters,
 * errno is set to EINVAL. The converted value is returned in every case,
 * so callers have to inspect errno.
 */
static mpd_ssize_t
strtoexp(const char *s)
{
    char *endptr;
    mpd_ssize_t value;

    errno = 0;
    value = mpd_strtossize(s, &endptr, 10);
    if (errno != 0) {
        /* keep the error reported by the conversion */
        return value;
    }

    if (*s == '\0' || *endptr != '\0') {
        errno = EINVAL;
    }

    return value;
}
/*
 * Fill 'len' coefficient words from the digit string 's', most significant
 * word first (data[len-1] down to data[0]).
 *
 * If r > 0, the most significant word receives 'r' digits; every other
 * word receives MPD_RDIGITS digits. 's' must consist of digits and an
 * optional single decimal point located at 'dpoint', which is skipped.
 */
static void
string_to_coeff(mpd_uint_t *data, const char *s, const char *dpoint, int r,
                size_t len)
{
    mpd_uint_t word;
    int k;

    if (r > 0) {
        /* partially filled most significant word */
        len--;
        word = 0;
        for (k = 0; k < r; k++) {
            if (s == dpoint) {
                s++;
            }
            word = 10 * word + (*s - '0');
            s++;
        }
        data[len] = word;
    }

    /* remaining full words */
    while (len-- > 0) {
        word = 0;
        for (k = 0; k < MPD_RDIGITS; k++) {
            if (s == dpoint) {
                s++;
            }
            word = 10 * word + (*s - '0');
            s++;
        }
        data[len] = word;
    }
}
/*
* Partially verify a numeric string of the form:
*
* [cdigits][.][cdigits][eE][+-][edigits]
*
* If successful, return a pointer to the location of the first
* relevant coefficient digit. This digit is either non-zero or
* part of one of the following patterns:
*
* ["0\x00", "0.\x00", "0.E", "0.e", "0E", "0e"]
*
* The locations of a single optional dot or indicator are stored
* in 'dpoint' and 'exp'.
*
* The end of the string is stored in 'end'. If an indicator [eE]
* occurs without trailing [edigits], the condition is caught
* later by strtoexp().
*/
static const char *
scan_dpoint_exp(const char *s, const char **dpoint, const char **exp,
const char **end)
{
const char *coeff = NULL;
*dpoint = NULL;
*exp = NULL;
for (; *s != '\0'; s++) {
switch (*s) {
case '.':
if (*dpoint != NULL || *exp != NULL)
return NULL;
*dpoint = s;
break;
case 'E': case 'e':
if (*exp != NULL)
return NULL;
*exp = s;
if (*(s+1) == '+' || *(s+1) == '-')
s++;
break;
default:
if (!isdigit((uchar)*s))
return NULL;
if (coeff == NULL && *exp == NULL) {
if (*s == '0') {
if (!isdigit((uchar)*(s+1)))
if (!(*(s+1) == '.' &&
isdigit((uchar)*(s+2))))
coeff = s;
}
else {
coeff = s;
}
}
break;
}
}
*end = s;
return coeff;
}
/*
 * Scan the payload of a NaN.
 *
 * Leading zeros are skipped and the following run of digits is consumed.
 * 'end' receives the position where scanning stopped. If that position is
 * the end of the string, the start of the payload without leading zeros is
 * returned (it points at the NUL if the payload was all zeros); otherwise
 * the payload contains a non-digit and NULL is returned.
 */
static const char *
scan_payload(const char *s, const char **end)
{
    const char *first;
    const char *p = s;

    for (; *p == '0'; p++) {
        /* skip leading zeros */
    }
    first = p;

    for (; isdigit((uchar)*p); p++) {
        /* consume payload digits */
    }
    *end = p;

    if (*p != '\0') {
        return NULL;
    }
    return first;
}
/*
 * Convert the character string 's' to a decimal and store it in 'dec'.
 *
 * Accepted input, as implemented below:
 *   - an optional leading '+' or '-';
 *   - "nan" or "snan" (each letter in either case), optionally followed by
 *     a payload that consists of digits only;
 *   - "inf" or "infinity" (each letter in either case);
 *   - otherwise a numeric string that passes scan_dpoint_exp() and, if an
 *     indicator is present, strtoexp().
 *
 * Everything else ends at 'conversion_error', which calls mpd_seterror()
 * with MPD_Conversion_syntax. If resizing 'dec' fails, mpd_seterror() is
 * called with MPD_Malloc_error instead. On success the coefficient words
 * are filled in and the result is handed to mpd_qfinalize() with 'ctx'.
 */
void
mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
uint32_t *status)
{
mpd_ssize_t q, r, len;
const char *coeff, *end;
const char *dpoint = NULL, *exp = NULL;
size_t digits;
uint8_t sign = MPD_POS;
mpd_set_flags(dec, 0);
dec->len = 0;
dec->exp = 0;
/* sign */
if (*s == '+') {
s++;
}
else if (*s == '-') {
mpd_set_negative(dec);
sign = MPD_NEG;
s++;
}
if (_mpd_strneq(s, "nan", "NAN", 3)) { /* NaN */
s += 3;
mpd_setspecial(dec, sign, MPD_NAN);
/* NaN without payload */
if (*s == '\0')
return;
/* validate payload: digits only */
if ((coeff = scan_payload(s, &end)) == NULL)
goto conversion_error;
/* payload consists entirely of zeros */
if (*coeff == '\0')
return;
digits = end - coeff;
/* prec >= 1, clamp is 0 or 1 */
if (digits > (size_t)(ctx->prec-ctx->clamp))
goto conversion_error;
/* The payload is stored by the common coefficient code below;
 * dpoint is still NULL and dec->exp is still 0 on this path. */
} /* sNaN */
else if (_mpd_strneq(s, "snan", "SNAN", 4)) {
s += 4;
mpd_setspecial(dec, sign, MPD_SNAN);
/* sNaN without payload */
if (*s == '\0')
return;
/* validate payload: digits only */
if ((coeff = scan_payload(s, &end)) == NULL)
goto conversion_error;
/* payload consists entirely of zeros */
if (*coeff == '\0')
return;
digits = end - coeff;
/* same payload length limit as for NaN */
if (digits > (size_t)(ctx->prec-ctx->clamp))
goto conversion_error;
}
else if (_mpd_strneq(s, "inf", "INF", 3)) {
s += 3;
/* n == 6 is strlen("inity")+1: the terminating NUL is compared as
 * well, so nothing may follow "infinity". */
if (*s == '\0' || _mpd_strneq(s, "inity", "INITY", 6)) {
/* numeric-value: infinity */
mpd_setspecial(dec, sign, MPD_INF);
return;
}
goto conversion_error;
}
else {
/* scan for start of coefficient, decimal point, indicator, end */
if ((coeff = scan_dpoint_exp(s, &dpoint, &exp, &end)) == NULL)
goto conversion_error;
/* numeric-value: [exponent-part] */
if (exp) {
/* exponent-part */
/* From here on 'end' marks the end of the coefficient (the
 * indicator) and 'exp' points past the indicator. */
end = exp; exp++;
dec->exp = strtoexp(exp);
if (errno) {
/* Only ERANGE together with a result of MPD_SSIZE_MAX or
 * MPD_SSIZE_MIN is tolerated; the exponent is limited below. */
if (!(errno == ERANGE &&
(dec->exp == MPD_SSIZE_MAX ||
dec->exp == MPD_SSIZE_MIN)))
goto conversion_error;
}
}
digits = end - coeff;
if (dpoint) {
size_t fracdigits = end-dpoint-1;
/* the dot lies inside [coeff, end) and must not be counted */
if (dpoint > coeff) digits--;
if (fracdigits > MPD_MAX_PREC) {
goto conversion_error;
}
/* subtract the fraction digits from the exponent without
 * going below MPD_SSIZE_MIN */
if (dec->exp < MPD_SSIZE_MIN+(mpd_ssize_t)fracdigits) {
dec->exp = MPD_SSIZE_MIN;
}
else {
dec->exp -= (mpd_ssize_t)fracdigits;
}
}
if (digits > MPD_MAX_PREC) {
goto conversion_error;
}
/* limit the exponent to [MPD_SSIZE_MIN+1, MPD_EXP_INF] */
if (dec->exp > MPD_EXP_INF) {
dec->exp = MPD_EXP_INF;
}
if (dec->exp == MPD_SSIZE_MIN) {
dec->exp = MPD_SSIZE_MIN+1;
}
}
/* number of coefficient words: ceil(digits / MPD_RDIGITS); r is the
 * number of digits in the most significant word if it is not full */
_mpd_idiv_word(&q, &r, (mpd_ssize_t)digits, MPD_RDIGITS);
len = (r == 0) ? q : q+1;
if (len == 0) {
goto conversion_error; /* GCOV_NOT_REACHED */
}
if (!mpd_qresize(dec, len, status)) {
mpd_seterror(dec, MPD_Malloc_error, status);
return;
}
dec->len = len;
string_to_coeff(dec->data, coeff, dpoint, (int)r, len);
mpd_setdigits(dec);
mpd_qfinalize(dec, ctx, status);
return;
conversion_error:
/* standard wants a positive NaN */
mpd_seterror(dec, MPD_Conversion_syntax, status);
}
/*
 * EXTRACT_DIGIT: if 's' has reached 'dot', emit a '.' first; then emit the
 * digit x / d and reduce x to x % d.
 *
 * The macro expands to several statements without a do/while wrapper and
 * evaluates its arguments more than once, so it must only be used where a
 * plain statement sequence is intended (as in the case labels below).
 */
#define EXTRACT_DIGIT(s, x, d, dot) \
if (s == dot) *s++ = '.'; *s++ = '0' + (char)(x / d); x %= d
/*
 * Print word x with n decimal digits to string s. dot is either NULL
 * or the location of a decimal point.
 *
 * For 2 <= n <= 20 the switch is entered at 'case n' and deliberately falls
 * through all lower cases, so that exactly n digits are written. Any other
 * n writes a single digit via the default label. The string is
 * NUL-terminated and the returned pointer addresses that NUL, which allows
 * calls to be chained.
 *
 * NOTE(review): the output is only a valid digit string if x has at most
 * n decimal digits; this is presumably guaranteed by the callers.
 */
static inline char *
word_to_string(char *s, mpd_uint_t x, int n, char *dot)
{
switch(n) {
case 20: EXTRACT_DIGIT(s, x, 10000000000000000000ULL, dot); /* GCOV_NOT_REACHED */
case 19: EXTRACT_DIGIT(s, x, 1000000000000000000ULL, dot);
case 18: EXTRACT_DIGIT(s, x, 100000000000000000ULL, dot);
case 17: EXTRACT_DIGIT(s, x, 10000000000000000ULL, dot);
case 16: EXTRACT_DIGIT(s, x, 1000000000000000ULL, dot);
case 15: EXTRACT_DIGIT(s, x, 100000000000000ULL, dot);
case 14: EXTRACT_DIGIT(s, x, 10000000000000ULL, dot);
case 13: EXTRACT_DIGIT(s, x, 1000000000000ULL, dot);
case 12: EXTRACT_DIGIT(s, x, 100000000000ULL, dot);
case 11: EXTRACT_DIGIT(s, x, 10000000000ULL, dot);
case 10: EXTRACT_DIGIT(s, x, 1000000000UL, dot);
case 9: EXTRACT_DIGIT(s, x, 100000000UL, dot);
case 8: EXTRACT_DIGIT(s, x, 10000000UL, dot);
case 7: EXTRACT_DIGIT(s, x, 1000000UL, dot);
case 6: EXTRACT_DIGIT(s, x, 100000UL, dot);
case 5: EXTRACT_DIGIT(s, x, 10000UL, dot);
case 4: EXTRACT_DIGIT(s, x, 1000UL, dot);
case 3: EXTRACT_DIGIT(s, x, 100UL, dot);
case 2: EXTRACT_DIGIT(s, x, 10UL, dot);
/* last digit: x is the remainder left by the cases above */
default: if (s == dot) *s++ = '.'; *s++ = '0' + (char)x;
}
*s = '\0';
return s;
}
/*
 * Print exponent x to string s as an explicit sign ('+' or '-') followed
 * by the decimal digits of its magnitude. Return the pointer that
 * word_to_string() returns, i.e. the location of the terminating NUL.
 *
 * Undefined for MPD_SSIZE_MIN, since its magnitude cannot be negated.
 */
static inline char *
exp_to_string(char *s, mpd_ssize_t x)
{
    mpd_ssize_t magnitude = x;

    if (x < 0) {
        *s = '-';
        magnitude = -x;
    }
    else {
        *s = '+';
    }

    return word_to_string(s + 1, magnitude, mpd_word_digits(magnitude), NULL);
}
/*
 * Print the coefficient of dec to string s without a decimal point.
 * Requires len(dec) > 0. Return the location of the terminating NUL.
 */
static inline char *
coeff_to_string(char *s, const mpd_t *dec)
{
    const mpd_uint_t top = mpd_msword(dec);
    mpd_ssize_t idx = dec->len - 1;

    /* The most significant word is printed without leading zeros. */
    s = word_to_string(s, top, mpd_word_digits(top), NULL);

    /* All lower words are printed with the full MPD_RDIGITS digits. */
    while (idx-- > 0) {
        s = word_to_string(s, dec->data[idx], MPD_RDIGITS, NULL);
    }

    return s;
}
/*
 * Print the coefficient of dec to string s, inserting a '.' when the write
 * position reaches 'dot'. 'dot' is either NULL or the location of the
 * decimal point inside the output. Requires len(dec) > 0. Return the
 * location of the terminating NUL.
 */
static inline char *
coeff_to_string_dot(char *s, char *dot, const mpd_t *dec)
{
    mpd_ssize_t idx;
    mpd_uint_t word = mpd_msword(dec);

    /* The most significant word is printed without leading zeros. */
    s = word_to_string(s, word, mpd_word_digits(word), dot);

    /* All lower words are printed with the full MPD_RDIGITS digits. */
    for (idx = dec->len - 1; idx > 0; idx--) {
        word = dec->data[idx - 1];
        s = word_to_string(s, word, MPD_RDIGITS, dot);
    }

    return s;
}
/* Format type: bit flags that are combined in the 'flags' argument of
   _mpd_to_string(). */
#define MPD_FMT_LOWER 0x00000000 /* no bit set: exponent indicator 'e' */
#define MPD_FMT_UPPER 0x00000001 /* exponent indicator 'E' */
#define MPD_FMT_TOSCI 0x00000002 /* scientific string (mpd_to_sci, 'g') */
#define MPD_FMT_TOENG 0x00000004 /* engineering string (mpd_to_eng) */
#define MPD_FMT_EXP 0x00000008 /* always print an exponent ('e') */
#define MPD_FMT_FIXED 0x00000010 /* always fixed point notation ('f') */
#define MPD_FMT_PERCENT 0x00000020 /* append a '%' character */
#define MPD_FMT_SIGN_SPACE 0x00000040 /* ' ' in front of non-negative values */
#define MPD_FMT_SIGN_PLUS 0x00000080 /* '+' in front of non-negative values */
/* Default place of the decimal point for MPD_FMT_TOSCI, MPD_FMT_EXP */
#define MPD_DEFAULT_DOTPLACE 1
/*
 * Set *result to the string representation of a decimal. Return the length
 * of *result, not including the terminating '\0' character.
 *
 * Formatting is done according to 'flags' (a combination of the MPD_FMT_*
 * bits). A return value of -1 with *result set to NULL indicates
 * MPD_Malloc_error. On success *result is a buffer obtained from
 * mpd_alloc().
 *
 * 'dplace' is the default place of the decimal point. It is always set to
 * MPD_DEFAULT_DOTPLACE except for zeros in combination with MPD_FMT_EXP.
 */
static mpd_ssize_t
_mpd_to_string(char **result, const mpd_t *dec, int flags, mpd_ssize_t dplace)
{
char *decstring = NULL, *cp = NULL;
mpd_ssize_t ldigits;
mpd_ssize_t mem = 0, k;
if (mpd_isspecial(dec)) {
/* room for the longest special string including the NUL */
mem = sizeof "-Infinity%";
if (mpd_isnan(dec) && dec->len > 0) {
/* diagnostic code */
mem += dec->digits;
}
cp = decstring = mpd_alloc(mem, sizeof *decstring);
if (cp == NULL) {
*result = NULL;
return -1;
}
/* sign: '-' for negative values, otherwise as requested by flags */
if (mpd_isnegative(dec)) {
*cp++ = '-';
}
else if (flags&MPD_FMT_SIGN_SPACE) {
*cp++ = ' ';
}
else if (flags&MPD_FMT_SIGN_PLUS) {
*cp++ = '+';
}
if (mpd_isnan(dec)) {
if (mpd_isqnan(dec)) {
strcpy(cp, "NaN");
cp += 3;
}
else {
strcpy(cp, "sNaN");
cp += 4;
}
if (dec->len > 0) { /* diagnostic code */
cp = coeff_to_string(cp, dec);
}
}
else if (mpd_isinfinite(dec)) {
strcpy(cp, "Infinity");
cp += 8;
}
else { /* debug */
abort(); /* GCOV_NOT_REACHED */
}
}
else {
assert(dec->len > 0);
/*
* For easier manipulation of the decimal point's location
* and the exponent that is finally printed, the number is
* rescaled to a virtual representation with exp = 0. Here
* ldigits denotes the number of decimal digits to the left
* of the decimal point and remains constant once initialized.
*
* dplace is the location of the decimal point relative to
* the start of the coefficient. Note that 3) always holds
* when dplace is shifted.
*
* 1) ldigits := dec->digits + dec->exp
* 2) dplace := ldigits (initially)
* 3) exp := ldigits - dplace (initially exp = 0)
*
* 0.00000_.____._____000000.
* ^ ^ ^ ^
* | | | |
* | | | `- dplace >= digits
* | | `- dplace in the middle of the coefficient
* | ` dplace = 1 (after the first coefficient digit)
* `- dplace <= 0
*/
ldigits = dec->digits + dec->exp;
if (flags&MPD_FMT_EXP) {
/* keep the dplace passed in by the caller */
;
}
else if (flags&MPD_FMT_FIXED || (dec->exp <= 0 && ldigits > -6)) {
/* MPD_FMT_FIXED: always use fixed point notation.
* MPD_FMT_TOSCI, MPD_FMT_TOENG: for a certain range,
* override exponent notation. */
dplace = ldigits;
}
else if (flags&MPD_FMT_TOENG) {
if (mpd_iszero(dec)) {
/* If the exponent is divisible by three,
* dplace = 1. Otherwise, move dplace one
* or two places to the left. */
dplace = -1 + mod_mpd_ssize_t(dec->exp+2, 3);
}
else { /* ldigits-1 is the adjusted exponent, which
* should be divisible by three. If not, move
* dplace one or two places to the right. */
dplace += mod_mpd_ssize_t(ldigits-1, 3);
}
}
/*
* Basic space requirements:
*
* [-][.][coeffdigits][E][-][expdigits+1][%]['\0']
*
* If the decimal point lies outside of the coefficient digits,
* space is adjusted accordingly.
*/
if (dplace <= 0) {
mem = -dplace + dec->digits + 2;
}
else if (dplace >= dec->digits) {
mem = dplace;
}
else {
mem = dec->digits;
}
mem += (MPD_EXPDIGITS+1+6);
cp = decstring = mpd_alloc(mem, sizeof *decstring);
if (cp == NULL) {
*result = NULL;
return -1;
}
/* sign: '-' for negative values, otherwise as requested by flags */
if (mpd_isnegative(dec)) {
*cp++ = '-';
}
else if (flags&MPD_FMT_SIGN_SPACE) {
*cp++ = ' ';
}
else if (flags&MPD_FMT_SIGN_PLUS) {
*cp++ = '+';
}
if (dplace <= 0) {
/* space: -dplace+dec->digits+2 */
/* "0." followed by -dplace zeros and the coefficient */
*cp++ = '0';
*cp++ = '.';
for (k = 0; k < -dplace; k++) {
*cp++ = '0';
}
cp = coeff_to_string(cp, dec);
}
else if (dplace >= dec->digits) {
/* space: dplace */
/* coefficient followed by dplace-digits zeros, no dot */
cp = coeff_to_string(cp, dec);
for (k = 0; k < dplace-dec->digits; k++) {
*cp++ = '0';
}
}
else {
/* space: dec->digits+1 */
/* the dot is inserted after dplace coefficient digits */
cp = coeff_to_string_dot(cp, cp+dplace, dec);
}
/*
* Conditions for printing an exponent:
*
* MPD_FMT_TOSCI, MPD_FMT_TOENG: only if ldigits != dplace
* MPD_FMT_FIXED: never (ldigits == dplace)
* MPD_FMT_EXP: always
*/
if (ldigits != dplace || flags&MPD_FMT_EXP) {
/* space: expdigits+2 */
*cp++ = (flags&MPD_FMT_UPPER) ? 'E' : 'e';
cp = exp_to_string(cp, ldigits-dplace);
}
}
if (flags&MPD_FMT_PERCENT) {
*cp++ = '%';
}
/* the terminating NUL must still fit into the allocation */
assert(cp < decstring+mem);
assert(cp-decstring < MPD_SSIZE_MAX);
*cp = '\0';
*result = decstring;
return (mpd_ssize_t)(cp-decstring);
}
/*
 * Return the scientific string of dec as produced by _mpd_to_string() with
 * MPD_FMT_TOSCI. A non-zero 'fmt' selects an uppercase exponent indicator.
 * NULL is returned if the string could not be allocated.
 */
char *
mpd_to_sci(const mpd_t *dec, int fmt)
{
    char *out;
    const int flags = MPD_FMT_TOSCI | (fmt ? MPD_FMT_UPPER : MPD_FMT_LOWER);

    (void)_mpd_to_string(&out, dec, flags, MPD_DEFAULT_DOTPLACE);
    return out;
}
/*
 * Return the engineering string of dec as produced by _mpd_to_string() with
 * MPD_FMT_TOENG. A non-zero 'fmt' selects an uppercase exponent indicator.
 * NULL is returned if the string could not be allocated.
 */
char *
mpd_to_eng(const mpd_t *dec, int fmt)
{
    char *out;
    const int flags = MPD_FMT_TOENG | (fmt ? MPD_FMT_UPPER : MPD_FMT_LOWER);

    (void)_mpd_to_string(&out, dec, flags, MPD_DEFAULT_DOTPLACE);
    return out;
}
/*
 * Like mpd_to_sci(), but the string is stored in *res and its length
 * (without the terminating NUL) is returned. On allocation failure *res is
 * NULL and the return value is -1.
 */
mpd_ssize_t
mpd_to_sci_size(char **res, const mpd_t *dec, int fmt)
{
    const int flags = MPD_FMT_TOSCI | (fmt ? MPD_FMT_UPPER : MPD_FMT_LOWER);

    return _mpd_to_string(res, dec, flags, MPD_DEFAULT_DOTPLACE);
}
/*
 * Like mpd_to_eng(), but the string is stored in *res and its length
 * (without the terminating NUL) is returned. On allocation failure *res is
 * NULL and the return value is -1.
 */
mpd_ssize_t
mpd_to_eng_size(char **res, const mpd_t *dec, int fmt)
{
    const int flags = MPD_FMT_TOENG | (fmt ? MPD_FMT_UPPER : MPD_FMT_LOWER);

    return _mpd_to_string(res, dec, flags, MPD_DEFAULT_DOTPLACE);
}
/*
 * Copy a single UTF-8 char from s to dest and NUL-terminate dest. See: The
 * Unicode Standard, version 5.2, chapter 3.9: Well-formed UTF-8 byte
 * sequences.
 *
 * Return the number of bytes copied (1 to 4), 0 if s is the empty string,
 * or -1 if s does not start with a well-formed UTF-8 sequence. In the
 * latter two cases dest is set to the empty string.
 *
 * Lead bytes 0x80..0xbf (stray continuation bytes) and 0xc0, 0xc1 (overlong
 * two-byte forms) are rejected explicitly. Without that check they would be
 * caught by the "<= 0xec" range below and be accepted as three-byte leads.
 */
static int
_mpd_copy_utf8(char dest[5], const char *s)
{
    const unsigned char *cp = (const unsigned char *)s;
    unsigned char lb, ub; /* valid range of the second byte */
    int count, i;

    if (*cp == 0) {
        /* empty string */
        dest[0] = '\0';
        return 0;
    }
    else if (*cp <= 0x7f) {
        /* ascii */
        dest[0] = *cp;
        dest[1] = '\0';
        return 1;
    }
    else if (*cp < 0xc2) {
        /* continuation byte or overlong lead byte: never a valid start */
        goto error;
    }
    else if (*cp <= 0xdf) {
        lb = 0x80; ub = 0xbf;
        count = 2;
    }
    else if (*cp == 0xe0) {
        /* excludes overlong three-byte forms */
        lb = 0xa0; ub = 0xbf;
        count = 3;
    }
    else if (*cp <= 0xec) {
        lb = 0x80; ub = 0xbf;
        count = 3;
    }
    else if (*cp == 0xed) {
        /* excludes the surrogate range U+D800..U+DFFF */
        lb = 0x80; ub = 0x9f;
        count = 3;
    }
    else if (*cp <= 0xef) {
        lb = 0x80; ub = 0xbf;
        count = 3;
    }
    else if (*cp == 0xf0) {
        /* excludes overlong four-byte forms */
        lb = 0x90; ub = 0xbf;
        count = 4;
    }
    else if (*cp <= 0xf3) {
        lb = 0x80; ub = 0xbf;
        count = 4;
    }
    else if (*cp == 0xf4) {
        /* excludes code points above U+10FFFF */
        lb = 0x80; ub = 0x8f;
        count = 4;
    }
    else {
        /* invalid */
        goto error;
    }

    dest[0] = *cp++;
    /* The second byte has a lead-byte specific range. A NUL is always
     * below lb, so a truncated sequence is rejected without reading
     * past the end of the string. */
    if (*cp < lb || ub < *cp) {
        goto error;
    }
    dest[1] = *cp++;
    /* all further bytes are plain continuation bytes */
    for (i = 2; i < count; i++) {
        if (*cp < 0x80 || 0xbf < *cp) {
            goto error;
        }
        dest[i] = *cp++;
    }
    dest[i] = '\0';

    return count;

error:
    dest[0] = '\0';
    return -1;
}
/*
 * Check the locale strings in spec. Return 0 if they are usable and -1 if
 *   - char is signed and spec->grouping contains a negative value,
 *   - spec->dot is empty or longer than four bytes, or
 *   - spec->sep is longer than four bytes.
 */
int
mpd_validate_lconv(mpd_spec_t *spec)
{
    size_t dot_len;
#if CHAR_MAX == SCHAR_MAX
    const char *g;

    for (g = spec->grouping; *g != '\0'; g++) {
        if (*g < 0) {
            return -1;
        }
    }
#endif

    dot_len = strlen(spec->dot);
    if (dot_len < 1 || dot_len > 4) {
        return -1;
    }

    return (strlen(spec->sep) > 4) ? -1 : 0;
}
/*
 * Parse the format specification string 'fmt' into 'spec'. Return 1 on
 * success and 0 if 'fmt' is not a valid specification.
 *
 * The fields are consumed in this order, each of them being optional:
 *
 *   [[fill]align][sign][0][min_width][,][.prec][type]
 *
 * 'caps' selects the default type: 'G' if non-zero, 'g' otherwise.
 *
 * For the types 'N' and 'n', spec->dot, spec->sep and spec->grouping are
 * set to the corresponding members of the struct returned by localeconv()
 * and the type is stored as 'G' or 'g'.
 * NOTE(review): these pointers are not copied, so they presumably stay
 * valid only as long as the locale data does - confirm against callers.
 */
int
mpd_parse_fmt_str(mpd_spec_t *spec, const char *fmt, int caps)
{
/* non-const cursor: mpd_strtossize() stores its end pointer through it */
char *cp = (char *)fmt;
int have_align = 0, n;
/* defaults */
spec->min_width = 0;
spec->prec = -1;
spec->type = caps ? 'G' : 'g';
spec->align = '>';
spec->sign = '-';
spec->dot = "";
spec->sep = "";
spec->grouping = "";
/* presume that the first character is a UTF-8 fill character */
if ((n = _mpd_copy_utf8(spec->fill, cp)) < 0) {
return 0;
}
/* alignment directive, prefixed by a fill character */
if (*cp && (*(cp+n) == '<' || *(cp+n) == '>' ||
*(cp+n) == '=' || *(cp+n) == '^')) {
cp += n;
spec->align = *cp++;
have_align = 1;
} /* alignment directive */
else {
/* default fill character */
spec->fill[0] = ' ';
spec->fill[1] = '\0';
if (*cp == '<' || *cp == '>' ||
*cp == '=' || *cp == '^') {
spec->align = *cp++;
have_align = 1;
}
}
/* sign formatting */
if (*cp == '+' || *cp == '-' || *cp == ' ') {
spec->sign = *cp++;
}
/* zero padding */
if (*cp == '0') {
/* zero padding implies alignment, which should not be
* specified twice. */
if (have_align) {
return 0;
}
/* 'z' is an internal alignment value that marks zero padding */
spec->align = 'z';
spec->fill[0] = *cp++;
spec->fill[1] = '\0';
}
/* minimum width */
if (isdigit((uchar)*cp)) {
/* a second leading zero is rejected */
if (*cp == '0') {
return 0;
}
errno = 0;
spec->min_width = mpd_strtossize(cp, &cp, 10);
if (errno == ERANGE || errno == EINVAL) {
return 0;
}
}
/* thousands separator */
if (*cp == ',') {
spec->dot = ".";
spec->sep = ",";
spec->grouping = "\003\003";
cp++;
}
/* fraction digits or significant digits */
if (*cp == '.') {
cp++;
/* at least one digit must follow the dot */
if (!isdigit((uchar)*cp)) {
return 0;
}
errno = 0;
spec->prec = mpd_strtossize(cp, &cp, 10);
if (errno == ERANGE || errno == EINVAL) {
return 0;
}
}
/* type */
if (*cp == 'E' || *cp == 'e' || *cp == 'F' || *cp == 'f' ||
*cp == 'G' || *cp == 'g' || *cp == '%') {
spec->type = *cp++;
}
else if (*cp == 'N' || *cp == 'n') {
/* locale specific conversion */
struct lconv *lc;
/* separator has already been specified */
if (*spec->sep) {
return 0;
}
spec->type = *cp++;
spec->type = (spec->type == 'N') ? 'G' : 'g';
lc = localeconv();
spec->dot = lc->decimal_point;
spec->sep = lc->thousands_sep;
spec->grouping = lc->grouping;
if (mpd_validate_lconv(spec) < 0) {
return 0; /* GCOV_NOT_REACHED */
}
}
/* check correctness */
/* anything left over is not part of a valid specification */
if (*cp != '\0') {
return 0;
}
return 1;
}
/*
* The following functions assume that spec->min_width <= MPD_MAX_PREC, which
* is made sure in mpd_qformat_spec. Then, even with a spec that inserts a
* four-byte separator after each digit, nbytes in the following struct
* cannot overflow.
*/
/*
 * Multibyte string.
 *
 * Used as the in/out argument of _mpd_add_sep_dot(), _mpd_apply_lconv()
 * and _mpd_add_pad(). The _mbstr_copy_* helpers decrement 'cur' before
 * writing, so 'data' is filled from the back towards the front. With
 * data == NULL the helpers only update the counters.
 */
typedef struct {
mpd_ssize_t nbytes; /* length in bytes */
mpd_ssize_t nchars; /* length in chars */
mpd_ssize_t cur; /* current write index */
char *data;
} mpd_mbstr_t;
/* Copy n bytes from src to dest. The regions are allowed to overlap. */
static inline void
_mpd_bcopy(char *dest, const char *src, mpd_ssize_t n)
{
    /* [jart] just use memmove */
    (void)memmove(dest, src, (size_t)n);
}
/*
 * Prepend one character that occupies n bytes (possibly a multibyte UTF-8
 * character) to dest: nbytes grows by n, nchars grows by one if n > 0 and
 * the write index moves back by n. The bytes are only copied if dest->data
 * is set.
 */
static inline void
_mbstr_copy_char(mpd_mbstr_t *dest, const char *src, mpd_ssize_t n)
{
    if (n > 0) {
        dest->nchars += 1;
    }
    dest->nbytes += n;
    dest->cur -= n;

    if (dest->data == NULL) {
        /* counting run: nothing to write */
        return;
    }
    _mpd_bcopy(dest->data + dest->cur, src, n);
}
/*
 * Prepend n single-byte characters from src to dest: nbytes and nchars both
 * grow by n and the write index moves back by n. The bytes are only copied
 * if dest->data is set.
 */
static inline void
_mbstr_copy_ascii(mpd_mbstr_t *dest, const char *src, mpd_ssize_t n)
{
    dest->cur -= n;
    dest->nchars += n;
    dest->nbytes += n;

    if (dest->data == NULL) {
        /* counting run: nothing to write */
        return;
    }
    _mpd_bcopy(dest->data + dest->cur, src, n);
}
/*
 * Prepend n '0' characters to dest: nbytes and nchars both grow by n and
 * the write index moves back by n. The zeros are only written if
 * dest->data is set.
 */
static inline void
_mbstr_copy_pad(mpd_mbstr_t *dest, mpd_ssize_t n)
{
    mpd_ssize_t i;
    char *zeros;

    dest->cur -= n;
    dest->nchars += n;
    dest->nbytes += n;

    if (dest->data == NULL) {
        /* counting run: nothing to write */
        return;
    }

    zeros = dest->data + dest->cur;
    for (i = 0; i < n; i++) {
        zeros[i] = '0';
    }
}
/*
* Copy a numeric string to dest->data, adding separators in the integer
* part according to spec->grouping. If leading zero padding is enabled
* and the result is smaller than spec->min_width, continue adding zeros
* and separators until the minimum width is reached.
*
* The final length of dest->data is stored in dest->nbytes. The number
* of UTF-8 characters is stored in dest->nchars.
*
* First run (dest->data == NULL): determine the length of the result
* string and store it in dest->nbytes.
*
* Second run (write to dest->data): data is written in chunks and in
* reverse order, starting with the rest of the numeric string. On entry,
* dest->nbytes must hold the length determined by the first run, since it
* is used as the initial write index.
*/
static void
_mpd_add_sep_dot(mpd_mbstr_t *dest,
const char *sign, /* location of optional sign */
const char *src, mpd_ssize_t n_src, /* integer part and length */
const char *dot, /* location of optional decimal point */
const char *rest, mpd_ssize_t n_rest, /* remaining part and length */
const mpd_spec_t *spec)
{
mpd_ssize_t n_sep, n_sign, consume;
const char *g;
int pad = 0;
n_sign = sign ? 1 : 0;
n_sep = (mpd_ssize_t)strlen(spec->sep);
/* Initial write index: set to location of '\0' in the output string.
* Irrelevant for the first run. */
dest->cur = dest->nbytes;
dest->nbytes = dest->nchars = 0;
/* everything after the decimal point is copied unchanged */
_mbstr_copy_ascii(dest, rest, n_rest);
if (dot) {
/* the decimal point may be a multibyte string */
_mbstr_copy_char(dest, dot, (mpd_ssize_t)strlen(dot));
}
g = spec->grouping;
consume = *g;
while (1) {
/* If the group length is 0 or CHAR_MAX or greater than the
* number of source bytes, consume all remaining bytes. */
if (*g == 0 || *g == CHAR_MAX || consume > n_src) {
consume = n_src;
}
n_src -= consume;
if (pad) {
_mbstr_copy_pad(dest, consume);
}
else {
_mbstr_copy_ascii(dest, src+n_src, consume);
}
if (n_src == 0) {
/* Either the real source of intpart digits or the virtual
* source of padding zeros is exhausted. */
if (spec->align == 'z' &&
dest->nchars + n_sign < spec->min_width) {
/* Zero padding is set and length < min_width:
* Generate n_src additional characters. */
n_src = spec->min_width - (dest->nchars + n_sign);
/* Next iteration:
* case *g == 0 || *g == CHAR_MAX:
* consume all padding characters
* case consume < *g:
* fill remainder of current group
* case consume == *g
* copying is a no-op */
consume = *g - consume;
/* Switch on virtual source of zeros. */
pad = 1;
continue;
}
break;
}
if (n_sep > 0) {
/* If padding is switched on, separators are counted
* as padding characters. This rule does not apply if
* the separator would be the first character of the
* result string. */
if (pad && n_src > 1) n_src -= 1;
_mbstr_copy_char(dest, spec->sep, n_sep);
}
/* If non-NUL, use the next value for grouping. */
if (*g && *(g+1)) g++;
consume = *g;
}
if (sign) {
_mbstr_copy_ascii(dest, sign, 1);
}
if (dest->data) {
dest->data[dest->nbytes] = '\0';
}
}
/*
* Convert a numeric-string to its locale-specific appearance.
* The string must have one of these forms:
*
* 1) [sign] digits [exponent-part]
* 2) [sign] digits '.' [digits] [exponent-part]
*
* Not allowed, since _mpd_to_string() never returns this form:
*
* 3) [sign] '.' digits [exponent-part]
*
* Input: result->data := original numeric string (ASCII)
* result->nbytes := strlen(result->data)
* result->nchars := strlen(result->data)
*
* Output: result->data := modified or original string
* result->nbytes := strlen(result->data)
* result->nchars := number of characters (possibly UTF-8)
*
* Return 1 on success. If the new string cannot be allocated, the original
* string is freed, result->data is NULL, MPD_Malloc_error is added to
* *status and 0 is returned.
*/
static int
_mpd_apply_lconv(mpd_mbstr_t *result, const mpd_spec_t *spec, uint32_t *status)
{
const char *sign = NULL, *intpart = NULL, *dot = NULL;
const char *rest, *dp;
char *decstring;
mpd_ssize_t n_int, n_rest;
/* original numeric string */
dp = result->data;
/* sign */
if (*dp == '+' || *dp == '-' || *dp == ' ') {
sign = dp++;
}
/* integer part */
assert(isdigit((uchar)*dp));
intpart = dp++;
while (isdigit((uchar)*dp)) {
dp++;
}
n_int = (mpd_ssize_t)(dp-intpart);
/* decimal point */
if (*dp == '.') {
/* the ASCII dot is skipped and replaced by the dot from spec */
dp++; dot = spec->dot;
}
/* rest */
rest = dp;
n_rest = result->nbytes - (mpd_ssize_t)(dp-result->data);
if (dot == NULL && (*spec->sep == '\0' || *spec->grouping == '\0')) {
/* _mpd_add_sep_dot() would not change anything */
return 1;
}
/* Determine the size of the new decimal string after inserting the
* decimal point, optional separators and optional padding. */
/* sign, intpart and rest still point into decstring, so it must stay
 * alive until the second run has finished */
decstring = result->data;
result->data = NULL;
_mpd_add_sep_dot(result, sign, intpart, n_int, dot,
rest, n_rest, spec);
result->data = mpd_alloc(result->nbytes+1, 1);
if (result->data == NULL) {
*status |= MPD_Malloc_error;
mpd_free(decstring);
return 0;
}
/* Perform actual writes. */
_mpd_add_sep_dot(result, sign, intpart, n_int, dot,
rest, n_rest, spec);
mpd_free(decstring);
return 1;
}
/*
 * Add padding to the formatted string if it has fewer characters than
 * spec->min_width. The fill character may occupy several bytes.
 *
 * Alignment '<' pads on the right, '>' and '=' pad on the left and '^'
 * splits the padding, with the larger half on the right. Zero padding
 * ('z') is treated like '='. With '=' a leading sign stays in the first
 * position and the padding is inserted after it.
 *
 * Return 1 on success. If the buffer cannot be enlarged, result->data is
 * freed, MPD_Malloc_error is added to *status and 0 is returned.
 */
static int
_mpd_add_pad(mpd_mbstr_t *result, const mpd_spec_t *spec, uint32_t *status)
{
    mpd_ssize_t add_chars, add_bytes;
    size_t lpad, rpad;
    size_t n_fill, len, i, j;
    uint8_t err = 0;
    char align;
    char *cp, *out;

    if (result->nchars >= spec->min_width) {
        /* already wide enough */
        return 1;
    }

    n_fill = strlen(spec->fill);
    add_chars = (spec->min_width - result->nchars);
    /* max value: MPD_MAX_PREC * 4 */
    add_bytes = add_chars * (mpd_ssize_t)n_fill;

    result->data = mpd_realloc(result->data,
                               result->nbytes+add_bytes+1,
                               sizeof *result->data, &err);
    cp = result->data;
    if (err) {
        *status |= MPD_Malloc_error;
        mpd_free(result->data);
        return 0;
    }

    align = (spec->align == 'z') ? '=' : spec->align;
    switch (align) {
    case '<':
        lpad = 0;
        rpad = add_chars;
        break;
    case '>':
    case '=':
        lpad = add_chars;
        rpad = 0;
        break;
    default: /* align == '^' */
        lpad = add_chars/2;
        rpad = add_chars-lpad;
        break;
    }

    len = result->nbytes;
    if (align == '=' && (*cp == '-' || *cp == '+' || *cp == ' ')) {
        /* leave sign in the leading position */
        cp++;
        len--;
    }

    /* make room for the left padding, then fill it */
    memmove(cp+n_fill*lpad, cp, len);
    out = cp;
    for (i = 0; i < lpad; i++) {
        for (j = 0; j < n_fill; j++) {
            *out++ = spec->fill[j];
        }
    }

    /* the right padding starts directly after the shifted string */
    out = cp + (n_fill*lpad + len);
    for (i = 0; i < rpad; i++) {
        for (j = 0; j < n_fill; j++) {
            *out++ = spec->fill[j];
        }
    }

    result->nbytes += add_bytes;
    result->nchars += add_chars;
    result->data[result->nbytes] = '\0';

    return 1;
}
/*
 * Round a number to prec digits. The adjusted exponent stays the same
 * or increases by one if rounding up crosses a power of ten boundary.
 * If result->digits would exceed MPD_MAX_PREC+1, MPD_Invalid_operation
 * is set and the result is NaN.
 *
 * prec <= 0 is an error (MPD_Invalid_operation); special values and zeros
 * are copied to result unchanged.
 */
static inline void
_mpd_round(mpd_t *result, const mpd_t *a, mpd_ssize_t prec,
           const mpd_context_t *ctx, uint32_t *status)
{
    /* exponent at which exactly prec digits remain */
    const mpd_ssize_t target_exp = a->exp + a->digits - prec;

    if (prec <= 0) {
        mpd_seterror(result, MPD_Invalid_operation, status); /* GCOV_NOT_REACHED */
        return; /* GCOV_NOT_REACHED */
    }

    if (mpd_isspecial(a) || mpd_iszero(a)) {
        mpd_qcopy(result, a, status); /* GCOV_NOT_REACHED */
        return; /* GCOV_NOT_REACHED */
    }

    mpd_qrescale_fmt(result, a, target_exp, ctx, status);
    if (result->digits <= prec) {
        return;
    }

    /* Rounding up produced an extra digit: rescale once more. */
    mpd_qrescale_fmt(result, result, target_exp+1, ctx, status);
}
/*
* Return the string representation of an mpd_t, formatted according to 'spec'.
* The format specification is assumed to be valid. Memory errors are indicated
* as usual. This function is quiet.
*
* On success a newly allocated string is returned.
* NOTE(review): the buffer comes from mpd_alloc()/mpd_realloc(), so callers
* presumably release it with mpd_free() - confirm.
*
* On failure NULL is returned and a condition is added to *status:
* MPD_Invalid_operation if spec->min_width or spec->prec exceeds
* MPD_MAX_PREC or if the string is too long for inserting separators,
* MPD_Malloc_error on allocation failure, or the error conditions raised
* while rounding or rescaling.
*/
char *
mpd_qformat_spec(const mpd_t *dec, const mpd_spec_t *spec,
const mpd_context_t *ctx, uint32_t *status)
{
/* scratch decimal with static storage for rounded or rescaled copies */
mpd_uint_t dt[MPD_MINALLOC_MAX];
mpd_t tmp = {MPD_STATIC|MPD_STATIC_DATA,0,0,0,MPD_MINALLOC_MAX,dt};
mpd_ssize_t dplace = MPD_DEFAULT_DOTPLACE;
mpd_mbstr_t result;
mpd_spec_t stackspec;
char type = spec->type;
int flags = 0;
if (spec->min_width > MPD_MAX_PREC) {
*status |= MPD_Invalid_operation;
return NULL;
}
/* an uppercase type selects the uppercase exponent indicator */
if (isupper((uchar)type)) {
type = tolower((uchar)type);
flags |= MPD_FMT_UPPER;
}
if (spec->sign == ' ') {
flags |= MPD_FMT_SIGN_SPACE;
}
else if (spec->sign == '+') {
flags |= MPD_FMT_SIGN_PLUS;
}
if (mpd_isspecial(dec)) {
if (spec->align == 'z') {
/* special values are not zero padded: use a modified copy of
 * the spec with a space as fill character and '>' alignment */
stackspec = *spec;
stackspec.fill[0] = ' ';
stackspec.fill[1] = '\0';
stackspec.align = '>';
spec = &stackspec;
}
if (type == '%') {
flags |= MPD_FMT_PERCENT;
}
}
else {
/* conditions raised by rounding and rescaling are collected here;
 * only the error conditions are passed on to *status */
uint32_t workstatus = 0;
mpd_ssize_t prec;
switch (type) {
case 'g': flags |= MPD_FMT_TOSCI; break;
case 'e': flags |= MPD_FMT_EXP; break;
case '%': flags |= MPD_FMT_PERCENT;
if (!mpd_qcopy(&tmp, dec, status)) {
return NULL;
}
/* multiply by 100, then format like 'f' */
tmp.exp += 2;
dec = &tmp;
type = 'f'; /* fall through */
case 'f': flags |= MPD_FMT_FIXED; break;
default: abort(); /* debug: GCOV_NOT_REACHED */
}
if (spec->prec >= 0) {
if (spec->prec > MPD_MAX_PREC) {
*status |= MPD_Invalid_operation;
goto error;
}
switch (type) {
case 'g':
/* prec is the number of significant digits, at least 1 */
prec = (spec->prec == 0) ? 1 : spec->prec;
if (dec->digits > prec) {
_mpd_round(&tmp, dec, prec, ctx,
&workstatus);
dec = &tmp;
}
break;
case 'e':
/* prec is the number of digits after the decimal point */
if (mpd_iszero(dec)) {
dplace = 1-spec->prec;
}
else {
_mpd_round(&tmp, dec, spec->prec+1, ctx,
&workstatus);
dec = &tmp;
}
break;
case 'f':
/* prec is the number of fraction digits */
mpd_qrescale(&tmp, dec, -spec->prec, ctx,
&workstatus);
dec = &tmp;
break;
}
}
if (type == 'f') {
/* zeros with a positive exponent are rescaled to exponent 0 */
if (mpd_iszero(dec) && dec->exp > 0) {
mpd_qrescale(&tmp, dec, 0, ctx, &workstatus);
dec = &tmp;
}
}
if (workstatus&MPD_Errors) {
*status |= (workstatus&MPD_Errors);
goto error;
}
}
/*
* At this point, for all scaled or non-scaled decimals:
* 1) 1 <= digits <= MAX_PREC+1
* 2) adjexp(scaled) = adjexp(orig) [+1]
* 3) case 'g': MIN_ETINY <= exp <= MAX_EMAX+1
* case 'e': MIN_ETINY-MAX_PREC <= exp <= MAX_EMAX+1
* case 'f': MIN_ETINY <= exp <= MAX_EMAX+1
* 4) max memory alloc in _mpd_to_string:
* case 'g': MAX_PREC+36
* case 'e': MAX_PREC+36
* case 'f': 2*MPD_MAX_PREC+30
*/
result.nbytes = _mpd_to_string(&result.data, dec, flags, dplace);
result.nchars = result.nbytes;
if (result.nbytes < 0) {
*status |= MPD_Malloc_error;
goto error;
}
/* a non-empty spec->dot means that a separator or locale was requested */
if (*spec->dot != '\0' && !mpd_isspecial(dec)) {
if (result.nchars > MPD_MAX_PREC+36) {
/* Since a group length of one is not explicitly
* disallowed, ensure that it is always possible to
* insert a four byte separator after each digit. */
*status |= MPD_Invalid_operation;
mpd_free(result.data);
goto error;
}
/* on failure the string has already been freed by the callee */
if (!_mpd_apply_lconv(&result, spec, status)) {
goto error;
}
}
if (spec->min_width) {
/* on failure the string has already been freed by the callee */
if (!_mpd_add_pad(&result, spec, status)) {
goto error;
}
}
mpd_del(&tmp);
return result.data;
error:
mpd_del(&tmp);
return NULL;
}
/*
 * Format dec according to the format string fmt. The string is parsed with
 * mpd_parse_fmt_str() (uppercase default type) and the result is produced
 * by mpd_qformat_spec(). If fmt is not a valid specification,
 * MPD_Invalid_operation is added to *status and NULL is returned.
 */
char *
mpd_qformat(const mpd_t *dec, const char *fmt, const mpd_context_t *ctx,
            uint32_t *status)
{
    mpd_spec_t parsed;

    if (mpd_parse_fmt_str(&parsed, fmt, 1)) {
        return mpd_qformat_spec(dec, &parsed, ctx, status);
    }

    *status |= MPD_Invalid_operation;
    return NULL;
}
/*
* The specification has a *condition* called Invalid_operation and an
* IEEE *signal* called Invalid_operation. The former corresponds to
* MPD_Invalid_operation, the latter to MPD_IEEE_Invalid_operation.
* MPD_IEEE_Invalid_operation comprises the following conditions:
*
* [MPD_Conversion_syntax, MPD_Division_impossible, MPD_Division_undefined,
* MPD_Fpu_error, MPD_Invalid_context, MPD_Invalid_operation,
* MPD_Malloc_error]
*
* In the following functions, 'flag' denotes the condition, 'signal'
* denotes the IEEE signal.
*/
/*
 * Names of the conditions. Entry j is the name that is printed for flag
 * bit (1U<<j) by mpd_snprint_flags(), and by mpd_lsnprint_flags() if no
 * other table is passed.
 */
static const char *mpd_flag_string[MPD_NUM_FLAGS] = {
"Clamped",
"Conversion_syntax",
"Division_by_zero",
"Division_impossible",
"Division_undefined",
"Fpu_error",
"Inexact",
"Invalid_context",
"Invalid_operation",
"Malloc_error",
"Not_implemented",
"Overflow",
"Rounded",
"Subnormal",
"Underflow",
};
/*
 * Names of the signals. Entry j is the name that is printed for flag bit
 * (1U<<j) by mpd_lsnprint_signals() if no other table is passed. All
 * conditions that are part of the IEEE signal share the name
 * "IEEE_Invalid_operation".
 */
static const char *mpd_signal_string[MPD_NUM_FLAGS] = {
"Clamped",
"IEEE_Invalid_operation",
"Division_by_zero",
"IEEE_Invalid_operation",
"IEEE_Invalid_operation",
"IEEE_Invalid_operation",
"Inexact",
"IEEE_Invalid_operation",
"IEEE_Invalid_operation",
"IEEE_Invalid_operation",
"Not_implemented",
"Overflow",
"Rounded",
"Subnormal",
"Underflow",
};
/*
 * Print the names of all conditions set in flags to dest, separated by
 * single spaces. nmemb is the size of dest and must be at least
 * MPD_MAX_FLAG_STRING. Return the length of the resulting string without
 * the terminating NUL, or -1 if snprintf() fails or the buffer is too
 * small.
 */
int
mpd_snprint_flags(char *dest, int nmemb, uint32_t flags)
{
    char *out = dest;
    int bit, written;

    assert(nmemb >= MPD_MAX_FLAG_STRING);

    *dest = '\0';
    for (bit = 0; bit < MPD_NUM_FLAGS; bit++) {
        if (!(flags & (1U<<bit))) {
            continue;
        }
        written = snprintf(out, nmemb, "%s ", mpd_flag_string[bit]);
        if (written < 0 || written >= nmemb) {
            return -1;
        }
        out += written;
        nmemb -= written;
    }

    /* erase the trailing space */
    if (out != dest) {
        out--;
        *out = '\0';
    }

    return (int)(out-dest);
}
/*
 * Print the names of all conditions set in flags to dest in list form:
 * "[name, name, ...]". nmemb is the size of dest and must be at least
 * MPD_MAX_FLAG_LIST. flag_string is the table of names indexed by bit
 * position; if it is NULL, the built-in table is used. Return the length
 * of the resulting string without the terminating NUL, or -1 if snprintf()
 * fails or the buffer is too small.
 */
int
mpd_lsnprint_flags(char *dest, int nmemb, uint32_t flags, const char *flag_string[])
{
    const char **names;
    char *out;
    int bit, written;

    assert(nmemb >= MPD_MAX_FLAG_LIST);

    names = (flag_string != NULL) ? flag_string : mpd_flag_string;

    dest[0] = '[';
    dest[1] = '\0';
    out = dest + 1;
    nmemb -= 1;

    for (bit = 0; bit < MPD_NUM_FLAGS; bit++) {
        if (!(flags & (1U<<bit))) {
            continue;
        }
        written = snprintf(out, nmemb, "%s, ", names[bit]);
        if (written < 0 || written >= nmemb) {
            return -1;
        }
        out += written;
        nmemb -= written;
    }

    /* erase the last ", " */
    if (out != dest + 1) {
        out -= 2;
    }
    out[0] = ']';
    out[1] = '\0';

    return (int)(out + 1 - dest); /* strlen, without NUL terminator */
}
/*
 * Print the names of all signals set in flags to dest in list form:
 * "[name, name, ...]". Conditions that belong to
 * MPD_IEEE_Invalid_operation are printed only once, using the name of the
 * first such bit that is set. nmemb is the size of dest and must be at
 * least MPD_MAX_SIGNAL_LIST. signal_string is the table of names indexed
 * by bit position; if it is NULL, the built-in table is used. Return the
 * length of the resulting string without the terminating NUL, or -1 if
 * snprintf() fails or the buffer is too small.
 */
int
mpd_lsnprint_signals(char *dest, int nmemb, uint32_t flags, const char *signal_string[])
{
    const char **names;
    char *out;
    int bit, written;
    int seen_invalid = 0;

    assert(nmemb >= MPD_MAX_SIGNAL_LIST);

    names = (signal_string != NULL) ? signal_string : mpd_signal_string;

    dest[0] = '[';
    dest[1] = '\0';
    out = dest + 1;
    nmemb -= 1;

    for (bit = 0; bit < MPD_NUM_FLAGS; bit++) {
        const uint32_t f = flags & (1U<<bit);

        if (f == 0) {
            continue;
        }
        if (f&MPD_IEEE_Invalid_operation) {
            if (seen_invalid) {
                continue;
            }
            seen_invalid = 1;
        }

        written = snprintf(out, nmemb, "%s, ", names[bit]);
        if (written < 0 || written >= nmemb) {
            return -1;
        }
        out += written;
        nmemb -= written;
    }

    /* erase the last ", " */
    if (out != dest + 1) {
        out -= 2;
    }
    out[0] = ']';
    out[1] = '\0';

    return (int)(out + 1 - dest); /* strlen, without NUL terminator */
}
/* The following two functions are mainly intended for debugging. */
/*
 * Write the scientific string of dec (uppercase exponent indicator),
 * followed by a newline, to file. If the string cannot be created, an
 * error message is written to file instead.
 */
void
mpd_fprint(FILE *file, const mpd_t *dec)
{
    char *text = mpd_to_sci(dec, 1);

    if (text == NULL) {
        fputs("mpd_fprint: output error\n", file); /* GCOV_NOT_REACHED */
        return;
    }

    fprintf(file, "%s\n", text);
    mpd_free(text);
}
/*
 * Write the scientific string of dec (uppercase exponent indicator),
 * followed by a newline, to stdout. If the string cannot be created, an
 * error message is written to stderr instead.
 *
 * The error message names this function; it used to carry the name of
 * mpd_fprint(), which pointed at the wrong call site.
 */
void
mpd_print(const mpd_t *dec)
{
    char *decstring;

    decstring = mpd_to_sci(dec, 1);
    if (decstring != NULL) {
        printf("%s\n", decstring);
        mpd_free(decstring);
    }
    else {
        fputs("mpd_print: output error\n", stderr); /* GCOV_NOT_REACHED */
    }
}