The other half of Issue #1580: use short float repr where possible.

Addresses the float -> string conversion, using David Gay's code which
was added in Mark Dickinson's checkin r71663.

Also addresses these, which are intertwined with the short repr
changes:

- Issue #5772: format(1e100, '<') produces '1e+100', not '1.0e+100'
- Issue #5515: 'n' formatting with commas no longer works poorly
    with leading zeros.
- PEP 378 Format Specifier for Thousands Separator: implemented
    for floats.
This commit is contained in:
Eric Smith 2009-04-16 20:16:10 +00:00
parent b08a53a99d
commit 0923d1d8d7
16 changed files with 1491 additions and 830 deletions

View file

@ -236,12 +236,15 @@ w_object(PyObject *v, WFILE *p)
w_string((char*)buf, 8, p);
}
else {
char buf[256]; /* Plenty to format any double */
n = _PyFloat_Repr(PyFloat_AS_DOUBLE(v),
buf, sizeof(buf));
char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
'r', 0, 0, NULL);
if (!buf)
return;
n = strlen(buf);
w_byte(TYPE_FLOAT, p);
w_byte((int)n, p);
w_string(buf, (int)n, p);
PyMem_Free(buf);
}
}
#ifndef WITHOUT_COMPLEX
@ -263,17 +266,24 @@ w_object(PyObject *v, WFILE *p)
w_string((char*)buf, 8, p);
}
else {
char buf[256]; /* Plenty to format any double */
char *buf;
w_byte(TYPE_COMPLEX, p);
n = _PyFloat_Repr(PyComplex_RealAsDouble(v),
buf, sizeof(buf));
buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
'r', 0, 0, NULL);
if (!buf)
return;
n = strlen(buf);
w_byte((int)n, p);
w_string(buf, (int)n, p);
n = _PyFloat_Repr(PyComplex_ImagAsDouble(v),
buf, sizeof(buf));
PyMem_Free(buf);
buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
'r', 0, 0, NULL);
if (!buf)
return;
n = strlen(buf);
w_byte((int)n, p);
w_string(buf, (int)n, p);
PyMem_Free(buf);
}
}
#endif

View file

@ -37,6 +37,38 @@
*
* Return value: the #gdouble value.
**/
#ifndef PY_NO_SHORT_FLOAT_REPR
/* Locale-independent string -> double conversion built on _Py_dg_strtod.
   nptr must be non-NULL; endptr is handed through to _Py_dg_strtod
   unchanged.  errno is cleared before converting. */
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    double parsed;
    _Py_SET_53BIT_PRECISION_HEADER;

    assert(nptr != NULL);

    /* Clear errno up front so that a caller can tell a genuine zero
       result apart from an underflow. */
    errno = 0;

    /* The conversion itself is bracketed by the 53-bit precision macros
       (defined elsewhere; presumably they pin the FPU rounding
       precision for the duration of the call). */
    _Py_SET_53BIT_PRECISION_START;
    parsed = _Py_dg_strtod(nptr, endptr);
    _Py_SET_53BIT_PRECISION_END;

    return parsed;
}
#else
/*
Use system strtod; since strtod is locale aware, we may
have to first fix the decimal separator.
Note that unlike _Py_dg_strtod, the system strtod may not always give
correctly rounded results.
*/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
@ -187,6 +219,15 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val;
}
#endif
/* Convert the string nptr to a double by delegating to
   PyOS_ascii_strtod.  No end pointer is requested, so the caller
   cannot find out where parsing stopped. */
double
PyOS_ascii_atof(const char *nptr)
{
    double value = PyOS_ascii_strtod(nptr, NULL);

    return value;
}
/* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */
@ -292,8 +333,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size)
}
}
/* Ensure that buffer has a decimal point in it. The decimal point
will not be in the current locale, it will always be '.' */
/* Ensure that buffer has a decimal point in it. The decimal point will not
be in the current locale, it will always be '.'. Don't add a decimal if an
exponent is present. */
Py_LOCAL_INLINE(void)
ensure_decimal_point(char* buffer, size_t buf_size)
{
@ -322,7 +364,8 @@ ensure_decimal_point(char* buffer, size_t buf_size)
insert_count = 1;
}
}
else {
else if (!(*p == 'e' || *p == 'E')) {
/* Don't add ".0" if we have an exponent. */
chars_to_insert = ".0";
insert_count = 2;
}
@ -341,37 +384,6 @@ ensure_decimal_point(char* buffer, size_t buf_size)
}
}
/* Add the locale specific grouping characters to buffer. Note
that any decimal point (if it's present) in buffer is already
locale-specific. Return 0 on error, else 1. */
/* Insert the locale's grouping characters into the integer part of the
   number held in buffer (whose total capacity is buf_size).  Any decimal
   point already present is expected to be the locale-specific one.
   Returns whatever _PyBytes_InsertThousandsGroupingLocale returns
   (0 on error, else 1). */
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
    Py_ssize_t total_len = strlen(buffer);
    struct lconv *lc = localeconv();
    const char *locale_point = lc->decimal_point;

    /* int_end marks one past the last character that takes part in
       grouping.  Preference order: the locale decimal point, then the
       exponent marker, then the end of the string. */
    char *int_end = strstr(buffer, locale_point);

    if (int_end == NULL)
        int_end = strpbrk(buffer, "eE");
    if (int_end == NULL)
        int_end = buffer + total_len;

    /* Only the characters in [buffer, int_end) are grouped. */
    return _PyBytes_InsertThousandsGroupingLocale(buffer, total_len,
                                                  int_end - buffer,
                                                  buf_size, NULL, 1);
}
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
@ -386,9 +398,8 @@ add_thousands_grouping(char* buffer, size_t buf_size)
* Converts a #gdouble to a string, using the '.' as
* decimal point. To format the number you pass in
* a printf()-style format string. Allowed conversion
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
*
* 'n' is the same as 'g', except it uses the current locale.
* 'Z' is the same as 'g', except it always has a decimal and
* at least one digit after the decimal.
*
@ -403,11 +414,6 @@ PyOS_ascii_formatd(char *buffer,
char format_char;
size_t format_len = strlen(format);
/* For type 'n', we need to make a copy of the format string, because
we're going to modify 'n' -> 'g', and format is const char*, so we
can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
we ever need this to be. There's an upcoming check to ensure it's
big enough. */
/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
also with at least one character past the decimal. */
char tmp_format[FLOAT_FORMATBUFLEN];
@ -433,12 +439,12 @@ PyOS_ascii_formatd(char *buffer,
if (!(format_char == 'e' || format_char == 'E' ||
format_char == 'f' || format_char == 'F' ||
format_char == 'g' || format_char == 'G' ||
format_char == 'n' || format_char == 'Z'))
format_char == 'Z'))
return NULL;
/* Map 'n' or 'Z' format_char to 'g', by copying the format string and
/* Map 'Z' format_char to 'g', by copying the format string and
replacing the final char with a 'g' */
if (format_char == 'n' || format_char == 'Z') {
if (format_char == 'Z') {
if (format_len + 1 >= sizeof(tmp_format)) {
/* The format won't fit in our copy. Error out. In
practice, this will never happen and will be
@ -457,11 +463,8 @@ PyOS_ascii_formatd(char *buffer,
/* Do various fixups on the return string */
/* Get the current locale, and find the decimal point string.
Convert that string back to a dot. Do not do this if using the
'n' (number) format code, since we want to keep the localized
decimal point in that case. */
if (format_char != 'n')
change_decimal_from_locale_to_dot(buffer);
Convert that string back to a dot. */
change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
@ -475,16 +478,497 @@ PyOS_ascii_formatd(char *buffer,
if (format_char == 'Z')
ensure_decimal_point(buffer, buf_size);
/* If format_char is 'n', add the thousands grouping. */
if (format_char == 'n')
if (!add_thousands_grouping(buffer, buf_size))
return NULL;
return buffer;
}
double
PyOS_ascii_atof(const char *nptr)
#ifdef PY_NO_SHORT_FLOAT_REPR
/* The fallback code to use if _Py_dg_dtoa is not available. */
/* Fallback implementation of PyOS_double_to_string, built on
   PyOS_ascii_formatd.

   Arguments:
     val          the double to convert
     format_code  one of 'e', 'E', 'f', 'F', 'g', 'G', 'r' or 's'.
                  Upper-case codes produce upper-case output.  'r' is
                  formatted as "%.17g" and 's' as "%.12g".
     precision    printf-style precision; must be 0 for 'r' and 's'
     flags        bitwise OR of Py_DTSF_SIGN (prefix finite non-negative
                  values with '+'), Py_DTSF_ADD_DOT_0 (format with the
                  'Z' code so a decimal point is always present) and
                  Py_DTSF_ALT (use printf's '#' alternate form)
     type         if non-NULL, receives Py_DTST_FINITE, Py_DTST_INFINITE
                  or Py_DTST_NAN

   Returns a PyMem_Malloc'd string that the caller must release with
   PyMem_Free, or NULL with a Python error set. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    char buf[128];
    char format[32];
    Py_ssize_t len;
    char *result;
    char *p;
    int t;
    int upper = 0;

    /* Validate format_code, and map upper and lower case */
    switch (format_code) {
    case 'e':          /* exponent */
    case 'f':          /* fixed */
    case 'g':          /* general */
        break;
    case 'E':
        upper = 1;
        format_code = 'e';
        break;
    case 'F':
        upper = 1;
        format_code = 'f';
        break;
    case 'G':
        upper = 1;
        format_code = 'g';
        break;
    case 'r':          /* repr format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 17;
        format_code = 'g';
        break;
    case 's':          /* str format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 12;
        format_code = 'g';
        break;
    default:
        PyErr_BadInternalCall();
        return NULL;
    }

    /* Handle nan and inf. */
    if (Py_IS_NAN(val)) {
        strcpy(buf, "nan");
        t = Py_DTST_NAN;
    } else if (Py_IS_INFINITY(val)) {
        if (copysign(1., val) == 1.)
            strcpy(buf, "inf");
        else
            strcpy(buf, "-inf");
        t = Py_DTST_INFINITE;
    } else {
        t = Py_DTST_FINITE;

        if (flags & Py_DTSF_ADD_DOT_0)
            format_code = 'Z';

        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
                      format_code);
        /* PyOS_ascii_formatd reports failure by returning NULL; buf is
           not guaranteed to hold a string in that case, so bail out
           instead of reading it. */
        if (PyOS_ascii_formatd(buf, sizeof(buf), format, val) == NULL) {
            PyErr_BadInternalCall();
            return NULL;
        }
    }

    len = strlen(buf);

    /* Add 1 for the trailing 0 byte.
       Add 1 because we might need to make room for the sign.
       */
    result = PyMem_Malloc(len + 2);
    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    p = result;

    /* Never add sign for nan/inf, even if asked. */
    if (flags & Py_DTSF_SIGN && buf[0] != '-' && t == Py_DTST_FINITE)
        *p++ = '+';

    strcpy(p, buf);

    if (upper) {
        /* Convert to upper case.  Only ASCII letters can appear here
           ("inf", "nan", "e"), so map them by hand rather than going
           through the locale-dependent toupper(). */
        char *p1;
        for (p1 = p; *p1; p1++) {
            if (*p1 >= 'a' && *p1 <= 'z')
                *p1 = (char)(*p1 - 'a' + 'A');
        }
    }

    if (type)
        *type = t;
    return result;
}
#else
/* _Py_dg_dtoa is available. */
/* I'm using a lookup table here so that I don't have to invent a non-locale
specific way to convert to uppercase */
/* Indices into the lc_float_strings / uc_float_strings tables below:
   the spelling of infinity, of not-a-number, and of the exponent
   marker respectively. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2
/* The lengths of these are known to the code below, so don't change them */
/* Lower-case spellings, in OFS_INF / OFS_NAN / OFS_E order. */
static char *lc_float_strings[] = {
"inf",
"nan",
"e",
};
/* Upper-case spellings, in the same order as lc_float_strings. */
static char *uc_float_strings[] = {
"INF",
"NAN",
"E",
};
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
memory containing the resulting string.
Arguments:
d is the double to be converted
format_code is one of 'e', 'f', 'g', 'r' or 's'. 'e', 'f' and 'g'
correspond to '%e', '%f' and '%g'; 'r' and 's' correspond
to repr and str.
mode is one of 0, 2 or 3 (an int, passed straight to _Py_dg_dtoa), and
is completely determined by format_code: 'e', 'g' and 's' use mode 2;
'f' mode 3, 'r' mode 0.
precision is the desired precision
always_add_sign is nonzero if a '+' sign should be included for positive
numbers
add_dot_0_if_integer is nonzero if integers in non-exponential form
should have ".0" added. Only applies to format codes 'r', 's', and 'g'.
use_alt_formatting is nonzero if alternative formatting should be
used. Only applies to format codes 'e', 'f' and 'g'.
float_strings is the table supplying the spellings of "inf", "nan" and
the exponent marker, indexed by OFS_INF, OFS_NAN and OFS_E.
type, if non-NULL, will be set to one of these constants to identify
the type of the 'd' argument:
Py_DTST_FINITE
Py_DTST_INFINITE
Py_DTST_NAN
Returns a PyMem_Malloc'd block of memory containing the resulting string,
or NULL on error. If NULL is returned, the Python error has been set.
*/
static char *
format_float_short(double d, char format_code,
int mode, Py_ssize_t precision,
int always_add_sign, int add_dot_0_if_integer,
int use_alt_formatting, char **float_strings, int *type)
{
char *buf = NULL;
char *p = NULL;
Py_ssize_t bufsize = 0;
char *digits, *digits_end;
int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
_Py_SET_53BIT_PRECISION_HEADER;
/* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
Must be matched by a call to _Py_dg_freedtoa. */
_Py_SET_53BIT_PRECISION_START;
digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
&digits_end);
_Py_SET_53BIT_PRECISION_END;
/* Widen to Py_ssize_t so the arithmetic below mixes with precision
and digits_len without int/Py_ssize_t conversions. */
decpt = (Py_ssize_t)decpt_as_int;
if (digits == NULL) {
/* The only failure mode is no memory. */
PyErr_NoMemory();
goto exit;
}
assert(digits_end != NULL && digits_end >= digits);
digits_len = digits_end - digits;
/* A non-digit first character means a special value was returned
rather than a digit string. */
if (digits_len && !isdigit(digits[0])) {
/* Infinities and nans here; adapt Gay's output,
so convert Infinity to inf and NaN to nan, and
ignore sign of nan. Then return. */
/* We only need 5 bytes to hold the result "+inf\0" . */
bufsize = 5; /* Used later in an assert. */
buf = (char *)PyMem_Malloc(bufsize);
if (buf == NULL) {
PyErr_NoMemory();
goto exit;
}
p = buf;
if (digits[0] == 'i' || digits[0] == 'I') {
if (sign == 1) {
*p++ = '-';
}
else if (always_add_sign) {
*p++ = '+';
}
/* Exactly 3 characters are copied; the terminating NUL is written
at the exit label. */
strncpy(p, float_strings[OFS_INF], 3);
p += 3;
if (type)
*type = Py_DTST_INFINITE;
}
else if (digits[0] == 'n' || digits[0] == 'N') {
/* note that we *never* add a sign for a nan,
even if one has explicitly been requested */
strncpy(p, float_strings[OFS_NAN], 3);
p += 3;
if (type)
*type = Py_DTST_NAN;
}
else {
/* shouldn't get here: Gay's code should always return
something starting with a digit, an 'I', or 'N' */
strncpy(p, "ERR", 3);
p += 3;
assert(0);
}
goto exit;
}
/* The result must be finite (not inf or nan). */
if (type)
*type = Py_DTST_FINITE;
/* We got digits back, format them. We may need to pad 'digits'
either on the left or right (or both) with extra zeros, so in
general the resulting string has the form
[<sign>]<zeros><digits><zeros>[<exponent>]
where either of the <zeros> pieces could be empty, and there's a
decimal point that could appear either in <digits> or in the
leading or trailing <zeros>.
Imagine an infinite 'virtual' string vdigits, consisting of the
string 'digits' (starting at index 0) padded on both the left and
right with infinite strings of zeros. We want to output a slice
vdigits[vdigits_start : vdigits_end]
of this virtual string. Thus if vdigits_start < 0 then we'll end
up producing some leading zeros; if vdigits_end > digits_len there
will be trailing zeros in the output. The next section of code
determines whether to use an exponent or not, figures out the
position 'decpt' of the decimal point, and computes 'vdigits_start'
and 'vdigits_end'. */
vdigits_end = digits_len;
switch (format_code) {
case 'e':
use_exp = 1;
vdigits_end = precision;
break;
case 'f':
vdigits_end = decpt + precision;
break;
case 'g':
if (decpt <= -4 || decpt > precision)
use_exp = 1;
if (use_alt_formatting)
vdigits_end = precision;
break;
case 'r':
/* convert to exponential format at 1e16. We used to convert
at 1e17, but that gives odd-looking results for some values
when a 16-digit 'shortest' repr is padded with bogus zeros.
For example, repr(2e16+8) would give 20000000000000010.0;
the true value is 20000000000000008.0. */
if (decpt <= -4 || decpt > 16)
use_exp = 1;
break;
case 's':
/* if we're forcing a digit after the point, convert to
exponential format at 1e11. If not, convert at 1e12. */
if (decpt <= -4 || decpt >
(add_dot_0_if_integer ? precision-1 : precision))
use_exp = 1;
break;
default:
/* buf is still NULL here, so the exit path returns NULL. */
PyErr_BadInternalCall();
goto exit;
}
/* if using an exponent, reset decimal point position to 1 and adjust
exponent accordingly.*/
if (use_exp) {
exp = decpt - 1;
decpt = 1;
}
/* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
decpt < vdigits_end if add_dot_0_if_integer and no exponent */
vdigits_start = decpt <= 0 ? decpt-1 : 0;
if (!use_exp && add_dot_0_if_integer)
vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
else
vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
/* double check inequalities */
assert(vdigits_start <= 0 &&
0 <= digits_len &&
digits_len <= vdigits_end);
/* decimal point should be in (vdigits_start, vdigits_end] */
assert(vdigits_start < decpt && decpt <= vdigits_end);
/* Compute an upper bound how much memory we need. This might be a few
chars too long, but no big deal. */
bufsize =
/* sign, decimal point and trailing 0 byte */
3 +
/* total digit count (including zero padding on both sides) */
(vdigits_end - vdigits_start) +
/* exponent "e+100", max 3 numerical digits */
(use_exp ? 5 : 0);
/* Now allocate the memory and initialize p to point to the start of
it. */
buf = (char *)PyMem_Malloc(bufsize);
if (buf == NULL) {
PyErr_NoMemory();
goto exit;
}
p = buf;
/* Add a negative sign if negative, and a plus sign if non-negative
and always_add_sign is true. */
if (sign == 1)
*p++ = '-';
else if (always_add_sign)
*p++ = '+';
/* note that exactly one of the three 'if' conditions is true,
so we include exactly one decimal point */
/* Zero padding on left of digit string */
if (decpt <= 0) {
memset(p, '0', decpt-vdigits_start);
p += decpt - vdigits_start;
*p++ = '.';
memset(p, '0', 0-decpt);
p += 0-decpt;
}
else {
memset(p, '0', 0-vdigits_start);
p += 0 - vdigits_start;
}
/* Digits, with included decimal point */
if (0 < decpt && decpt <= digits_len) {
strncpy(p, digits, decpt-0);
p += decpt-0;
*p++ = '.';
strncpy(p, digits+decpt, digits_len-decpt);
p += digits_len-decpt;
}
else {
strncpy(p, digits, digits_len);
p += digits_len;
}
/* And zeros on the right */
if (digits_len < decpt) {
memset(p, '0', decpt-digits_len);
p += decpt-digits_len;
*p++ = '.';
memset(p, '0', vdigits_end-decpt);
p += vdigits_end-decpt;
}
else {
memset(p, '0', vdigits_end-digits_len);
p += vdigits_end-digits_len;
}
/* Delete a trailing decimal pt unless using alternative formatting. */
if (p[-1] == '.' && !use_alt_formatting)
p--;
/* Now that we've done zero padding, add an exponent if needed. */
if (use_exp) {
*p++ = float_strings[OFS_E][0];
/* "%+.02d": always print the exponent's sign and at least two
digits. */
exp_len = sprintf(p, "%+.02d", exp);
p += exp_len;
}
/* Common exit for success and failure: NUL-terminate whatever was
built (if anything) and release the digit string from _Py_dg_dtoa. */
exit:
if (buf) {
*p = '\0';
/* It's too late if this fails, as we've already stepped on
memory that isn't ours. But it's an okay debugging test. */
assert(p-buf < bufsize);
}
if (digits)
_Py_dg_freedtoa(digits);
return buf;
}
/* Convert val to a string using the short-repr machinery.

   format_code is one of 'e', 'E', 'f', 'F', 'g', 'G', 'r' or 's'; an
   upper-case code selects the upper-case "INF"/"NAN"/"E" spellings.
   precision must be 0 for 'r' and 's'.  flags is a bitwise OR of
   Py_DTSF_SIGN, Py_DTSF_ADD_DOT_0 and Py_DTSF_ALT.  type is passed
   through to format_float_short.

   Returns the result of format_float_short, or NULL with a Python
   error set if format_code or precision is rejected here. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    char **strings = lc_float_strings;
    char code = format_code;
    int dtoa_mode;

    /* Fold upper-case codes onto their lower-case equivalents,
       switching to the upper-case string table as we do so. */
    switch (format_code) {
    case 'E':
        code = 'e';
        strings = uc_float_strings;
        break;
    case 'F':
        code = 'f';
        strings = uc_float_strings;
        break;
    case 'G':
        code = 'g';
        strings = uc_float_strings;
        break;
    default:
        break;
    }

    /* Pick the dtoa mode and adjust the precision for each code; any
       code not listed here is invalid. */
    if (code == 'e') {
        dtoa_mode = 2;
        precision += 1;
    }
    else if (code == 'f') {
        dtoa_mode = 3;
    }
    else if (code == 'g') {
        dtoa_mode = 2;
        /* precision 0 makes no sense for 'g' format; interpret as 1 */
        if (precision == 0)
            precision = 1;
    }
    else if (code == 'r' || code == 's') {
        /* Supplied precision is unused for repr/str, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        if (code == 's') {
            dtoa_mode = 2;
            precision = 12;
        }
        else {
            /* "repr" pseudo-mode */
            dtoa_mode = 0;
        }
    }
    else {
        PyErr_BadInternalCall();
        return NULL;
    }

    return format_float_short(val, code, dtoa_mode, precision,
                              flags & Py_DTSF_SIGN,
                              flags & Py_DTSF_ADD_DOT_0,
                              flags & Py_DTSF_ALT,
                              strings, type);
}
#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */