Backport of some of the work in r71665 to trunk. This reworks much of

int, long, and float __format__(), and it keeps their implementation
in sync with py3k.

Also added PyOS_double_to_string. This is the "fallback" version
that's also available in trunk, and should be kept in sync with that
code. I'll add an issue to document PyOS_double_to_string in the C
API.

There are many internal cleanups. Externally visible changes include:

- Implement PEP 378, Format Specifier for Thousands Separator, for
  floats, ints, and longs.

- Issue #5515: 'n' formatting for ints, longs, and floats handles
  leading zero formatting poorly.

- Issue #5772: For float.__format__, don't add a trailing ".0" if
  we're using no type code and we have an exponent.
This commit is contained in:
Eric Smith 2009-04-22 13:29:05 +00:00
parent cbb5308723
commit aca19e6a74
9 changed files with 894 additions and 496 deletions

View file

@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d); PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d);
/* The caller is responsible for calling PyMem_Free to free the buffer
that is returned. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type);
/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
#define Py_DTSF_SIGN 0x01 /* always add the sign */
#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code
specific */
/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */
#define Py_DTST_FINITE 0
#define Py_DTST_INFINITE 1
#define Py_DTST_NAN 2
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -177,16 +177,26 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
strings) */ strings) */
); );
/* Using the current locale, insert the thousands grouping /* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions, into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */ see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGroupingLocale(char *buffer,
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer, Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits, Py_ssize_t n_digits,
Py_ssize_t buf_size, Py_ssize_t min_width);
Py_ssize_t *count,
int append_zero_char); /* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep);
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */

View file

@ -232,6 +232,10 @@ class FormatTest(unittest.TestCase):
testboth("%o", -042L, "-42") testboth("%o", -042L, "-42")
testboth("%o", float(042), "42") testboth("%o", float(042), "42")
# alternate float formatting
testformat('%g', 1.1, '1.1')
testformat('%#g', 1.1, '1.10000')
# Test exception for unknown format characters # Test exception for unknown format characters
if verbose: if verbose:
print 'Testing exceptions' print 'Testing exceptions'

View file

@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase):
self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101') self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
self.assertEqual('%e' % 1.5e-101, '1.500000e-101') self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
self.assertEqual('%g' % 1.0, '1')
self.assertEqual('%#g' % 1.0, '1.00000')
def test_normal_integers(self): def test_normal_integers(self):
# Ensure the first 256 integers are shared # Ensure the first 256 integers are shared
a = 256 a = 256
@ -412,6 +415,9 @@ class TypesTests(unittest.TestCase):
self.assertRaises(TypeError, 3 .__format__, None) self.assertRaises(TypeError, 3 .__format__, None)
self.assertRaises(TypeError, 3 .__format__, 0) self.assertRaises(TypeError, 3 .__format__, 0)
# can't have ',' with 'c'
self.assertRaises(ValueError, 3 .__format__, ",c")
# ensure that only int and float type specifiers work # ensure that only int and float type specifiers work
for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
[chr(x) for x in range(ord('A'), ord('Z')+1)]): [chr(x) for x in range(ord('A'), ord('Z')+1)]):
@ -609,10 +615,36 @@ class TypesTests(unittest.TestCase):
# a totally empty format specifier means something else. # a totally empty format specifier means something else.
# So, just use a sign flag # So, just use a sign flag
test(1e200, '+g', '+1e+200') test(1e200, '+g', '+1e+200')
test(1e200, '+', '+1.0e+200') test(1e200, '+', '+1e+200')
test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200') test(1.1e200, '+', '+1.1e+200')
test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200')
# 0 padding
test(1234., '010f', '1234.000000')
test(1234., '011f', '1234.000000')
test(1234., '012f', '01234.000000')
test(-1234., '011f', '-1234.000000')
test(-1234., '012f', '-1234.000000')
test(-1234., '013f', '-01234.000000')
test(-1234.12341234, '013f', '-01234.123412')
test(-123456.12341234, '011.2f', '-0123456.12')
# 0 padding with commas
test(1234., '011,f', '1,234.000000')
test(1234., '012,f', '1,234.000000')
test(1234., '013,f', '01,234.000000')
test(-1234., '012,f', '-1,234.000000')
test(-1234., '013,f', '-1,234.000000')
test(-1234., '014,f', '-01,234.000000')
test(-12345., '015,f', '-012,345.000000')
test(-123456., '016,f', '-0,123,456.000000')
test(-123456., '017,f', '-0,123,456.000000')
test(-123456.12341234, '017,f', '-0,123,456.123412')
test(-123456.12341234, '013,.2f', '-0,123,456.12')
# % formatting # % formatting
test(-1.0, '%', '-100.000000%') test(-1.0, '%', '-100.000000%')
@ -637,6 +669,24 @@ class TypesTests(unittest.TestCase):
self.assertRaises(ValueError, format, 0.0, '#') self.assertRaises(ValueError, format, 0.0, '#')
self.assertRaises(ValueError, format, 0.0, '#20f') self.assertRaises(ValueError, format, 0.0, '#20f')
def test_format_spec_errors(self):
# int, float, and string all share the same format spec
# mini-language parser.
# Check that we can't ask for too many digits. This is
# probably a CPython specific test. It tries to put the width
# into a C long.
self.assertRaises(ValueError, format, 0, '1'*10000 + 'd')
# Similar with the precision.
self.assertRaises(ValueError, format, 0, '.' + '1'*10000 + 'd')
# And may as well test both.
self.assertRaises(ValueError, format, 0, '1'*1000 + '.' + '1'*10000 + 'd')
# Make sure commas aren't allowed with various type codes
for code in 'xXobns':
self.assertRaises(ValueError, format, 0, ',' + code)
def test_main(): def test_main():
run_unittest(TypesTests) run_unittest(TypesTests)

View file

@ -12,6 +12,15 @@ What's New in Python 2.7 alpha 1
Core and Builtins Core and Builtins
----------------- -----------------
- Implement PEP 378, Format Specifier for Thousands Separator, for
floats, ints, and longs.
- Issue #5515: 'n' formatting for ints, longs, and floats handles
leading zero formatting poorly.
- Issue #5772: For float.__format__, don't add a trailing ".0" if
we're using no type code and we have an exponent.
- Issue #3166: Make long -> float (and int -> float) conversions - Issue #3166: Make long -> float (and int -> float) conversions
correctly rounded. correctly rounded.

View file

@ -1,6 +1,8 @@
/* implements the string, long, and float formatters. that is, /* implements the string, long, and float formatters. that is,
string.__format__, etc. */ string.__format__, etc. */
#include <locale.h>
/* Before including this, you must include either: /* Before including this, you must include either:
stringlib/unicodedefs.h stringlib/unicodedefs.h
stringlib/stringdefs.h stringlib/stringdefs.h
@ -13,8 +15,6 @@
be. These are the only non-static functions defined here. be. These are the only non-static functions defined here.
*/ */
#define ALLOW_PARENS_FOR_SIGN 0
/* Raises an exception about an unknown presentation type for this /* Raises an exception about an unknown presentation type for this
* type. */ * type. */
@ -104,9 +104,6 @@ is_sign_element(STRINGLIB_CHAR c)
{ {
switch (c) { switch (c) {
case ' ': case '+': case '-': case ' ': case '+': case '-':
#if ALLOW_PARENS_FOR_SIGN
case '(':
#endif
return 1; return 1;
default: default:
return 0; return 0;
@ -120,6 +117,7 @@ typedef struct {
int alternate; int alternate;
STRINGLIB_CHAR sign; STRINGLIB_CHAR sign;
Py_ssize_t width; Py_ssize_t width;
int thousands_separators;
Py_ssize_t precision; Py_ssize_t precision;
STRINGLIB_CHAR type; STRINGLIB_CHAR type;
} InternalFormatSpec; } InternalFormatSpec;
@ -142,13 +140,14 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
/* end-ptr is used throughout this code to specify the length of /* end-ptr is used throughout this code to specify the length of
the input string */ the input string */
Py_ssize_t specified_width; Py_ssize_t consumed;
format->fill_char = '\0'; format->fill_char = '\0';
format->align = '\0'; format->align = '\0';
format->alternate = 0; format->alternate = 0;
format->sign = '\0'; format->sign = '\0';
format->width = -1; format->width = -1;
format->thousands_separators = 0;
format->precision = -1; format->precision = -1;
format->type = default_type; format->type = default_type;
@ -168,11 +167,6 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
if (end-ptr >= 1 && is_sign_element(ptr[0])) { if (end-ptr >= 1 && is_sign_element(ptr[0])) {
format->sign = ptr[0]; format->sign = ptr[0];
++ptr; ++ptr;
#if ALLOW_PARENS_FOR_SIGN
if (end-ptr >= 1 && ptr[0] == ')') {
++ptr;
}
#endif
} }
/* If the next character is #, we're in alternate mode. This only /* If the next character is #, we're in alternate mode. This only
@ -191,25 +185,35 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
++ptr; ++ptr;
} }
/* XXX add error checking */ consumed = get_integer(&ptr, end, &format->width);
specified_width = get_integer(&ptr, end, &format->width); if (consumed == -1)
/* Overflow error. Exception already set. */
return 0;
/* if specified_width is 0, we didn't consume any characters for /* If consumed is 0, we didn't consume any characters for the
the width. in that case, reset the width to -1, because width. In that case, reset the width to -1, because
get_integer() will have set it to zero */ get_integer() will have set it to zero. -1 is how we record
if (specified_width == 0) { that the width wasn't specified. */
if (consumed == 0)
format->width = -1; format->width = -1;
/* Comma signifies add thousands separators */
if (end-ptr && ptr[0] == ',') {
format->thousands_separators = 1;
++ptr;
} }
/* Parse field precision */ /* Parse field precision */
if (end-ptr && ptr[0] == '.') { if (end-ptr && ptr[0] == '.') {
++ptr; ++ptr;
/* XXX add error checking */ consumed = get_integer(&ptr, end, &format->precision);
specified_width = get_integer(&ptr, end, &format->precision); if (consumed == -1)
/* Overflow error. Exception already set. */
return 0;
/* not having a precision after a dot is an error */ /* Not having a precision after a dot is an error. */
if (specified_width == 0) { if (consumed == 0) {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"Format specifier missing precision"); "Format specifier missing precision");
return 0; return 0;
@ -217,10 +221,10 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
} }
/* Finally, parse the type field */ /* Finally, parse the type field. */
if (end-ptr > 1) { if (end-ptr > 1) {
/* invalid conversion spec */ /* More than one char remain, invalid conversion spec. */
PyErr_Format(PyExc_ValueError, "Invalid conversion specification"); PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
return 0; return 0;
} }
@ -230,6 +234,29 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
++ptr; ++ptr;
} }
/* Do as much validating as we can, just by looking at the format
specifier. Do not take into account what type of formatting
we're doing (int, float, string). */
if (format->thousands_separators) {
switch (format->type) {
case 'd':
case 'e':
case 'f':
case 'g':
case 'E':
case 'G':
case '%':
case 'F':
/* These are allowed. See PEP 378.*/
break;
default:
PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '%c'.", format->type);
return 0;
}
}
return 1; return 1;
} }
@ -238,6 +265,20 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
/*********** common routines for numeric formatting *********************/ /*********** common routines for numeric formatting *********************/
/************************************************************************/ /************************************************************************/
/* Locale type codes, passed as the `type` argument of get_locale_info()
to choose where the separator and grouping settings come from. */
/* Use the values reported by localeconv(). */
#define LT_CURRENT_LOCALE 0
/* Use the hard-coded settings: "." decimal point, "," thousands
separator, digits grouped in threes. */
#define LT_DEFAULT_LOCALE 1
/* Use a "." decimal point with an empty thousands separator and the
no_grouping grouping description. */
#define LT_NO_LOCALE 2
/* Locale info needed for formatting integers and the part of floats
before and including the decimal point. Note that locales only support
8-bit chars, not unicode, so every field is a plain C string. */
typedef struct {
char *decimal_point; /* text written in place of the '.' of a float */
char *thousands_sep; /* separator handed to STRINGLIB_GROUPING */
char *grouping; /* grouping description handed to STRINGLIB_GROUPING */
} LocaleInfo;
/* describes the layout for an integer, see the comment in /* describes the layout for an integer, see the comment in
calc_number_widths() for details */ calc_number_widths() for details */
typedef struct { typedef struct {
@ -245,38 +286,84 @@ typedef struct {
Py_ssize_t n_prefix; Py_ssize_t n_prefix;
Py_ssize_t n_spadding; Py_ssize_t n_spadding;
Py_ssize_t n_rpadding; Py_ssize_t n_rpadding;
char lsign; char sign;
Py_ssize_t n_lsign; Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
char rsign; Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
Py_ssize_t n_rsign; any grouping chars. */
Py_ssize_t n_total; /* just a convenience, it's derivable from the Py_ssize_t n_decimal; /* 0 if only an integer */
other fields */ Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
excluding the decimal itself, if
present. */
/* These 2 are not the widths of fields, but are needed by
STRINGLIB_GROUPING. */
Py_ssize_t n_digits; /* The number of digits before a decimal
or exponent. */
Py_ssize_t n_min_width; /* The min_width we used when we computed
the n_grouped_digits width. */
} NumberFieldWidths; } NumberFieldWidths;
/* Split the textual number stored in ptr[0] .. ptr[len-1], which has
   the form:

       digits[remainder]

   into its leading run of ASCII digits and whatever follows it.  What
   follows could be a decimal point, an exponent, both, or neither.

   On return:
     *has_decimal  is 1 if the character right after the digits is a
                   '.', and 0 otherwise.
     *n_remainder  is the number of characters after the digits, not
                   counting the decimal point itself when present.

   Results are undefined (but shouldn't crash) for improperly
   formatted strings.
*/
static void
parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
             Py_ssize_t *n_remainder, int *has_decimal)
{
    STRINGLIB_CHAR *end = ptr + len;
    STRINGLIB_CHAR *remainder;

    /* Compare against the '0'..'9' range directly instead of calling
       isdigit().  STRINGLIB_CHAR depends on which stringlib defs
       header was included (it can be a char or a Py_UNICODE), and
       passing a value that is not representable as unsigned char to a
       <ctype.h> function is undefined behavior. */
    while (ptr < end && *ptr >= '0' && *ptr <= '9')
        ++ptr;
    remainder = ptr;

    /* Does remainder start with a decimal point? */
    *has_decimal = ptr < end && *remainder == '.';

    /* Skip the decimal point. */
    if (*has_decimal)
        remainder++;

    *n_remainder = end - remainder;
}
/* not all fields of format are used. for example, precision is /* not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier about what it does? or is passing a single format parameter easier
and more efficient enough to justify a little obfuscation? */ and more efficient enough to justify a little obfuscation? */
static void static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign, calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_ssize_t n_prefix, Py_ssize_t n_digits, STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
Py_ssize_t n_number, Py_ssize_t n_remainder,
int has_decimal, const LocaleInfo *locale,
const InternalFormatSpec *format) const InternalFormatSpec *format)
{ {
Py_ssize_t n_non_digit_non_padding;
Py_ssize_t n_padding;
spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
spec->n_lpadding = 0; spec->n_lpadding = 0;
spec->n_prefix = 0; spec->n_prefix = n_prefix;
spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
spec->n_remainder = n_remainder;
spec->n_spadding = 0; spec->n_spadding = 0;
spec->n_rpadding = 0; spec->n_rpadding = 0;
spec->lsign = '\0'; spec->sign = '\0';
spec->n_lsign = 0; spec->n_sign = 0;
spec->rsign = '\0';
spec->n_rsign = 0;
/* the output will look like: /* the output will look like:
| | | |
| <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> | | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
| | | |
lsign and rsign are computed from format->sign and the actual sign is computed from format->sign and the actual
sign of the number sign of the number
prefix is given (it's for the '0x' prefix) prefix is given (it's for the '0x' prefix)
@ -291,108 +378,191 @@ calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign,
*/ */
/* compute the various parts we're going to write */ /* compute the various parts we're going to write */
if (format->sign == '+') { switch (format->sign) {
case '+':
/* always put a + or - */ /* always put a + or - */
spec->n_lsign = 1; spec->n_sign = 1;
spec->lsign = (actual_sign == '-' ? '-' : '+'); spec->sign = (sign_char == '-' ? '-' : '+');
} break;
#if ALLOW_PARENS_FOR_SIGN case ' ':
else if (format->sign == '(') { spec->n_sign = 1;
if (actual_sign == '-') { spec->sign = (sign_char == '-' ? '-' : ' ');
spec->n_lsign = 1; break;
spec->lsign = '('; default:
spec->n_rsign = 1; /* Not specified, or the default (-) */
spec->rsign = ')'; if (sign_char == '-') {
} spec->n_sign = 1;
} spec->sign = '-';
#endif
else if (format->sign == ' ') {
spec->n_lsign = 1;
spec->lsign = (actual_sign == '-' ? '-' : ' ');
}
else {
/* non specified, or the default (-) */
if (actual_sign == '-') {
spec->n_lsign = 1;
spec->lsign = '-';
} }
} }
spec->n_prefix = n_prefix; /* The number of chars used for non-digits and non-padding. */
n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
spec->n_remainder;
/* now the number of padding characters */ /* min_width can go negative, that's okay. format->width == -1 means
if (format->width == -1) { we don't care. */
/* no padding at all, nothing to do */ if (format->fill_char == '0')
} spec->n_min_width = format->width - n_non_digit_non_padding;
else {
/* see if any padding is needed */
if (spec->n_lsign + n_digits + spec->n_rsign +
spec->n_prefix >= format->width) {
/* no padding needed, we're already bigger than the
requested width */
}
else {
/* determine which of left, space, or right padding is
needed */
Py_ssize_t padding = format->width -
(spec->n_lsign + spec->n_prefix +
n_digits + spec->n_rsign);
if (format->align == '<')
spec->n_rpadding = padding;
else if (format->align == '>')
spec->n_lpadding = padding;
else if (format->align == '^') {
spec->n_lpadding = padding / 2;
spec->n_rpadding = padding - spec->n_lpadding;
}
else if (format->align == '=')
spec->n_spadding = padding;
else else
spec->n_lpadding = padding; spec->n_min_width = 0;
if (spec->n_digits == 0)
/* This case only occurs when using 'c' formatting, we need
to special case it because the grouping code always wants
to have at least one character. */
spec->n_grouped_digits = 0;
else
spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
spec->n_digits,
spec->n_min_width,
locale->grouping,
locale->thousands_sep);
/* Given the desired width and the total of digit and non-digit
space we consume, see if we need any padding. format->width can
be negative (meaning no padding), but this code still works in
that case. */
n_padding = format->width -
(n_non_digit_non_padding + spec->n_grouped_digits);
if (n_padding > 0) {
/* Some padding is needed. Determine if it's left, space, or right. */
switch (format->align) {
case '<':
spec->n_rpadding = n_padding;
break;
case '^':
spec->n_lpadding = n_padding / 2;
spec->n_rpadding = n_padding - spec->n_lpadding;
break;
case '=':
spec->n_spadding = n_padding;
break;
default:
/* Handles '>', plus catch-all just in case. */
spec->n_lpadding = n_padding;
break;
} }
} }
spec->n_total = spec->n_lpadding + spec->n_lsign + spec->n_prefix + return spec->n_lpadding + spec->n_sign + spec->n_prefix +
spec->n_spadding + n_digits + spec->n_rsign + spec->n_rpadding; spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
spec->n_remainder + spec->n_rpadding;
} }
/* fill in the non-digit parts of a numbers's string representation, /* Fill in the digit parts of a numbers's string representation,
as determined in calc_number_widths(). returns the pointer to as determined in calc_number_widths().
where the digits go. */ No error checking, since we know the buffer is the correct size. */
static STRINGLIB_CHAR * static void
fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
STRINGLIB_CHAR *prefix, Py_ssize_t n_digits, STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
STRINGLIB_CHAR fill_char) STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
LocaleInfo *locale, int toupper)
{ {
STRINGLIB_CHAR *p_digits; /* Used to keep track of digits, decimal, and remainder. */
STRINGLIB_CHAR *p = digits;
#ifndef NDEBUG
Py_ssize_t r;
#endif
if (spec->n_lpadding) { if (spec->n_lpadding) {
STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding); STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
p_buf += spec->n_lpadding; buf += spec->n_lpadding;
} }
if (spec->n_lsign == 1) { if (spec->n_sign == 1) {
*p_buf++ = spec->lsign; *buf++ = spec->sign;
} }
if (spec->n_prefix) { if (spec->n_prefix) {
memmove(p_buf, memmove(buf,
prefix, prefix,
spec->n_prefix * sizeof(STRINGLIB_CHAR)); spec->n_prefix * sizeof(STRINGLIB_CHAR));
p_buf += spec->n_prefix; if (toupper) {
Py_ssize_t t;
for (t = 0; t < spec->n_prefix; ++t)
buf[t] = STRINGLIB_TOUPPER(buf[t]);
}
buf += spec->n_prefix;
} }
if (spec->n_spadding) { if (spec->n_spadding) {
STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding); STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
p_buf += spec->n_spadding; buf += spec->n_spadding;
} }
p_digits = p_buf;
p_buf += n_digits; /* Only for type 'c' special case, it has no digits. */
if (spec->n_rsign == 1) { if (spec->n_digits != 0) {
*p_buf++ = spec->rsign; /* Fill the digits with InsertThousandsGrouping. */
#ifndef NDEBUG
r =
#endif
STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
spec->n_digits, spec->n_min_width,
locale->grouping, locale->thousands_sep);
#ifndef NDEBUG
assert(r == spec->n_grouped_digits);
#endif
p += spec->n_digits;
} }
if (toupper) {
Py_ssize_t t;
for (t = 0; t < spec->n_grouped_digits; ++t)
buf[t] = STRINGLIB_TOUPPER(buf[t]);
}
buf += spec->n_grouped_digits;
if (spec->n_decimal) {
Py_ssize_t t;
for (t = 0; t < spec->n_decimal; ++t)
buf[t] = locale->decimal_point[t];
buf += spec->n_decimal;
p += 1;
}
if (spec->n_remainder) {
memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
buf += spec->n_remainder;
p += spec->n_remainder;
}
if (spec->n_rpadding) { if (spec->n_rpadding) {
STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding); STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
p_buf += spec->n_rpadding; buf += spec->n_rpadding;
} }
return p_digits;
} }
static char no_grouping[1] = {CHAR_MAX};
/* Fill *locale_info with the decimal point string, the thousands
   separator string and the grouping description to format with.

   type selects the source of those values:
     LT_CURRENT_LOCALE  whatever localeconv() reports,
     LT_DEFAULT_LOCALE  a hard-coded locale ("." and "," with groups
                        of three digits),
     LT_NO_LOCALE       "." with no separator and no grouping.

   Any other type value is a programming error and trips an assert. */
static void
get_locale_info(int type, LocaleInfo *locale_info)
{
    if (type == LT_CURRENT_LOCALE) {
        struct lconv *lc = localeconv();

        locale_info->decimal_point = lc->decimal_point;
        locale_info->thousands_sep = lc->thousands_sep;
        locale_info->grouping = lc->grouping;
    }
    else if (type == LT_DEFAULT_LOCALE) {
        locale_info->decimal_point = ".";
        locale_info->thousands_sep = ",";
        /* Group every 3 characters, trailing 0 means repeat
           infinitely. */
        locale_info->grouping = "\3";
    }
    else if (type == LT_NO_LOCALE) {
        locale_info->decimal_point = ".";
        locale_info->thousands_sep = "";
        locale_info->grouping = no_grouping;
    }
    else {
        assert(0);
    }
}
#endif /* FORMAT_FLOAT || FORMAT_LONG */ #endif /* FORMAT_FLOAT || FORMAT_LONG */
/************************************************************************/ /************************************************************************/
@ -510,19 +680,21 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
PyObject *tmp = NULL; PyObject *tmp = NULL;
STRINGLIB_CHAR *pnumeric_chars; STRINGLIB_CHAR *pnumeric_chars;
STRINGLIB_CHAR numeric_char; STRINGLIB_CHAR numeric_char;
STRINGLIB_CHAR sign = '\0'; STRINGLIB_CHAR sign_char = '\0';
STRINGLIB_CHAR *p;
Py_ssize_t n_digits; /* count of digits need from the computed Py_ssize_t n_digits; /* count of digits need from the computed
string */ string */
Py_ssize_t n_leading_chars; Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to produces non-digits */
allocate, used for 'n'
formatting. */
Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */ Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
Py_ssize_t n_total;
STRINGLIB_CHAR *prefix = NULL; STRINGLIB_CHAR *prefix = NULL;
NumberFieldWidths spec; NumberFieldWidths spec;
long x; long x;
/* Locale settings, either from the actual locale or
from a hard-coded pseudo-locale */
LocaleInfo locale;
/* no precision allowed on integers */ /* no precision allowed on integers */
if (format->precision != -1) { if (format->precision != -1) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
@ -530,7 +702,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
goto done; goto done;
} }
/* special case for character formatting */ /* special case for character formatting */
if (format->type == 'c') { if (format->type == 'c') {
/* error to specify a sign */ /* error to specify a sign */
@ -541,6 +712,14 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
goto done; goto done;
} }
/* Error to specify a comma. */
if (format->thousands_separators) {
PyErr_SetString(PyExc_ValueError,
"Thousands separators not allowed with integer"
" format specifier 'c'");
goto done;
}
/* taken from unicodeobject.c formatchar() */ /* taken from unicodeobject.c formatchar() */
/* Integer input truncated to a character */ /* Integer input truncated to a character */
/* XXX: won't work for int */ /* XXX: won't work for int */
@ -565,6 +744,13 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
numeric_char = (STRINGLIB_CHAR)x; numeric_char = (STRINGLIB_CHAR)x;
pnumeric_chars = &numeric_char; pnumeric_chars = &numeric_char;
n_digits = 1; n_digits = 1;
/* As a sort-of hack, we tell calc_number_widths that we only
have "remainder" characters. calc_number_widths thinks
these are characters that don't get formatted, only copied
into the output string. We do this for 'c' formatting,
because the characters are likely to be non-digits. */
n_remainder = 1;
} }
else { else {
int base; int base;
@ -616,8 +802,8 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
/* Is a sign character present in the output? If so, remember it /* Is a sign character present in the output? If so, remember it
and skip it */ and skip it */
sign = pnumeric_chars[0]; if (pnumeric_chars[0] == '-') {
if (sign == '-') { sign_char = pnumeric_chars[0];
++prefix; ++prefix;
++leading_chars_to_skip; ++leading_chars_to_skip;
} }
@ -627,70 +813,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
pnumeric_chars += leading_chars_to_skip; pnumeric_chars += leading_chars_to_skip;
} }
if (format->type == 'n') /* Determine the grouping, separator, and decimal point, if any. */
/* Compute how many additional chars we need to allocate get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
to hold the thousands grouping. */ (format->thousands_separators ?
STRINGLIB_GROUPING(NULL, n_digits, n_digits, LT_DEFAULT_LOCALE :
0, &n_grouping_chars, 0); LT_NO_LOCALE),
&locale);
/* Calculate the widths of the various leading and trailing parts */ /* Calculate how much memory we'll need. */
calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars, n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
format); n_digits, n_remainder, 0, &locale, format);
/* Allocate a new string to hold the result */ /* Allocate the memory. */
result = STRINGLIB_NEW(NULL, spec.n_total); result = STRINGLIB_NEW(NULL, n_total);
if (!result) if (!result)
goto done; goto done;
p = STRINGLIB_STR(result);
/* XXX There is too much magic here regarding the internals of
spec and the location of the prefix and digits. It would be
better if calc_number_widths returned a number of logical
offsets into the buffer, and those were used. Maybe in a
future code cleanup. */
/* Fill in the digit parts */
n_leading_chars = spec.n_lpadding + spec.n_lsign +
spec.n_prefix + spec.n_spadding;
memmove(p + n_leading_chars,
pnumeric_chars,
n_digits * sizeof(STRINGLIB_CHAR));
/* If type is 'X', convert the filled in digits to uppercase */
if (format->type == 'X') {
Py_ssize_t t;
for (t = 0; t < n_digits; ++t)
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
}
/* Insert the grouping, if any, after the uppercasing of the digits, so
we can ensure that grouping chars won't be affected. */
if (n_grouping_chars) {
/* We know this can't fail, since we've already
reserved enough space. */
STRINGLIB_CHAR *pstart = p + n_leading_chars;
#ifndef NDEBUG
int r =
#endif
STRINGLIB_GROUPING(pstart, n_digits, n_digits,
spec.n_total+n_grouping_chars-n_leading_chars,
NULL, 0);
assert(r);
}
/* Fill in the non-digit parts (padding, sign, etc.) */
fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
format->fill_char == '\0' ? ' ' : format->fill_char);
/* If type is 'X', uppercase the prefix. This has to be done after the
prefix is filled in by fill_non_digits */
if (format->type == 'X') {
Py_ssize_t t;
for (t = 0; t < n_prefix; ++t)
p[t + spec.n_lpadding + spec.n_lsign] =
STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]);
}
/* Populate the memory. */
fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
prefix, format->fill_char == '\0' ? ' ' : format->fill_char,
&locale, format->type == 'X');
done: done:
Py_XDECREF(tmp); Py_XDECREF(tmp);
@ -704,64 +846,45 @@ done:
#ifdef FORMAT_FLOAT #ifdef FORMAT_FLOAT
#if STRINGLIB_IS_UNICODE #if STRINGLIB_IS_UNICODE
/* taken from unicodeobject.c */ static void
static Py_ssize_t strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
strtounicode(Py_UNICODE *buffer, const char *charbuffer)
{ {
register Py_ssize_t i; Py_ssize_t i;
Py_ssize_t len = strlen(charbuffer); for (i = 0; i < len; ++i)
for (i = len - 1; i >= 0; --i)
buffer[i] = (Py_UNICODE)charbuffer[i]; buffer[i] = (Py_UNICODE)charbuffer[i];
return len;
} }
#endif #endif
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
/* much of this is taken from unicodeobject.c */ /* much of this is taken from unicodeobject.c */
static PyObject * static PyObject *
format_float_internal(PyObject *value, format_float_internal(PyObject *value,
const InternalFormatSpec *format) const InternalFormatSpec *format)
{ {
/* fmt = '%.' + `prec` + `type` + '%%' char *buf = NULL; /* buffer returned from PyOS_double_to_string */
worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
char fmt[20];
/* taken from unicodeobject.c */
/* Worst case length calc to ensure no buffer overrun:
'g' formats:
fmt = %#.<prec>g
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
for any double rep.)
len = 1 + prec + 1 + 2 + 5 = 9 + prec
'f' formats:
buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
len = 1 + 50 + 1 + prec = 52 + prec
If prec=0 the effective precision is 1 (the leading digit is
always given), therefore increase the length by one.
*/
char charbuf[FLOAT_FORMATBUFLEN];
Py_ssize_t n_digits; Py_ssize_t n_digits;
double x; Py_ssize_t n_remainder;
Py_ssize_t n_total;
int has_decimal;
double val;
Py_ssize_t precision = format->precision; Py_ssize_t precision = format->precision;
PyObject *result = NULL; STRINGLIB_CHAR type = format->type;
STRINGLIB_CHAR sign; int add_pct = 0;
char* trailing = "";
STRINGLIB_CHAR *p; STRINGLIB_CHAR *p;
NumberFieldWidths spec; NumberFieldWidths spec;
STRINGLIB_CHAR type = format->type; int flags = 0;
PyObject *result = NULL;
STRINGLIB_CHAR sign_char = '\0';
int float_type; /* Used to see if we have a nan, inf, or regular float. */
#if STRINGLIB_IS_UNICODE #if STRINGLIB_IS_UNICODE
Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN]; Py_UNICODE *unicode_tmp = NULL;
#endif #endif
/* alternate is not allowed on floats. */ /* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */
LocaleInfo locale;
/* Alternate is not allowed on floats. */
if (format->alternate) { if (format->alternate) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Alternate form (#) not allowed in float format " "Alternate form (#) not allowed in float format "
@ -769,84 +892,106 @@ format_float_internal(PyObject *value,
goto done; goto done;
} }
/* first, do the conversion as 8-bit chars, using the platform's if (type == '\0') {
snprintf. then, if needed, convert to unicode. */ /* Omitted type specifier. This is like 'g' but with at least
one digit after the decimal point. */
type = 'g';
flags |= Py_DTSF_ADD_DOT_0;
}
if (type == 'n')
/* 'n' is the same as 'g', except for the locale used to
format the result. We take care of that later. */
type = 'g';
/* 'F' is the same as 'f', per the PEP */ /* 'F' is the same as 'f', per the PEP */
if (type == 'F') if (type == 'F')
type = 'f'; type = 'f';
x = PyFloat_AsDouble(value); val = PyFloat_AsDouble(value);
if (val == -1.0 && PyErr_Occurred())
if (x == -1.0 && PyErr_Occurred())
goto done; goto done;
if (type == '%') { if (type == '%') {
type = 'f'; type = 'f';
x *= 100; val *= 100;
trailing = "%"; add_pct = 1;
} }
if (precision < 0) if (precision < 0)
precision = 6; precision = 6;
if (type == 'f' && fabs(x) >= 1e50) if ((type == 'f' || type == 'F') && fabs(val) >= 1e50)
type = 'g'; type = 'g';
/* cast "type", because if we're in unicode we need to pass a /* Cast "type", because if we're in unicode we need to pass a
8-bit char. this is safe, because we've restricted what "type" 8-bit char. This is safe, because we've restricted what "type"
can be */ can be. */
PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision, buf = PyOS_double_to_string(val, (char)type, precision, flags,
(char)type); &float_type);
if (buf == NULL)
goto done;
n_digits = strlen(buf);
/* do the actual formatting */ if (add_pct) {
PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x); /* We know that buf has a trailing zero (since we just called
strlen() on it), and we don't use that fact any more. So we
can just write over the trailing zero. */
buf[n_digits] = '%';
n_digits += 1;
}
/* adding trailing to fmt with PyOS_snprintf doesn't work, not /* Since there is no unicode version of PyOS_double_to_string,
sure why. we'll just concatentate it here, no harm done. we just use the 8 bit version and then convert to unicode. */
know we can't have a buffer overflow from the fmt size
analysis */
strcat(charbuf, trailing);
/* rather than duplicate the code for snprintf for both unicode
and 8 bit strings, we just use the 8 bit version and then
convert to unicode in a separate code path. that's probably
the lesser of 2 evils. */
#if STRINGLIB_IS_UNICODE #if STRINGLIB_IS_UNICODE
n_digits = strtounicode(unicodebuf, charbuf); unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
p = unicodebuf; if (unicode_tmp == NULL) {
PyErr_NoMemory();
goto done;
}
strtounicode(unicode_tmp, buf, n_digits);
p = unicode_tmp;
#else #else
/* compute the length. I believe this is done because the return p = buf;
value from snprintf above is unreliable */
n_digits = strlen(charbuf);
p = charbuf;
#endif #endif
/* is a sign character present in the output? if so, remember it /* Is a sign character present in the output? If so, remember it
and skip it */ and skip it */
sign = p[0]; if (*p == '-') {
if (sign == '-') { sign_char = *p;
++p; ++p;
--n_digits; --n_digits;
} }
calc_number_widths(&spec, sign, 0, n_digits, format); /* Determine if we have any "remainder" (after the digits, might include
decimal or exponent or both (or neither)) */
parse_number(p, n_digits, &n_remainder, &has_decimal);
/* allocate a string with enough space */ /* Determine the grouping, separator, and decimal point, if any. */
result = STRINGLIB_NEW(NULL, spec.n_total); get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale);
/* Calculate how much memory we'll need. */
n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
n_remainder, has_decimal, &locale, format);
/* Allocate the memory. */
result = STRINGLIB_NEW(NULL, n_total);
if (result == NULL) if (result == NULL)
goto done; goto done;
/* Fill in the non-digit parts (padding, sign, etc.) */ /* Populate the memory. */
fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits, fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
format->fill_char == '\0' ? ' ' : format->fill_char); format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
0);
/* fill in the digit parts */
memmove(STRINGLIB_STR(result) +
(spec.n_lpadding + spec.n_lsign + spec.n_spadding),
p,
n_digits * sizeof(STRINGLIB_CHAR));
done: done:
PyMem_Free(buf);
#if STRINGLIB_IS_UNICODE
PyMem_Free(unicode_tmp);
#endif
return result; return result;
} }
#endif /* FORMAT_FLOAT */ #endif /* FORMAT_FLOAT */
@ -1027,11 +1172,7 @@ FORMAT_FLOAT(PyObject *obj,
/* type conversion? */ /* type conversion? */
switch (format.type) { switch (format.type) {
case '\0': case '\0': /* No format code: like 'g', but with at least one decimal. */
/* 'Z' means like 'g', but with at least one decimal. See
PyOS_ascii_formatd */
format.type = 'Z';
/* Deliberate fall through to the next case statement */
case 'e': case 'e':
case 'E': case 'E':
case 'f': case 'f':

View file

@ -5,126 +5,208 @@
#include <locale.h> #include <locale.h>
/* Larger (MAX) and smaller (MIN) of two values.  These are function-like
   macros: each argument can be evaluated more than once, so do not pass
   expressions with side effects. */
#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* Iteration state used to walk a grouping string one group width at a
   time.  Set up with _GroupGenerator_init() and advanced with
   _GroupGenerator_next(). */
typedef struct {
/* The grouping string being walked; it is read one char at a time. */
const char *grouping;
/* The most recent group width handed out; it is returned again once the
   terminating 0 of the grouping string is reached. */
char previous;
Py_ssize_t i; /* Where we're currently pointing in grouping. */
} GroupGenerator;
/* Prepare a generator to walk `grouping` from its first entry.  No group
   width has been produced yet, so the remembered width starts at 0. */
static void
_GroupGenerator_init(GroupGenerator *self, const char *grouping)
{
    self->previous = 0;
    self->i = 0;
    self->grouping = grouping;
}
/* Produce the next group width, or 0 when there are no more groups. */
static Py_ssize_t
_GroupGenerator_next(GroupGenerator *self)
{
    /* Very little validation is done on the grouping string.  A string
       holding nothing but CHAR_MAX, for instance, simply ends the
       generator straight away.  That is not expected to occur, but it is
       handled gracefully. */
    const char width = self->grouping[self->i];

    if (width == 0) {
        /* End of the string: keep handing out the last width seen. */
        return self->previous;
    }
    if (width == CHAR_MAX) {
        /* Stop the generator. */
        return 0;
    }

    /* An ordinary entry: remember it and step to the next one. */
    self->previous = width;
    self->i++;
    return (Py_ssize_t)width;
}
/* Emit one group into the output, working from right to left: first the
   thousands separator (skipped when thousands_sep is NULL), then n_chars
   digits copied from just before *digits_end, then n_zeros '0' characters.
   Every piece is optional, depending on the call.  On return *buffer_end
   and *digits_end have been moved back past what was written/consumed. */
static void
fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
     Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
     Py_ssize_t thousands_sep_len)
{
    STRINGLIB_CHAR *out = *buffer_end;
    STRINGLIB_CHAR *src = *digits_end;

    if (thousands_sep) {
        out -= thousands_sep_len;

        /* Place the separator characters in the output. */
#if STRINGLIB_IS_UNICODE
        {
            /* The locale's separator is made of char's; widen each one
               into the unicode output. */
            Py_ssize_t k;
            for (k = 0; k < thousands_sep_len; ++k)
                out[k] = thousands_sep[k];
        }
#else
        /* Same character type on both sides, a plain memcpy will do. */
        memcpy(out, thousands_sep, thousands_sep_len);
#endif
    }

    /* The digits of this group. */
    out -= n_chars;
    src -= n_chars;
    memcpy(out, src, n_chars * sizeof(STRINGLIB_CHAR));

    /* Leading zero padding for this group. */
    out -= n_zeros;
    STRINGLIB_FILL(out, '0', n_zeros);

    *buffer_end = out;
    *digits_end = src;
}
/** /**
* _Py_InsertThousandsGrouping: * _Py_InsertThousandsGrouping:
* @buffer: A pointer to the start of a string. * @buffer: A pointer to the start of a string.
* @n_buffer: The length of the string. * @n_buffer: Number of characters in @buffer.
* @digits: A pointer to the digits we're reading from. If count
* is non-NULL, this is unused.
* @n_digits: The number of digits in the string, in which we want * @n_digits: The number of digits in the string, in which we want
* to put the grouping chars. * to put the grouping chars.
* @buf_size: The maximum size of the buffer pointed to by buffer. * @min_width: The minimum width of the digits in the output string.
* @count: If non-NULL, points to a variable that will receive the * Output will be zero-padded on the left to fill.
* number of characters we need to insert (and no formatting * @grouping: see definition in localeconv().
* will actually occur). * @thousands_sep: see definition in localeconv().
* @append_zero_char: If non-zero, put a trailing zero at the end of
* of the resulting string, if and only if we modified the
* string.
* *
* Inserts thousand grouping characters (as defined in the current * There are 2 modes: counting and filling. If @buffer is NULL,
* locale) into the string between buffer and buffer+n_digits. If * we are in counting mode, else filling mode.
* count is non-NULL, don't do any formatting, just count the number * If counting, the required buffer size is returned.
* of characters to insert. This is used by the caller to * If filling, we know the buffer will be large enough, so we don't
* appropriately resize the buffer, if needed. If count is non-NULL, * need to pass in the buffer size.
* buffer can be NULL (it is not dereferenced at all in that case). * Inserts thousand grouping characters (as defined by grouping and
* thousands_sep) into the string between buffer and buffer+n_digits.
* *
* Return value: 0 on error, else 1. Note that no error can occur if * Return value: 0 on error, else 1. Note that no error can occur if
* count is non-NULL. * count is non-NULL.
* *
* This name won't be used, the includer of this file should define * This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string. * it to be the actual function name, based on unicode or string.
*
* As closely as possible, this code mimics the logic in decimal.py's
_insert_thousands_sep().
**/ **/
int Py_ssize_t
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer, _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer, Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits, Py_ssize_t n_digits,
Py_ssize_t buf_size, Py_ssize_t min_width,
Py_ssize_t *count, const char *grouping,
int append_zero_char) const char *thousands_sep)
{
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
int use_separator = 0; /* First time through, don't append the
separator. They only go between
groups. */
STRINGLIB_CHAR *buffer_end = NULL;
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
returns 0. */
GroupGenerator groupgen;
_GroupGenerator_init(&groupgen, grouping);
if (buffer) {
buffer_end = buffer + n_buffer;
digits_end = digits + n_digits;
}
while ((l = _GroupGenerator_next(&groupgen)) > 0) {
l = MIN(l, MAX(MAX(remaining, min_width), 1));
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
/* Count only, don't do anything. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
fill(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
/* Use a separator next time. */
use_separator = 1;
remaining -= n_chars;
min_width -= l;
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
min_width -= thousands_sep_len;
}
if (!loop_broken) {
/* We left the loop without using a break statement. */
l = MAX(MAX(remaining, min_width), 1);
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
fill(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
}
return count;
}
/**
* _Py_InsertThousandsGroupingLocale:
* @buffer: A pointer to the start of a string.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
*
* Reads the current locale and calls _Py_InsertThousandsGrouping().
**/
Py_ssize_t
_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width)
{ {
struct lconv *locale_data = localeconv(); struct lconv *locale_data = localeconv();
const char *grouping = locale_data->grouping; const char *grouping = locale_data->grouping;
const char *thousands_sep = locale_data->thousands_sep; const char *thousands_sep = locale_data->thousands_sep;
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
STRINGLIB_CHAR *pend = NULL; /* current end of buffer */
STRINGLIB_CHAR *pmax = NULL; /* max of buffer */
char current_grouping;
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* Initialize the character count, if we're just counting. */ return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
if (count) min_width, grouping, thousands_sep);
*count = 0;
else {
/* We're not just counting, we're modifying buffer */
pend = buffer + n_buffer;
pmax = buffer + buf_size;
}
/* Starting at the end and working right-to-left, keep track of
what grouping needs to be added and insert that. */
current_grouping = *grouping++;
/* If the first character is 0, perform no grouping at all. */
if (current_grouping == 0)
return 1;
while (remaining > current_grouping) {
/* Always leave buffer and pend valid at the end of this
loop, since we might leave with a return statement. */
remaining -= current_grouping;
if (count) {
/* We're only counting, not touching the memory. */
*count += thousands_sep_len;
}
else {
/* Do the formatting. */
STRINGLIB_CHAR *plast = buffer + remaining;
/* Is there room to insert thousands_sep_len chars? */
if (pmax - pend < thousands_sep_len)
/* No room. */
return 0;
/* Move the rest of the string down. */
memmove(plast + thousands_sep_len,
plast,
(pend - plast) * sizeof(STRINGLIB_CHAR));
/* Copy the thousands_sep chars into the buffer. */
#if STRINGLIB_IS_UNICODE
/* Convert from the char's of the thousands_sep from
the locale into unicode. */
{
Py_ssize_t i;
for (i = 0; i < thousands_sep_len; ++i)
plast[i] = thousands_sep[i];
}
#else
/* No conversion, just memcpy the thousands_sep. */
memcpy(plast, thousands_sep, thousands_sep_len);
#endif
}
/* Adjust end pointer. */
pend += thousands_sep_len;
/* Move to the next grouping character, unless we're
repeating (which is designated by a grouping of 0). */
if (*grouping != 0) {
current_grouping = *grouping++;
if (current_grouping == CHAR_MAX)
/* We're done. */
break;
}
}
if (append_zero_char) {
/* Append a zero character to mark the end of the string,
if there's room. */
if (pend - (buffer + remaining) < 1)
/* No room, error. */
return 0;
*pend = 0;
}
return 1;
} }
#endif /* STRINGLIB_LOCALEUTIL_H */ #endif /* STRINGLIB_LOCALEUTIL_H */

View file

@ -6,6 +6,15 @@
compiled as unicode. */ compiled as unicode. */
#define STRINGLIB_IS_UNICODE 0 #define STRINGLIB_IS_UNICODE 0
/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
/* This needs to be cleaned up. See issue 5793. */
#ifndef _tolower
#define _tolower tolower
#endif
#ifndef _toupper
#define _toupper toupper
#endif
#define STRINGLIB_OBJECT PyStringObject #define STRINGLIB_OBJECT PyStringObject
#define STRINGLIB_CHAR char #define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_TYPE_NAME "string"
@ -13,8 +22,8 @@
#define STRINGLIB_EMPTY nullstring #define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) #define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) #define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER toupper #define STRINGLIB_TOUPPER(x) _toupper(Py_CHARMASK(x))
#define STRINGLIB_TOLOWER tolower #define STRINGLIB_TOLOWER(x) _tolower(Py_CHARMASK(x))
#define STRINGLIB_FILL memset #define STRINGLIB_FILL memset
#define STRINGLIB_STR PyString_AS_STRING #define STRINGLIB_STR PyString_AS_STRING
#define STRINGLIB_LEN PyString_GET_SIZE #define STRINGLIB_LEN PyString_GET_SIZE
@ -24,5 +33,6 @@
#define STRINGLIB_CMP memcmp #define STRINGLIB_CMP memcmp
#define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping #define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
#endif /* !STRINGLIB_STRINGDEFS_H */ #endif /* !STRINGLIB_STRINGDEFS_H */

View file

@ -37,6 +37,15 @@
* *
* Return value: the #gdouble value. * Return value: the #gdouble value.
**/ **/
/*
Use system strtod; since strtod is locale aware, we may
have to first fix the decimal separator.
Note that unlike _Py_dg_strtod, the system strtod may not always give
correctly rounded results.
*/
double double
PyOS_ascii_strtod(const char *nptr, char **endptr) PyOS_ascii_strtod(const char *nptr, char **endptr)
{ {
@ -187,6 +196,13 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val; return val;
} }
/* Convert the string at nptr to a double by handing it to
   PyOS_ascii_strtod(); the position where parsing stopped is not
   requested. */
double
PyOS_ascii_atof(const char *nptr)
{
    double parsed = PyOS_ascii_strtod(nptr, NULL);
    return parsed;
}
/* Given a string that may have a decimal point in the current /* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */ longer, no need for a maximum buffer size parameter. */
@ -292,8 +308,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size)
} }
} }
/* Ensure that buffer has a decimal point in it. The decimal point /* Ensure that buffer has a decimal point in it. The decimal point will not
will not be in the current locale, it will always be '.' */ be in the current locale, it will always be '.'. Don't add a decimal if an
exponent is present. */
Py_LOCAL_INLINE(void) Py_LOCAL_INLINE(void)
ensure_decimal_point(char* buffer, size_t buf_size) ensure_decimal_point(char* buffer, size_t buf_size)
{ {
@ -322,7 +339,8 @@ ensure_decimal_point(char* buffer, size_t buf_size)
insert_count = 1; insert_count = 1;
} }
} }
else { else if (!(*p == 'e' || *p == 'E')) {
/* Don't add ".0" if we have an exponent. */
chars_to_insert = ".0"; chars_to_insert = ".0";
insert_count = 2; insert_count = 2;
} }
@ -341,37 +359,6 @@ ensure_decimal_point(char* buffer, size_t buf_size)
} }
} }
/* Add the locale specific grouping characters to buffer. Note
that any decimal point (if it's present) in buffer is already
locale-specific. Return 0 on error, else 1. */
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
Py_ssize_t len = strlen(buffer);
struct lconv *locale_data = localeconv();
const char *decimal_point = locale_data->decimal_point;
/* Find the decimal point, if any. We're only concerned
about the characters to the left of the decimal when
adding grouping. */
char *p = strstr(buffer, decimal_point);
if (!p) {
/* No decimal, use the entire string. */
/* If any exponent, adjust p. */
p = strpbrk(buffer, "eE");
if (!p)
/* No exponent and no decimal. Use the entire
string. */
p = buffer + len;
}
/* At this point, p points just past the right-most character we
want to format. We need to add the grouping string for the
characters between buffer and p. */
return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
buf_size, NULL, 1);
}
/* see FORMATBUFLEN in unicodeobject.c */ /* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120 #define FLOAT_FORMATBUFLEN 120
@ -386,9 +373,8 @@ add_thousands_grouping(char* buffer, size_t buf_size)
* Converts a #gdouble to a string, using the '.' as * Converts a #gdouble to a string, using the '.' as
* decimal point. To format the number you pass in * decimal point. To format the number you pass in
* a printf()-style format string. Allowed conversion * a printf()-style format string. Allowed conversion
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'. * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
* *
* 'n' is the same as 'g', except it uses the current locale.
* 'Z' is the same as 'g', except it always has a decimal and * 'Z' is the same as 'g', except it always has a decimal and
* at least one digit after the decimal. * at least one digit after the decimal.
* *
@ -403,11 +389,6 @@ PyOS_ascii_formatd(char *buffer,
char format_char; char format_char;
size_t format_len = strlen(format); size_t format_len = strlen(format);
/* For type 'n', we need to make a copy of the format string, because
we're going to modify 'n' -> 'g', and format is const char*, so we
can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
we ever need this to be. There's an upcoming check to ensure it's
big enough. */
/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
also with at least one character past the decimal. */ also with at least one character past the decimal. */
char tmp_format[FLOAT_FORMATBUFLEN]; char tmp_format[FLOAT_FORMATBUFLEN];
@ -433,12 +414,12 @@ PyOS_ascii_formatd(char *buffer,
if (!(format_char == 'e' || format_char == 'E' || if (!(format_char == 'e' || format_char == 'E' ||
format_char == 'f' || format_char == 'F' || format_char == 'f' || format_char == 'F' ||
format_char == 'g' || format_char == 'G' || format_char == 'g' || format_char == 'G' ||
format_char == 'n' || format_char == 'Z')) format_char == 'Z'))
return NULL; return NULL;
/* Map 'n' or 'Z' format_char to 'g', by copying the format string and /* Map 'Z' format_char to 'g', by copying the format string and
replacing the final char with a 'g' */ replacing the final char with a 'g' */
if (format_char == 'n' || format_char == 'Z') { if (format_char == 'Z') {
if (format_len + 1 >= sizeof(tmp_format)) { if (format_len + 1 >= sizeof(tmp_format)) {
/* The format won't fit in our copy. Error out. In /* The format won't fit in our copy. Error out. In
practice, this will never happen and will be practice, this will never happen and will be
@ -457,10 +438,7 @@ PyOS_ascii_formatd(char *buffer,
/* Do various fixups on the return string */ /* Do various fixups on the return string */
/* Get the current locale, and find the decimal point string. /* Get the current locale, and find the decimal point string.
Convert that string back to a dot. Do not do this if using the Convert that string back to a dot. */
'n' (number) format code, since we want to keep the localized
decimal point in that case. */
if (format_char != 'n')
change_decimal_from_locale_to_dot(buffer); change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least /* If an exponent exists, ensure that the exponent is at least
@ -475,16 +453,111 @@ PyOS_ascii_formatd(char *buffer,
if (format_char == 'Z') if (format_char == 'Z')
ensure_decimal_point(buffer, buf_size); ensure_decimal_point(buffer, buf_size);
/* If format_char is 'n', add the thousands grouping. */
if (format_char == 'n')
if (!add_thousands_grouping(buffer, buf_size))
return NULL;
return buffer; return buffer;
} }
/* Convert a double to a newly allocated, NUL-terminated string.

   val:         the value to convert.
   format_code: one of 'e', 'E', 'f', 'F', 'g', 'G', 'r' or 's'.  The
                upper case codes produce the same text as the lower case
                ones, converted to upper case.  'r' formats as 'g' with
                precision 17 and 's' as 'g' with precision 12; for both,
                the supplied precision must be 0.
   precision:   the precision placed in the printf-style format.
   flags:       0 or more of Py_DTSF_SIGN, Py_DTSF_ADD_DOT_0 and
                Py_DTSF_ALT.
   type:        if non-NULL, receives Py_DTST_FINITE, Py_DTST_INFINITE or
                Py_DTST_NAN.

   Returns the string, which the caller must release with PyMem_Free(),
   or NULL with an exception set on a bad format_code, a non-zero
   precision with 'r' or 's', a formatting failure, or lack of memory.

   NOTE(review): the text is formatted into a fixed 128 byte buffer, so
   very large precisions may not fit -- confirm how PyOS_ascii_formatd
   behaves in that case. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    char buf[128];
    char format[32];
    Py_ssize_t len;
    char *result;
    char *p;
    int t;
    int upper = 0;

    /* Validate format_code, and map upper and lower case */
    switch (format_code) {
    case 'e':          /* exponent */
    case 'f':          /* fixed */
    case 'g':          /* general */
        break;
    case 'E':
        upper = 1;
        format_code = 'e';
        break;
    case 'F':
        upper = 1;
        format_code = 'f';
        break;
    case 'G':
        upper = 1;
        format_code = 'g';
        break;
    case 'r':          /* repr format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 17;
        format_code = 'g';
        break;
    case 's':          /* str format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 12;
        format_code = 'g';
        break;
    default:
        PyErr_BadInternalCall();
        return NULL;
    }

    /* Handle nan and inf. */
    if (Py_IS_NAN(val)) {
        strcpy(buf, "nan");
        t = Py_DTST_NAN;
    } else if (Py_IS_INFINITY(val)) {
        if (copysign(1., val) == 1.)
            strcpy(buf, "inf");
        else
            strcpy(buf, "-inf");
        t = Py_DTST_INFINITE;
    } else {
        t = Py_DTST_FINITE;

        /* 'Z' is PyOS_ascii_formatd's code for 'g' with at least one
           digit after the decimal point. */
        if (flags & Py_DTSF_ADD_DOT_0)
            format_code = 'Z';

        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
                      format_code);

        /* PyOS_ascii_formatd reports failure by returning NULL.  buf must
           not be read in that case, since it may never have been
           written. */
        if (PyOS_ascii_formatd(buf, sizeof(buf), format, val) == NULL) {
            PyErr_BadInternalCall();
            return NULL;
        }
    }

    len = strlen(buf);

    /* Add 1 for the trailing 0 byte.
       Add 1 because we might need to make room for the sign.
       */
    result = PyMem_Malloc(len + 2);
    if (result == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    p = result;

    /* Never add sign for nan/inf, even if asked. */
    if ((flags & Py_DTSF_SIGN) && buf[0] != '-' && t == Py_DTST_FINITE)
        *p++ = '+';

    strcpy(p, buf);

    if (upper) {
        /* Convert to upper case.  toupper() requires a value that is
           representable as unsigned char, so go through that type rather
           than passing a possibly negative plain char. */
        char *p1;
        for (p1 = p; *p1; p1++)
            *p1 = (char)toupper((unsigned char)*p1);
    }

    if (type)
        *type = t;
    return result;
}