mirror of
https://github.com/python/cpython.git
synced 2025-07-29 06:05:00 +00:00
Issue 2526, float.__format__ 'n' specifier does not support thousands grouping.
Implemented grouping, with tests. Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions.
This commit is contained in:
parent
48f6276ddc
commit
0a95063d73
2 changed files with 240 additions and 129 deletions
|
@ -1,8 +1,9 @@
|
||||||
# Python test set -- part 6, built-in types
|
# Python test set -- part 6, built-in types
|
||||||
|
|
||||||
from test.test_support import run_unittest, have_unicode
|
from test.test_support import run_unittest, have_unicode, run_with_locale
|
||||||
import unittest
|
import unittest
|
||||||
import sys
|
import sys
|
||||||
|
import locale
|
||||||
|
|
||||||
class TypesTests(unittest.TestCase):
|
class TypesTests(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -476,6 +477,15 @@ class TypesTests(unittest.TestCase):
|
||||||
self.assertEqual(value.__format__(format_spec),
|
self.assertEqual(value.__format__(format_spec),
|
||||||
float(value).__format__(format_spec))
|
float(value).__format__(format_spec))
|
||||||
|
|
||||||
|
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
|
||||||
|
def test_float__format__locale(self):
|
||||||
|
# test locale support for __format__ code 'n'
|
||||||
|
|
||||||
|
for i in range(-10, 10):
|
||||||
|
x = 1234567890.0 * (10.0 ** i)
|
||||||
|
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
|
||||||
|
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
|
||||||
|
|
||||||
def test_float__format__(self):
|
def test_float__format__(self):
|
||||||
# these should be rewritten to use both format(x, spec) and
|
# these should be rewritten to use both format(x, spec) and
|
||||||
# x.__format__(spec)
|
# x.__format__(spec)
|
||||||
|
|
|
@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given a string that may have a decimal point in the current
|
||||||
|
locale, change it back to a dot. Since the string cannot get
|
||||||
|
longer, no need for a maximum buffer size parameter. */
|
||||||
|
Py_LOCAL_INLINE(void)
|
||||||
|
change_decimal_from_locale_to_dot(char* buffer)
|
||||||
|
{
|
||||||
|
struct lconv *locale_data = localeconv();
|
||||||
|
const char *decimal_point = locale_data->decimal_point;
|
||||||
|
|
||||||
|
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
|
||||||
|
size_t decimal_point_len = strlen(decimal_point);
|
||||||
|
|
||||||
|
if (*buffer == '+' || *buffer == '-')
|
||||||
|
buffer++;
|
||||||
|
while (isdigit(Py_CHARMASK(*buffer)))
|
||||||
|
buffer++;
|
||||||
|
if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
|
||||||
|
*buffer = '.';
|
||||||
|
buffer++;
|
||||||
|
if (decimal_point_len > 1) {
|
||||||
|
/* buffer needs to get smaller */
|
||||||
|
size_t rest_len = strlen(buffer +
|
||||||
|
(decimal_point_len - 1));
|
||||||
|
memmove(buffer,
|
||||||
|
buffer + (decimal_point_len - 1),
|
||||||
|
rest_len);
|
||||||
|
buffer[rest_len] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* From the C99 standard, section 7.19.6:
|
/* From the C99 standard, section 7.19.6:
|
||||||
The exponent always contains at least two digits, and only as many more digits
|
The exponent always contains at least two digits, and only as many more digits
|
||||||
|
@ -194,6 +226,189 @@ as necessary to represent the exponent.
|
||||||
*/
|
*/
|
||||||
#define MIN_EXPONENT_DIGITS 2
|
#define MIN_EXPONENT_DIGITS 2
|
||||||
|
|
||||||
|
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
|
||||||
|
in length. */
|
||||||
|
Py_LOCAL_INLINE(void)
|
||||||
|
ensure_minumim_exponent_length(char* buffer, size_t buf_size)
|
||||||
|
{
|
||||||
|
char *p = strpbrk(buffer, "eE");
|
||||||
|
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
|
||||||
|
char *start = p + 2;
|
||||||
|
int exponent_digit_cnt = 0;
|
||||||
|
int leading_zero_cnt = 0;
|
||||||
|
int in_leading_zeros = 1;
|
||||||
|
int significant_digit_cnt;
|
||||||
|
|
||||||
|
/* Skip over the exponent and the sign. */
|
||||||
|
p += 2;
|
||||||
|
|
||||||
|
/* Find the end of the exponent, keeping track of leading
|
||||||
|
zeros. */
|
||||||
|
while (*p && isdigit(Py_CHARMASK(*p))) {
|
||||||
|
if (in_leading_zeros && *p == '0')
|
||||||
|
++leading_zero_cnt;
|
||||||
|
if (*p != '0')
|
||||||
|
in_leading_zeros = 0;
|
||||||
|
++p;
|
||||||
|
++exponent_digit_cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
|
||||||
|
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
|
||||||
|
/* If there are 2 exactly digits, we're done,
|
||||||
|
regardless of what they contain */
|
||||||
|
}
|
||||||
|
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
|
||||||
|
int extra_zeros_cnt;
|
||||||
|
|
||||||
|
/* There are more than 2 digits in the exponent. See
|
||||||
|
if we can delete some of the leading zeros */
|
||||||
|
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
|
||||||
|
significant_digit_cnt = MIN_EXPONENT_DIGITS;
|
||||||
|
extra_zeros_cnt = exponent_digit_cnt -
|
||||||
|
significant_digit_cnt;
|
||||||
|
|
||||||
|
/* Delete extra_zeros_cnt worth of characters from the
|
||||||
|
front of the exponent */
|
||||||
|
assert(extra_zeros_cnt >= 0);
|
||||||
|
|
||||||
|
/* Add one to significant_digit_cnt to copy the
|
||||||
|
trailing 0 byte, thus setting the length */
|
||||||
|
memmove(start,
|
||||||
|
start + extra_zeros_cnt,
|
||||||
|
significant_digit_cnt + 1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* If there are fewer than 2 digits, add zeros
|
||||||
|
until there are 2, if there's enough room */
|
||||||
|
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
|
||||||
|
if (start + zeros + exponent_digit_cnt + 1
|
||||||
|
< buffer + buf_size) {
|
||||||
|
memmove(start + zeros, start,
|
||||||
|
exponent_digit_cnt + 1);
|
||||||
|
memset(start, '0', zeros);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ensure that buffer has a decimal point in it. The decimal point
|
||||||
|
will not be in the current locale, it will always be '.' */
|
||||||
|
Py_LOCAL_INLINE(void)
|
||||||
|
ensure_decimal_point(char* buffer, size_t buf_size)
|
||||||
|
{
|
||||||
|
int insert_count = 0;
|
||||||
|
char* chars_to_insert;
|
||||||
|
|
||||||
|
/* search for the first non-digit character */
|
||||||
|
char *p = buffer;
|
||||||
|
while (*p && isdigit(Py_CHARMASK(*p)))
|
||||||
|
++p;
|
||||||
|
|
||||||
|
if (*p == '.') {
|
||||||
|
if (isdigit(Py_CHARMASK(*(p+1)))) {
|
||||||
|
/* Nothing to do, we already have a decimal
|
||||||
|
point and a digit after it */
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* We have a decimal point, but no following
|
||||||
|
digit. Insert a zero after the decimal. */
|
||||||
|
++p;
|
||||||
|
chars_to_insert = "0";
|
||||||
|
insert_count = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
chars_to_insert = ".0";
|
||||||
|
insert_count = 2;
|
||||||
|
}
|
||||||
|
if (insert_count) {
|
||||||
|
size_t buf_len = strlen(buffer);
|
||||||
|
if (buf_len + insert_count + 1 >= buf_size) {
|
||||||
|
/* If there is not enough room in the buffer
|
||||||
|
for the additional text, just skip it. It's
|
||||||
|
not worth generating an error over. */
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memmove(p + insert_count, p,
|
||||||
|
buffer + strlen(buffer) - p + 1);
|
||||||
|
memcpy(p, chars_to_insert, insert_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add the locale specific grouping characters to buffer. Note
|
||||||
|
that any decimal point (if it's present) in buffer is already
|
||||||
|
locale-specific. Return 0 on error, else 1. */
|
||||||
|
Py_LOCAL_INLINE(int)
|
||||||
|
add_thousands_grouping(char* buffer, size_t buf_size)
|
||||||
|
{
|
||||||
|
struct lconv *locale_data = localeconv();
|
||||||
|
const char *grouping = locale_data->grouping;
|
||||||
|
const char *thousands_sep = locale_data->thousands_sep;
|
||||||
|
size_t thousands_sep_len = strlen(thousands_sep);
|
||||||
|
const char *decimal_point = locale_data->decimal_point;
|
||||||
|
char *pend = buffer + strlen(buffer); /* current end of buffer */
|
||||||
|
char *pmax = buffer + buf_size; /* max of buffer */
|
||||||
|
char current_grouping;
|
||||||
|
|
||||||
|
/* Find the decimal point, if any. We're only concerned
|
||||||
|
about the characters to the left of the decimal when
|
||||||
|
adding grouping. */
|
||||||
|
char *p = strstr(buffer, decimal_point);
|
||||||
|
if (!p) {
|
||||||
|
/* No decimal, use the entire string. */
|
||||||
|
|
||||||
|
/* If any exponent, adjust p. */
|
||||||
|
p = strpbrk(buffer, "eE");
|
||||||
|
if (!p)
|
||||||
|
/* No exponent and no decimal. Use the entire
|
||||||
|
string. */
|
||||||
|
p = pend;
|
||||||
|
}
|
||||||
|
/* At this point, p points just past the right-most character we
|
||||||
|
want to format. We need to add the grouping string for the
|
||||||
|
characters between buffer and p. */
|
||||||
|
|
||||||
|
/* Starting at p and working right-to-left, keep track of
|
||||||
|
what grouping needs to be added and insert that. */
|
||||||
|
current_grouping = *grouping++;
|
||||||
|
|
||||||
|
/* If the first character is 0, perform no grouping at all. */
|
||||||
|
if (current_grouping == 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
while (p - buffer > current_grouping) {
|
||||||
|
/* Always leave buffer and pend valid at the end of this
|
||||||
|
loop, since we might leave with a return statement. */
|
||||||
|
|
||||||
|
/* Is there room to insert thousands_sep_len chars?. */
|
||||||
|
if (pmax - pend <= thousands_sep_len)
|
||||||
|
/* No room. */
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Move the rest of the string down. */
|
||||||
|
p -= current_grouping;
|
||||||
|
memmove(p + thousands_sep_len,
|
||||||
|
p,
|
||||||
|
pend - p + 1);
|
||||||
|
/* Adjust end pointer. */
|
||||||
|
pend += thousands_sep_len;
|
||||||
|
/* Copy the thousands_sep chars into the buffer. */
|
||||||
|
memcpy(p, thousands_sep, thousands_sep_len);
|
||||||
|
|
||||||
|
/* Move to the next grouping character, unless we're
|
||||||
|
repeating (which is designated by a grouping of 0). */
|
||||||
|
if (*grouping != 0) {
|
||||||
|
current_grouping = *grouping++;
|
||||||
|
if (current_grouping == CHAR_MAX)
|
||||||
|
/* We're done. */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* see FORMATBUFLEN in unicodeobject.c */
|
/* see FORMATBUFLEN in unicodeobject.c */
|
||||||
#define FLOAT_FORMATBUFLEN 120
|
#define FLOAT_FORMATBUFLEN 120
|
||||||
|
|
||||||
|
@ -222,7 +437,6 @@ PyOS_ascii_formatd(char *buffer,
|
||||||
const char *format,
|
const char *format,
|
||||||
double d)
|
double d)
|
||||||
{
|
{
|
||||||
char *p;
|
|
||||||
char format_char;
|
char format_char;
|
||||||
size_t format_len = strlen(format);
|
size_t format_len = strlen(format);
|
||||||
|
|
||||||
|
@ -277,144 +491,31 @@ PyOS_ascii_formatd(char *buffer,
|
||||||
/* Have PyOS_snprintf do the hard work */
|
/* Have PyOS_snprintf do the hard work */
|
||||||
PyOS_snprintf(buffer, buf_size, format, d);
|
PyOS_snprintf(buffer, buf_size, format, d);
|
||||||
|
|
||||||
/* Get the current local, and find the decimal point character (or
|
/* Do various fixups on the return string */
|
||||||
string?). Convert that string back to a dot. Do not do this if
|
|
||||||
using the 'n' (number) format code. */
|
|
||||||
if (format_char != 'n') {
|
|
||||||
struct lconv *locale_data = localeconv();
|
|
||||||
const char *decimal_point = locale_data->decimal_point;
|
|
||||||
size_t decimal_point_len = strlen(decimal_point);
|
|
||||||
size_t rest_len;
|
|
||||||
|
|
||||||
assert(decimal_point_len != 0);
|
/* Get the current locale, and find the decimal point string.
|
||||||
|
Convert that string back to a dot. Do not do this if using the
|
||||||
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
|
'n' (number) format code, since we want to keep the localized
|
||||||
p = buffer;
|
decimal point in that case. */
|
||||||
|
if (format_char != 'n')
|
||||||
if (*p == '+' || *p == '-')
|
change_decimal_from_locale_to_dot(buffer);
|
||||||
p++;
|
|
||||||
|
|
||||||
while (isdigit(Py_CHARMASK(*p)))
|
|
||||||
p++;
|
|
||||||
|
|
||||||
if (strncmp(p, decimal_point,
|
|
||||||
decimal_point_len) == 0) {
|
|
||||||
*p = '.';
|
|
||||||
p++;
|
|
||||||
if (decimal_point_len > 1) {
|
|
||||||
rest_len = strlen(p +
|
|
||||||
(decimal_point_len - 1));
|
|
||||||
memmove(p, p + (decimal_point_len - 1),
|
|
||||||
rest_len);
|
|
||||||
p[rest_len] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If an exponent exists, ensure that the exponent is at least
|
/* If an exponent exists, ensure that the exponent is at least
|
||||||
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
|
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
|
||||||
for the extra zeros. Also, if there are more than
|
for the extra zeros. Also, if there are more than
|
||||||
MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
|
MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
|
||||||
back to MIN_EXPONENT_DIGITS */
|
back to MIN_EXPONENT_DIGITS */
|
||||||
p = strpbrk(buffer, "eE");
|
ensure_minumim_exponent_length(buffer, buf_size);
|
||||||
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
|
|
||||||
char *start = p + 2;
|
|
||||||
int exponent_digit_cnt = 0;
|
|
||||||
int leading_zero_cnt = 0;
|
|
||||||
int in_leading_zeros = 1;
|
|
||||||
int significant_digit_cnt;
|
|
||||||
|
|
||||||
p += 2;
|
|
||||||
while (*p && isdigit(Py_CHARMASK(*p))) {
|
|
||||||
if (in_leading_zeros && *p == '0')
|
|
||||||
++leading_zero_cnt;
|
|
||||||
if (*p != '0')
|
|
||||||
in_leading_zeros = 0;
|
|
||||||
++p;
|
|
||||||
++exponent_digit_cnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
|
|
||||||
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
|
|
||||||
/* If there are 2 exactly digits, we're done,
|
|
||||||
regardless of what they contain */
|
|
||||||
}
|
|
||||||
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
|
|
||||||
int extra_zeros_cnt;
|
|
||||||
|
|
||||||
/* There are more than 2 digits in the exponent. See
|
|
||||||
if we can delete some of the leading zeros */
|
|
||||||
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
|
|
||||||
significant_digit_cnt = MIN_EXPONENT_DIGITS;
|
|
||||||
extra_zeros_cnt = exponent_digit_cnt -
|
|
||||||
significant_digit_cnt;
|
|
||||||
|
|
||||||
/* Delete extra_zeros_cnt worth of characters from the
|
|
||||||
front of the exponent */
|
|
||||||
assert(extra_zeros_cnt >= 0);
|
|
||||||
|
|
||||||
/* Add one to significant_digit_cnt to copy the
|
|
||||||
trailing 0 byte, thus setting the length */
|
|
||||||
memmove(start,
|
|
||||||
start + extra_zeros_cnt,
|
|
||||||
significant_digit_cnt + 1);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* If there are fewer than 2 digits, add zeros
|
|
||||||
until there are 2, if there's enough room */
|
|
||||||
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
|
|
||||||
if (start + zeros + exponent_digit_cnt + 1
|
|
||||||
< buffer + buf_size) {
|
|
||||||
memmove(start + zeros, start,
|
|
||||||
exponent_digit_cnt + 1);
|
|
||||||
memset(start, '0', zeros);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If format_char is 'Z', make sure we have at least one character
|
/* If format_char is 'Z', make sure we have at least one character
|
||||||
after the decimal point (and make sure we have a decimal point). */
|
after the decimal point (and make sure we have a decimal point). */
|
||||||
if (format_char == 'Z') {
|
if (format_char == 'Z')
|
||||||
int insert_count = 0;
|
ensure_decimal_point(buffer, buf_size);
|
||||||
char* chars_to_insert;
|
|
||||||
|
|
||||||
/* search for the first non-digit character */
|
/* If format_char is 'n', add the thousands grouping. */
|
||||||
p = buffer;
|
if (format_char == 'n')
|
||||||
while (*p && isdigit(Py_CHARMASK(*p)))
|
if (!add_thousands_grouping(buffer, buf_size))
|
||||||
++p;
|
return NULL;
|
||||||
|
|
||||||
if (*p == '.') {
|
|
||||||
if (isdigit(Py_CHARMASK(*(p+1)))) {
|
|
||||||
/* Nothing to do, we already have a decimal
|
|
||||||
point and a digit after it */
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* We have a decimal point, but no following
|
|
||||||
digit. Insert a zero after the decimal. */
|
|
||||||
++p;
|
|
||||||
chars_to_insert = "0";
|
|
||||||
insert_count = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
chars_to_insert = ".0";
|
|
||||||
insert_count = 2;
|
|
||||||
}
|
|
||||||
if (insert_count) {
|
|
||||||
size_t buf_len = strlen(buffer);
|
|
||||||
if (buf_len + insert_count + 1 >= buf_size) {
|
|
||||||
/* If there is not enough room in the buffer
|
|
||||||
for the additional text, just skip it. It's
|
|
||||||
not worth generating an error over. */
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
memmove(p + insert_count, p,
|
|
||||||
buffer + strlen(buffer) - p + 1);
|
|
||||||
memcpy(p, chars_to_insert, insert_count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue