Issue 2526, float.__format__ 'n' specifier does not support thousands grouping.

Implemented grouping, with tests. Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions.
2025-09-13 12:17:24 +00:00 · 2008-04-30 01:09:30 +00:00 · 2008-04-30 01:09:30 +00:00 · 0a95063d73
commit 0a95063d73
parent 48f6276ddc
2 changed files with 240 additions and 129 deletions
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@ -1,8 +1,9 @@
 # Python test set -- part 6, built-in types
-from test.test_support import run_unittest, have_unicode
+from test.test_support import run_unittest, have_unicode, run_with_locale
 import unittest
 import sys
 import locale
 class TypesTests(unittest.TestCase):
@ -476,6 +477,15 @@ class TypesTests(unittest.TestCase):
                self.assertEqual(value.__format__(format_spec),
                                 float(value).__format__(format_spec))
    @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
    def test_float__format__locale(self):
        # The 'n' presentation type must produce the same text as
        # locale.format() with grouping enabled, both with the default
        # precision and with an explicit one, over many magnitudes.
        spec_pairs = (('%g', 'n'), ('%.10g', '.10n'))
        for exponent in range(-10, 10):
            value = 1234567890.0 * (10.0 ** exponent)
            for percent_spec, n_spec in spec_pairs:
                expected = locale.format(percent_spec, value, grouping=True)
                self.assertEqual(expected, format(value, n_spec))
    def test_float__format__(self):
        # these should be rewritten to use both format(x, spec) and
        # x.__format__(spec)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	return val;
 }
 /* Rewrite the decimal point of the number in buffer, which may be the
    one used by the current locale, as a plain '.'.  The replacement is
    a single character, so the string can only keep its length or
    shrink; that is why no buffer size parameter is needed. */
 Py_LOCAL_INLINE(void)
 change_decimal_from_locale_to_dot(char* buffer)
 {
 	const char *locale_point = localeconv()->decimal_point;
 	size_t locale_point_len;
 	char *cursor = buffer;

 	/* The locale already uses ".": there is nothing to convert. */
 	if (locale_point[0] == '.' && locale_point[1] == 0)
 		return;

 	locale_point_len = strlen(locale_point);

 	/* Step over an optional sign and the integer digits; the decimal
 	   point, if there is one, comes right after them. */
 	if (*cursor == '+' || *cursor == '-')
 		cursor++;
 	while (isdigit(Py_CHARMASK(*cursor)))
 		cursor++;

 	if (strncmp(cursor, locale_point, locale_point_len) != 0)
 		return;

 	*cursor++ = '.';

 	if (locale_point_len > 1) {
 		/* A multi-character decimal point leaves stale characters
 		   behind the '.'.  Slide the tail of the string, including
 		   its terminating 0 byte, down over them. */
 		char *tail = cursor + (locale_point_len - 1);
 		memmove(cursor, tail, strlen(tail) + 1);
 	}
 }
 /* From the C99 standard, section 7.19.6:
 The exponent always contains at least two digits, and only as many more digits
@ -194,6 +226,189 @@ as necessary to represent the exponent.
 */
 #define MIN_EXPONENT_DIGITS 2
 /* Normalize the width of the exponent in buffer, if it has one, to
    MIN_EXPONENT_DIGITS digits where possible.  Longer exponents lose
    as many leading zeros as can be spared; shorter ones are padded with
    zeros, provided buf_size leaves room for the padding. */
 Py_LOCAL_INLINE(void)
 ensure_minumim_exponent_length(char* buffer, size_t buf_size)
 {
 	char *marker = strpbrk(buffer, "eE");
 	char *digits;
 	char *scan;
 	int leading_zeros;
 	int total_digits;

 	/* Only an 'e' or 'E' directly followed by a sign is treated as
 	   the start of an exponent. */
 	if (marker == NULL)
 		return;
 	if (marker[1] != '-' && marker[1] != '+')
 		return;

 	/* The exponent digits begin after the marker and the sign. */
 	digits = marker + 2;

 	/* Walk over the leading zeros first, then over whatever digits
 	   remain, so that both counts fall out of pointer arithmetic. */
 	scan = digits;
 	while (*scan == '0')
 		++scan;
 	leading_zeros = (int)(scan - digits);
 	while (*scan && isdigit(Py_CHARMASK(*scan)))
 		++scan;
 	total_digits = (int)(scan - digits);

 	/* Exactly the minimum number of digits: leave them alone,
 	   whatever they contain. */
 	if (total_digits == MIN_EXPONENT_DIGITS)
 		return;

 	if (total_digits > MIN_EXPONENT_DIGITS) {
 		/* Too many digits.  Keep the significant ones, but never
 		   fewer than the minimum, and drop the zeros in front. */
 		int surplus_zeros;
 		int kept_digits = total_digits - leading_zeros;

 		if (kept_digits < MIN_EXPONENT_DIGITS)
 			kept_digits = MIN_EXPONENT_DIGITS;
 		surplus_zeros = total_digits - kept_digits;
 		assert(surplus_zeros >= 0);

 		/* The extra one carries along the byte that follows the
 		   digits (the trailing 0 byte), which sets the new
 		   length. */
 		memmove(digits, digits + surplus_zeros, kept_digits + 1);
 	}
 	else {
 		/* Too few digits.  Shift them right and fill the gap with
 		   zeros, but only when the result still fits. */
 		int padding = MIN_EXPONENT_DIGITS - total_digits;

 		if (digits + padding + total_digits + 1
 		      < buffer + buf_size) {
 			memmove(digits + padding, digits, total_digits + 1);
 			memset(digits, '0', padding);
 		}
 	}
 }
 /* Ensure that buffer has a decimal point in it, followed by at least
    one digit.  The decimal point will not be in the current locale, it
    will always be '.'.

    A leading '+' or '-' is skipped before looking for the end of the
    integer digits, so that "-1" becomes "-1.0" rather than ".0-1".

    buf_size is the total size of buffer.  If there is not enough room
    for the extra characters the buffer is left untouched; it's not
    worth generating an error over. */
 Py_LOCAL_INLINE(void)
 ensure_decimal_point(char* buffer, size_t buf_size)
 {
 	size_t insert_count = 0;
 	const char *chars_to_insert = NULL;
 	size_t buf_len;
 	char *p = buffer;

 	/* Skip a leading sign, if present; it is not part of the digit
 	   run we are about to scan. */
 	if (*p == '-' || *p == '+')
 		++p;

 	/* search for the first non-digit character */
 	while (*p && isdigit(Py_CHARMASK(*p)))
 		++p;

 	if (*p == '.') {
 		if (!isdigit(Py_CHARMASK(*(p+1)))) {
 			/* We have a decimal point, but no following
 			   digit.  Insert a zero after the decimal. */
 			++p;
 			chars_to_insert = "0";
 			insert_count = 1;
 		}
 		/* Otherwise there is nothing to do: we already have a
 		   decimal point and a digit after it. */
 	}
 	else {
 		/* No decimal point at all: add one, plus a zero. */
 		chars_to_insert = ".0";
 		insert_count = 2;
 	}

 	if (insert_count == 0)
 		return;

 	buf_len = strlen(buffer);
 	if (buf_len + insert_count + 1 >= buf_size)
 		/* Not enough room for the additional text, just skip it. */
 		return;

 	/* Open a gap at p (moving the trailing 0 byte as well) and fill
 	   it with the text to insert. */
 	memmove(p + insert_count, p, buffer + buf_len - p + 1);
 	memcpy(p, chars_to_insert, insert_count);
 }
 /* Add the locale specific grouping characters to buffer.  Note
    that any decimal point (if it's present) in buffer is already
    locale-specific.

    Only the integer digits are grouped: a leading '+' or '-' is never
    counted as a digit (so "-123456" becomes "-123,456", not
    "-,123,456"), and text that does not start with a digit after the
    optional sign (for example "inf" or "nan") is left alone.

    buf_size is the total size of buffer.  Return 0 on error (the
    grouped text does not fit in buffer), else 1. */
 Py_LOCAL_INLINE(int)
 add_thousands_grouping(char* buffer, size_t buf_size)
 {
 	struct lconv *locale_data = localeconv();
 	const char *grouping = locale_data->grouping;
 	const char *thousands_sep = locale_data->thousands_sep;
 	size_t thousands_sep_len = strlen(thousands_sep);
 	const char *decimal_point = locale_data->decimal_point;
 	char *pend = buffer + strlen(buffer); /* current end of buffer */
 	char *pmax = buffer + buf_size;       /* max of buffer */
 	char *pstart = buffer;                /* first groupable character */
 	char current_grouping;
 	char *p;

 	/* With an empty separator there is nothing to insert. */
 	if (thousands_sep_len == 0)
 		return 1;

 	/* A sign is not a digit; keep it out of the grouping. */
 	if (*pstart == '+' || *pstart == '-')
 		++pstart;

 	/* Only group real digit strings. */
 	if (!isdigit(Py_CHARMASK(*pstart)))
 		return 1;

 	/* Find the decimal point, if any.  We're only concerned
 	   about the characters to the left of the decimal when
 	   adding grouping. */
 	p = strstr(pstart, decimal_point);
 	if (!p) {
 		/* No decimal.  If there is an exponent, stop there. */
 		p = strpbrk(pstart, "eE");
 		if (!p)
 			/* No exponent and no decimal.  Use the entire
 			   string. */
 			p = pend;
 	}
 	/* At this point, p points just past the right-most character we
 	   want to format.  We need to add the grouping string for the
 	   characters between pstart and p. */

 	/* Starting at p and working right-to-left, keep track of
 	   what grouping needs to be added and insert that. */
 	current_grouping = *grouping++;

 	/* If the first character is 0 (no grouping information) or
 	   CHAR_MAX (no further grouping), perform no grouping at all. */
 	if (current_grouping == 0 || current_grouping == CHAR_MAX)
 		return 1;

 	while (p - pstart > current_grouping) {
 		/* Always leave buffer and pend valid at the end of this
 		   loop, since we might leave with a return statement. */

 		/* Is there room to insert thousands_sep_len chars and
 		   still keep the trailing 0 byte inside the buffer? */
 		if ((size_t)(pmax - pend) <= thousands_sep_len)
 			/* No room. */
 			return 0;

 		/* Move the rest of the string down. */
 		p -= current_grouping;
 		memmove(p + thousands_sep_len,
 			p,
 			pend - p + 1);
 		/* Adjust end pointer. */
 		pend += thousands_sep_len;
 		/* Copy the thousands_sep chars into the buffer. */
 		memcpy(p, thousands_sep, thousands_sep_len);

 		/* Move to the next grouping character, unless we're
 		   repeating (which is designated by a grouping of 0). */
 		if (*grouping != 0) {
 			current_grouping = *grouping++;
 			if (current_grouping == CHAR_MAX)
 				/* We're done. */
 				return 1;
 		}
 	}
 	return 1;
 }
 /* see FORMATBUFLEN in unicodeobject.c */
 #define FLOAT_FORMATBUFLEN 120
@ -222,7 +437,6 @@ PyOS_ascii_formatd(char       *buffer,
 		   const char *format, 
 		   double      d)
 {
 	char *p;
 	char format_char;
 	size_t format_len = strlen(format);
@ -277,144 +491,31 @@ PyOS_ascii_formatd(char       *buffer,
 	/* Have PyOS_snprintf do the hard work */
 	PyOS_snprintf(buffer, buf_size, format, d);
-	/* Get the current local, and find the decimal point character (or
+	/* Do various fixups on the return string */
 	   string?).  Convert that string back to a dot.  Do not do this if
 	   using the 'n' (number) format code. */
 	if (format_char != 'n') {
 		struct lconv *locale_data = localeconv();
 		const char *decimal_point = locale_data->decimal_point;
 		size_t decimal_point_len = strlen(decimal_point);
 		size_t rest_len;
-		assert(decimal_point_len != 0);
+	/* Get the current locale, and find the decimal point string.
-
+	   Convert that string back to a dot.  Do not do this if using the
-		if (decimal_point[0] != '.' || decimal_point[1] != 0) {
+	   'n' (number) format code, since we want to keep the localized
-			p = buffer;
+	   decimal point in that case. */
-
+	if (format_char != 'n')
-			if (*p == '+' || *p == '-')
+		change_decimal_from_locale_to_dot(buffer);
 				p++;
 			while (isdigit(Py_CHARMASK(*p)))
 				p++;
 			if (strncmp(p, decimal_point,
 				    decimal_point_len) == 0) {
 				*p = '.';
 				p++;
 				if (decimal_point_len > 1) {
 					rest_len = strlen(p +
 						      (decimal_point_len - 1));
 					memmove(p, p + (decimal_point_len - 1),
 						rest_len);
 					p[rest_len] = 0;
 				}
 			}
 		}
 	}
 	/* If an exponent exists, ensure that the exponent is at least
 	   MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
 	   for the extra zeros.  Also, if there are more than
 	   MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
 	   back to MIN_EXPONENT_DIGITS */
-	p = strpbrk(buffer, "eE");
+	ensure_minumim_exponent_length(buffer, buf_size);
 	if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
 		char *start = p + 2;
 		int exponent_digit_cnt = 0;
 		int leading_zero_cnt = 0;
 		int in_leading_zeros = 1;
 		int significant_digit_cnt;
 		p += 2;
 		while (*p && isdigit(Py_CHARMASK(*p))) {
 			if (in_leading_zeros && *p == '0')
 				++leading_zero_cnt;
 			if (*p != '0')
 				in_leading_zeros = 0;
 			++p;
 			++exponent_digit_cnt;
 		}
 		significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
 		if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
 			/* If there are 2 exactly digits, we're done,
 			   regardless of what they contain */
 		}
 		else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
 			int extra_zeros_cnt;
 			/* There are more than 2 digits in the exponent.  See
 			   if we can delete some of the leading zeros */
 			if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
 				significant_digit_cnt = MIN_EXPONENT_DIGITS;
 			extra_zeros_cnt = exponent_digit_cnt -
 				significant_digit_cnt;
 			/* Delete extra_zeros_cnt worth of characters from the
 			   front of the exponent */
 			assert(extra_zeros_cnt >= 0);
 			/* Add one to significant_digit_cnt to copy the
 			   trailing 0 byte, thus setting the length */
 			memmove(start,
 				start + extra_zeros_cnt,
 				significant_digit_cnt + 1);
 		}
 		else {
 			/* If there are fewer than 2 digits, add zeros
 			   until there are 2, if there's enough room */
 			int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
 			if (start + zeros + exponent_digit_cnt + 1
 			      < buffer + buf_size) {
 				memmove(start + zeros, start,
 					exponent_digit_cnt + 1);
 				memset(start, '0', zeros);
 			}
 		}
 	}
 	/* If format_char is 'Z', make sure we have at least one character
 	   after the decimal point (and make sure we have a decimal point). */
-	if (format_char == 'Z') {
+	if (format_char == 'Z')
-		int insert_count = 0;
+		ensure_decimal_point(buffer, buf_size);
 		char* chars_to_insert;
-		/* search for the first non-digit character */
+	/* If format_char is 'n', add the thousands grouping. */
-		p = buffer;
+	if (format_char == 'n')
-		while (*p && isdigit(Py_CHARMASK(*p)))
+		if (!add_thousands_grouping(buffer, buf_size))
-			++p;
+			return NULL;
 		if (*p == '.') {
 			if (isdigit(Py_CHARMASK(*(p+1)))) {
 				/* Nothing to do, we already have a decimal
 				   point and a digit after it */
 			}
 			else {
 				/* We have a decimal point, but no following
 				   digit.  Insert a zero after the decimal. */
 				++p;
 				chars_to_insert = "0";
 				insert_count = 1;
 			}
 		}
 		else {
 			chars_to_insert = ".0";
 			insert_count = 2;
 		}
 		if (insert_count) {
 			size_t buf_len = strlen(buffer);
 			if (buf_len + insert_count + 1 >= buf_size) {
 				/* If there is not enough room in the buffer
 				   for the additional text, just skip it.  It's
 				   not worth generating an error over. */
 			}
 			else {
 				memmove(p + insert_count, p,
 					buffer + strlen(buffer) - p + 1);
 				memcpy(p, chars_to_insert, insert_count);
 			}
 		}
 	}
 	return buffer;
 }