Addresses issue 2802: 'n' formatting for integers.

Adds 'n' as a format specifier for integers, to mirror the same specifier which is already available for floats. 'n' is the same as 'd', but inserts the current locale-specific thousands grouping. I added this as a stringlib function, but it's only used by str type, not unicode. This is because of an implementation detail in unicode.format(), which does its own str->unicode conversion. But the unicode version will be needed in 3.0, and it may be needed by other code eventually in 2.6 (maybe decimal?), so I left it as a stringlib implementation. As long as the unicode version isn't instantiated, there's no overhead for this.
2025-11-02 19:12:55 +00:00 · 2008-05-11 19:52:48 +00:00 · 2008-05-11 19:52:48 +00:00 · cf537ff39e
commit cf537ff39e
parent 30ece44f2e
9 changed files with 184 additions and 52 deletions
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@ -176,7 +176,17 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
 				   (only possible for 0-terminated
 				   strings) */
    );
-    
+
 /* Using the current locale, insert the thousands grouping
   into the string pointed to by buffer.  For the argument descriptions,
   see Objects/stringlib/localeutil.h

   If count is non-NULL, no formatting is done; *count only receives the
   number of characters that would have to be inserted.

   Returns 0 on error, else 1. */
 PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
 						  Py_ssize_t len,
 						  char *plast,
 						  Py_ssize_t buf_size,
 						  Py_ssize_t *count,
 						  int append_zero_char);
 #ifdef __cplusplus
 }
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@ -377,7 +377,7 @@ class TypesTests(unittest.TestCase):
        # ensure that float type specifiers work; format converts
        #  the int to a float
-        for format_spec in 'eEfFgGn%':
+        for format_spec in 'eEfFgG%':
            for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
                self.assertEqual(value.__format__(format_spec),
                                 float(value).__format__(format_spec))
@ -472,7 +472,7 @@ class TypesTests(unittest.TestCase):
        # ensure that float type specifiers work; format converts
        #  the long to a float
-        for format_spec in 'eEfFgGn%':
+        for format_spec in 'eEfFgG%':
            for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
                self.assertEqual(value.__format__(format_spec),
                                 float(value).__format__(format_spec))
@ -486,6 +486,17 @@ class TypesTests(unittest.TestCase):
            self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
            self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
    @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
    def test_int__format__locale(self):
        # format(value, 'n') on an integer must agree with
        # locale.format('%d', value, grouping=True)
        value = 123456789012345678901234567890
        checks_left = 30
        while checks_left > 0:
            expected = locale.format('%d', value, grouping=True)
            self.assertEqual(expected, format(value, 'n'))
            # drop the last digit so every length from 30 digits down
            # to a single digit gets exercised
            value //= 10
            checks_left -= 1
    def test_float__format__(self):
        # these should be rewritten to use both format(x, spec) and
        # x.__format__(spec)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@ -549,7 +549,8 @@ STRINGLIB_HEADERS= \
 		$(srcdir)/Objects/stringlib/stringdefs.h \
 		$(srcdir)/Objects/stringlib/string_format.h \
 		$(srcdir)/Objects/stringlib/transmogrify.h \
-		$(srcdir)/Objects/stringlib/unicodedefs.h
+		$(srcdir)/Objects/stringlib/unicodedefs.h \
 		$(srcdir)/Objects/stringlib/localeutil.h
 Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
 				$(STRINGLIB_HEADERS)
--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
    Py_ssize_t n_digits;       /* count of digits need from the computed
                                  string */
    Py_ssize_t n_leading_chars;
    Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
 					allocate, used for 'n'
 					formatting. */
    NumberFieldWidths spec;
    long x;
@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
            break;
        default:  /* shouldn't be needed, but stops a compiler warning */
        case 'd':
        case 'n':
            base = 10;
            leading_chars_to_skip = 0;
            break;
@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
    /* Calculate the widths of the various leading and trailing parts */
    calc_number_widths(&spec, sign, n_digits, format);
    if (format->type == 'n')
 	    /* Compute how many additional chars we need to allocate
 	       to hold the thousands grouping. */
 	    STRINGLIB_GROUPING(pnumeric_chars, n_digits,
 			       pnumeric_chars+n_digits,
 			       0, &n_grouping_chars, 0);
    /* Allocate a new string to hold the result */
-    result = STRINGLIB_NEW(NULL, spec.n_total);
+    result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
    if (!result)
 	goto done;
    p = STRINGLIB_STR(result);
@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
 	    pnumeric_chars,
 	    n_digits * sizeof(STRINGLIB_CHAR));
-    /* if X, convert to uppercase */
+    /* If type is 'X', convert to uppercase */
    if (format->type == 'X') {
 	Py_ssize_t t;
 	for (t = 0; t < n_digits; ++t)
 	    p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
    }
    /* Insert the grouping, if any, after the uppercasing of 'X', so we can
       ensure that grouping chars won't be affected. */
    if (n_grouping_chars && format->type == 'n') {
 	    /* We know this can't fail, since we've already
 	       reserved enough space. */
 	    STRINGLIB_CHAR *pstart = p + n_leading_chars;
 	    int r = STRINGLIB_GROUPING(pstart, n_digits,
 				       pstart + n_digits,
 				       spec.n_total+n_grouping_chars-n_leading_chars,
 				       NULL, 0);
 	    assert(r);
    }
    /* Fill in the non-digit parts */
    fill_number(p, &spec, n_digits,
                format->fill_char == '\0' ? ' ' : format->fill_char);
@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
    case 'o':
    case 'x':
    case 'X':
    case 'n':
        /* no type conversion needed, already an int (or long).  do
 	   the formatting */
 	    result = format_int_or_long_internal(value, &format, tostring);
@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
    case 'F':
    case 'g':
    case 'G':
    case 'n':
    case '%':
        /* convert to float */
        tmp = PyNumber_Float(value);
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
@ -0,0 +1,121 @@
 /* stringlib: locale related helpers implementation */
 #ifndef STRINGLIB_LOCALEUTIL_H
 #define STRINGLIB_LOCALEUTIL_H
 #include <locale.h>
 /**
 * _Py_InsertThousandsGrouping:
 * @buffer: A pointer to the start of a string.
 * @len: The length of the string.
 * @plast: A pointer to the end of the digits in the string.  This
 *         may be before the end of the string (if the string contains
 *         decimals, for example).
 * @buf_size: The maximum size of the buffer pointed to by buffer.
 * @count: If non-NULL, points to a variable that will receive the
 *         number of characters we need to insert (and no formatting
 *         will actually occur; the buffer is never written to).
 * @append_zero_char: If non-zero, put a trailing zero at the end
 *         of the resulting string, if and only if we modified the
 *         string.
 *
 * Inserts thousand grouping characters (as defined in the current
 *  locale) into the string between buffer and plast.  If count is
 *  non-NULL, don't do any formatting, just count the number of
 *  characters to insert.  This is used by the caller to appropriately
 *  resize the buffer, if needed.
 *
 * Return value: 0 on error (not enough room in the buffer for a
 *  separator or for the trailing zero), else 1.  Note that no error
 *  can occur if count is non-NULL.
 *
 * This name won't be used, the includer of this file should define
 *  it to be the actual function name, based on unicode or string.
 **/
 int
 _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
 			    Py_ssize_t len,
 			    STRINGLIB_CHAR *plast,
 			    Py_ssize_t buf_size,
 			    Py_ssize_t *count,
 			    int append_zero_char)
 {
 	struct lconv *locale_data = localeconv();
 	const char *grouping = locale_data->grouping;
 	const char *thousands_sep = locale_data->thousands_sep;
 	Py_ssize_t thousands_sep_len = (Py_ssize_t)strlen(thousands_sep);
 	STRINGLIB_CHAR *pend = buffer + len; /* current end of string */
 	STRINGLIB_CHAR *pmax = buffer + buf_size; /* max of buffer */
 	char current_grouping;
 	/* Initialize the character count, if we're just counting. */
 	if (count)
 		*count = 0;
 	/* Starting at plast and working right-to-left, keep track of
 	   what grouping needs to be added and insert that. */
 	current_grouping = *grouping++;
 	/* A first element of 0 (empty grouping string) or CHAR_MAX
 	   both mean that no grouping is to be performed at all. */
 	if (current_grouping == 0 || current_grouping == CHAR_MAX)
 		return 1;
 	while (plast - buffer > current_grouping) {
 		/* Always leave buffer and pend valid at the end of this
 		   loop, since we might leave with a return statement. */
 		plast -= current_grouping;
 		if (count) {
 			/* We're only counting, not touching the memory. */
 			*count += thousands_sep_len;
 		}
 		else {
 			/* Do the formatting. */
 			/* Is there room to insert thousands_sep_len chars? */
 			if (pmax - pend < thousands_sep_len)
 				/* No room. */
 				return 0;
 			/* Move the rest of the string down. */
 			memmove(plast + thousands_sep_len,
 				plast,
 				(pend - plast) * sizeof(STRINGLIB_CHAR));
 			/* Copy the thousands_sep chars into the buffer. */
 #if STRINGLIB_IS_UNICODE
 			/* Convert from the char's of the thousands_sep from
 			   the locale into unicode. */
 			{
 				Py_ssize_t i;
 				for (i = 0; i < thousands_sep_len; ++i)
 					plast[i] = thousands_sep[i];
 			}
 #else
 			/* No conversion, just memcpy the thousands_sep. */
 			memcpy(plast, thousands_sep, thousands_sep_len);
 #endif
 			/* The string grew; adjust the end pointer. */
 			pend += thousands_sep_len;
 		}
 		/* Move to the next grouping character, unless we're
 		   repeating (which is designated by a grouping of 0). */
 		if (*grouping != 0) {
 			current_grouping = *grouping++;
 			if (current_grouping == CHAR_MAX)
 				/* We're done. */
 				break;
 		}
 	}
 	/* Terminate the string only if we really changed it: never in
 	   counting mode, and not when nothing was inserted (pend is
 	   only advanced when separators are written). */
 	if (append_zero_char && !count && pend != buffer + len) {
 		/* The terminator needs one free slot between the current
 		   end of the string and the end of the buffer. */
 		if (pmax - pend < 1)
 			/* No room, error. */
 			return 0;
 		*pend = 0;
 	}
 	return 1;
 }
 #endif /* STRINGLIB_LOCALEUTIL_H */
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@ -23,5 +23,6 @@
 #define STRINGLIB_CHECK          PyString_Check
 #define STRINGLIB_CMP            memcmp
 #define STRINGLIB_TOSTR          PyObject_Str
 #define STRINGLIB_GROUPING       _PyString_InsertThousandsGrouping
 #endif /* !STRINGLIB_STRINGDEFS_H */
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@ -21,6 +21,7 @@
 #define STRINGLIB_NEW            PyUnicode_FromUnicode
 #define STRINGLIB_RESIZE         PyUnicode_Resize
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_GROUPING       _PyUnicode_InsertThousandsGrouping
 #if PY_VERSION_HEX < 0x03000000
 #define STRINGLIB_TOSTR          PyObject_Unicode
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@ -784,6 +784,10 @@ PyString_AsStringAndSize(register PyObject *obj,
 #include "stringlib/find.h"
 #include "stringlib/partition.h"
 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
 #include "stringlib/localeutil.h"
 static int
 string_print(PyStringObject *op, FILE *fp, int flags)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size)
 Py_LOCAL_INLINE(int)
 add_thousands_grouping(char* buffer, size_t buf_size)
 {
 	Py_ssize_t len = strlen(buffer);
 	struct lconv *locale_data = localeconv();
 	const char *grouping = locale_data->grouping;
 	const char *thousands_sep = locale_data->thousands_sep;
 	size_t thousands_sep_len = strlen(thousands_sep);
 	const char *decimal_point = locale_data->decimal_point;
 	char *pend = buffer + strlen(buffer); /* current end of buffer */
 	char *pmax = buffer + buf_size;       /* max of buffer */
 	char current_grouping;
 	/* Find the decimal point, if any.  We're only concerned
 	   about the characters to the left of the decimal when
@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size)
 		if (!p)
 			/* No exponent and no decimal.  Use the entire
 			   string. */
-			p = pend;
+			p = buffer + len;
 	}
 	/* At this point, p points just past the right-most character we
 	   want to format.  We need to add the grouping string for the
 	   characters between buffer and p. */
-
+	return _PyString_InsertThousandsGrouping(buffer, len, p,
-	/* Starting at p and working right-to-left, keep track of
+						 buf_size, NULL, 1);
 	   what grouping needs to be added and insert that. */
 	current_grouping = *grouping++;
 	/* If the first character is 0, perform no grouping at all. */
 	if (current_grouping == 0)
 		return 1;
 	while (p - buffer > current_grouping) {
 		/* Always leave buffer and pend valid at the end of this
 		   loop, since we might leave with a return statement. */
 		/* Is there room to insert thousands_sep_len chars?. */
 		if (pmax - pend <= thousands_sep_len)
 			/* No room. */
 			return 0;
 		/* Move the rest of the string down. */
 		p -= current_grouping;
 		memmove(p + thousands_sep_len,
 			p,
 			pend - p + 1);
 		/* Adjust end pointer. */
 		pend += thousands_sep_len;
 		/* Copy the thousands_sep chars into the buffer. */
 		memcpy(p, thousands_sep, thousands_sep_len);
 		/* Move to the next grouping character, unless we're
 		   repeating (which is designated by a grouping of 0). */
 		if (*grouping != 0) {
 			current_grouping = *grouping++;
 			if (current_grouping == CHAR_MAX)
 				/* We're done. */
 				return 1;
 		}
 	}
 	return 1;
 }
 /* see FORMATBUFLEN in unicodeobject.c */