mirror of
https://github.com/python/cpython.git
synced 2025-07-29 06:05:00 +00:00
Addresses issue 2802: 'n' formatting for integers.
Adds 'n' as a format specifier for integers, to mirror the same specifier which is already available for floats. 'n' is the same as 'd', but inserts the current locale-specific thousands grouping. I added this as a stringlib function, but it's only used by str type, not unicode. This is because of an implementation detail in unicode.format(), which does its own str->unicode conversion. But the unicode version will be needed in 3.0, and it may be needed by other code eventually in 2.6 (maybe decimal?), so I left it as a stringlib implementation. As long as the unicode version isn't instantiated, there's no overhead for this.
This commit is contained in:
parent
30ece44f2e
commit
cf537ff39e
9 changed files with 184 additions and 52 deletions
|
@ -176,7 +176,17 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
|
||||||
(only possible for 0-terminated
|
(only possible for 0-terminated
|
||||||
strings) */
|
strings) */
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* Using the current locale, insert the thousands grouping
|
||||||
|
into the string pointed to by buffer. For the argument descriptions,
|
||||||
|
see Objects/stringlib/localeutil.h */
|
||||||
|
|
||||||
|
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
|
||||||
|
Py_ssize_t len,
|
||||||
|
char *plast,
|
||||||
|
Py_ssize_t buf_size,
|
||||||
|
Py_ssize_t *count,
|
||||||
|
int append_zero_char);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -377,7 +377,7 @@ class TypesTests(unittest.TestCase):
|
||||||
|
|
||||||
# ensure that float type specifiers work; format converts
|
# ensure that float type specifiers work; format converts
|
||||||
# the int to a float
|
# the int to a float
|
||||||
for format_spec in 'eEfFgGn%':
|
for format_spec in 'eEfFgG%':
|
||||||
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
|
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
|
||||||
self.assertEqual(value.__format__(format_spec),
|
self.assertEqual(value.__format__(format_spec),
|
||||||
float(value).__format__(format_spec))
|
float(value).__format__(format_spec))
|
||||||
|
@ -472,7 +472,7 @@ class TypesTests(unittest.TestCase):
|
||||||
|
|
||||||
# ensure that float type specifiers work; format converts
|
# ensure that float type specifiers work; format converts
|
||||||
# the long to a float
|
# the long to a float
|
||||||
for format_spec in 'eEfFgGn%':
|
for format_spec in 'eEfFgG%':
|
||||||
for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
|
for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
|
||||||
self.assertEqual(value.__format__(format_spec),
|
self.assertEqual(value.__format__(format_spec),
|
||||||
float(value).__format__(format_spec))
|
float(value).__format__(format_spec))
|
||||||
|
@ -486,6 +486,17 @@ class TypesTests(unittest.TestCase):
|
||||||
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
|
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
|
||||||
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
|
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
|
||||||
|
|
||||||
|
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
|
||||||
|
def test_int__format__locale(self):
|
||||||
|
# test locale support for __format__ code 'n' for integers
|
||||||
|
|
||||||
|
x = 123456789012345678901234567890
|
||||||
|
for i in range(0, 30):
|
||||||
|
self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
|
||||||
|
|
||||||
|
# move to the next integer to test
|
||||||
|
x = x // 10
|
||||||
|
|
||||||
def test_float__format__(self):
|
def test_float__format__(self):
|
||||||
# these should be rewritten to use both format(x, spec) and
|
# these should be rewritten to use both format(x, spec) and
|
||||||
# x.__format__(spec)
|
# x.__format__(spec)
|
||||||
|
|
|
@ -549,7 +549,8 @@ STRINGLIB_HEADERS= \
|
||||||
$(srcdir)/Objects/stringlib/stringdefs.h \
|
$(srcdir)/Objects/stringlib/stringdefs.h \
|
||||||
$(srcdir)/Objects/stringlib/string_format.h \
|
$(srcdir)/Objects/stringlib/string_format.h \
|
||||||
$(srcdir)/Objects/stringlib/transmogrify.h \
|
$(srcdir)/Objects/stringlib/transmogrify.h \
|
||||||
$(srcdir)/Objects/stringlib/unicodedefs.h
|
$(srcdir)/Objects/stringlib/unicodedefs.h \
|
||||||
|
$(srcdir)/Objects/stringlib/localeutil.h
|
||||||
|
|
||||||
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
|
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
|
||||||
$(STRINGLIB_HEADERS)
|
$(STRINGLIB_HEADERS)
|
||||||
|
|
|
@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
||||||
Py_ssize_t n_digits; /* count of digits need from the computed
|
Py_ssize_t n_digits; /* count of digits need from the computed
|
||||||
string */
|
string */
|
||||||
Py_ssize_t n_leading_chars;
|
Py_ssize_t n_leading_chars;
|
||||||
|
Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
|
||||||
|
allocate, used for 'n'
|
||||||
|
formatting. */
|
||||||
NumberFieldWidths spec;
|
NumberFieldWidths spec;
|
||||||
long x;
|
long x;
|
||||||
|
|
||||||
|
@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
||||||
break;
|
break;
|
||||||
default: /* shouldn't be needed, but stops a compiler warning */
|
default: /* shouldn't be needed, but stops a compiler warning */
|
||||||
case 'd':
|
case 'd':
|
||||||
|
case 'n':
|
||||||
base = 10;
|
base = 10;
|
||||||
leading_chars_to_skip = 0;
|
leading_chars_to_skip = 0;
|
||||||
break;
|
break;
|
||||||
|
@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
||||||
/* Calculate the widths of the various leading and trailing parts */
|
/* Calculate the widths of the various leading and trailing parts */
|
||||||
calc_number_widths(&spec, sign, n_digits, format);
|
calc_number_widths(&spec, sign, n_digits, format);
|
||||||
|
|
||||||
|
if (format->type == 'n')
|
||||||
|
/* Compute how many additional chars we need to allocate
|
||||||
|
to hold the thousands grouping. */
|
||||||
|
STRINGLIB_GROUPING(pnumeric_chars, n_digits,
|
||||||
|
pnumeric_chars+n_digits,
|
||||||
|
0, &n_grouping_chars, 0);
|
||||||
|
|
||||||
/* Allocate a new string to hold the result */
|
/* Allocate a new string to hold the result */
|
||||||
result = STRINGLIB_NEW(NULL, spec.n_total);
|
result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
|
||||||
if (!result)
|
if (!result)
|
||||||
goto done;
|
goto done;
|
||||||
p = STRINGLIB_STR(result);
|
p = STRINGLIB_STR(result);
|
||||||
|
@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
||||||
pnumeric_chars,
|
pnumeric_chars,
|
||||||
n_digits * sizeof(STRINGLIB_CHAR));
|
n_digits * sizeof(STRINGLIB_CHAR));
|
||||||
|
|
||||||
/* if X, convert to uppercase */
|
/* If type is 'X', convert to uppercase */
|
||||||
if (format->type == 'X') {
|
if (format->type == 'X') {
|
||||||
Py_ssize_t t;
|
Py_ssize_t t;
|
||||||
for (t = 0; t < n_digits; ++t)
|
for (t = 0; t < n_digits; ++t)
|
||||||
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
|
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Insert the grouping, if any, after the uppercasing of 'X', so we can
|
||||||
|
ensure that grouping chars won't be affected. */
|
||||||
|
if (n_grouping_chars && format->type == 'n') {
|
||||||
|
/* We know this can't fail, since we've already
|
||||||
|
reserved enough space. */
|
||||||
|
STRINGLIB_CHAR *pstart = p + n_leading_chars;
|
||||||
|
int r = STRINGLIB_GROUPING(pstart, n_digits,
|
||||||
|
pstart + n_digits,
|
||||||
|
spec.n_total+n_grouping_chars-n_leading_chars,
|
||||||
|
NULL, 0);
|
||||||
|
assert(r);
|
||||||
|
}
|
||||||
|
|
||||||
/* Fill in the non-digit parts */
|
/* Fill in the non-digit parts */
|
||||||
fill_number(p, &spec, n_digits,
|
fill_number(p, &spec, n_digits,
|
||||||
format->fill_char == '\0' ? ' ' : format->fill_char);
|
format->fill_char == '\0' ? ' ' : format->fill_char);
|
||||||
|
@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
|
||||||
case 'o':
|
case 'o':
|
||||||
case 'x':
|
case 'x':
|
||||||
case 'X':
|
case 'X':
|
||||||
|
case 'n':
|
||||||
/* no type conversion needed, already an int (or long). do
|
/* no type conversion needed, already an int (or long). do
|
||||||
the formatting */
|
the formatting */
|
||||||
result = format_int_or_long_internal(value, &format, tostring);
|
result = format_int_or_long_internal(value, &format, tostring);
|
||||||
|
@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
|
||||||
case 'F':
|
case 'F':
|
||||||
case 'g':
|
case 'g':
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'n':
|
|
||||||
case '%':
|
case '%':
|
||||||
/* convert to float */
|
/* convert to float */
|
||||||
tmp = PyNumber_Float(value);
|
tmp = PyNumber_Float(value);
|
||||||
|
|
121
Objects/stringlib/localeutil.h
Normal file
121
Objects/stringlib/localeutil.h
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
/* stringlib: locale related helpers implementation */
|
||||||
|
|
||||||
|
#ifndef STRINGLIB_LOCALEUTIL_H
|
||||||
|
#define STRINGLIB_LOCALEUTIL_H
|
||||||
|
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* _Py_InsertThousandsGrouping:
|
||||||
|
* @buffer: A pointer to the start of a string.
|
||||||
|
* @len: The length of the string.
|
||||||
|
* @plast: A pointer to the end of the digits in the string. This
|
||||||
|
* may be before the end of the string (if the string contains
|
||||||
|
* decimals, for example).
|
||||||
|
* @buf_size: The maximum size of the buffer pointed to by buffer.
|
||||||
|
* @count: If non-NULL, points to a variable that will receive the
|
||||||
|
* number of characters we need to insert (and no formatting
|
||||||
|
* will actually occur).
|
||||||
|
* @append_zero_char: If non-zero, put a trailing zero at the end of
|
||||||
|
* the resulting string, if and only if we modified the
|
||||||
|
* string.
|
||||||
|
*
|
||||||
|
* Inserts thousand grouping characters (as defined in the current
|
||||||
|
* locale) into the string between buffer and plast. If count is
|
||||||
|
* non-NULL, don't do any formatting, just count the number of
|
||||||
|
* characters to insert. This is used by the caller to appropriately
|
||||||
|
* resize the buffer, if needed.
|
||||||
|
*
|
||||||
|
* Return value: 0 on error, else 1. Note that no error can occur if
|
||||||
|
* count is non-NULL.
|
||||||
|
*
|
||||||
|
* This name won't be used, the includer of this file should define
|
||||||
|
* it to be the actual function name, based on unicode or string.
|
||||||
|
**/
|
||||||
|
int
|
||||||
|
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
|
||||||
|
Py_ssize_t len,
|
||||||
|
STRINGLIB_CHAR *plast,
|
||||||
|
Py_ssize_t buf_size,
|
||||||
|
Py_ssize_t *count,
|
||||||
|
int append_zero_char)
|
||||||
|
{
|
||||||
|
struct lconv *locale_data = localeconv();
|
||||||
|
const char *grouping = locale_data->grouping;
|
||||||
|
const char *thousands_sep = locale_data->thousands_sep;
|
||||||
|
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
|
||||||
|
STRINGLIB_CHAR *pend = buffer + len; /* current end of buffer */
|
||||||
|
STRINGLIB_CHAR *pmax = buffer + buf_size; /* max of buffer */
|
||||||
|
char current_grouping;
|
||||||
|
|
||||||
|
/* Initialize the character count, if we're just counting. */
|
||||||
|
if (count)
|
||||||
|
*count = 0;
|
||||||
|
|
||||||
|
/* Starting at plast and working right-to-left, keep track of
|
||||||
|
what grouping needs to be added and insert that. */
|
||||||
|
current_grouping = *grouping++;
|
||||||
|
|
||||||
|
/* If the first character is 0, perform no grouping at all. */
|
||||||
|
if (current_grouping == 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
while (plast - buffer > current_grouping) {
|
||||||
|
/* Always leave buffer and pend valid at the end of this
|
||||||
|
loop, since we might leave with a return statement. */
|
||||||
|
|
||||||
|
plast -= current_grouping;
|
||||||
|
if (count) {
|
||||||
|
/* We're only counting, not touching the memory. */
|
||||||
|
*count += thousands_sep_len;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Do the formatting. */
|
||||||
|
|
||||||
|
/* Is there room to insert thousands_sep_len chars? */
|
||||||
|
if (pmax - pend < thousands_sep_len)
|
||||||
|
/* No room. */
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Move the rest of the string down. */
|
||||||
|
memmove(plast + thousands_sep_len,
|
||||||
|
plast,
|
||||||
|
(pend - plast) * sizeof(STRINGLIB_CHAR));
|
||||||
|
/* Copy the thousands_sep chars into the buffer. */
|
||||||
|
#if STRINGLIB_IS_UNICODE
|
||||||
|
/* Convert from the char's of the thousands_sep from
|
||||||
|
the locale into unicode. */
|
||||||
|
{
|
||||||
|
Py_ssize_t i;
|
||||||
|
for (i = 0; i < thousands_sep_len; ++i)
|
||||||
|
plast[i] = thousands_sep[i];
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* No conversion, just memcpy the thousands_sep. */
|
||||||
|
memcpy(plast, thousands_sep, thousands_sep_len);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Adjust end pointer. */
|
||||||
|
pend += thousands_sep_len;
|
||||||
|
|
||||||
|
/* Move to the next grouping character, unless we're
|
||||||
|
repeating (which is designated by a grouping of 0). */
|
||||||
|
if (*grouping != 0) {
|
||||||
|
current_grouping = *grouping++;
|
||||||
|
if (current_grouping == CHAR_MAX)
|
||||||
|
/* We're done. */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (append_zero_char) {
|
||||||
|
/* Append a zero character to mark the end of the string,
|
||||||
|
if there's room. */
|
||||||
|
if (pend - plast < 1)
|
||||||
|
/* No room, error. */
|
||||||
|
return 0;
|
||||||
|
*pend = 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#endif /* STRINGLIB_LOCALEUTIL_H */
|
|
@ -23,5 +23,6 @@
|
||||||
#define STRINGLIB_CHECK PyString_Check
|
#define STRINGLIB_CHECK PyString_Check
|
||||||
#define STRINGLIB_CMP memcmp
|
#define STRINGLIB_CMP memcmp
|
||||||
#define STRINGLIB_TOSTR PyObject_Str
|
#define STRINGLIB_TOSTR PyObject_Str
|
||||||
|
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
|
||||||
|
|
||||||
#endif /* !STRINGLIB_STRINGDEFS_H */
|
#endif /* !STRINGLIB_STRINGDEFS_H */
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#define STRINGLIB_NEW PyUnicode_FromUnicode
|
#define STRINGLIB_NEW PyUnicode_FromUnicode
|
||||||
#define STRINGLIB_RESIZE PyUnicode_Resize
|
#define STRINGLIB_RESIZE PyUnicode_Resize
|
||||||
#define STRINGLIB_CHECK PyUnicode_Check
|
#define STRINGLIB_CHECK PyUnicode_Check
|
||||||
|
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
|
||||||
|
|
||||||
#if PY_VERSION_HEX < 0x03000000
|
#if PY_VERSION_HEX < 0x03000000
|
||||||
#define STRINGLIB_TOSTR PyObject_Unicode
|
#define STRINGLIB_TOSTR PyObject_Unicode
|
||||||
|
|
|
@ -784,6 +784,10 @@ PyString_AsStringAndSize(register PyObject *obj,
|
||||||
#include "stringlib/find.h"
|
#include "stringlib/find.h"
|
||||||
#include "stringlib/partition.h"
|
#include "stringlib/partition.h"
|
||||||
|
|
||||||
|
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
|
||||||
|
#include "stringlib/localeutil.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
string_print(PyStringObject *op, FILE *fp, int flags)
|
string_print(PyStringObject *op, FILE *fp, int flags)
|
||||||
|
|
|
@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size)
|
||||||
Py_LOCAL_INLINE(int)
|
Py_LOCAL_INLINE(int)
|
||||||
add_thousands_grouping(char* buffer, size_t buf_size)
|
add_thousands_grouping(char* buffer, size_t buf_size)
|
||||||
{
|
{
|
||||||
|
Py_ssize_t len = strlen(buffer);
|
||||||
struct lconv *locale_data = localeconv();
|
struct lconv *locale_data = localeconv();
|
||||||
const char *grouping = locale_data->grouping;
|
|
||||||
const char *thousands_sep = locale_data->thousands_sep;
|
|
||||||
size_t thousands_sep_len = strlen(thousands_sep);
|
|
||||||
const char *decimal_point = locale_data->decimal_point;
|
const char *decimal_point = locale_data->decimal_point;
|
||||||
char *pend = buffer + strlen(buffer); /* current end of buffer */
|
|
||||||
char *pmax = buffer + buf_size; /* max of buffer */
|
|
||||||
char current_grouping;
|
|
||||||
|
|
||||||
/* Find the decimal point, if any. We're only concerned
|
/* Find the decimal point, if any. We're only concerned
|
||||||
about the characters to the left of the decimal when
|
about the characters to the left of the decimal when
|
||||||
|
@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size)
|
||||||
if (!p)
|
if (!p)
|
||||||
/* No exponent and no decimal. Use the entire
|
/* No exponent and no decimal. Use the entire
|
||||||
string. */
|
string. */
|
||||||
p = pend;
|
p = buffer + len;
|
||||||
}
|
}
|
||||||
/* At this point, p points just past the right-most character we
|
/* At this point, p points just past the right-most character we
|
||||||
want to format. We need to add the grouping string for the
|
want to format. We need to add the grouping string for the
|
||||||
characters between buffer and p. */
|
characters between buffer and p. */
|
||||||
|
return _PyString_InsertThousandsGrouping(buffer, len, p,
|
||||||
/* Starting at p and working right-to-left, keep track of
|
buf_size, NULL, 1);
|
||||||
what grouping needs to be added and insert that. */
|
|
||||||
current_grouping = *grouping++;
|
|
||||||
|
|
||||||
/* If the first character is 0, perform no grouping at all. */
|
|
||||||
if (current_grouping == 0)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
while (p - buffer > current_grouping) {
|
|
||||||
/* Always leave buffer and pend valid at the end of this
|
|
||||||
loop, since we might leave with a return statement. */
|
|
||||||
|
|
||||||
/* Is there room to insert thousands_sep_len chars?. */
|
|
||||||
if (pmax - pend <= thousands_sep_len)
|
|
||||||
/* No room. */
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* Move the rest of the string down. */
|
|
||||||
p -= current_grouping;
|
|
||||||
memmove(p + thousands_sep_len,
|
|
||||||
p,
|
|
||||||
pend - p + 1);
|
|
||||||
/* Adjust end pointer. */
|
|
||||||
pend += thousands_sep_len;
|
|
||||||
/* Copy the thousands_sep chars into the buffer. */
|
|
||||||
memcpy(p, thousands_sep, thousands_sep_len);
|
|
||||||
|
|
||||||
/* Move to the next grouping character, unless we're
|
|
||||||
repeating (which is designated by a grouping of 0). */
|
|
||||||
if (*grouping != 0) {
|
|
||||||
current_grouping = *grouping++;
|
|
||||||
if (current_grouping == CHAR_MAX)
|
|
||||||
/* We're done. */
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* see FORMATBUFLEN in unicodeobject.c */
|
/* see FORMATBUFLEN in unicodeobject.c */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue