mirror of
https://github.com/python/cpython.git
synced 2025-07-23 11:15:24 +00:00
Issue #5859: Remove use of fixed-length buffers for float formatting
in unicodeobject.c and the fallback version of PyOS_double_to_string. As a result, operations like '%.120e' % 12.34 no longer raise an exception.
This commit is contained in:
parent
fb526ac34a
commit
f489caf5da
4 changed files with 90 additions and 98 deletions
|
@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest:
|
||||||
value = 0.01
|
value = 0.01
|
||||||
for x in range(60):
|
for x in range(60):
|
||||||
value = value * 3.141592655 / 3.0 * 10.0
|
value = value * 3.141592655 / 3.0 * 10.0
|
||||||
# The formatfloat() code in stringobject.c and
|
self.checkcall(format, "__mod__", value)
|
||||||
# unicodeobject.c uses a 120 byte buffer and switches from
|
|
||||||
# 'f' formatting to 'g' for values >= 1e50, so we expect
|
|
||||||
# OverflowErrors for the ranges x < 50 and prec >= 67.
|
|
||||||
if x < 50 and prec >= 67:
|
|
||||||
self.checkraises(OverflowError, format, "__mod__", value)
|
|
||||||
else:
|
|
||||||
self.checkcall(format, "__mod__", value)
|
|
||||||
|
|
||||||
def test_inplace_rewrites(self):
|
def test_inplace_rewrites(self):
|
||||||
# Check that strings don't copy and modify cached single-character strings
|
# Check that strings don't copy and modify cached single-character strings
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #5859: Remove length restrictions for float formatting:
|
||||||
|
'%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
|
||||||
|
|
||||||
- Issue #1588: Add complex.__format__. For example,
|
- Issue #1588: Add complex.__format__. For example,
|
||||||
format(complex(1, 2./3), '.5') now produces a sensible result.
|
format(complex(1, 2./3), '.5') now produces a sensible result.
|
||||||
|
|
||||||
|
|
|
@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
|
||||||
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
|
|
||||||
{
|
|
||||||
register Py_ssize_t i;
|
|
||||||
for (i = len - 1; i >= 0; i--)
|
|
||||||
buffer[i] = (Py_UNICODE) charbuffer[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static PyObject *
|
||||||
formatfloat(Py_UNICODE *buf,
|
formatfloat(PyObject *v, int flags, int prec, int type)
|
||||||
size_t buflen,
|
|
||||||
int flags,
|
|
||||||
int prec,
|
|
||||||
int type,
|
|
||||||
PyObject *v)
|
|
||||||
{
|
{
|
||||||
/* eric.smith: To minimize disturbances in PyUnicode_Format (the
|
char *p;
|
||||||
only caller of this routine), I'm going to keep the existing
|
PyObject *result;
|
||||||
API to this function. That means that we'll allocate memory and
|
|
||||||
then copy back into the supplied buffer. But that's better than
|
|
||||||
all of the changes that would be required in PyUnicode_Format
|
|
||||||
because it does lots of memory management tricks. */
|
|
||||||
|
|
||||||
char* p = NULL;
|
|
||||||
int result = -1;
|
|
||||||
double x;
|
double x;
|
||||||
Py_ssize_t len;
|
|
||||||
|
|
||||||
x = PyFloat_AsDouble(v);
|
x = PyFloat_AsDouble(v);
|
||||||
if (x == -1.0 && PyErr_Occurred())
|
if (x == -1.0 && PyErr_Occurred())
|
||||||
goto done;
|
return NULL;
|
||||||
|
|
||||||
if (prec < 0)
|
if (prec < 0)
|
||||||
prec = 6;
|
prec = 6;
|
||||||
|
|
||||||
/* make sure that the decimal representation of precision really does
|
|
||||||
need at most 10 digits: platforms with sizeof(int) == 8 exist! */
|
|
||||||
if (prec > 0x7fffffffL) {
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"outrageously large precision "
|
|
||||||
"for formatted float");
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (type == 'f' && fabs(x) >= 1e50)
|
if (type == 'f' && fabs(x) >= 1e50)
|
||||||
type = 'g';
|
type = 'g';
|
||||||
|
|
||||||
if (((type == 'g' || type == 'G') &&
|
|
||||||
buflen <= (size_t)10 + (size_t)prec) ||
|
|
||||||
((type == 'f' || type == 'F') &&
|
|
||||||
buflen <= (size_t)53 + (size_t)prec)) {
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"formatted float is too long (precision too large?)");
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
p = PyOS_double_to_string(x, type, prec,
|
p = PyOS_double_to_string(x, type, prec,
|
||||||
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
|
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
|
||||||
len = strlen(p);
|
if (p == NULL)
|
||||||
if (len+1 >= buflen) {
|
return NULL;
|
||||||
/* Caller supplied buffer is not large enough. */
|
result = PyUnicode_FromStringAndSize(p, strlen(p));
|
||||||
PyErr_NoMemory();
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
strtounicode(buf, p, len);
|
|
||||||
result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
|
|
||||||
|
|
||||||
done:
|
|
||||||
PyMem_Free(p);
|
PyMem_Free(p);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
|
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
|
||||||
|
FORMATBUFLEN is the length of the buffer in which formatchar() formats a single character.
|
||||||
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
|
|
||||||
chars are formatted. XXX This is a magic number. Each formatting
|
|
||||||
routine does bounds checking to ensure no overflow, but a better
|
|
||||||
solution may be to malloc a buffer of appropriate size for each
|
|
||||||
format. For now, the current solution is sufficient.
|
|
||||||
*/
|
*/
|
||||||
#define FORMATBUFLEN (size_t)120
|
#define FORMATBUFLEN (size_t)10
|
||||||
|
|
||||||
PyObject *PyUnicode_Format(PyObject *format,
|
PyObject *PyUnicode_Format(PyObject *format,
|
||||||
PyObject *args)
|
PyObject *args)
|
||||||
|
@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
|
||||||
Py_UNICODE *pbuf;
|
Py_UNICODE *pbuf;
|
||||||
Py_UNICODE sign;
|
Py_UNICODE sign;
|
||||||
Py_ssize_t len;
|
Py_ssize_t len;
|
||||||
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
|
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
|
||||||
|
|
||||||
fmt++;
|
fmt++;
|
||||||
if (*fmt == '(') {
|
if (*fmt == '(') {
|
||||||
|
@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
|
||||||
case 'F':
|
case 'F':
|
||||||
case 'g':
|
case 'g':
|
||||||
case 'G':
|
case 'G':
|
||||||
pbuf = formatbuf;
|
temp = formatfloat(v, flags, prec, c);
|
||||||
len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
|
if (!temp)
|
||||||
flags, prec, c, v);
|
|
||||||
if (len < 0)
|
|
||||||
goto onError;
|
goto onError;
|
||||||
|
pbuf = PyUnicode_AS_UNICODE(temp);
|
||||||
|
len = PyUnicode_GET_SIZE(temp);
|
||||||
sign = 1;
|
sign = 1;
|
||||||
if (flags & F_ZERO)
|
if (flags & F_ZERO)
|
||||||
fill = '0';
|
fill = '0';
|
||||||
|
|
|
@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
||||||
int flags,
|
int flags,
|
||||||
int *type)
|
int *type)
|
||||||
{
|
{
|
||||||
char buf[128];
|
|
||||||
char format[32];
|
char format[32];
|
||||||
Py_ssize_t len;
|
Py_ssize_t bufsize;
|
||||||
char *result;
|
char *buf;
|
||||||
char *p;
|
int t, exp;
|
||||||
int t;
|
|
||||||
int upper = 0;
|
int upper = 0;
|
||||||
|
|
||||||
/* Validate format_code, and map upper and lower case */
|
/* Validate format_code, and map upper and lower case */
|
||||||
|
@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Here's a quick-and-dirty calculation to figure out how big a buffer
|
||||||
|
we need. In general, for a finite float we need:
|
||||||
|
|
||||||
|
1 byte for each digit of the decimal significand, and
|
||||||
|
|
||||||
|
1 for a possible sign
|
||||||
|
1 for a possible decimal point
|
||||||
|
2 for a possible [eE][+-]
|
||||||
|
1 for each digit of the exponent; if we allow 19 digits
|
||||||
|
total then we're safe up to exponents of 2**63.
|
||||||
|
1 for the trailing nul byte
|
||||||
|
|
||||||
|
This gives a total of 24 + the number of digits in the significand,
|
||||||
|
and the number of digits in the significand is:
|
||||||
|
|
||||||
|
for 'g' format: at most precision, except possibly
|
||||||
|
when precision == 0, when it's 1.
|
||||||
|
for 'e' format: precision+1
|
||||||
|
for 'f' format: precision digits after the point, at least 1
|
||||||
|
before. To figure out how many digits appear before the point
|
||||||
|
we have to examine the size of the number. If fabs(val) < 1.0
|
||||||
|
then there will be only one digit before the point. If
|
||||||
|
fabs(val) >= 1.0, then there are at most
|
||||||
|
|
||||||
|
1+floor(log10(ceiling(fabs(val))))
|
||||||
|
|
||||||
|
digits before the point (where the 'ceiling' allows for the
|
||||||
|
possibility that the rounding rounds the integer part of val
|
||||||
|
up). A safe upper bound for the above quantity is
|
||||||
|
1+floor(exp/3), where exp is the unique integer such that 0.5
|
||||||
|
<= fabs(val)/2**exp < 1.0. This exp can be obtained from
|
||||||
|
frexp.
|
||||||
|
|
||||||
|
So we allow room for precision+1 digits for all formats, plus an
|
||||||
|
extra floor(exp/3) digits for 'f' format.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
|
||||||
|
/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
|
||||||
|
bufsize = 5;
|
||||||
|
else {
|
||||||
|
bufsize = 25 + precision;
|
||||||
|
if (format_code == 'f' && fabs(val) >= 1.0) {
|
||||||
|
frexp(val, &exp);
|
||||||
|
bufsize += exp/3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = PyMem_Malloc(bufsize);
|
||||||
|
if (buf == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* Handle nan and inf. */
|
/* Handle nan and inf. */
|
||||||
if (Py_IS_NAN(val)) {
|
if (Py_IS_NAN(val)) {
|
||||||
strcpy(buf, "nan");
|
strcpy(buf, "nan");
|
||||||
|
@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
||||||
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
|
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
|
||||||
(flags & Py_DTSF_ALT ? "#" : ""), precision,
|
(flags & Py_DTSF_ALT ? "#" : ""), precision,
|
||||||
format_code);
|
format_code);
|
||||||
_PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
|
_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
|
||||||
}
|
}
|
||||||
|
|
||||||
len = strlen(buf);
|
|
||||||
|
|
||||||
/* Add 1 for the trailing 0 byte.
|
|
||||||
Add 1 because we might need to make room for the sign.
|
|
||||||
*/
|
|
||||||
result = PyMem_Malloc(len + 2);
|
|
||||||
if (result == NULL) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
p = result;
|
|
||||||
|
|
||||||
/* Add sign when requested. It's convenient (esp. when formatting
|
/* Add sign when requested. It's convenient (esp. when formatting
|
||||||
complex numbers) to include a sign even for inf and nan. */
|
complex numbers) to include a sign even for inf and nan. */
|
||||||
if (flags & Py_DTSF_SIGN && buf[0] != '-')
|
if (flags & Py_DTSF_SIGN && buf[0] != '-') {
|
||||||
*p++ = '+';
|
size_t len = strlen(buf);
|
||||||
|
/* the bufsize calculations above should ensure that we've got
|
||||||
strcpy(p, buf);
|
space to add a sign */
|
||||||
|
assert((size_t)bufsize >= len+2);
|
||||||
|
memmove(buf+1, buf, len+1);
|
||||||
|
buf[0] = '+';
|
||||||
|
}
|
||||||
if (upper) {
|
if (upper) {
|
||||||
/* Convert to upper case. */
|
/* Convert to upper case. */
|
||||||
char *p1;
|
char *p1;
|
||||||
for (p1 = p; *p1; p1++)
|
for (p1 = buf; *p1; p1++)
|
||||||
*p1 = Py_TOUPPER(*p1);
|
*p1 = Py_TOUPPER(*p1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type)
|
if (type)
|
||||||
*type = t;
|
*type = t;
|
||||||
return result;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue