Issue #5859: Remove use of fixed-length buffers for float formatting

The fixed-length buffers are removed from unicodeobject.c and from the fallback version of PyOS_double_to_string. As a result, operations like '%.120e' % 12.34 no longer raise an exception.
2025-07-23 11:15:24 +00:00 · 2009-05-01 11:42:00 +00:00 · 2009-05-01 11:42:00 +00:00 · f489caf5da
commit f489caf5da
parent fb526ac34a
4 changed files with 90 additions and 98 deletions
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest:
            value = 0.01
            for x in range(60):
                value = value * 3.141592655 / 3.0 * 10.0
-                # The formatfloat() code in stringobject.c and
+                self.checkcall(format, "__mod__", value)
                # unicodeobject.c uses a 120 byte buffer and switches from
                # 'f' formatting to 'g' at precision 50, so we expect
                # OverflowErrors for the ranges x < 50 and prec >= 67.
                if x < 50 and prec >= 67:
                    self.checkraises(OverflowError, format, "__mod__", value)
                else:
                    self.checkcall(format, "__mod__", value)
    def test_inplace_rewrites(self):
        # Check that strings don't copy and modify cached single-character strings
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
 Core and Builtins
 -----------------
 - Issue #5859: Remove length restrictions for float formatting:
  '%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
 - Issue #1588: Add complex.__format__. For example, 
  format(complex(1, 2./3), '.5') now produces a sensible result.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
    return NULL;
 }
-static void
+/* Returns a new reference to a PyUnicode object, or NULL on failure. */
 strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
 {
    register Py_ssize_t i;
    for (i = len - 1; i >= 0; i--)
        buffer[i] = (Py_UNICODE) charbuffer[i];
 }
-static int
+static PyObject *
-formatfloat(Py_UNICODE *buf,
+formatfloat(PyObject *v, int flags, int prec, int type)
            size_t buflen,
            int flags,
            int prec,
            int type,
            PyObject *v)
 {
-    /* eric.smith: To minimize disturbances in PyUnicode_Format (the
+    char *p;
-       only caller of this routine), I'm going to keep the existing
+    PyObject *result;
       API to this function. That means that we'll allocate memory and
       then copy back into the supplied buffer. But that's better than
       all of the changes that would be required in PyUnicode_Format
       because it does lots of memory management tricks. */
    char* p = NULL;
    int result = -1;
    double x;
    Py_ssize_t len;
    x = PyFloat_AsDouble(v);
    if (x == -1.0 && PyErr_Occurred())
-        goto done;
+        return NULL;
    if (prec < 0)
        prec = 6;
    /* make sure that the decimal representation of precision really does
       need at most 10 digits: platforms with sizeof(int) == 8 exist! */
    if (prec > 0x7fffffffL) {
        PyErr_SetString(PyExc_OverflowError,
                        "outrageously large precision "
                        "for formatted float");
        goto done;
    }
    if (type == 'f' && fabs(x) >= 1e50)
        type = 'g';
    if (((type == 'g' || type == 'G') &&
         buflen <= (size_t)10 + (size_t)prec) ||
        ((type == 'f' || type == 'F') &&
         buflen <= (size_t)53 + (size_t)prec)) {
        PyErr_SetString(PyExc_OverflowError,
                        "formatted float is too long (precision too large?)");
        goto done;
    }
    p = PyOS_double_to_string(x, type, prec,
                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
-    len = strlen(p);
+    if (p == NULL)
-    if (len+1 >= buflen) {
+        return NULL;
-        /* Caller supplied buffer is not large enough. */
+    result = PyUnicode_FromStringAndSize(p, strlen(p));
        PyErr_NoMemory();
        goto done;
    }
    strtounicode(buf, p, len);
    result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
 done:
    PyMem_Free(p);
    return result;
 }
@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
 }
 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
+   FORMATBUFLEN is the length of the buffer in which chars are formatted.
   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.
 */
-#define FORMATBUFLEN (size_t)120
+#define FORMATBUFLEN (size_t)10
 PyObject *PyUnicode_Format(PyObject *format,
                           PyObject *args)
@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
            Py_UNICODE *pbuf;
            Py_UNICODE sign;
            Py_ssize_t len;
-            Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+            Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
            fmt++;
            if (*fmt == '(') {
@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
            case 'F':
            case 'g':
            case 'G':
-                pbuf = formatbuf;
+                temp = formatfloat(v, flags, prec, c);
-                len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+                if (!temp)
                                  flags, prec, c, v);
                if (len < 0)
                    goto onError;
                pbuf = PyUnicode_AS_UNICODE(temp);
                len = PyUnicode_GET_SIZE(temp);
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         int flags,
                                         int *type)
 {
 	char buf[128];
 	char format[32];
-	Py_ssize_t len;
+	Py_ssize_t bufsize;
-	char *result;
+	char *buf;
-	char *p;
+	int t, exp;
 	int t;
 	int upper = 0;
 	/* Validate format_code, and map upper and lower case */
@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
 		return NULL;
 	}
 	/* Here's a quick-and-dirty calculation to figure out how big a buffer
 	   we need.  In general, for a finite float we need:
 	     1 byte for each digit of the decimal significand, and
 	     1 for a possible sign
 	     1 for a possible decimal point
 	     2 for a possible [eE][+-]
 	     1 for each digit of the exponent;  if we allow 19 digits
 	       total then we're safe up to exponents of 2**63.
 	     1 for the trailing nul byte
 	   This gives a total of 24 + the number of digits in the significand,
 	   and the number of digits in the significand is:
 	     for 'g' format: at most precision, except possibly
 	       when precision == 0, when it's 1.
 	     for 'e' format: precision+1
 	     for 'f' format: precision digits after the point, at least 1
 	       before.  To figure out how many digits appear before the point
 	       we have to examine the size of the number.  If fabs(val) < 1.0
 	       then there will be only one digit before the point.  If
 	       fabs(val) >= 1.0, then there are at most
 	         1+floor(log10(ceiling(fabs(val))))
 	       digits before the point (where the 'ceiling' allows for the
 	       possibility that the rounding rounds the integer part of val
 	       up).  A safe upper bound for the above quantity is
 	       1+floor(exp/3), where exp is the unique integer such that 0.5
 	       <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
 	       frexp.
 	   So we allow room for precision+1 digits for all formats, plus an
 	   extra floor(exp/3) digits for 'f' format.
 	*/
 	if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
 		/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
 		bufsize = 5;
 	else {
 		bufsize = 25 + precision;
 		if (format_code == 'f' && fabs(val) >= 1.0) {
 			frexp(val, &exp);
 			bufsize += exp/3;
 		}
 	}
 	buf = PyMem_Malloc(bufsize);
 	if (buf == NULL) {
 		PyErr_NoMemory();
 		return NULL;
 	}
 	/* Handle nan and inf. */
 	if (Py_IS_NAN(val)) {
 		strcpy(buf, "nan");
@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
 		PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
 			      (flags & Py_DTSF_ALT ? "#" : ""), precision,
 			      format_code);
-		_PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
+		_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
 	}
 	len = strlen(buf);
 	/* Add 1 for the trailing 0 byte.
 	   Add 1 because we might need to make room for the sign.
 	   */
 	result = PyMem_Malloc(len + 2);
 	if (result == NULL) {
 		PyErr_NoMemory();
 		return NULL;
 	}
 	p = result;
 	/* Add sign when requested.  It's convenient (esp. when formatting
 	 complex numbers) to include a sign even for inf and nan. */
-	if (flags & Py_DTSF_SIGN && buf[0] != '-')
+	if (flags & Py_DTSF_SIGN && buf[0] != '-') {
-		*p++ = '+';
+		size_t len = strlen(buf);
-
+		/* the bufsize calculations above should ensure that we've got
-	strcpy(p, buf);
+		   space to add a sign */
-
+		assert((size_t)bufsize >= len+2);
 		memmove(buf+1, buf, len+1);
 		buf[0] = '+';
 	}
 	if (upper) {
 		/* Convert to upper case. */
 		char *p1;
-		for (p1 = p; *p1; p1++)
+		for (p1 = buf; *p1; p1++)
 			*p1 = Py_TOUPPER(*p1);
 	}
 	if (type)
 		*type = t;
-	return result;
+	return buf;
 }
 #else