Simplify and optimize formatlong()

* Remove _PyBytes_FormatLong(): inline it into formatlong() * the input type is always a long, so remove the code for bool * don't duplicate the string if the length does not change * Use PyUnicode_DATA() instead of _PyUnicode_AsString()
2025-10-17 12:18:23 +00:00 · 2012-04-27 23:40:13 +02:00 · 2012-04-27 23:40:13 +02:00 · d0880d57b0
commit d0880d57b0
parent 19b409a341
3 changed files with 128 additions and 152 deletions
--- a/Include/bytesobject.h
+++ b/Include/bytesobject.h
@ -62,8 +62,6 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
 PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
 PyAPI_FUNC(PyObject *) _PyBytes_FormatLong(PyObject*, int, int,
 						  int, char**, int*);
 #endif
 PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
 						   const char *, Py_ssize_t,
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@ -2860,149 +2860,6 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
    return 0;
 }
 /* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyBytes*, or NULL if error.
 *  .  *pbuf is set to point into it,
 *     *plen set to the # of chars following that.
 *     Caller must decref it when done using pbuf.
 *     The string starting at *pbuf is of the form
 *         "-"? ("0x" | "0X")? digit+
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
 *         set in flags.  The case of hex digits will be correct,
 *     There will be at least prec digits, zero-filled on the left if
 *         necessary to get that many.
 * val          object to be converted
 * flags        bitmask of format flags; only F_ALT is looked at
 * prec         minimum number of digits; 0-fill on left if needed
 * type         a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
 */
 PyObject*
 _PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
                     char **pbuf, int *plen)
 {
    PyObject *result = NULL;
    char *buf;
    Py_ssize_t i;
    int sign;           /* 1 if '-', else 0 */
    int len;            /* number of characters */
    Py_ssize_t llen;
    int numdigits;      /* len == numnondigits + numdigits */
    int numnondigits = 0;
    /* Avoid exceeding SSIZE_T_MAX */
    if (prec > INT_MAX-3) {
        PyErr_SetString(PyExc_OverflowError,
                        "precision too large");
        return NULL;
    }
    switch (type) {
    case 'd':
    case 'u':
        /* Special-case boolean: we want 0/1 */
        if (PyBool_Check(val))
            result = PyNumber_ToBase(val, 10);
        else
            result = Py_TYPE(val)->tp_str(val);
        break;
    case 'o':
        numnondigits = 2;
        result = PyNumber_ToBase(val, 8);
        break;
    case 'x':
    case 'X':
        numnondigits = 2;
        result = PyNumber_ToBase(val, 16);
        break;
    default:
        assert(!"'type' not in [duoxX]");
    }
    if (!result)
        return NULL;
    buf = _PyUnicode_AsString(result);
    if (!buf) {
        Py_DECREF(result);
        return NULL;
    }
    /* To modify the string in-place, there can only be one reference. */
    if (Py_REFCNT(result) != 1) {
        PyErr_BadInternalCall();
        return NULL;
    }
    llen = PyUnicode_GetLength(result);
    if (llen > INT_MAX) {
        PyErr_SetString(PyExc_ValueError,
                        "string too large in _PyBytes_FormatLong");
        return NULL;
    }
    len = (int)llen;
    if (buf[len-1] == 'L') {
        --len;
        buf[len] = '\0';
    }
    sign = buf[0] == '-';
    numnondigits += sign;
    numdigits = len - numnondigits;
    assert(numdigits > 0);
    /* Get rid of base marker unless F_ALT */
    if (((flags & F_ALT) == 0 &&
        (type == 'o' || type == 'x' || type == 'X'))) {
        assert(buf[sign] == '0');
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
               buf[sign+1] == 'o');
        numnondigits -= 2;
        buf += 2;
        len -= 2;
        if (sign)
            buf[0] = '-';
        assert(len == numnondigits + numdigits);
        assert(numdigits > 0);
    }
    /* Fill with leading zeroes to meet minimum width. */
    if (prec > numdigits) {
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
                                numnondigits + prec);
        char *b1;
        if (!r1) {
            Py_DECREF(result);
            return NULL;
        }
        b1 = PyBytes_AS_STRING(r1);
        for (i = 0; i < numnondigits; ++i)
            *b1++ = *buf++;
        for (i = 0; i < prec - numdigits; i++)
            *b1++ = '0';
        for (i = 0; i < numdigits; i++)
            *b1++ = *buf++;
        *b1 = '\0';
        Py_DECREF(result);
        result = r1;
        buf = PyBytes_AS_STRING(result);
        len = numnondigits + prec;
    }
    /* Fix up case for hex conversions. */
    if (type == 'X') {
        /* Need to convert all lower case letters to upper case.
           and need to convert 0x to 0X (and -0x to -0X). */
        for (i = 0; i < len; i++)
            if (buf[i] >= 'a' && buf[i] <= 'x')
                buf[i] -= 'a'-'A';
    }
    *pbuf = buf;
    *plen = len;
    return result;
 }
 void
 PyBytes_Fini(void)
 {
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -13438,19 +13438,140 @@ formatfloat(PyObject *v, int flags, int prec, int type)
    return result;
 }
 /* formatlong() emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyUnicodeObject*, or NULL if error.
 *     The output string is of the form
 *         "-"? ("0x" | "0X")? digit+
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
 *         set in flags.  The case of hex digits will be correct,
 *     There will be at least prec digits, zero-filled on the left if
 *         necessary to get that many.
 * val          object to be converted
 * flags        bitmask of format flags; only F_ALT is looked at
 * prec         minimum number of digits; 0-fill on left if needed
 * type         a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
 */
 static PyObject*
 formatlong(PyObject *val, int flags, int prec, int type)
 {
    PyObject *result = NULL;
    char *buf;
-    int len;
+    Py_ssize_t i;
-    PyObject *str; /* temporary string object. */
+    int sign;           /* 1 if '-', else 0 */
-    PyObject *result;
+    int len;            /* number of characters */
    Py_ssize_t llen;
    int numdigits;      /* len == numnondigits + numdigits */
    int numnondigits = 0;
-    str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len);
+    /* Avoid exceeding SSIZE_T_MAX */
-    if (!str)
+    if (prec > INT_MAX-3) {
        PyErr_SetString(PyExc_OverflowError,
                        "precision too large");
        return NULL;
-    result = PyUnicode_DecodeASCII(buf, len, NULL);
+    }
-    Py_DECREF(str);
+
    assert(PyLong_Check(val));
    switch (type) {
    case 'd':
    case 'u':
        /* Special-case boolean: we want 0/1 */
        result = Py_TYPE(val)->tp_str(val);
        break;
    case 'o':
        numnondigits = 2;
        result = PyNumber_ToBase(val, 8);
        break;
    case 'x':
    case 'X':
        numnondigits = 2;
        result = PyNumber_ToBase(val, 16);
        break;
    default:
        assert(!"'type' not in [duoxX]");
    }
    if (!result)
        return NULL;
    assert(unicode_modifiable(result));
    assert(PyUnicode_IS_READY(result));
    assert(PyUnicode_IS_ASCII(result));
    /* To modify the string in-place, there can only be one reference. */
    if (Py_REFCNT(result) != 1) {
        PyErr_BadInternalCall();
        return NULL;
    }
    buf = PyUnicode_DATA(result);
    llen = PyUnicode_GET_LENGTH(result);
    if (llen > INT_MAX) {
        PyErr_SetString(PyExc_ValueError,
                        "string too large in _PyBytes_FormatLong");
        return NULL;
    }
    len = (int)llen;
    sign = buf[0] == '-';
    numnondigits += sign;
    numdigits = len - numnondigits;
    assert(numdigits > 0);
    /* Get rid of base marker unless F_ALT */
    if (((flags & F_ALT) == 0 &&
        (type == 'o' || type == 'x' || type == 'X'))) {
        assert(buf[sign] == '0');
        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
               buf[sign+1] == 'o');
        numnondigits -= 2;
        buf += 2;
        len -= 2;
        if (sign)
            buf[0] = '-';
        assert(len == numnondigits + numdigits);
        assert(numdigits > 0);
    }
    /* Fill with leading zeroes to meet minimum width. */
    if (prec > numdigits) {
        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
                                numnondigits + prec);
        char *b1;
        if (!r1) {
            Py_DECREF(result);
            return NULL;
        }
        b1 = PyBytes_AS_STRING(r1);
        for (i = 0; i < numnondigits; ++i)
            *b1++ = *buf++;
        for (i = 0; i < prec - numdigits; i++)
            *b1++ = '0';
        for (i = 0; i < numdigits; i++)
            *b1++ = *buf++;
        *b1 = '\0';
        Py_DECREF(result);
        result = r1;
        buf = PyBytes_AS_STRING(result);
        len = numnondigits + prec;
    }
    /* Fix up case for hex conversions. */
    if (type == 'X') {
        /* Need to convert all lower case letters to upper case.
           and need to convert 0x to 0X (and -0x to -0X). */
        for (i = 0; i < len; i++)
            if (buf[i] >= 'a' && buf[i] <= 'x')
                buf[i] -= 'a'-'A';
    }
    if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
        PyObject *unicode;
        unicode = unicode_fromascii((unsigned char *)buf, len);
        Py_DECREF(result);
        result = unicode;
    }
    return result;
 }