Add _PyUnicodeWriter_WriteASCIIString() function

2025-09-26 10:19:53 +00:00 · 2013-11-19 12:54:53 +01:00 · 2013-11-19 12:54:53 +01:00 · 4a58707a34
commit 4a58707a34
parent 4d3f109ad3
4 changed files with 96 additions and 38 deletions
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -962,12 +962,20 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
    Py_ssize_t end
    );
+/* Append a ASCII-encoded byte string.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+    const char *str,           /* ASCII-encoded byte string */
+    Py_ssize_t len             /* number of bytes, or -1 if unknown */
+    );
 /* Append a latin1-encoded byte string.
   Return 0 on success, raise an exception and return -1 on error. */
 PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer,
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
-    const char *str,            /* latin1-encoded byte string */
+    const char *str,           /* latin1-encoded byte string */
-    Py_ssize_t len              /* length in bytes */
+    Py_ssize_t len             /* length in bytes */
    );
 /* Get the value of the writer as an Unicode string. Clear the
@ -979,6 +987,9 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
 /* Deallocate memory of a writer (clear its internal buffer). */
 PyAPI_FUNC(void)
 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+PyAPI_FUNC(int) _PyObject_ReprWriter(_PyUnicodeWriter *writer,
+    PyObject *v);
 #endif
 #ifndef Py_LIMITED_API
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@ -339,19 +339,12 @@ list_repr(PyListObject *v)
 {
    Py_ssize_t i;
    PyObject *s;
-    static PyObject *sep = NULL;
    _PyUnicodeWriter writer;
    if (Py_SIZE(v) == 0) {
        return PyUnicode_FromString("[]");
    }
-    if (sep == NULL) {
-        sep = PyUnicode_FromString(", ");
-        if (sep == NULL)
-            return NULL;
-    }
    i = Py_ReprEnter((PyObject*)v);
    if (i != 0) {
        return i > 0 ? PyUnicode_FromString("[...]") : NULL;
@ -369,7 +362,7 @@ list_repr(PyListObject *v)
       so must refetch the list size on each iteration. */
    for (i = 0; i < Py_SIZE(v); ++i) {
        if (i > 0) {
-            if (_PyUnicodeWriter_WriteStr(&writer, sep) < 0)
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
                goto error;
        }
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -140,9 +140,9 @@ extern "C" {
   buffer where the result characters are written to. */
 #define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
    do {                                                \
-        to_type *_to = (to_type *) to;                  \
+        to_type *_to = (to_type *)(to);                \
-        const from_type *_iter = (begin);               \
+        const from_type *_iter = (from_type *)(begin);  \
-        const from_type *_end = (end);                  \
+        const from_type *_end = (from_type *)(end);     \
        Py_ssize_t n = (_end) - (_iter);                \
        const from_type *_unrolled_end =                \
            _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
@ -2562,7 +2562,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
            precision = len;
        arglen = Py_MAX(precision, width);
-        assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127);
        if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
            return NULL;
@ -2581,8 +2580,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
            writer->pos += fill;
        }
-        unicode_write_cstr(writer->buffer, writer->pos, buffer, len);
+        if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0)
-        writer->pos += len;
+            return NULL;
        break;
    }
@ -2604,11 +2603,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
            len += 2;
        }
-        assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127);
+        if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0)
-        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
            return NULL;
-        unicode_write_cstr(writer->buffer, writer->pos, number, len);
-        writer->pos += len;
        break;
    }
@ -2707,7 +2703,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
           skip the code, since there's no way to know what's in the
           argument list) */
        len = strlen(p);
-        if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1)
+        if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
            return NULL;
        f = p+len;
        return f;
@ -2759,10 +2755,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
            if (*p == '\0')
                writer.overallocate = 0;
-            if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1)
+
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
                goto fail;
-            unicode_write_cstr(writer.buffer, writer.pos, f, len);
-            writer.pos += len;
            f = p;
        }
@ -13461,7 +13456,68 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
 }
 int
-_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+                                  const char *ascii, Py_ssize_t len)
+{
+    if (len == -1)
+        len = strlen(ascii);
+    assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128);
+    if (writer->buffer == NULL && !writer->overallocate) {
+        PyObject *str;
+        str = _PyUnicode_FromASCII(ascii, len);
+        if (str == NULL)
+            return -1;
+        writer->readonly = 1;
+        writer->buffer = str;
+        _PyUnicodeWriter_Update(writer);
+        writer->pos += len;
+        return 0;
+    }
+    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+        return -1;
+    switch (writer->kind)
+    {
+    case PyUnicode_1BYTE_KIND:
+    {
+        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
+        Py_UCS1 *data = writer->data;
+        Py_MEMCPY(data + writer->pos, str, len);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS2,
+            ascii, ascii + len,
+            (Py_UCS2 *)writer->data + writer->pos);
+        break;
+    }
+    case PyUnicode_4BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS4,
+            ascii, ascii + len,
+            (Py_UCS4 *)writer->data + writer->pos);
+        break;
+    }
+    default:
+        assert(0);
+    }
+    writer->pos += len;
+    return 0;
+}
+int
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+                                   const char *str, Py_ssize_t len)
 {
    Py_UCS4 maxchar;
@ -13828,12 +13884,10 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
        return -1;
    len = strlen(p);
    if (writer) {
-        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) {
+        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
            PyMem_Free(p);
            return -1;
        }
-        unicode_write_cstr(writer->buffer, writer->pos, p, len);
-        writer->pos += len;
    }
    else
        *p_output = _PyUnicode_FromASCII(p, len);
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@ -1053,24 +1053,24 @@ format_float_internal(PyObject *value,
        n_digits += 1;
    }
-    /* Since there is no unicode version of PyOS_double_to_string,
-       just use the 8 bit version and then convert to unicode. */
-    unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
-    PyMem_Free(buf);
-    if (unicode_tmp == NULL)
-        goto done;
    if (format->sign != '+' && format->sign != ' '
        && format->width == -1
        && format->type != 'n'
        && !format->thousands_separators)
    {
        /* Fast path */
-        result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp);
+        result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
-        Py_DECREF(unicode_tmp);
+        PyMem_Free(buf);
        return result;
    }
+    /* Since there is no unicode version of PyOS_double_to_string,
+       just use the 8 bit version and then convert to unicode. */
+    unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
+    PyMem_Free(buf);
+    if (unicode_tmp == NULL)
+        goto done;
    /* Is a sign character present in the output?  If so, remember it
       and skip it */
    index = 0;