Unicode: optimize creation of 1-character strings

2025-09-26 18:29:57 +00:00 · 2012-05-03 02:17:04 +02:00 · 2012-05-03 02:17:04 +02:00 · b6cd014d75
commit b6cd014d75
parent bff7c96834
1 changed files with 50 additions and 8 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
        return unicode_empty;
    }
    assert(size > 0);
-    if (size == 1 && u[0] < 256)
+    if (size == 1) {
-        return get_latin1_char((unsigned char)u[0]);
+        Py_UCS4 ch = u[0];
        if (ch < 256)
            return get_latin1_char((unsigned char)ch);
        res = PyUnicode_New(1, ch);
        if (res == NULL)
            return NULL;
        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
        assert(_PyUnicode_CheckConsistency(res, 1));
        return res;
    }
    max_char = ucs2lib_find_max_char(u, u + size);
    res = PyUnicode_New(size, max_char);
@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
        return unicode_empty;
    }
    assert(size > 0);
-    if (size == 1 && u[0] < 256)
+    if (size == 1) {
-        return get_latin1_char((unsigned char)u[0]);
+        Py_UCS4 ch = u[0];
        if (ch < 256)
            return get_latin1_char((unsigned char)ch);
        res = PyUnicode_New(1, ch);
        if (res == NULL)
            return NULL;
        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
        assert(_PyUnicode_CheckConsistency(res, 1));
        return res;
    }
    max_char = ucs4lib_find_max_char(u, u + size);
    res = PyUnicode_New(size, max_char);
@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args)
 static PyObject *
 unicode_getitem(PyObject *self, Py_ssize_t index)
 {
-    Py_UCS4 ch = PyUnicode_ReadChar(self, index);
+    void *data;
-    if (ch == (Py_UCS4)-1)
+    enum PyUnicode_Kind kind;
    Py_UCS4 ch;
    PyObject *res;
    if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
        PyErr_BadArgument();
        return NULL;
-    return PyUnicode_FromOrdinal(ch);
+    }
    if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return NULL;
    }
    kind = PyUnicode_KIND(self);
    data = PyUnicode_DATA(self);
    ch = PyUnicode_READ(kind, data, index);
    if (ch < 256)
        return get_latin1_char(ch);
    res = PyUnicode_New(1, ch);
    if (res == NULL)
        return NULL;
    kind = PyUnicode_KIND(res);
    data = PyUnicode_DATA(res);
    PyUnicode_WRITE(kind, data, 0, ch);
    assert(_PyUnicode_CheckConsistency(res, 1));
    return res;
 }
 /* Believe it or not, this produces the same value for ASCII strings
@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
    }
    if (PyUnicode_IS_ASCII(self)) {
        kind = PyUnicode_KIND(self);
        data = PyUnicode_1BYTE_DATA(self);
        return unicode_fromascii(data + start, length);
    }