GH-84436: Skip refcounting for known immortals (GH-107605)

2025-10-10 00:43:41 +00:00 · 2023-08-04 16:24:50 -07:00 · 2023-08-04 16:24:50 -07:00 · 05a824f294
commit 05a824f294
parent ec0a0d2bd9
19 changed files with 52 additions and 65 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -211,21 +211,13 @@ static int unicode_is_singleton(PyObject *unicode);
 #endif


-// Return a borrowed reference to the empty string singleton.
+// Return a reference to the immortal empty string singleton.
 static inline PyObject* unicode_get_empty(void)
 {
    _Py_DECLARE_STR(empty, "");
    return &_Py_STR(empty);
 }

-
-// Return a strong reference to the empty string singleton.
-static inline PyObject* unicode_new_empty(void)
-{
-    PyObject *empty = unicode_get_empty();
-    return Py_NewRef(empty);
-}
-
 /* This dictionary holds all interned unicode strings.  Note that references
   to strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
@@ -310,7 +302,7 @@ clear_interned_dict(PyInterpreterState *interp)

 #define _Py_RETURN_UNICODE_EMPTY()   \
    do {                             \
-        return unicode_new_empty();  \
+        return unicode_get_empty();  \
    } while (0)

 static inline void
@@ -650,7 +642,6 @@ unicode_result(PyObject *unicode)
        PyObject *empty = unicode_get_empty();
        if (unicode != empty) {
            Py_DECREF(unicode);
-            Py_INCREF(empty);
        }
        return empty;
    }
@@ -662,7 +653,6 @@ unicode_result(PyObject *unicode)
            Py_UCS1 ch = data[0];
            PyObject *latin1_char = LATIN1(ch);
            if (unicode != latin1_char) {
-                Py_INCREF(latin1_char);
                Py_DECREF(unicode);
            }
            return latin1_char;
@@ -1199,7 +1189,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
 {
    /* Optimization for empty strings */
    if (size == 0) {
-        return unicode_new_empty();
+        return unicode_get_empty();
    }

    PyObject *obj;
@@ -1669,7 +1659,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
        return 0;

    if (length == 0) {
-        PyObject *empty = unicode_new_empty();
+        PyObject *empty = unicode_get_empty();
        Py_SETREF(*p_unicode, empty);
        return 0;
    }
@@ -1764,7 +1754,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
 static PyObject*
 get_latin1_char(Py_UCS1 ch)
 {
-    return Py_NewRef(LATIN1(ch));
+    PyObject *o = LATIN1(ch);
+    assert(_Py_IsImmortal(o));
+    return o;
 }

 static PyObject*
@@ -1891,7 +1883,7 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
            "NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize");
        return NULL;
    }
-    return unicode_new_empty();
+    return unicode_get_empty();
 }

 PyObject *
@@ -10261,7 +10253,7 @@ replace(PyObject *self, PyObject *str1,
        }
        new_size = slen + n * (len2 - len1);
        if (new_size == 0) {
-            u = unicode_new_empty();
+            u = unicode_get_empty();
            goto done;
        }
        if (new_size > (PY_SSIZE_T_MAX / rkind)) {
@@ -14505,7 +14497,7 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
 {
    PyObject *unicode;
    if (x == NULL) {
-        unicode = unicode_new_empty();
+        unicode = unicode_get_empty();
    }
    else if (encoding == NULL && errors == NULL) {
        unicode = PyObject_Str(x);
@@ -14994,8 +14986,7 @@ unicode_ascii_iter_next(unicodeiterobject *it)
        Py_UCS1 chr = (Py_UCS1)PyUnicode_READ(PyUnicode_1BYTE_KIND,
                                              data, it->it_index);
        it->it_index++;
-        PyObject *item = (PyObject*)&_Py_SINGLETON(strings).ascii[chr];
-        return Py_NewRef(item);
+        return (PyObject*)&_Py_SINGLETON(strings).ascii[chr];
    }
    it->it_seq = NULL;
    Py_DECREF(seq);
@@ -15025,7 +15016,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
    if (it->it_seq != NULL) {
        return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
    } else {
-        PyObject *u = unicode_new_empty();
+        PyObject *u = unicode_get_empty();
        if (u == NULL) {
            Py_XDECREF(iter);
            return NULL;