mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
[3.13] gh-122291: Intern latin-1 one-byte strings at startup (GH-122303) (GH-122347)
(cherry picked from commit bb09ba6792
)
Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
parent
6b9a5af72f
commit
40925103fc
2 changed files with 43 additions and 65 deletions
|
@ -320,7 +320,8 @@ init_global_interned_strings(PyInterpreterState *interp)
|
|||
return _PyStatus_ERR("failed to create global interned dict");
|
||||
}
|
||||
|
||||
/* Intern statically allocated string identifiers and deepfreeze strings.
|
||||
/* Intern statically allocated string identifiers, deepfreeze strings,
|
||||
* and one-byte latin-1 strings.
|
||||
* This must be done before any module initialization so that statically
|
||||
* allocated string identifiers are used instead of heap allocated strings.
|
||||
* Deepfreeze uses the interned identifiers if present to save space
|
||||
|
@ -328,14 +329,11 @@ init_global_interned_strings(PyInterpreterState *interp)
|
|||
*/
|
||||
_PyUnicode_InitStaticStrings(interp);
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// In the free-threaded build, intern the 1-byte strings as well
|
||||
for (int i = 0; i < 256; i++) {
|
||||
PyObject *s = LATIN1(i);
|
||||
_PyUnicode_InternStatic(interp, &s);
|
||||
assert(s == LATIN1(i));
|
||||
}
|
||||
#endif
|
||||
#ifdef Py_DEBUG
|
||||
assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
|
||||
|
||||
|
@ -15051,26 +15049,14 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
|
|||
assert(s != NULL);
|
||||
assert(_PyUnicode_CHECK(s));
|
||||
assert(_PyUnicode_STATE(s).statically_allocated);
|
||||
|
||||
switch (PyUnicode_CHECK_INTERNED(s)) {
|
||||
case SSTATE_NOT_INTERNED:
|
||||
break;
|
||||
case SSTATE_INTERNED_IMMORTAL_STATIC:
|
||||
return s;
|
||||
default:
|
||||
Py_FatalError("_PyUnicode_InternStatic called on wrong string");
|
||||
}
|
||||
assert(!PyUnicode_CHECK_INTERNED(s));
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
/* We must not add process-global interned string if there's already a
|
||||
* per-interpreter interned_dict, which might contain duplicates.
|
||||
* Except "short string" singletons: those are special-cased. */
|
||||
*/
|
||||
PyObject *interned = get_interned_dict(interp);
|
||||
assert(interned == NULL || unicode_is_singleton(s));
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// In the free-threaded build, don't allow even the short strings.
|
||||
assert(interned == NULL);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Look in the global cache first. */
|
||||
|
@ -15142,11 +15128,6 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
|
|||
return s;
|
||||
}
|
||||
|
||||
/* Handle statically allocated strings. */
|
||||
if (_PyUnicode_STATE(s).statically_allocated) {
|
||||
return intern_static(interp, s);
|
||||
}
|
||||
|
||||
/* Is it already interned? */
|
||||
switch (PyUnicode_CHECK_INTERNED(s)) {
|
||||
case SSTATE_NOT_INTERNED:
|
||||
|
@ -15163,6 +15144,9 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
|
|||
return s;
|
||||
}
|
||||
|
||||
/* Statically allocated strings must be already interned. */
|
||||
assert(!_PyUnicode_STATE(s).statically_allocated);
|
||||
|
||||
#if Py_GIL_DISABLED
|
||||
/* In the free-threaded build, all interned strings are immortal */
|
||||
immortalize = 1;
|
||||
|
@ -15173,13 +15157,11 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
|
|||
immortalize = 1;
|
||||
}
|
||||
|
||||
/* if it's a short string, get the singleton -- and intern it */
|
||||
/* if it's a short string, get the singleton */
|
||||
if (PyUnicode_GET_LENGTH(s) == 1 &&
|
||||
PyUnicode_KIND(s) == PyUnicode_1BYTE_KIND) {
|
||||
PyObject *r = LATIN1(*(unsigned char*)PyUnicode_DATA(s));
|
||||
if (!PyUnicode_CHECK_INTERNED(r)) {
|
||||
r = intern_static(interp, r);
|
||||
}
|
||||
assert(PyUnicode_CHECK_INTERNED(r));
|
||||
Py_DECREF(s);
|
||||
return r;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue