[3.12] gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-125205)

Fix a crash caused by immortal interned strings being shared between
sub-interpreters that use basic single-phase init. In that case, the string
can be used by an interpreter that outlives the interpreter that created and
interned it. For interpreters that share obmalloc state, also share the
interned dict with the main interpreter.

This is an un-revert of gh-124646 that additionally addresses the Py_TRACE_REFS
failures identified by gh-124785 (i.e. it also backports gh-125709).

(cherry picked from commit f2cb399470, AKA gh-124865)

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
Miss Islington (bot) 2024-12-03 18:26:25 +01:00 committed by GitHub
parent b49e902b81
commit 49da170709
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 124 additions and 20 deletions

View file

@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
}
}
/* Tell whether "interp" uses the main interpreter's interned dict instead
   of owning one itself.

   The answer is true for any non-main interpreter that has
   Py_RTFLAGS_USE_MAIN_OBMALLOC set in its feature flags, and false
   otherwise (in particular, always false for the main interpreter).

   Why this matters: interpreters which load basic single-phase init
   extension modules (m_size == -1) can end up with interned immortal
   strings that are shared between interpreters, due to the
   PyDict_Update(mdict, m_copy) call in import_find_extension(). Those
   strings are not safe to deallocate until every interpreter that might
   use them has been freed. Keeping them in the main interpreter's dict
   ensures they get freed after all other interpreters are freed.
*/
static bool
has_shared_intern_dict(PyInterpreterState *interp)
{
    // The main interpreter always owns its interned dict.
    if (interp == _PyInterpreterState_Main()) {
        return false;
    }
    return (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) != 0;
}
static int
init_interned_dict(PyInterpreterState *interp)
{
assert(get_interned_dict(interp) == NULL);
PyObject *interned = interned = PyDict_New();
if (interned == NULL) {
return -1;
PyObject *interned;
if (has_shared_intern_dict(interp)) {
interned = get_interned_dict(_PyInterpreterState_Main());
Py_INCREF(interned);
}
else {
interned = PyDict_New();
if (interned == NULL) {
return -1;
}
}
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
return 0;
@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp)
{
PyObject *interned = get_interned_dict(interp);
if (interned != NULL) {
PyDict_Clear(interned);
if (!has_shared_intern_dict(interp)) {
// only clear if the dict belongs to this interpreter
PyDict_Clear(interned);
}
Py_DECREF(interned);
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}
@ -15152,6 +15179,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
}
assert(PyDict_CheckExact(interned));
if (has_shared_intern_dict(interp)) {
// the dict doesn't belong to this interpreter, skip the debug
// checks on it and just clear the pointer to it
clear_interned_dict(interp);
return;
}
#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));
@ -15670,8 +15704,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(get_interned_dict(interp) == NULL);
if (!has_shared_intern_dict(interp)) {
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(get_interned_dict(interp) == NULL);
}
_PyUnicode_FiniEncodings(&state->fs_codec);