bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422)

This reverts commit ea251806b8.

Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for
the main interpreter.

Keep _PyUnicode_ClearInterned() changes avoiding the creation of a
temporary Python list object.
This commit is contained in:
Victor Stinner 2022-01-06 08:53:44 +01:00 committed by GitHub
parent e5894ca8fd
commit 35d6540c90
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 75 additions and 30 deletions

View file

@ -48,21 +48,11 @@ struct _Py_unicode_state {
PyObject *latin1[256]; PyObject *latin1[256];
struct _Py_unicode_fs_codec fs_codec; struct _Py_unicode_fs_codec fs_codec;
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.
Another way to look at this is to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
PyObject *interned;
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids; struct _Py_unicode_ids ids;
}; };
extern void _PyUnicode_ClearInterned(PyInterpreterState *); extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -0,0 +1,5 @@
Fix a regression when a type method like ``__init__()`` is modified in a
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
``update_slot()``. Revert the change which made the Unicode dictionary of
interned strings compatible with subinterpreters: the internal interned
dictionary is shared again by all interpreters. Patch by Victor Stinner.

View file

@ -54,6 +54,11 @@ typedef struct PySlot_Offset {
} PySlot_Offset; } PySlot_Offset;
/* bpo-40521: Interned strings are shared by all subinterpreters */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERN_NAME_STRINGS
#endif
/* alphabetical order */ /* alphabetical order */
_Py_IDENTIFIER(__abstractmethods__); _Py_IDENTIFIER(__abstractmethods__);
_Py_IDENTIFIER(__annotations__); _Py_IDENTIFIER(__annotations__);
@ -4028,6 +4033,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
if (name == NULL) if (name == NULL)
return -1; return -1;
} }
#ifdef INTERN_NAME_STRINGS
if (!PyUnicode_CHECK_INTERNED(name)) { if (!PyUnicode_CHECK_INTERNED(name)) {
PyUnicode_InternInPlace(&name); PyUnicode_InternInPlace(&name);
if (!PyUnicode_CHECK_INTERNED(name)) { if (!PyUnicode_CHECK_INTERNED(name)) {
@ -4037,6 +4043,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
return -1; return -1;
} }
} }
#endif
} }
else { else {
/* Will fail in _PyObject_GenericSetAttrWithDict. */ /* Will fail in _PyObject_GenericSetAttrWithDict. */
@ -8424,10 +8431,17 @@ _PyTypes_InitSlotDefs(void)
for (slotdef *p = slotdefs; p->name; p++) { for (slotdef *p = slotdefs; p->name; p++) {
/* Slots must be ordered by their offset in the PyHeapTypeObject. */ /* Slots must be ordered by their offset in the PyHeapTypeObject. */
assert(!p[1].name || p->offset <= p[1].offset); assert(!p[1].name || p->offset <= p[1].offset);
#ifdef INTERN_NAME_STRINGS
p->name_strobj = PyUnicode_InternFromString(p->name); p->name_strobj = PyUnicode_InternFromString(p->name);
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) { if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
return _PyStatus_NO_MEMORY(); return _PyStatus_NO_MEMORY();
} }
#else
p->name_strobj = PyUnicode_FromString(p->name);
if (!p->name_strobj) {
return _PyStatus_NO_MEMORY();
}
#endif
} }
slotdefs_initialized = 1; slotdefs_initialized = 1;
return _PyStatus_OK(); return _PyStatus_OK();
@ -8452,16 +8466,24 @@ update_slot(PyTypeObject *type, PyObject *name)
int offset; int offset;
assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CheckExact(name));
#ifdef INTERN_NAME_STRINGS
assert(PyUnicode_CHECK_INTERNED(name)); assert(PyUnicode_CHECK_INTERNED(name));
#endif
assert(slotdefs_initialized); assert(slotdefs_initialized);
pp = ptrs; pp = ptrs;
for (p = slotdefs; p->name; p++) { for (p = slotdefs; p->name; p++) {
assert(PyUnicode_CheckExact(p->name_strobj)); assert(PyUnicode_CheckExact(p->name_strobj));
assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CheckExact(name));
#ifdef INTERN_NAME_STRINGS
if (p->name_strobj == name) { if (p->name_strobj == name) {
*pp++ = p; *pp++ = p;
} }
#else
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
*pp++ = p;
}
#endif
} }
*pp = NULL; *pp = NULL;
for (pp = ptrs; *pp; pp++) { for (pp = ptrs; *pp; pp++) {

View file

@ -214,6 +214,22 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4 # define OVERALLOCATE_FACTOR 4
#endif #endif
/* bpo-40521: Interned strings are shared by all interpreters. */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERNED_STRINGS
#endif
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.
Another way to look at this is to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
#ifdef INTERNED_STRINGS
static PyObject *interned = NULL;
#endif
/* Forward declaration */ /* Forward declaration */
static inline int static inline int
@ -1950,7 +1966,7 @@ unicode_dealloc(PyObject *unicode)
case SSTATE_INTERNED_MORTAL: case SSTATE_INTERNED_MORTAL:
{ {
struct _Py_unicode_state *state = get_unicode_state(); #ifdef INTERNED_STRINGS
/* Revive the dead object temporarily. PyDict_DelItem() removes two /* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by references (key and value) which were ignored by
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
@ -1958,12 +1974,13 @@ unicode_dealloc(PyObject *unicode)
PyDict_DelItem(). */ PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0); assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3); Py_SET_REFCNT(unicode, 3);
if (PyDict_DelItem(state->interned, unicode) != 0) { if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed", _PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL); NULL);
} }
assert(Py_REFCNT(unicode) == 1); assert(Py_REFCNT(unicode) == 1);
Py_SET_REFCNT(unicode, 0); Py_SET_REFCNT(unicode, 0);
#endif
break; break;
} }
@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if (PyUnicode_CHECK_INTERNED(left)) if (PyUnicode_CHECK_INTERNED(left))
return 0; return 0;
#ifdef INTERNED_STRINGS
assert(_PyUnicode_HASH(right_uni) != -1); assert(_PyUnicode_HASH(right_uni) != -1);
Py_hash_t hash = _PyUnicode_HASH(left); Py_hash_t hash = _PyUnicode_HASH(left);
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) { if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
return 0; return 0;
} }
#endif
return unicode_compare_eq(left, right_uni); return unicode_compare_eq(left, right_uni);
} }
@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
return; return;
} }
#ifdef INTERNED_STRINGS
if (PyUnicode_READY(s) == -1) { if (PyUnicode_READY(s) == -1) {
PyErr_Clear(); PyErr_Clear();
return; return;
} }
struct _Py_unicode_state *state = get_unicode_state(); if (interned == NULL) {
if (state->interned == NULL) { interned = PyDict_New();
state->interned = PyDict_New(); if (interned == NULL) {
if (state->interned == NULL) {
PyErr_Clear(); /* Don't leave an exception */ PyErr_Clear(); /* Don't leave an exception */
return; return;
} }
} }
PyObject *t = PyDict_SetDefault(state->interned, s, s); PyObject *t = PyDict_SetDefault(interned, s, s);
if (t == NULL) { if (t == NULL) {
PyErr_Clear(); PyErr_Clear();
return; return;
@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
this. */ this. */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2); Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
#else
// PyDict expects that interned strings have their hash
// (PyASCIIObject.hash) already computed.
(void)unicode_hash(s);
#endif
} }
void void
PyUnicode_InternImmortal(PyObject **p) PyUnicode_InternImmortal(PyObject **p)
{ {
@ -15658,11 +15681,15 @@ PyUnicode_InternFromString(const char *cp)
void void
_PyUnicode_ClearInterned(PyInterpreterState *interp) _PyUnicode_ClearInterned(PyInterpreterState *interp)
{ {
struct _Py_unicode_state *state = &interp->unicode; if (!_Py_IsMainInterpreter(interp)) {
if (state->interned == NULL) { // interned dict is shared by all interpreters
return; return;
} }
assert(PyDict_CheckExact(state->interned));
if (interned == NULL) {
return;
}
assert(PyDict_CheckExact(interned));
/* Interned unicode strings are not forcibly deallocated; rather, we give /* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the them their stolen references back, and then clear and DECREF the
@ -15670,13 +15697,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
#ifdef INTERNED_STATS #ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n", fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(state->interned)); PyDict_GET_SIZE(interned));
Py_ssize_t immortal_size = 0, mortal_size = 0; Py_ssize_t immortal_size = 0, mortal_size = 0;
#endif #endif
Py_ssize_t pos = 0; Py_ssize_t pos = 0;
PyObject *s, *ignored_value; PyObject *s, *ignored_value;
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
assert(PyUnicode_IS_READY(s)); assert(PyUnicode_IS_READY(s));
switch (PyUnicode_CHECK_INTERNED(s)) { switch (PyUnicode_CHECK_INTERNED(s)) {
@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
mortal_size, immortal_size); mortal_size, immortal_size);
#endif #endif
PyDict_Clear(state->interned); PyDict_Clear(interned);
Py_CLEAR(state->interned); Py_CLEAR(interned);
} }
@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
static inline int static inline int
unicode_is_finalizing(void) unicode_is_finalizing(void)
{ {
struct _Py_unicode_state *state = get_unicode_state(); return (interned == NULL);
return (state->interned == NULL);
} }
#endif #endif
@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{ {
struct _Py_unicode_state *state = &interp->unicode; struct _Py_unicode_state *state = &interp->unicode;
// _PyUnicode_ClearInterned() must be called before if (_Py_IsMainInterpreter(interp)) {
assert(state->interned == NULL); // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(interned == NULL);
}
_PyUnicode_FiniEncodings(&state->fs_codec); _PyUnicode_FiniEncodings(&state->fs_codec);