mirror of
https://github.com/python/cpython.git
synced 2025-09-27 18:59:43 +00:00
bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422)
This reverts commit ea251806b8.
Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for
the main interpreter.
Keep _PyUnicode_ClearInterned() changes avoiding the creation of a
temporary Python list object.
This commit is contained in:
parent
e5894ca8fd
commit
35d6540c90
4 changed files with 75 additions and 30 deletions
|
@ -48,21 +48,11 @@ struct _Py_unicode_state {
|
||||||
PyObject *latin1[256];
|
PyObject *latin1[256];
|
||||||
struct _Py_unicode_fs_codec fs_codec;
|
struct _Py_unicode_fs_codec fs_codec;
|
||||||
|
|
||||||
/* This dictionary holds all interned unicode strings. Note that references
|
|
||||||
to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
|
||||||
When the interned string reaches a refcnt of 0 the string deallocation
|
|
||||||
function will delete the reference from this dictionary.
|
|
||||||
|
|
||||||
Another way to look at this is that to say that the actual reference
|
|
||||||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
|
||||||
*/
|
|
||||||
PyObject *interned;
|
|
||||||
|
|
||||||
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
|
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
|
||||||
struct _Py_unicode_ids ids;
|
struct _Py_unicode_ids ids;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void _PyUnicode_ClearInterned(PyInterpreterState *);
|
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
Fix a regression when a type method like ``__init__()`` is modified in a
|
||||||
|
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
|
||||||
|
``update_slot()``. Revert the change which made the Unicode dictionary of
|
||||||
|
interned strings compatible with subinterpreters: the internal interned
|
||||||
|
dictionary is shared again by all interpreters. Patch by Victor Stinner.
|
|
@ -54,6 +54,11 @@ typedef struct PySlot_Offset {
|
||||||
} PySlot_Offset;
|
} PySlot_Offset;
|
||||||
|
|
||||||
|
|
||||||
|
/* bpo-40521: Interned strings are shared by all subinterpreters */
|
||||||
|
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||||
|
# define INTERN_NAME_STRINGS
|
||||||
|
#endif
|
||||||
|
|
||||||
/* alphabetical order */
|
/* alphabetical order */
|
||||||
_Py_IDENTIFIER(__abstractmethods__);
|
_Py_IDENTIFIER(__abstractmethods__);
|
||||||
_Py_IDENTIFIER(__annotations__);
|
_Py_IDENTIFIER(__annotations__);
|
||||||
|
@ -4028,6 +4033,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
if (!PyUnicode_CHECK_INTERNED(name)) {
|
if (!PyUnicode_CHECK_INTERNED(name)) {
|
||||||
PyUnicode_InternInPlace(&name);
|
PyUnicode_InternInPlace(&name);
|
||||||
if (!PyUnicode_CHECK_INTERNED(name)) {
|
if (!PyUnicode_CHECK_INTERNED(name)) {
|
||||||
|
@ -4037,6 +4043,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Will fail in _PyObject_GenericSetAttrWithDict. */
|
/* Will fail in _PyObject_GenericSetAttrWithDict. */
|
||||||
|
@ -8424,10 +8431,17 @@ _PyTypes_InitSlotDefs(void)
|
||||||
for (slotdef *p = slotdefs; p->name; p++) {
|
for (slotdef *p = slotdefs; p->name; p++) {
|
||||||
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
|
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
|
||||||
assert(!p[1].name || p->offset <= p[1].offset);
|
assert(!p[1].name || p->offset <= p[1].offset);
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
p->name_strobj = PyUnicode_InternFromString(p->name);
|
p->name_strobj = PyUnicode_InternFromString(p->name);
|
||||||
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
|
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
|
||||||
return _PyStatus_NO_MEMORY();
|
return _PyStatus_NO_MEMORY();
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
p->name_strobj = PyUnicode_FromString(p->name);
|
||||||
|
if (!p->name_strobj) {
|
||||||
|
return _PyStatus_NO_MEMORY();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
slotdefs_initialized = 1;
|
slotdefs_initialized = 1;
|
||||||
return _PyStatus_OK();
|
return _PyStatus_OK();
|
||||||
|
@ -8452,16 +8466,24 @@ update_slot(PyTypeObject *type, PyObject *name)
|
||||||
int offset;
|
int offset;
|
||||||
|
|
||||||
assert(PyUnicode_CheckExact(name));
|
assert(PyUnicode_CheckExact(name));
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
assert(PyUnicode_CHECK_INTERNED(name));
|
assert(PyUnicode_CHECK_INTERNED(name));
|
||||||
|
#endif
|
||||||
|
|
||||||
assert(slotdefs_initialized);
|
assert(slotdefs_initialized);
|
||||||
pp = ptrs;
|
pp = ptrs;
|
||||||
for (p = slotdefs; p->name; p++) {
|
for (p = slotdefs; p->name; p++) {
|
||||||
assert(PyUnicode_CheckExact(p->name_strobj));
|
assert(PyUnicode_CheckExact(p->name_strobj));
|
||||||
assert(PyUnicode_CheckExact(name));
|
assert(PyUnicode_CheckExact(name));
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
if (p->name_strobj == name) {
|
if (p->name_strobj == name) {
|
||||||
*pp++ = p;
|
*pp++ = p;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
|
||||||
|
*pp++ = p;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
*pp = NULL;
|
*pp = NULL;
|
||||||
for (pp = ptrs; *pp; pp++) {
|
for (pp = ptrs; *pp; pp++) {
|
||||||
|
|
|
@ -214,6 +214,22 @@ extern "C" {
|
||||||
# define OVERALLOCATE_FACTOR 4
|
# define OVERALLOCATE_FACTOR 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* bpo-40521: Interned strings are shared by all interpreters. */
|
||||||
|
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||||
|
# define INTERNED_STRINGS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This dictionary holds all interned unicode strings. Note that references
|
||||||
|
to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
||||||
|
When the interned string reaches a refcnt of 0 the string deallocation
|
||||||
|
function will delete the reference from this dictionary.
|
||||||
|
|
||||||
|
Another way to look at this is that to say that the actual reference
|
||||||
|
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
||||||
|
*/
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
|
static PyObject *interned = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
static inline int
|
static inline int
|
||||||
|
@ -1950,7 +1966,7 @@ unicode_dealloc(PyObject *unicode)
|
||||||
|
|
||||||
case SSTATE_INTERNED_MORTAL:
|
case SSTATE_INTERNED_MORTAL:
|
||||||
{
|
{
|
||||||
struct _Py_unicode_state *state = get_unicode_state();
|
#ifdef INTERNED_STRINGS
|
||||||
/* Revive the dead object temporarily. PyDict_DelItem() removes two
|
/* Revive the dead object temporarily. PyDict_DelItem() removes two
|
||||||
references (key and value) which were ignored by
|
references (key and value) which were ignored by
|
||||||
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
|
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
|
||||||
|
@ -1958,12 +1974,13 @@ unicode_dealloc(PyObject *unicode)
|
||||||
PyDict_DelItem(). */
|
PyDict_DelItem(). */
|
||||||
assert(Py_REFCNT(unicode) == 0);
|
assert(Py_REFCNT(unicode) == 0);
|
||||||
Py_SET_REFCNT(unicode, 3);
|
Py_SET_REFCNT(unicode, 3);
|
||||||
if (PyDict_DelItem(state->interned, unicode) != 0) {
|
if (PyDict_DelItem(interned, unicode) != 0) {
|
||||||
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
||||||
NULL);
|
NULL);
|
||||||
}
|
}
|
||||||
assert(Py_REFCNT(unicode) == 1);
|
assert(Py_REFCNT(unicode) == 1);
|
||||||
Py_SET_REFCNT(unicode, 0);
|
Py_SET_REFCNT(unicode, 0);
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
||||||
if (PyUnicode_CHECK_INTERNED(left))
|
if (PyUnicode_CHECK_INTERNED(left))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
assert(_PyUnicode_HASH(right_uni) != -1);
|
assert(_PyUnicode_HASH(right_uni) != -1);
|
||||||
Py_hash_t hash = _PyUnicode_HASH(left);
|
Py_hash_t hash = _PyUnicode_HASH(left);
|
||||||
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
|
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return unicode_compare_eq(left, right_uni);
|
return unicode_compare_eq(left, right_uni);
|
||||||
}
|
}
|
||||||
|
@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
if (PyUnicode_READY(s) == -1) {
|
if (PyUnicode_READY(s) == -1) {
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct _Py_unicode_state *state = get_unicode_state();
|
if (interned == NULL) {
|
||||||
if (state->interned == NULL) {
|
interned = PyDict_New();
|
||||||
state->interned = PyDict_New();
|
if (interned == NULL) {
|
||||||
if (state->interned == NULL) {
|
|
||||||
PyErr_Clear(); /* Don't leave an exception */
|
PyErr_Clear(); /* Don't leave an exception */
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *t = PyDict_SetDefault(state->interned, s, s);
|
PyObject *t = PyDict_SetDefault(interned, s, s);
|
||||||
if (t == NULL) {
|
if (t == NULL) {
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
return;
|
return;
|
||||||
|
@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
|
||||||
this. */
|
this. */
|
||||||
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
|
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
|
||||||
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
|
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
|
||||||
|
#else
|
||||||
|
// PyDict expects that interned strings have their hash
|
||||||
|
// (PyASCIIObject.hash) already computed.
|
||||||
|
(void)unicode_hash(s);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
PyUnicode_InternImmortal(PyObject **p)
|
PyUnicode_InternImmortal(PyObject **p)
|
||||||
{
|
{
|
||||||
|
@ -15658,11 +15681,15 @@ PyUnicode_InternFromString(const char *cp)
|
||||||
void
|
void
|
||||||
_PyUnicode_ClearInterned(PyInterpreterState *interp)
|
_PyUnicode_ClearInterned(PyInterpreterState *interp)
|
||||||
{
|
{
|
||||||
struct _Py_unicode_state *state = &interp->unicode;
|
if (!_Py_IsMainInterpreter(interp)) {
|
||||||
if (state->interned == NULL) {
|
// interned dict is shared by all interpreters
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
assert(PyDict_CheckExact(state->interned));
|
|
||||||
|
if (interned == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assert(PyDict_CheckExact(interned));
|
||||||
|
|
||||||
/* Interned unicode strings are not forcibly deallocated; rather, we give
|
/* Interned unicode strings are not forcibly deallocated; rather, we give
|
||||||
them their stolen references back, and then clear and DECREF the
|
them their stolen references back, and then clear and DECREF the
|
||||||
|
@ -15670,13 +15697,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
|
||||||
|
|
||||||
#ifdef INTERNED_STATS
|
#ifdef INTERNED_STATS
|
||||||
fprintf(stderr, "releasing %zd interned strings\n",
|
fprintf(stderr, "releasing %zd interned strings\n",
|
||||||
PyDict_GET_SIZE(state->interned));
|
PyDict_GET_SIZE(interned));
|
||||||
|
|
||||||
Py_ssize_t immortal_size = 0, mortal_size = 0;
|
Py_ssize_t immortal_size = 0, mortal_size = 0;
|
||||||
#endif
|
#endif
|
||||||
Py_ssize_t pos = 0;
|
Py_ssize_t pos = 0;
|
||||||
PyObject *s, *ignored_value;
|
PyObject *s, *ignored_value;
|
||||||
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
|
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
|
||||||
assert(PyUnicode_IS_READY(s));
|
assert(PyUnicode_IS_READY(s));
|
||||||
|
|
||||||
switch (PyUnicode_CHECK_INTERNED(s)) {
|
switch (PyUnicode_CHECK_INTERNED(s)) {
|
||||||
|
@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
|
||||||
mortal_size, immortal_size);
|
mortal_size, immortal_size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PyDict_Clear(state->interned);
|
PyDict_Clear(interned);
|
||||||
Py_CLEAR(state->interned);
|
Py_CLEAR(interned);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
|
||||||
static inline int
|
static inline int
|
||||||
unicode_is_finalizing(void)
|
unicode_is_finalizing(void)
|
||||||
{
|
{
|
||||||
struct _Py_unicode_state *state = get_unicode_state();
|
return (interned == NULL);
|
||||||
return (state->interned == NULL);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
|
||||||
{
|
{
|
||||||
struct _Py_unicode_state *state = &interp->unicode;
|
struct _Py_unicode_state *state = &interp->unicode;
|
||||||
|
|
||||||
// _PyUnicode_ClearInterned() must be called before
|
if (_Py_IsMainInterpreter(interp)) {
|
||||||
assert(state->interned == NULL);
|
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
|
||||||
|
assert(interned == NULL);
|
||||||
|
}
|
||||||
|
|
||||||
_PyUnicode_FiniEncodings(&state->fs_codec);
|
_PyUnicode_FiniEncodings(&state->fs_codec);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue