gh-84436: Implement Immortal Objects (gh-19474)

This is the implementation of PEP683

Motivation:

The PR introduces the ability to immortalize instances in CPython which bypasses reference counting. Tagging objects as immortal allows up to skip certain operations when we know that the object will be around for the entire execution of the runtime.

Note that this by itself will bring a performance regression to the runtime due to the extra reference count checks. However, this brings the ability of having truly immutable objects that are useful in other contexts such as immutable data sharing between sub-interpreters.
This commit is contained in:
Eddie Elizondo 2023-04-22 15:39:37 -04:00 committed by GitHub
parent 916de04fd1
commit ea2c001650
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 483 additions and 171 deletions

View file

@ -228,14 +228,18 @@ static inline PyObject* unicode_new_empty(void)
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.
Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{
return _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
}
Py_ssize_t
_PyUnicode_InternedSize()
{
return PyObject_Length(get_interned_dict(_PyInterpreterState_GET()));
}
static int
init_interned_dict(PyInterpreterState *interp)
{
@ -1538,30 +1542,19 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
static void
unicode_dealloc(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
#ifdef Py_DEBUG
if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) {
_Py_FatalRefcountError("deallocating an Unicode singleton");
}
#endif
/* This should never get called, but we also don't want to SEGV if
* we accidentally decref an immortal string out of existence. Since
* the string is an immortal object, just re-set the reference count.
*/
if (PyUnicode_CHECK_INTERNED(unicode)) {
/* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
to prevent calling unicode_dealloc() again. Adjust refcnt after
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
PyObject *interned = get_interned_dict(interp);
assert(interned != NULL);
if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
assert(Py_REFCNT(unicode) == 1);
Py_SET_REFCNT(unicode, 0);
_Py_SetImmortal(unicode);
return;
}
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyObject_Free(_PyUnicode_UTF8(unicode));
}
@ -14637,11 +14630,21 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
return;
}
/* The two references in interned dict (key and value) are not counted by
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
this. */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = 1;
if (_Py_IsImmortal(s)) {
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
return;
}
#ifdef Py_REF_DEBUG
/* The reference count value excluding the 2 references from the
interned dictionary should be excluded from the RefTotal. The
decrements to these objects will not be registered so they
need to be accounted for in here. */
for (Py_ssize_t i = 0; i < Py_REFCNT(s) - 2; i++) {
_Py_DecRefTotal(_PyInterpreterState_GET());
}
#endif
_Py_SetImmortal(s);
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
}
void
@ -14681,10 +14684,20 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
}
assert(PyDict_CheckExact(interned));
/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */
/* TODO:
* Currently, the runtime is not able to guarantee that it can exit without
* allocations that carry over to a future initialization of Python within
* the same process. i.e:
* ./python -X showrefcount -c 'import itertools'
* [237 refs, 237 blocks]
*
* Therefore, this should remain disabled for until there is a strict guarantee
* that no memory will be left after `Py_Finalize`.
*/
#ifdef Py_DEBUG
/* For all non-singleton interned strings, restore the two valid references
to that instance from within the intern string dictionary and let the
normal reference counting process clean up these instances. */
#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));
@ -14694,15 +14707,27 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
Py_ssize_t pos = 0;
PyObject *s, *ignored_value;
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
assert(PyUnicode_CHECK_INTERNED(s));
// Restore the two references (key and value) ignored
// by PyUnicode_InternInPlace().
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
assert(PyUnicode_IS_READY(s));
switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
// Skip the Immortal Instance check and restore
// the two references (key and value) ignored
// by PyUnicode_InternInPlace().
s->ob_refcnt = 2;
#ifdef INTERNED_STATS
total_length += PyUnicode_GET_LENGTH(s);
total_length += PyUnicode_GET_LENGTH(s);
#endif
_PyUnicode_STATE(s).interned = 0;
break;
case SSTATE_INTERNED_IMMORTAL_STATIC:
break;
case SSTATE_INTERNED_MORTAL:
/* fall through */
case SSTATE_NOT_INTERNED:
/* fall through */
default:
Py_UNREACHABLE();
}
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
}
#ifdef INTERNED_STATS
fprintf(stderr,
@ -14710,6 +14735,12 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
total_length);
#endif
struct _Py_unicode_state *state = &interp->unicode;
struct _Py_unicode_ids *ids = &state->ids;
for (Py_ssize_t i=0; i < ids->size; i++) {
Py_XINCREF(ids->array[i]);
}
#endif /* Py_DEBUG */
clear_interned_dict(interp);
}