[3.13] gh-113993: Allow interned strings to be mortal, and fix related issues (GH-120520) (GH-120945)

* Add an InternalDocs file describing how interning should work and how to use it.

* Add internal functions to *explicitly* request what kind of interning is done:
  - `_PyUnicode_InternMortal`
  - `_PyUnicode_InternImmortal`
  - `_PyUnicode_InternStatic`

* Switch uses of `PyUnicode_InternInPlace` to those.

* Disallow using `_Py_SetImmortal` on strings directly.
  You should use `_PyUnicode_InternImmortal` instead:
  - Strings should be interned before immortalization, otherwise you may end up
    immortalizing a copy instead of the interned string.
  - `_Py_SetImmortal` doesn't handle the `SSTATE_INTERNED_MORTAL` to
    `SSTATE_INTERNED_IMMORTAL` update, and those flags can't be changed in
    backports, as they are now part of public API and version-specific ABI.

* Add private `_only_immortal` argument for `sys.getunicodeinternedsize`, used in refleak test machinery.

* Make sure the statically allocated string singletons are unique. This means these sets are now disjoint:
  - `_Py_ID`
  - `_Py_STR` (including the empty string)
  - one-character latin-1 singletons

  Now, when you intern a singleton, that exact singleton will be interned.

* Add a `_Py_LATIN1_CHR` macro, use it instead of `_Py_ID`/`_Py_STR` for one-character latin-1 singletons everywhere (including Clinic).

* Intern `_Py_STR` singletons at startup.

* For free-threaded builds, intern `_Py_LATIN1_CHR` singletons at startup.

* Beef up the tests. Cover internal details (marked with `@cpython_only`).

* Add lots of assertions.

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
Petr Viktorin 2024-06-24 20:24:19 +02:00 committed by GitHub
parent 447e07ab3d
commit 9769b7ae06
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 2460 additions and 1140 deletions

View file

@ -747,7 +747,7 @@ sys_displayhook(PyObject *module, PyObject *o)
if (o == Py_None) {
Py_RETURN_NONE;
}
if (PyObject_SetAttr(builtins, &_Py_ID(_), Py_None) != 0)
if (PyObject_SetAttr(builtins, _Py_LATIN1_CHR('_'), Py_None) != 0)
return NULL;
outf = _PySys_GetAttr(tstate, &_Py_ID(stdout));
if (outf == NULL || outf == Py_None) {
@ -769,10 +769,9 @@ sys_displayhook(PyObject *module, PyObject *o)
return NULL;
}
}
_Py_DECLARE_STR(newline, "\n");
if (PyFile_WriteObject(&_Py_STR(newline), outf, Py_PRINT_RAW) != 0)
if (PyFile_WriteObject(_Py_LATIN1_CHR('\n'), outf, Py_PRINT_RAW) != 0)
return NULL;
if (PyObject_SetAttr(builtins, &_Py_ID(_), o) != 0)
if (PyObject_SetAttr(builtins, _Py_LATIN1_CHR('_'), o) != 0)
return NULL;
Py_RETURN_NONE;
}
@ -930,7 +929,7 @@ sys_getfilesystemencoding_impl(PyObject *module)
if (u == NULL) {
return NULL;
}
_PyUnicode_InternInPlace(interp, &u);
_PyUnicode_InternImmortal(interp, &u);
return u;
}
@ -950,7 +949,7 @@ sys_getfilesystemencodeerrors_impl(PyObject *module)
if (u == NULL) {
return NULL;
}
_PyUnicode_InternInPlace(interp, &u);
_PyUnicode_InternImmortal(interp, &u);
return u;
}
@ -972,8 +971,9 @@ sys_intern_impl(PyObject *module, PyObject *s)
/*[clinic end generated code: output=be680c24f5c9e5d6 input=849483c006924e2f]*/
{
if (PyUnicode_CheckExact(s)) {
PyInterpreterState *interp = _PyInterpreterState_GET();
Py_INCREF(s);
PyUnicode_InternInPlace(&s);
_PyUnicode_InternMortal(interp, &s);
return s;
}
else {
@ -2007,14 +2007,22 @@ sys_getallocatedblocks_impl(PyObject *module)
/*[clinic input]
sys.getunicodeinternedsize -> Py_ssize_t
*
_only_immortal: bool = False
Return the number of elements of the unicode interned dictionary
[clinic start generated code]*/
static Py_ssize_t
sys_getunicodeinternedsize_impl(PyObject *module)
/*[clinic end generated code: output=ad0e4c9738ed4129 input=726298eaa063347a]*/
sys_getunicodeinternedsize_impl(PyObject *module, int _only_immortal)
/*[clinic end generated code: output=29a6377a94a14f70 input=0330b3408dd5bcc6]*/
{
return _PyUnicode_InternedSize();
if (_only_immortal) {
return _PyUnicode_InternedSize_Immortal();
}
else {
return _PyUnicode_InternedSize();
}
}
/*[clinic input]