mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
gh-113993: Allow interned strings to be mortal, and fix related issues (GH-120520)
* Add an InternalDocs file describing how interning should work and how to use it. * Add internal functions to *explicitly* request what kind of interning is done: - `_PyUnicode_InternMortal` - `_PyUnicode_InternImmortal` - `_PyUnicode_InternStatic` * Switch uses of `PyUnicode_InternInPlace` to those. * Disallow using `_Py_SetImmortal` on strings directly. You should use `_PyUnicode_InternImmortal` instead: - Strings should be interned before immortalization, otherwise you're possibly interning a immortalizing copy. - `_Py_SetImmortal` doesn't handle the `SSTATE_INTERNED_MORTAL` to `SSTATE_INTERNED_IMMORTAL` update, and those flags can't be changed in backports, as they are now part of public API and version-specific ABI. * Add private `_only_immortal` argument for `sys.getunicodeinternedsize`, used in refleak test machinery. * Make sure the statically allocated string singletons are unique. This means these sets are now disjoint: - `_Py_ID` - `_Py_STR` (including the empty string) - one-character latin-1 singletons Now, when you intern a singleton, that exact singleton will be interned. * Add a `_Py_LATIN1_CHR` macro, use it instead of `_Py_ID`/`_Py_STR` for one-character latin-1 singletons everywhere (including Clinic). * Intern `_Py_STR` singletons at startup. * For free-threaded builds, intern `_Py_LATIN1_CHR` singletons at startup. * Beef up the tests. Cover internal details (marked with `@cpython_only`). * Add lots of assertions Co-Authored-By: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
parent
7595e6743a
commit
6f1d448bc1
42 changed files with 2464 additions and 1140 deletions
|
@ -37,12 +37,10 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
|
||||
STRUCT_FOR_STR(anon_string, "<string>")
|
||||
STRUCT_FOR_STR(anon_unknown, "<unknown>")
|
||||
STRUCT_FOR_STR(close_br, "}")
|
||||
STRUCT_FOR_STR(dbl_close_br, "}}")
|
||||
STRUCT_FOR_STR(dbl_open_br, "{{")
|
||||
STRUCT_FOR_STR(dbl_percent, "%%")
|
||||
STRUCT_FOR_STR(defaults, ".defaults")
|
||||
STRUCT_FOR_STR(dot, ".")
|
||||
STRUCT_FOR_STR(dot_locals, ".<locals>")
|
||||
STRUCT_FOR_STR(empty, "")
|
||||
STRUCT_FOR_STR(format, ".format")
|
||||
|
@ -50,9 +48,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_STR(json_decoder, "json.decoder")
|
||||
STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
|
||||
STRUCT_FOR_STR(list_err, "list index out of range")
|
||||
STRUCT_FOR_STR(newline, "\n")
|
||||
STRUCT_FOR_STR(open_br, "{")
|
||||
STRUCT_FOR_STR(percent, "%")
|
||||
STRUCT_FOR_STR(type_params, ".type_params")
|
||||
STRUCT_FOR_STR(utf_8, "utf-8")
|
||||
} literals;
|
||||
|
@ -67,7 +62,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(TextIOWrapper)
|
||||
STRUCT_FOR_ID(True)
|
||||
STRUCT_FOR_ID(WarningMessage)
|
||||
STRUCT_FOR_ID(_)
|
||||
STRUCT_FOR_ID(_WindowsConsoleIO)
|
||||
STRUCT_FOR_ID(__IOBase_closed)
|
||||
STRUCT_FOR_ID(__abc_tpflags__)
|
||||
|
@ -261,6 +255,7 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(_lock_unlock_module)
|
||||
STRUCT_FOR_ID(_loop)
|
||||
STRUCT_FOR_ID(_needs_com_addref_)
|
||||
STRUCT_FOR_ID(_only_immortal)
|
||||
STRUCT_FOR_ID(_pack_)
|
||||
STRUCT_FOR_ID(_restype_)
|
||||
STRUCT_FOR_ID(_showwarnmsg)
|
||||
|
@ -273,7 +268,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(_uninitialized_submodules)
|
||||
STRUCT_FOR_ID(_warn_unawaited_coroutine)
|
||||
STRUCT_FOR_ID(_xoptions)
|
||||
STRUCT_FOR_ID(a)
|
||||
STRUCT_FOR_ID(abs_tol)
|
||||
STRUCT_FOR_ID(access)
|
||||
STRUCT_FOR_ID(aclose)
|
||||
|
@ -297,7 +291,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(attribute)
|
||||
STRUCT_FOR_ID(authorizer_callback)
|
||||
STRUCT_FOR_ID(autocommit)
|
||||
STRUCT_FOR_ID(b)
|
||||
STRUCT_FOR_ID(backtick)
|
||||
STRUCT_FOR_ID(base)
|
||||
STRUCT_FOR_ID(before)
|
||||
|
@ -315,7 +308,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(byteorder)
|
||||
STRUCT_FOR_ID(bytes)
|
||||
STRUCT_FOR_ID(bytes_per_sep)
|
||||
STRUCT_FOR_ID(c)
|
||||
STRUCT_FOR_ID(c_call)
|
||||
STRUCT_FOR_ID(c_exception)
|
||||
STRUCT_FOR_ID(c_return)
|
||||
|
@ -371,7 +363,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(count)
|
||||
STRUCT_FOR_ID(covariant)
|
||||
STRUCT_FOR_ID(cwd)
|
||||
STRUCT_FOR_ID(d)
|
||||
STRUCT_FOR_ID(data)
|
||||
STRUCT_FOR_ID(database)
|
||||
STRUCT_FOR_ID(day)
|
||||
|
@ -400,7 +391,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(dont_inherit)
|
||||
STRUCT_FOR_ID(dst)
|
||||
STRUCT_FOR_ID(dst_dir_fd)
|
||||
STRUCT_FOR_ID(e)
|
||||
STRUCT_FOR_ID(eager_start)
|
||||
STRUCT_FOR_ID(effective_ids)
|
||||
STRUCT_FOR_ID(element_factory)
|
||||
|
@ -424,7 +414,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(exp)
|
||||
STRUCT_FOR_ID(extend)
|
||||
STRUCT_FOR_ID(extra_tokens)
|
||||
STRUCT_FOR_ID(f)
|
||||
STRUCT_FOR_ID(facility)
|
||||
STRUCT_FOR_ID(factory)
|
||||
STRUCT_FOR_ID(false)
|
||||
|
@ -457,7 +446,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(fset)
|
||||
STRUCT_FOR_ID(func)
|
||||
STRUCT_FOR_ID(future)
|
||||
STRUCT_FOR_ID(g)
|
||||
STRUCT_FOR_ID(generation)
|
||||
STRUCT_FOR_ID(genexpr)
|
||||
STRUCT_FOR_ID(get)
|
||||
|
@ -471,7 +459,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(globals)
|
||||
STRUCT_FOR_ID(groupindex)
|
||||
STRUCT_FOR_ID(groups)
|
||||
STRUCT_FOR_ID(h)
|
||||
STRUCT_FOR_ID(handle)
|
||||
STRUCT_FOR_ID(handle_seq)
|
||||
STRUCT_FOR_ID(has_location)
|
||||
|
@ -582,7 +569,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(msg)
|
||||
STRUCT_FOR_ID(mutex)
|
||||
STRUCT_FOR_ID(mycmp)
|
||||
STRUCT_FOR_ID(n)
|
||||
STRUCT_FOR_ID(n_arg)
|
||||
STRUCT_FOR_ID(n_fields)
|
||||
STRUCT_FOR_ID(n_sequence_fields)
|
||||
|
@ -628,7 +614,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(outgoing)
|
||||
STRUCT_FOR_ID(overlapped)
|
||||
STRUCT_FOR_ID(owner)
|
||||
STRUCT_FOR_ID(p)
|
||||
STRUCT_FOR_ID(pages)
|
||||
STRUCT_FOR_ID(parent)
|
||||
STRUCT_FOR_ID(password)
|
||||
|
@ -656,7 +641,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(ps2)
|
||||
STRUCT_FOR_ID(query)
|
||||
STRUCT_FOR_ID(quotetabs)
|
||||
STRUCT_FOR_ID(r)
|
||||
STRUCT_FOR_ID(raw)
|
||||
STRUCT_FOR_ID(read)
|
||||
STRUCT_FOR_ID(read1)
|
||||
|
@ -680,7 +664,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(return)
|
||||
STRUCT_FOR_ID(reverse)
|
||||
STRUCT_FOR_ID(reversed)
|
||||
STRUCT_FOR_ID(s)
|
||||
STRUCT_FOR_ID(salt)
|
||||
STRUCT_FOR_ID(sched_priority)
|
||||
STRUCT_FOR_ID(scheduler)
|
||||
|
@ -787,7 +770,6 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_ID(writable)
|
||||
STRUCT_FOR_ID(write)
|
||||
STRUCT_FOR_ID(write_through)
|
||||
STRUCT_FOR_ID(x)
|
||||
STRUCT_FOR_ID(year)
|
||||
STRUCT_FOR_ID(zdict)
|
||||
} identifiers;
|
||||
|
@ -810,6 +792,10 @@ struct _Py_global_strings {
|
|||
(_Py_SINGLETON(strings.identifiers._py_ ## NAME._ascii.ob_base))
|
||||
#define _Py_STR(NAME) \
|
||||
(_Py_SINGLETON(strings.literals._py_ ## NAME._ascii.ob_base))
|
||||
#define _Py_LATIN1_CHR(CH) \
|
||||
((CH) < 128 \
|
||||
? (PyObject*)&_Py_SINGLETON(strings).ascii[(CH)] \
|
||||
: (PyObject*)&_Py_SINGLETON(strings).latin1[(CH) - 128])
|
||||
|
||||
/* _Py_DECLARE_STR() should precede all uses of _Py_STR() in a function.
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue