gh-106931: Intern Statically Allocated Strings Globally (gh-107272)

We previously tried a global cache implemented as a dict and covering all interned strings.  That approach ran into problems due to interpreter isolation.  However, exclusively using a per-interpreter cache caused some inconsistency that can eliminate the benefit of interning.  Here we circle back to using a global cache, but only for statically allocated strings.  We also use a more basic _Py_hashtable_t for that global cache instead of a dict.

Ideally we would only have the global cache, but the optional isolation of each interpreter's allocator means that a non-static string object must not outlive its interpreter.  Thus we would have to store a copy of each such interned string in the global cache, tied to the main interpreter.
This commit is contained in:
Eric Snow 2023-07-27 13:56:59 -06:00 committed by GitHub
parent 4f67921ad2
commit b72947a8d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 123 additions and 4 deletions

View file

@ -14,6 +14,7 @@ from test.support import os_helper
from test.support.script_helper import assert_python_ok, assert_python_failure
from test.support import threading_helper
from test.support import import_helper
from test.support import interpreters
import textwrap
import unittest
import warnings
@ -699,6 +700,35 @@ class SysModuleTest(unittest.TestCase):
self.assertRaises(TypeError, sys.intern, S("abc"))
def test_subinterp_intern_dynamically_allocated(self):
    """Interning a runtime-built string in a subinterpreter yields a new object.

    The string is interned in the current interpreter first; a freshly
    created subinterpreter then interns an equal string and asserts that
    the object it gets back has a different id() than the one held here.
    """
    global INTERN_NUMRUNS
    INTERN_NUMRUNS = INTERN_NUMRUNS + 1
    # The run counter is appended so that each run builds a distinct string
    # (presumably so a rerun never hits an already-interned value).
    s = "never interned before" + str(INTERN_NUMRUNS)
    t = sys.intern(s)
    self.assertIs(t, s)

    subinterp = interpreters.create()
    # The ids of this interpreter's objects are baked into the script as
    # integer literals; textwrap.dedent() strips the common indentation
    # before the code is handed to the subinterpreter.
    script = textwrap.dedent(f'''
        import sys
        t = sys.intern({s!r})
        assert id(t) != {id(s)}, (id(t), {id(s)})
        assert id(t) != {id(t)}, (id(t), {id(t)})
        ''')
    subinterp.run(script)
def test_subinterp_intern_statically_allocated(self):
    """Interning a statically allocated string gives the same object everywhere.

    '__init__' is interned in the current interpreter, then a freshly
    created subinterpreter interns the same text and asserts that the
    object it gets back has the same id() as the one held here.
    """
    # See Tools/build/generate_global_objects.py for the list
    # of strings that are always statically allocated.
    s = '__init__'
    t = sys.intern(s)

    interp = interpreters.create()
    # id(t) from this interpreter is embedded in the script as an integer
    # literal; textwrap.dedent() strips the common indentation before the
    # code is handed to the subinterpreter.
    interp.run(textwrap.dedent(f'''
        import sys
        t = sys.intern({s!r})
        assert id(t) == {id(t)}, (id(t), {id(t)})
        '''))
def test_sys_flags(self):
self.assertTrue(sys.flags)
attrs = ("debug",