This commit is contained in:
Albert Eduardovich N. 2025-12-23 11:32:30 +03:00 committed by GitHub
commit 170cecd367
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 118 additions and 25 deletions

View file

@ -10,6 +10,7 @@ extern "C" {
#include "pycore_fileutils.h" // _Py_error_handler
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "pycore_global_objects.h"// _Py_INTERP_CACHED_OBJECT
// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
@ -358,6 +359,19 @@ extern PyTypeObject _PyUnicodeASCIIIter_Type;
// All these are "ref-neutral", like the public PyUnicode_InternInPlace.
/* This hashtable holds statically allocated interned strings.
 * It is reached through the global _PyRuntime rather than through an
 * interpreter state.
 * See InternalDocs/string_interning.md for details.
 */
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
/* Return the dictionary in which `interp` keeps its interned strings.
 * Every interpreter has its own dictionary.
 * See InternalDocs/string_interning.md for details.
 */
static inline PyObject *
get_interned_dict(PyInterpreterState *interp)
{
    PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
    return interned_dict;
}
// Explicit interning routines:
PyAPI_FUNC(void) _PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **);
PyAPI_FUNC(void) _PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **);

View file

@ -0,0 +1,23 @@
import re
from pathlib import Path
from typing import Iterable
from test.support.project_files_helper import iter_all_c_files
# Copied from 'Tools/build/generate_global_objects.py'.
def iter_global_strings() -> Iterable[str]:
    """Yield the global strings referenced in the project's C sources.

    Each file produced by iter_all_c_files() is read line by line.  For
    every line, the identifiers found inside ``_Py_ID(...)`` are yielded
    first, followed by the string literals found inside
    ``_Py_DECLARE_STR(name, "...")``.  Duplicates are not filtered out.
    """
    # Order matters: _Py_ID matches of a line are yielded before its
    # _Py_DECLARE_STR matches.
    patterns = (
        re.compile(r"\b_Py_ID\((\w+)\)"),
        re.compile(r'\b_Py_DECLARE_STR\((?:\w+), "(.*?)"\)'),
    )
    for filename in iter_all_c_files():
        source_path = Path(filename)
        if not source_path.exists():
            # The file must have been a temporary file.
            continue
        with source_path.open(encoding="utf-8") as source:
            for line in source:
                for pattern in patterns:
                    for match in pattern.finditer(line):
                        yield match.group(1)

View file

@ -0,0 +1,22 @@
from pathlib import Path
from typing import Iterable
ROOT = Path(__file__).resolve().parents[3]
# Copied from 'Tools/build/generate_global_objects.py'.
def iter_all_c_files() -> Iterable[Path]:
    """Yield the path of every ``.c`` and ``.h`` file in the source tree.

    Only a fixed set of top-level directories under ROOT is walked, in
    the order listed below; each directory is traversed recursively.
    """
    top_directory_names = (
        "Modules",
        "Objects",
        "Parser",
        "PC",
        "Programs",
        "Python",
    )
    c_suffixes = (".c", ".h")
    for top_directory_name in top_directory_names:
        for dirname, _, files in (ROOT / top_directory_name).walk():
            yield from (
                dirname / name for name in files if name.endswith(c_suffixes)
            )

View file

@ -211,6 +211,7 @@ except ImportError:
from test.support import (cpython_only,
check_impl_detail, requires_debug_ranges,
gc_collect, Py_GIL_DISABLED)
from test.support.constants_helper import iter_global_strings
from test.support.script_helper import assert_python_ok
from test.support import threading_helper, import_helper
from test.support.bytecode_helper import instructions_with_positions
@ -1251,6 +1252,41 @@ class CodeConstsTest(unittest.TestCase):
self.assertIsInstance(code.co_consts[1], Unhashable)
self.assertEqual(code.co_consts[2], code.co_consts[3])
@cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
def test__Py_DECLARE_STR_is_interned(self):
    # Every global string found in the C sources must come out interned
    # when it is compiled as a string constant.
    for name in iter_global_strings():
        with self.subTest(global_string=name):
            constant = eval(f"'{name}'")
            self.assertIsInterned(constant)
# Newline-separated sample strings, one per line.  The tests below split
# this text with .strip().splitlines() and expect that none of the samples
# is interned merely by appearing as a string constant in compiled code.
# NOTE(review): presumably each sample fails the check in
# should_intern_string() in the code object implementation -- confirm.
noninternable_by_default = textwrap.dedent(f'''
not-internable
not.internable
не_интернируемый
str with spaces
{chr(0x011111)}
{chr(0x9999)}
{chr(0x100)}
''')
@cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
def test_non_internable_strings_not_interned(self):
    # Compiling one of the sample strings as a constant must not intern it.
    samples = self.noninternable_by_default.strip().splitlines()
    for sample in samples:
        with self.subTest(noninternable=sample):
            constant = eval(f"'{sample}'")
            self.assertIsNotInterned(constant)
@cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
def test_explicitly_interned_strings(self):
    # Once a sample string has been interned explicitly with sys.intern(),
    # an equal constant produced by compiled code must be that very object.
    samples = self.noninternable_by_default.strip().splitlines()
    for sample in samples:
        # Precondition: the freshly split string is not interned yet.
        self.assertIsNotInterned(sample)
        sys.intern(sample)
        with self.subTest(noninternable=sample):
            self.assertIsInterned(sample)
            from_code = eval(f"'{sample}'")
            self.assertIsInterned(from_code)
            self.assertIs(sample, from_code)
class CodeWeakRefTest(unittest.TestCase):

View file

@ -113,7 +113,7 @@ PyCode_ClearWatcher(int watcher_id)
#define _PyCodeObject_CAST(op) (assert(PyCode_Check(op)), (PyCodeObject *)(op))
static int
static inline int
should_intern_string(PyObject *o)
{
#ifdef Py_GIL_DISABLED
@ -196,6 +196,8 @@ intern_strings(PyObject *tuple)
return 0;
}
/* Record that the constants tuple was changed: store 1 through `modified`
   when the caller supplied a non-NULL flag pointer, otherwise do nothing.

   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement; a bare `if` would silently capture an `else` that
   follows the invocation.  The argument is parenthesized where it is
   dereferenced, and it is evaluated twice, so pass a side-effect-free
   expression. */
#define _constants_tuple_modified(modified) \
    do { \
        if (modified) { \
            *(modified) = 1; \
        } \
    } while (0)
/* Intern constants. In the default build, this interns selected string
constants. In the free-threaded build, this also interns non-string
constants. */
@ -206,14 +208,28 @@ intern_constants(PyObject *tuple, int *modified)
for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
PyObject *v = PyTuple_GET_ITEM(tuple, i);
if (PyUnicode_CheckExact(v)) {
if (PyUnicode_CHECK_INTERNED(v) != 0) {
continue;
}
#if !defined(Py_GIL_DISABLED)
PyObject *interned = _Py_hashtable_get(INTERNED_STRINGS, v);
if (interned == NULL) {
interned = PyDict_GetItemWithError(get_interned_dict(interp), v);
if (PyErr_Occurred()) return -1;
}
if (interned != NULL && interned != v) {
Py_INCREF(interned);
PyTuple_SET_ITEM(tuple, i, interned);
Py_DECREF(v);
_constants_tuple_modified(modified);
} else
#endif
if (should_intern_string(v)) {
PyObject *w = v;
_PyUnicode_InternMortal(interp, &v);
if (w != v) {
PyTuple_SET_ITEM(tuple, i, v);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
}
}
@ -242,9 +258,7 @@ intern_constants(PyObject *tuple, int *modified)
PyTuple_SET_ITEM(tuple, i, v);
Py_DECREF(w);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
Py_DECREF(tmp);
}
@ -273,9 +287,7 @@ intern_constants(PyObject *tuple, int *modified)
}
PyTuple_SET_ITEM(tuple, i, v);
Py_DECREF(slice);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
Py_DECREF(tmp);
}
@ -293,9 +305,7 @@ intern_constants(PyObject *tuple, int *modified)
else if (interned != v) {
PyTuple_SET_ITEM(tuple, i, interned);
Py_SETREF(v, interned);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
}
#endif

View file

@ -207,18 +207,6 @@ _PyUnicode_GetEmpty(void)
return &_Py_STR(empty);
}
/* This dictionary holds per-interpreter interned strings.
 * The accessor below returns the `interned_strings` cached object of the
 * given interpreter.
 * See InternalDocs/string_interning.md for details.
 */
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{
return _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
}
/* This hashtable holds statically allocated interned strings.
 * It is reached through the global _PyRuntime rather than through an
 * interpreter state.
 * See InternalDocs/string_interning.md for details.
 */
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
/* Get number of all interned strings for the current interpreter. */
Py_ssize_t