mirror of
https://github.com/python/cpython.git
synced 2025-12-23 09:19:18 +00:00
Merge 056e2c55d5 into f9704f1d84
This commit is contained in:
commit
170cecd367
6 changed files with 118 additions and 25 deletions
|
|
@ -10,6 +10,7 @@ extern "C" {
|
|||
|
||||
#include "pycore_fileutils.h" // _Py_error_handler
|
||||
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
|
||||
#include "pycore_global_objects.h"// _Py_INTERP_CACHED_OBJECT
|
||||
|
||||
|
||||
// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
|
||||
|
|
@ -358,6 +359,19 @@ extern PyTypeObject _PyUnicodeASCIIIter_Type;
|
|||
|
||||
// All these are "ref-neutral", like the public PyUnicode_InternInPlace.
|
||||
|
||||
/* This hashtable holds statically allocated interned strings.
|
||||
* See InternalDocs/string_interning.md for details.
|
||||
*/
|
||||
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
|
||||
|
||||
/* This dictionary holds per-interpreter interned strings.
|
||||
* See InternalDocs/string_interning.md for details.
|
||||
*/
|
||||
/* Return the interned-strings object cached on the given interpreter. */
static inline PyObject *
get_interned_dict(PyInterpreterState *interp)
{
    PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
    return interned_dict;
}
|
||||
|
||||
// Explicit interning routines:
|
||||
PyAPI_FUNC(void) _PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **);
|
||||
PyAPI_FUNC(void) _PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **);
|
||||
|
|
|
|||
23
Lib/test/support/constants_helper.py
Normal file
23
Lib/test/support/constants_helper.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from test.support.project_files_helper import iter_all_c_files
|
||||
|
||||
|
||||
# Copied from 'Tools/build/generate_global_objects.py'.
|
||||
def iter_global_strings() -> Iterable[str]:
    """Yield every global string referenced in the C sources.

    Scans each file produced by iter_all_c_files() line by line and yields,
    in the order found, the identifier of every ``_Py_ID(name)`` use and the
    quoted text (exactly as written in the source) of every
    ``_Py_DECLARE_STR(name, "text")`` use.  Duplicates are not removed.
    """
    id_regex = re.compile(r"\b_Py_ID\((\w+)\)")
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((?:\w+), "(.*?)"\)')
    for filename in iter_all_c_files():
        try:
            # Open directly rather than testing exists() first: a file can
            # vanish between the check and the open.
            infile = Path(filename).open(encoding="utf-8")
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with infile:
            for line in infile:
                for m in id_regex.finditer(line):
                    yield m.group(1)
                for m in str_regex.finditer(line):
                    yield m.group(1)
|
||||
22
Lib/test/support/project_files_helper.py
Normal file
22
Lib/test/support/project_files_helper.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from pathlib import Path
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[3]
|
||||
|
||||
|
||||
# Copied from 'Tools/build/generate_global_objects.py'.
|
||||
def iter_all_c_files() -> Iterable[Path]:
    """Yield the path of every ``.c`` and ``.h`` file found by walking the
    C source directories located directly under ROOT.
    """
    source_dirs = ("Modules", "Objects", "Parser", "PC", "Programs", "Python")
    c_suffixes = (".c", ".h")
    for subdir in source_dirs:
        for parent, _subdirs, filenames in (ROOT / subdir).walk():
            for filename in filenames:
                if filename.endswith(c_suffixes):
                    yield parent / filename
|
||||
|
|
@ -211,6 +211,7 @@ except ImportError:
|
|||
from test.support import (cpython_only,
|
||||
check_impl_detail, requires_debug_ranges,
|
||||
gc_collect, Py_GIL_DISABLED)
|
||||
from test.support.constants_helper import iter_global_strings
|
||||
from test.support.script_helper import assert_python_ok
|
||||
from test.support import threading_helper, import_helper
|
||||
from test.support.bytecode_helper import instructions_with_positions
|
||||
|
|
@ -1251,6 +1252,41 @@ class CodeConstsTest(unittest.TestCase):
|
|||
self.assertIsInstance(code.co_consts[1], Unhashable)
|
||||
self.assertEqual(code.co_consts[2], code.co_consts[3])
|
||||
|
||||
@cpython_only
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
|
||||
def test__Py_DECLARE_STR_is_interned(self):
    # Every global string found in the C sources, when compiled as a
    # literal constant, must evaluate to an interned string.
    for global_string in iter_global_strings():
        literal_source = f"'{global_string}'"
        with self.subTest(global_string=global_string):
            constant = eval(literal_source)
            self.assertIsInterned(constant)
|
||||
|
||||
# One sample string per line.  The tests that consume this value split it
# with .strip().splitlines() and expect that none of these strings is
# interned automatically when it appears as a constant in compiled code.
noninternable_by_default = textwrap.dedent(f'''
    not-internable
    not.internable
    не_интернируемый
    str with spaces
    {chr(0x011111)}
    {chr(0x9999)}
    {chr(0x100)}
''')
|
||||
|
||||
@cpython_only
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
|
||||
def test_non_internable_strings_not_interned(self):
    # Each sample string, compiled as a literal constant, must come back
    # from eval() without having been interned.
    samples = self.noninternable_by_default.strip().splitlines()
    for noninternable in samples:
        literal_source = f"'{noninternable}'"
        with self.subTest(noninternable=noninternable):
            constant = eval(literal_source)
            self.assertIsNotInterned(constant)
|
||||
|
||||
@cpython_only
|
||||
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
|
||||
def test_explicitly_interned_strings(self):
    # After sys.intern(), a string that is not interned by default must be
    # reported as interned, and an equal literal constant compiled
    # afterwards must be the very same object.
    for noninternable in self.noninternable_by_default.strip().splitlines():
        # The precondition check and the sys.intern() call live inside the
        # subTest so that a failure is attributed to the offending string
        # and does not abort the remaining cases.
        with self.subTest(noninternable=noninternable):
            self.assertIsNotInterned(noninternable)
            sys.intern(noninternable)
            self.assertIsInterned(noninternable)
            interned_from_code = eval(f"'{noninternable}'")
            self.assertIsInterned(interned_from_code)
            self.assertIs(noninternable, interned_from_code)
|
||||
|
||||
class CodeWeakRefTest(unittest.TestCase):
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ PyCode_ClearWatcher(int watcher_id)
|
|||
|
||||
#define _PyCodeObject_CAST(op) (assert(PyCode_Check(op)), (PyCodeObject *)(op))
|
||||
|
||||
static int
|
||||
static inline int
|
||||
should_intern_string(PyObject *o)
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
|
@ -196,6 +196,8 @@ intern_strings(PyObject *tuple)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Set *modified to 1 when the caller passed a non-NULL flag pointer.
   Wrapped in do { } while (0) so the expansion is a single statement and
   cannot capture an `else` that follows the macro invocation. */
#define _constants_tuple_modified(modified) \
    do {                                    \
        if (modified) {                     \
            *(modified) = 1;                \
        }                                   \
    } while (0)
|
||||
|
||||
/* Intern constants. In the default build, this interns selected string
|
||||
constants. In the free-threaded build, this also interns non-string
|
||||
constants. */
|
||||
|
|
@ -206,14 +208,28 @@ intern_constants(PyObject *tuple, int *modified)
|
|||
for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
|
||||
PyObject *v = PyTuple_GET_ITEM(tuple, i);
|
||||
if (PyUnicode_CheckExact(v)) {
|
||||
if (PyUnicode_CHECK_INTERNED(v) != 0) {
|
||||
continue;
|
||||
}
|
||||
#if !defined(Py_GIL_DISABLED)
|
||||
PyObject *interned = _Py_hashtable_get(INTERNED_STRINGS, v);
|
||||
if (interned == NULL) {
|
||||
interned = PyDict_GetItemWithError(get_interned_dict(interp), v);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
}
|
||||
if (interned != NULL && interned != v) {
|
||||
Py_INCREF(interned);
|
||||
PyTuple_SET_ITEM(tuple, i, interned);
|
||||
Py_DECREF(v);
|
||||
_constants_tuple_modified(modified);
|
||||
} else
|
||||
#endif
|
||||
if (should_intern_string(v)) {
|
||||
PyObject *w = v;
|
||||
_PyUnicode_InternMortal(interp, &v);
|
||||
if (w != v) {
|
||||
PyTuple_SET_ITEM(tuple, i, v);
|
||||
if (modified) {
|
||||
*modified = 1;
|
||||
}
|
||||
_constants_tuple_modified(modified);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -242,9 +258,7 @@ intern_constants(PyObject *tuple, int *modified)
|
|||
|
||||
PyTuple_SET_ITEM(tuple, i, v);
|
||||
Py_DECREF(w);
|
||||
if (modified) {
|
||||
*modified = 1;
|
||||
}
|
||||
_constants_tuple_modified(modified);
|
||||
}
|
||||
Py_DECREF(tmp);
|
||||
}
|
||||
|
|
@ -273,9 +287,7 @@ intern_constants(PyObject *tuple, int *modified)
|
|||
}
|
||||
PyTuple_SET_ITEM(tuple, i, v);
|
||||
Py_DECREF(slice);
|
||||
if (modified) {
|
||||
*modified = 1;
|
||||
}
|
||||
_constants_tuple_modified(modified);
|
||||
}
|
||||
Py_DECREF(tmp);
|
||||
}
|
||||
|
|
@ -293,9 +305,7 @@ intern_constants(PyObject *tuple, int *modified)
|
|||
else if (interned != v) {
|
||||
PyTuple_SET_ITEM(tuple, i, interned);
|
||||
Py_SETREF(v, interned);
|
||||
if (modified) {
|
||||
*modified = 1;
|
||||
}
|
||||
_constants_tuple_modified(modified);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -207,18 +207,6 @@ _PyUnicode_GetEmpty(void)
|
|||
return &_Py_STR(empty);
|
||||
}
|
||||
|
||||
/* This dictionary holds per-interpreter interned strings.
|
||||
* See InternalDocs/string_interning.md for details.
|
||||
*/
|
||||
/* Return the interned-strings object cached on the given interpreter. */
static inline PyObject *
get_interned_dict(PyInterpreterState *interp)
{
    PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
    return interned_dict;
}
|
||||
|
||||
/* This hashtable holds statically allocated interned strings.
|
||||
* See InternalDocs/string_interning.md for details.
|
||||
*/
|
||||
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
|
||||
|
||||
/* Get number of all interned strings for the current interpreter. */
|
||||
Py_ssize_t
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue