bpo-47084: Clear Unicode cached representations on finalization (GH-32032)

This commit is contained in:
Jeremy Kloth 2022-03-22 06:53:51 -06:00 committed by GitHub
parent 7d810b6a4e
commit 88872a29f1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 78 additions and 18 deletions

View file

@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *); extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
extern void _PyUnicode_Fini(PyInterpreterState *); extern void _PyUnicode_Fini(PyInterpreterState *);
extern void _PyUnicode_FiniTypes(PyInterpreterState *); extern void _PyUnicode_FiniTypes(PyInterpreterState *);
extern void _PyStaticUnicode_Dealloc(PyObject *);
/* other API */ /* other API */

View file

@ -1,5 +1,14 @@
initialized = True initialized = True
# Docstring-only classes. Each docstring is a single non-ASCII character:
# U+00B6, U+03C0 and U+1F600 respectively.
# NOTE(review): the _1/_2/_4 suffixes presumably refer to the per-character
# storage width of each string -- confirm against the frozen-module tests.
class TestFrozenUtf8_1:
"""\u00b6"""
class TestFrozenUtf8_2:
"""\u03c0"""
class TestFrozenUtf8_4:
"""\U0001f600"""
def main(): def main():
print("Hello world!") print("Hello world!")

View file

@ -1645,24 +1645,29 @@ class MiscTests(EmbeddingTestsMixin, unittest.TestCase):
'-X showrefcount requires a Python debug build') '-X showrefcount requires a Python debug build')
def test_no_memleak(self): def test_no_memleak(self):
# bpo-1635741: Python must release all memory at exit # bpo-1635741: Python must release all memory at exit
cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"] tests = (
proc = subprocess.run(cmd, ('off', 'pass'),
stdout=subprocess.PIPE, ('on', 'pass'),
stderr=subprocess.STDOUT, ('off', 'import __hello__'),
text=True) ('on', 'import __hello__'),
self.assertEqual(proc.returncode, 0) )
out = proc.stdout.rstrip() for flag, stmt in tests:
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out) xopt = f"frozen_modules={flag}"
if not match: cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt]
self.fail(f"unexpected output: {out!a}") proc = subprocess.run(cmd,
refs = int(match.group(1)) stdout=subprocess.PIPE,
blocks = int(match.group(2)) stderr=subprocess.STDOUT,
self.assertEqual(refs, 0, out) text=True)
if not MS_WINDOWS: self.assertEqual(proc.returncode, 0)
self.assertEqual(blocks, 0, out) out = proc.stdout.rstrip()
else: match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
# bpo-46857: on Windows, Python still leaks 1 memory block at exit if not match:
self.assertIn(blocks, (0, 1), out) self.fail(f"unexpected output: {out!a}")
refs = int(match.group(1))
blocks = int(match.group(2))
with self.subTest(frozen_modules=flag, stmt=stmt):
self.assertEqual(refs, 0, out)
self.assertEqual(blocks, 0, out)
class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase): class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):

View file

@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
} }
/* Free the cached wstr / UTF-8 buffers hanging off a compact unicode object
   and reset the corresponding pointer and length fields.

   The object itself is never freed here; only its side buffers are.
   NOTE(review): callers appear to pass objects with static storage
   (e.g. the single-character singletons) -- confirm for new call sites. */
static void
unicode_static_dealloc(PyObject *op)
{
    PyASCIIObject *base = (PyASCIIObject *)op;

    assert(base->state.compact);

    if (base->state.ascii) {
        /* ASCII object: only the wstr cache is released on this path. */
        if (base->wstr != NULL) {
            PyObject_Free(base->wstr);
            base->wstr = NULL;
        }
        return;
    }

    PyCompactUnicodeObject *full = (PyCompactUnicodeObject *)op;
    /* Address immediately following the PyCompactUnicodeObject header. */
    void *inline_data = (void *)(full + 1);

    /* A wstr equal to that address is left alone -- presumably it aliases
       the object's own storage rather than a separate allocation. */
    if (base->wstr != NULL && base->wstr != inline_data) {
        PyObject_Free(base->wstr);
        base->wstr = NULL;
        full->wstr_length = 0;
    }

    if (full->utf8 != NULL) {
        PyObject_Free(full->utf8);
        full->utf8 = NULL;
        full->utf8_length = 0;
    }
}
void void
_PyUnicode_Fini(PyInterpreterState *interp) _PyUnicode_Fini(PyInterpreterState *interp)
{ {
@ -16070,6 +16099,21 @@ _PyUnicode_Fini(PyInterpreterState *interp)
_PyUnicode_FiniEncodings(&state->fs_codec); _PyUnicode_FiniEncodings(&state->fs_codec);
unicode_clear_identifiers(state); unicode_clear_identifiers(state);
// Clear the single character singletons
for (int i = 0; i < 128; i++) {
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
}
for (int i = 0; i < 128; i++) {
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
}
}
/* Non-static entry point that forwards `op` unchanged to
   unicode_static_dealloc(), so code outside this file can request the
   same cleanup. */
void
_PyStaticUnicode_Dealloc(PyObject *op)
{
unicode_static_dealloc(op);
} }

View file

@ -185,6 +185,7 @@ class Printer:
else: else:
self.write("PyCompactUnicodeObject _compact;") self.write("PyCompactUnicodeObject _compact;")
self.write(f"{datatype} _data[{len(s)+1}];") self.write(f"{datatype} _data[{len(s)+1}];")
self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
with self.block(f"{name} =", ";"): with self.block(f"{name} =", ";"):
if ascii: if ascii:
with self.block("._ascii =", ","): with self.block("._ascii =", ","):