gh-113028: Correctly memoize str in pickle when escapes added (GH-113436)

This fixes a divergence between the Python and C implementations of pickle for protocol 0, in which pickle.py fails to re-use the first pickled representation of strings involving characters that have to be escaped.
2025-08-04 08:59:19 +00:00 · 2023-12-24 09:43:44 +00:00 · 2023-12-24 09:43:44 +00:00 · 08398631a0
commit 08398631a0
parent 894f0e573d
3 changed files with 21 additions and 7 deletions
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1825,6 +1825,14 @@ class AbstractPickleTests:
            t2 = self.loads(p)
            self.assert_is_copy(t, t2)

+    def test_unicode_memoization(self):
+        # Repeated str is re-used (even when escapes added).
+        for proto in protocols:
+            for s in '', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r':
+                p = self.dumps((s, s), proto)
+                s1, s2 = self.loads(p)
+                self.assertIs(s1, s2)
+
    def test_bytes(self):
        for proto in protocols:
            for s in b'', b'xyz', b'xyz'*100: