gh-113028: Correctly memoize str in pickle when escapes added (GH-113436)

This fixes a divergence between the Python and C implementations of pickle
for protocol 0, such that pickle.py fails to re-use the first pickled
representation of strings involving characters that have to be escaped.
This commit is contained in:
Jeff Allen 2023-12-24 09:43:44 +00:00 committed by GitHub
parent 894f0e573d
commit 08398631a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 7 deletions

View file

@ -1825,6 +1825,14 @@ class AbstractPickleTests:
t2 = self.loads(p)
self.assert_is_copy(t, t2)
def test_unicode_memoization(self):
    """Check that a str pickled twice in one tuple loads as one object.

    For every protocol, a tuple holding the same str twice is dumped and
    loaded again; the two loaded elements must be the identical object.
    The samples include strings containing a newline, a backslash, a
    non-ASCII character and a carriage return, so the check also covers
    strings whose pickled form needs escapes.
    """
    samples = ('', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r')
    for proto in protocols:
        for text in samples:
            # Same object in both slots: the second occurrence should be
            # served from the memo rather than pickled again.
            pickled = self.dumps((text, text), proto)
            first, second = self.loads(pickled)
            self.assertIs(first, second)
def test_bytes(self):
for proto in protocols:
for s in b'', b'xyz', b'xyz'*100: