cpython/Lib/test/test_pickletools.py
Olivier Grisel 3cd7c6e6eb bpo-31993: Do not allocate large temporary buffers in pickle dump. (#4353)
The picklers do no longer allocate temporary memory when dumping large
bytes and str objects into a file object. Instead the data is
directly streamed into the underlying file object.

Previously the C implementation would buffer all content and issue a
single call to file.write() at the end of the dump. With protocol 4
this behavior has changed to issue one call to file.write() per frame.

The Python pickler with protocol 4 now dumps each frame content as a
memoryview to an IOBytes instance that is never reused and the
memoryview is no longer released after the call to write. This makes it
possible for the file object to delay access to the memoryview of
previous frames without forcing any additional memory copy as was
already possible with the C pickler.
2018-01-06 17:18:54 +02:00

102 lines
4.2 KiB
Python

import pickle
import pickletools
from test import support
from test.pickletester import AbstractPickleTests
import unittest
class OptimizedPickleTests(AbstractPickleTests):
def dumps(self, arg, proto=None):
return pickletools.optimize(pickle.dumps(arg, proto))
def loads(self, buf, **kwds):
return pickle.loads(buf, **kwds)
# Test relies on precise output of dumps()
test_pickle_to_2x = None
# Test relies on writing by chunks into a file object.
test_framed_write_sizes_with_delayed_writer = None
def test_optimize_long_binget(self):
data = [str(i) for i in range(257)]
data.append(data[-1])
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
pickled = pickle.dumps(data, proto)
unpickled = pickle.loads(pickled)
self.assertEqual(unpickled, data)
self.assertIs(unpickled[-1], unpickled[-2])
pickled2 = pickletools.optimize(pickled)
unpickled2 = pickle.loads(pickled2)
self.assertEqual(unpickled2, data)
self.assertIs(unpickled2[-1], unpickled2[-2])
self.assertNotIn(pickle.LONG_BINGET, pickled2)
self.assertNotIn(pickle.LONG_BINPUT, pickled2)
def test_optimize_binput_and_memoize(self):
pickled = (b'\x80\x04\x95\x15\x00\x00\x00\x00\x00\x00\x00'
b']\x94(\x8c\x04spamq\x01\x8c\x03ham\x94h\x02e.')
# 0: \x80 PROTO 4
# 2: \x95 FRAME 21
# 11: ] EMPTY_LIST
# 12: \x94 MEMOIZE
# 13: ( MARK
# 14: \x8c SHORT_BINUNICODE 'spam'
# 20: q BINPUT 1
# 22: \x8c SHORT_BINUNICODE 'ham'
# 27: \x94 MEMOIZE
# 28: h BINGET 2
# 30: e APPENDS (MARK at 13)
# 31: . STOP
self.assertIn(pickle.BINPUT, pickled)
unpickled = pickle.loads(pickled)
self.assertEqual(unpickled, ['spam', 'ham', 'ham'])
self.assertIs(unpickled[1], unpickled[2])
pickled2 = pickletools.optimize(pickled)
unpickled2 = pickle.loads(pickled2)
self.assertEqual(unpickled2, ['spam', 'ham', 'ham'])
self.assertIs(unpickled2[1], unpickled2[2])
self.assertNotIn(pickle.BINPUT, pickled2)
class MiscTestCase(unittest.TestCase):
def test__all__(self):
blacklist = {'bytes_types',
'UP_TO_NEWLINE', 'TAKEN_FROM_ARGUMENT1',
'TAKEN_FROM_ARGUMENT4', 'TAKEN_FROM_ARGUMENT4U',
'TAKEN_FROM_ARGUMENT8U', 'ArgumentDescriptor',
'read_uint1', 'read_uint2', 'read_int4', 'read_uint4',
'read_uint8', 'read_stringnl', 'read_stringnl_noescape',
'read_stringnl_noescape_pair', 'read_string1',
'read_string4', 'read_bytes1', 'read_bytes4',
'read_bytes8', 'read_unicodestringnl',
'read_unicodestring1', 'read_unicodestring4',
'read_unicodestring8', 'read_decimalnl_short',
'read_decimalnl_long', 'read_floatnl', 'read_float8',
'read_long1', 'read_long4',
'uint1', 'uint2', 'int4', 'uint4', 'uint8', 'stringnl',
'stringnl_noescape', 'stringnl_noescape_pair', 'string1',
'string4', 'bytes1', 'bytes4', 'bytes8',
'unicodestringnl', 'unicodestring1', 'unicodestring4',
'unicodestring8', 'decimalnl_short', 'decimalnl_long',
'floatnl', 'float8', 'long1', 'long4',
'StackObject',
'pyint', 'pylong', 'pyinteger_or_bool', 'pybool', 'pyfloat',
'pybytes_or_str', 'pystring', 'pybytes', 'pyunicode',
'pynone', 'pytuple', 'pylist', 'pydict', 'pyset',
'pyfrozenset', 'anyobject', 'markobject', 'stackslice',
'OpcodeInfo', 'opcodes', 'code2op',
}
support.check__all__(self, pickletools, blacklist=blacklist)
def test_main():
support.run_unittest(OptimizedPickleTests)
support.run_unittest(MiscTestCase)
support.run_doctest(pickletools)
if __name__ == "__main__":
test_main()