mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
bpo-36785: PEP 574 implementation (GH-7076)
This commit is contained in:
parent
22ccb0b490
commit
91f4380ced
19 changed files with 1888 additions and 242 deletions
152
Lib/pickle.py
152
Lib/pickle.py
|
@ -36,8 +36,10 @@ import io
|
|||
import codecs
|
||||
import _compat_pickle
|
||||
|
||||
from _pickle import PickleBuffer
|
||||
|
||||
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
|
||||
"Unpickler", "dump", "dumps", "load", "loads"]
|
||||
"Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]
|
||||
|
||||
# Shortcut for use in isinstance testing
|
||||
bytes_types = (bytes, bytearray)
|
||||
|
@ -51,10 +53,11 @@ compatible_formats = ["1.0", # Original protocol 0
|
|||
"2.0", # Protocol 2
|
||||
"3.0", # Protocol 3
|
||||
"4.0", # Protocol 4
|
||||
"5.0", # Protocol 5
|
||||
] # Old format versions we can read
|
||||
|
||||
# This is the highest protocol number we know how to read.
|
||||
HIGHEST_PROTOCOL = 4
|
||||
HIGHEST_PROTOCOL = 5
|
||||
|
||||
# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
|
||||
# Only bump this if the oldest still supported version of Python already
|
||||
|
@ -167,6 +170,7 @@ BINBYTES = b'B' # push bytes; counted binary string argument
|
|||
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
|
||||
|
||||
# Protocol 4
|
||||
|
||||
SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
|
||||
BINUNICODE8 = b'\x8d' # push very long string
|
||||
BINBYTES8 = b'\x8e' # push very long bytes string
|
||||
|
@ -178,6 +182,12 @@ STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks
|
|||
MEMOIZE = b'\x94' # store top of the stack in memo
|
||||
FRAME = b'\x95' # indicate the beginning of a new frame
|
||||
|
||||
# Protocol 5
|
||||
|
||||
BYTEARRAY8 = b'\x96' # push bytearray
|
||||
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
|
||||
READONLY_BUFFER = b'\x98' # make top of stack readonly
|
||||
|
||||
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
|
||||
|
||||
|
||||
|
@ -251,6 +261,23 @@ class _Unframer:
|
|||
self.file_readline = file_readline
|
||||
self.current_frame = None
|
||||
|
||||
def readinto(self, buf):
    """Fill *buf* from the current frame, or from the file if there is none.

    Returns the number of bytes actually stored in *buf*.  *buf* is never
    resized: when the underlying file yields fewer bytes than requested,
    only the leading part of *buf* is overwritten and the short count is
    returned so the caller can detect truncation.

    Raises UnpicklingError when an active frame holds some data, but not
    enough to fill *buf*.
    """
    if self.current_frame:
        n = self.current_frame.readinto(buf)
        if n == 0 and len(buf) != 0:
            # The frame is used up: drop it and fall through to the file.
            self.current_frame = None
        elif n < len(buf):
            raise UnpicklingError(
                "pickle exhausted before end of frame")
        else:
            return n
    data = self.file_read(len(buf))
    n = len(data)
    # Assign to a slice of matching length so a short read can neither
    # shrink a bytearray nor trip a memoryview size mismatch.
    buf[:n] = data
    return n
|
||||
|
||||
def read(self, n):
|
||||
if self.current_frame:
|
||||
data = self.current_frame.read(n)
|
||||
|
@ -371,7 +398,8 @@ def decode_long(data):
|
|||
|
||||
class _Pickler:
|
||||
|
||||
def __init__(self, file, protocol=None, *, fix_imports=True):
|
||||
def __init__(self, file, protocol=None, *, fix_imports=True,
|
||||
buffer_callback=None):
|
||||
"""This takes a binary file for writing a pickle data stream.
|
||||
|
||||
The optional *protocol* argument tells the pickler to use the
|
||||
|
@ -393,6 +421,17 @@ class _Pickler:
|
|||
will try to map the new Python 3 names to the old module names
|
||||
used in Python 2, so that the pickle data stream is readable
|
||||
with Python 2.
|
||||
|
||||
If *buffer_callback* is None (the default), buffer views are
|
||||
serialized into *file* as part of the pickle stream.
|
||||
|
||||
If *buffer_callback* is not None, then it can be called any number
|
||||
of times with a buffer view. If the callback returns a false value
|
||||
(such as None), the given buffer is out-of-band; otherwise the
|
||||
buffer is serialized in-band, i.e. inside the pickle stream.
|
||||
|
||||
It is an error if *buffer_callback* is not None and *protocol*
|
||||
is None or smaller than 5.
|
||||
"""
|
||||
if protocol is None:
|
||||
protocol = DEFAULT_PROTOCOL
|
||||
|
@ -400,6 +439,9 @@ class _Pickler:
|
|||
protocol = HIGHEST_PROTOCOL
|
||||
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
|
||||
raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
|
||||
if buffer_callback is not None and protocol < 5:
|
||||
raise ValueError("buffer_callback needs protocol >= 5")
|
||||
self._buffer_callback = buffer_callback
|
||||
try:
|
||||
self._file_write = file.write
|
||||
except AttributeError:
|
||||
|
@ -756,6 +798,46 @@ class _Pickler:
|
|||
self.memoize(obj)
|
||||
dispatch[bytes] = save_bytes
|
||||
|
||||
def save_bytearray(self, obj):
    """Write *obj* to the pickle stream.

    With protocol 5 or later the BYTEARRAY8 opcode is emitted, followed
    by the 8-byte little-endian length and the raw data.  Payloads at or
    above the framer's target frame size go through _write_large_bytes.
    Older protocols fall back to a reduce call on the bytearray type.
    """
    if self.proto >= 5:
        size = len(obj)
        header = BYTEARRAY8 + pack("<Q", size)
        if size < self.framer._FRAME_SIZE_TARGET:
            self.write(header + obj)
        else:
            self._write_large_bytes(header, obj)
        return
    # Pre-protocol-5: an empty bytearray is rebuilt with no arguments,
    # a non-empty one from an immutable bytes copy of its contents.
    reduce_args = (bytes(obj),) if obj else ()
    self.save_reduce(bytearray, reduce_args, obj=obj)
|
||||
dispatch[bytearray] = save_bytearray
|
||||
|
||||
def save_picklebuffer(self, obj):
    """Pickle a PickleBuffer either in-band or out-of-band.

    The buffer goes out-of-band only when a buffer callback was supplied
    and it returns a false value for *obj*; in that case just a
    NEXT_BUFFER opcode (plus READONLY_BUFFER for read-only buffers) is
    written.  Otherwise the data is copied into the stream, as bytes for
    a read-only buffer and as a bytearray for a writable one.

    Raises PicklingError if the protocol is older than 5 or the
    underlying buffer is not contiguous.
    """
    if self.proto < 5:
        raise PicklingError("PickleBuffer can only be pickled with "
                            "protocol >= 5")
    with obj.raw() as m:
        if not m.contiguous:
            raise PicklingError("PickleBuffer can not be pickled when "
                                "pointing to a non-contiguous buffer")
        in_band = True
        if self._buffer_callback is not None:
            in_band = bool(self._buffer_callback(obj))
        if in_band:
            # Write data in-band
            # XXX The C implementation avoids a copy here
            # NOTE(review): the temporary from tobytes() is handed straight
            # to the save_* methods, bypassing any memo lookup done by
            # save() -- confirm repeated identical (e.g. empty) buffers
            # are handled correctly.
            if m.readonly:
                self.save_bytes(m.tobytes())
            else:
                self.save_bytearray(m.tobytes())
        else:
            # Write data out-of-band
            self.write(NEXT_BUFFER)
            if m.readonly:
                self.write(READONLY_BUFFER)
|
||||
|
||||
dispatch[PickleBuffer] = save_picklebuffer
|
||||
|
||||
def save_str(self, obj):
|
||||
if self.bin:
|
||||
encoded = obj.encode('utf-8', 'surrogatepass')
|
||||
|
@ -1042,7 +1124,7 @@ class _Pickler:
|
|||
class _Unpickler:
|
||||
|
||||
def __init__(self, file, *, fix_imports=True,
|
||||
encoding="ASCII", errors="strict"):
|
||||
encoding="ASCII", errors="strict", buffers=None):
|
||||
"""This takes a binary file for reading a pickle data stream.
|
||||
|
||||
The protocol version of the pickle is detected automatically, so
|
||||
|
@ -1061,7 +1143,17 @@ class _Unpickler:
|
|||
reading, a BytesIO object, or any other custom object that
|
||||
meets this interface.
|
||||
|
||||
Optional keyword arguments are *fix_imports*, *encoding* and
|
||||
If *buffers* is not None, it should be an iterable of buffer-enabled
|
||||
objects that is consumed each time the pickle stream references
|
||||
an out-of-band buffer view. Such buffers have been given in order
|
||||
to the *buffer_callback* of a Pickler object.
|
||||
|
||||
If *buffers* is None (the default), then the buffers are taken
|
||||
from the pickle stream, assuming they are serialized there.
|
||||
It is an error for *buffers* to be None if the pickle stream
|
||||
was produced with a non-None *buffer_callback*.
|
||||
|
||||
Other optional arguments are *fix_imports*, *encoding* and
|
||||
*errors*, which are used to control compatibility support for
|
||||
pickle stream generated by Python 2. If *fix_imports* is True,
|
||||
pickle will try to map the old Python 2 names to the new names
|
||||
|
@ -1070,6 +1162,7 @@ class _Unpickler:
|
|||
default to 'ASCII' and 'strict', respectively. *encoding* can be
|
||||
'bytes' to read these 8-bit string instances as bytes objects.
|
||||
"""
|
||||
self._buffers = iter(buffers) if buffers is not None else None
|
||||
self._file_readline = file.readline
|
||||
self._file_read = file.read
|
||||
self.memo = {}
|
||||
|
@ -1090,6 +1183,7 @@ class _Unpickler:
|
|||
"%s.__init__()" % (self.__class__.__name__,))
|
||||
self._unframer = _Unframer(self._file_read, self._file_readline)
|
||||
self.read = self._unframer.read
|
||||
self.readinto = self._unframer.readinto
|
||||
self.readline = self._unframer.readline
|
||||
self.metastack = []
|
||||
self.stack = []
|
||||
|
@ -1276,6 +1370,34 @@ class _Unpickler:
|
|||
self.append(self.read(len))
|
||||
dispatch[BINBYTES8[0]] = load_binbytes8
|
||||
|
||||
def load_bytearray8(self):
    """Handle BYTEARRAY8: read an 8-byte length, then that many bytes.

    The payload is read directly into a freshly allocated bytearray,
    which is then pushed via self.append().

    Raises UnpicklingError if the declared length exceeds sys.maxsize or
    if the stream ends before the whole payload has been read.
    """
    size, = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    b = bytearray(size)
    nread = self.readinto(b)
    # A short read may show up either as a short count or as a buffer
    # that was resized by slice assignment; treat both as truncation.
    if nread != size or len(b) != size:
        raise UnpicklingError("pickle data was truncated")
    self.append(b)
|
||||
dispatch[BYTEARRAY8[0]] = load_bytearray8
|
||||
|
||||
def load_next_buffer(self):
    """Handle NEXT_BUFFER: push the next out-of-band buffer.

    Raises UnpicklingError if no buffers iterable was supplied or if it
    has already been exhausted.
    """
    if self._buffers is None:
        raise UnpicklingError("pickle stream refers to out-of-band data "
                              "but no *buffers* argument was given")
    try:
        buf = next(self._buffers)
    except StopIteration:
        # The StopIteration is an implementation detail; keep it out of
        # the traceback shown to the caller.
        raise UnpicklingError("not enough out-of-band buffers") from None
    self.append(buf)
|
||||
dispatch[NEXT_BUFFER[0]] = load_next_buffer
|
||||
|
||||
def load_readonly_buffer(self):
    """Handle READONLY_BUFFER: make the object on top of the stack read-only.

    A writable buffer is replaced by a read-only memoryview of it; an
    object that is already read-only is left in place untouched.
    """
    top = self.stack[-1]
    with memoryview(top) as view:
        if view.readonly:
            return
        self.stack[-1] = view.toreadonly()
|
||||
dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
|
||||
|
||||
def load_short_binstring(self):
|
||||
len = self.read(1)[0]
|
||||
data = self.read(len)
|
||||
|
@ -1600,25 +1722,29 @@ class _Unpickler:
|
|||
|
||||
# Shorthands
|
||||
|
||||
def _dump(obj, file, protocol=None, *, fix_imports=True):
|
||||
_Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
|
||||
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
|
||||
_Pickler(file, protocol, fix_imports=fix_imports,
|
||||
buffer_callback=buffer_callback).dump(obj)
|
||||
|
||||
def _dumps(obj, protocol=None, *, fix_imports=True):
|
||||
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
|
||||
f = io.BytesIO()
|
||||
_Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
|
||||
_Pickler(f, protocol, fix_imports=fix_imports,
|
||||
buffer_callback=buffer_callback).dump(obj)
|
||||
res = f.getvalue()
|
||||
assert isinstance(res, bytes_types)
|
||||
return res
|
||||
|
||||
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
|
||||
return _Unpickler(file, fix_imports=fix_imports,
|
||||
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
|
||||
buffers=None):
|
||||
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
|
||||
encoding=encoding, errors=errors).load()
|
||||
|
||||
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
|
||||
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
|
||||
buffers=None):
|
||||
if isinstance(s, str):
|
||||
raise TypeError("Can't load pickle from unicode string")
|
||||
file = io.BytesIO(s)
|
||||
return _Unpickler(file, fix_imports=fix_imports,
|
||||
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
|
||||
encoding=encoding, errors=errors).load()
|
||||
|
||||
# Use the faster _pickle if possible
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue