mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.
Initial patch by Merlijn van Deen. I've added a few unrelated docstring fixes in the patch while I was at it, which makes the documentation for pickle a bit more consistent.
This commit is contained in:
parent
ee07b94788
commit
d05c9ff845
8 changed files with 447 additions and 366 deletions
|
@ -348,24 +348,25 @@ class _Pickler:
|
|||
def __init__(self, file, protocol=None, *, fix_imports=True):
|
||||
"""This takes a binary file for writing a pickle data stream.
|
||||
|
||||
The optional protocol argument tells the pickler to use the
|
||||
The optional *protocol* argument tells the pickler to use the
|
||||
given protocol; supported protocols are 0, 1, 2, 3 and 4. The
|
||||
default protocol is 3; a backward-incompatible protocol designed for
|
||||
Python 3.
|
||||
default protocol is 3; a backward-incompatible protocol designed
|
||||
for Python 3.
|
||||
|
||||
Specifying a negative protocol version selects the highest
|
||||
protocol version supported. The higher the protocol used, the
|
||||
more recent the version of Python needed to read the pickle
|
||||
produced.
|
||||
|
||||
The file argument must have a write() method that accepts a single
|
||||
bytes argument. It can thus be a file object opened for binary
|
||||
writing, a io.BytesIO instance, or any other custom object that
|
||||
meets this interface.
|
||||
The *file* argument must have a write() method that accepts a
|
||||
single bytes argument. It can thus be a file object opened for
|
||||
binary writing, an io.BytesIO instance, or any other custom
|
||||
object that meets this interface.
|
||||
|
||||
If fix_imports is True and protocol is less than 3, pickle will try to
|
||||
map the new Python 3 names to the old module names used in Python 2,
|
||||
so that the pickle data stream is readable with Python 2.
|
||||
If *fix_imports* is True and *protocol* is less than 3, pickle
|
||||
will try to map the new Python 3 names to the old module names
|
||||
used in Python 2, so that the pickle data stream is readable
|
||||
with Python 2.
|
||||
"""
|
||||
if protocol is None:
|
||||
protocol = DEFAULT_PROTOCOL
|
||||
|
@ -389,10 +390,9 @@ class _Pickler:
|
|||
"""Clears the pickler's "memo".
|
||||
|
||||
The memo is the data structure that remembers which objects the
|
||||
pickler has already seen, so that shared or recursive objects are
|
||||
pickled by reference and not by value. This method is useful when
|
||||
re-using picklers.
|
||||
|
||||
pickler has already seen, so that shared or recursive objects
|
||||
are pickled by reference and not by value. This method is
|
||||
useful when re-using picklers.
|
||||
"""
|
||||
self.memo.clear()
|
||||
|
||||
|
@ -975,8 +975,14 @@ class _Unpickler:
|
|||
encoding="ASCII", errors="strict"):
|
||||
"""This takes a binary file for reading a pickle data stream.
|
||||
|
||||
The protocol version of the pickle is detected automatically, so no
|
||||
proto argument is needed.
|
||||
The protocol version of the pickle is detected automatically, so
|
||||
no proto argument is needed.
|
||||
|
||||
The argument *file* must have two methods, a read() method that
|
||||
takes an integer argument, and a readline() method that requires
|
||||
no arguments. Both methods should return bytes. Thus *file*
|
||||
can be a binary file object opened for reading, an io.BytesIO
|
||||
object, or any other custom object that meets this interface.
|
||||
|
||||
The file-like object must have two methods, a read() method
|
||||
that takes an integer argument, and a readline() method that
|
||||
|
@ -985,13 +991,14 @@ class _Unpickler:
|
|||
reading, a BytesIO object, or any other custom object that
|
||||
meets this interface.
|
||||
|
||||
Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
|
||||
which are used to control compatibility support for pickle stream
|
||||
generated by Python 2.x. If *fix_imports* is True, pickle will try to
|
||||
map the old Python 2.x names to the new names used in Python 3.x. The
|
||||
*encoding* and *errors* tell pickle how to decode 8-bit string
|
||||
instances pickled by Python 2.x; these default to 'ASCII' and
|
||||
'strict', respectively.
|
||||
Optional keyword arguments are *fix_imports*, *encoding* and
|
||||
*errors*, which are used to control compatibility support for
|
||||
pickle stream generated by Python 2. If *fix_imports* is True,
|
||||
pickle will try to map the old Python 2 names to the new names
|
||||
used in Python 3. The *encoding* and *errors* tell pickle how
|
||||
to decode 8-bit string instances pickled by Python 2; these
|
||||
default to 'ASCII' and 'strict', respectively. *encoding* can be
|
||||
'bytes' to read these 8-bit string instances as bytes objects.
|
||||
"""
|
||||
self._file_readline = file.readline
|
||||
self._file_read = file.read
|
||||
|
@ -1139,6 +1146,15 @@ class _Unpickler:
|
|||
self.append(unpack('>d', self.read(8))[0])
|
||||
dispatch[BINFLOAT[0]] = load_binfloat
|
||||
|
||||
def _decode_string(self, value):
|
||||
# Used to allow strings from Python 2 to be decoded either as
|
||||
# bytes or Unicode strings. This should be used only with the
|
||||
# STRING, BINSTRING and SHORT_BINSTRING opcodes.
|
||||
if self.encoding == "bytes":
|
||||
return value
|
||||
else:
|
||||
return value.decode(self.encoding, self.errors)
|
||||
|
||||
def load_string(self):
|
||||
data = self.readline()[:-1]
|
||||
# Strip outermost quotes
|
||||
|
@ -1146,8 +1162,7 @@ class _Unpickler:
|
|||
data = data[1:-1]
|
||||
else:
|
||||
raise UnpicklingError("the STRING opcode argument must be quoted")
|
||||
self.append(codecs.escape_decode(data)[0]
|
||||
.decode(self.encoding, self.errors))
|
||||
self.append(self._decode_string(codecs.escape_decode(data)[0]))
|
||||
dispatch[STRING[0]] = load_string
|
||||
|
||||
def load_binstring(self):
|
||||
|
@ -1156,8 +1171,7 @@ class _Unpickler:
|
|||
if len < 0:
|
||||
raise UnpicklingError("BINSTRING pickle has negative byte count")
|
||||
data = self.read(len)
|
||||
value = str(data, self.encoding, self.errors)
|
||||
self.append(value)
|
||||
self.append(self._decode_string(data))
|
||||
dispatch[BINSTRING[0]] = load_binstring
|
||||
|
||||
def load_binbytes(self):
|
||||
|
@ -1191,8 +1205,7 @@ class _Unpickler:
|
|||
def load_short_binstring(self):
|
||||
len = self.read(1)[0]
|
||||
data = self.read(len)
|
||||
value = str(data, self.encoding, self.errors)
|
||||
self.append(value)
|
||||
self.append(self._decode_string(data))
|
||||
dispatch[SHORT_BINSTRING[0]] = load_short_binstring
|
||||
|
||||
def load_short_binbytes(self):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue