Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.

Initial patch by Merlijn van Deen.

I've added a few unrelated docstring fixes in the patch while I was at
it, which makes the documentation for pickle a bit more consistent.
This commit is contained in:
Alexandre Vassalotti 2013-12-07 01:09:27 -08:00
parent ee07b94788
commit d05c9ff845
8 changed files with 447 additions and 366 deletions

View file

@ -348,24 +348,25 @@ class _Pickler:
def __init__(self, file, protocol=None, *, fix_imports=True):
"""This takes a binary file for writing a pickle data stream.
The optional protocol argument tells the pickler to use the
The optional *protocol* argument tells the pickler to use the
given protocol; supported protocols are 0, 1, 2, 3 and 4. The
default protocol is 3; a backward-incompatible protocol designed for
Python 3.
default protocol is 3; a backward-incompatible protocol designed
for Python 3.
Specifying a negative protocol version selects the highest
protocol version supported. The higher the protocol used, the
more recent the version of Python needed to read the pickle
produced.
The file argument must have a write() method that accepts a single
bytes argument. It can thus be a file object opened for binary
writing, a io.BytesIO instance, or any other custom object that
meets this interface.
The *file* argument must have a write() method that accepts a
single bytes argument. It can thus be a file object opened for
binary writing, an io.BytesIO instance, or any other custom
object that meets this interface.
If fix_imports is True and protocol is less than 3, pickle will try to
map the new Python 3 names to the old module names used in Python 2,
so that the pickle data stream is readable with Python 2.
If *fix_imports* is True and *protocol* is less than 3, pickle
will try to map the new Python 3 names to the old module names
used in Python 2, so that the pickle data stream is readable
with Python 2.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
@ -389,10 +390,9 @@ class _Pickler:
"""Clears the pickler's "memo".
The memo is the data structure that remembers which objects the
pickler has already seen, so that shared or recursive objects are
pickled by reference and not by value. This method is useful when
re-using picklers.
pickler has already seen, so that shared or recursive objects
are pickled by reference and not by value. This method is
useful when re-using picklers.
"""
self.memo.clear()
@ -975,8 +975,14 @@ class _Unpickler:
encoding="ASCII", errors="strict"):
"""This takes a binary file for reading a pickle data stream.
The protocol version of the pickle is detected automatically, so no
proto argument is needed.
The protocol version of the pickle is detected automatically, so
no proto argument is needed.
The argument *file* must have two methods, a read() method that
takes an integer argument, and a readline() method that requires
no arguments. Both methods should return bytes. Thus *file*
can be a binary file object opened for reading, an io.BytesIO
object, or any other custom object that meets this interface.
The file-like object must have two methods, a read() method
that takes an integer argument, and a readline() method that
@ -985,13 +991,14 @@ class _Unpickler:
reading, a BytesIO object, or any other custom object that
meets this interface.
Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
which are used to control compatibility support for pickle stream
generated by Python 2.x. If *fix_imports* is True, pickle will try to
map the old Python 2.x names to the new names used in Python 3.x. The
*encoding* and *errors* tell pickle how to decode 8-bit string
instances pickled by Python 2.x; these default to 'ASCII' and
'strict', respectively.
Optional keyword arguments are *fix_imports*, *encoding* and
*errors*, which are used to control compatibility support for
pickle streams generated by Python 2. If *fix_imports* is True,
pickle will try to map the old Python 2 names to the new names
used in Python 3. The *encoding* and *errors* tell pickle how
to decode 8-bit string instances pickled by Python 2; these
default to 'ASCII' and 'strict', respectively. *encoding* can be
'bytes' to read these 8-bit string instances as bytes objects.
"""
self._file_readline = file.readline
self._file_read = file.read
@ -1139,6 +1146,15 @@ class _Unpickler:
self.append(unpack('>d', self.read(8))[0])
dispatch[BINFLOAT[0]] = load_binfloat
def _decode_string(self, value):
# Used to allow strings from Python 2 to be decoded either as
# bytes or Unicode strings. This should be used only with the
# STRING, BINSTRING and SHORT_BINSTRING opcodes.
#
# When self.encoding is the special value "bytes", *value* is
# returned unchanged; otherwise it is decoded with self.encoding,
# using self.errors as the error-handling scheme.
if self.encoding == "bytes":
return value
else:
return value.decode(self.encoding, self.errors)
def load_string(self):
data = self.readline()[:-1]
# Strip outermost quotes
@ -1146,8 +1162,7 @@ class _Unpickler:
data = data[1:-1]
else:
raise UnpicklingError("the STRING opcode argument must be quoted")
self.append(codecs.escape_decode(data)[0]
.decode(self.encoding, self.errors))
self.append(self._decode_string(codecs.escape_decode(data)[0]))
dispatch[STRING[0]] = load_string
def load_binstring(self):
@ -1156,8 +1171,7 @@ class _Unpickler:
if len < 0:
raise UnpicklingError("BINSTRING pickle has negative byte count")
data = self.read(len)
value = str(data, self.encoding, self.errors)
self.append(value)
self.append(self._decode_string(data))
dispatch[BINSTRING[0]] = load_binstring
def load_binbytes(self):
@ -1191,8 +1205,7 @@ class _Unpickler:
def load_short_binstring(self):
len = self.read(1)[0]
data = self.read(len)
value = str(data, self.encoding, self.errors)
self.append(value)
self.append(self._decode_string(data))
dispatch[SHORT_BINSTRING[0]] = load_short_binstring
def load_short_binbytes(self):