mirror of
https://github.com/python/cpython.git
synced 2025-10-09 16:34:44 +00:00
- A new pickle protocol (protocol 3) is added with explicit support
  for bytes. This is the default protocol. It intentionally cannot be
  unpickled by Python 2.x.
- When a pickle written by Python 2.x contains an (8-bit) str instance,
  this is now decoded to a (Unicode) str instance. The encoding used to
  do this defaults to ASCII, but can be overridden via two new keyword
  arguments to the Unpickler class. Previously this would create bytes
  instances, which is usually wrong: str instances are often used to
  pickle attribute names etc., and text is more common than binary data
  anyway.
This commit is contained in:
parent
953e4e52c4
commit
f416981691
6 changed files with 165 additions and 55 deletions
|
@ -746,6 +746,11 @@ pyfloat = StackObject(
|
|||
doc="A Python float object.")
|
||||
|
||||
pystring = StackObject(
|
||||
name='string',
|
||||
obtype=bytes,
|
||||
doc="A Python (8-bit) string object.")
|
||||
|
||||
pybytes = StackObject(
|
||||
name='bytes',
|
||||
obtype=bytes,
|
||||
doc="A Python bytes object.")
|
||||
|
@ -753,7 +758,7 @@ pystring = StackObject(
|
|||
pyunicode = StackObject(
|
||||
name='str',
|
||||
obtype=str,
|
||||
doc="A Python string object.")
|
||||
doc="A Python (Unicode) string object.")
|
||||
|
||||
pynone = StackObject(
|
||||
name="None",
|
||||
|
@ -868,7 +873,7 @@ class OpcodeInfo(object):
|
|||
assert isinstance(x, StackObject)
|
||||
self.stack_after = stack_after
|
||||
|
||||
assert isinstance(proto, int) and 0 <= proto <= 2
|
||||
assert isinstance(proto, int) and 0 <= proto <= 3
|
||||
self.proto = proto
|
||||
|
||||
assert isinstance(doc, str)
|
||||
|
@ -995,7 +1000,9 @@ opcodes = [
|
|||
|
||||
The argument is a repr-style string, with bracketing quote characters,
|
||||
and perhaps embedded escapes. The argument extends until the next
|
||||
newline character.
|
||||
newline character. (Actually, they are decoded into a str instance
|
||||
using the encoding given to the Unpickler constructor, or the default,
|
||||
'ASCII'.)
|
||||
"""),
|
||||
|
||||
I(name='BINSTRING',
|
||||
|
@ -1008,7 +1015,9 @@ opcodes = [
|
|||
|
||||
There are two arguments: the first is a 4-byte little-endian signed int
|
||||
giving the number of bytes in the string, and the second is that many
|
||||
bytes, which are taken literally as the string content.
|
||||
bytes, which are taken literally as the string content. (Actually,
|
||||
they are decoded into a str instance using the encoding given to the
|
||||
Unpickler constructor, or the default, 'ASCII'.)
|
||||
"""),
|
||||
|
||||
I(name='SHORT_BINSTRING',
|
||||
|
@ -1019,6 +1028,36 @@ opcodes = [
|
|||
proto=1,
|
||||
doc="""Push a Python string object.
|
||||
|
||||
There are two arguments: the first is a 1-byte unsigned int giving
|
||||
the number of bytes in the string, and the second is that many bytes,
|
||||
which are taken literally as the string content. (Actually, they
|
||||
are decoded into a str instance using the encoding given to the
|
||||
Unpickler constructor, or the default, 'ASCII'.)
|
||||
"""),
|
||||
|
||||
# Bytes (protocol 3 only; older protocols don't support bytes at all)
|
||||
|
||||
I(name='BINBYTES',
|
||||
code='B',
|
||||
arg=string4,
|
||||
stack_before=[],
|
||||
stack_after=[pybytes],
|
||||
proto=3,
|
||||
doc="""Push a Python bytes object.
|
||||
|
||||
There are two arguments: the first is a 4-byte little-endian signed int
|
||||
giving the number of bytes, and the second is that many
|
||||
bytes, which are taken literally as the bytes content.
|
||||
"""),
|
||||
|
||||
I(name='SHORT_BINBYTES',
|
||||
code='C',
|
||||
arg=string1,
|
||||
stack_before=[],
|
||||
stack_after=[pybytes],
|
||||
proto=1,
|
||||
doc="""Push a Python bytes object.
|
||||
|
||||
There are two arguments: the first is a 1-byte unsigned int giving
|
||||
the number of bytes, and the second is that many bytes,
|
||||
which are taken literally as the bytes content.
|
||||
|
@ -2006,9 +2045,9 @@ class _Example:
|
|||
|
||||
_dis_test = r"""
|
||||
>>> import pickle
|
||||
>>> x = [1, 2, (3, 4), {bytes(b'abc'): "def"}]
|
||||
>>> pkl = pickle.dumps(x, 0)
|
||||
>>> dis(pkl)
|
||||
>>> x = [1, 2, (3, 4), {b'abc': "def"}]
|
||||
>>> pkl0 = pickle.dumps(x, 0)
|
||||
>>> dis(pkl0)
|
||||
0: ( MARK
|
||||
1: l LIST (MARK at 0)
|
||||
2: p PUT 0
|
||||
|
@ -2025,19 +2064,32 @@ _dis_test = r"""
|
|||
25: ( MARK
|
||||
26: d DICT (MARK at 25)
|
||||
27: p PUT 2
|
||||
30: S STRING 'abc'
|
||||
37: p PUT 3
|
||||
40: V UNICODE 'def'
|
||||
45: p PUT 4
|
||||
48: s SETITEM
|
||||
49: a APPEND
|
||||
50: . STOP
|
||||
30: c GLOBAL 'builtins bytes'
|
||||
46: p PUT 3
|
||||
49: ( MARK
|
||||
50: ( MARK
|
||||
51: l LIST (MARK at 50)
|
||||
52: p PUT 4
|
||||
55: L LONG 97
|
||||
59: a APPEND
|
||||
60: L LONG 98
|
||||
64: a APPEND
|
||||
65: L LONG 99
|
||||
69: a APPEND
|
||||
70: t TUPLE (MARK at 49)
|
||||
71: p PUT 5
|
||||
74: R REDUCE
|
||||
75: V UNICODE 'def'
|
||||
80: p PUT 6
|
||||
83: s SETITEM
|
||||
84: a APPEND
|
||||
85: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
|
||||
Try again with a "binary" pickle.
|
||||
|
||||
>>> pkl = pickle.dumps(x, 1)
|
||||
>>> dis(pkl)
|
||||
>>> pkl1 = pickle.dumps(x, 1)
|
||||
>>> dis(pkl1)
|
||||
0: ] EMPTY_LIST
|
||||
1: q BINPUT 0
|
||||
3: ( MARK
|
||||
|
@ -2050,13 +2102,24 @@ Try again with a "binary" pickle.
|
|||
14: q BINPUT 1
|
||||
16: } EMPTY_DICT
|
||||
17: q BINPUT 2
|
||||
19: U SHORT_BINSTRING 'abc'
|
||||
24: q BINPUT 3
|
||||
26: X BINUNICODE 'def'
|
||||
34: q BINPUT 4
|
||||
36: s SETITEM
|
||||
37: e APPENDS (MARK at 3)
|
||||
38: . STOP
|
||||
19: c GLOBAL 'builtins bytes'
|
||||
35: q BINPUT 3
|
||||
37: ( MARK
|
||||
38: ] EMPTY_LIST
|
||||
39: q BINPUT 4
|
||||
41: ( MARK
|
||||
42: K BININT1 97
|
||||
44: K BININT1 98
|
||||
46: K BININT1 99
|
||||
48: e APPENDS (MARK at 41)
|
||||
49: t TUPLE (MARK at 37)
|
||||
50: q BINPUT 5
|
||||
52: R REDUCE
|
||||
53: X BINUNICODE 'def'
|
||||
61: q BINPUT 6
|
||||
63: s SETITEM
|
||||
64: e APPENDS (MARK at 3)
|
||||
65: . STOP
|
||||
highest protocol among opcodes = 1
|
||||
|
||||
Exercise the INST/OBJ/BUILD family.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue