mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Issue #6784: Strings from Python 2 can now be unpickled as bytes objects.
Initial patch by Merlijn van Deen. I've added a few unrelated docstring fixes in the patch while I was at it, which makes the documentation for pickle a bit more consistent.
This commit is contained in:
parent
ee07b94788
commit
d05c9ff845
8 changed files with 447 additions and 366 deletions
|
@ -969,113 +969,107 @@ class StackObject(object):
|
|||
return self.name
|
||||
|
||||
|
||||
pyint = StackObject(
|
||||
name='int',
|
||||
obtype=int,
|
||||
doc="A short (as opposed to long) Python integer object.")
|
||||
|
||||
pylong = StackObject(
|
||||
name='long',
|
||||
obtype=int,
|
||||
doc="A long (as opposed to short) Python integer object.")
|
||||
pyint = pylong = StackObject(
|
||||
name='int',
|
||||
obtype=int,
|
||||
doc="A Python integer object.")
|
||||
|
||||
pyinteger_or_bool = StackObject(
|
||||
name='int_or_bool',
|
||||
obtype=(int, bool),
|
||||
doc="A Python integer object (short or long), or "
|
||||
"a Python bool.")
|
||||
name='int_or_bool',
|
||||
obtype=(int, bool),
|
||||
doc="A Python integer or boolean object.")
|
||||
|
||||
pybool = StackObject(
|
||||
name='bool',
|
||||
obtype=(bool,),
|
||||
doc="A Python bool object.")
|
||||
name='bool',
|
||||
obtype=bool,
|
||||
doc="A Python boolean object.")
|
||||
|
||||
pyfloat = StackObject(
|
||||
name='float',
|
||||
obtype=float,
|
||||
doc="A Python float object.")
|
||||
name='float',
|
||||
obtype=float,
|
||||
doc="A Python float object.")
|
||||
|
||||
pystring = StackObject(
|
||||
name='string',
|
||||
obtype=bytes,
|
||||
doc="A Python (8-bit) string object.")
|
||||
pybytes_or_str = pystring = StackObject(
|
||||
name='bytes_or_str',
|
||||
obtype=(bytes, str),
|
||||
doc="A Python bytes or (Unicode) string object.")
|
||||
|
||||
pybytes = StackObject(
|
||||
name='bytes',
|
||||
obtype=bytes,
|
||||
doc="A Python bytes object.")
|
||||
name='bytes',
|
||||
obtype=bytes,
|
||||
doc="A Python bytes object.")
|
||||
|
||||
pyunicode = StackObject(
|
||||
name='str',
|
||||
obtype=str,
|
||||
doc="A Python (Unicode) string object.")
|
||||
name='str',
|
||||
obtype=str,
|
||||
doc="A Python (Unicode) string object.")
|
||||
|
||||
pynone = StackObject(
|
||||
name="None",
|
||||
obtype=type(None),
|
||||
doc="The Python None object.")
|
||||
name="None",
|
||||
obtype=type(None),
|
||||
doc="The Python None object.")
|
||||
|
||||
pytuple = StackObject(
|
||||
name="tuple",
|
||||
obtype=tuple,
|
||||
doc="A Python tuple object.")
|
||||
name="tuple",
|
||||
obtype=tuple,
|
||||
doc="A Python tuple object.")
|
||||
|
||||
pylist = StackObject(
|
||||
name="list",
|
||||
obtype=list,
|
||||
doc="A Python list object.")
|
||||
name="list",
|
||||
obtype=list,
|
||||
doc="A Python list object.")
|
||||
|
||||
pydict = StackObject(
|
||||
name="dict",
|
||||
obtype=dict,
|
||||
doc="A Python dict object.")
|
||||
name="dict",
|
||||
obtype=dict,
|
||||
doc="A Python dict object.")
|
||||
|
||||
pyset = StackObject(
|
||||
name="set",
|
||||
obtype=set,
|
||||
doc="A Python set object.")
|
||||
name="set",
|
||||
obtype=set,
|
||||
doc="A Python set object.")
|
||||
|
||||
pyfrozenset = StackObject(
|
||||
name="frozenset",
|
||||
obtype=set,
|
||||
doc="A Python frozenset object.")
|
||||
name="frozenset",
|
||||
obtype=set,
|
||||
doc="A Python frozenset object.")
|
||||
|
||||
anyobject = StackObject(
|
||||
name='any',
|
||||
obtype=object,
|
||||
doc="Any kind of object whatsoever.")
|
||||
name='any',
|
||||
obtype=object,
|
||||
doc="Any kind of object whatsoever.")
|
||||
|
||||
markobject = StackObject(
|
||||
name="mark",
|
||||
obtype=StackObject,
|
||||
doc="""'The mark' is a unique object.
|
||||
name="mark",
|
||||
obtype=StackObject,
|
||||
doc="""'The mark' is a unique object.
|
||||
|
||||
Opcodes that operate on a variable number of objects
|
||||
generally don't embed the count of objects in the opcode,
|
||||
or pull it off the stack. Instead the MARK opcode is used
|
||||
to push a special marker object on the stack, and then
|
||||
some other opcodes grab all the objects from the top of
|
||||
the stack down to (but not including) the topmost marker
|
||||
object.
|
||||
""")
|
||||
Opcodes that operate on a variable number of objects
|
||||
generally don't embed the count of objects in the opcode,
|
||||
or pull it off the stack. Instead the MARK opcode is used
|
||||
to push a special marker object on the stack, and then
|
||||
some other opcodes grab all the objects from the top of
|
||||
the stack down to (but not including) the topmost marker
|
||||
object.
|
||||
""")
|
||||
|
||||
stackslice = StackObject(
|
||||
name="stackslice",
|
||||
obtype=StackObject,
|
||||
doc="""An object representing a contiguous slice of the stack.
|
||||
name="stackslice",
|
||||
obtype=StackObject,
|
||||
doc="""An object representing a contiguous slice of the stack.
|
||||
|
||||
This is used in conjunction with markobject, to represent all
|
||||
of the stack following the topmost markobject. For example,
|
||||
the POP_MARK opcode changes the stack from
|
||||
This is used in conjunction with markobject, to represent all
|
||||
of the stack following the topmost markobject. For example,
|
||||
the POP_MARK opcode changes the stack from
|
||||
|
||||
[..., markobject, stackslice]
|
||||
to
|
||||
[...]
|
||||
[..., markobject, stackslice]
|
||||
to
|
||||
[...]
|
||||
|
||||
No matter how many object are on the stack after the topmost
|
||||
markobject, POP_MARK gets rid of all of them (including the
|
||||
topmost markobject too).
|
||||
""")
|
||||
No matter how many objects are on the stack after the topmost
|
||||
markobject, POP_MARK gets rid of all of them (including the
|
||||
topmost markobject too).
|
||||
""")
|
||||
|
||||
##############################################################################
|
||||
# Descriptors for pickle opcodes.
|
||||
|
@ -1212,7 +1206,7 @@ opcodes = [
|
|||
code='L',
|
||||
arg=decimalnl_long,
|
||||
stack_before=[],
|
||||
stack_after=[pylong],
|
||||
stack_after=[pyint],
|
||||
proto=0,
|
||||
doc="""Push a long integer.
|
||||
|
||||
|
@ -1230,7 +1224,7 @@ opcodes = [
|
|||
code='\x8a',
|
||||
arg=long1,
|
||||
stack_before=[],
|
||||
stack_after=[pylong],
|
||||
stack_after=[pyint],
|
||||
proto=2,
|
||||
doc="""Long integer using one-byte length.
|
||||
|
||||
|
@ -1241,7 +1235,7 @@ opcodes = [
|
|||
code='\x8b',
|
||||
arg=long4,
|
||||
stack_before=[],
|
||||
stack_after=[pylong],
|
||||
stack_after=[pyint],
|
||||
proto=2,
|
||||
doc="""Long integer using four-byte length.
|
||||
|
||||
|
@ -1254,45 +1248,50 @@ opcodes = [
|
|||
code='S',
|
||||
arg=stringnl,
|
||||
stack_before=[],
|
||||
stack_after=[pystring],
|
||||
stack_after=[pybytes_or_str],
|
||||
proto=0,
|
||||
doc="""Push a Python string object.
|
||||
|
||||
The argument is a repr-style string, with bracketing quote characters,
|
||||
and perhaps embedded escapes. The argument extends until the next
|
||||
newline character. (Actually, they are decoded into a str instance
|
||||
newline character. These are usually decoded into a str instance
|
||||
using the encoding given to the Unpickler constructor, or the default,
|
||||
'ASCII'.)
|
||||
'ASCII'. If the encoding given was 'bytes' however, they will be
|
||||
decoded as a bytes object instead.
|
||||
"""),
|
||||
|
||||
I(name='BINSTRING',
|
||||
code='T',
|
||||
arg=string4,
|
||||
stack_before=[],
|
||||
stack_after=[pystring],
|
||||
stack_after=[pybytes_or_str],
|
||||
proto=1,
|
||||
doc="""Push a Python string object.
|
||||
|
||||
There are two arguments: the first is a 4-byte little-endian signed int
|
||||
giving the number of bytes in the string, and the second is that many
|
||||
bytes, which are taken literally as the string content. (Actually,
|
||||
they are decoded into a str instance using the encoding given to the
|
||||
Unpickler constructor. or the default, 'ASCII'.)
|
||||
There are two arguments: the first is a 4-byte little-endian
|
||||
signed int giving the number of bytes in the string, and the
|
||||
second is that many bytes, which are taken literally as the string
|
||||
content. These are usually decoded into a str instance using the
|
||||
encoding given to the Unpickler constructor, or the default,
|
||||
'ASCII'. If the encoding given was 'bytes' however, they will be
|
||||
decoded as a bytes object instead.
|
||||
"""),
|
||||
|
||||
I(name='SHORT_BINSTRING',
|
||||
code='U',
|
||||
arg=string1,
|
||||
stack_before=[],
|
||||
stack_after=[pystring],
|
||||
stack_after=[pybytes_or_str],
|
||||
proto=1,
|
||||
doc="""Push a Python string object.
|
||||
|
||||
There are two arguments: the first is a 1-byte unsigned int giving
|
||||
the number of bytes in the string, and the second is that many bytes,
|
||||
which are taken literally as the string content. (Actually, they
|
||||
are decoded into a str instance using the encoding given to the
|
||||
Unpickler constructor. or the default, 'ASCII'.)
|
||||
There are two arguments: the first is a 1-byte unsigned int giving
|
||||
the number of bytes in the string, and the second is that many
|
||||
bytes, which are taken literally as the string content. These are
|
||||
usually decoded into a str instance using the encoding given to
|
||||
the Unpickler constructor, or the default, 'ASCII'. If the
|
||||
encoding given was 'bytes' however, they will be decoded as a bytes
|
||||
object instead.
|
||||
"""),
|
||||
|
||||
# Bytes (protocol 3 only; older protocols don't support bytes at all)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue