mirror of
https://github.com/python/cpython.git
synced 2025-11-25 21:11:09 +00:00
Issue #19858: pickletools.optimize() is now aware of the MEMOIZE opcode, can
produce a more compact result, and no longer produces invalid output if the input data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
This commit is contained in:
parent
df9386940a
commit
05dadcfb28
3 changed files with 92 additions and 24 deletions
|
|
@ -2282,40 +2282,61 @@ def genops(pickle):
|
|||
|
||||
def optimize(p):
|
||||
'Optimize a pickle string by removing unused PUT opcodes'
|
||||
not_a_put = object()
|
||||
gets = { not_a_put } # set of args used by a GET opcode
|
||||
opcodes = [] # (startpos, stoppos, putid)
|
||||
put = 'PUT'
|
||||
get = 'GET'
|
||||
oldids = set() # set of all PUT ids
|
||||
newids = {} # set of ids used by a GET opcode
|
||||
opcodes = [] # (op, idx) or (pos, end_pos)
|
||||
proto = 0
|
||||
protoheader = b''
|
||||
for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
|
||||
if 'PUT' in opcode.name:
|
||||
opcodes.append((pos, end_pos, arg))
|
||||
oldids.add(arg)
|
||||
opcodes.append((put, arg))
|
||||
elif opcode.name == 'MEMOIZE':
|
||||
idx = len(oldids)
|
||||
oldids.add(idx)
|
||||
opcodes.append((put, idx))
|
||||
elif 'FRAME' in opcode.name:
|
||||
pass
|
||||
else:
|
||||
if 'GET' in opcode.name:
|
||||
gets.add(arg)
|
||||
elif opcode.name == 'PROTO':
|
||||
assert pos == 0, pos
|
||||
elif 'GET' in opcode.name:
|
||||
if opcode.proto > proto:
|
||||
proto = opcode.proto
|
||||
newids[arg] = None
|
||||
opcodes.append((get, arg))
|
||||
elif opcode.name == 'PROTO':
|
||||
if arg > proto:
|
||||
proto = arg
|
||||
opcodes.append((pos, end_pos, not_a_put))
|
||||
prevpos, prevarg = pos, None
|
||||
if pos == 0:
|
||||
protoheader = p[pos: end_pos]
|
||||
else:
|
||||
opcodes.append((pos, end_pos))
|
||||
else:
|
||||
opcodes.append((pos, end_pos))
|
||||
del oldids
|
||||
|
||||
# Copy the opcodes except for PUTS without a corresponding GET
|
||||
out = io.BytesIO()
|
||||
opcodes = iter(opcodes)
|
||||
if proto >= 2:
|
||||
# Write the PROTO header before any framing
|
||||
start, stop, _ = next(opcodes)
|
||||
out.write(p[start:stop])
|
||||
buf = pickle._Framer(out.write)
|
||||
# Write the PROTO header before any framing
|
||||
out.write(protoheader)
|
||||
pickler = pickle._Pickler(out, proto)
|
||||
if proto >= 4:
|
||||
buf.start_framing()
|
||||
for start, stop, putid in opcodes:
|
||||
if putid in gets:
|
||||
buf.commit_frame()
|
||||
buf.write(p[start:stop])
|
||||
if proto >= 4:
|
||||
buf.end_framing()
|
||||
pickler.framer.start_framing()
|
||||
idx = 0
|
||||
for op, arg in opcodes:
|
||||
if op is put:
|
||||
if arg not in newids:
|
||||
continue
|
||||
data = pickler.put(idx)
|
||||
newids[arg] = idx
|
||||
idx += 1
|
||||
elif op is get:
|
||||
data = pickler.get(newids[arg])
|
||||
else:
|
||||
data = p[op:arg]
|
||||
pickler.framer.commit_frame()
|
||||
pickler.write(data)
|
||||
pickler.framer.end_framing()
|
||||
return out.getvalue()
|
||||
|
||||
##############################################################################
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue