Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can

produce more compact result and no longer produces invalid output if input
data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
This commit is contained in:
Serhiy Storchaka 2014-12-16 18:00:56 +02:00
parent df9386940a
commit 05dadcfb28
3 changed files with 92 additions and 24 deletions

View file

@ -2282,40 +2282,61 @@ def genops(pickle):
def optimize(p):
'Optimize a pickle string by removing unused PUT opcodes'
not_a_put = object()
gets = { not_a_put } # set of args used by a GET opcode
opcodes = [] # (startpos, stoppos, putid)
put = 'PUT'
get = 'GET'
oldids = set() # set of all PUT ids
newids = {} # set of ids used by a GET opcode
opcodes = [] # (op, idx) or (pos, end_pos)
proto = 0
protoheader = b''
for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
if 'PUT' in opcode.name:
opcodes.append((pos, end_pos, arg))
oldids.add(arg)
opcodes.append((put, arg))
elif opcode.name == 'MEMOIZE':
idx = len(oldids)
oldids.add(idx)
opcodes.append((put, idx))
elif 'FRAME' in opcode.name:
pass
else:
if 'GET' in opcode.name:
gets.add(arg)
elif opcode.name == 'PROTO':
assert pos == 0, pos
elif 'GET' in opcode.name:
if opcode.proto > proto:
proto = opcode.proto
newids[arg] = None
opcodes.append((get, arg))
elif opcode.name == 'PROTO':
if arg > proto:
proto = arg
opcodes.append((pos, end_pos, not_a_put))
prevpos, prevarg = pos, None
if pos == 0:
protoheader = p[pos: end_pos]
else:
opcodes.append((pos, end_pos))
else:
opcodes.append((pos, end_pos))
del oldids
# Copy the opcodes except for PUTS without a corresponding GET
out = io.BytesIO()
opcodes = iter(opcodes)
if proto >= 2:
# Write the PROTO header before any framing
start, stop, _ = next(opcodes)
out.write(p[start:stop])
buf = pickle._Framer(out.write)
# Write the PROTO header before any framing
out.write(protoheader)
pickler = pickle._Pickler(out, proto)
if proto >= 4:
buf.start_framing()
for start, stop, putid in opcodes:
if putid in gets:
buf.commit_frame()
buf.write(p[start:stop])
if proto >= 4:
buf.end_framing()
pickler.framer.start_framing()
idx = 0
for op, arg in opcodes:
if op is put:
if arg not in newids:
continue
data = pickler.put(idx)
newids[arg] = idx
idx += 1
elif op is get:
data = pickler.get(newids[arg])
else:
data = p[op:arg]
pickler.framer.commit_frame()
pickler.write(data)
pickler.framer.end_framing()
return out.getvalue()
##############################################################################