Make framing optional in pickle protocol 4.

This will allow us to control, in the future, whether framing is used.
For example, we may want to turn it off for tiny pickles, where it doesn't
help.

The change also improves performance slightly:

### fastpickle ###
Min: 0.608517 -> 0.557358: 1.09x faster
Avg: 0.798892 -> 0.694738: 1.15x faster
Significant (t=3.45)
Stddev: 0.17145 -> 0.12704: 1.3496x smaller
Timeline: http://goo.gl/3xQE1J

### pickle_dict ###
Min: 0.669920 -> 0.615271: 1.09x faster
Avg: 0.733633 -> 0.645058: 1.14x faster
Significant (t=5.05)
Stddev: 0.12041 -> 0.02961: 4.0662x smaller
Timeline: http://goo.gl/LpLSXI

### pickle_list ###
Min: 0.397583 -> 0.368112: 1.08x faster
Avg: 0.412784 -> 0.397223: 1.04x faster
Significant (t=2.78)
Stddev: 0.01518 -> 0.03653: 2.4068x larger
Timeline: http://goo.gl/v39E59

### unpickle_list ###
Min: 0.692935 -> 0.594870: 1.16x faster
Avg: 0.730012 -> 0.628395: 1.16x faster
Significant (t=17.76)
Stddev: 0.02720 -> 0.02995: 1.1012x larger
Timeline: http://goo.gl/2P9AEt

The following not significant results are hidden, use -v to show them:
fastunpickle.
This commit is contained in:
Alexandre Vassalotti 2013-11-23 20:30:03 -08:00
parent 1889623e1a
commit b6a2f2a0d1
3 changed files with 132 additions and 197 deletions

View file

@ -188,87 +188,72 @@ class _Framer:
self.file_write = file_write
self.current_frame = None
# Emit the buffered frame through file_write as three pieces: the FRAME
# opcode, the payload length, and the payload itself; then rewind and
# truncate the buffer so it is empty again.
# NOTE(review): indentation was lost in this diff view, so the exact extent
# of the `with` body cannot be read from here — confirm against the repository.
def _commit_frame(self):
f = self.current_frame
with f.getbuffer() as data:
n = len(data)
write = self.file_write
write(FRAME)
# "<Q" is a little-endian unsigned 64-bit length, assuming `pack` is
# struct.pack — TODO confirm.
write(pack("<Q", n))
write(data)
# Reset the buffer to zero length.
f.seek(0)
f.truncate()
# Create a fresh in-memory BytesIO and install it as the current frame.
def start_framing(self):
self.current_frame = io.BytesIO()
# Commit whatever is still buffered and clear current_frame.
# NOTE(review): this diff view has no +/- markers; the two `if` statements
# below look like the pre-change and post-change versions of the same flush
# step rather than consecutive code — confirm against the repository.
def end_framing(self):
if self.current_frame is not None:
self._commit_frame()
if self.current_frame and self.current_frame.tell() > 0:
self.commit_frame(force=True)
self.current_frame = None
# Write the current frame to the file when its size has reached
# _FRAME_SIZE_TARGET, or regardless of size when `force` is true.
# The frame goes out as the FRAME opcode, the payload length, then the
# payload; the buffer is rewound and truncated afterwards.
# NOTE(review): indentation was lost in this diff view, so which statements
# sit inside the `if`/`with` bodies cannot be read from here — confirm.
def commit_frame(self, force=False):
if self.current_frame:
f = self.current_frame
if f.tell() >= self._FRAME_SIZE_TARGET or force:
with f.getbuffer() as data:
n = len(data)
write = self.file_write
write(FRAME)
# "<Q" is a little-endian unsigned 64-bit length, assuming `pack` is
# struct.pack — TODO confirm.
write(pack("<Q", n))
write(data)
# Reset the buffer to zero length.
f.seek(0)
f.truncate()
# Write `data` into the current frame buffer, or directly through
# file_write when there is no current frame.
# NOTE(review): this diff view has no +/- markers, so removed and added
# lines are interleaved below (the `f`-based statements and the
# `self.current_frame`-based statements look like two versions of the same
# method) — confirm against the repository.
def write(self, data):
f = self.current_frame
if f is None:
return self.file_write(data)
if self.current_frame:
return self.current_frame.write(data)
else:
n = len(data)
if f.tell() >= self._FRAME_SIZE_TARGET:
self._commit_frame()
return f.write(data)
return self.file_write(data)
class _Unframer:
# Store the caller-supplied read/readline/tell callables and start with no
# frame loaded.
# NOTE(review): this diff view has no +/- markers, so some of these
# assignments (e.g. framing_enabled, frame_start) may be removed lines —
# confirm against the repository.
def __init__(self, file_read, file_readline, file_tell=None):
self.file_read = file_read
self.file_readline = file_readline
self.file_tell = file_tell
self.framing_enabled = False
self.current_frame = None
self.frame_start = None
# Return `n` bytes of pickle data, served from the current frame when one is
# loaded and from file_read otherwise.
# Raises UnpicklingError("pickle exhausted before end of frame") when the
# frame holds some data but fewer than `n` bytes.
# NOTE(review): this diff view has no +/- markers and no indentation, so two
# versions of the method are interleaved below: one keyed on
# `framing_enabled` that parses the FRAME header itself, and one keyed on
# `self.current_frame` — confirm which lines survive against the repository.
def read(self, n):
if n == 0:
return b''
_file_read = self.file_read
if not self.framing_enabled:
return _file_read(n)
f = self.current_frame
if f is not None:
data = f.read(n)
if data:
if len(data) < n:
raise UnpicklingError(
"pickle exhausted before end of frame")
return data
frame_opcode = _file_read(1)
if frame_opcode != FRAME:
raise UnpicklingError(
"expected a FRAME opcode, got {} instead".format(frame_opcode))
frame_size, = unpack("<Q", _file_read(8))
if frame_size > sys.maxsize:
raise ValueError("frame size > sys.maxsize: %d" % frame_size)
if self.file_tell is not None:
self.frame_start = self.file_tell()
f = self.current_frame = io.BytesIO(_file_read(frame_size))
self.readline = f.readline
data = f.read(n)
assert len(data) == n, (len(data), n)
return data
if self.current_frame:
data = self.current_frame.read(n)
if not data and n != 0:
self.current_frame = None
return self.file_read(n)
if len(data) < n:
raise UnpicklingError(
"pickle exhausted before end of frame")
return data
else:
return self.file_read(n)
def readline(self):
    """Return the next newline-terminated line of pickle data.

    The line is taken from the current frame when one is loaded.  When the
    frame is exhausted it is dropped and the line is read from the
    underlying file instead; with no frame loaded the file is read
    directly.

    Raises:
        UnpicklingError: if the frame ends in the middle of a line, i.e.
            the data read from the frame does not end with a newline.
    """
    frame = self.current_frame
    if not frame:
        return self.file_readline()
    data = frame.readline()
    if not data:
        # Frame fully consumed: forget it and continue with the file.
        self.current_frame = None
        return self.file_readline()
    # Indexing a bytes object yields an int, so `data[-1] != b'\n'` is
    # always true; compare the trailing byte as bytes instead.
    if not data.endswith(b'\n'):
        raise UnpicklingError(
            "pickle exhausted before end of frame")
    return data
# Report the current position in the pickle stream:
#   - None when no tell callable was supplied,
#   - the file's own position when no frame is loaded,
#   - otherwise frame_start plus the offset inside the current frame.
def tell(self):
if self.file_tell is None:
return None
elif self.current_frame is None:
return self.file_tell()
else:
return self.frame_start + self.current_frame.tell()
# Read `frame_size` bytes through file_read and make them the current frame.
# Raises UnpicklingError if the previous frame still has unread bytes.
def load_frame(self, frame_size):
# read() with no argument drains the old frame; anything left over means
# the new frame started too early.
if self.current_frame and self.current_frame.read() != b'':
raise UnpicklingError(
"beginning of a new frame before end of current frame")
self.current_frame = io.BytesIO(self.file_read(frame_size))
# Tools used for pickling.
@ -392,6 +377,8 @@ class _Pickler:
self._file_write = file.write
except AttributeError:
raise TypeError("file must have a 'write' attribute")
self.framer = _Framer(self._file_write)
self.write = self.framer.write
self.memo = {}
self.proto = int(protocol)
self.bin = protocol >= 1
@ -417,18 +404,12 @@ class _Pickler:
raise PicklingError("Pickler.__init__() was not called by "
"%s.__init__()" % (self.__class__.__name__,))
if self.proto >= 2:
self._file_write(PROTO + pack("<B", self.proto))
self.write(PROTO + pack("<B", self.proto))
if self.proto >= 4:
framer = _Framer(self._file_write)
framer.start_framing()
self.write = framer.write
else:
framer = None
self.write = self._file_write
self.framer.start_framing()
self.save(obj)
self.write(STOP)
if framer is not None:
framer.end_framing()
self.framer.end_framing()
def memoize(self, obj):
"""Store an object in the memo."""
@ -475,6 +456,8 @@ class _Pickler:
return GET + repr(i).encode("ascii") + b'\n'
def save(self, obj, save_persistent_id=True):
self.framer.commit_frame()
# Check for persistent id (defined by a subclass)
pid = self.persistent_id(obj)
if pid is not None and save_persistent_id:
@ -1078,10 +1061,15 @@ class _Unpickler:
if not 0 <= proto <= HIGHEST_PROTOCOL:
raise ValueError("unsupported pickle protocol: %d" % proto)
self.proto = proto
if proto >= 4:
self._unframer.framing_enabled = True
dispatch[PROTO[0]] = load_proto
# Handler registered in `dispatch` for the FRAME opcode: read the 8-byte
# frame size and pass it to the unframer's load_frame.
# Raises ValueError when the size exceeds sys.maxsize.
def load_frame(self):
# "<Q" is a little-endian unsigned 64-bit integer, assuming `unpack` is
# struct.unpack — TODO confirm.
frame_size, = unpack('<Q', self.read(8))
if frame_size > sys.maxsize:
raise ValueError("frame size > sys.maxsize: %d" % frame_size)
self._unframer.load_frame(frame_size)
dispatch[FRAME[0]] = load_frame
def load_persid(self):
pid = self.readline()[:-1].decode("ascii")
self.append(self.persistent_load(pid))