mirror of
https://github.com/python/cpython.git
synced 2025-11-03 11:23:31 +00:00
[lib2to3] Make grammar pickling faster (#6491)
* Now uses pickle protocol 4.
* Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore, as dictionaries in Python 3.6+ are ordered by default.

This still produces deterministic pickles (that hash the same with MD5). Tested with different PYTHONHASHSEED values.
This commit is contained in:
parent
2bea947628
commit
76618061b9
2 changed files with 3 additions and 25 deletions
|
|
@ -86,21 +86,9 @@ class Grammar(object):
|
||||||
self.start = 256
|
self.start = 256
|
||||||
|
|
||||||
def dump(self, filename):
|
def dump(self, filename):
|
||||||
"""Dump the grammar tables to a pickle file.
|
"""Dump the grammar tables to a pickle file."""
|
||||||
|
|
||||||
dump() recursively changes all dict to OrderedDict, so the pickled file
|
|
||||||
is not exactly the same as what was passed in to dump(). load() uses the
|
|
||||||
pickled file to create the tables, but only changes OrderedDict to dict
|
|
||||||
at the top level; it does not recursively change OrderedDict to dict.
|
|
||||||
So, the loaded tables are different from the original tables that were
|
|
||||||
passed to load() in that some of the OrderedDict (from the pickled file)
|
|
||||||
are not changed back to dict. For parsing, this has no effect on
|
|
||||||
performance because OrderedDict uses dict's __getitem__ with nothing in
|
|
||||||
between.
|
|
||||||
"""
|
|
||||||
with open(filename, "wb") as f:
|
with open(filename, "wb") as f:
|
||||||
d = _make_deterministic(self.__dict__)
|
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
|
||||||
pickle.dump(d, f, 2)
|
|
||||||
|
|
||||||
def load(self, filename):
|
def load(self, filename):
|
||||||
"""Load the grammar tables from a pickle file."""
|
"""Load the grammar tables from a pickle file."""
|
||||||
|
|
@ -141,17 +129,6 @@ class Grammar(object):
|
||||||
print("start", self.start)
|
print("start", self.start)
|
||||||
|
|
||||||
|
|
||||||
def _make_deterministic(top):
|
|
||||||
if isinstance(top, dict):
|
|
||||||
return collections.OrderedDict(
|
|
||||||
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
|
|
||||||
if isinstance(top, list):
|
|
||||||
return [_make_deterministic(e) for e in top]
|
|
||||||
if isinstance(top, tuple):
|
|
||||||
return tuple(_make_deterministic(e) for e in top)
|
|
||||||
return top
|
|
||||||
|
|
||||||
|
|
||||||
# Map from operator to number (since tokenize doesn't do this)
|
# Map from operator to number (since tokenize doesn't do this)
|
||||||
|
|
||||||
opmap_raw = """
|
opmap_raw = """
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
lib2to3 now uses pickle protocol 4 for pre-computed grammars.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue