[lib2to3] Make grammar pickling faster (#6491)

* Now uses pickle protocol 4.
* Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore, as dictionaries in Python 3.6+ are ordered by default.

This still produces deterministic pickles (that hash the same with MD5). Tested with different PYTHONHASHSEED values.
2025-11-03 19:34:08 +00:00 · 2018-04-16 17:33:59 -07:00 · 2018-04-16 17:33:59 -07:00 · 76618061b9
commit 76618061b9
parent 2bea947628
2 changed files with 3 additions and 25 deletions
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@@ -86,21 +86,9 @@ class Grammar(object):
        self.start = 256
    def dump(self, filename):
-        """Dump the grammar tables to a pickle file.
+        """Dump the grammar tables to a pickle file."""
-        dump() recursively changes all dict to OrderedDict, so the pickled file
-        is not exactly the same as what was passed in to dump(). load() uses the
-        pickled file to create the tables, but  only changes OrderedDict to dict
-        at the top level; it does not recursively change OrderedDict to dict.
-        So, the loaded tables are different from the original tables that were
-        passed to load() in that some of the OrderedDict (from the pickled file)
-        are not changed back to dict. For parsing, this has no effect on
-        performance because OrderedDict uses dict's __getitem__ with nothing in
-        between.
-        """
        with open(filename, "wb") as f:
-            d = _make_deterministic(self.__dict__)
+            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
-            pickle.dump(d, f, 2)
    def load(self, filename):
        """Load the grammar tables from a pickle file."""
@@ -141,17 +129,6 @@ class Grammar(object):
        print("start", self.start)
-def _make_deterministic(top):
-    if isinstance(top, dict):
-        return collections.OrderedDict(
-            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
-    if isinstance(top, list):
-        return [_make_deterministic(e) for e in top]
-    if isinstance(top, tuple):
-        return tuple(_make_deterministic(e) for e in top)
-    return top
 # Map from operator to number (since tokenize doesn't do this)
 opmap_raw = """
--- a/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst
+++ b/Misc/NEWS.d/next/Library/2018-04-16-16-21-09.bpo-23403.rxR1Q_.rst
@@ -0,0 +1 @@
+lib2to3 now uses pickle protocol 4 for pre-computed grammars.