mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	[lib2to3] Make grammar pickling faster (#6491)
* Now uses pickle protocol 4
* Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore, as dictionaries in Python 3.6+ are ordered by default

This still produces deterministic pickles (that hash the same with MD5). Tested with different PYTHONHASHSEED values.
This commit is contained in:
		
							parent
							
								
									2bea947628
								
							
						
					
					
						commit
						76618061b9
					
				
					 2 changed files with 3 additions and 25 deletions
				
			
		| 
						 | 
					@ -86,21 +86,9 @@ class Grammar(object):
 | 
				
			||||||
        self.start = 256
 | 
					        self.start = 256
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def dump(self, filename):
 | 
					    def dump(self, filename):
 | 
				
			||||||
        """Dump the grammar tables to a pickle file.
 | 
					        """Dump the grammar tables to a pickle file."""
 | 
				
			||||||
 | 
					 | 
				
			||||||
        dump() recursively changes all dict to OrderedDict, so the pickled file
 | 
					 | 
				
			||||||
        is not exactly the same as what was passed in to dump(). load() uses the
 | 
					 | 
				
			||||||
        pickled file to create the tables, but  only changes OrderedDict to dict
 | 
					 | 
				
			||||||
        at the top level; it does not recursively change OrderedDict to dict.
 | 
					 | 
				
			||||||
        So, the loaded tables are different from the original tables that were
 | 
					 | 
				
			||||||
        passed to load() in that some of the OrderedDict (from the pickled file)
 | 
					 | 
				
			||||||
        are not changed back to dict. For parsing, this has no effect on
 | 
					 | 
				
			||||||
        performance because OrderedDict uses dict's __getitem__ with nothing in
 | 
					 | 
				
			||||||
        between.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        with open(filename, "wb") as f:
 | 
					        with open(filename, "wb") as f:
 | 
				
			||||||
            d = _make_deterministic(self.__dict__)
 | 
					            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
 | 
				
			||||||
            pickle.dump(d, f, 2)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def load(self, filename):
 | 
					    def load(self, filename):
 | 
				
			||||||
        """Load the grammar tables from a pickle file."""
 | 
					        """Load the grammar tables from a pickle file."""
 | 
				
			||||||
| 
						 | 
					@ -141,17 +129,6 @@ class Grammar(object):
 | 
				
			||||||
        print("start", self.start)
 | 
					        print("start", self.start)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _make_deterministic(top):
 | 
					 | 
				
			||||||
    if isinstance(top, dict):
 | 
					 | 
				
			||||||
        return collections.OrderedDict(
 | 
					 | 
				
			||||||
            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
 | 
					 | 
				
			||||||
    if isinstance(top, list):
 | 
					 | 
				
			||||||
        return [_make_deterministic(e) for e in top]
 | 
					 | 
				
			||||||
    if isinstance(top, tuple):
 | 
					 | 
				
			||||||
        return tuple(_make_deterministic(e) for e in top)
 | 
					 | 
				
			||||||
    return top
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Map from operator to number (since tokenize doesn't do this)
 | 
					# Map from operator to number (since tokenize doesn't do this)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
opmap_raw = """
 | 
					opmap_raw = """
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					lib2to3 now uses pickle protocol 4 for pre-computed grammars.
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue