bpo-24960: use pkgutil.get_data in lib2to3 to read pickled grammar files (GH-4977) (#4979)

This is more complicated than it should be because we need to preserve the useful mtime-based regeneration feature that lib2to3.pgen2.driver.load_grammar has. We only look for the pickled grammar file with pkgutil.get_data and only if the source file does not exist. (cherry picked from commit 8a5877165e)
2025-10-01 21:02:15 +00:00 · 2017-12-22 12:51:46 -08:00 · 2017-12-22 12:51:46 -08:00 · c1b8eb8006
commit c1b8eb8006
parent 2e1ef00171
5 changed files with 45 additions and 2 deletions
--- a/Lib/lib2to3/pgen2/driver.py
+++ b/Lib/lib2to3/pgen2/driver.py
@ -20,6 +20,7 @@ import codecs
 import io
 import os
 import logging
 import pkgutil
 import sys
 # Pgen imports
@ -143,6 +144,26 @@ def _newer(a, b):
    return os.path.getmtime(a) >= os.path.getmtime(b)
 def load_packaged_grammar(package, grammar_source):
    """Return the grammar for *grammar_source*, using packaged data if needed.
    When *grammar_source* exists as a file on disk, this simply defers to
    load_grammar(grammar_source), which keeps load_grammar's automatic
    regeneration behavior available.
    Otherwise the pickled grammar is fetched with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is derived from the base name of
    *grammar_source* by adding the Python version and using a ``.pickle``
    extension. This facilitates using a packaged grammar file when the
    source file is not available on the filesystem.
    """
    if not os.path.isfile(grammar_source):
        # No source file to (re)generate from: read the pickle shipped
        # inside *package* instead.
        source_name = os.path.basename(grammar_source)
        payload = pkgutil.get_data(package, _generate_pickle_name(source_name))
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged
    return load_grammar(grammar_source)
 def main(*args):
    """Main program, when run as a script: produce grammar pickle files.
--- a/Lib/lib2to3/pgen2/grammar.py
+++ b/Lib/lib2to3/pgen2/grammar.py
@ -108,6 +108,10 @@ class Grammar(object):
            d = pickle.load(f)
        self.__dict__.update(d)
    def loads(self, pkl):
        """Load the grammar tables from a pickle bytes object.

        *pkl* is unpickled and the resulting mapping is merged into this
        instance's attributes. Unpickling can execute arbitrary code, so
        *pkl* must come from a trusted source.
        """
        tables = pickle.loads(pkl)
        self.__dict__.update(tables)
    def copy(self):
        """
        Copy the grammar.
--- a/Lib/lib2to3/pygram.py
+++ b/Lib/lib2to3/pygram.py
@ -29,12 +29,12 @@ class Symbols(object):
            setattr(self, name, symbol)
-python_grammar = driver.load_grammar(_GRAMMAR_FILE)
+python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE)
 python_symbols = Symbols(python_grammar)
 python_grammar_no_print_statement = python_grammar.copy()
 del python_grammar_no_print_statement.keywords["print"]
-pattern_grammar = driver.load_grammar(_PATTERN_GRAMMAR_FILE)
+pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE)
 pattern_symbols = Symbols(pattern_grammar)
--- a/Lib/lib2to3/tests/test_parser.py
+++ b/Lib/lib2to3/tests/test_parser.py
@ -12,7 +12,10 @@ from .support import driver
 from test.support import verbose
 # Python imports
 import importlib
 import operator
 import os
 import pickle
 import shutil
 import subprocess
 import sys
@ -99,6 +102,18 @@ pgen2_driver.load_grammar(%r, save=True, force=True)
        finally:
            shutil.rmtree(tmpdir)
    def test_load_packaged_grammar(self):
        """load_packaged_grammar() reads the pickle via the package's loader.

        A fake module with a custom loader is registered in sys.modules; the
        grammar returned for it must carry the attributes stored in the
        pickle that the loader hands back. 'Grammar.txt' is passed as a bare
        relative name, so the test relies on no such file existing in the
        current directory (otherwise load_grammar() would be used instead).
        """
        # ``import importlib`` alone does not guarantee that the
        # ``importlib.util`` submodule has been loaded, so import it
        # explicitly before using spec_from_loader().
        import importlib.util

        modname = __name__ + '.load_test'

        class MyLoader:
            # Stand-in loader: returns the same pickled payload for any path.
            def get_data(self, where):
                return pickle.dumps({'elephant': 19})

        class MyModule:
            # Minimal fake module whose spec points at MyLoader.
            __file__ = 'parsertestmodule'
            __spec__ = importlib.util.spec_from_loader(modname, MyLoader())

        # Register the fake package so it can be looked up by name, and make
        # sure it is removed again once the test finishes.
        sys.modules[modname] = MyModule()
        self.addCleanup(operator.delitem, sys.modules, modname)
        g = pgen2_driver.load_packaged_grammar(modname, 'Grammar.txt')
        self.assertEqual(g.elephant, 19)
 class GrammarTest(support.TestCase):
--- a/Misc/NEWS.d/next/Tools-Demos/2017-12-22-09-25-51.bpo-24960.TGdAgO.rst
+++ b/Misc/NEWS.d/next/Tools-Demos/2017-12-22-09-25-51.bpo-24960.TGdAgO.rst
@ -0,0 +1,3 @@
 2to3 and lib2to3 can now read pickled grammar files using pkgutil.get_data()
 rather than probing the filesystem. This lets 2to3 and lib2to3 work when run
 from a zipfile.