Refactor source and bytecode file loaders in importlib so that there are source-only and source/bytecode loaders.
This commit is contained in:
Brett Cannon 2009-02-21 05:41:15 +00:00
parent 0515619dbc
commit 91cf882b36
4 changed files with 163 additions and 149 deletions

View file

@ -1,31 +1,6 @@
to do to do
///// /////
* Refactor source/bytecode finder/loader code such that bytecode support is a
subclass of source support (makes it nicer for VMs that don't use CPython
bytecode).
+ PyLoader (for ABC)
- load_module for source only
- get_code for source only
+ PyFileLoader(PyLoader)
- get_data
- source_mtime
- source_path
+PyPycLoader (PyLoader, for ABC)
- load_module for source and bytecode
- get_code for source and bytecode
+ PyPycFileLoader(PyPycLoader, PyFileLoader)
- bytecode_path
- write_bytecode
* Implement PEP 302 protocol for loaders (should just be a matter of testing). * Implement PEP 302 protocol for loaders (should just be a matter of testing).
+ Source/bytecode. + Source/bytecode.
@ -42,7 +17,6 @@ to do
* load_module * load_module
- (?) Importer(Finder, Loader)
- ResourceLoader(Loader) - ResourceLoader(Loader)
* get_data * get_data
@ -89,6 +63,8 @@ to do
* Add leading underscores to all objects in importlib._bootstrap that are not * Add leading underscores to all objects in importlib._bootstrap that are not
publicly exposed. publicly exposed.
* Reorder importlib/_bootstrap.py so definitions are not in inverted order.
* Make sure that there is documentation *somewhere* fully explaining the * Make sure that there is documentation *somewhere* fully explaining the
semantics of import that can be referenced from the package's documentation semantics of import that can be referenced from the package's documentation
(even if it is in the package documentation itself, although it might be best (even if it is in the package documentation itself, although it might be best

View file

@ -315,17 +315,124 @@ def module_for_loader(fxn):
return decorated return decorated
class _PyFileLoader: class PyLoader:
# XXX Still smart to have this as a separate class? Or would it work
# better to integrate with PyFileFinder? Could cache _is_pkg info.
# FileFinder can be changed to return self instead of a specific loader
# call. Otherwise _base_path can be calculated on the fly without issue if
# it is known whether a module should be treated as a path or package to
# minimize stat calls. Could even go as far as to stat the directory the
# importer is in to detect changes and then cache all the info about what
# files were found (if stating directories is platform-dependent).
"""Load a Python source or bytecode file.""" """Loader base class for Python source.
Requires implementing the optional PEP 302 protocols as well as
source_mtime and source_path.
"""
@module_for_loader
def load_module(self, module):
    """Load a source module.

    The body receives a module object (it is handed straight to
    _load_module(), which reads ``module.__name__``), while the tests in
    this change call ``loader.load_module('<name>')`` with a name string.
    NOTE(review): the name-to-module conversion is presumably done by the
    module_for_loader decorator, whose body is outside this view -- confirm.

    Returns whatever _load_module() returns, i.e. the initialized module.

    """
    # _load_module is a method on this class, not a module-level function,
    # so it has to be reached through self.
    return self._load_module(module)
def _load_module(self, module):
"""Initialize a module from source."""
name = module.__name__
source_path = self.source_path(name)
code_object = self.get_code(module.__name__)
if not hasattr(module, '__file__'):
module.__file__ = source_path
if self.is_package(name):
module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]]
module.__package__ = module.__name__
if not hasattr(module, '__path__'):
module.__package__ = module.__package__.rpartition('.')[0]
exec(code_object, module.__dict__)
return module
def get_code(self, fullname):
    """Get a code object from source.

    The raw bytes of the module's source are fetched with
    self.get_data(self.source_path(fullname)), their line endings are
    normalized to '\\n', and the result is compiled in 'exec' mode with the
    source path recorded as the code object's filename.

    Whatever compile() raises for bad source (e.g. SyntaxError) propagates
    to the caller.

    """
    source_path = self.source_path(fullname)
    source = self.get_data(source_path)
    # Convert to universal newlines.  Every '\r\n' and every lone '\r' is
    # rewritten, not just the style used by the first line, so files with
    # mixed line endings are handled too.  '\r\n' must be replaced first;
    # otherwise each pair would turn into two newlines.
    source = source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
    return compile(source, source_path, 'exec', dont_inherit=True)
class PyPycLoader(PyLoader):

    """Loader base class for Python source and bytecode.

    Requires implementing the methods needed for PyLoader as well as
    bytecode_path and write_bytecode.

    """

    @module_for_loader
    def load_module(self, module):
        """Load a module from source or bytecode."""
        name = module.__name__
        source_path = self.source_path(name)
        bytecode_path = self.bytecode_path(name)
        # _load_module() only assigns __file__ when it is missing, so set it
        # here to let a bytecode-only module report its bytecode path.
        module.__file__ = source_path if source_path else bytecode_path
        return self._load_module(module)

    def get_code(self, fullname):
        """Get a code object from source or bytecode.

        Bytecode is used when its magic number matches and it is not older
        than the source.  Otherwise the source is compiled and, unless
        sys.dont_write_bytecode is set, fresh bytecode is handed to
        self.write_bytecode().

        Raises ImportError when unusable bytecode has no source to fall back
        on, or when neither source nor bytecode exists.

        """
        # XXX Care enough to make sure this call does not happen if the magic
        # number is bad?
        source_timestamp = self.source_mtime(fullname)
        # Try to use bytecode if it is available.
        bytecode_path = self.bytecode_path(fullname)
        if bytecode_path:
            data = self.get_data(bytecode_path)
            # Layout: 4 bytes of magic, 4 bytes of timestamp, then the
            # marshalled code object.
            magic = data[:4]
            pyc_timestamp = marshal._r_long(data[4:8])
            bytecode = data[8:]
            try:
                # Verify that the magic number is valid.
                if imp.get_magic() != magic:
                    raise ImportError("bad magic number")
                # Verify that the bytecode is not stale (only matters when
                # there is source to fall back on).  The test is against
                # None, like every other check in this method, so that a
                # timestamp of 0 still counts as "source exists".
                if source_timestamp is not None:
                    if pyc_timestamp < source_timestamp:
                        raise ImportError("bytecode is stale")
            except ImportError:
                # If source is available give it a shot; otherwise there is
                # nothing to fall back on, so let the error propagate.
                if source_timestamp is None:
                    raise
            else:
                # Bytecode seems fine, so try to use it.
                # XXX If the bytecode is ill-formed, would it be beneficial to
                # try for using source if available and issue a warning?
                return marshal.loads(bytecode)
        elif source_timestamp is None:
            raise ImportError("no source or bytecode available to create code "
                                "object for {0!r}".format(fullname))
        # Use the source.
        code_object = super().get_code(fullname)
        # Generate bytecode and write it out.
        if not sys.dont_write_bytecode:
            data = bytearray(imp.get_magic())
            data.extend(marshal._w_long(source_timestamp))
            data.extend(marshal.dumps(code_object))
            self.write_bytecode(fullname, data)
        return code_object
class PyFileLoader(PyLoader):
"""Load a Python source file."""
def __init__(self, name, path, is_pkg): def __init__(self, name, path, is_pkg):
self._name = name self._name = name
@ -354,29 +461,6 @@ class _PyFileLoader:
# Not a property so that it is easy to override. # Not a property so that it is easy to override.
return self._find_path(imp.PY_SOURCE) return self._find_path(imp.PY_SOURCE)
@check_name
def bytecode_path(self, fullname):
"""Return the path to a bytecode file, or None if one does not
exist."""
# Not a property for easy overriding.
return self._find_path(imp.PY_COMPILED)
@module_for_loader
def load_module(self, module):
"""Load a Python source or bytecode module."""
name = module.__name__
source_path = self.source_path(name)
bytecode_path = self.bytecode_path(name)
code_object = self.get_code(module.__name__)
module.__file__ = source_path if source_path else bytecode_path
module.__loader__ = self
if self.is_package(name):
module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]]
module.__package__ = module.__name__
if not hasattr(module, '__path__'):
module.__package__ = module.__package__.rpartition('.')[0]
exec(code_object, module.__dict__)
return module
@check_name @check_name
def source_mtime(self, name): def source_mtime(self, name):
@ -405,6 +489,34 @@ class _PyFileLoader:
# anything other than UTF-8. # anything other than UTF-8.
return open(source_path, encoding=encoding).read() return open(source_path, encoding=encoding).read()
def get_data(self, path):
    """Return the data from path as raw bytes.

    The file is opened for reading, read in full, and closed again before
    returning, including when the read raises.

    """
    file = _fileio._FileIO(path, 'r')
    try:
        return file.read()
    finally:
        # Close explicitly instead of leaving the handle to be reclaimed
        # whenever the file object happens to be garbage collected.
        file.close()
@check_name
def is_package(self, fullname):
    """Return a boolean based on whether the module is a package.

    The answer is the is-package flag stored on the loader
    (``self._is_pkg``); 'fullname' is not consulted by this body.

    Raises ImportError (like get_source) if the loader cannot handle the
    package.

    """
    # NOTE(review): the ImportError mentioned above is presumably raised by
    # the check_name decorator, which is defined outside this view -- confirm.
    return self._is_pkg
# XXX Rename _PyFileLoader throughout
class PyPycFileLoader(PyPycLoader, PyFileLoader):
"""Load a module from a source or bytecode file."""
@check_name
def bytecode_path(self, fullname):
    """Return the path to a bytecode file, or None if one does not
    exist.

    The lookup is delegated to self._find_path() with imp.PY_COMPILED as
    the file type; 'fullname' is not used by this body.

    """
    # Not a property for easy overriding.
    # NOTE(review): _find_path and check_name are defined outside this view;
    # the "None when missing" contract comes from the original docstring --
    # confirm against _find_path.
    return self._find_path(imp.PY_COMPILED)
@check_name @check_name
def write_bytecode(self, name, data): def write_bytecode(self, name, data):
"""Write out 'data' for the specified module, returning a boolean """Write out 'data' for the specified module, returning a boolean
@ -428,82 +540,6 @@ class _PyFileLoader:
else: else:
raise raise
def get_code(self, name):
"""Return the code object for the module."""
# XXX Care enough to make sure this call does not happen if the magic
# number is bad?
source_timestamp = self.source_mtime(name)
# Try to use bytecode if it is available.
bytecode_path = self.bytecode_path(name)
if bytecode_path:
data = self.get_data(bytecode_path)
magic = data[:4]
pyc_timestamp = marshal._r_long(data[4:8])
bytecode = data[8:]
try:
# Verify that the magic number is valid.
if imp.get_magic() != magic:
raise ImportError("bad magic number")
# Verify that the bytecode is not stale (only matters when
# there is source to fall back on.
if source_timestamp:
if pyc_timestamp < source_timestamp:
raise ImportError("bytcode is stale")
except ImportError:
# If source is available give it a shot.
if source_timestamp is not None:
pass
else:
raise
else:
# Bytecode seems fine, so try to use it.
# XXX If the bytecode is ill-formed, would it be beneficial to
# try for using source if available and issue a warning?
return marshal.loads(bytecode)
elif source_timestamp is None:
raise ImportError("no source or bytecode available to create code "
"object for {0!r}".format(name))
# Use the source.
source_path = self.source_path(name)
source = self.get_data(source_path)
# Convert to universal newlines.
line_endings = b'\n'
for index, c in enumerate(source):
if c == ord(b'\n'):
break
elif c == ord(b'\r'):
line_endings = b'\r'
try:
if source[index+1] == ord(b'\n'):
line_endings += b'\n'
except IndexError:
pass
break
if line_endings != b'\n':
source = source.replace(line_endings, b'\n')
code_object = compile(source, source_path, 'exec', dont_inherit=True)
# Generate bytecode and write it out.
if not sys.dont_write_bytecode:
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_timestamp))
data.extend(marshal.dumps(code_object))
self.write_bytecode(name, data)
return code_object
def get_data(self, path):
"""Return the data from path as raw bytes."""
return _fileio._FileIO(path, 'r').read()
@check_name
def is_package(self, fullname):
"""Return a boolean based on whether the module is a package.
Raises ImportError (like get_source) if the loader cannot handle the
package.
"""
return self._is_pkg
class FileFinder: class FileFinder:
@ -583,7 +619,7 @@ class PyFileFinder(FileFinder):
"""Importer for source/bytecode files.""" """Importer for source/bytecode files."""
_possible_package = True _possible_package = True
_loader = _PyFileLoader _loader = PyFileLoader
def __init__(self, path_entry): def __init__(self, path_entry):
# Lack of imp during class creation means _suffixes is set here. # Lack of imp during class creation means _suffixes is set here.
@ -597,6 +633,8 @@ class PyPycFileFinder(PyFileFinder):
"""Finder for source and bytecode files.""" """Finder for source and bytecode files."""
_loader = PyPycFileLoader
def __init__(self, path_entry): def __init__(self, path_entry):
super().__init__(path_entry) super().__init__(path_entry)
self._suffixes += suffix_list(imp.PY_COMPILED) self._suffixes += suffix_list(imp.PY_COMPILED)

View file

@ -19,7 +19,7 @@ class SimpleTest(unittest.TestCase):
# [basic] # [basic]
def test_module(self): def test_module(self):
with source_util.create_modules('_temp') as mapping: with source_util.create_modules('_temp') as mapping:
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
module = loader.load_module('_temp') module = loader.load_module('_temp')
self.assert_('_temp' in sys.modules) self.assert_('_temp' in sys.modules)
check = {'__name__': '_temp', '__file__': mapping['_temp'], check = {'__name__': '_temp', '__file__': mapping['_temp'],
@ -29,7 +29,7 @@ class SimpleTest(unittest.TestCase):
def test_package(self): def test_package(self):
with source_util.create_modules('_pkg.__init__') as mapping: with source_util.create_modules('_pkg.__init__') as mapping:
loader = importlib._PyFileLoader('_pkg', mapping['_pkg.__init__'], loader = importlib.PyPycFileLoader('_pkg', mapping['_pkg.__init__'],
True) True)
module = loader.load_module('_pkg') module = loader.load_module('_pkg')
self.assert_('_pkg' in sys.modules) self.assert_('_pkg' in sys.modules)
@ -42,7 +42,7 @@ class SimpleTest(unittest.TestCase):
def test_lacking_parent(self): def test_lacking_parent(self):
with source_util.create_modules('_pkg.__init__', '_pkg.mod')as mapping: with source_util.create_modules('_pkg.__init__', '_pkg.mod')as mapping:
loader = importlib._PyFileLoader('_pkg.mod', mapping['_pkg.mod'], loader = importlib.PyPycFileLoader('_pkg.mod', mapping['_pkg.mod'],
False) False)
module = loader.load_module('_pkg.mod') module = loader.load_module('_pkg.mod')
self.assert_('_pkg.mod' in sys.modules) self.assert_('_pkg.mod' in sys.modules)
@ -57,7 +57,7 @@ class SimpleTest(unittest.TestCase):
def test_module_reuse(self): def test_module_reuse(self):
with source_util.create_modules('_temp') as mapping: with source_util.create_modules('_temp') as mapping:
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
module = loader.load_module('_temp') module = loader.load_module('_temp')
module_id = id(module) module_id = id(module)
module_dict_id = id(module.__dict__) module_dict_id = id(module.__dict__)
@ -87,7 +87,7 @@ class SimpleTest(unittest.TestCase):
setattr(orig_module, attr, value) setattr(orig_module, attr, value)
with open(mapping[name], 'w') as file: with open(mapping[name], 'w') as file:
file.write('+++ bad syntax +++') file.write('+++ bad syntax +++')
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
self.assertRaises(SyntaxError, loader.load_module, name) self.assertRaises(SyntaxError, loader.load_module, name)
for attr in attributes: for attr in attributes:
self.assertEqual(getattr(orig_module, attr), value) self.assertEqual(getattr(orig_module, attr), value)
@ -97,7 +97,7 @@ class SimpleTest(unittest.TestCase):
with source_util.create_modules('_temp') as mapping: with source_util.create_modules('_temp') as mapping:
with open(mapping['_temp'], 'w') as file: with open(mapping['_temp'], 'w') as file:
file.write('=') file.write('=')
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
self.assertRaises(SyntaxError, loader.load_module, '_temp') self.assertRaises(SyntaxError, loader.load_module, '_temp')
self.assert_('_temp' not in sys.modules) self.assert_('_temp' not in sys.modules)
@ -112,7 +112,7 @@ class DontWriteBytecodeTest(unittest.TestCase):
@source_util.writes_bytecode @source_util.writes_bytecode
def run_test(self, assertion): def run_test(self, assertion):
with source_util.create_modules('_temp') as mapping: with source_util.create_modules('_temp') as mapping:
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
loader.load_module('_temp') loader.load_module('_temp')
bytecode_path = source_util.bytecode_path(mapping['_temp']) bytecode_path = source_util.bytecode_path(mapping['_temp'])
assertion(bytecode_path) assertion(bytecode_path)
@ -144,7 +144,7 @@ class BadDataTest(unittest.TestCase):
with open(bytecode_path, 'r+b') as file: with open(bytecode_path, 'r+b') as file:
file.seek(0) file.seek(0)
file.write(b'\x00\x00\x00\x00') file.write(b'\x00\x00\x00\x00')
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
self.assertRaises(ImportError, loader.load_module, '_temp') self.assertRaises(ImportError, loader.load_module, '_temp')
self.assert_('_temp' not in sys.modules) self.assert_('_temp' not in sys.modules)
@ -159,7 +159,7 @@ class SourceBytecodeInteraction(unittest.TestCase):
""" """
def import_(self, file, module, *, pkg=False): def import_(self, file, module, *, pkg=False):
loader = importlib._PyFileLoader(module, file, pkg) loader = importlib.PyPycFileLoader(module, file, pkg)
return loader.load_module(module) return loader.load_module(module)
def run_test(self, test, *create, pkg=False): def run_test(self, test, *create, pkg=False):
@ -171,7 +171,7 @@ class SourceBytecodeInteraction(unittest.TestCase):
import_name = test.rsplit('.', 1)[0] import_name = test.rsplit('.', 1)[0]
else: else:
import_name = test import_name = test
loader = importlib._PyFileLoader(import_name, mapping[test], pkg) loader = importlib.PyPycFileLoader(import_name, mapping[test], pkg)
# Because some platforms only have a granularity to the second for # Because some platforms only have a granularity to the second for
# atime you can't check the physical files. Instead just make it an # atime you can't check the physical files. Instead just make it an
# exception trigger if source was read. # exception trigger if source was read.
@ -212,7 +212,7 @@ class BadBytecodeTest(unittest.TestCase):
""" """
def import_(self, file, module_name): def import_(self, file, module_name):
loader = importlib._PyFileLoader(module_name, file, False) loader = importlib.PyPycFileLoader(module_name, file, False)
module = loader.load_module(module_name) module = loader.load_module(module_name)
self.assert_(module_name in sys.modules) self.assert_(module_name in sys.modules)

View file

@ -35,7 +35,7 @@ class EncodingTest(unittest.TestCase):
with source_util.create_modules(self.module_name) as mapping: with source_util.create_modules(self.module_name) as mapping:
with open(mapping[self.module_name], 'wb')as file: with open(mapping[self.module_name], 'wb')as file:
file.write(source) file.write(source)
loader = importlib._PyFileLoader(self.module_name, loader = importlib.PyPycFileLoader(self.module_name,
mapping[self.module_name], False) mapping[self.module_name], False)
return loader.load_module(self.module_name) return loader.load_module(self.module_name)
@ -96,7 +96,7 @@ class LineEndingTest(unittest.TestCase):
with source_util.create_modules(module_name) as mapping: with source_util.create_modules(module_name) as mapping:
with open(mapping[module_name], 'wb') as file: with open(mapping[module_name], 'wb') as file:
file.write(source) file.write(source)
loader = importlib._PyFileLoader(module_name, mapping[module_name], loader = importlib.PyPycFileLoader(module_name, mapping[module_name],
False) False)
return loader.load_module(module_name) return loader.load_module(module_name)