Issue #13645: pyc files now contain the size of the corresponding source

code, to avoid timestamp collisions (especially on filesystems with a low
timestamp resolution) when checking for freshness of the bytecode.
This commit is contained in:
Antoine Pitrou 2012-01-13 18:52:16 +01:00
parent 1f918c1480
commit 5136ac0ca2
14 changed files with 167 additions and 49 deletions

View file

@ -331,25 +331,40 @@ class _LoaderBasics:
filename = self.get_filename(fullname).rpartition(path_sep)[2]
return filename.rsplit('.', 1)[0] == '__init__'
def _bytes_from_bytecode(self, fullname, data, source_mtime):
def _bytes_from_bytecode(self, fullname, data, source_stats):
"""Return the marshalled bytes from bytecode, verifying the magic
number and timestamp along the way.
number, timestamp and source size along the way.
If source_mtime is None then skip the timestamp check.
If source_stats is None then skip the timestamp check.
"""
magic = data[:4]
raw_timestamp = data[4:8]
raw_size = data[8:12]
if len(magic) != 4 or magic != imp.get_magic():
raise ImportError("bad magic number in {}".format(fullname))
elif len(raw_timestamp) != 4:
raise EOFError("bad timestamp in {}".format(fullname))
elif source_mtime is not None:
if marshal._r_long(raw_timestamp) != source_mtime:
raise ImportError("bytecode is stale for {}".format(fullname))
elif len(raw_size) != 4:
raise EOFError("bad size in {}".format(fullname))
if source_stats is not None:
try:
source_mtime = int(source_stats['mtime'])
except KeyError:
pass
else:
if marshal._r_long(raw_timestamp) != source_mtime:
raise ImportError("bytecode is stale for {}".format(fullname))
try:
source_size = source_stats['size'] & 0xFFFFFFFF
except KeyError:
pass
else:
if marshal._r_long(raw_size) != source_size:
raise ImportError("bytecode is stale for {}".format(fullname))
# Can't return the code object as errors from marshal loading need to
# propagate even when source is available.
return data[8:]
return data[12:]
@module_for_loader
def _load_module(self, module, *, sourceless=False):
@ -377,12 +392,21 @@ class SourceLoader(_LoaderBasics):
def path_mtime(self, path):
"""Optional method that returns the modification time (an int) for the
specified path, where path is a str.
Implementing this method allows the loader to read bytecode files.
"""
raise NotImplementedError
def path_stats(self, path):
"""Optional method returning a metadata dict for the specified path
to by the path (str).
Possible keys:
- 'mtime' (mandatory) is the numeric timestamp of last source
code modification;
- 'size' (optional) is the size in bytes of the source code.
Implementing this method allows the loader to read bytecode files.
"""
return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Optional method which writes data (bytes) to a file path (a str).
@ -407,7 +431,7 @@ class SourceLoader(_LoaderBasics):
def get_code(self, fullname):
"""Concrete implementation of InspectLoader.get_code.
Reading of bytecode requires path_mtime to be implemented. To write
Reading of bytecode requires path_stats to be implemented. To write
bytecode, set_data must also be implemented.
"""
@ -416,10 +440,11 @@ class SourceLoader(_LoaderBasics):
source_mtime = None
if bytecode_path is not None:
try:
source_mtime = self.path_mtime(source_path)
st = self.path_stats(source_path)
except NotImplementedError:
pass
else:
source_mtime = int(st['mtime'])
try:
data = self.get_data(bytecode_path)
except IOError:
@ -427,7 +452,7 @@ class SourceLoader(_LoaderBasics):
else:
try:
bytes_data = self._bytes_from_bytecode(fullname, data,
source_mtime)
st)
except (ImportError, EOFError):
pass
else:
@ -448,6 +473,7 @@ class SourceLoader(_LoaderBasics):
# throw an exception.
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_mtime))
data.extend(marshal._w_long(len(source_bytes)))
data.extend(marshal.dumps(code_object))
try:
self.set_data(bytecode_path, data)
@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader):
"""Concrete implementation of SourceLoader using the file system."""
def path_mtime(self, path):
"""Return the modification time for the path."""
return int(_os.stat(path).st_mtime)
def path_stats(self, path):
"""Return the metadat for the path."""
st = _os.stat(path)
return {'mtime': st.st_mtime, 'size': st.st_size}
def set_data(self, path, data):
"""Write bytes data to a file."""

View file

@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
def path_mtime(self, path):
"""Return the (int) modification time for the path (str)."""
raise NotImplementedError
if self.path_stats.__func__ is SourceLoader.path_stats:
raise NotImplementedError
return int(self.path_stats(path)['mtime'])
def path_stats(self, path):
"""Return a metadata dict for the source pointed to by the path (str).
Possible keys:
- 'mtime' (mandatory) is the numeric timestamp of last source
code modification;
- 'size' (optional) is the size in bytes of the source code.
"""
if self.path_mtime.__func__ is SourceLoader.path_mtime:
raise NotImplementedError
return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Write the bytes to the path (if possible).

View file

@ -5,6 +5,7 @@ from .. import abc as testing_abc
from .. import util
from . import util as source_util
import collections
import imp
import inspect
import io
@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
def __init__(self, path, magic=imp.get_magic()):
super().__init__(path)
self.bytecode_path = imp.cache_from_source(self.path)
self.source_size = len(self.source)
data = bytearray(magic)
data.extend(marshal._w_long(self.source_mtime))
data.extend(marshal._w_long(self.source_size))
code_object = compile(self.source, self.path, 'exec',
dont_inherit=True)
data.extend(marshal.dumps(code_object))
@ -56,9 +59,9 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
else:
raise IOError
def path_mtime(self, path):
def path_stats(self, path):
assert path == self.path
return self.source_mtime
return {'mtime': self.source_mtime, 'size': self.source_size}
def set_data(self, path, data):
self.written[path] = bytes(data)
@ -657,6 +660,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness):
self.assertIn(self.cached, self.loader.written)
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(self.loader.source_mtime))
data.extend(marshal._w_long(self.loader.source_size))
data.extend(marshal.dumps(code_object))
self.assertEqual(self.loader.written[self.cached], bytes(data))
@ -847,7 +851,7 @@ class AbstractMethodImplTests(unittest.TestCase):
# Required abstractmethods.
self.raises_NotImplementedError(ins, 'get_filename', 'get_data')
# Optional abstractmethods.
self.raises_NotImplementedError(ins,'path_mtime', 'set_data')
self.raises_NotImplementedError(ins,'path_stats', 'set_data')
def test_PyLoader(self):
self.raises_NotImplementedError(self.PyLoader(), 'source_path',

View file

@ -70,11 +70,6 @@ class SimpleTest(unittest.TestCase):
module_dict_id = id(module.__dict__)
with open(mapping['_temp'], 'w') as file:
file.write("testing_var = 42\n")
# For filesystems where the mtime is only to a second granularity,
# everything that has happened above can be too fast;
# force an mtime on the source that is guaranteed to be different
# than the original mtime.
loader.path_mtime = self.fake_mtime(loader.path_mtime)
module = loader.load_module('_temp')
self.assertTrue('testing_var' in module.__dict__,
"'testing_var' not in "
@ -190,10 +185,17 @@ class BadBytecodeTest(unittest.TestCase):
del_source=del_source)
test('_temp', mapping, bc_path)
def _test_partial_size(self, test, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:11],
del_source=del_source)
test('_temp', mapping, bc_path)
def _test_no_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8],
lambda bc: bc[:12],
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bc_path
with self.assertRaises(EOFError):
@ -202,7 +204,7 @@ class BadBytecodeTest(unittest.TestCase):
def _test_non_code_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8] + marshal.dumps(b'abcd'),
lambda bc: bc[:12] + marshal.dumps(b'abcd'),
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ImportError):
@ -211,7 +213,7 @@ class BadBytecodeTest(unittest.TestCase):
def _test_bad_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8] + b'<test>',
lambda bc: bc[:12] + b'<test>',
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(EOFError):
@ -235,7 +237,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_empty_file(test)
@ -243,7 +245,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_partial_magic(test)
@ -254,7 +256,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_magic_only(test)
@ -276,10 +278,21 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_partial_timestamp(test)
@source_util.writes_bytecode_files
def test_partial_size(self):
# When the size is partial, regenerate the .pyc, else
# raise EOFError.
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
self.assertGreater(len(file.read()), 12)
self._test_partial_size(test)
@source_util.writes_bytecode_files
def test_no_marshal(self):
# When there is only the magic number and timestamp, raise EOFError.
@ -375,6 +388,13 @@ class SourcelessLoaderBadBytecodeTest(BadBytecodeTest):
self._test_partial_timestamp(test, del_source=True)
def test_partial_size(self):
def test(name, mapping, bytecode_path):
with self.assertRaises(EOFError):
self.import_(bytecode_path, name)
self._test_partial_size(test, del_source=True)
def test_no_marshal(self):
self._test_no_marshal(del_source=True)