#11999: sync based on comparing mtimes, not mtime to system clock

This commit is contained in:
R David Murray 2011-05-06 21:56:22 -04:00
parent 58d6b1b7a4
commit 8b26c4b8ea
3 changed files with 58 additions and 51 deletions

View file

@ -234,19 +234,24 @@ class Maildir(Mailbox):
def __init__(self, dirname, factory=rfc822.Message, create=True): def __init__(self, dirname, factory=rfc822.Message, create=True):
"""Initialize a Maildir instance.""" """Initialize a Maildir instance."""
Mailbox.__init__(self, dirname, factory, create) Mailbox.__init__(self, dirname, factory, create)
self._paths = {
'tmp': os.path.join(self._path, 'tmp'),
'new': os.path.join(self._path, 'new'),
'cur': os.path.join(self._path, 'cur'),
}
if not os.path.exists(self._path): if not os.path.exists(self._path):
if create: if create:
os.mkdir(self._path, 0700) os.mkdir(self._path, 0700)
os.mkdir(os.path.join(self._path, 'tmp'), 0700) for path in self._paths.values():
os.mkdir(os.path.join(self._path, 'new'), 0700) os.mkdir(path, 0o700)
os.mkdir(os.path.join(self._path, 'cur'), 0700)
else: else:
raise NoSuchMailboxError(self._path) raise NoSuchMailboxError(self._path)
self._toc = {} self._toc = {}
self._last_read = None # Records last time we read cur/new self._toc_mtimes = {}
# NOTE: we manually invalidate _last_read each time we do any for subdir in ('cur', 'new'):
# modifications ourselves, otherwise we might get tripped up by self._toc_mtimes[subdir] = os.path.getmtime(self._paths[subdir])
# bogus mtime behaviour on some systems (see issue #6896). self._last_read = time.time() # Records last time we read cur/new
self._skewfactor = 0.1 # Adjust if os/fs clocks are skewing
def add(self, message): def add(self, message):
"""Add message and return assigned key.""" """Add message and return assigned key."""
@ -283,15 +288,11 @@ class Maildir(Mailbox):
raise raise
if isinstance(message, MaildirMessage): if isinstance(message, MaildirMessage):
os.utime(dest, (os.path.getatime(dest), message.get_date())) os.utime(dest, (os.path.getatime(dest), message.get_date()))
# Invalidate cached toc
self._last_read = None
return uniq return uniq
def remove(self, key): def remove(self, key):
"""Remove the keyed message; raise KeyError if it doesn't exist.""" """Remove the keyed message; raise KeyError if it doesn't exist."""
os.remove(os.path.join(self._path, self._lookup(key))) os.remove(os.path.join(self._path, self._lookup(key)))
# Invalidate cached toc (only on success)
self._last_read = None
def discard(self, key): def discard(self, key):
"""If the keyed message exists, remove it.""" """If the keyed message exists, remove it."""
@ -326,8 +327,6 @@ class Maildir(Mailbox):
if isinstance(message, MaildirMessage): if isinstance(message, MaildirMessage):
os.utime(new_path, (os.path.getatime(new_path), os.utime(new_path, (os.path.getatime(new_path),
message.get_date())) message.get_date()))
# Invalidate cached toc
self._last_read = None
def get_message(self, key): def get_message(self, key):
"""Return a Message representation or raise a KeyError.""" """Return a Message representation or raise a KeyError."""
@ -383,8 +382,8 @@ class Maildir(Mailbox):
def flush(self): def flush(self):
"""Write any pending changes to disk.""" """Write any pending changes to disk."""
# Maildir changes are always written immediately, so there's nothing # Maildir changes are always written immediately, so there's nothing
# to do except invalidate our cached toc. # to do.
self._last_read = None pass
def lock(self): def lock(self):
"""Lock the mailbox.""" """Lock the mailbox."""
@ -482,36 +481,39 @@ class Maildir(Mailbox):
def _refresh(self): def _refresh(self):
"""Update table of contents mapping.""" """Update table of contents mapping."""
if self._last_read is not None: # If it has been less than two seconds since the last _refresh() call,
for subdir in ('new', 'cur'): # we have to unconditionally re-read the mailbox just in case it has
mtime = os.path.getmtime(os.path.join(self._path, subdir)) # been modified, because os.path.mtime() has a 2 sec resolution in the
if mtime > self._last_read: # most common worst case (FAT) and a 1 sec resolution typically. This
break # results in a few unnecessary re-reads when _refresh() is called
else: # multiple times in that interval, but once the clock ticks over, we
# will only re-read as needed. Because the filesystem might be being
# served by an independent system with its own clock, we record and
# compare with the mtimes from the filesystem. Because the other
# system's clock might be skewing relative to our clock, we add an
# extra delta to our wait. The default is one tenth second, but is an
# instance variable and so can be adjusted if dealing with a
# particularly skewed or irregular system.
if time.time() - self._last_read > 2 + self._skewfactor:
refresh = False
for subdir in self._toc_mtimes:
mtime = os.path.getmtime(self._paths[subdir])
if mtime > self._toc_mtimes[subdir]:
refresh = True
self._toc_mtimes[subdir] = mtime
if not refresh:
return return
# Refresh toc
# We record the current time - 1sec so that, if _refresh() is called
# again in the same second, we will always re-read the mailbox
# just in case it's been modified. (os.path.mtime() only has
# 1sec resolution.) This results in a few unnecessary re-reads
# when _refresh() is called multiple times in the same second,
# but once the clock ticks over, we will only re-read as needed.
now = time.time() - 1
self._toc = {} self._toc = {}
def update_dir (subdir): for subdir in self._toc_mtimes:
path = os.path.join(self._path, subdir) path = self._paths[subdir]
for entry in os.listdir(path): for entry in os.listdir(path):
p = os.path.join(path, entry) p = os.path.join(path, entry)
if os.path.isdir(p): if os.path.isdir(p):
continue continue
uniq = entry.split(self.colon)[0] uniq = entry.split(self.colon)[0]
self._toc[uniq] = os.path.join(subdir, entry) self._toc[uniq] = os.path.join(subdir, entry)
self._last_read = time.time()
update_dir('new')
update_dir('cur')
self._last_read = now
def _lookup(self, key): def _lookup(self, key):
"""Use TOC to return subpath for given key, or raise a KeyError.""" """Use TOC to return subpath for given key, or raise a KeyError."""

View file

@ -751,27 +751,28 @@ class TestMaildir(TestMailbox):
self.assertFalse((perms & 0111)) # Execute bits should all be off. self.assertFalse((perms & 0111)) # Execute bits should all be off.
def test_reread(self): def test_reread(self):
# Wait for 2 seconds
time.sleep(2)
# Initially, the mailbox has not been read and the time is null. # Put the last modified times more than two seconds into the past
assert getattr(self._box, '_last_read', None) is None # (because mtime may have only a two second granularity).
for subdir in ('cur', 'new'):
os.utime(os.path.join(self._box._path, subdir),
(time.time()-5,)*2)
# Refresh mailbox; the times should now be set to something. # Because mtime has a two second granularity in worst case (FAT), a
self._box._refresh() # refresh is done unconditionally if called for within
assert getattr(self._box, '_last_read', None) is not None # two-second-plus-a-bit of the last one, just in case the mbox has
# changed; so now we have to wait for that interval to expire.
time.sleep(2.01 + self._box._skewfactor)
# Try calling _refresh() again; the modification times shouldn't have # Re-reading causes the ._toc attribute to be assigned a new dictionary
# changed, so the mailbox should not be re-reading. Re-reading causes # object, so we'll check that the ._toc attribute isn't a different
# the ._toc attribute to be assigned a new dictionary object, so # object.
# we'll check that the ._toc attribute isn't a different object.
orig_toc = self._box._toc orig_toc = self._box._toc
def refreshed(): def refreshed():
return self._box._toc is not orig_toc return self._box._toc is not orig_toc
time.sleep(1) # Wait 1sec to ensure time.time()'s value changes
self._box._refresh() self._box._refresh()
assert not refreshed() self.assertFalse(refreshed())
# Now, write something into cur and remove it. This changes # Now, write something into cur and remove it. This changes
# the mtime and should cause a re-read. # the mtime and should cause a re-read.
@ -780,7 +781,7 @@ class TestMaildir(TestMailbox):
f.close() f.close()
os.unlink(filename) os.unlink(filename)
self._box._refresh() self._box._refresh()
assert refreshed() self.assertTrue(refreshed())
class _TestMboxMMDF(TestMailbox): class _TestMboxMMDF(TestMailbox):

View file

@ -77,6 +77,10 @@ Core and Builtins
Library Library
------- -------
- Issue 11999: fixed sporadic sync failure mailbox.Maildir due to its trying to
detect mtime changes by comparing to the system clock instead of to the
previous value of the mtime.
- Issue #10684: shutil.move used to delete a folder on case insensitive - Issue #10684: shutil.move used to delete a folder on case insensitive
filesystems when the source and destination name where the same except filesystems when the source and destination name where the same except
for the case. for the case.