Merge pull request #457 from meejah/2882.magic-folder-metadata.0

ticket #2882: preserve user mtime
meejah 2018-02-06 17:38:42 -07:00 committed by GitHub
commit 0442b49846
3 changed files with 70 additions and 33 deletions


@@ -371,7 +371,7 @@ this procedure for an overwrite in response to a remote change:
    to obtain an initial classification as an overwrite or a
    conflict. (This takes as input the ``last_downloaded_uri``
    field from the directory entry of the changed ``foo``.)
-3. Set the ``mtime`` of the replacement file to be *T* seconds
+3. Set the ``mtime`` of the replacement file to be at least *T* seconds
    before the current local time. Stat the replacement file
    to obtain its ``mtime`` and ``ctime`` as stored in the local
    filesystem, and update the file's last-seen statinfo in
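
In other words, the replacement file is backdated before it is linked into place, and the statinfo recorded afterwards is what later change detection compares against. A rough sketch of that idea in Python (the T value and the helper name are illustrative only, not the project's API):

    import os
    import time

    T = 10  # illustrative; corresponds to FUDGE_SECONDS in the implementation

    def backdate_and_stat(replacement_path, user_mtime=None):
        # Backdate to at least T seconds before now; an even older
        # user-supplied mtime (the point of this PR) wins if present.
        now = time.time()
        backdated = now - T
        if user_mtime is not None:
            backdated = min(backdated, user_mtime)
        os.utime(replacement_path, (now, backdated))  # (atime, mtime)
        st = os.stat(replacement_path)
        return (st.st_mtime, st.st_ctime)  # last-seen statinfo to record in the db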
@@ -460,17 +460,17 @@ On Unix, for the case where the replaced file already exists, we have:
   ``foo.backup``, the other process' changes end up at ``foo``, and
   ours at ``foo.conflicted``. This avoids data loss.

-* Interleaving D: its rename happens after our link in step 4c,
-  and causes an ``IN_MOVED_TO`` event for ``foo``. Its rename also
-  changes the ``mtime`` for ``foo`` so that it is different from
-  the ``mtime`` calculated in step 3, and therefore different
-  from the metadata recorded for ``foo`` in the magic folder db.
-  (Assuming no system clock changes, its rename will set an ``mtime``
-  timestamp corresponding to a time after step 4c, which is not
-  equal to the timestamp *T* seconds before step 4a, provided that
-  *T* seconds is sufficiently greater than the timestamp granularity.)
-  Therefore, an upload will be triggered for ``foo`` after its
-  change, which is correct and avoids data loss.
+* Interleaving D: its rename happens after our link in step 4c, and
+  causes an ``IN_MOVED_TO`` event for ``foo``. Its rename also changes
+  the ``mtime`` for ``foo`` so that it is different from the ``mtime``
+  calculated in step 3, and therefore different from the metadata
+  recorded for ``foo`` in the magic folder db. (Assuming no system
+  clock changes, its rename will set an ``mtime`` timestamp
+  corresponding to a time after step 4c, which is after the timestamp
+  *T* seconds before step 4a, provided that *T* seconds is
+  sufficiently greater than the timestamp granularity.) Therefore, an
+  upload will be triggered for ``foo`` after its change, which is
+  correct and avoids data loss.

 If the replaced file did not already exist, an ``ENOENT`` error
 occurs at step 4b, and we continue with steps 4c and 4d. The other
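
To make the parenthetical argument concrete, here is a hypothetical timeline with *T* = 10 seconds and one-second timestamp granularity: the rival rename's mtime lands near the current time, while the recorded value is roughly *T* seconds older, so the two cannot match:

    T = 10                               # illustrative; FUDGE_SECONDS in the code
    step_4a_time = 1000.0
    recorded_mtime = step_4a_time - T    # 990.0, stored in the magic folder db
    rival_rename_mtime = 1000.3          # set by the other process after step 4c

    # Even at whole-second granularity the values differ by about T seconds,
    # so the mismatch against the db metadata triggers a re-upload of foo.
    assert int(rival_rename_mtime) != int(recorded_mtime)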
@@ -560,15 +560,15 @@ we have:
   to rename ``foo.other`` to ``foo`` both happen after all internal
   operations of `ReplaceFileW`_ have completed. This causes deletion
   and rename events for ``foo`` (which will in practice be merged due
-  to the pending delay, although we don't rely on that for correctness).
-  The rename also changes the ``mtime`` for ``foo`` so that it is
-  different from the ``mtime`` calculated in step 3, and therefore
-  different from the metadata recorded for ``foo`` in the magic folder
-  db. (Assuming no system clock changes, its rename will set an
-  ``mtime`` timestamp corresponding to a time after the internal
-  operations of `ReplaceFileW`_ have completed, which is not equal to
-  the timestamp *T* seconds before `ReplaceFileW`_ is called, provided
-  that *T* seconds is sufficiently greater than the timestamp
+  to the pending delay, although we don't rely on that for
+  correctness). The rename also changes the ``mtime`` for ``foo`` so
+  that it is different from the ``mtime`` calculated in step 3, and
+  therefore different from the metadata recorded for ``foo`` in the
+  magic folder db. (Assuming no system clock changes, its rename will
+  set an ``mtime`` timestamp corresponding to a time after the
+  internal operations of `ReplaceFileW`_ have completed, which is
+  after the timestamp *T* seconds before `ReplaceFileW`_ is called,
+  provided that *T* seconds is sufficiently greater than the timestamp
   granularity.) Therefore, an upload will be triggered for ``foo``
   after its change, which is correct and avoids data loss.


@@ -1,8 +1,8 @@
 import sys
 import time
 import shutil
-from os import mkdir, unlink, listdir
-from os.path import join, exists
+from os import mkdir, unlink, listdir, utime
+from os.path import join, exists, getmtime

 import util
@@ -23,6 +23,22 @@ def test_alice_writes_bob_receives(magic_folder):
     return


+def test_alice_writes_bob_receives_old_timestamp(magic_folder):
+    alice_dir, bob_dir = magic_folder
+    fname = join(alice_dir, "ts_file")
+    ts = time.time() - (60 * 60 * 36)  # 36 hours ago
+    with open(fname, "w") as f:
+        f.write("alice wrote this")
+    utime(fname, (time.time(), ts))
+
+    fname = join(bob_dir, "ts_file")
+    util.await_file_contents(fname, "alice wrote this")
+
+    # make sure the timestamp is correct
+    assert int(getmtime(fname)) == int(ts)
+    return
+
+
 def test_bob_writes_alice_receives(magic_folder):
     alice_dir, bob_dir = magic_folder
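
The new test relies on os.utime taking an (atime, mtime) pair, so the 36-hour-old timestamp goes in the second slot; a quick standalone check of that behaviour (temporary file chosen by tempfile, nothing magic-folder specific):

    import os
    import time
    import tempfile

    ts = time.time() - (60 * 60 * 36)   # 36 hours ago, as in the test
    fd, path = tempfile.mkstemp()
    os.close(fd)
    os.utime(path, (time.time(), ts))   # second element becomes the mtime
    assert int(os.path.getmtime(path)) == int(ts)
    os.unlink(path)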


@@ -703,10 +703,13 @@ class Uploader(QueueMixin):
                 self._count('objects_not_uploaded')
                 return False

+            # look out! there's another place we set a "metadata"
+            # object like this (for new, not deleted files)
             metadata = {
                 'version': new_version,
                 'deleted': True,
                 'last_downloaded_timestamp': last_downloaded_timestamp,
+                'user_mtime': pathinfo.ctime_ns / 1000000000.0, # why are we using ns in PathInfo??
             }
             if db_entry.last_downloaded_uri is not None:
                 metadata['last_downloaded_uri'] = db_entry.last_downloaded_uri
@@ -782,6 +785,7 @@
             metadata = {
                 'version': new_version,
                 'last_downloaded_timestamp': last_downloaded_timestamp,
+                'user_mtime': pathinfo.mtime_ns / 1000000000.0, # why are we using ns in PathInfo??
             }
             if db_entry is not None and db_entry.last_downloaded_uri is not None:
                 metadata['last_downloaded_uri'] = db_entry.last_downloaded_uri
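
In both branches user_mtime is stored as float seconds, converted from the nanosecond integers that PathInfo carries; note that the deleted-file branch reads ctime_ns while the regular branch reads mtime_ns. The float divisor keeps the division from truncating under Python 2. A small illustration with a made-up timestamp:

    mtime_ns = 1517955522123456789          # hypothetical PathInfo value, integer nanoseconds
    user_mtime = mtime_ns / 1000000000.0    # float seconds, as stored in the directory-entry metadata
    assert abs(user_mtime - 1517955522.123) < 0.001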
@@ -849,14 +853,23 @@ class WriteFileMixin(object):
     def _get_conflicted_filename(self, abspath_u):
         return abspath_u + u".conflict"

-    def _write_downloaded_file(self, local_path_u, abspath_u, file_contents, is_conflict=False, now=None):
-        self._log("_write_downloaded_file(%r, <%d bytes>, is_conflict=%r, now=%r)"
-                  % (abspath_u, len(file_contents), is_conflict, now))
+    def _write_downloaded_file(self, local_path_u, abspath_u, file_contents,
+                               is_conflict=False, now=None, mtime=None):
+        self._log(
+            ("_write_downloaded_file({abspath}, <{file_size} bytes>,"
+             " is_conflict={is_conflict}, now={now}, mtime={mtime})").format(
+                abspath=abspath_u,
+                file_size=len(file_contents),
+                is_conflict=is_conflict,
+                now=now,
+                mtime=mtime,
+            )
+        )
         # 1. Write a temporary file, say .foo.tmp.
         # 2. is_conflict determines whether this is an overwrite or a conflict.
         # 3. Set the mtime of the replacement file to be T seconds before the
-        #    current local time.
+        #    current local time, or mtime whichever is oldest
         # 4. Perform a file replacement with backup filename foo.backup,
         #    replaced file foo, and replacement file .foo.tmp. If any step of
         #    this operation fails, reclassify as a conflict and stop.
@@ -874,11 +887,14 @@ class WriteFileMixin(object):
         fileutil.write(replacement_path_u, file_contents)
         os.chmod(replacement_path_u, (~ self._umask) & 0666)

-        # FUDGE_SECONDS is used to determine if another process
-        # has written to the same file concurrently. This is described
-        # in the Earth Dragon section of our design document:
+        # FUDGE_SECONDS is used to determine if another process has
+        # written to the same file concurrently. This is described in
+        # the Earth Dragon section of our design document ("T" in the
+        # spec is FUDGE_SECONDS here):
         # docs/proposed/magic-folder/remote-to-local-sync.rst
-        os.utime(replacement_path_u, (now, now - self.FUDGE_SECONDS))
+        fudge_time = now - self.FUDGE_SECONDS
+        modified_time = min(fudge_time, mtime) if mtime else fudge_time
+        os.utime(replacement_path_u, (now, modified_time))
         if is_conflict:
             return self._rename_conflicted_file(abspath_u, replacement_path_u)
         else:
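
The modified_time expression keeps an old user_mtime when one was supplied, and otherwise falls back to the usual FUDGE_SECONDS backdating; a recent mtime is still pushed back so the Earth Dragon check keeps working. A hedged illustration with plain numbers (the real FUDGE_SECONDS constant may differ):

    FUDGE_SECONDS = 10.0   # illustrative stand-in for WriteFileMixin.FUDGE_SECONDS

    def choose_modified_time(now, mtime=None):
        fudge_time = now - FUDGE_SECONDS
        # Note: an mtime of exactly 0 (the epoch) is falsy and would fall
        # back to fudge_time; the expression quietly accepts that corner case.
        return min(fudge_time, mtime) if mtime else fudge_time

    now = 1000.0
    assert choose_modified_time(now) == 990.0               # no user mtime: backdate by FUDGE_SECONDS
    assert choose_modified_time(now, mtime=500.0) == 500.0  # old user mtime preserved
    assert choose_modified_time(now, mtime=999.0) == 990.0  # recent mtime still backdated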
@@ -1224,8 +1240,13 @@ class Downloader(QueueMixin, WriteFileMixin):
                 d.addCallback(lambda ign: self._rename_deleted_file(abspath_u))
             else:
                 d.addCallback(lambda ign: item.file_node.download_best_version(progress=item.progress))
-                d.addCallback(lambda contents: self._write_downloaded_file(self._local_path_u, abspath_u, contents,
-                                                                           is_conflict=is_conflict))
+                d.addCallback(
+                    lambda contents: self._write_downloaded_file(
+                        self._local_path_u, abspath_u, contents,
+                        is_conflict=is_conflict,
+                        mtime=item.metadata.get('user_mtime', item.metadata.get('tahoe', {}).get('linkmotime')),
+                    )
+                )

             d.addCallbacks(do_update_db)
             d.addErrback(failed)
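
On the download side, the uploader-recorded user_mtime is preferred and Tahoe's own linkmotime (the time the file was linked into the directory) serves as the fallback for entries written by older clients. A minimal sketch of just that lookup, with made-up metadata:

    def preferred_mtime(metadata):
        # user_mtime if the uploader recorded one, else Tahoe's linkmotime, else None
        return metadata.get('user_mtime',
                            metadata.get('tahoe', {}).get('linkmotime'))

    assert preferred_mtime({'user_mtime': 123.0, 'tahoe': {'linkmotime': 456.0}}) == 123.0
    assert preferred_mtime({'tahoe': {'linkmotime': 456.0}}) == 456.0
    assert preferred_mtime({}) is None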