mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-20 21:43:09 +00:00
#598: first cut of 'tahoe backup' command: no backupdb, but yes shared-unchanged-directories and Archives/TIMESTAMP and Latest/
This commit is contained in:
parent
91dacc3dbc
commit
cfce8b5eab
@ -190,6 +190,20 @@ class LnOptions(VDriveOptions):
|
||||
def getSynopsis(self):
    """Return the one-line usage summary for the 'ln' subcommand."""
    # the program name is taken from however this script was invoked
    program = os.path.basename(sys.argv[0])
    return "%s ln FROM TO" % (program,)
|
||||
|
||||
class BackupOptions(VDriveOptions):
    # Options for the 'backup' subcommand: one optional --verbose/-v flag
    # plus two required positional arguments (local source directory and
    # the tahoe-side destination).
    optFlags = [
        ("verbose", "v", "Be noisy about what is happening."),
        ]

    def parseArgs(self, localdir, topath):
        # stash the positional arguments; tahoe_backup.backup() reads them
        # back as options.from_dir and options.to_dir
        self.from_dir = localdir
        self.to_dir = topath

    def getSynopsis(self):
        # one-line usage string, prefixed with the invoked program name
        return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])

    longdesc = """Add a versioned backup of the local FROM directory to a timestamped subdir of the (tahoe) TO/Archives directory, sharing as many files and directories as possible with the previous backup. Creates TO/Latest as a reference to the latest backup. Behaves somewhat like 'rsync -a --link-dest=TO/Archives/(previous) FROM TO/Archives/(new); ln -sf TO/Archives/(new) TO/Latest'."""
|
||||
|
||||
class WebopenOptions(VDriveOptions):
|
||||
def parseArgs(self, where=''):
|
||||
self.where = where
|
||||
@ -266,6 +280,7 @@ subCommands = [
|
||||
["rm", None, RmOptions, "Unlink a file or directory in the virtual drive."],
|
||||
["mv", None, MvOptions, "Move a file within the virtual drive."],
|
||||
["ln", None, LnOptions, "Make an additional link to an existing file."],
|
||||
["backup", None, BackupOptions, "Make target dir look like local dir."],
|
||||
["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"],
|
||||
["manifest", None, ManifestOptions, "List all files/dirs in a subtree"],
|
||||
["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"],
|
||||
@ -337,6 +352,11 @@ def ln(options):
|
||||
rc = tahoe_mv.mv(options, mode="link")
|
||||
return rc
|
||||
|
||||
def backup(options):
    """Dispatch the 'backup' subcommand and hand back its return code."""
    # imported inside the function, so the backup code is only loaded when
    # this subcommand actually runs
    from allmydata.scripts import tahoe_backup
    return tahoe_backup.backup(options)
|
||||
|
||||
def webopen(options, opener=None):
|
||||
from allmydata.scripts import tahoe_webopen
|
||||
rc = tahoe_webopen.webopen(options, opener=opener)
|
||||
@ -374,6 +394,7 @@ dispatch = {
|
||||
"rm": rm,
|
||||
"mv": mv,
|
||||
"ln": ln,
|
||||
"backup": backup,
|
||||
"webopen": webopen,
|
||||
"manifest": manifest,
|
||||
"stats": stats,
|
||||
|
264
src/allmydata/scripts/tahoe_backup.py
Normal file
264
src/allmydata/scripts/tahoe_backup.py
Normal file
@ -0,0 +1,264 @@
|
||||
|
||||
import os.path
|
||||
import time
|
||||
import urllib
|
||||
import simplejson
|
||||
from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
|
||||
from allmydata.scripts.common_http import do_http
|
||||
from allmydata import uri
|
||||
from allmydata.util import time_format
|
||||
|
||||
def raiseHTTPError(msg, resp):
    """Raise RuntimeError for a failed HTTP response.

    The exception text is 'msg' followed by the response's status, reason
    and body (the body is consumed by calling resp.read()).
    """
    details = (resp.status, resp.reason, resp.read())
    raise RuntimeError(msg + ": %s %s %s" % details)
|
||||
|
||||
def readonly(writedircap):
    """Return the read-only form of a directory cap, as a string."""
    dirnode_uri = uri.from_string_dirnode(writedircap)
    readonly_uri = dirnode_uri.get_readonly()
    return readonly_uri.to_string()
|
||||
|
||||
def parse_old_timestamp(s, options):
    """Turn the name of an existing archive directory into a timestamp.

    Two name formats are understood:

     * an ISO-8601-ish UTC string with a trailing "Z", handed to
       time_format.iso_utc_time_to_localseconds() without the "Z"
     * "2008-11-16 10.34 PM": local time on a 12-hour clock

    Returns seconds-since-epoch. If the name matches neither format, a
    warning is printed to options.stderr and None is returned.
    """
    try:
        if not s.endswith("Z"):
            raise ValueError
        # the "local" in this "localseconds" is superfluous and
        # misleading. This returns seconds-since-epoch for an
        # ISO-8601-ish-formatted UTC time string. This might raise
        # ValueError if the string is not in the right format.
        return time_format.iso_utc_time_to_localseconds(s[:-1])
    except ValueError:
        pass
    try:
        # "2008-11-16 10.34 PM" (localtime)
        suffix = s[-3:]
        if suffix in (" AM", " PM"):
            # this might raise ValueError
            parsed = time.strptime(s[:-3], "%Y-%m-%d %H.%M")
            # the AM/PM marker is handled by hand (rather than with %p) so
            # the result does not depend on the process locale
            if not 1 <= parsed.tm_hour <= 12:
                raise ValueError
            hour = parsed.tm_hour % 12   # 12.xx AM is 00.xx, 12.xx PM is 12.xx
            if suffix == " PM":
                hour += 12
            # convert the local broken-down time into seconds-since-epoch,
            # so it can be compared against the other format; -1 lets the
            # C library decide whether DST was in effect
            return time.mktime((parsed.tm_year, parsed.tm_mon, parsed.tm_mday,
                                hour, parsed.tm_min, 0, 0, 0, -1))
    except (ValueError, OverflowError):
        pass
    print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
    return None
|
||||
|
||||
def readdir(dircap, options):
    """Fetch the children of the directory named by 'dircap'.

    Issues a GET for the cap's t=json representation against
    options['node-url']. Returns a dict mapping childname to
    (type, readcap, metadata), or None if the cap does not point to a
    directory. A non-200 response is turned into a RuntimeError by
    raiseHTTPError().
    """
    nodeurl = options['node-url']
    url = nodeurl + "uri/%s?t=json" % (urllib.quote(dircap),)
    resp = do_http("GET", url)
    if resp.status != 200:
        raiseHTTPError("Error during directory GET", resp)
    # the JSON body is a two-element list: [nodetype, nodedata]
    ntype, ndata = simplejson.load(resp)
    if ntype != "dirnode":
        return None
    return dict([(name, (kind, str(info["ro_uri"]), info["metadata"]))
                 for (name, (kind, info)) in ndata["children"].items()])
|
||||
|
||||
def get_local_metadata(path):
    """Collect stat(2)-derived metadata for the local file or directory.

    Returns a dict that always has "ctime" and "mtime", plus whichever of
    the st_* fields listed below the platform's stat result provides.
    Raises OSError (from os.stat) if 'path' cannot be examined.
    """
    # posix stat(2) metadata, depends on the platform

    # Ask for floating-point timestamps. The switch only exists on older
    # interpreters (newer ones always return floats and have dropped the
    # function), so only flip it when it is available.
    if hasattr(os, "stat_float_times"):
        os.stat_float_times(True)
    s = os.stat(path)
    metadata = {
        "ctime": s.st_ctime,
        "mtime": s.st_mtime,
        }

    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    for field in misc_fields + macos_misc_fields:
        # not every platform provides every field, so copy what is there
        if hasattr(s, field):
            metadata[field] = getattr(s, field)

    # TODO: extended attributes, like on OS-X's HFS+
    return metadata
|
||||
|
||||
def mkdir(contents, options):
    """Create a new directory and fill it with 'contents' in one step.

    'contents' maps childname to (type, readcap, metadata). A new
    unlinked directory is created with a POST to uri?t=mkdir, then all
    children are attached with a single t=set_children POST. Returns the
    dircap string reported by the mkdir request. Either request failing
    raises RuntimeError (via raiseHTTPError).
    """
    nodeurl = options['node-url']
    resp = do_http("POST", nodeurl + "uri?t=mkdir")
    if not (200 <= resp.status < 300):
        raiseHTTPError("error during mkdir", resp)
    dircap = str(resp.read().strip())

    # set_children wants {childname: [type, {"ro_uri": .., "metadata": ..}]}
    children = {}
    for childname in contents:
        entry = contents[childname]
        children[childname] = (entry[0],
                               {"ro_uri": entry[1],
                                "metadata": entry[2],
                                })
    url = nodeurl + "uri/%s?t=set_children" % (urllib.quote(dircap),)
    resp = do_http("POST", url, simplejson.dumps(children))
    if resp.status != 200:
        raiseHTTPError("error during set_children", resp)
    return dircap
|
||||
|
||||
def put_child(dirurl, childname, childcap):
    """Attach 'childcap' to the directory at 'dirurl' as 'childname'.

    'dirurl' must end with a slash. The link is made with a t=uri PUT
    whose body is the cap. Any response other than 200 or 201 raises
    RuntimeError (via raiseHTTPError).
    """
    assert dirurl[-1] == "/"
    target = "".join([dirurl, urllib.quote(childname), "?t=uri"])
    resp = do_http("PUT", target, childcap)
    if resp.status in (200, 201):
        return
    raiseHTTPError("error during put_child", resp)
|
||||
|
||||
def directory_is_changed(a, b):
    """Report whether two directory listings differ in a way that matters.

    Both arguments map childname to (type, cap, metadata). The listings
    count as changed when their sets of child names differ, or when any
    child differs in type, cap, or one of the significant metadata keys.
    Returns True if changed, False otherwise.
    """
    # only these metadata keys can trigger a new backup; every other key
    # is preserved but ignored for the purposes of this comparison
    significant_metadata = ("ctime", "mtime")

    if set(a) != set(b):
        return True
    for name in a:
        new_type, new_cap, new_meta = a[name]
        old_type, old_cap, old_meta = b[name]
        if new_type != old_type or new_cap != old_cap:
            return True
        for key in significant_metadata:
            if new_meta.get(key) != old_meta.get(key):
                return True
    return False
|
||||
|
||||
def backup(options):
    """Back up options.from_dir into a new child of TO/Archives/.

    'options' supplies 'node-url', the alias table, the destination
    (options.to_dir, as ALIAS:PATH) and the stdout/stderr streams.

    Returns 0 on success, or 1 if the Archives/ directory could neither
    be read nor created. HTTP failures inside the helper functions
    surface as RuntimeError.
    """
    nodeurl = options['node-url']
    stdout = options.stdout
    stderr = options.stderr

    rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
    to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
    if path:
        to_url += escape_path(path)
    if not to_url.endswith("/"):
        to_url += "/"

    archives_url = to_url + "Archives/"

    # first step: make sure the target directory exists, as well as the
    # Archives/ subdirectory.
    resp = do_http("GET", archives_url + "?t=json")
    if resp.status == 404:
        resp = do_http("POST", archives_url + "?t=mkdir")
        if resp.status != 200:
            print >>stderr, "Unable to create target directory: %s %s %s" % \
                  (resp.status, resp.reason, resp.read())
            return 1
        archives_dir = {}
    elif resp.status != 200:
        # any other failure would not be JSON, so report it instead of
        # handing the error page to the JSON parser
        print >>stderr, "Unable to read target directory: %s %s %s" % \
              (resp.status, resp.reason, resp.read())
        return 1
    else:
        jdata = simplejson.load(resp)
        (otype, attrs) = jdata
        archives_dir = attrs["children"]

    # second step: locate the most recent backup in TODIR/Archives/*
    latest_backup_time = 0
    latest_backup_dircap = None

    # we have various time formats. The allmydata.com windows backup tool
    # appears to create things like "2008-11-16 10.34 PM". This script
    # names its archives with time_format.iso_utc(.., sep="_") plus a
    # trailing "Z" (see the fourth step below).
    for archive_name in archives_dir:
        if archives_dir[archive_name][0] != "dirnode":
            continue
        when = parse_old_timestamp(archive_name, options)
        # names that could not be parsed come back as None and are skipped
        if when is not None and when > latest_backup_time:
            latest_backup_time = when
            latest_backup_dircap = str(archives_dir[archive_name][1]["ro_uri"])

    # third step: process the tree
    new_backup_dircap = Node().process(options.from_dir,
                                       latest_backup_dircap,
                                       options)
    print >>stdout, "new backup done"

    # fourth: attach the new backup to the list
    new_readonly_backup_dircap = readonly(new_backup_dircap)
    now = time_format.iso_utc(int(time.time()), sep="_") + "Z"

    put_child(archives_url, now, new_readonly_backup_dircap)
    put_child(to_url, "Latest", new_readonly_backup_dircap)

    print >>stdout, "backup done"
    # done!
    return 0
|
||||
|
||||
|
||||
class Node:
    """Backs up one local directory, creating further Nodes for subdirs.

    process() is the entry point; it stores 'options' on the instance so
    that verboseprint(), recurse() and upload() can use it.
    """

    def verboseprint(self, msg):
        """Print 'msg' to options.stdout, but only in --verbose mode."""
        if self.options["verbose"]:
            print >>self.options.stdout, msg

    def process(self, localpath, olddircap, options):
        """Back up the local directory 'localpath'.

        'olddircap' is the cap of the same directory in the previous
        backup, or None if there is none. Returns the cap of a directory
        holding the current contents: 'olddircap' itself when nothing
        significant changed, otherwise the read-only cap of a newly
        created directory. Raises RuntimeError for a child that is
        neither a directory nor a regular file.
        """
        # returns newdircap
        self.options = options

        self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap))
        olddircontents = {}
        if olddircap:
            # this is None when olddircap turns out not to be a directory
            olddircontents = readdir(olddircap, options)

        newdircontents = {} # childname -> (type, rocap, metadata)
        for child in os.listdir(localpath):
            childpath = os.path.join(localpath, child)
            if os.path.isdir(childpath):
                metadata = get_local_metadata(childpath)
                oldchildcap = None
                if olddircontents is not None and child in olddircontents:
                    oldchildcap = olddircontents[child][1]
                newchilddircap = self.recurse(childpath, oldchildcap)
                newdircontents[child] = ("dirnode", newchilddircap, metadata)
            elif os.path.isfile(childpath):
                newfilecap, metadata = self.upload(childpath)
                newdircontents[child] = ("filenode", newfilecap, metadata)
            else:
                raise RuntimeError("how do I back this up?")

        if (olddircap
            and olddircontents is not None
            and not directory_is_changed(newdircontents, olddircontents)
            ):
            self.verboseprint(" %s not changed, re-using old directory" % localpath)
            # yay! they're identical!
            return olddircap
        else:
            self.verboseprint(" %s changed, making new directory" % localpath)
            # something changed, or there was no previous directory, so we
            # must make a new directory
            newdircap = mkdir(newdircontents, options)
            return readonly(newdircap)

    def recurse(self, localpath, olddircap):
        """Process a subdirectory with a fresh instance of this class."""
        n = self.__class__()
        return n.process(localpath, olddircap, self.options)

    def upload(self, childpath):
        """Upload one local file with a PUT to the node's "uri" endpoint.

        Returns (filecap, metadata), where metadata comes from
        get_local_metadata(). Raises RuntimeError (via raiseHTTPError) if
        the response status is neither 200 nor 201.
        """
        self.verboseprint("uploading %s.." % childpath)
        # we can use the backupdb here
        #s = os.stat(childpath)
        # ...
        # if we go with the old file, we're obligated to use the old
        # metadata, to make sure it matches the metadata for this child in
        # the old parent directory
        # return oldcap, old_metadata

        metadata = get_local_metadata(childpath)
        infileobj = open(os.path.expanduser(childpath), "rb")
        try:
            url = self.options['node-url'] + "uri"
            resp = do_http("PUT", url, infileobj)
            if resp.status not in (200, 201):
                raiseHTTPError("Error during file PUT", resp)
            filecap = resp.read().strip()
        finally:
            # one handle is opened per uploaded file; release it whether
            # or not the upload succeeded
            infileobj.close()
        self.verboseprint(" %s -> %s" % (childpath, filecap))
        self.verboseprint(" metadata: %s" % (metadata,))
        return filecap, metadata
|
||||
|
@ -4,6 +4,7 @@ import os.path
|
||||
from twisted.trial import unittest
|
||||
from cStringIO import StringIO
|
||||
import urllib
|
||||
import time
|
||||
|
||||
from allmydata.util import fileutil, hashutil
|
||||
from allmydata import uri
|
||||
@ -617,3 +618,111 @@ class Cp(SystemTestMixin, CLITestMixin, unittest.TestCase):
|
||||
d.addCallback(lambda res: self.do_cli("cp", "--recursive",
|
||||
dn, "tahoe:"))
|
||||
return d
|
||||
|
||||
class Backup(SystemTestMixin, CLITestMixin, unittest.TestCase):
    """System test for the 'tahoe backup' CLI command."""

    def writeto(self, path, data):
        """Write 'data' to home/<path> under basedir, creating parent dirs."""
        target = os.path.join(self.basedir, "home", path)
        fileutil.make_dirs(os.path.dirname(target))
        f = open(target, "w")
        f.write(data)
        f.close()

    def _succeeded(self, res):
        # shared check for every CLI result: nothing on stderr and rc == 0.
        # Returns the captured stdout for further inspection.
        rc, out, err = res
        self.failUnlessEqual(err, "")
        self.failUnlessEqual(rc, 0)
        return out

    def test_backup(self):
        self.basedir = os.path.dirname(self.mktemp())

        # create a small local directory with a couple of files
        source = os.path.join(self.basedir, "home")
        fileutil.make_dirs(os.path.join(source, "empty"))
        self.writeto("parent/subdir/foo.txt", "foo")
        self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
        self.writeto("parent/blah.txt", "blah")

        def cli(*args):
            # build a callback that ignores the previous result and runs
            # the given CLI command
            return lambda ign: self.do_cli(*args)

        d = self.set_up_nodes()
        d.addCallback(cli("create-alias", "tahoe"))

        # first backup: creates Archives/ (one entry) and Latest
        d.addCallback(cli("backup", source, "tahoe:backups"))
        def _first_backup_ok(res):
            self._succeeded(res)
        d.addCallback(_first_backup_ok)

        d.addCallback(cli("ls", "tahoe:backups"))
        def _toplevel_listing(res):
            out = self._succeeded(res)
            self.failUnlessEqual(sorted(out.split()), ["Archives", "Latest"])
        d.addCallback(_toplevel_listing)

        d.addCallback(cli("ls", "tahoe:backups/Latest"))
        def _latest_listing(res):
            out = self._succeeded(res)
            self.failUnlessEqual(sorted(out.split()), ["empty", "parent"])
        d.addCallback(_latest_listing)

        d.addCallback(cli("ls", "tahoe:backups/Latest/empty"))
        def _empty_dir_listing(res):
            out = self._succeeded(res)
            self.failUnlessEqual(out.strip(), "")
        d.addCallback(_empty_dir_listing)

        d.addCallback(cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _foo_contents(res):
            out = self._succeeded(res)
            self.failUnlessEqual(out, "foo")
        d.addCallback(_foo_contents)

        d.addCallback(cli("ls", "tahoe:backups/Archives"))
        def _one_archive(res):
            out = self._succeeded(res)
            self.old_archives = out.split()
            self.failUnlessEqual(len(self.old_archives), 1)
        d.addCallback(_one_archive)

        # second backup of the unmodified tree: adds a second archive and
        # leaves the first one in place
        d.addCallback(cli("backup", source, "tahoe:backups"))
        d.addCallback(cli("ls", "tahoe:backups/Archives"))
        def _two_archives(res):
            out = self._succeeded(res)
            self.new_archives = out.split()
            self.failUnlessEqual(len(self.new_archives), 2)
            self.failUnlessEqual(sorted(self.new_archives)[0],
                                 self.old_archives[0])
        d.addCallback(_two_archives)

        def _modify(res):
            time.sleep(1) # get us to a new second
            self.writeto("parent/subdir/foo.txt", "FOOF!")
            # and turn a file into a directory
            os.unlink(os.path.join(source, "parent/blah.txt"))
            os.mkdir(os.path.join(source, "parent/blah.txt"))
            self.writeto("parent/blah.txt/surprise file", "surprise")
            self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
            # turn a directory into a file
            os.rmdir(os.path.join(source, "empty"))
            self.writeto("empty", "imagine nothing being here")
            return self.do_cli("backup", source, "tahoe:backups")
        d.addCallback(_modify)

        d.addCallback(cli("ls", "tahoe:backups/Archives"))
        def _three_archives(res):
            out = self._succeeded(res)
            self.new_archives = out.split()
            self.failUnlessEqual(len(self.new_archives), 3)
            self.failUnlessEqual(sorted(self.new_archives)[0],
                                 self.old_archives[0])
        d.addCallback(_three_archives)

        d.addCallback(cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _latest_has_new_contents(res):
            out = self._succeeded(res)
            self.failUnlessEqual(out, "FOOF!")
            # the old snapshot should not be modified
            return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
        d.addCallback(_latest_has_new_contents)
        def _old_snapshot_untouched(res):
            out = self._succeeded(res)
            self.failUnlessEqual(out, "foo")
        d.addCallback(_old_snapshot_untouched)

        return d
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user