#598: first cut of 'tahoe backup' command: no backupdb, but yes shared-unchanged-directories and Archives/TIMESTAMP and Latest/

Brian Warner 2009-02-02 21:09:02 -07:00
parent 91dacc3dbc
commit cfce8b5eab
3 changed files with 394 additions and 0 deletions

src/allmydata/scripts/cli.py

@@ -190,6 +190,20 @@ class LnOptions(VDriveOptions):
    def getSynopsis(self):
        return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)

class BackupOptions(VDriveOptions):
    optFlags = [
        ("verbose", "v", "Be noisy about what is happening."),
        ]
    def parseArgs(self, localdir, topath):
        self.from_dir = localdir
        self.to_dir = topath
    def getSynopsis(self):
        return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
    longdesc = """Add a versioned backup of the local FROM directory to a
    timestamped subdir of the (tahoe) TO/Archives directory, sharing as many
    files and directories as possible with the previous backup. Creates
    TO/Latest as a reference to the latest backup. Behaves somewhat like
    'rsync -a --link-dest=TO/Archives/(previous) FROM TO/Archives/(new);
    ln -sf TO/Archives/(new) TO/Latest'."""

class WebopenOptions(VDriveOptions):
    def parseArgs(self, where=''):
        self.where = where
@@ -266,6 +280,7 @@ subCommands = [
["rm", None, RmOptions, "Unlink a file or directory in the virtual drive."], ["rm", None, RmOptions, "Unlink a file or directory in the virtual drive."],
["mv", None, MvOptions, "Move a file within the virtual drive."], ["mv", None, MvOptions, "Move a file within the virtual drive."],
["ln", None, LnOptions, "Make an additional link to an existing file."], ["ln", None, LnOptions, "Make an additional link to an existing file."],
["backup", None, BackupOptions, "Make target dir look like local dir."],
["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"], ["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"],
["manifest", None, ManifestOptions, "List all files/dirs in a subtree"], ["manifest", None, ManifestOptions, "List all files/dirs in a subtree"],
["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"], ["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"],
@@ -337,6 +352,11 @@ def ln(options):
    rc = tahoe_mv.mv(options, mode="link")
    return rc

def backup(options):
    from allmydata.scripts import tahoe_backup
    rc = tahoe_backup.backup(options)
    return rc

def webopen(options, opener=None):
    from allmydata.scripts import tahoe_webopen
    rc = tahoe_webopen.webopen(options, opener=opener)
@@ -374,6 +394,7 @@ dispatch = {
"rm": rm, "rm": rm,
"mv": mv, "mv": mv,
"ln": ln, "ln": ln,
"backup": backup,
"webopen": webopen, "webopen": webopen,
"manifest": manifest, "manifest": manifest,
"stats": stats, "stats": stats,

src/allmydata/scripts/tahoe_backup.py (new file)

@@ -0,0 +1,264 @@
import os.path
import time
import urllib
import simplejson

from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
from allmydata.scripts.common_http import do_http
from allmydata import uri
from allmydata.util import time_format

def raiseHTTPError(msg, resp):
    msg = msg + ": %s %s %s" % (resp.status, resp.reason, resp.read())
    raise RuntimeError(msg)

def readonly(writedircap):
    return uri.from_string_dirnode(writedircap).get_readonly().to_string()

def parse_old_timestamp(s, options):
    try:
        if not s.endswith("Z"):
            raise ValueError
        # the "local" in this "localseconds" is superfluous and
        # misleading: this returns seconds-since-epoch for an
        # ISO-8601-ish-formatted UTC time string. This might raise
        # ValueError if the string is not in the right format.
        when = time_format.iso_utc_time_to_localseconds(s[:-1])
        return when
    except ValueError:
        pass
    try:
        # "2008-11-16 10.34 PM" (localtime)
        if s[-3:] in (" AM", " PM"):
            # this might raise ValueError
            when = time.mktime(time.strptime(s[:-3], "%Y-%m-%d %H.%M"))
            if s[-3:] == " PM":
                when += 12*60*60
            return when
    except ValueError:
        pass
    print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
    return None
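
# For illustration (hypothetical calls; the second result depends on the
# local timezone):
#   parse_old_timestamp("2009-02-02_21:09:02Z", options)  -> seconds-since-epoch
#   parse_old_timestamp("2008-11-16 10.34 PM", options)   -> seconds for 22:34, local
#   parse_old_timestamp("not-a-time", options)            -> None, plus a warning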

def readdir(dircap, options):
    # returns a dict of (childname: (type, readcap, metadata)), or None if
    # the dircap didn't point to a directory
    url = options['node-url'] + "uri/%s?t=json" % urllib.quote(dircap)
    resp = do_http("GET", url)
    if resp.status != 200:
        raiseHTTPError("Error during directory GET", resp)
    jd = simplejson.load(resp)
    ntype, ndata = jd
    if ntype != "dirnode":
        return None
    contents = {}
    for (childname, (childtype, childdata)) in ndata["children"].items():
        contents[childname] = (childtype,
                               str(childdata["ro_uri"]),
                               childdata["metadata"])
    return contents

def get_local_metadata(path):
    metadata = {}
    # posix stat(2) metadata, depends on the platform
    os.stat_float_times(True)
    s = os.stat(path)
    metadata["ctime"] = s.st_ctime
    metadata["mtime"] = s.st_mtime
    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    for field in misc_fields + macos_misc_fields:
        if hasattr(s, field):
            metadata[field] = getattr(s, field)
    # TODO: extended attributes, like on OS-X's HFS+
    return metadata
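
# A typical result on a Linux box might look like this (values hypothetical):
#   {"ctime": 1233644942.5, "mtime": 1233644942.5, "st_mode": 33188,
#    "st_ino": 131075, "st_dev": 2049, "st_uid": 1000, "st_gid": 1000}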

def mkdir(contents, options):
    url = options['node-url'] + "uri?t=mkdir"
    resp = do_http("POST", url)
    if resp.status < 200 or resp.status >= 300:
        raiseHTTPError("error during mkdir", resp)
    dircap = str(resp.read().strip())
    url = options['node-url'] + "uri/%s?t=set_children" % urllib.quote(dircap)
    body = dict([ (childname, (contents[childname][0],
                               {"ro_uri": contents[childname][1],
                                "metadata": contents[childname][2],
                                }))
                  for childname in contents
                  ])
    resp = do_http("POST", url, simplejson.dumps(body))
    if resp.status != 200:
        raiseHTTPError("error during set_children", resp)
    return dircap

def put_child(dirurl, childname, childcap):
    assert dirurl[-1] == "/"
    url = dirurl + urllib.quote(childname) + "?t=uri"
    resp = do_http("PUT", url, childcap)
    if resp.status not in (200, 201):
        raiseHTTPError("error during put_child", resp)

def directory_is_changed(a, b):
    # each is a mapping from childname to (type, cap, metadata)
    significant_metadata = ("ctime", "mtime")
    # other metadata keys are preserved, but changes to them won't trigger
    # a new backup
    if set(a.keys()) != set(b.keys()):
        return True
    for childname in a:
        a_type, a_cap, a_metadata = a[childname]
        b_type, b_cap, b_metadata = b[childname]
        if a_type != b_type:
            return True
        if a_cap != b_cap:
            return True
        for k in significant_metadata:
            if a_metadata.get(k) != b_metadata.get(k):
                return True
    return False
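
# For illustration (caps and times made up):
#   a = {"f": ("filenode", "URI:CHK:xyz", {"ctime": 5.0, "mtime": 5.0})}
#   directory_is_changed(a, dict(a))  -> False  (same cap, same timestamps)
#   b = {"f": ("filenode", "URI:CHK:xyz", {"ctime": 5.0, "mtime": 9.0})}
#   directory_is_changed(a, b)        -> True   (mtime differs)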

def backup(options):
    nodeurl = options['node-url']
    from_dir = options.from_dir
    to_dir = options.to_dir
    if options['quiet']:
        verbosity = 0
    else:
        verbosity = 2
    stdin = options.stdin
    stdout = options.stdout
    stderr = options.stderr

    rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
    to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
    if path:
        to_url += escape_path(path)
    if not to_url.endswith("/"):
        to_url += "/"

    archives_url = to_url + "Archives/"
    latest_url = to_url + "Latest"

    # first step: make sure the target directory exists, as well as the
    # Archives/ subdirectory.
    resp = do_http("GET", archives_url + "?t=json")
    if resp.status == 404:
        resp = do_http("POST", archives_url + "?t=mkdir")
        if resp.status != 200:
            print >>stderr, "Unable to create target directory: %s %s %s" % \
                  (resp.status, resp.reason, resp.read())
            return 1
        archives_dir = {}
    else:
        jdata = simplejson.load(resp)
        (otype, attrs) = jdata
        archives_dir = attrs["children"]

    # second step: locate the most recent backup in TODIR/Archives/*
    latest_backup_time = 0
    latest_backup_name = None
    latest_backup_dircap = None

    # we have various time formats. The allmydata.com windows backup tool
    # appears to create things like "2008-11-16 10.34 PM". This script
    # creates things like "2009-11-16_17:34:56Z".
    for archive_name in archives_dir.keys():
        if archives_dir[archive_name][0] != "dirnode":
            continue
        when = parse_old_timestamp(archive_name, options)
        if when is not None:
            if when > latest_backup_time:
                latest_backup_time = when
                latest_backup_name = archive_name
                latest_backup_dircap = str(archives_dir[archive_name][1]["ro_uri"])

    # third step: process the tree
    new_backup_dircap = Node().process(options.from_dir,
                                       latest_backup_dircap,
                                       options)
    print >>stdout, "new backup done"

    # fourth: attach the new backup to the list
    new_readonly_backup_dircap = readonly(new_backup_dircap)
    now = time_format.iso_utc(int(time.time()), sep="_") + "Z"
    put_child(archives_url, now, new_readonly_backup_dircap)
    put_child(to_url, "Latest", new_readonly_backup_dircap)
    print >>stdout, "backup done"

    # done!
    return 0
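
# For illustration, the archive name minted above comes out like
# "2009-02-03_04:09:02Z" (a hypothetical value): time_format.iso_utc() with
# sep="_" yields an ISO-8601 UTC timestamp, and the "Z" is appended by hand.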

class Node:
    def verboseprint(self, msg):
        if self.options["verbose"]:
            print >>self.options.stdout, msg

    def process(self, localpath, olddircap, options):
        # returns newdircap
        self.options = options
        self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap))
        olddircontents = {}
        if olddircap:
            olddircontents = readdir(olddircap, options)

        newdircontents = {} # childname -> (type, rocap, metadata)
        for child in os.listdir(localpath):
            childpath = os.path.join(localpath, child)
            if os.path.isdir(childpath):
                metadata = get_local_metadata(childpath)
                oldchildcap = None
                if olddircontents is not None and child in olddircontents:
                    oldchildcap = olddircontents[child][1]
                newchilddircap = self.recurse(childpath, oldchildcap)
                newdircontents[child] = ("dirnode", newchilddircap, metadata)
            elif os.path.isfile(childpath):
                newfilecap, metadata = self.upload(childpath)
                newdircontents[child] = ("filenode", newfilecap, metadata)
            else:
                raise RuntimeError("how do I back this up?")

        if (olddircap
            and olddircontents is not None
            and not directory_is_changed(newdircontents, olddircontents)
            ):
            self.verboseprint(" %s not changed, re-using old directory" % localpath)
            # yay! they're identical!
            return olddircap
        else:
            self.verboseprint(" %s changed, making new directory" % localpath)
            # something changed, or there was no previous directory, so we
            # must make a new directory
            newdircap = mkdir(newdircontents, options)
            return readonly(newdircap)

    def recurse(self, localpath, olddircap):
        n = self.__class__()
        return n.process(localpath, olddircap, self.options)

    def upload(self, childpath):
        self.verboseprint("uploading %s.." % childpath)
        # we can use the backupdb here
        #s = os.stat(childpath)
        # ...
        # if we go with the old file, we're obligated to use the old
        # metadata, to make sure it matches the metadata for this child in
        # the old parent directory
        #  return oldcap, old_metadata
        metadata = get_local_metadata(childpath)
        infileobj = open(os.path.expanduser(childpath), "rb")
        url = self.options['node-url'] + "uri"
        resp = do_http("PUT", url, infileobj)
        if resp.status not in (200, 201):
            raiseHTTPError("Error during file PUT", resp)
        filecap = resp.read().strip()
        self.verboseprint(" %s -> %s" % (childpath, filecap))
        self.verboseprint(" metadata: %s" % (metadata,))
        return filecap, metadata
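
The share-unchanged-directories behavior falls out of Node.process plus
directory_is_changed: when a directory's rebuilt contents mapping matches the
previous snapshot's, the old readonly dircap is linked into the new snapshot
instead of uploading a fresh directory. A minimal in-memory model of that
decision (fake caps, no HTTP; model_process, store, and mint are hypothetical
names invented for this sketch, reusing directory_is_changed from above):

    import itertools

    def model_process(localtree, oldcap, store, mint):
        # localtree: {name: file-body-string or sub-dict}
        # store: a fake grid, mapping dircap -> contents mapping
        # mint: callable producing a fresh fake dircap
        old = store.get(oldcap, {})
        new = {}
        for name, child in sorted(localtree.items()):
            if isinstance(child, dict):
                oldchildcap = old.get(name, (None, None, None))[1]
                childcap = model_process(child, oldchildcap, store, mint)
                new[name] = ("dirnode", childcap, {})
            else:
                new[name] = ("filenode", "fakecap:%s" % child, {})
        if oldcap is not None and not directory_is_changed(new, old):
            return oldcap  # unchanged: share the previous directory
        cap = mint()
        store[cap] = new
        return cap

    store = {}
    counter = itertools.count()
    mint = lambda: "dircap-%d" % counter.next()
    tree = {"docs": {"a.txt": "hello"}, "notes.txt": "hi"}
    cap1 = model_process(tree, None, store, mint)
    tree["notes.txt"] = "changed"
    cap2 = model_process(tree, cap1, store, mint)
    assert cap1 != cap2                                # top dir was rebuilt
    assert store[cap1]["docs"] == store[cap2]["docs"]  # subdir cap is shared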

src/allmydata/test/test_cli.py

@@ -4,6 +4,7 @@ import os.path
from twisted.trial import unittest
from cStringIO import StringIO
import urllib
import time

from allmydata.util import fileutil, hashutil
from allmydata import uri
@@ -617,3 +618,111 @@ class Cp(SystemTestMixin, CLITestMixin, unittest.TestCase):
d.addCallback(lambda res: self.do_cli("cp", "--recursive", d.addCallback(lambda res: self.do_cli("cp", "--recursive",
dn, "tahoe:")) dn, "tahoe:"))
return d return d

class Backup(SystemTestMixin, CLITestMixin, unittest.TestCase):

    def writeto(self, path, data):
        d = os.path.dirname(os.path.join(self.basedir, "home", path))
        fileutil.make_dirs(d)
        f = open(os.path.join(self.basedir, "home", path), "w")
        f.write(data)
        f.close()

    def test_backup(self):
        self.basedir = os.path.dirname(self.mktemp())

        # create a small local directory with a couple of files
        source = os.path.join(self.basedir, "home")
        fileutil.make_dirs(os.path.join(source, "empty"))
        self.writeto("parent/subdir/foo.txt", "foo")
        self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
        self.writeto("parent/blah.txt", "blah")

        d = self.set_up_nodes()
        d.addCallback(lambda res: self.do_cli("create-alias", "tahoe"))
        d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:backups"))
        def _check0((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
        d.addCallback(_check0)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups"))
        def _check1((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(sorted(out.split()), ["Archives", "Latest"])
        d.addCallback(_check1)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest"))
        def _check2((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(sorted(out.split()), ["empty", "parent"])
        d.addCallback(_check2)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty"))
        def _check2a((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(out.strip(), "")
        d.addCallback(_check2a)
        d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _check3((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(out, "foo")
        d.addCallback(_check3)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check4((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.old_archives = out.split()
            self.failUnlessEqual(len(self.old_archives), 1)
        d.addCallback(_check4)

        d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:backups"))
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check5((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.new_archives = out.split()
            self.failUnlessEqual(len(self.new_archives), 2)
            self.failUnlessEqual(sorted(self.new_archives)[0],
                                 self.old_archives[0])
        d.addCallback(_check5)

        def _modify(res):
            time.sleep(1) # get us to a new second
            self.writeto("parent/subdir/foo.txt", "FOOF!")
            # and turn a file into a directory
            os.unlink(os.path.join(source, "parent/blah.txt"))
            os.mkdir(os.path.join(source, "parent/blah.txt"))
            self.writeto("parent/blah.txt/surprise file", "surprise")
            self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
            # turn a directory into a file
            os.rmdir(os.path.join(source, "empty"))
            self.writeto("empty", "imagine nothing being here")
            return self.do_cli("backup", source, "tahoe:backups")
        d.addCallback(_modify)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check6((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.new_archives = out.split()
            self.failUnlessEqual(len(self.new_archives), 3)
            self.failUnlessEqual(sorted(self.new_archives)[0],
                                 self.old_archives[0])
        d.addCallback(_check6)
        d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _check7((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(out, "FOOF!")
            # the old snapshot should not be modified
            return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
        d.addCallback(_check7)
        def _check8((rc, out, err)):
            self.failUnlessEqual(err, "")
            self.failUnlessEqual(rc, 0)
            self.failUnlessEqual(out, "foo")
        d.addCallback(_check8)
        return d