add 'tahoe debug consolidate' command, to merge directories created by repeated 'tahoe cp -r' or the allmydata win32 backup tool, into the form that would have been created by 'tahoe backup'.

Author: Brian Warner
Date:   2009-03-12 13:56:06 -07:00
parent 969b758bf5
commit c4c6a62954
3 changed files with 680 additions and 0 deletions
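
For reference, here is a hypothetical invocation of the new subcommand, using the options added below (the alias, database filename, and backup filename are invented for illustration):

    tahoe debug consolidate --dbfile dirhash.db --backupfile archives.pickle tahoe:
    # after reviewing the output, re-run with --really to actually replace
    # the old mutable snapshots with their readonly equivalents:
    tahoe debug consolidate --dbfile dirhash.db --backupfile archives.pickle --really tahoe: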


@@ -0,0 +1,364 @@
import os, pickle
import sqlite3 as sqlite

import urllib
import simplejson
from allmydata.scripts.common_http import do_http
from allmydata.scripts.tahoe_backup import parse_old_timestamp, readonly, \
     raiseHTTPError, HTTPError
from allmydata.util import hashutil, base32
from allmydata.util.netstring import netstring
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS

TAG = "consolidator_dirhash_v1"

class CycleDetected(Exception):
    pass


class Consolidator:
    def __init__(self, options):
        self.options = options
        self.rootcap, path = get_alias(options.aliases, options.where,
                                       DEFAULT_ALIAS)
        assert path == ""
        self.dbfile = options["dbfile"]
        assert self.dbfile, "--dbfile is required"
        self.backupfile = options["backupfile"]
        assert self.backupfile, "--backupfile is required"
        self.nodeurl = options["node-url"]
        if not self.nodeurl.endswith("/"):
            self.nodeurl += "/"
        self.must_rescan_readonly_snapshots = not os.path.exists(self.dbfile)
        self.db = sqlite.connect(self.dbfile)
        self.cursor = self.db.cursor()
        try:
            self.cursor.execute("CREATE TABLE dirhashes"
                                "("
                                " dirhash TEXT PRIMARY KEY,"
                                " dircap TEXT"
                                ")")
        except sqlite.OperationalError, e:
            if "table dirhashes already exists" not in str(e):
                raise
        self.visited = set()
    def read_directory_json(self, dircap):
        url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap)
        resp = do_http("GET", url)
        if resp.status != 200:
            raiseHTTPError("Error during directory GET", resp)
        jd = simplejson.load(resp)
        ntype, ndata = jd
        if ntype != "dirnode":
            return None
        return ndata

    def msg(self, text):
        print >>self.options.stdout, text
    def err(self, text):
        print >>self.options.stderr, text

    def consolidate(self):
        try:
            data = self.read_directory_json(self.rootcap + "/Backups")
        except HTTPError:
            self.err("Unable to list /Backups, maybe this account has none?")
            return 1
        kids = data["children"]
        potential_systems = {}
        for (childname, (childtype, childdata)) in kids.items():
            if childtype != "dirnode":
                continue
            potential_systems[childname] = str(childdata["rw_uri"])
        backup_data = {"Backups": data, "systems": {}, "archives": {}}
        systems = {}
        for name, sdircap in potential_systems.items():
            sdata = self.read_directory_json(sdircap)
            kids = sdata["children"]
            if not u"Archives" in kids and not u"Latest Backup" in kids:
                self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % name)
                continue
            self.msg("%s is a system" % name)
            backup_data["systems"][name] = sdata
            archives_dircap = kids[u"Archives"][1]["rw_uri"]
            archives_data = self.read_directory_json(archives_dircap)
            backup_data["archives"][name] = archives_data
            systems[name] = archives_dircap
        if not systems:
            self.msg("No systems under /Backups, nothing to consolidate")
            return 0
        if not os.path.exists(self.backupfile):
            f = open(self.backupfile, "wb")
            pickle.dump(backup_data, f)
            f.close()
        for name, archives_dircap in sorted(systems.items()):
            self.do_system(name, archives_dircap)
        return 0
    def do_system(self, system_name, archives_dircap):
        # first we walk through the Archives list, looking for the existing
        # snapshots. Each one will have a $NAME like "2008-11-16 10.34 PM"
        # (in various forms: we use tahoe_backup.parse_old_timestamp to
        # interpret it). At first, they'll all have $NAME and be writecaps.
        # As we run, we will create $NAME-readonly (with a readcap) for each
        # one (the first one will just be the readonly equivalent of the
        # oldest snapshot: the others will be constructed out of shared
        # directories). When we're done we'll have a $NAME-readonly for
        # everything except the latest snapshot (to avoid any danger of
        # modifying a backup that's already in progress). The very last step,
        # which won't be enabled until we're sure that everything is working
        # correctly, will replace each $NAME with $NAME-readonly.

        # We maintain a table that maps dirhash (hash of directory contents)
        # to a directory readcap which contains those contents. We use this
        # to decide if we can link to an existing directory, or if we must
        # create a brand new one. Usually we add to this table in two places:
        # when we scan the oldest snapshot (which we've just converted to
        # readonly form), and when we must create a brand new one. If the
        # table doesn't exist (probably because we've manually deleted it),
        # we will scan *all* the existing readonly snapshots, and repopulate
        # the table. We keep this table in a SQLite database (rather than a
        # pickle) because we want to update it persistently after every
        # directory creation, and writing out a 10k-entry pickle takes about
        # 250ms.

        # 'snapshots' maps timestamp to [rwname, writecap, roname, readcap].
        # The possibilities are:
        #  [$NAME, writecap, None, None] : haven't touched it
        #  [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
        #  [None, None, $NAME, readcap] : processed and replaced
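        # As a purely hypothetical example (names and caps invented here for
        # illustration), a partially-processed Archives/ directory might
        # yield entries like:
        #  ts1: ["2008-11-16 10.34 PM", "URI:DIR2:aaa", None, None]
        #  ts2: ["2008-11-15 10.34 PM", "URI:DIR2:bbb",
        #        "2008-11-15 10.34 PM-readonly", "URI:DIR2-RO:ccc"]
        #  ts3: [None, None, "2008-11-14 10.34 PM", "URI:DIR2-RO:ddd"]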
self.msg("consolidating system %s" % system_name)
self.directories_reused = 0
self.directories_used_as_is = 0
self.directories_created = 0
data = self.read_directory_json(archives_dircap)
snapshots = {}
for (childname, (childtype, childdata)) in data["children"].items():
if childtype != "dirnode":
self.msg("non-dirnode %s in Archives/" % childname)
continue
timename = childname
if childname.endswith("-readonly"):
timename = childname[:-len("-readonly")]
timestamp = parse_old_timestamp(timename, self.options)
assert timestamp is not None, timename
snapshots.setdefault(timestamp, [None, None, None, None])
# if the snapshot is readonly (i.e. it has no rw_uri), we might
# need to re-scan it
is_readonly = not childdata.has_key("rw_uri")
if is_readonly:
readcap = str(childdata["ro_uri"])
if self.must_rescan_readonly_snapshots:
self.scan_old_directory(str(childdata["ro_uri"]))
snapshots[timestamp][2] = childname
snapshots[timestamp][3] = readcap
else:
writecap = str(childdata["rw_uri"])
snapshots[timestamp][0] = childname
snapshots[timestamp][1] = writecap
else:
self.msg(" No snapshots in Backups/Archives")
snapshots = [ [timestamp] + values
for (timestamp, values) in snapshots.items() ]
# now 'snapshots' is [timestamp, rwname, writecap, roname, readcap],
# which makes it easier to process in temporal order
snapshots.sort()
self.msg(" %d snapshots" % len(snapshots))
# we always ignore the last one, for safety
snapshots = snapshots[:-1]
first_snapshot = True
for (timestamp, rwname, writecap, roname, readcap) in snapshots:
start_created = self.directories_created
start_used_as_is = self.directories_used_as_is
start_reused = self.directories_reused
# [None, None, $NAME, readcap] : processed and replaced
# [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
# [$NAME, writecap, None, None] : haven't touched it
if readcap and not writecap:
# skip past anything we've already processed and replaced
assert roname
assert not rwname
first_snapshot = False
self.msg(" %s already readonly" % roname)
continue
if readcap and writecap:
# we've processed it, creating a -readonly version, but we
# haven't replaced it.
assert roname
assert rwname
first_snapshot = False
self.msg(" %s processed but not yet replaced" % roname)
if self.options["really"]:
self.msg(" replacing %s with %s" % (rwname, roname))
self.put_child(archives_dircap, rwname, readcap)
self.delete_child(archives_dircap, roname)
continue
assert writecap
assert rwname
assert not readcap
assert not roname
roname = rwname + "-readonly"
# for the oldest snapshot, we can do a simple readonly conversion
if first_snapshot:
first_snapshot = False
readcap = readonly(writecap)
self.directories_used_as_is += 1
self.msg(" %s: oldest snapshot, using as-is" % rwname)
self.scan_old_directory(readcap)
else:
# for the others, we must scan their contents and build up a new
# readonly directory (which shares common subdirs with previous
# backups)
self.msg(" %s: processing" % rwname)
readcap = self.process_directory(readonly(writecap))
if self.options["really"]:
self.msg(" replaced %s" % rwname)
self.put_child(archives_dircap, rwname, readcap)
else:
self.msg(" created %s" % roname)
self.put_child(archives_dircap, roname, readcap)
snapshot_created = self.directories_created - start_created
snapshot_used_as_is = self.directories_used_as_is - start_used_as_is
snapshot_reused = self.directories_reused - start_reused
self.msg(" %s: done: %d dirs created, %d used as-is, %d reused"
% (rwname,
snapshot_created, snapshot_used_as_is, snapshot_reused))
# done!
self.msg(" system done, %d dirs created, %d used as-is, %d reused" \
% (self.directories_created, self.directories_used_as_is,
self.directories_reused))
    def process_directory(self, readcap):
        # I walk all my children (recursing over any subdirectories), build
        # up a table of my contents, then see if I can re-use an old
        # directory with the same contents. If not, I create a new directory
        # for my contents. In all cases I return a directory readcap that
        # points to my contents.

        # build up contents to pass to mkdir() (which uses t=set_children)
        contents = {} # childname -> (type, rocap, metadata)
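        # e.g. after the loop below, 'contents' might hold (caps are
        # hypothetical, shown only for illustration):
        #   {u"foo.txt": ("filenode", "URI:CHK:xxx", None),
        #    u"subdir1": ("dirnode", "URI:DIR2-RO:yyy", None)}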
        data = self.read_directory_json(readcap)
        assert data is not None
        hashkids = []
        num_dirs = 0
        for (childname, (childtype, childdata)) in sorted(data["children"].items()):
            if childtype == "dirnode":
                num_dirs += 1
                childcap = self.process_directory(str(childdata["ro_uri"]))
                contents[childname] = ("dirnode", childcap, None)
            else:
                childcap = str(childdata["ro_uri"])
                contents[childname] = (childtype, childcap, None)
            hashkids.append( (childname, childcap) )

        dirhash = self.hash_directory_contents(hashkids)
        old_dircap = self.get_old_dirhash(dirhash)
        if old_dircap:
            assert isinstance(old_dircap, str)
            self.directories_reused += 1
            return old_dircap
        if num_dirs == 0:
            # we're allowed to re-use this directory
            new_dircap = readonly(readcap)
            assert isinstance(new_dircap, str)
            self.store_dirhash(dirhash, new_dircap)
            self.directories_used_as_is += 1
            return new_dircap
        # otherwise, we need to create a new directory
        new_dircap = readonly(self.mkdir(contents))
        assert isinstance(new_dircap, str)
        self.store_dirhash(dirhash, new_dircap)
        self.directories_created += 1
        return new_dircap
    def put_child(self, dircap, childname, childcap):
        url = self.nodeurl + "uri/%s/%s?t=uri" % (urllib.quote(dircap),
                                                  urllib.quote(childname))
        resp = do_http("PUT", url, childcap)
        if resp.status not in (200, 201):
            raiseHTTPError("error during put_child", resp)

    def delete_child(self, dircap, childname):
        url = self.nodeurl + "uri/%s/%s" % (urllib.quote(dircap),
                                            urllib.quote(childname))
        resp = do_http("DELETE", url)
        if resp.status not in (200, 201):
            raiseHTTPError("error during delete_child", resp)

    def mkdir(self, contents):
        url = self.nodeurl + "uri?t=mkdir"
        resp = do_http("POST", url)
        if resp.status < 200 or resp.status >= 300:
            raiseHTTPError("error during mkdir", resp)
        dircap = str(resp.read().strip())
        url = self.nodeurl + "uri/%s?t=set_children" % urllib.quote(dircap)
        body = dict([ (childname, (contents[childname][0],
                                   {"ro_uri": contents[childname][1],
                                    "metadata": contents[childname][2],
                                    }))
                      for childname in contents
                      ])
        resp = do_http("POST", url, simplejson.dumps(body))
        if resp.status != 200:
            raiseHTTPError("error during set_children", resp)
        return dircap

    def scan_old_directory(self, dircap):
        # scan this directory (recursively) and stash a hash of its contents
        # in the DB. This assumes that all subdirs can be used as-is (i.e.
        # they've already been declared immutable)
        dircap = readonly(dircap)
        if dircap in self.visited:
            raise CycleDetected
        self.visited.add(dircap)
        data = self.read_directory_json(dircap)
        kids = []
        for (childname, (childtype, childdata)) in data["children"].items():
            childcap = str(childdata["ro_uri"])
            if childtype == "dirnode":
                self.scan_old_directory(childcap)
            kids.append( (childname, childcap) )
        dirhash = self.hash_directory_contents(kids)
        self.store_dirhash(dirhash, dircap)
        return dirhash

    def hash_directory_contents(self, kids):
        kids.sort()
        s = "".join([netstring(childname.encode("utf-8"))+netstring(childcap)
                     for (childname, childcap) in kids])
        return hashutil.tagged_hash(TAG, s)

    def store_dirhash(self, dirhash, dircap):
        assert isinstance(dircap, str)
        # existing items should prevail
        try:
            c = self.cursor
            c.execute("INSERT INTO dirhashes (dirhash, dircap) VALUES (?,?)",
                      (base32.b2a(dirhash), dircap))
            self.db.commit()
        except sqlite.IntegrityError:
            # already present
            pass

    def get_old_dirhash(self, dirhash):
        self.cursor.execute("SELECT dircap FROM dirhashes WHERE dirhash=?",
                            (base32.b2a(dirhash),))
        row = self.cursor.fetchone()
        if not row:
            return None
        (dircap,) = row
        return str(dircap)


def main(options):
    c = Consolidator(options)
    return c.consolidate()
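
The dirhash table is what lets consolidation share directories: two directories with exactly the same (childname, childcap) pairs hash to the same key, so the later one can be replaced by a link to the earlier one. Below is a minimal standalone sketch of that idea; it uses hashlib.sha256 in place of allmydata.util.hashutil.tagged_hash, and the caps are invented, so treat it as an illustration rather than the real code path:

    import hashlib

    TAG = "consolidator_dirhash_v1"

    def netstring(s):
        # same framing as allmydata.util.netstring: "<length>:<bytes>,"
        return "%d:%s," % (len(s), s)

    def dirhash(kids):
        # kids is a list of (childname, childcap) pairs; sorting makes the
        # hash independent of listing order
        kids = sorted(kids)
        s = "".join([netstring(name.encode("utf-8")) + netstring(cap)
                     for (name, cap) in kids])
        return hashlib.sha256(netstring(TAG) + s).hexdigest()

    snap1 = [(u"bar.txt", "URI:CHK:fake-bar"), (u"baz.txt", "URI:CHK:fake-baz")]
    snap2 = [(u"baz.txt", "URI:CHK:fake-baz"), (u"bar.txt", "URI:CHK:fake-bar")]
    assert dirhash(snap1) == dirhash(snap2) # same contents, same key: reuse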


@@ -4,6 +4,7 @@
import struct, time, os
from twisted.python import usage, failure
from twisted.internet import defer
from allmydata.scripts.cli import VDriveOptions

class DumpOptions(usage.Options):
    def getSynopsis(self):

@@ -760,6 +761,21 @@ def repl(options):
    return code.interact()

class ConsolidateOptions(VDriveOptions):
    optParameters = [
        ("dbfile", None, None, "persistent file for reusable dirhashes"),
        ("backupfile", "b", None, "file to store backup of Archives/ contents"),
        ]
    optFlags = [
        ("really", None, "Really remove old snapshot directories"),
        ]
    def parseArgs(self, where):
        self.where = where

def consolidate(options):
    from consolidate import main; return main(options)

class DebugCommand(usage.Options):
    subCommands = [
        ["dump-share", None, DumpOptions,

@@ -769,6 +785,7 @@ class DebugCommand(usage.Options):
        ["catalog-shares", None, CatalogSharesOptions, "Describe shares in node dirs"],
        ["corrupt-share", None, CorruptShareOptions, "Corrupt a share"],
        ["repl", None, ReplOptions, "Open a python interpreter"],
        ["consolidate", None, ConsolidateOptions, "Consolidate non-shared backups"],
        ]
    def postOptions(self):
        if not hasattr(self, 'subOptions'):

@@ -784,6 +801,7 @@ Subcommands:
    tahoe debug find-shares      Locate sharefiles in node directories
    tahoe debug catalog-shares   Describe all shares in node dirs
    tahoe debug corrupt-share    Corrupt a share by flipping a bit.
    tahoe debug consolidate      Consolidate old non-shared backups into shared ones.

Please run e.g. 'tahoe debug dump-share --help' for more details on each
subcommand.

@@ -797,6 +815,7 @@ subDispatch = {
    "catalog-shares": catalog_shares,
    "corrupt-share": corrupt_share,
    "repl": repl,
    "consolidate": consolidate,
    }
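
The wiring above follows the existing 'tahoe debug' pattern: the new options class is listed in DebugCommand.subCommands and its handler function in subDispatch. A minimal self-contained sketch of that twisted.python.usage dispatch pattern (the names below are invented, not the Tahoe ones):

    from twisted.python import usage

    class GreetOptions(usage.Options):
        optFlags = [("loud", None, "Shout the greeting")]
        def parseArgs(self, where):
            self.where = where

    def greet(options):
        # handler receives the parsed sub-options instance
        msg = "hello %s" % options.where
        if options["loud"]:
            msg = msg.upper()
        print msg
        return 0

    class DemoCommand(usage.Options):
        subCommands = [
            ["greet", None, GreetOptions, "Print a greeting"],
            ]

    subDispatch = {
        "greet": greet,
        }

    def dispatch(argv):
        config = DemoCommand()
        config.parseOptions(argv) # e.g. ["greet", "--loud", "world"]
        return subDispatch[config.subCommand](config.subOptions)

    if __name__ == "__main__":
        import sys
        sys.exit(dispatch(sys.argv[1:]))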


@@ -0,0 +1,297 @@
import os
from cStringIO import StringIO
import pickle
from twisted.trial import unittest
from allmydata.test.no_network import GridTestMixin
from allmydata.util import fileutil
from allmydata.scripts import runner, debug
from allmydata.scripts.common import get_aliases
from twisted.internet import defer, threads # CLI tests use deferToThread
from allmydata.interfaces import IDirectoryNode

class CLITestMixin:
    def do_cli(self, verb, *args, **kwargs):
        nodeargs = [
            "--node-directory", self.get_clientdir(),
            ]
        if verb == "debug":
            argv = [verb, args[0]] + nodeargs + list(args[1:])
        else:
            argv = [verb] + nodeargs + list(args)
        stdin = kwargs.get("stdin", "")
        stdout, stderr = StringIO(), StringIO()
        d = threads.deferToThread(runner.runner, argv, run_by_human=False,
                                  stdin=StringIO(stdin),
                                  stdout=stdout, stderr=stderr)
        def _done(rc):
            return rc, stdout.getvalue(), stderr.getvalue()
        d.addCallback(_done)
        return d
class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):

    def writeto(self, path, data):
        d = os.path.dirname(os.path.join(self.basedir, "home", path))
        fileutil.make_dirs(d)
        f = open(os.path.join(self.basedir, "home", path), "w")
        f.write(data)
        f.close()

    def writeto_snapshot(self, sn, path, data):
        p = "Backups/fluxx/Archives/2009-03-%02d 01.01.01/%s" % (sn, path)
        return self.writeto(p, data)

    def do_cli_good(self, verb, *args, **kwargs):
        d = self.do_cli(verb, *args, **kwargs)
        def _check((rc,out,err)):
            self.failUnlessEqual(err, "", verb)
            self.failUnlessEqual(rc, 0, verb)
            return out
        d.addCallback(_check)
        return d

    def test_arg_parsing(self):
        self.basedir = "consolidate/Consolidate/arg_parsing"
        self.set_up_grid(num_clients=1, num_servers=1)
        co = debug.ConsolidateOptions()
        co.parseOptions(["--node-directory", self.get_clientdir(),
                         "--dbfile", "foo.db", "--backupfile", "backup", "--really",
                         "URI:DIR2:foo"])
        self.failUnlessEqual(co["dbfile"], "foo.db")
        self.failUnlessEqual(co["backupfile"], "backup")
        self.failUnless(co["really"])
        self.failUnlessEqual(co.where, "URI:DIR2:foo")
    def OFF_test_basic(self):
        # rename this method to enable the test. I've disabled it because, in
        # my opinion:
        #
        #  1: 'tahoe debug consolidate' is useful enough to include in trunk,
        #     but not useful enough to justify a lot of compatibility effort
        #     or extra test time
        #  2: it requires sqlite3; I did not bother to make it work with
        #     pysqlite, nor did I bother making it fail gracefully when
        #     sqlite3 is not available
        #  3: this test takes 30 seconds to run on my workstation, and it is
        #     likely to take several minutes on the old slow dapper buildslave
        #  4: I don't want other folks to see a SkipTest and wonder "oh no,
        #     what did I do wrong to not allow this test to run"
        #
        # These may not be strong arguments: I welcome feedback. In
        # particular, this command may be more suitable for a plugin of some
        # sort, if we had plugins of some sort. -warner 12-Mar-09

        self.basedir = "consolidate/Consolidate/basic"
        self.set_up_grid(num_clients=1)

        fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/nonsystem"))
        fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/fluxx/Latest"))
        self.writeto(os.path.join(self.basedir,
                                  "home/Backups/fluxx/Archives/nondir"),
                     "not a directory: ignore me")

        # set up a number of non-shared "snapshots"
        for i in range(1,8):
            self.writeto_snapshot(i, "parent/README", "README")
            self.writeto_snapshot(i, "parent/foo.txt", "foo")
            self.writeto_snapshot(i, "parent/subdir1/bar.txt", "bar")
            self.writeto_snapshot(i, "parent/subdir1/baz.txt", "baz")
            self.writeto_snapshot(i, "parent/subdir2/yoy.txt", "yoy")
            self.writeto_snapshot(i, "parent/subdir2/hola.txt", "hola")

            if i >= 1:
                pass # initial snapshot
            if i >= 2:
                pass # second snapshot: same as the first
            if i >= 3:
                # modify a file
                self.writeto_snapshot(i, "parent/foo.txt", "FOOF!")
            if i >= 4:
                # foo.txt goes back to normal
                self.writeto_snapshot(i, "parent/foo.txt", "foo")
            if i >= 5:
                # new file
                self.writeto_snapshot(i, "parent/subdir1/new.txt", "new")
            if i >= 6:
                # copy parent/subdir1 to parent/subdir2/copy1
                self.writeto_snapshot(i, "parent/subdir2/copy1/bar.txt", "bar")
                self.writeto_snapshot(i, "parent/subdir2/copy1/baz.txt", "baz")
                self.writeto_snapshot(i, "parent/subdir2/copy1/new.txt", "new")
            if i >= 7:
                # the last snapshot shall remain untouched
                pass
        # now copy the whole thing into tahoe
        d = self.do_cli_good("create-alias", "tahoe")
        d.addCallback(lambda ign:
                      self.do_cli_good("cp", "-r",
                                       os.path.join(self.basedir, "home/Backups"),
                                       "tahoe:Backups"))
        def _copied(res):
            rootcap = get_aliases(self.get_clientdir())["tahoe"]
            # now scan the initial directory structure
            n = self.g.clients[0].create_node_from_uri(rootcap)
            return n.get_child_at_path([u"Backups", u"fluxx", u"Archives"])
        d.addCallback(_copied)

        self.nodes = {}
        self.caps = {}
        def stash(node, name):
            self.nodes[name] = node
            self.caps[name] = node.get_uri()
            return node
        d.addCallback(stash, "Archives")

        self.manifests = {}
        def stash_manifest(manifest, which):
            self.manifests[which] = dict(manifest)
        d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
        d.addCallback(stash_manifest, "start")

        def c(n):
            pieces = n.split("-")
            which = "finish"
            if len(pieces) == 3:
                which = pieces[-1]
            sn = int(pieces[0])
            name = pieces[1]
            path = [u"2009-03-%02d 01.01.01" % sn]
            path.extend( {"b": [],
                          "bp": [u"parent"],
                          "bps1": [u"parent", u"subdir1"],
                          "bps2": [u"parent", u"subdir2"],
                          "bps2c1": [u"parent", u"subdir2", u"copy1"],
                          }[name] )
            return self.manifests[which][tuple(path)]

        dbfile = os.path.join(self.basedir, "dirhash.db")
        backupfile = os.path.join(self.basedir, "backup.pickle")

        d.addCallback(lambda ign:
                      self.do_cli_good("debug", "consolidate",
                                       "--dbfile", dbfile,
                                       "--backupfile", backupfile,
                                       "tahoe:"))
        def _check_consolidate_output1(out):
            lines = out.splitlines()
            last = lines[-1]
            self.failUnlessEqual(last.strip(),
                                 "system done, "
                                 "7 dirs created, 2 used as-is, 13 reused")
            self.failUnless(os.path.exists(dbfile))
            self.failUnless(os.path.exists(backupfile))
            backup = pickle.load(open(backupfile, "rb"))
            self.failUnless(u"fluxx" in backup["systems"])
            self.failUnless(u"fluxx" in backup["archives"])
            adata = backup["archives"]["fluxx"]
            kids = adata[u"children"]
            self.failUnlessEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]),
                                 c("1-b-start"))
        d.addCallback(_check_consolidate_output1)

        d.addCallback(lambda ign:
                      self.do_cli_good("debug", "consolidate",
                                       "--dbfile", dbfile,
                                       "--backupfile", backupfile,
                                       "--really", "tahoe:"))
        def _check_consolidate_output2(out):
            lines = out.splitlines()
            last = lines[-1]
            self.failUnlessEqual(last.strip(),
                                 "system done, "
                                 "0 dirs created, 0 used as-is, 0 reused")
        d.addCallback(_check_consolidate_output2)
        d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
        d.addCallback(stash_manifest, "finish")

        def check_consolidation(ignored):
            #for which in ("finish",):
            #    for path in sorted(self.manifests[which].keys()):
            #        print "%s %s %s" % (which, "/".join(path),
            #                            self.manifests[which][path])

            # last snapshot should be untouched
            self.failUnlessEqual(c("7-b"), c("7-b-start"))

            # first snapshot should be a readonly form of the original
            from allmydata.scripts.tahoe_backup import readonly
            self.failUnlessEqual(c("1-b-finish"), readonly(c("1-b-start")))
            self.failUnlessEqual(c("1-bp-finish"), readonly(c("1-bp-start")))
            self.failUnlessEqual(c("1-bps1-finish"), readonly(c("1-bps1-start")))
            self.failUnlessEqual(c("1-bps2-finish"), readonly(c("1-bps2-start")))

            # new directories should be different than the old ones
            self.failIfEqual(c("1-b"), c("1-b-start"))
            self.failIfEqual(c("1-bp"), c("1-bp-start"))
            self.failIfEqual(c("1-bps1"), c("1-bps1-start"))
            self.failIfEqual(c("1-bps2"), c("1-bps2-start"))
            self.failIfEqual(c("2-b"), c("2-b-start"))
            self.failIfEqual(c("2-bp"), c("2-bp-start"))
            self.failIfEqual(c("2-bps1"), c("2-bps1-start"))
            self.failIfEqual(c("2-bps2"), c("2-bps2-start"))
            self.failIfEqual(c("3-b"), c("3-b-start"))
            self.failIfEqual(c("3-bp"), c("3-bp-start"))
            self.failIfEqual(c("3-bps1"), c("3-bps1-start"))
            self.failIfEqual(c("3-bps2"), c("3-bps2-start"))
            self.failIfEqual(c("4-b"), c("4-b-start"))
            self.failIfEqual(c("4-bp"), c("4-bp-start"))
            self.failIfEqual(c("4-bps1"), c("4-bps1-start"))
            self.failIfEqual(c("4-bps2"), c("4-bps2-start"))
            self.failIfEqual(c("5-b"), c("5-b-start"))
            self.failIfEqual(c("5-bp"), c("5-bp-start"))
            self.failIfEqual(c("5-bps1"), c("5-bps1-start"))
            self.failIfEqual(c("5-bps2"), c("5-bps2-start"))

            # snapshot 1 and snapshot 2 should be identical
            self.failUnlessEqual(c("2-b"), c("1-b"))
            # snapshot 3 modified a file underneath parent/
            self.failIfEqual(c("3-b"), c("2-b")) # 3 modified a file
            self.failIfEqual(c("3-bp"), c("2-bp"))
            # but the subdirs are the same
            self.failUnlessEqual(c("3-bps1"), c("2-bps1"))
            self.failUnlessEqual(c("3-bps2"), c("2-bps2"))
            # snapshot 4 should be the same as 2
            self.failUnlessEqual(c("4-b"), c("2-b"))
            self.failUnlessEqual(c("4-bp"), c("2-bp"))
            self.failUnlessEqual(c("4-bps1"), c("2-bps1"))
            self.failUnlessEqual(c("4-bps2"), c("2-bps2"))
            # snapshot 5 added a file under subdir1
            self.failIfEqual(c("5-b"), c("4-b"))
            self.failIfEqual(c("5-bp"), c("4-bp"))
            self.failIfEqual(c("5-bps1"), c("4-bps1"))
            self.failUnlessEqual(c("5-bps2"), c("4-bps2"))
            # snapshot 6 copied a directory: it should be shared
            self.failIfEqual(c("6-b"), c("5-b"))
            self.failIfEqual(c("6-bp"), c("5-bp"))
            self.failUnlessEqual(c("6-bps1"), c("5-bps1"))
            self.failIfEqual(c("6-bps2"), c("5-bps2"))
            self.failUnlessEqual(c("6-bps2c1"), c("6-bps1"))

        d.addCallback(check_consolidation)
        return d
    def build_manifest(self, root):
        # like dirnode.build_manifest, but this one doesn't skip duplicate
        # nodes (i.e. it is not cycle-resistant).
        manifest = []
        manifest.append( ( (), root.get_uri() ) )
        d = self.manifest_of(None, root, manifest, () )
        d.addCallback(lambda ign: manifest)
        return d

    def manifest_of(self, ignored, dirnode, manifest, path):
        d = dirnode.list()
        def _got_children(children):
            d = defer.succeed(None)
            for name, (child, metadata) in children.iteritems():
                childpath = path + (name,)
                manifest.append( (childpath, child.get_uri()) )
                if IDirectoryNode.providedBy(child):
                    d.addCallback(self.manifest_of, child, manifest, childpath)
            return d
        d.addCallback(_got_children)
        return d