mirror of https://github.com/tahoe-lafs/tahoe-lafs.git
Remove the 'tahoe debug consolidate' subcommand.
This commit is contained in:
parent 731e3d68df
commit 7092de1b6f
NEWS
@@ -20,6 +20,11 @@ as 'convmv' before using Tahoe CLI.
All CLI commands have been improved to support non-ASCII parameters such as
filenames and aliases on all supported Operating Systems.

** Removals

The 'tahoe debug consolidate' subcommand (for converting old allmydata Windows
client backups to a newer format) has been removed.

** dependency updates

no python-2.4.2 or 2.4.3 (2.4.4 is ok)

allmydata/scripts/consolidate.py (file deleted)
@@ -1,456 +0,0 @@
import os, pickle, time
import sqlite3 as sqlite

import urllib
import simplejson
from allmydata.scripts.common_http import do_http, HTTPError
from allmydata.util import hashutil, base32, time_format
from allmydata.util.stringutils import to_str, quote_output, quote_path
from allmydata.util.netstring import netstring
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
from allmydata import uri


def readonly(writedircap):
    return uri.from_string_dirnode(writedircap).get_readonly().to_string()

def parse_old_timestamp(s, options):
    try:
        if not s.endswith("Z"):
            raise ValueError
        # This returns seconds-since-epoch for an ISO-8601-ish-formatted UTC
        # time string. This might raise ValueError if the string is not in the
        # right format.
        when = time_format.iso_utc_time_to_seconds(s[:-1])
        return when
    except ValueError:
        pass

    try:
        # "2008-11-16 10.34 PM" (localtime)
        if s[-3:] in (" AM", " PM"):
            # this might raise ValueError
            when = time.strptime(s[:-3], "%Y-%m-%d %I.%M")
            if s[-3:] == "PM":
                when += 12*60*60
            return when
    except ValueError:
        pass

    try:
        # "2008-11-16 10.34.56 PM" (localtime)
        if s[-3:] in (" AM", " PM"):
            # this might raise ValueError
            when = time.strptime(s[:-3], "%Y-%m-%d %I.%M.%S")
            if s[-3:] == "PM":
                when += 12*60*60
            return when
    except ValueError:
        pass

    try:
        # "2008-12-31 18.21.43"
        when = time.strptime(s, "%Y-%m-%d %H.%M.%S")
        return when
    except ValueError:
        pass

    print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
    return None


TAG = "consolidator_dirhash_v1"

class CycleDetected(Exception):
    pass


class Consolidator:
    def __init__(self, options):
        self.options = options
        self.rootcap, path = get_alias(options.aliases, options.where,
                                       DEFAULT_ALIAS)
        assert path == ""
        # TODO: allow dbfile and backupfile to be Unicode
        self.dbfile = options["dbfile"]
        assert self.dbfile, "--dbfile is required"
        self.backupfile = options["backupfile"]
        assert self.backupfile, "--backupfile is required"
        self.nodeurl = options["node-url"]
        if not self.nodeurl.endswith("/"):
            self.nodeurl += "/"
        self.must_rescan_readonly_snapshots = not os.path.exists(self.dbfile)
        self.db = sqlite.connect(self.dbfile)
        self.cursor = self.db.cursor()
        try:
            self.cursor.execute("CREATE TABLE dirhashes"
                                "("
                                " dirhash TEXT PRIMARY KEY,"
                                " dircap TEXT"
                                ")")
        except sqlite.OperationalError, e:
            if "table dirhashes already exists" not in str(e):
                raise

    def read_directory_json(self, dircap):
        url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap)
        resp = do_http("GET", url)
        if resp.status != 200:
            raise HTTPError("Error during directory GET", resp)
        jd = simplejson.load(resp)
        ntype, ndata = jd
        if ntype != "dirnode":
            return None
        return ndata

    def msg(self, text):
        print >>self.options.stdout, text
        self.options.stdout.flush()
    def err(self, text):
        print >>self.options.stderr, text
        self.options.stderr.flush()

    def consolidate(self):
        try:
            data = self.read_directory_json(self.rootcap + "/Backups")
        except HTTPError:
            self.err("Unable to list /Backups, maybe this account has none?")
            return 1
        kids = data["children"]
        potential_systems = {}
        for (childname, (childtype, childdata)) in kids.items():
            if childtype != "dirnode":
                continue
            if "rw_uri" not in childdata:
                self.msg("%s: not writeable" % quote_output(childname))
                continue
            potential_systems[childname] = to_str(childdata["rw_uri"])
        backup_data = {"Backups": data, "systems": {}, "archives": {}}
        systems = {}
        for name, sdircap in potential_systems.items():
            sdata = self.read_directory_json(sdircap)
            kids = sdata["children"]
            if not u"Archives" in kids and not u"Latest Backup" in kids:
                self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % quote_output(name))
                continue
            archives_capdata = kids[u"Archives"][1]
            if "rw_uri" not in archives_capdata:
                self.msg("%s: /Archives is not writeable" % quote_output(name))
                continue
            self.msg("%s is a system" % quote_output(name))
            backup_data["systems"][name] = sdata
            archives_dircap = to_str(archives_capdata["rw_uri"])
            archives_data = self.read_directory_json(archives_dircap)
            backup_data["archives"][name] = archives_data
            systems[name] = archives_dircap
        if not systems:
            self.msg("No systems under /Backups, nothing to consolidate")
            return 0
        backupfile = self.backupfile
        counter = 0
        while os.path.exists(backupfile):
            backupfile = self.backupfile + "." + str(counter)
            counter += 1
        f = open(backupfile, "wb")
        pickle.dump(backup_data, f)
        f.close()

        for name, archives_dircap in sorted(systems.items()):
            self.do_system(name, archives_dircap)
        return 0

    def do_system(self, system_name, archives_dircap):
        # first we walk through the Archives list, looking for the existing
        # snapshots. Each one will have a $NAME like "2008-11-16 10.34 PM"
        # (in various forms: we use tahoe_backup.parse_old_timestamp to
        # interpret it). At first, they'll all have $NAME and be writecaps.
        # As we run, we will create $NAME-readonly (with a readcap) for each
        # one (the first one will just be the readonly equivalent of the
        # oldest snapshot: the others will be constructed out of shared
        # directories). When we're done we'll have a $NAME-readonly for
        # everything except the latest snapshot (to avoid any danger of
        # modifying a backup that's already in progress). The very last step,
        # which won't be enabled until we're sure that everything is working
        # correctly, will replace each $NAME with $NAME-readonly.

        # We maintain a table that maps dirhash (hash of directory contents)
        # to a directory readcap which contains those contents. We use this
        # to decide if we can link to an existing directory, or if we must
        # create a brand new one. Usually we add to this table in two places:
        # when we scan the oldest snapshot (which we've just converted to
        # readonly form), and when we must create a brand new one. If the
        # table doesn't exist (probably because we've manually deleted it),
        # we will scan *all* the existing readonly snapshots, and repopulate
        # the table. We keep this table in a SQLite database (rather than a
        # pickle) because we want to update it persistently after every
        # directory creation, and writing out a 10k entry pickle takes about
        # 250ms

        # 'snapshots' maps timestamp to [rwname, writecap, roname, readcap].
        # The possibilities are:
        # [$NAME, writecap, None, None] : haven't touched it
        # [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
        # [None, None, $NAME, readcap] : processed and replaced

        self.msg("consolidating system %s" % quote_output(system_name))
        self.directories_reused = 0
        self.directories_used_as_is = 0
        self.directories_created = 0
        self.directories_seen = set()
        self.directories_used = set()

        data = self.read_directory_json(archives_dircap)
        snapshots = {}

        children = sorted(data["children"].items())
        for i, (childname, (childtype, childdata)) in enumerate(children):
            if childtype != "dirnode":
                self.msg("non-dirnode %s in Archives/" % quote_output(childname))
                continue
            timename = to_str(childname)
            if timename.endswith("-readonly"):
                timename = timename[:-len("-readonly")]
            timestamp = parse_old_timestamp(timename, self.options)
            assert timestamp is not None, timename
            snapshots.setdefault(timestamp, [None, None, None, None])
            # if the snapshot is readonly (i.e. it has no rw_uri), we might
            # need to re-scan it
            is_readonly = not childdata.has_key("rw_uri")
            if is_readonly:
                readcap = to_str(childdata["ro_uri"])
                if self.must_rescan_readonly_snapshots:
                    self.msg(" scanning old %s (%d/%d)" %
                             (quote_output(childname), i+1, len(children)))
                    self.scan_old_directory(to_str(childdata["ro_uri"]))
                snapshots[timestamp][2] = childname
                snapshots[timestamp][3] = readcap
            else:
                writecap = to_str(childdata["rw_uri"])
                snapshots[timestamp][0] = childname
                snapshots[timestamp][1] = writecap
        snapshots = [ [timestamp] + values
                      for (timestamp, values) in snapshots.items() ]
        # now 'snapshots' is [timestamp, rwname, writecap, roname, readcap],
        # which makes it easier to process in temporal order
        snapshots.sort()
        self.msg(" %d snapshots" % len(snapshots))
        # we always ignore the last one, for safety
        snapshots = snapshots[:-1]

        first_snapshot = True
        for i,(timestamp, rwname, writecap, roname, readcap) in enumerate(snapshots):
            eta = "?"
            start_created = self.directories_created
            start_used_as_is = self.directories_used_as_is
            start_reused = self.directories_reused

            # [None, None, $NAME, readcap] : processed and replaced
            # [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
            # [$NAME, writecap, None, None] : haven't touched it

            if readcap and not writecap:
                # skip past anything we've already processed and replaced
                assert roname
                assert not rwname
                first_snapshot = False
                self.msg(" %s already readonly" % quote_output(roname))
                continue
            if readcap and writecap:
                # we've processed it, creating a -readonly version, but we
                # haven't replaced it.
                assert roname
                assert rwname
                first_snapshot = False
                self.msg(" %s processed but not yet replaced" % quote_output(roname))
                if self.options["really"]:
                    self.msg(" replacing %s with %s" % (quote_output(rwname), quote_output(roname)))
                    self.put_child(archives_dircap, rwname, readcap)
                    self.delete_child(archives_dircap, roname)
                continue
            assert writecap
            assert rwname
            assert not readcap
            assert not roname
            roname = rwname + "-readonly"
            # for the oldest snapshot, we can do a simple readonly conversion
            if first_snapshot:
                first_snapshot = False
                readcap = readonly(writecap)
                self.directories_used_as_is += 1
                self.msg(" %s: oldest snapshot, using as-is" % quote_output(rwname))
                self.scan_old_directory(readcap)
            else:
                # for the others, we must scan their contents and build up a new
                # readonly directory (which shares common subdirs with previous
                # backups)
                self.msg(" %s: processing (%d/%d)" % (quote_output(rwname), i+1, len(snapshots)))
                started = time.time()
                readcap = self.process_directory(readonly(writecap), (rwname,))
                elapsed = time.time() - started
                eta = "%ds" % (elapsed * (len(snapshots) - i-1))
            if self.options["really"]:
                self.msg(" replaced %s" % quote_output(rwname))
                self.put_child(archives_dircap, rwname, readcap)
            else:
                self.msg(" created %s" % quote_output(roname))
                self.put_child(archives_dircap, roname, readcap)

            snapshot_created = self.directories_created - start_created
            snapshot_used_as_is = self.directories_used_as_is - start_used_as_is
            snapshot_reused = self.directories_reused - start_reused
            self.msg(" %s: done: %d dirs created, %d used as-is, %d reused, eta %s"
                     % (quote_output(rwname),
                        snapshot_created, snapshot_used_as_is, snapshot_reused,
                        eta))
        # done!
        self.msg(" system done, dircounts: %d/%d seen/used, %d created, %d as-is, %d reused" \
                 % (len(self.directories_seen), len(self.directories_used),
                    self.directories_created, self.directories_used_as_is,
                    self.directories_reused))

    def process_directory(self, readcap, path):
        # I walk all my children (recursing over any subdirectories), build
        # up a table of my contents, then see if I can re-use an old
        # directory with the same contents. If not, I create a new directory
        # for my contents. In all cases I return a directory readcap that
        # points to my contents.

        readcap = to_str(readcap)
        self.directories_seen.add(readcap)

        # build up contents to pass to mkdir() (which uses t=set_children)
        contents = {} # childname -> (type, rocap, metadata)
        data = self.read_directory_json(readcap)
        assert data is not None
        hashkids = []
        children_modified = False
        for (childname, (childtype, childdata)) in sorted(data["children"].items()):
            if childtype == "dirnode":
                childpath = path + (childname,)
                old_childcap = to_str(childdata["ro_uri"])
                childcap = self.process_directory(old_childcap, childpath)
                if childcap != old_childcap:
                    children_modified = True
                contents[childname] = ("dirnode", childcap, None)
            else:
                childcap = to_str(childdata["ro_uri"])
                contents[childname] = (childtype, childcap, None)
            hashkids.append( (childname, childcap) )

        dirhash = self.hash_directory_contents(hashkids)
        old_dircap = self.get_old_dirhash(dirhash)
        if old_dircap:
            if self.options["verbose"]:
                self.msg(" %s: reused" % quote_path(path))
            assert isinstance(old_dircap, str)
            self.directories_reused += 1
            self.directories_used.add(old_dircap)
            return old_dircap
        if not children_modified:
            # we're allowed to use this directory as-is
            if self.options["verbose"]:
                self.msg(" %s: used as-is" % quote_path(path))
            new_dircap = readonly(readcap)
            assert isinstance(new_dircap, str)
            self.store_dirhash(dirhash, new_dircap)
            self.directories_used_as_is += 1
            self.directories_used.add(new_dircap)
            return new_dircap
        # otherwise, we need to create a new directory
        if self.options["verbose"]:
            self.msg(" %s: created" % quote_path(path))
        new_dircap = readonly(self.mkdir(contents))
        assert isinstance(new_dircap, str)
        self.store_dirhash(dirhash, new_dircap)
        self.directories_created += 1
        self.directories_used.add(new_dircap)
        return new_dircap

    def put_child(self, dircap, childname, childcap):
        url = self.nodeurl + "uri/%s/%s?t=uri" % (urllib.quote(dircap),
                                                  urllib.quote(childname))
        resp = do_http("PUT", url, childcap)
        if resp.status not in (200, 201):
            raise HTTPError("Error during put_child", resp)

    def delete_child(self, dircap, childname):
        url = self.nodeurl + "uri/%s/%s" % (urllib.quote(dircap),
                                            urllib.quote(childname))
        resp = do_http("DELETE", url)
        if resp.status not in (200, 201):
            raise HTTPError("Error during delete_child", resp)

    def mkdir(self, contents):
        url = self.nodeurl + "uri?t=mkdir"
        resp = do_http("POST", url)
        if resp.status < 200 or resp.status >= 300:
            raise HTTPError("Error during mkdir", resp)
        dircap = to_str(resp.read().strip())
        url = self.nodeurl + "uri/%s?t=set_children" % urllib.quote(dircap)
        body = dict([ (childname, (contents[childname][0],
                                   {"ro_uri": contents[childname][1],
                                    "metadata": contents[childname][2],
                                    }))
                      for childname in contents
                      ])
        resp = do_http("POST", url, simplejson.dumps(body))
        if resp.status != 200:
            raise HTTPError("Error during set_children", resp)
        return dircap

    def scan_old_directory(self, dircap, ancestors=()):
        # scan this directory (recursively) and stash a hash of its contents
        # in the DB. This assumes that all subdirs can be used as-is (i.e.
        # they've already been declared immutable)
        dircap = readonly(dircap)
        if dircap in ancestors:
            raise CycleDetected
        ancestors = ancestors + (dircap,)
        #self.visited.add(dircap)
        # TODO: we could use self.visited as a mapping from dircap to dirhash,
        # to avoid re-scanning old shared directories multiple times
        self.directories_seen.add(dircap)
        self.directories_used.add(dircap)
        data = self.read_directory_json(dircap)
        kids = []
        for (childname, (childtype, childdata)) in data["children"].items():
            childcap = to_str(childdata["ro_uri"])
            if childtype == "dirnode":
                self.scan_old_directory(childcap, ancestors)
            kids.append( (childname, childcap) )
        dirhash = self.hash_directory_contents(kids)
        self.store_dirhash(dirhash, dircap)
        return dirhash

    def hash_directory_contents(self, kids):
        kids.sort()
        s = "".join([netstring(to_str(childname))+netstring(childcap)
                     for (childname, childcap) in kids])
        return hashutil.tagged_hash(TAG, s)

    def store_dirhash(self, dirhash, dircap):
        assert isinstance(dircap, str)
        # existing items should prevail
        try:
            c = self.cursor
            c.execute("INSERT INTO dirhashes (dirhash, dircap) VALUES (?,?)",
                      (base32.b2a(dirhash), dircap))
            self.db.commit()
        except sqlite.IntegrityError:
            # already present
            pass

    def get_old_dirhash(self, dirhash):
        self.cursor.execute("SELECT dircap FROM dirhashes WHERE dirhash=?",
                            (base32.b2a(dirhash),))
        row = self.cursor.fetchone()
        if not row:
            return None
        (dircap,) = row
        return str(dircap)


def main(options):
    c = Consolidator(options)
    return c.consolidate()

allmydata/scripts/debug.py
@@ -4,8 +4,6 @@
import struct, time, os
from twisted.python import usage, failure
from twisted.internet import defer
from allmydata.scripts.cli import VDriveOptions
from allmydata.util.stringutils import argv_to_unicode

class DumpOptions(usage.Options):
    def getSynopsis(self):
@@ -759,23 +757,6 @@ def repl(options):
    return code.interact()


class ConsolidateOptions(VDriveOptions):
    optParameters = [
        ("dbfile", None, None, "persistent file for reusable dirhashes"),
        ("backupfile", "b", None, "file to store backup of Archives/ contents"),
        ]
    optFlags = [
        ("really", None, "Really remove old snapshot directories"),
        ("verbose", "v", "Emit a line for every directory examined"),
        ]
    def parseArgs(self, where):
        self.where = argv_to_unicode(where)

def consolidate(options):
    from allmydata.scripts.consolidate import main
    return main(options)


class DebugCommand(usage.Options):
    subCommands = [
        ["dump-share", None, DumpOptions,
@@ -785,7 +766,6 @@ class DebugCommand(usage.Options):
        ["catalog-shares", None, CatalogSharesOptions, "Describe shares in node dirs"],
        ["corrupt-share", None, CorruptShareOptions, "Corrupt a share"],
        ["repl", None, ReplOptions, "Open a python interpreter"],
        ["consolidate", None, ConsolidateOptions, "Consolidate non-shared backups"],
        ]
    def postOptions(self):
        if not hasattr(self, 'subOptions'):
@@ -801,7 +781,6 @@ Subcommands:
    tahoe debug find-shares     Locate sharefiles in node directories
    tahoe debug catalog-shares  Describe all shares in node dirs
    tahoe debug corrupt-share   Corrupt a share by flipping a bit.
    tahoe debug consolidate     Consolidate old non-shared backups into shared ones.

Please run e.g. 'tahoe debug dump-share --help' for more details on each
subcommand.
@@ -815,7 +794,6 @@ subDispatch = {
    "catalog-shares": catalog_shares,
    "corrupt-share": corrupt_share,
    "repl": repl,
    "consolidate": consolidate,
    }

allmydata/test/test_consolidate.py (file deleted)
@@ -1,298 +0,0 @@
# -*- coding: utf-8 -*-

import os
from cStringIO import StringIO
import pickle
from twisted.trial import unittest
from allmydata.test.no_network import GridTestMixin
from allmydata.test.common_util import ReallyEqualMixin
from allmydata.util import fileutil
from allmydata.scripts import runner, debug
from allmydata.scripts.common import get_aliases
from twisted.internet import defer, threads # CLI tests use deferToThread
from allmydata.interfaces import IDirectoryNode

have_sqlite3 = False
try:
    import sqlite3
    sqlite3 # hush pyflakes
    have_sqlite3 = True
except ImportError:
    pass
else:
    from allmydata.scripts import consolidate


class CLITestMixin:
    def do_cli(self, verb, *args, **kwargs):
        nodeargs = [
            "--node-directory", self.get_clientdir(),
            ]
        if verb == "debug":
            argv = [verb, args[0]] + nodeargs + list(args[1:])
        else:
            argv = [verb] + nodeargs + list(args)
        stdin = kwargs.get("stdin", "")
        stdout, stderr = StringIO(), StringIO()
        d = threads.deferToThread(runner.runner, argv, run_by_human=False,
                                  stdin=StringIO(stdin),
                                  stdout=stdout, stderr=stderr)
        def _done(rc):
            return rc, stdout.getvalue(), stderr.getvalue()
        d.addCallback(_done)
        return d

class Consolidate(GridTestMixin, CLITestMixin, ReallyEqualMixin, unittest.TestCase):

    def writeto(self, path, data):
        d = os.path.dirname(os.path.join(self.basedir, "home", path))
        fileutil.make_dirs(d)
        f = open(os.path.join(self.basedir, "home", path), "w")
        f.write(data)
        f.close()

    def writeto_snapshot(self, sn, path, data):
        p = "Backups/fluxx/Archives/2009-03-%02d 01.01.01/%s" % (sn, path)
        return self.writeto(p, data)

    def do_cli_good(self, verb, *args, **kwargs):
        d = self.do_cli(verb, *args, **kwargs)
        def _check((rc,out,err)):
            self.failUnlessReallyEqual(err, "", verb)
            self.failUnlessReallyEqual(rc, 0, verb)
            return out
        d.addCallback(_check)
        return d

    def test_arg_parsing(self):
        self.basedir = "consolidate/Consolidate/arg_parsing"
        self.set_up_grid(num_clients=1, num_servers=1)
        co = debug.ConsolidateOptions()
        co.parseOptions(["--node-directory", self.get_clientdir(),
                         "--dbfile", "foo.db", "--backupfile", "backup", "--really",
                         "URI:DIR2:foo"])
        self.failUnlessReallyEqual(co["dbfile"], "foo.db")
        self.failUnlessReallyEqual(co["backupfile"], "backup")
        self.failUnless(co["really"])
        self.failUnlessReallyEqual(co.where, u"URI:DIR2:foo")

    def test_basic(self):
        if not have_sqlite3:
            raise unittest.SkipTest("'tahoe debug consolidate' is not supported because sqlite3 is not available.")

        self.basedir = "consolidate/Consolidate/basic"
        self.set_up_grid(num_clients=1)

        fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/nonsystem"))
        fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/fluxx/Latest"))
        self.writeto(os.path.join(self.basedir,
                                  "home/Backups/fluxx/Archives/nondir"),
                     "not a directory: ignore me")

        # set up a number of non-shared "snapshots"
        for i in range(1,8):
            self.writeto_snapshot(i, "parent/README", "README")
            self.writeto_snapshot(i, "parent/foo.txt", "foo")
            self.writeto_snapshot(i, "parent/subdir1/bar.txt", "bar")
            self.writeto_snapshot(i, "parent/subdir1/baz.txt", "baz")
            self.writeto_snapshot(i, "parent/subdir2/yoy.txt", "yoy")
            self.writeto_snapshot(i, "parent/subdir2/hola.txt", "hola")

            if i >= 1:
                pass # initial snapshot
            if i >= 2:
                pass # second snapshot: same as the first
            if i >= 3:
                # modify a file
                self.writeto_snapshot(i, "parent/foo.txt", "FOOF!")
            if i >= 4:
                # foo.txt goes back to normal
                self.writeto_snapshot(i, "parent/foo.txt", "foo")
            if i >= 5:
                # new file
                self.writeto_snapshot(i, "parent/subdir1/new.txt", "new")
            if i >= 6:
                # copy parent/subdir1 to parent/subdir2/copy1
                self.writeto_snapshot(i, "parent/subdir2/copy1/bar.txt", "bar")
                self.writeto_snapshot(i, "parent/subdir2/copy1/baz.txt", "baz")
                self.writeto_snapshot(i, "parent/subdir2/copy1/new.txt", "new")
            if i >= 7:
                # the last snapshot shall remain untouched
                pass

        # now copy the whole thing into tahoe
        d = self.do_cli_good("create-alias", "tahoe")
        d.addCallback(lambda ign:
                      self.do_cli_good("cp", "-r",
                                       os.path.join(self.basedir, "home/Backups"),
                                       "tahoe:Backups"))
        def _copied(res):
            rootcap = get_aliases(self.get_clientdir())["tahoe"]
            # now scan the initial directory structure
            n = self.g.clients[0].create_node_from_uri(rootcap)
            return n.get_child_at_path([u"Backups", u"fluxx", u"Archives"])
        d.addCallback(_copied)
        self.nodes = {}
        self.caps = {}
        def stash(node, name):
            self.nodes[name] = node
            self.caps[name] = node.get_uri()
            return node
        d.addCallback(stash, "Archives")
        self.manifests = {}
        def stash_manifest(manifest, which):
            self.manifests[which] = dict(manifest)
        d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
        d.addCallback(stash_manifest, "start")
        def c(n):
            pieces = n.split("-")
            which = "finish"
            if len(pieces) == 3:
                which = pieces[-1]
            sn = int(pieces[0])
            name = pieces[1]
            path = [u"2009-03-%02d 01.01.01" % sn]
            path.extend( {"b": [],
                          "bp": [u"parent"],
                          "bps1": [u"parent", u"subdir1"],
                          "bps2": [u"parent", u"subdir2"],
                          "bps2c1": [u"parent", u"subdir2", u"copy1"],
                          }[name] )
            return self.manifests[which][tuple(path)]

        dbfile = os.path.join(self.basedir, "dirhash.db")
        backupfile = os.path.join(self.basedir, "backup.pickle")

        d.addCallback(lambda ign:
                      self.do_cli_good("debug", "consolidate",
                                       "--dbfile", dbfile,
                                       "--backupfile", backupfile,
                                       "--verbose",
                                       "tahoe:"))
        def _check_consolidate_output1(out):
            lines = out.splitlines()
            last = lines[-1]
            self.failUnlessReallyEqual(last.strip(),
                                       "system done, dircounts: "
                                       "25/12 seen/used, 7 created, 2 as-is, 13 reused")
            self.failUnless(os.path.exists(dbfile))
            self.failUnless(os.path.exists(backupfile))
            self.first_backup = backup = pickle.load(open(backupfile, "rb"))
            self.failUnless(u"fluxx" in backup["systems"])
            self.failUnless(u"fluxx" in backup["archives"])
            adata = backup["archives"]["fluxx"]
            kids = adata[u"children"]
            self.failUnlessReallyEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]),
                                       c("1-b-start"))
        d.addCallback(_check_consolidate_output1)
        d.addCallback(lambda ign:
                      self.do_cli_good("debug", "consolidate",
                                       "--dbfile", dbfile,
                                       "--backupfile", backupfile,
                                       "--really", "tahoe:"))
        def _check_consolidate_output2(out):
            lines = out.splitlines()
            last = lines[-1]
            self.failUnlessReallyEqual(last.strip(),
                                       "system done, dircounts: "
                                       "0/0 seen/used, 0 created, 0 as-is, 0 reused")
            backup = pickle.load(open(backupfile, "rb"))
            self.failUnlessReallyEqual(backup, self.first_backup)
            self.failUnless(os.path.exists(backupfile + ".0"))
        d.addCallback(_check_consolidate_output2)

        d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
        d.addCallback(stash_manifest, "finish")

        def check_consolidation(ignored):
            #for which in ("finish",):
            #    for path in sorted(self.manifests[which].keys()):
            #        print "%s %s %s" % (which, "/".join(path),
            #                            self.manifests[which][path])

            # last snapshot should be untouched
            self.failUnlessReallyEqual(c("7-b"), c("7-b-start"))

            # first snapshot should be a readonly form of the original
            self.failUnlessReallyEqual(c("1-b-finish"), consolidate.readonly(c("1-b-start")))
            self.failUnlessReallyEqual(c("1-bp-finish"), consolidate.readonly(c("1-bp-start")))
            self.failUnlessReallyEqual(c("1-bps1-finish"), consolidate.readonly(c("1-bps1-start")))
            self.failUnlessReallyEqual(c("1-bps2-finish"), consolidate.readonly(c("1-bps2-start")))

            # new directories should be different than the old ones
            self.failIfEqual(c("1-b"), c("1-b-start"))
            self.failIfEqual(c("1-bp"), c("1-bp-start"))
            self.failIfEqual(c("1-bps1"), c("1-bps1-start"))
            self.failIfEqual(c("1-bps2"), c("1-bps2-start"))
            self.failIfEqual(c("2-b"), c("2-b-start"))
            self.failIfEqual(c("2-bp"), c("2-bp-start"))
            self.failIfEqual(c("2-bps1"), c("2-bps1-start"))
            self.failIfEqual(c("2-bps2"), c("2-bps2-start"))
            self.failIfEqual(c("3-b"), c("3-b-start"))
            self.failIfEqual(c("3-bp"), c("3-bp-start"))
            self.failIfEqual(c("3-bps1"), c("3-bps1-start"))
            self.failIfEqual(c("3-bps2"), c("3-bps2-start"))
            self.failIfEqual(c("4-b"), c("4-b-start"))
            self.failIfEqual(c("4-bp"), c("4-bp-start"))
            self.failIfEqual(c("4-bps1"), c("4-bps1-start"))
            self.failIfEqual(c("4-bps2"), c("4-bps2-start"))
            self.failIfEqual(c("5-b"), c("5-b-start"))
            self.failIfEqual(c("5-bp"), c("5-bp-start"))
            self.failIfEqual(c("5-bps1"), c("5-bps1-start"))
            self.failIfEqual(c("5-bps2"), c("5-bps2-start"))

            # snapshot 1 and snapshot 2 should be identical
            self.failUnlessReallyEqual(c("2-b"), c("1-b"))

            # snapshot 3 modified a file underneath parent/
            self.failIfEqual(c("3-b"), c("2-b")) # 3 modified a file
            self.failIfEqual(c("3-bp"), c("2-bp"))
            # but the subdirs are the same
            self.failUnlessReallyEqual(c("3-bps1"), c("2-bps1"))
            self.failUnlessReallyEqual(c("3-bps2"), c("2-bps2"))

            # snapshot 4 should be the same as 2
            self.failUnlessReallyEqual(c("4-b"), c("2-b"))
            self.failUnlessReallyEqual(c("4-bp"), c("2-bp"))
            self.failUnlessReallyEqual(c("4-bps1"), c("2-bps1"))
            self.failUnlessReallyEqual(c("4-bps2"), c("2-bps2"))

            # snapshot 5 added a file under subdir1
            self.failIfEqual(c("5-b"), c("4-b"))
            self.failIfEqual(c("5-bp"), c("4-bp"))
            self.failIfEqual(c("5-bps1"), c("4-bps1"))
            self.failUnlessReallyEqual(c("5-bps2"), c("4-bps2"))

            # snapshot 6 copied a directory-it should be shared
            self.failIfEqual(c("6-b"), c("5-b"))
            self.failIfEqual(c("6-bp"), c("5-bp"))
            self.failUnlessReallyEqual(c("6-bps1"), c("5-bps1"))
            self.failIfEqual(c("6-bps2"), c("5-bps2"))
            self.failUnlessReallyEqual(c("6-bps2c1"), c("6-bps1"))

        d.addCallback(check_consolidation)

        return d
    test_basic.timeout = 28800 # It took more than 7200 seconds on François's ARM

    def build_manifest(self, root):
        # like dirnode.build_manifest, but this one doesn't skip duplicate
        # nodes (i.e. it is not cycle-resistant).
        manifest = []
        manifest.append( ( (), root.get_uri() ) )
        d = self.manifest_of(None, root, manifest, () )
        d.addCallback(lambda ign: manifest)
        return d

    def manifest_of(self, ignored, dirnode, manifest, path):
        d = dirnode.list()
        def _got_children(children):
            d = defer.succeed(None)
            for name, (child, metadata) in children.iteritems():
                childpath = path + (name,)
                manifest.append( (childpath, child.get_uri()) )
                if IDirectoryNode.providedBy(child):
                    d.addCallback(self.manifest_of, child, manifest, childpath)
            return d
        d.addCallback(_got_children)
        return d