tahoe-lafs/src/allmydata/test/cli/test_backup.py

538 lines
23 KiB
Python

import os.path
from cStringIO import StringIO
from datetime import timedelta
import re
from twisted.trial import unittest
from twisted.python.monkey import MonkeyPatcher
import __builtin__
from allmydata.util import fileutil
from allmydata.util.fileutil import abspath_expanduser_unicode
from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
from allmydata.util.namespace import Namespace
from allmydata.scripts import cli, backupdb
from ..common_util import StallMixin
from ..no_network import GridTestMixin
from .common import CLITestMixin, parse_options
timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
def writeto(self, path, data):
full_path = os.path.join(self.basedir, "home", path)
fileutil.make_dirs(os.path.dirname(full_path))
fileutil.write(full_path, data)
def count_output(self, out):
mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), "
"(\d)+ files skipped, "
"(\d+) directories created \((\d+) reused\), "
"(\d+) directories skipped", out)
return [int(s) for s in mo.groups()]
def count_output2(self, out):
mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out)
return [int(s) for s in mo.groups()]
def progress_output(self, out):
def parse_timedelta(h, m, s):
return timedelta(int(h), int(m), int(s))
mos = re.findall(
r"Backing up (\d)+/(\d)+\.\.\. (\d+)h (\d+)m (\d+)s elapsed\.\.\.",
out,
)
return list(
(int(progress), int(total), parse_timedelta(h, m, s))
for (progress, total, h, m, s)
in mos
)
def test_backup(self):
self.basedir = "cli/Backup/backup"
self.set_up_grid(oneshare=True)
# is the backupdb available? If so, we test that a second backup does
# not create new directories.
hush = StringIO()
bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"),
hush)
self.failUnless(bdb)
# create a small local directory with a couple of files
source = os.path.join(self.basedir, "home")
fileutil.make_dirs(os.path.join(source, "empty"))
self.writeto("parent/subdir/foo.txt", "foo")
self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
self.writeto("parent/blah.txt", "blah")
def do_backup(verbose=False):
cmd = ["backup"]
if verbose:
cmd.append("--verbose")
cmd.append(source)
cmd.append("tahoe:backups")
return self.do_cli(*cmd)
d = self.do_cli("create-alias", "tahoe")
d.addCallback(lambda res: do_backup(True))
def _check0((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
(
files_uploaded,
files_reused,
files_skipped,
directories_created,
directories_reused,
directories_skipped,
) = self.count_output(out)
# foo.txt, bar.txt, blah.txt
self.failUnlessReallyEqual(files_uploaded, 3)
self.failUnlessReallyEqual(files_reused, 0)
self.failUnlessReallyEqual(files_skipped, 0)
# empty, home, home/parent, home/parent/subdir
self.failUnlessReallyEqual(directories_created, 4)
self.failUnlessReallyEqual(directories_reused, 0)
self.failUnlessReallyEqual(directories_skipped, 0)
# This is the first-upload scenario so there should have been
# nothing to check.
(files_checked, directories_checked) = self.count_output2(out)
self.failUnlessReallyEqual(files_checked, 0)
self.failUnlessReallyEqual(directories_checked, 0)
progress = self.progress_output(out)
for left, right in zip(progress[:-1], progress[1:]):
# Progress as measured by file count should progress
# monotonically.
self.assertTrue(
left[0] < right[0],
"Failed: {} < {}".format(left[0], right[0]),
)
# Total work to do should remain the same.
self.assertEqual(left[1], right[1])
# Amount of elapsed time should only go up. Allow it to
# remain the same to account for resolution of the report.
self.assertTrue(
left[2] <= right[2],
"Failed: {} <= {}".format(left[2], right[2]),
)
for element in progress:
# Can't have more progress than the total.
self.assertTrue(
element[0] <= element[1],
"Failed: {} <= {}".format(element[0], element[1]),
)
d.addCallback(_check0)
d.addCallback(lambda res: self.do_cli("ls", "--uri", "tahoe:backups"))
def _check1((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
lines = out.split("\n")
children = dict([line.split() for line in lines if line])
latest_uri = children["Latest"]
self.failUnless(latest_uri.startswith("URI:DIR2-CHK:"), latest_uri)
childnames = children.keys()
self.failUnlessReallyEqual(sorted(childnames), ["Archives", "Latest"])
d.addCallback(_check1)
d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest"))
def _check2((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.failUnlessReallyEqual(sorted(out.split()), ["empty", "parent"])
d.addCallback(_check2)
d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty"))
def _check2a((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.failUnlessReallyEqual(out.strip(), "")
d.addCallback(_check2a)
d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
def _check3((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.failUnlessReallyEqual(out, "foo")
d.addCallback(_check3)
d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
def _check4((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.old_archives = out.split()
self.failUnlessReallyEqual(len(self.old_archives), 1)
d.addCallback(_check4)
d.addCallback(self.stall, 1.1)
d.addCallback(lambda res: do_backup())
def _check4a((rc, out, err)):
# second backup should reuse everything, if the backupdb is
# available
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
fu, fr, fs, dc, dr, ds = self.count_output(out)
# foo.txt, bar.txt, blah.txt
self.failUnlessReallyEqual(fu, 0)
self.failUnlessReallyEqual(fr, 3)
self.failUnlessReallyEqual(fs, 0)
# empty, home, home/parent, home/parent/subdir
self.failUnlessReallyEqual(dc, 0)
self.failUnlessReallyEqual(dr, 4)
self.failUnlessReallyEqual(ds, 0)
d.addCallback(_check4a)
# sneak into the backupdb, crank back the "last checked"
# timestamp to force a check on all files
def _reset_last_checked(res):
dbfile = self.get_client_config().get_private_path("backupdb.sqlite")
self.failUnless(os.path.exists(dbfile), dbfile)
bdb = backupdb.get_backupdb(dbfile)
bdb.cursor.execute("UPDATE last_upload SET last_checked=0")
bdb.cursor.execute("UPDATE directories SET last_checked=0")
bdb.connection.commit()
d.addCallback(_reset_last_checked)
d.addCallback(self.stall, 1.1)
d.addCallback(lambda res: do_backup(verbose=True))
def _check4b((rc, out, err)):
# we should check all files, and re-use all of them. None of
# the directories should have been changed, so we should
# re-use all of them too.
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
fu, fr, fs, dc, dr, ds = self.count_output(out)
fchecked, dchecked = self.count_output2(out)
self.failUnlessReallyEqual(fchecked, 3)
self.failUnlessReallyEqual(fu, 0)
self.failUnlessReallyEqual(fr, 3)
self.failUnlessReallyEqual(fs, 0)
self.failUnlessReallyEqual(dchecked, 4)
self.failUnlessReallyEqual(dc, 0)
self.failUnlessReallyEqual(dr, 4)
self.failUnlessReallyEqual(ds, 0)
d.addCallback(_check4b)
d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
def _check5((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.new_archives = out.split()
self.failUnlessReallyEqual(len(self.new_archives), 3, out)
# the original backup should still be the oldest (i.e. sorts
# alphabetically towards the beginning)
self.failUnlessReallyEqual(sorted(self.new_archives)[0],
self.old_archives[0])
d.addCallback(_check5)
d.addCallback(self.stall, 1.1)
def _modify(res):
self.writeto("parent/subdir/foo.txt", "FOOF!")
# and turn a file into a directory
os.unlink(os.path.join(source, "parent/blah.txt"))
os.mkdir(os.path.join(source, "parent/blah.txt"))
self.writeto("parent/blah.txt/surprise file", "surprise")
self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
# turn a directory into a file
os.rmdir(os.path.join(source, "empty"))
self.writeto("empty", "imagine nothing being here")
return do_backup()
d.addCallback(_modify)
def _check5a((rc, out, err)):
# second backup should reuse bar.txt (if backupdb is available),
# and upload the rest. None of the directories can be reused.
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
fu, fr, fs, dc, dr, ds = self.count_output(out)
# new foo.txt, surprise file, subfile, empty
self.failUnlessReallyEqual(fu, 4)
# old bar.txt
self.failUnlessReallyEqual(fr, 1)
self.failUnlessReallyEqual(fs, 0)
# home, parent, subdir, blah.txt, surprisedir
self.failUnlessReallyEqual(dc, 5)
self.failUnlessReallyEqual(dr, 0)
self.failUnlessReallyEqual(ds, 0)
d.addCallback(_check5a)
d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
def _check6((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.new_archives = out.split()
self.failUnlessReallyEqual(len(self.new_archives), 4)
self.failUnlessReallyEqual(sorted(self.new_archives)[0],
self.old_archives[0])
d.addCallback(_check6)
d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
def _check7((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.failUnlessReallyEqual(out, "FOOF!")
# the old snapshot should not be modified
return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
d.addCallback(_check7)
def _check8((rc, out, err)):
self.failUnlessReallyEqual(err, "")
self.failUnlessReallyEqual(rc, 0)
self.failUnlessReallyEqual(out, "foo")
d.addCallback(_check8)
return d
# on our old dapper buildslave, this test takes a long time (usually
# 130s), so we have to bump up the default 120s timeout. The create-alias
# and initial backup alone take 60s, probably because of the handful of
# dirnodes being created (RSA key generation). The backup between check4
# and check4a takes 6s, as does the backup before check4b.
test_backup.timeout = 3000
def _check_filtering(self, filtered, all, included, excluded):
filtered = set(filtered)
all = set(all)
included = set(included)
excluded = set(excluded)
self.failUnlessReallyEqual(filtered, included)
self.failUnlessReallyEqual(all.difference(filtered), excluded)
def test_exclude_options(self):
root_listdir = (u'lib.a', u'_darcs', u'subdir', u'nice_doc.lyx')
subdir_listdir = (u'another_doc.lyx', u'run_snake_run.py', u'CVS', u'.svn', u'_darcs')
basedir = "cli/Backup/exclude_options"
fileutil.make_dirs(basedir)
nodeurl_path = os.path.join(basedir, 'node.url')
fileutil.write(nodeurl_path, 'http://example.net:2357/')
def parse(args): return parse_options(basedir, "backup", args)
# test simple exclude
backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
filtered = list(backup_options.filter_listdir(root_listdir))
self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
(u'nice_doc.lyx',))
# multiple exclude
backup_options = parse(['--exclude', '*lyx', '--exclude', 'lib.?', 'from', 'to'])
filtered = list(backup_options.filter_listdir(root_listdir))
self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
(u'nice_doc.lyx', u'lib.a'))
# vcs metadata exclusion
backup_options = parse(['--exclude-vcs', 'from', 'to'])
filtered = list(backup_options.filter_listdir(subdir_listdir))
self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'run_snake_run.py',),
(u'CVS', u'.svn', u'_darcs'))
# read exclude patterns from file
exclusion_string = "_darcs\n*py\n.svn"
excl_filepath = os.path.join(basedir, 'exclusion')
fileutil.write(excl_filepath, exclusion_string)
backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
filtered = list(backup_options.filter_listdir(subdir_listdir))
self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'CVS'),
(u'.svn', u'_darcs', u'run_snake_run.py'))
# test BackupConfigurationError
self.failUnlessRaises(cli.BackupConfigurationError,
parse,
['--exclude-from', excl_filepath + '.no', 'from', 'to'])
# test that an iterator works too
backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
filtered = list(backup_options.filter_listdir(iter(root_listdir)))
self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
(u'nice_doc.lyx',))
def test_exclude_options_unicode(self):
nice_doc = u"nice_d\u00F8c.lyx"
try:
doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding())
except UnicodeEncodeError:
raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.")
root_listdir = (u'lib.a', u'_darcs', u'subdir', nice_doc)
basedir = "cli/Backup/exclude_options_unicode"
fileutil.make_dirs(basedir)
nodeurl_path = os.path.join(basedir, 'node.url')
fileutil.write(nodeurl_path, 'http://example.net:2357/')
def parse(args): return parse_options(basedir, "backup", args)
# test simple exclude
backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
filtered = list(backup_options.filter_listdir(root_listdir))
self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
(nice_doc,))
# multiple exclude
backup_options = parse(['--exclude', doc_pattern_arg, '--exclude', 'lib.?', 'from', 'to'])
filtered = list(backup_options.filter_listdir(root_listdir))
self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
(nice_doc, u'lib.a'))
# read exclude patterns from file
exclusion_string = doc_pattern_arg + "\nlib.?"
excl_filepath = os.path.join(basedir, 'exclusion')
fileutil.write(excl_filepath, exclusion_string)
backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
filtered = list(backup_options.filter_listdir(root_listdir))
self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
(nice_doc, u'lib.a'))
# test that an iterator works too
backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
filtered = list(backup_options.filter_listdir(iter(root_listdir)))
self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
(nice_doc,))
def test_exclude_from_tilde_expansion(self):
basedir = "cli/Backup/exclude_from_tilde_expansion"
fileutil.make_dirs(basedir)
nodeurl_path = os.path.join(basedir, 'node.url')
fileutil.write(nodeurl_path, 'http://example.net:2357/')
# ensure that tilde expansion is performed on exclude-from argument
exclude_file = u'~/.tahoe/excludes.dummy'
ns = Namespace()
ns.called = False
def call_file(name, *args):
ns.called = True
self.failUnlessEqual(name, abspath_expanduser_unicode(exclude_file))
return StringIO()
patcher = MonkeyPatcher((__builtin__, 'file', call_file))
patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to'])
self.failUnless(ns.called)
def test_ignore_symlinks(self):
if not hasattr(os, 'symlink'):
raise unittest.SkipTest("Symlinks are not supported by Python on this platform.")
self.basedir = os.path.dirname(self.mktemp())
self.set_up_grid(oneshare=True)
source = os.path.join(self.basedir, "home")
self.writeto("foo.txt", "foo")
os.symlink(os.path.join(source, "foo.txt"), os.path.join(source, "foo2.txt"))
d = self.do_cli("create-alias", "tahoe")
d.addCallback(lambda res: self.do_cli("backup", "--verbose", source, "tahoe:test"))
def _check((rc, out, err)):
self.failUnlessReallyEqual(rc, 2)
foo2 = os.path.join(source, "foo2.txt")
self.failUnlessIn("WARNING: cannot backup symlink ", err)
self.failUnlessIn(foo2, err)
fu, fr, fs, dc, dr, ds = self.count_output(out)
# foo.txt
self.failUnlessReallyEqual(fu, 1)
self.failUnlessReallyEqual(fr, 0)
# foo2.txt
self.failUnlessReallyEqual(fs, 1)
# home
self.failUnlessReallyEqual(dc, 1)
self.failUnlessReallyEqual(dr, 0)
self.failUnlessReallyEqual(ds, 0)
d.addCallback(_check)
return d
def test_ignore_unreadable_file(self):
self.basedir = os.path.dirname(self.mktemp())
self.set_up_grid(oneshare=True)
source = os.path.join(self.basedir, "home")
self.writeto("foo.txt", "foo")
os.chmod(os.path.join(source, "foo.txt"), 0000)
d = self.do_cli("create-alias", "tahoe")
d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
def _check((rc, out, err)):
self.failUnlessReallyEqual(rc, 2)
self.failUnlessReallyEqual(err, "WARNING: permission denied on file %s\n" % os.path.join(source, "foo.txt"))
fu, fr, fs, dc, dr, ds = self.count_output(out)
self.failUnlessReallyEqual(fu, 0)
self.failUnlessReallyEqual(fr, 0)
# foo.txt
self.failUnlessReallyEqual(fs, 1)
# home
self.failUnlessReallyEqual(dc, 1)
self.failUnlessReallyEqual(dr, 0)
self.failUnlessReallyEqual(ds, 0)
d.addCallback(_check)
# This is necessary for the temp files to be correctly removed
def _cleanup(self):
os.chmod(os.path.join(source, "foo.txt"), 0644)
d.addCallback(_cleanup)
d.addErrback(_cleanup)
return d
def test_ignore_unreadable_directory(self):
self.basedir = os.path.dirname(self.mktemp())
self.set_up_grid(oneshare=True)
source = os.path.join(self.basedir, "home")
os.mkdir(source)
os.mkdir(os.path.join(source, "test"))
os.chmod(os.path.join(source, "test"), 0000)
d = self.do_cli("create-alias", "tahoe")
d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
def _check((rc, out, err)):
self.failUnlessReallyEqual(rc, 2)
self.failUnlessReallyEqual(err, "WARNING: permission denied on directory %s\n" % os.path.join(source, "test"))
fu, fr, fs, dc, dr, ds = self.count_output(out)
self.failUnlessReallyEqual(fu, 0)
self.failUnlessReallyEqual(fr, 0)
self.failUnlessReallyEqual(fs, 0)
# home, test
self.failUnlessReallyEqual(dc, 2)
self.failUnlessReallyEqual(dr, 0)
# test
self.failUnlessReallyEqual(ds, 1)
d.addCallback(_check)
# This is necessary for the temp files to be correctly removed
def _cleanup(self):
os.chmod(os.path.join(source, "test"), 0655)
d.addCallback(_cleanup)
d.addErrback(_cleanup)
return d
def test_backup_without_alias(self):
# 'tahoe backup' should output a sensible error message when invoked
# without an alias instead of a stack trace.
self.basedir = os.path.dirname(self.mktemp())
self.set_up_grid(oneshare=True)
source = os.path.join(self.basedir, "file1")
d = self.do_cli('backup', source, source)
def _check((rc, out, err)):
self.failUnlessReallyEqual(rc, 1)
self.failUnlessIn("error:", err)
self.failUnlessReallyEqual(out, "")
d.addCallback(_check)
return d
def test_backup_with_nonexistent_alias(self):
# 'tahoe backup' should output a sensible error message when invoked
# with a nonexistent alias.
self.basedir = os.path.dirname(self.mktemp())
self.set_up_grid(oneshare=True)
source = os.path.join(self.basedir, "file1")
d = self.do_cli("backup", source, "nonexistent:" + source)
def _check((rc, out, err)):
self.failUnlessReallyEqual(rc, 1)
self.failUnlessIn("error:", err)
self.failUnlessIn("nonexistent", err)
self.failUnlessReallyEqual(out, "")
d.addCallback(_check)
return d