checker: overhaul checker results, split check/check_and_repair into separate methods, improve web displays

This commit is contained in:
Brian Warner 2008-09-07 12:44:56 -07:00
parent c061e588aa
commit 3408d552cd
22 changed files with 1327 additions and 394 deletions

View File

@ -0,0 +1,181 @@
from zope.interface import implements
from allmydata.interfaces import ICheckerResults, ICheckAndRepairResults, \
IDeepCheckResults, IDeepCheckAndRepairResults
from allmydata.util import base32
class CheckerResults:
    """Results of one check/verify operation on a single object.

    A checker creates an instance for a storage index and then fills it in
    through the set_* methods; consumers read it back through the
    ICheckerResults accessors.
    """
    implements(ICheckerResults)

    def __init__(self, storage_index):
        """Create an empty result for the object named by storage_index."""
        self.storage_index = storage_index
        self.problems = []
        self.data = {"count-corrupt-shares": 0,
                     "list-corrupt-shares": [],
                     }
        self.summary = ""
        self.report = []
        # Not every checker calls every setter (set_servermap, for example,
        # is only used for mutable files). Give the remaining attributes
        # defaults so the accessors never raise AttributeError. The health
        # default is pessimistic: nothing is healthy until a checker says so.
        self.healthy = False
        self.needs_rebalancing_p = False
        self.servermap = None

    def set_healthy(self, healthy):
        """Record whether the object is fully healthy (coerced to bool)."""
        self.healthy = bool(healthy)

    def set_needs_rebalancing(self, needs_rebalancing):
        """Record whether shares should be moved (coerced to bool)."""
        self.needs_rebalancing_p = bool(needs_rebalancing)

    def set_data(self, data):
        """Merge the given dictionary into the existing data dictionary."""
        self.data.update(data)

    def set_summary(self, summary):
        """Store the one-line summary; it must be a single string."""
        assert isinstance(summary, str) # should be a single string
        self.summary = summary

    def set_report(self, report):
        """Store the detailed report; it must be a list of strings."""
        assert not isinstance(report, str) # should be list of strings
        self.report = report

    def set_servermap(self, smap):
        """Attach the servermap used for the check (mutable files only)."""
        # mutable only
        self.servermap = smap

    def get_storage_index(self):
        """Return the storage index this result was created with."""
        return self.storage_index

    def get_storage_index_string(self):
        """Return the storage index encoded with base32.b2a."""
        return base32.b2a(self.storage_index)

    def is_healthy(self):
        """Return the recorded health flag (False until set_healthy)."""
        return self.healthy

    def needs_rebalancing(self):
        """Return the recorded rebalancing flag (False until it is set)."""
        return self.needs_rebalancing_p

    def get_data(self):
        """Return the data dictionary (the live object, not a copy)."""
        return self.data

    def get_summary(self):
        """Return the one-line summary string."""
        return self.summary

    def get_report(self):
        """Return the list of report strings."""
        return self.report

    def get_servermap(self):
        """Return the attached servermap, or None if none was attached."""
        return self.servermap
class CheckAndRepairResults:
    """Results of one check-and-repair operation on a single object.

    The class has no setters: whoever performs the check/repair assigns the
    repair_attempted, repair_successful, pre_repair_results and
    post_repair_results attributes directly.
    """
    implements(ICheckAndRepairResults)

    def __init__(self, storage_index):
        """Create an empty result for the object named by storage_index."""
        self.storage_index = storage_index
        self.repair_attempted = False
        # A repair that was never attempted cannot have succeeded. Having a
        # default keeps get_repair_successful() from raising AttributeError
        # when no repair was needed.
        self.repair_successful = False
        # These stay None until the code running the check assigns them.
        self.pre_repair_results = None
        self.post_repair_results = None

    def get_storage_index(self):
        """Return the storage index this result was created with."""
        return self.storage_index

    def get_storage_index_string(self):
        """Return the storage index encoded with base32.b2a."""
        return base32.b2a(self.storage_index)

    def get_repair_attempted(self):
        """Return True if a repair was attempted."""
        return self.repair_attempted

    def get_repair_successful(self):
        """Return the recorded repair-success flag (False by default)."""
        return self.repair_successful

    def get_pre_repair_results(self):
        """Return the checker results recorded before any repair."""
        return self.pre_repair_results

    def get_post_repair_results(self):
        """Return the checker results recorded after any repair."""
        return self.post_repair_results
class DeepResultsBase:
    """State shared by the deep-check result classes.

    Holds the root storage index, the running object counters, the list of
    corrupt shares seen so far, and the mapping of all per-object results.
    """

    def __init__(self, root_storage_index):
        """Start an empty tally for a traversal rooted at root_storage_index.

        root_storage_index may be None, in which case the printable form is
        the placeholder "<none>".
        """
        self.root_storage_index = root_storage_index
        # printable form: placeholder, or the first six base32 characters
        abbreviated = "<none>"
        if root_storage_index is not None:
            abbreviated = base32.b2a(root_storage_index)[:6]
        self.root_storage_index_s = abbreviated

        self.objects_checked = 0
        self.objects_healthy = 0
        self.objects_unhealthy = 0

        self.corrupt_shares = []
        self.all_results = {}

    def get_root_storage_index_string(self):
        """Return the abbreviated printable root storage index."""
        return self.root_storage_index_s

    def get_corrupt_shares(self):
        """Return the accumulated list of corrupt shares."""
        return self.corrupt_shares

    def get_all_results(self):
        """Return the dictionary of all recorded per-object results."""
        return self.all_results
class DeepCheckResults(DeepResultsBase):
    """Accumulates the per-object results of a deep-check traversal."""
    implements(IDeepCheckResults)

    def add_check(self, r, path):
        """Fold one object's check result into the running totals.

        r is the value produced by checking one object; a false value means
        the object is non-distributed and is ignored. path is a list or
        tuple of path components; its tuple form is the key under which the
        result is stored.
        """
        if not r:
            # non-distributed object, i.e. LIT file
            return
        checker_results = ICheckerResults(r)
        assert isinstance(path, (list, tuple))

        self.objects_checked += 1
        if checker_results.is_healthy():
            self.objects_healthy += 1
        else:
            self.objects_unhealthy += 1

        self.all_results[tuple(path)] = checker_results
        newly_corrupt = checker_results.get_data()["list-corrupt-shares"]
        self.corrupt_shares.extend(newly_corrupt)

    def get_counters(self):
        """Return a dictionary of the counters gathered so far."""
        counters = {}
        counters["count-objects-checked"] = self.objects_checked
        counters["count-objects-healthy"] = self.objects_healthy
        counters["count-objects-unhealthy"] = self.objects_unhealthy
        counters["count-corrupt-shares"] = len(self.corrupt_shares)
        return counters
class DeepCheckAndRepairResults(DeepResultsBase):
    """Accumulates the per-object results of a deep-check-and-repair run.

    The counters inherited from DeepResultsBase (objects_healthy,
    objects_unhealthy, corrupt_shares) describe the pre-repair state; the
    *_post_repair attributes added here describe the state after repair.
    """
    implements(IDeepCheckAndRepairResults)

    def __init__(self, root_storage_index):
        """Start an empty tally for a traversal rooted at root_storage_index."""
        DeepResultsBase.__init__(self, root_storage_index)
        # each counter is initialised exactly once
        self.objects_healthy_post_repair = 0
        self.objects_unhealthy_post_repair = 0
        self.repairs_attempted = 0
        self.repairs_successful = 0
        self.repairs_unsuccessful = 0
        self.corrupt_shares_post_repair = []

    def add_check_and_repair(self, r, path):
        """Fold one object's check-and-repair result into the totals.

        r is the value produced by check-and-repair on one object; a false
        value means the object is non-distributed and is ignored. path is a
        list or tuple of path components; its tuple form is the key under
        which the result is stored.
        """
        if not r:
            return # non-distributed object, i.e. LIT file
        r = ICheckAndRepairResults(r)
        assert isinstance(path, (list, tuple))
        pre_repair = r.get_pre_repair_results()
        post_repair = r.get_post_repair_results()

        # pre-repair state
        self.objects_checked += 1
        if pre_repair.is_healthy():
            self.objects_healthy += 1
        else:
            self.objects_unhealthy += 1
        self.corrupt_shares.extend(pre_repair.get_data()["list-corrupt-shares"])

        # repair outcome; success is only consulted when a repair was tried
        if r.get_repair_attempted():
            self.repairs_attempted += 1
            if r.get_repair_successful():
                self.repairs_successful += 1
            else:
                self.repairs_unsuccessful += 1

        # post-repair state
        if post_repair.is_healthy():
            self.objects_healthy_post_repair += 1
        else:
            self.objects_unhealthy_post_repair += 1
        self.all_results[tuple(path)] = r
        self.corrupt_shares_post_repair.extend(post_repair.get_data()["list-corrupt-shares"])

    def get_counters(self):
        """Return a dictionary of the pre- and post-repair counters."""
        return {"count-objects-checked": self.objects_checked,
                "count-objects-healthy-pre-repair": self.objects_healthy,
                "count-objects-unhealthy-pre-repair": self.objects_unhealthy,
                "count-objects-healthy-post-repair": self.objects_healthy_post_repair,
                "count-objects-unhealthy-post-repair": self.objects_unhealthy_post_repair,
                "count-repairs-attempted": self.repairs_attempted,
                "count-repairs-successful": self.repairs_successful,
                "count-repairs-unsuccessful": self.repairs_unsuccessful,
                "count-corrupt-shares-pre-repair": len(self.corrupt_shares),
                "count-corrupt-shares-post-repair": len(self.corrupt_shares_post_repair),
                }

    def get_remaining_corrupt_shares(self):
        """Return the list of shares still corrupt after repair."""
        return self.corrupt_shares_post_repair

View File

@ -9,7 +9,8 @@ from allmydata.mutable.node import MutableFileNode
from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\ from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode, \ IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode, \
ExistingChildError, ICheckable ExistingChildError, ICheckable
from allmydata.immutable.checker import DeepCheckResults from allmydata.checker_results import DeepCheckResults, \
DeepCheckAndRepairResults
from allmydata.util import hashutil, mathutil, base32, log from allmydata.util import hashutil, mathutil, base32, log
from allmydata.util.hashutil import netstring from allmydata.util.hashutil import netstring
from allmydata.util.limiter import ConcurrencyLimiter from allmydata.util.limiter import ConcurrencyLimiter
@ -246,9 +247,11 @@ class NewDirectoryNode:
def get_storage_index(self): def get_storage_index(self):
return self._uri._filenode_uri.storage_index return self._uri._filenode_uri.storage_index
def check(self, verify=False, repair=False): def check(self, verify=False):
"""Perform a file check. See IChecker.check for details.""" """Perform a file check. See IChecker.check for details."""
return self._node.check(verify, repair) return self._node.check(verify)
def check_and_repair(self, verify=False):
return self._node.check_and_repair(verify)
def list(self): def list(self):
"""I return a Deferred that fires with a dictionary mapping child """I return a Deferred that fires with a dictionary mapping child
@ -537,17 +540,25 @@ class NewDirectoryNode:
d.addCallback(_got_list) d.addCallback(_got_list)
return d return d
def deep_check(self, verify=False, repair=False): def deep_check(self, verify=False):
return self.deep_check_base(verify, False)
def deep_check_and_repair(self, verify=False):
return self.deep_check_base(verify, True)
def deep_check_base(self, verify, repair):
# shallow-check each object first, then traverse children # shallow-check each object first, then traverse children
root_si = self._node.get_storage_index() root_si = self._node.get_storage_index()
self._lp = log.msg(format="deep-check starting (%(si)s)," self._lp = log.msg(format="deep-check starting (%(si)s),"
" verify=%(verify)s, repair=%(repair)s", " verify=%(verify)s, repair=%(repair)s",
si=base32.b2a(root_si), verify=verify, repair=repair) si=base32.b2a(root_si), verify=verify, repair=repair)
if repair:
results = DeepCheckAndRepairResults(root_si)
else:
results = DeepCheckResults(root_si) results = DeepCheckResults(root_si)
found = set() found = set()
limiter = ConcurrencyLimiter(10) limiter = ConcurrencyLimiter(10)
d = self._add_deepcheck_from_node(self, results, found, limiter, d = self._add_deepcheck_from_node([], self, results, found, limiter,
verify, repair) verify, repair)
def _done(res): def _done(res):
log.msg("deep-check done", parent=self._lp) log.msg("deep-check done", parent=self._lp)
@ -555,7 +566,7 @@ class NewDirectoryNode:
d.addCallback(_done) d.addCallback(_done)
return d return d
def _add_deepcheck_from_node(self, node, results, found, limiter, def _add_deepcheck_from_node(self, path, node, results, found, limiter,
verify, repair): verify, repair):
verifier = node.get_verifier() verifier = node.get_verifier()
if verifier in found: if verifier in found:
@ -563,15 +574,25 @@ class NewDirectoryNode:
return None return None
found.add(verifier) found.add(verifier)
d = limiter.add(node.check, verify, repair) if repair:
d.addCallback(results.add_check) d = limiter.add(node.check_and_repair, verify)
d.addCallback(results.add_check_and_repair, path)
else:
d = limiter.add(node.check, verify)
d.addCallback(results.add_check, path)
# TODO: stats: split the DeepStats.foo calls out of
# _add_deepstats_from_node into a separate non-recursing method, call
# it from both here and _add_deepstats_from_node.
if IDirectoryNode.providedBy(node): if IDirectoryNode.providedBy(node):
d.addCallback(lambda res: node.list()) d.addCallback(lambda res: node.list())
def _got_children(children): def _got_children(children):
dl = [] dl = []
for name, (child, metadata) in children.iteritems(): for name, (child, metadata) in children.iteritems():
d2 = self._add_deepcheck_from_node(child, results, childpath = path + [name]
d2 = self._add_deepcheck_from_node(childpath, child,
results,
found, limiter, found, limiter,
verify, repair) verify, repair)
if d2: if d2:

View File

@ -6,99 +6,12 @@ This does no verification of the shares whatsoever. If the peer claims to
have the share, we believe them. have the share, we believe them.
""" """
from zope.interface import implements
from twisted.internet import defer from twisted.internet import defer
from twisted.python import log from twisted.python import log
from allmydata import storage from allmydata import storage
from allmydata.interfaces import ICheckerResults, IDeepCheckResults from allmydata.checker_results import CheckerResults
from allmydata.immutable import download from allmydata.immutable import download
from allmydata.util import hashutil, base32 from allmydata.util import hashutil
class Results:
implements(ICheckerResults)
def __init__(self, storage_index):
# storage_index might be None for, say, LIT files
self.storage_index = storage_index
if storage_index is None:
self.storage_index_s = "<none>"
else:
self.storage_index_s = base32.b2a(storage_index)[:6]
self.status_report = "[not generated yet]" # string
def is_healthy(self):
return self.healthy
def get_storage_index(self):
return self.storage_index
def get_storage_index_string(self):
return self.storage_index_s
def get_mutability_string(self):
if self.storage_index:
return "immutable"
return "literal"
def to_string(self):
s = ""
if self.healthy:
s += "Healthy!\n"
else:
s += "Not Healthy!\n"
s += "\n"
s += self.status_report
s += "\n"
return s
class DeepCheckResults:
implements(IDeepCheckResults)
def __init__(self, root_storage_index):
self.root_storage_index = root_storage_index
if root_storage_index is None:
self.root_storage_index_s = "<none>"
else:
self.root_storage_index_s = base32.b2a(root_storage_index)[:6]
self.objects_checked = 0
self.objects_healthy = 0
self.repairs_attempted = 0
self.repairs_successful = 0
self.problems = []
self.all_results = {}
self.server_problems = {}
def get_root_storage_index_string(self):
return self.root_storage_index_s
def add_check(self, r):
self.objects_checked += 1
if r.is_healthy():
self.objects_healthy += 1
else:
self.problems.append(r)
self.all_results[r.get_storage_index()] = r
def add_repair(self, is_successful):
self.repairs_attempted += 1
if is_successful:
self.repairs_successful += 1
def count_objects_checked(self):
return self.objects_checked
def count_objects_healthy(self):
return self.objects_healthy
def count_repairs_attempted(self):
return self.repairs_attempted
def count_repairs_successful(self):
return self.repairs_successful
def get_server_problems(self):
return self.server_problems
def get_problems(self):
return self.problems
def get_all_results(self):
return self.all_results
class SimpleCHKFileChecker: class SimpleCHKFileChecker:
"""Return a list of (needed, total, found, sharemap), where sharemap maps """Return a list of (needed, total, found, sharemap), where sharemap maps
@ -152,18 +65,25 @@ class SimpleCHKFileChecker:
pass pass
def _done(self, res): def _done(self, res):
r = Results(self.storage_index) r = CheckerResults(self.storage_index)
report = [] report = []
r.healthy = bool(len(self.found_shares) >= self.total_shares) r.set_healthy(bool(len(self.found_shares) >= self.total_shares))
r.stuff = (self.needed_shares, self.total_shares, data = {"count-shares-good": len(self.found_shares),
len(self.found_shares), self.sharemap) "count-shares-needed": self.needed_shares,
"count-shares-expected": self.total_shares,
}
# TODO: count-good-shares-hosts, count-corrupt-shares,
# list-corrupt-shares, servers-responding, sharemap
#r.stuff = (self.needed_shares, self.total_shares,
# len(self.found_shares), self.sharemap)
if len(self.found_shares) < self.total_shares: if len(self.found_shares) < self.total_shares:
wanted = set(range(self.total_shares)) wanted = set(range(self.total_shares))
missing = wanted - self.found_shares missing = wanted - self.found_shares
report.append("Missing shares: %s" % report.append("Missing shares: %s" %
",".join(["sh%d" % shnum ",".join(["sh%d" % shnum
for shnum in sorted(missing)])) for shnum in sorted(missing)]))
r.status_report = "\n".join(report) + "\n" r.set_report(report)
# TODO: r.set_summary(summary)
return r return r
class VerifyingOutput: class VerifyingOutput:
@ -175,7 +95,7 @@ class VerifyingOutput:
self._crypttext_hash_tree = None self._crypttext_hash_tree = None
self._opened = False self._opened = False
self._results = results self._results = results
results.healthy = False results.set_healthy(False)
def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree): def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
self._crypttext_hash_tree = crypttext_hashtree self._crypttext_hash_tree = crypttext_hashtree
@ -196,8 +116,10 @@ class VerifyingOutput:
self.crypttext_hash = self._crypttext_hasher.digest() self.crypttext_hash = self._crypttext_hasher.digest()
def finish(self): def finish(self):
self._results.healthy = True self._results.set_healthy(True)
return self._results # the return value of finish() is passed out of FileDownloader._done,
# but SimpleCHKFileVerifier overrides this with the CheckerResults
# instance instead.
class SimpleCHKFileVerifier(download.FileDownloader): class SimpleCHKFileVerifier(download.FileDownloader):
@ -218,7 +140,7 @@ class SimpleCHKFileVerifier(download.FileDownloader):
self._si_s = storage.si_b2a(self._storage_index) self._si_s = storage.si_b2a(self._storage_index)
self.init_logging() self.init_logging()
r = Results(self._storage_index) self._check_results = r = CheckerResults(self._storage_index)
self._output = VerifyingOutput(self._size, r) self._output = VerifyingOutput(self._size, r)
self._paused = False self._paused = False
self._stopped = False self._stopped = False
@ -265,5 +187,6 @@ class SimpleCHKFileVerifier(download.FileDownloader):
# once we know that, we can download blocks from everybody # once we know that, we can download blocks from everybody
d.addCallback(self._download_all_segments) d.addCallback(self._download_all_segments)
d.addCallback(self._done) d.addCallback(self._done)
d.addCallback(lambda ignored: self._check_results)
return d return d

View File

@ -3,8 +3,10 @@ from zope.interface import implements
from twisted.internet import defer from twisted.internet import defer
from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable
from allmydata import uri from allmydata import uri
from allmydata.immutable.checker import Results, DeepCheckResults, \ from allmydata.immutable.checker import SimpleCHKFileChecker, \
SimpleCHKFileChecker, SimpleCHKFileVerifier SimpleCHKFileVerifier
from allmydata.checker_results import DeepCheckResults, \
DeepCheckAndRepairResults
class FileNode: class FileNode:
implements(IFileNode, ICheckable) implements(IFileNode, ICheckable)
@ -47,8 +49,7 @@ class FileNode:
def get_storage_index(self): def get_storage_index(self):
return self.u.storage_index return self.u.storage_index
def check(self, verify=False, repair=False): def check(self, verify=False):
assert repair is False # not implemented yet
storage_index = self.u.storage_index storage_index = self.u.storage_index
k = self.u.needed_shares k = self.u.needed_shares
N = self.u.total_shares N = self.u.total_shares
@ -61,11 +62,23 @@ class FileNode:
v = self.checker_class(self._client, storage_index, k, N) v = self.checker_class(self._client, storage_index, k, N)
return v.start() return v.start()
def deep_check(self, verify=False, repair=False): def check_and_repair(self, verify=False):
d = self.check(verify, repair) raise NotImplementedError("not implemented yet")
def deep_check(self, verify=False):
d = self.check(verify)
def _done(r): def _done(r):
dr = DeepCheckResults(self.get_verifier().storage_index) dr = DeepCheckResults(self.get_verifier().storage_index)
dr.add_check(r) dr.add_check(r, [])
return dr
d.addCallback(_done)
return d
def deep_check_and_repair(self, verify=False):
d = self.check_and_repair(verify)
def _done(r):
dr = DeepCheckAndRepairResults(self.get_verifier().storage_index)
dr.add_check_and_repair(r, [])
return dr return dr
d.addCallback(_done) d.addCallback(_done)
return d return d
@ -120,20 +133,13 @@ class LiteralFileNode:
return None return None
def check(self, verify=False, repair=False): def check(self, verify=False, repair=False):
# neither verify= nor repair= affect LIT files # neither verify= nor repair= affect LIT files, and the check returns
r = Results(None) # no results.
r.healthy = True return defer.succeed(None)
r.problems = []
return defer.succeed(r)
def deep_check(self, verify=False, repair=False): def deep_check(self, verify=False, repair=False):
d = self.check(verify, repair)
def _done(r):
dr = DeepCheckResults(None) dr = DeepCheckResults(None)
dr.add_check(r) return defer.succeed(dr)
return dr
d.addCallback(_done)
return d
def download(self, target): def download(self, target):
# note that this does not update the stats_provider # note that this does not update the stats_provider

View File

@ -1430,16 +1430,18 @@ class IUploader(Interface):
"""TODO: how should this work?""" """TODO: how should this work?"""
class ICheckable(Interface): class ICheckable(Interface):
def check(verify=False, repair=False): def check(verify=False):
"""Check upon my health, optionally repairing any problems. """Check upon my health, optionally repairing any problems.
This returns a Deferred that fires with an instance that provides This returns a Deferred that fires with an instance that provides
ICheckerResults. ICheckerResults, or None if the object is non-distributed (i.e. LIT
files).
Filenodes and dirnodes (which provide IFilesystemNode) are also Filenodes and dirnodes (which provide IFilesystemNode) are also
checkable. Instances that represent verifier-caps will be checkable checkable. Instances that represent verifier-caps will be checkable
but not downloadable. Some objects (like LIT files) do not actually but not downloadable. Some objects (like LIT files) do not actually
live in the grid, and their checkers indicate a healthy result. live in the grid, and their checkers return None (non-distributed
files are always healthy).
If verify=False, a relatively lightweight check will be performed: I If verify=False, a relatively lightweight check will be performed: I
will ask all servers if they have a share for me, and I will believe will ask all servers if they have a share for me, and I will believe
@ -1470,7 +1472,19 @@ class ICheckable(Interface):
taken. taken.
""" """
def deep_check(verify=False, repair=False): def check_and_repair(verify=False):
"""Like check(), but if the file/directory is not healthy, attempt to
repair the damage.
This returns a Deferred which fires with a tuple of (pre, post), each
is either None or an ICheckerResults instance. For non-distributed
files (i.e. a LIT file) both are None. Otherwise, 'pre' is an
ICheckerResults representing the state of the object before any
repair attempt is made. If the file was unhealthy and repair was
attempted, 'post' will be another ICheckerResults instance with the
state of the object after repair."""
def deep_check(verify=False):
"""Check upon the health of me and everything I can reach. """Check upon the health of me and everything I can reach.
This is a recursive form of check(), useable on dirnodes. (it can be This is a recursive form of check(), useable on dirnodes. (it can be
@ -1479,40 +1493,118 @@ class ICheckable(Interface):
I return a Deferred that fires with an IDeepCheckResults object. I return a Deferred that fires with an IDeepCheckResults object.
""" """
class ICheckerResults(Interface): def deep_check_and_repair(verify=False):
"""I contain the detailed results of a check/verify/repair operation. """Check upon the health of me and everything I can reach. Repair
anything that isn't healthy.
The IFilesystemNode.check()/verify()/repair() methods all return This is a recursive form of check_and_repair(), useable on dirnodes.
instances that provide ICheckerResults. (it can be called safely on filenodes too, but only checks/repairs
the one object).
I return a Deferred that fires with an IDeepCheckAndRepairResults
object.
""" """
def is_healthy(): class ICheckerResults(Interface):
"""Return a bool, True if the file is fully healthy, False if it is """I contain the detailed results of a check/verify operation.
damaged in any way.""" """
def get_storage_index(): def get_storage_index():
"""Return a string with the (binary) storage index.""" """Return a string with the (binary) storage index."""
def get_storage_index_string(): def get_storage_index_string():
"""Return a string with the (printable) abbreviated storage index.""" """Return a string with the (printable) abbreviated storage index."""
def get_mutability_string():
"""Return a string with 'mutable' or 'immutable'."""
def to_string(): def is_healthy():
"""Return a string that describes the detailed results of the """Return a boolean, True if the file/dir is fully healthy, False if
check/verify operation. This string will be displayed on a page all it is damaged in any way. Non-distributed LIT files always return
by itself.""" True."""
def needs_rebalancing():
"""Return a boolean, True if the file/dir's reliability could be
improved by moving shares to new servers. Non-distributed LIT files
always return False."""
def get_data():
"""Return a dictionary that describes the state of the file/dir.
Non-distributed LIT files always return an empty dictionary. Normal
files and directories return a dictionary with the following keys
(note that these use base32-encoded strings rather than binary ones)
(also note that for mutable files, these counts are for the 'best'
version)::
count-shares-good: the number of distinct good shares that were found
count-shares-needed: 'k', the number of shares required for recovery
count-shares-expected: 'N', the number of total shares generated
count-good-share-hosts: the number of distinct storage servers with
good shares. If this number is less than
count-shares-good, then some shares are
doubled up, increasing the correlation of
failures. This indicates that one or more
shares should be moved to an otherwise unused
server, if one is available.
count-corrupt-shares: the number of shares with integrity failures
list-corrupt-shares: a list of 'share locators', one for each share
that was found to be corrupt. Each share
locator is a list of (serverid, storage_index,
sharenum).
servers-responding: list of base32-encoded storage server identifiers,
one for each server which responded to the share
query.
sharemap: dict mapping share identifier to list of serverids
(base32-encoded strings). This indicates which servers are
holding which shares. For immutable files, the shareid is
an integer (the share number, from 0 to N-1). For
mutable files, it is a string of the form
'seq%d-%s-sh%d', containing the sequence number, the
roothash, and the share number.
Mutable files will add the following keys::
count-wrong-shares: the number of shares for versions other than
the 'best' one (highest sequence number, highest
roothash). These are either old ...
count-recoverable-versions: the number of recoverable versions of
the file. For a healthy file, this will
equal 1.
count-unrecoverable-versions: the number of unrecoverable versions
of the file. For a healthy file, this
will be 0.
"""
def get_summary():
"""Return a string with a brief (one-line) summary of the results."""
def get_report():
"""Return a list of strings with more detailed results."""
class ICheckAndRepairResults(Interface):
"""I contain the detailed results of a check/verify/repair operation.
The ICheckable.check_and_repair() method returns instances that provide
ICheckAndRepairResults.
"""
def get_storage_index():
"""Return a string with the (binary) storage index."""
def get_storage_index_string():
"""Return a string with the (printable) abbreviated storage index."""
def get_repair_attempted():
"""Return a boolean, True if a repair was attempted."""
def get_repair_successful():
"""Return a boolean, True if repair was attempted and the file/dir
was fully healthy afterwards."""
def get_pre_repair_results():
"""Return an ICheckerResults instance that describes the state of the
file/dir before any repair was attempted."""
def get_post_repair_results():
"""Return an ICheckerResults instance that describes the state of the
file/dir after any repair was attempted. If no repair was attempted,
the pre-repair and post-repair results will be identical."""
# The old checker results (for only immutable files) were described
# with this:
# For filenodes, this fires with a tuple of (needed_shares,
# total_shares, found_shares, sharemap). The first three are ints. The
# basic health of the file is found_shares / needed_shares: if less
# than 1.0, the file is unrecoverable.
#
# The sharemap has a key for each sharenum. The value is a list of
# (binary) nodeids who hold that share. If two shares are kept on the
# same nodeid, they will fail as a pair, and overall reliability is
# decreased.
class IDeepCheckResults(Interface): class IDeepCheckResults(Interface):
"""I contain the results of a deep-check operation. """I contain the results of a deep-check operation.
@ -1523,24 +1615,86 @@ class IDeepCheckResults(Interface):
def get_root_storage_index_string(): def get_root_storage_index_string():
"""Return the storage index (abbreviated human-readable string) of """Return the storage index (abbreviated human-readable string) of
the first object checked.""" the first object checked."""
def count_objects_checked(): def get_counters():
"""Return the number of objects that were checked.""" """Return a dictionary with the following keys::
def count_objects_healthy():
"""Return the number of objects that were fully healthy.""" count-objects-checked: count of how many objects were checked
def count_repairs_attempted(): count-objects-healthy: how many of those objects were completely
"""Return the number of repair operations that were attempted.""" healthy
def count_repairs_successful(): count-objects-unhealthy: how many were damaged in some way
"""Return the number of repair operations that succeeded in bringing count-corrupt-shares: how many shares were found to have
the object back up to full health.""" corruption, summed over all objects
def get_server_problems(): examined
"""Return a dict, mapping server nodeid to a count of how many """
problems involved that server."""
def get_problems(): def get_corrupt_shares():
"""Return a list of ICheckerResults, one for each object that """Return a set of (serverid, storage_index, sharenum) for all shares
was not fully healthy.""" that were found to be corrupt. Both serverid and storage_index are
binary.
"""
def get_all_results(): def get_all_results():
"""Return a dict mapping storage_index (a binary string) to an """Return a dictionary mapping pathname (a tuple of strings, ready to
ICheckerResults instance, one for each object that was checked.""" be slash-joined) to an ICheckerResults instance, one for each object
that was checked."""
class IDeepCheckAndRepairResults(Interface):
"""I contain the results of a deep-check-and-repair operation.
This is returned by a call to ICheckable.deep_check_and_repair().
"""
def get_root_storage_index_string():
"""Return the storage index (abbreviated human-readable string) of
the first object checked."""
def get_counters():
"""Return a dictionary with the following keys::
count-objects-checked: count of how many objects were checked
count-objects-healthy-pre-repair: how many of those objects were
completely healthy (before any
repair)
count-objects-unhealthy-pre-repair: how many were damaged in
some way
count-objects-healthy-post-repair: how many of those objects were
completely healthy (after any
repair)
count-objects-unhealthy-post-repair: how many were damaged in
some way
count-repairs-attempted: repairs were attempted on this many
objects. The count-repairs- keys will
always be provided, however unless
repair=true is present, they will all
be zero.
count-repairs-successful: how many repairs resulted in healthy
objects
count-repairs-unsuccessful: how many repairs did not
result in completely healthy objects
count-corrupt-shares-pre-repair: how many shares were found to
have corruption, summed over all
objects examined (before any
repair)
count-corrupt-shares-post-repair: how many shares were found to
have corruption, summed over all
objects examined (after any
repair)
"""
def get_corrupt_shares():
"""Return a set of (serverid, storage_index, sharenum) for all shares
that were found to be corrupt before any repair was attempted. Both
serverid and storage_index are binary.
"""
def get_remaining_corrupt_shares():
"""Return a set of (serverid, storage_index, sharenum) for all shares
that were found to be corrupt after any repair was completed. Both
serverid and storage_index are binary. These are shares that need
manual inspection and probably deletion.
"""
def get_all_results():
"""Return a dictionary mapping pathname (a tuple of strings, ready to
be slash-joined) to an ICheckAndRepairResults instance, one for each
object that was checked."""
class IRepairable(Interface): class IRepairable(Interface):
def repair(checker_results): def repair(checker_results):
@ -1551,7 +1705,16 @@ class IRepairable(Interface):
proof that you have actually discovered a problem with this file. I proof that you have actually discovered a problem with this file. I
will use the data in the checker results to guide the repair process, will use the data in the checker results to guide the repair process,
such as which servers provided bad data and should therefore be such as which servers provided bad data and should therefore be
avoided. avoided. The ICheckerResults object is inside the
ICheckAndRepairResults object, which is returned by the
ICheckable.check_and_repair() method::
d = filenode.check_and_repair()
def _got_results(check_and_repair_results):
check_results = check_and_repair_results.get_pre_repair_results()
return filenode.repair(check_results)
d.addCallback(_got_results)
return d
""" """
class IRepairResults(Interface): class IRepairResults(Interface):

View File

@ -1,10 +1,9 @@
from zope.interface import implements
from twisted.internet import defer from twisted.internet import defer
from twisted.python import failure from twisted.python import failure
from allmydata import hashtree from allmydata import hashtree
from allmydata.util import hashutil, base32, idlib, log from allmydata.util import hashutil, base32, idlib, log
from allmydata.interfaces import ICheckerResults from allmydata.checker_results import CheckAndRepairResults, CheckerResults
from common import MODE_CHECK, CorruptShareError from common import MODE_CHECK, CorruptShareError
from servermap import ServerMap, ServermapUpdater from servermap import ServerMap, ServermapUpdater
@ -16,21 +15,19 @@ class MutableChecker:
self._node = node self._node = node
self.bad_shares = [] # list of (nodeid,shnum,failure) self.bad_shares = [] # list of (nodeid,shnum,failure)
self._storage_index = self._node.get_storage_index() self._storage_index = self._node.get_storage_index()
self.results = Results(self._storage_index) self.results = CheckerResults(self._storage_index)
self.need_repair = False self.need_repair = False
def check(self, verify=False, repair=False): def check(self, verify=False):
servermap = ServerMap() servermap = ServerMap()
self.results.servermap = servermap
u = ServermapUpdater(self._node, servermap, MODE_CHECK) u = ServermapUpdater(self._node, servermap, MODE_CHECK)
d = u.update() d = u.update()
d.addCallback(self._got_mapupdate_results) d.addCallback(self._got_mapupdate_results)
if verify: if verify:
d.addCallback(self._verify_all_shares) d.addCallback(self._verify_all_shares)
d.addCallback(self._generate_results) d.addCallback(lambda res: servermap)
if repair: d.addCallback(self._fill_checker_results, self.results)
d.addCallback(self._maybe_do_repair) d.addCallback(lambda res: self.results)
d.addCallback(self._return_results)
return d return d
def _got_mapupdate_results(self, servermap): def _got_mapupdate_results(self, servermap):
@ -68,7 +65,7 @@ class MutableChecker:
for (shnum, peerid, timestamp) in shares: for (shnum, peerid, timestamp) in shares:
ss = servermap.connections[peerid] ss = servermap.connections[peerid]
d = self._do_read(ss, peerid, self._storage_index, [shnum], readv) d = self._do_read(ss, peerid, self._storage_index, [shnum], readv)
d.addCallback(self._got_answer, peerid) d.addCallback(self._got_answer, peerid, servermap)
dl.append(d) dl.append(d)
return defer.DeferredList(dl, fireOnOneErrback=True) return defer.DeferredList(dl, fireOnOneErrback=True)
@ -78,7 +75,7 @@ class MutableChecker:
d = ss.callRemote("slot_readv", storage_index, shnums, readv) d = ss.callRemote("slot_readv", storage_index, shnums, readv)
return d return d
def _got_answer(self, datavs, peerid): def _got_answer(self, datavs, peerid, servermap):
for shnum,datav in datavs.items(): for shnum,datav in datavs.items():
data = datav[0] data = datav[0]
try: try:
@ -88,7 +85,7 @@ class MutableChecker:
self.need_repair = True self.need_repair = True
self.bad_shares.append( (peerid, shnum, f) ) self.bad_shares.append( (peerid, shnum, f) )
prefix = data[:SIGNED_PREFIX_LENGTH] prefix = data[:SIGNED_PREFIX_LENGTH]
self.results.servermap.mark_bad_share(peerid, shnum, prefix) servermap.mark_bad_share(peerid, shnum, prefix)
def check_prefix(self, peerid, shnum, data): def check_prefix(self, peerid, shnum, data):
(seqnum, root_hash, IV, segsize, datalength, k, N, prefix, (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
@ -134,13 +131,34 @@ class MutableChecker:
if alleged_writekey != self._node.get_writekey(): if alleged_writekey != self._node.get_writekey():
raise CorruptShareError(peerid, shnum, "invalid privkey") raise CorruptShareError(peerid, shnum, "invalid privkey")
def _generate_results(self, res): def _count_shares(self, smap, version):
self.results.healthy = True available_shares = smap.shares_available()
smap = self.results.servermap (num_distinct_shares, k, N) = available_shares[version]
counters = {}
counters["count-shares-good"] = num_distinct_shares
counters["count-shares-needed"] = k
counters["count-shares-expected"] = N
good_hosts = smap.all_peers_for_version(version)
counters["count-good-share-hosts"] = good_hosts
vmap = smap.make_versionmap()
counters["count-wrong-shares"] = sum([len(shares)
for verinfo,shares in vmap.items()
if verinfo != version])
return counters
def _fill_checker_results(self, smap, r):
r.set_servermap(smap.copy())
healthy = True
data = {}
report = [] report = []
summary = []
vmap = smap.make_versionmap() vmap = smap.make_versionmap()
recoverable = smap.recoverable_versions() recoverable = smap.recoverable_versions()
unrecoverable = smap.unrecoverable_versions() unrecoverable = smap.unrecoverable_versions()
data["count-recoverable-versions"] = len(recoverable)
data["count-unrecoverable-versions"] = len(unrecoverable)
if recoverable: if recoverable:
report.append("Recoverable Versions: " + report.append("Recoverable Versions: " +
"/".join(["%d*%s" % (len(vmap[v]), "/".join(["%d*%s" % (len(vmap[v]),
@ -152,34 +170,65 @@ class MutableChecker:
smap.summarize_version(v)) smap.summarize_version(v))
for v in unrecoverable])) for v in unrecoverable]))
if smap.unrecoverable_versions(): if smap.unrecoverable_versions():
self.results.healthy = False healthy = False
summary.append("some versions are unrecoverable")
report.append("Unhealthy: some versions are unrecoverable") report.append("Unhealthy: some versions are unrecoverable")
if len(recoverable) == 0: if len(recoverable) == 0:
self.results.healthy = False healthy = False
summary.append("no versions are recoverable")
report.append("Unhealthy: no versions are recoverable") report.append("Unhealthy: no versions are recoverable")
if len(recoverable) > 1: if len(recoverable) > 1:
self.results.healthy = False healthy = False
summary.append("multiple versions are recoverable")
report.append("Unhealthy: there are multiple recoverable versions") report.append("Unhealthy: there are multiple recoverable versions")
if self.best_version:
if recoverable:
best_version = smap.best_recoverable_version()
report.append("Best Recoverable Version: " + report.append("Best Recoverable Version: " +
smap.summarize_version(self.best_version)) smap.summarize_version(best_version))
available_shares = smap.shares_available() counters = self._count_shares(smap, best_version)
(num_distinct_shares, k, N) = available_shares[self.best_version] data.update(counters)
if num_distinct_shares < N: if counters["count-shares-good"] < counters["count-shares-expected"]:
self.results.healthy = False healthy = False
report.append("Unhealthy: best recoverable version has only %d shares (encoding is %d-of-%d)" report.append("Unhealthy: best version has only %d shares "
% (num_distinct_shares, k, N)) "(encoding is %d-of-%d)"
% (counters["count-shares-good"],
counters["count-shares-needed"],
counters["count-shares-expected"]))
summary.append("%d shares (enc %d-of-%d)"
% (counters["count-shares-good"],
counters["count-shares-needed"],
counters["count-shares-expected"]))
elif unrecoverable:
healthy = False
# find a k and N from somewhere
first = list(unrecoverable)[0]
# not exactly the best version, but that doesn't matter too much
data.update(self._count_shares(smap, first))
else:
# couldn't find anything at all
data["count-shares-good"] = 0
data["count-shares-needed"] = 3 # arbitrary defaults
data["count-shares-expected"] = 10
data["count-good-share-hosts"] = 0
data["count-wrong-shares"] = 0
if self.bad_shares: if self.bad_shares:
data["count-corrupt-shares"] = len(self.bad_shares)
data["list-corrupt-shares"] = locators = []
report.append("Corrupt Shares:") report.append("Corrupt Shares:")
summary.append("Corrupt Shares:")
for (peerid, shnum, f) in sorted(self.bad_shares): for (peerid, shnum, f) in sorted(self.bad_shares):
locators.append( (peerid, self._storage_index, shnum) )
s = "%s-sh%d" % (idlib.shortnodeid_b2a(peerid), shnum) s = "%s-sh%d" % (idlib.shortnodeid_b2a(peerid), shnum)
if f.check(CorruptShareError): if f.check(CorruptShareError):
ft = f.value.reason ft = f.value.reason
else: else:
ft = str(f) ft = str(f)
report.append(" %s: %s" % (s, ft)) report.append(" %s: %s" % (s, ft))
summary.append(s)
p = (peerid, self._storage_index, shnum, f) p = (peerid, self._storage_index, shnum, f)
self.results.problems.append(p) r.problems.append(p)
msg = ("CorruptShareError during mutable verify, " msg = ("CorruptShareError during mutable verify, "
"peerid=%(peerid)s, si=%(si)s, shnum=%(shnum)d, " "peerid=%(peerid)s, si=%(si)s, shnum=%(shnum)d, "
"where=%(where)s") "where=%(where)s")
@ -188,68 +237,52 @@ class MutableChecker:
shnum=shnum, shnum=shnum,
where=ft, where=ft,
level=log.WEIRD, umid="EkK8QA") level=log.WEIRD, umid="EkK8QA")
else:
data["count-corrupt-shares"] = 0
data["list-corrupt-shares"] = []
self.results.status_report = "\n".join(report) + "\n" # TODO: servers-responding, sharemap
def _maybe_do_repair(self, res): r.set_healthy(healthy)
r.set_needs_rebalancing(False) # TODO
r.set_data(data)
if healthy:
r.set_summary("Healthy")
else:
r.set_summary("Unhealthy: " + " ".join(summary))
r.set_report(report)
class MutableCheckAndRepairer(MutableChecker):
def __init__(self, node):
MutableChecker.__init__(self, node)
self.cr_results = CheckAndRepairResults(self._storage_index)
self.cr_results.pre_repair_results = self.results
self.need_repair = False
def check(self, verify=False):
d = MutableChecker.check(self, verify)
d.addCallback(self._maybe_repair)
d.addCallback(lambda res: self.cr_results)
return d
def _maybe_repair(self, res):
if not self.need_repair: if not self.need_repair:
self.cr_results.post_repair_results = self.results
return return
self.results.repair_attempted = True self.cr_results.repair_attempted = True
d = self._node.repair(self.results) d = self._node.repair(self.results)
def _repair_finished(repair_results): def _repair_finished(repair_results):
self.results.repair_succeeded = True self.cr_results.repair_successful = True
self.results.repair_results = repair_results r = CheckerResults(self._storage_index)
self.cr_results.post_repair_results = r
self._fill_checker_results(repair_results.servermap, r)
self.cr_results.repair_results = repair_results # TODO?
def _repair_error(f): def _repair_error(f):
# I'm not sure if I want to pass through a failure or not. # I'm not sure if I want to pass through a failure or not.
self.results.repair_succeeded = False self.cr_results.repair_successful = False
self.results.repair_failure = f self.cr_results.repair_failure = f # TODO?
#self.cr_results.post_repair_results = ??
return f return f
d.addCallbacks(_repair_finished, _repair_error) d.addCallbacks(_repair_finished, _repair_error)
return d return d
def _return_results(self, res):
return self.results
class Results:
implements(ICheckerResults)
def __init__(self, storage_index):
self.storage_index = storage_index
self.storage_index_s = base32.b2a(storage_index)[:6]
self.repair_attempted = False
self.status_report = "[not generated yet]" # string
self.repair_report = None
self.problems = [] # list of (peerid, storage_index, shnum, failure)
def is_healthy(self):
return self.healthy
def get_storage_index(self):
return self.storage_index
def get_storage_index_string(self):
return self.storage_index_s
def get_mutability_string(self):
return "mutable"
def to_string(self):
s = ""
if self.healthy:
s += "Healthy!\n"
else:
s += "Not Healthy!\n"
s += "\n"
s += self.status_report
s += "\n"
if self.repair_attempted:
s += "Repair attempted "
if self.repair_succeeded:
s += "and successful\n"
else:
s += "and failed\n"
s += "\n"
s += self.repair_results.to_string()
s += "\n"
return s

View File

@ -12,7 +12,8 @@ from allmydata.util import hashutil
from allmydata.util.assertutil import precondition from allmydata.util.assertutil import precondition
from allmydata.uri import WriteableSSKFileURI from allmydata.uri import WriteableSSKFileURI
from allmydata.immutable.encode import NotEnoughSharesError from allmydata.immutable.encode import NotEnoughSharesError
from allmydata.immutable.checker import DeepCheckResults from allmydata.checker_results import DeepCheckResults, \
DeepCheckAndRepairResults
from pycryptopp.publickey import rsa from pycryptopp.publickey import rsa
from pycryptopp.cipher.aes import AES from pycryptopp.cipher.aes import AES
@ -21,7 +22,7 @@ from common import MODE_READ, MODE_WRITE, UnrecoverableFileError, \
ResponseCache, UncoordinatedWriteError ResponseCache, UncoordinatedWriteError
from servermap import ServerMap, ServermapUpdater from servermap import ServerMap, ServermapUpdater
from retrieve import Retrieve from retrieve import Retrieve
from checker import MutableChecker from checker import MutableChecker, MutableCheckAndRepairer
from repair import Repairer from repair import Repairer
@ -54,6 +55,7 @@ class MutableFileNode:
SIGNATURE_KEY_SIZE = 2048 SIGNATURE_KEY_SIZE = 2048
DEFAULT_ENCODING = (3, 10) DEFAULT_ENCODING = (3, 10)
checker_class = MutableChecker checker_class = MutableChecker
check_and_repairer_class = MutableCheckAndRepairer
def __init__(self, client): def __init__(self, client):
self._client = client self._client = client
@ -243,15 +245,29 @@ class MutableFileNode:
################################# #################################
# ICheckable # ICheckable
def check(self, verify=False, repair=False): def check(self, verify=False):
checker = self.checker_class(self) checker = self.checker_class(self)
return checker.check(verify, repair) return checker.check(verify)
def deep_check(self, verify=False, repair=False): def check_and_repair(self, verify=False):
d = self.check(verify, repair) checker = self.check_and_repairer_class(self)
return checker.check(verify)
def deep_check(self, verify=False):
# deep-check on a filenode only gets one result
d = self.check(verify)
def _done(r): def _done(r):
dr = DeepCheckResults(self.get_storage_index()) dr = DeepCheckResults(self.get_storage_index())
dr.add_check(r) dr.add_check(r, [])
return dr
d.addCallback(_done)
return d
def deep_check_and_repair(self, verify=False):
d = self.check_and_repair(verify)
def _done(r):
dr = DeepCheckAndRepairResults(self.get_storage_index())
dr.add_check_and_repair(r, [])
return dr return dr
d.addCallback(_done) d.addCallback(_done)
return d return d

View File

@ -1,10 +1,13 @@
from zope.interface import implements from zope.interface import implements
from allmydata.interfaces import IRepairResults from allmydata.interfaces import IRepairResults, ICheckerResults
class RepairResults: class RepairResults:
implements(IRepairResults) implements(IRepairResults)
def __init__(self, smap):
self.servermap = smap
def to_string(self): def to_string(self):
return "" return ""
@ -14,7 +17,7 @@ class MustForceRepairError(Exception):
class Repairer: class Repairer:
def __init__(self, node, checker_results): def __init__(self, node, checker_results):
self.node = node self.node = node
self.checker_results = checker_results self.checker_results = ICheckerResults(checker_results)
assert checker_results.storage_index == self.node.get_storage_index() assert checker_results.storage_index == self.node.get_storage_index()
def start(self, force=False): def start(self, force=False):
@ -44,7 +47,7 @@ class Repairer:
# old shares: replace old shares with the latest version # old shares: replace old shares with the latest version
# bogus shares (bad sigs): replace the bad one with a good one # bogus shares (bad sigs): replace the bad one with a good one
smap = self.checker_results.servermap smap = self.checker_results.get_servermap()
if smap.unrecoverable_newer_versions(): if smap.unrecoverable_newer_versions():
if not force: if not force:
@ -88,8 +91,8 @@ class Repairer:
best_version = smap.best_recoverable_version() best_version = smap.best_recoverable_version()
d = self.node.download_version(smap, best_version, fetch_privkey=True) d = self.node.download_version(smap, best_version, fetch_privkey=True)
d.addCallback(self.node.upload, smap) d.addCallback(self.node.upload, smap)
d.addCallback(self.get_results) d.addCallback(self.get_results, smap)
return d return d
def get_results(self, res): def get_results(self, res, smap):
return RepairResults() return RepairResults(smap)

View File

@ -121,6 +121,17 @@ class ServerMap:
self.last_update_mode = None self.last_update_mode = None
self.last_update_time = 0 self.last_update_time = 0
def copy(self):
    """Return a new ServerMap carrying the same state as this one.

    Each container attribute is duplicated one level deep, so entries
    added to or removed from the duplicate do not show up in this map.
    The entries themselves are shared between the two maps.
    """
    duplicate = ServerMap()
    # when and how this map was last updated
    duplicate.last_update_mode = self.last_update_mode
    duplicate.last_update_time = self.last_update_time
    # what the update found
    duplicate.servermap = self.servermap.copy() # tuple->tuple
    duplicate.connections = self.connections.copy() # str->RemoteReference
    duplicate.bad_shares = self.bad_shares.copy() # tuple->str
    duplicate.unreachable_peers = set(self.unreachable_peers)
    duplicate.problems = self.problems[:]
    return duplicate
def mark_bad_share(self, peerid, shnum, checkstring): def mark_bad_share(self, peerid, shnum, checkstring):
"""This share was found to be bad, either in the checkstring or """This share was found to be bad, either in the checkstring or
signature (detected during mapupdate), or deeper in the share signature (detected during mapupdate), or deeper in the share
@ -162,6 +173,13 @@ class ServerMap:
for (peerid, shnum) for (peerid, shnum)
in self.servermap]) in self.servermap])
def all_peers_for_version(self, verinfo):
    """Return a set of peerids that hold shares for the given version."""
    holders = set()
    for (location, share_info) in self.servermap.items():
        # keys are (peerid, shnum), values are (verinfo, timestamp)
        (peerid, shnum) = location
        (share_verinfo, timestamp) = share_info
        if verinfo == share_verinfo:
            holders.add(peerid)
    return holders
def make_sharemap(self): def make_sharemap(self):
"""Return a dict that maps shnum to a set of peerds that hold it.""" """Return a dict that maps shnum to a set of peerds that hold it."""
sharemap = DictOfSets() sharemap = DictOfSets()

View File

@ -10,9 +10,9 @@ from allmydata import uri, dirnode, client
from allmydata.introducer.server import IntroducerNode from allmydata.introducer.server import IntroducerNode
from allmydata.interfaces import IURI, IMutableFileNode, IFileNode, \ from allmydata.interfaces import IURI, IMutableFileNode, IFileNode, \
FileTooLargeError, ICheckable FileTooLargeError, ICheckable
from allmydata.immutable import checker
from allmydata.immutable.encode import NotEnoughSharesError from allmydata.immutable.encode import NotEnoughSharesError
from allmydata.mutable.checker import Results as MutableCheckerResults from allmydata.checker_results import CheckerResults, CheckAndRepairResults, \
DeepCheckResults, DeepCheckAndRepairResults
from allmydata.mutable.common import CorruptShareError from allmydata.mutable.common import CorruptShareError
from allmydata.util import log, testutil, fileutil from allmydata.util import log, testutil, fileutil
from allmydata.stats import PickleStatsGatherer from allmydata.stats import PickleStatsGatherer
@ -44,16 +44,27 @@ class FakeCHKFileNode:
return self.my_uri return self.my_uri
def get_verifier(self): def get_verifier(self):
return IURI(self.my_uri).get_verifier() return IURI(self.my_uri).get_verifier()
def check(self, verify=False, repair=False): def check(self, verify=False):
r = checker.Results(None) r = CheckerResults(self.storage_index)
is_bad = self.bad_shares.get(self.storage_index, None) is_bad = self.bad_shares.get(self.storage_index, None)
data = {}
if is_bad: if is_bad:
r.healthy = False r.set_healthy(False)
r.problems = failure.Failure(CorruptShareError(is_bad)) r.problems = failure.Failure(CorruptShareError(is_bad))
else: else:
r.healthy = True r.set_healthy(True)
r.problems = [] r.problems = []
r.set_data(data)
return defer.succeed(r) return defer.succeed(r)
def check_and_repair(self, verify=False):
    """Run check() and wrap its result in a CheckAndRepairResults.

    This fake never repairs anything: the checker results from check()
    are installed as both the pre-repair and the post-repair results.
    Returns the Deferred from check(), which fires with the wrapper.
    """
    def _wrap(checker_results):
        wrapper = CheckAndRepairResults(self.storage_index)
        wrapper.pre_repair_results = checker_results
        wrapper.post_repair_results = checker_results
        return wrapper
    d = self.check(verify)
    d.addCallback(_wrap)
    return d
def is_mutable(self): def is_mutable(self):
return False return False
def is_readonly(self): def is_readonly(self):
@ -136,24 +147,45 @@ class FakeMutableFileNode:
def get_storage_index(self): def get_storage_index(self):
return self.storage_index return self.storage_index
def check(self, verify=False, repair=False): def check(self, verify=False):
r = MutableCheckerResults(self.storage_index) r = CheckerResults(self.storage_index)
is_bad = self.bad_shares.get(self.storage_index, None) is_bad = self.bad_shares.get(self.storage_index, None)
data = {}
data["list-corrupt-shares"] = []
if is_bad: if is_bad:
r.healthy = False r.set_healthy(False)
r.problems = failure.Failure(CorruptShareError("peerid", r.problems = failure.Failure(CorruptShareError("peerid",
0, # shnum 0, # shnum
is_bad)) is_bad))
else: else:
r.healthy = True r.set_healthy(True)
r.problems = [] r.problems = []
r.set_data(data)
return defer.succeed(r) return defer.succeed(r)
def deep_check(self, verify=False, repair=False): def check_and_repair(self, verify=False):
d = self.check(verify, repair) d = self.check(verify)
def _got(cr):
r = CheckAndRepairResults(self.storage_index)
r.pre_repair_results = r.post_repair_results = cr
return r
d.addCallback(_got)
return d
def deep_check(self, verify=False):
d = self.check(verify)
def _done(r): def _done(r):
dr = checker.DeepCheckResults(self.storage_index) dr = DeepCheckResults(self.storage_index)
dr.add_check(r) dr.add_check(r, [])
return dr
d.addCallback(_done)
return d
def deep_check_and_repair(self, verify=False):
    """Check-and-repair this single node and wrap the one result.

    Returns the Deferred from check_and_repair(), with a callback added
    that files the result in a new DeepCheckAndRepairResults under the
    empty path [].
    """
    d = self.check_and_repair(verify)
    def _done(r):
        dr = DeepCheckAndRepairResults(self.storage_index)
        # r is the CheckAndRepairResults built by check_and_repair(), so
        # it is recorded with add_check_and_repair(), the same call that
        # MutableFileNode.deep_check_and_repair() makes. add_check() is
        # what deep_check() uses for plain CheckerResults.
        dr.add_check_and_repair(r, [])
        return dr
    d.addCallback(_done)
    return d

View File

@ -4,12 +4,14 @@ from zope.interface import implements
from twisted.trial import unittest from twisted.trial import unittest
from twisted.internet import defer from twisted.internet import defer
from allmydata import uri, dirnode from allmydata import uri, dirnode
from allmydata.immutable import upload, checker from allmydata.immutable import upload
from allmydata.interfaces import IURI, IClient, IMutableFileNode, \ from allmydata.interfaces import IURI, IClient, IMutableFileNode, \
INewDirectoryURI, IReadonlyNewDirectoryURI, IFileNode, ExistingChildError INewDirectoryURI, IReadonlyNewDirectoryURI, IFileNode, \
ExistingChildError, IDeepCheckResults, IDeepCheckAndRepairResults
from allmydata.util import hashutil, testutil from allmydata.util import hashutil, testutil
from allmydata.test.common import make_chk_file_uri, make_mutable_file_uri, \ from allmydata.test.common import make_chk_file_uri, make_mutable_file_uri, \
FakeDirectoryNode, create_chk_filenode FakeDirectoryNode, create_chk_filenode
from allmydata.checker_results import CheckerResults, CheckAndRepairResults
# to test dirnode.py, we want to construct a tree of real DirectoryNodes that # to test dirnode.py, we want to construct a tree of real DirectoryNodes that
# contain pointers to fake files. We start with a fake MutableFileNode that # contain pointers to fake files. We start with a fake MutableFileNode that
@ -32,12 +34,20 @@ class Marker:
def get_verifier(self): def get_verifier(self):
return self.verifieruri return self.verifieruri
def check(self, verify=False, repair=False): def check(self, verify=False):
r = checker.Results(None) r = CheckerResults(None)
r.healthy = True r.set_healthy(True)
r.problems = []
return defer.succeed(r) return defer.succeed(r)
def check_and_repair(self, verify=False):
    """Run check() and present its result as a check-and-repair result.

    No repair takes place: the object produced by check() serves as both
    the pre-repair and the post-repair results of a CheckAndRepairResults
    created with a storage index of None. Returns the Deferred from
    check(), which fires with that CheckAndRepairResults.
    """
    def _package(checker_results):
        packaged = CheckAndRepairResults(None)
        packaged.pre_repair_results = checker_results
        packaged.post_repair_results = checker_results
        return packaged
    d = self.check(verify)
    d.addCallback(_package)
    return d
# dirnode requires three methods from the client: upload(), # dirnode requires three methods from the client: upload(),
# create_node_from_uri(), and create_empty_dirnode(). Of these, upload() is # create_node_from_uri(), and create_empty_dirnode(). Of these, upload() is
# only used by the convenience composite method add_file(). # only used by the convenience composite method add_file().
@ -150,12 +160,40 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
d = self._test_deepcheck_create() d = self._test_deepcheck_create()
d.addCallback(lambda rootnode: rootnode.deep_check()) d.addCallback(lambda rootnode: rootnode.deep_check())
def _check_results(r): def _check_results(r):
self.failUnlessEqual(r.count_objects_checked(), 3) self.failUnless(IDeepCheckResults.providedBy(r))
self.failUnlessEqual(r.count_objects_healthy(), 3) c = r.get_counters()
self.failUnlessEqual(r.count_repairs_attempted(), 0) self.failUnlessEqual(c,
self.failUnlessEqual(r.count_repairs_successful(), 0) {"count-objects-checked": 3,
self.failUnlessEqual(len(r.get_server_problems()), 0) "count-objects-healthy": 3,
self.failUnlessEqual(len(r.get_problems()), 0) "count-objects-unhealthy": 0,
"count-corrupt-shares": 0,
})
self.failIf(r.get_corrupt_shares())
self.failUnlessEqual(len(r.get_all_results()), 3)
d.addCallback(_check_results)
return d
def test_deepcheck_and_repair(self):
d = self._test_deepcheck_create()
d.addCallback(lambda rootnode: rootnode.deep_check_and_repair())
def _check_results(r):
self.failUnless(IDeepCheckAndRepairResults.providedBy(r))
c = r.get_counters()
self.failUnlessEqual(c,
{"count-objects-checked": 3,
"count-objects-healthy-pre-repair": 3,
"count-objects-unhealthy-pre-repair": 0,
"count-corrupt-shares-pre-repair": 0,
"count-objects-healthy-post-repair": 3,
"count-objects-unhealthy-post-repair": 0,
"count-corrupt-shares-post-repair": 0,
"count-repairs-attempted": 0,
"count-repairs-successful": 0,
"count-repairs-unsuccessful": 0,
})
self.failIf(r.get_corrupt_shares())
self.failIf(r.get_remaining_corrupt_shares())
self.failUnlessEqual(len(r.get_all_results()), 3)
d.addCallback(_check_results) d.addCallback(_check_results)
return d return d
@ -169,12 +207,14 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
d.addCallback(lambda rootnode: self._mark_file_bad(rootnode)) d.addCallback(lambda rootnode: self._mark_file_bad(rootnode))
d.addCallback(lambda rootnode: rootnode.deep_check()) d.addCallback(lambda rootnode: rootnode.deep_check())
def _check_results(r): def _check_results(r):
self.failUnlessEqual(r.count_objects_checked(), 3) c = r.get_counters()
self.failUnlessEqual(r.count_objects_healthy(), 2) self.failUnlessEqual(c,
self.failUnlessEqual(r.count_repairs_attempted(), 0) {"count-objects-checked": 3,
self.failUnlessEqual(r.count_repairs_successful(), 0) "count-objects-healthy": 2,
self.failUnlessEqual(len(r.get_server_problems()), 0) "count-objects-unhealthy": 1,
self.failUnlessEqual(len(r.get_problems()), 1) "count-corrupt-shares": 0,
})
#self.failUnlessEqual(len(r.get_problems()), 1) # TODO
d.addCallback(_check_results) d.addCallback(_check_results)
return d return d

View File

@ -2,7 +2,8 @@
from twisted.trial import unittest from twisted.trial import unittest
from twisted.internet import defer from twisted.internet import defer
from allmydata import uri from allmydata import uri
from allmydata.immutable import filenode, download, checker from allmydata.immutable import filenode, download
from allmydata.checker_results import CheckerResults, CheckAndRepairResults
from allmydata.mutable.node import MutableFileNode from allmydata.mutable.node import MutableFileNode
from allmydata.util import hashutil from allmydata.util import hashutil
@ -131,10 +132,21 @@ class Checker(unittest.TestCase):
d.addCallback(lambda res: fn1.check(verify=True)) d.addCallback(lambda res: fn1.check(verify=True))
d.addCallback(_check_checker_results) d.addCallback(_check_checker_results)
# TODO: check-and-repair
d.addCallback(lambda res: fn1.deep_check()) d.addCallback(lambda res: fn1.deep_check())
def _check_deepcheck_results(dcr): def _check_deepcheck_results(dcr):
self.failIf(dcr.get_problems()) c = dcr.get_counters()
self.failUnlessEqual(c["count-objects-checked"], 1)
self.failUnlessEqual(c["count-objects-healthy"], 1)
self.failUnlessEqual(c["count-objects-unhealthy"], 0)
self.failUnlessEqual(c["count-corrupt-shares"], 0)
self.failIf(dcr.get_corrupt_shares())
d.addCallback(_check_deepcheck_results) d.addCallback(_check_deepcheck_results)
d.addCallback(lambda res: fn1.deep_check(verify=True))
d.addCallback(_check_deepcheck_results)
return d return d
def test_literal_filenode(self): def test_literal_filenode(self):
@ -145,7 +157,7 @@ class Checker(unittest.TestCase):
d = fn1.check() d = fn1.check()
def _check_checker_results(cr): def _check_checker_results(cr):
self.failUnless(cr.is_healthy()) self.failUnlessEqual(cr, None)
d.addCallback(_check_checker_results) d.addCallback(_check_checker_results)
d.addCallback(lambda res: fn1.check(verify=True)) d.addCallback(lambda res: fn1.check(verify=True))
@ -153,7 +165,15 @@ class Checker(unittest.TestCase):
d.addCallback(lambda res: fn1.deep_check()) d.addCallback(lambda res: fn1.deep_check())
def _check_deepcheck_results(dcr): def _check_deepcheck_results(dcr):
self.failIf(dcr.get_problems()) c = dcr.get_counters()
self.failUnlessEqual(c["count-objects-checked"], 0)
self.failUnlessEqual(c["count-objects-healthy"], 0)
self.failUnlessEqual(c["count-objects-unhealthy"], 0)
self.failUnlessEqual(c["count-corrupt-shares"], 0)
self.failIf(dcr.get_corrupt_shares())
d.addCallback(_check_deepcheck_results)
d.addCallback(lambda res: fn1.deep_check(verify=True))
d.addCallback(_check_deepcheck_results) d.addCallback(_check_deepcheck_results)
return d return d
@ -169,6 +189,7 @@ class Checker(unittest.TestCase):
n = MutableFileNode(client).init_from_uri(u) n = MutableFileNode(client).init_from_uri(u)
n.checker_class = FakeMutableChecker n.checker_class = FakeMutableChecker
n.check_and_repairer_class = FakeMutableCheckAndRepairer
d = n.check() d = n.check()
def _check_checker_results(cr): def _check_checker_results(cr):
@ -180,24 +201,41 @@ class Checker(unittest.TestCase):
d.addCallback(lambda res: n.deep_check()) d.addCallback(lambda res: n.deep_check())
def _check_deepcheck_results(dcr): def _check_deepcheck_results(dcr):
self.failIf(dcr.get_problems()) c = dcr.get_counters()
self.failUnlessEqual(c["count-objects-checked"], 1)
self.failUnlessEqual(c["count-objects-healthy"], 1)
self.failUnlessEqual(c["count-objects-unhealthy"], 0)
self.failUnlessEqual(c["count-corrupt-shares"], 0)
self.failIf(dcr.get_corrupt_shares())
d.addCallback(_check_deepcheck_results) d.addCallback(_check_deepcheck_results)
d.addCallback(lambda res: n.deep_check(verify=True))
d.addCallback(_check_deepcheck_results)
return d return d
class FakeMutableChecker: class FakeMutableChecker:
def __init__(self, node): def __init__(self, node):
self.r = checker.Results(node.get_storage_index()) self.r = CheckerResults(node.get_storage_index())
self.r.healthy = True self.r.set_healthy(True)
self.r.problems = []
def check(self, verify, repair): def check(self, verify):
return defer.succeed(self.r)
class FakeMutableCheckAndRepairer:
def __init__(self, node):
cr = CheckerResults(node.get_storage_index())
cr.set_healthy(True)
self.r = CheckAndRepairResults(node.get_storage_index())
self.r.pre_repair_results = self.r.post_repair_results = cr
def check(self, verify):
return defer.succeed(self.r) return defer.succeed(self.r)
class FakeImmutableChecker: class FakeImmutableChecker:
def __init__(self, client, storage_index, needed_shares, total_shares): def __init__(self, client, storage_index, needed_shares, total_shares):
self.r = checker.Results(storage_index) self.r = CheckerResults(storage_index)
self.r.healthy = True self.r.set_healthy(True)
self.r.problems = []
def start(self): def start(self):
return defer.succeed(self.r) return defer.succeed(self.r)

View File

@ -1179,12 +1179,11 @@ class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin, PublishMixin):
class CheckerMixin: class CheckerMixin:
def check_good(self, r, where): def check_good(self, r, where):
self.failUnless(r.healthy, where) self.failUnless(r.is_healthy(), where)
self.failIf(r.problems, where)
return r return r
def check_bad(self, r, where): def check_bad(self, r, where):
self.failIf(r.healthy, where) self.failIf(r.is_healthy(), where)
return r return r
def check_expected_failure(self, r, expected_exception, substring, where): def check_expected_failure(self, r, expected_exception, substring, where):

View File

@ -1683,11 +1683,11 @@ class SystemTest(SystemTestMixin, unittest.TestCase):
def _got_lit_filenode(n): def _got_lit_filenode(n):
self.failUnless(isinstance(n, filenode.LiteralFileNode)) self.failUnless(isinstance(n, filenode.LiteralFileNode))
d = n.check() d = n.check()
def _check_filenode_results(r): def _check_lit_filenode_results(r):
self.failUnless(r.is_healthy()) self.failUnlessEqual(r, None)
d.addCallback(_check_filenode_results) d.addCallback(_check_lit_filenode_results)
d.addCallback(lambda res: n.check(verify=True)) d.addCallback(lambda res: n.check(verify=True))
d.addCallback(_check_filenode_results) d.addCallback(_check_lit_filenode_results)
return d return d
d.addCallback(_got_lit_filenode) d.addCallback(_got_lit_filenode)
return d return d
@ -1776,7 +1776,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase):
def _check1(filenode): def _check1(filenode):
before_check_reads = self._count_reads() before_check_reads = self._count_reads()
d2 = filenode.check(verify=False, repair=False) d2 = filenode.check(verify=False)
def _after_check(checkresults): def _after_check(checkresults):
after_check_reads = self._count_reads() after_check_reads = self._count_reads()
self.failIf(after_check_reads - before_check_reads > 0, after_check_reads - before_check_reads) self.failIf(after_check_reads - before_check_reads > 0, after_check_reads - before_check_reads)
@ -1789,7 +1789,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase):
d.addCallback(self._corrupt_a_share) d.addCallback(self._corrupt_a_share)
def _check2(ignored): def _check2(ignored):
before_check_reads = self._count_reads() before_check_reads = self._count_reads()
d2 = self.filenode.check(verify=False, repair=False) d2 = self.filenode.check(verify=False)
def _after_check(checkresults): def _after_check(checkresults):
after_check_reads = self._count_reads() after_check_reads = self._count_reads()
@ -1803,7 +1803,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase):
d.addCallback(lambda ignore: self.replace_shares({})) d.addCallback(lambda ignore: self.replace_shares({}))
def _check3(ignored): def _check3(ignored):
before_check_reads = self._count_reads() before_check_reads = self._count_reads()
d2 = self.filenode.check(verify=False, repair=False) d2 = self.filenode.check(verify=False)
def _after_check(checkresults): def _after_check(checkresults):
after_check_reads = self._count_reads() after_check_reads = self._count_reads()
@ -1824,7 +1824,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase):
def _check1(filenode): def _check1(filenode):
before_check_reads = self._count_reads() before_check_reads = self._count_reads()
d2 = filenode.check(verify=True, repair=False) d2 = filenode.check(verify=True)
def _after_check(checkresults): def _after_check(checkresults):
after_check_reads = self._count_reads() after_check_reads = self._count_reads()
# print "delta was ", after_check_reads - before_check_reads # print "delta was ", after_check_reads - before_check_reads
@ -1838,7 +1838,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase):
d.addCallback(self._corrupt_a_share) d.addCallback(self._corrupt_a_share)
def _check2(ignored): def _check2(ignored):
before_check_reads = self._count_reads() before_check_reads = self._count_reads()
d2 = self.filenode.check(verify=True, repair=False) d2 = self.filenode.check(verify=True)
def _after_check(checkresults): def _after_check(checkresults):
after_check_reads = self._count_reads() after_check_reads = self._count_reads()
@ -1876,7 +1876,8 @@ class MutableChecker(SystemTestMixin, unittest.TestCase):
return getPage(url, method="POST") return getPage(url, method="POST")
d.addCallback(_do_check) d.addCallback(_do_check)
def _got_results(out): def _got_results(out):
self.failUnless("<pre>Healthy!" in out, out) self.failUnless("<div>Healthy!</div>" in out, out)
self.failUnless("Recoverable Versions: 10*seq1-" in out, out)
self.failIf("Not Healthy!" in out, out) self.failIf("Not Healthy!" in out, out)
self.failIf("Unhealthy" in out, out) self.failIf("Unhealthy" in out, out)
self.failIf("Corrupt Shares" in out, out) self.failIf("Corrupt Shares" in out, out)
@ -1911,10 +1912,8 @@ class MutableChecker(SystemTestMixin, unittest.TestCase):
d.addCallback(_do_check) d.addCallback(_do_check)
def _got_results(out): def _got_results(out):
self.failUnless("Not Healthy!" in out, out) self.failUnless("Not Healthy!" in out, out)
self.failUnless("Unhealthy: best recoverable version has only 9 shares (encoding is 3-of-10)" in out, out) self.failUnless("Unhealthy: best version has only 9 shares (encoding is 3-of-10)" in out, out)
shid_re = (r"Corrupt Shares:\s+%s: block hash tree failure" % self.failUnless("Corrupt Shares:" in out, out)
self.corrupt_shareid)
self.failUnless(re.search(shid_re, out), out)
d.addCallback(_got_results) d.addCallback(_got_results)
# now make sure the webapi repairer can fix it # now make sure the webapi repairer can fix it
@ -1925,12 +1924,12 @@ class MutableChecker(SystemTestMixin, unittest.TestCase):
return getPage(url, method="POST") return getPage(url, method="POST")
d.addCallback(_do_repair) d.addCallback(_do_repair)
def _got_repair_results(out): def _got_repair_results(out):
self.failUnless("Repair attempted and successful" in out) self.failUnless("<div>Repair successful</div>" in out, out)
d.addCallback(_got_repair_results) d.addCallback(_got_repair_results)
d.addCallback(_do_check) d.addCallback(_do_check)
def _got_postrepair_results(out): def _got_postrepair_results(out):
self.failIf("Not Healthy!" in out, out) self.failIf("Not Healthy!" in out, out)
self.failUnless("Recoverable Versions: 10*seq" in out) self.failUnless("Recoverable Versions: 10*seq" in out, out)
d.addCallback(_got_postrepair_results) d.addCallback(_got_postrepair_results)
return d return d
@ -1963,7 +1962,7 @@ class MutableChecker(SystemTestMixin, unittest.TestCase):
d.addCallback(_do_check) d.addCallback(_do_check)
def _got_results(out): def _got_results(out):
self.failUnless("Not Healthy!" in out, out) self.failUnless("Not Healthy!" in out, out)
self.failUnless("Unhealthy: best recoverable version has only 9 shares (encoding is 3-of-10)" in out, out) self.failUnless("Unhealthy: best version has only 9 shares (encoding is 3-of-10)" in out, out)
self.failIf("Corrupt Shares" in out, out) self.failIf("Corrupt Shares" in out, out)
d.addCallback(_got_results) d.addCallback(_got_results)
@ -1975,7 +1974,7 @@ class MutableChecker(SystemTestMixin, unittest.TestCase):
return getPage(url, method="POST") return getPage(url, method="POST")
d.addCallback(_do_repair) d.addCallback(_do_repair)
def _got_repair_results(out): def _got_repair_results(out):
self.failUnless("Repair attempted and successful" in out) self.failUnless("Repair successful" in out)
d.addCallback(_got_repair_results) d.addCallback(_got_repair_results)
d.addCallback(_do_check) d.addCallback(_do_check)
def _got_postrepair_results(out): def _got_postrepair_results(out):

View File

@ -1451,6 +1451,31 @@ class Web(WebMixin, unittest.TestCase):
d.addCallback(_check3) d.addCallback(_check3)
return d return d
def test_POST_FILEURL_check_and_repair(self):
    """POST t=check with repair=true to a file URL.

    Three requests are issued in sequence against /foo/bar.txt:
      1. a plain check-and-repair, whose page must say "Healthy!";
      2. the same with when_done=, which must redirect (http.FOUND)
         to the given URL;
      3. the same with return_to=, whose page must link back to it.
    """
    bar_url = self.public_url + "/foo/bar.txt"
    d = self.POST(bar_url, t="check", repair="true")
    def _check(res):
        self.failUnless("Healthy!" in res)
    d.addCallback(_check)
    redir_url = "http://allmydata.org/TARGET"
    def _check2(statuscode, target):
        self.failUnlessEqual(statuscode, str(http.FOUND))
        self.failUnlessEqual(target, redir_url)
    d.addCallback(lambda res:
                  self.shouldRedirect2("test_POST_FILEURL_check_and_repair",
                                       _check2,
                                       self.POST, bar_url,
                                       t="check", repair="true",
                                       when_done=redir_url))
    # repair="true" must be passed here too: without it the request is a
    # plain check and the return_to link of the check-and-repair results
    # page is never rendered by this test.
    d.addCallback(lambda res:
                  self.POST(bar_url, t="check", repair="true",
                            return_to=redir_url))
    def _check3(res):
        self.failUnless("Healthy!" in res)
        self.failUnless("Return to parent directory" in res)
        self.failUnless(redir_url in res)
    d.addCallback(_check3)
    return d
def test_POST_DIRURL_check(self): def test_POST_DIRURL_check(self):
foo_url = self.public_url + "/foo/" foo_url = self.public_url + "/foo/"
d = self.POST(foo_url, t="check") d = self.POST(foo_url, t="check")
@ -1476,13 +1501,72 @@ class Web(WebMixin, unittest.TestCase):
d.addCallback(_check3) d.addCallback(_check3)
return d return d
def test_POST_DIRURL_check_and_repair(self):
    """POST t=check with repair=true to a directory URL.

    Three requests are issued in sequence against /foo/:
      1. a plain check-and-repair, whose page must say "Healthy!";
      2. the same with when_done=, which must redirect (http.FOUND)
         to the given URL;
      3. the same with return_to=, whose page must link back to it.
    """
    foo_url = self.public_url + "/foo/"
    d = self.POST(foo_url, t="check", repair="true")
    def _check(res):
        self.failUnless("Healthy!" in res)
    d.addCallback(_check)
    redir_url = "http://allmydata.org/TARGET"
    def _check2(statuscode, target):
        self.failUnlessEqual(statuscode, str(http.FOUND))
        self.failUnlessEqual(target, redir_url)
    d.addCallback(lambda res:
                  self.shouldRedirect2("test_POST_DIRURL_check_and_repair",
                                       _check2,
                                       self.POST, foo_url,
                                       t="check", repair="true",
                                       when_done=redir_url))
    # pass repair="true" here as well, so that all three requests of this
    # test ask for check-and-repair rather than falling back to a plain
    # check for the return_to case.
    d.addCallback(lambda res:
                  self.POST(foo_url, t="check", repair="true",
                            return_to=redir_url))
    def _check3(res):
        self.failUnless("Healthy!" in res)
        self.failUnless("Return to parent directory" in res)
        self.failUnless(redir_url in res)
    d.addCallback(_check3)
    return d
def test_POST_DIRURL_deepcheck(self): def test_POST_DIRURL_deepcheck(self):
d = self.POST(self.public_url, t="deep-check") d = self.POST(self.public_url, t="deep-check")
def _check(res): def _check(res):
self.failUnless("Objects Checked: <span>8</span>" in res) self.failUnless("Objects Checked: <span>8</span>" in res)
self.failUnless("Objects Healthy: <span>8</span>" in res) self.failUnless("Objects Healthy: <span>8</span>" in res)
d.addCallback(_check)
redir_url = "http://allmydata.org/TARGET"
def _check2(statuscode, target):
self.failUnlessEqual(statuscode, str(http.FOUND))
self.failUnlessEqual(target, redir_url)
d.addCallback(lambda res:
self.shouldRedirect2("test_POST_DIRURL_check",
_check2,
self.POST, self.public_url,
t="deep-check",
when_done=redir_url))
d.addCallback(lambda res:
self.POST(self.public_url, t="deep-check",
return_to=redir_url))
def _check3(res):
self.failUnless("Return to parent directory" in res)
self.failUnless(redir_url in res)
d.addCallback(_check3)
return d
def test_POST_DIRURL_deepcheck_and_repair(self):
d = self.POST(self.public_url, t="deep-check", repair="true")
def _check(res):
self.failUnless("Objects Checked: <span>8</span>" in res)
self.failUnless("Objects Healthy (before repair): <span>8</span>" in res)
self.failUnless("Objects Unhealthy (before repair): <span>0</span>" in res)
self.failUnless("Corrupt Shares (before repair): <span>0</span>" in res)
self.failUnless("Repairs Attempted: <span>0</span>" in res) self.failUnless("Repairs Attempted: <span>0</span>" in res)
self.failUnless("Repairs Successful: <span>0</span>" in res) self.failUnless("Repairs Successful: <span>0</span>" in res)
self.failUnless("Repairs Unsuccessful: <span>0</span>" in res)
self.failUnless("Objects Healthy (after repair): <span>8</span>" in res)
self.failUnless("Objects Unhealthy (after repair): <span>0</span>" in res)
self.failUnless("Corrupt Shares (after repair): <span>0</span>" in res)
d.addCallback(_check) d.addCallback(_check)
redir_url = "http://allmydata.org/TARGET" redir_url = "http://allmydata.org/TARGET"
def _check2(statuscode, target): def _check2(statuscode, target):

View File

@ -0,0 +1,24 @@
<html xmlns:n="http://nevow.com/ns/nevow/0.1">
<head>
<title>AllMyData - Tahoe - Check Results</title>
<!-- <link href="http://www.allmydata.com/common/css/styles.css"
rel="stylesheet" type="text/css"/> -->
<link href="/webform_css" rel="stylesheet" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<h1>File Check Results for SI=<span n:render="storage_index" /></h1>
<div n:render="healthy" />
<div n:render="repair_results" />
<div n:render="post_repair_results" />
<div n:render="maybe_pre_repair_results" />
<div n:render="return" />
</body>
</html>

View File

@ -8,9 +8,11 @@
</head> </head>
<body> <body>
<h1>File Check Results for SI=<span n:render="storage_index" /> (<span n:render="mutability" />)</h1> <h1>File Check Results for SI=<span n:render="storage_index" /></h1>
<pre n:render="results" /> <div n:render="healthy" />
<div n:render="results" />
<div n:render="return" /> <div n:render="return" />

View File

@ -1,24 +1,39 @@
import time import time
from nevow import rend, inevow, tags as T from nevow import rend, inevow, tags as T
from allmydata.web.common import getxmlfile, get_arg from twisted.web import html
from allmydata.interfaces import ICheckerResults, IDeepCheckResults from allmydata.web.common import getxmlfile, get_arg, IClient
from allmydata.interfaces import ICheckAndRepairResults, ICheckerResults, \
IDeepCheckResults, IDeepCheckAndRepairResults
from allmydata.util import base32, idlib
class CheckerResults(rend.Page): class ResultsBase:
def _render_results(self, cr):
    """Render the report of one checker-results object as a <pre> block.

    cr must be adaptable to ICheckerResults. Each line of cr.get_report()
    is passed through self._html() and the lines are joined with newlines.
    """
    assert ICheckerResults(cr)
    escaped_lines = self._html(cr.get_report())
    report_text = "\n".join(escaped_lines)
    return T.pre[report_text] # TODO: more
def _html(self, s):
if isinstance(s, (str, unicode)):
return html.escape(s)
assert isinstance(s, (list, tuple))
return [html.escape(w) for w in s]
class CheckerResults(rend.Page, ResultsBase):
docFactory = getxmlfile("checker-results.xhtml") docFactory = getxmlfile("checker-results.xhtml")
def __init__(self, results): def __init__(self, results):
assert ICheckerResults(results) self.r = ICheckerResults(results)
self.r = results
def render_storage_index(self, ctx, data): def render_storage_index(self, ctx, data):
return self.r.get_storage_index_string() return self.r.get_storage_index_string()
def render_mutability(self, ctx, data): def render_healthy(self, ctx, data):
return self.r.get_mutability_string() if self.r.is_healthy():
return ctx.tag["Healthy!"]
return ctx.tag["Not Healthy!:", self._html(self.r.get_summary())]
def render_results(self, ctx, data): def render_results(self, ctx, data):
return ctx.tag[self.r.to_string()] cr = self._render_results(self.r)
return ctx.tag[cr]
def render_return(self, ctx, data): def render_return(self, ctx, data):
req = inevow.IRequest(ctx) req = inevow.IRequest(ctx)
@ -27,7 +42,47 @@ class CheckerResults(rend.Page):
return T.div[T.a(href=return_to)["Return to parent directory"]] return T.div[T.a(href=return_to)["Return to parent directory"]]
return "" return ""
class DeepCheckResults(rend.Page): class CheckAndRepairResults(rend.Page, ResultsBase):
docFactory = getxmlfile("check-and-repair-results.xhtml")
def __init__(self, results):
self.r = ICheckAndRepairResults(results)
def render_storage_index(self, ctx, data):
return self.r.get_storage_index_string()
def render_healthy(self, ctx, data):
    """Fill the tag with the health verdict of the post-repair results.

    A healthy object yields "Healthy!"; otherwise the tag gets
    "Not Healthy!:" followed by the escaped post-repair summary.
    """
    post_repair = self.r.get_post_repair_results()
    if not post_repair.is_healthy():
        summary = self._html(post_repair.get_summary())
        return ctx.tag["Not Healthy!:", summary]
    return ctx.tag["Healthy!"]
def render_repair_results(self, ctx, data):
    """Fill the tag with a one-line description of the repair outcome.

    The repair-success flag is only consulted when a repair was attempted.
    """
    if not self.r.get_repair_attempted():
        return ctx.tag["No repair necessary"]
    if self.r.get_repair_successful():
        return ctx.tag["Repair successful"]
    return ctx.tag["Repair unsuccessful"]
def render_post_repair_results(self, ctx, data):
cr = self._render_results(self.r.get_post_repair_results())
return ctx.tag[cr]
def render_maybe_pre_repair_results(self, ctx, data):
    """Show the pre-repair checker report, but only if a repair was attempted.

    When no repair was attempted an empty string is returned in place of
    the tag.
    """
    if not self.r.get_repair_attempted():
        return ""
    pre_repair = self._render_results(self.r.get_pre_repair_results())
    heading = T.div["Pre-Repair Checker Results:"]
    return ctx.tag[heading, pre_repair]
def render_return(self, ctx, data):
    """Render a "Return to parent directory" link when return_to= is given.

    The link target is the request's return_to argument; without that
    argument an empty string is returned.
    """
    return_to = get_arg(inevow.IRequest(ctx), "return_to", None)
    if not return_to:
        return ""
    link = T.a(href=return_to)["Return to parent directory"]
    return T.div[link]
class DeepCheckResults(rend.Page, ResultsBase):
docFactory = getxmlfile("deep-check-results.xhtml") docFactory = getxmlfile("deep-check-results.xhtml")
def __init__(self, results): def __init__(self, results):
@ -38,33 +93,75 @@ class DeepCheckResults(rend.Page):
return self.r.get_root_storage_index_string() return self.r.get_root_storage_index_string()
def data_objects_checked(self, ctx, data): def data_objects_checked(self, ctx, data):
return self.r.count_objects_checked() return self.r.get_counters()["count-objects-checked"]
def data_objects_healthy(self, ctx, data): def data_objects_healthy(self, ctx, data):
return self.r.count_objects_healthy() return self.r.get_counters()["count-objects-healthy"]
def data_repairs_attempted(self, ctx, data): def data_objects_unhealthy(self, ctx, data):
return self.r.count_repairs_attempted() return self.r.get_counters()["count-objects-unhealthy"]
def data_repairs_successful(self, ctx, data):
return self.r.count_repairs_successful() def data_count_corrupt_shares(self, ctx, data):
return self.r.get_counters()["count-corrupt-shares"]
def render_problems_p(self, ctx, data):
c = self.r.get_counters()
if c["count-objects-unhealthy"]:
return ctx.tag
return ""
def data_problems(self, ctx, data): def data_problems(self, ctx, data):
for cr in self.r.get_problems(): all_objects = self.r.get_all_results()
yield cr for path in sorted(all_objects.keys()):
cr = all_objects[path]
assert ICheckerResults.providedBy(cr)
if not cr.is_healthy():
yield path, cr
def render_problem(self, ctx, data): def render_problem(self, ctx, data):
cr = data path, cr = data
text = cr.get_storage_index_string() summary_text = ""
text += ": " summary = cr.get_summary()
text += cr.status_report if summary:
return ctx.tag[text] summary_text = ": " + summary
summary_text += " [SI: %s]" % cr.get_storage_index_string()
return ctx.tag["/".join(self._html(path)), self._html(summary_text)]
def data_all_objects(self, ctx, data):
r = self.r.get_all_results()
for storage_index in sorted(r.keys()):
yield r[storage_index]
def render_object(self, ctx, data): def render_servers_with_corrupt_shares_p(self, ctx, data):
r = data if self.r.get_counters()["count-corrupt-shares"]:
ctx.fillSlots("storage_index", r.get_storage_index_string()) return ctx.tag
ctx.fillSlots("healthy", str(r.is_healthy())) return ""
def data_servers_with_corrupt_shares(self, ctx, data):
    """Return the sorted ids of the servers holding corrupt shares.

    get_corrupt_shares() is unpacked as (serverid, storage_index, sharenum)
    triples, i.e. one entry per corrupt share. A server that holds several
    corrupt shares would therefore appear several times, so the ids are
    collapsed into a set before sorting: each server is listed once.
    """
    servers = set([serverid
                   for (serverid, storage_index, sharenum)
                   in self.r.get_corrupt_shares()])
    return sorted(servers)
def render_server_problem(self, ctx, data):
serverid = data
data = [idlib.shortnodeid_b2a(serverid)]
c = IClient(ctx)
nickname = c.get_nickname_for_peerid(serverid)
if nickname:
data.append(" (%s)" % self._html(nickname))
return ctx.tag[data]
def render_corrupt_shares_p(self, ctx, data):
if self.r.get_counters()["count-corrupt-shares"]:
return ctx.tag
return ""
def data_corrupt_shares(self, ctx, data):
return self.r.get_corrupt_shares()
def render_share_problem(self, ctx, data):
    """Fill the slots of one row of the corrupt-shares table.

    data is a (serverid, storage_index, sharenum) triple. The slots filled
    are "serverid", "nickname", "si" and "shnum".
    """
    serverid, storage_index, sharenum = data
    nickname = IClient(ctx).get_nickname_for_peerid(serverid)
    ctx.fillSlots("serverid", idlib.shortnodeid_b2a(serverid))
    # The "nickname" slot must always be filled: the row template contains
    # an <n:slot name="nickname"/> without a default, and Nevow fails to
    # render a slot that was never given a value.
    if nickname:
        ctx.fillSlots("nickname", self._html(nickname))
    else:
        ctx.fillSlots("nickname", "")
    ctx.fillSlots("si", base32.b2a(storage_index))
    ctx.fillSlots("shnum", str(sharenum))
    return ctx.tag
def render_return(self, ctx, data): def render_return(self, ctx, data):
@ -74,6 +171,136 @@ class DeepCheckResults(rend.Page):
return T.div[T.a(href=return_to)["Return to parent directory"]] return T.div[T.a(href=return_to)["Return to parent directory"]]
return "" return ""
def data_all_objects(self, ctx, data):
r = self.r.get_all_results()
for path in sorted(r.keys()):
yield (path, r[path])
def render_object(self, ctx, data):
path, r = data
ctx.fillSlots("path", "/".join(self._html(path)))
ctx.fillSlots("healthy", str(r.is_healthy()))
ctx.fillSlots("summary", self._html(r.get_summary()))
return ctx.tag
def render_runtime(self, ctx, data):
req = inevow.IRequest(ctx)
runtime = time.time() - req.processing_started_timestamp
return ctx.tag["runtime: %s seconds" % runtime]
class DeepCheckAndRepairResults(rend.Page, ResultsBase):
docFactory = getxmlfile("deep-check-and-repair-results.xhtml")
def __init__(self, results):
    """Wrap a deep-check-and-repair results object for rendering.

    results must be adaptable to IDeepCheckAndRepairResults.
    """
    # Store the adapted object, as CheckerResults and CheckAndRepairResults
    # do, instead of asserting on the adaptation and keeping the raw
    # argument: the check then also runs under "python -O", and the page
    # always talks to an object that provides the interface.
    self.r = IDeepCheckAndRepairResults(results)
def render_root_storage_index(self, ctx, data):
return self.r.get_root_storage_index_string()
def data_objects_checked(self, ctx, data):
return self.r.get_counters()["count-objects-checked"]
def data_objects_healthy(self, ctx, data):
return self.r.get_counters()["count-objects-healthy-pre-repair"]
def data_objects_unhealthy(self, ctx, data):
return self.r.get_counters()["count-objects-unhealthy-pre-repair"]
def data_corrupt_shares(self, ctx, data):
return self.r.get_counters()["count-corrupt-shares-pre-repair"]
def data_repairs_attempted(self, ctx, data):
return self.r.get_counters()["count-repairs-attempted"]
def data_repairs_successful(self, ctx, data):
return self.r.get_counters()["count-repairs-successful"]
def data_repairs_unsuccessful(self, ctx, data):
return self.r.get_counters()["count-repairs-unsuccessful"]
def data_objects_healthy_post(self, ctx, data):
return self.r.get_counters()["count-objects-healthy-post-repair"]
def data_objects_unhealthy_post(self, ctx, data):
return self.r.get_counters()["count-objects-unhealthy-post-repair"]
def data_corrupt_shares_post(self, ctx, data):
return self.r.get_counters()["count-corrupt-shares-post-repair"]
def render_pre_repair_problems_p(self, ctx, data):
c = self.r.get_counters()
if c["count-objects-unhealthy-pre-repair"]:
return ctx.tag
return ""
def data_pre_repair_problems(self, ctx, data):
all_objects = self.r.get_all_results()
for path in sorted(all_objects.keys()):
r = all_objects[path]
assert ICheckAndRepairResults.providedBy(r)
cr = r.get_pre_repair_results()
if not cr.is_healthy():
yield path, cr
def render_problem(self, ctx, data):
    """Render one unhealthy object as "<path>: <summary>" inside its tag.

    data is a (path, checker_results) pair; path is a sequence of name
    components that are escaped and joined with "/".
    """
    path, cr = data
    # Put the text inside ctx.tag rather than returning a bare list: a
    # renderer's return value replaces the tag it is attached to, so
    # returning only the strings would drop the enclosing <li> element.
    return ctx.tag["/".join(self._html(path)), ": ",
                   self._html(cr.get_summary())]
def render_post_repair_problems_p(self, ctx, data):
    """Keep the tag only if problems remain after repair.

    The tag is returned when the post-repair counters report unhealthy
    objects or corrupt shares; otherwise it is replaced by an empty string.
    """
    counters = self.r.get_counters()
    if (not counters["count-objects-unhealthy-post-repair"]
        and not counters["count-corrupt-shares-post-repair"]):
        return ""
    return ctx.tag
def data_post_repair_problems(self, ctx, data):
all_objects = self.r.get_all_results()
for path in sorted(all_objects.keys()):
r = all_objects[path]
assert ICheckAndRepairResults.providedBy(r)
cr = r.get_post_repair_results()
if not cr.is_healthy():
yield path, cr
def render_servers_with_corrupt_shares_p(self, ctx, data):
if self.r.get_counters()["count-corrupt-shares-pre-repair"]:
return ctx.tag
return ""
def data_servers_with_corrupt_shares(self, ctx, data):
return [] # TODO
def render_server_problem(self, ctx, data):
pass
def render_remaining_corrupt_shares_p(self, ctx, data):
if self.r.get_counters()["count-corrupt-shares-post-repair"]:
return ctx.tag
return ""
def data_post_repair_corrupt_shares(self, ctx, data):
return [] # TODO
def render_share_problem(self, ctx, data):
pass
def render_return(self, ctx, data):
req = inevow.IRequest(ctx)
return_to = get_arg(req, "return_to", None)
if return_to:
return T.div[T.a(href=return_to)["Return to parent directory"]]
return ""
def data_all_objects(self, ctx, data):
    """Yield a (path, results) pair for every checked object, sorted by path."""
    results_by_path = self.r.get_all_results()
    ordered_paths = list(results_by_path.keys())
    ordered_paths.sort()
    for path in ordered_paths:
        yield (path, results_by_path[path])
def render_object(self, ctx, data):
    """Fill the slots of one row of the all-objects table.

    data is a (path, check_and_repair_results) pair. The row shows the
    escaped path joined with "/", the pre- and post-repair health flags
    as strings, and the escaped pre-repair summary.
    """
    path, r = data
    joined_path = "/".join(self._html(path))
    ctx.fillSlots("path", joined_path)
    healthy_before = r.get_pre_repair_results().is_healthy()
    ctx.fillSlots("healthy_pre_repair", str(healthy_before))
    healthy_after = r.get_post_repair_results().is_healthy()
    ctx.fillSlots("healthy_post_repair", str(healthy_after))
    summary = r.get_pre_repair_results().get_summary()
    ctx.fillSlots("summary", self._html(summary))
    return ctx.tag
def render_runtime(self, ctx, data): def render_runtime(self, ctx, data):
req = inevow.IRequest(ctx) req = inevow.IRequest(ctx)
runtime = time.time() - req.processing_started_timestamp runtime = time.time() - req.processing_started_timestamp

View File

@ -0,0 +1,89 @@
<html xmlns:n="http://nevow.com/ns/nevow/0.1">
<head>
<title>AllMyData - Tahoe - Deep Check Results</title>
<!-- <link href="http://www.allmydata.com/common/css/styles.css"
rel="stylesheet" type="text/css"/> -->
<link href="/webform_css" rel="stylesheet" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<h1>Deep-Check-And-Repair Results for root
SI=<span n:render="root_storage_index" /></h1>
<p>Counters:</p>
<ul>
<li>Objects Checked: <span n:render="data" n:data="objects_checked" /></li>
<li>Objects Healthy (before repair): <span n:render="data" n:data="objects_healthy" /></li>
<li>Objects Unhealthy (before repair): <span n:render="data" n:data="objects_unhealthy" /></li>
<li>Corrupt Shares (before repair): <span n:render="data" n:data="corrupt_shares" /></li>
<li>Repairs Attempted: <span n:render="data" n:data="repairs_attempted" /></li>
<li>Repairs Successful: <span n:render="data" n:data="repairs_successful" /></li>
<li>Repairs Unsuccessful: <span n:render="data" n:data="repairs_unsuccessful" /></li>
<li>Objects Healthy (after repair): <span n:render="data" n:data="objects_healthy_post" /></li>
<li>Objects Unhealthy (after repair): <span n:render="data" n:data="objects_unhealthy_post" /></li>
<li>Corrupt Shares (after repair): <span n:render="data" n:data="corrupt_shares_post" /></li>
</ul>
<div n:render="pre_repair_problems_p">
<h2>Files/Directories That Had Problems:</h2>
<ul n:render="sequence" n:data="pre_repair_problems">
<li n:pattern="item" n:render="problem"/>
<li n:pattern="empty">None</li>
</ul>
</div>
<div n:render="post_repair_problems_p">
<h2>Files/Directories That Still Have Problems:</h2>
<ul n:render="sequence" n:data="post_repair_problems">
<li n:pattern="item" n:render="problem"/>
<li n:pattern="empty">None</li>
</ul>
</div>
<div n:render="servers_with_corrupt_shares_p">
<h2>Servers on which corrupt shares were found</h2>
<ul n:render="sequence" n:data="servers_with_corrupt_shares">
<li n:pattern="item" n:render="server_problem"/>
<li n:pattern="empty">None</li>
</ul>
</div>
<div n:render="remaining_corrupt_shares_p">
<h2>Remaining Corrupt Shares</h2>
<p>These shares need to be manually inspected and removed.</p>
<ul n:render="sequence" n:data="post_repair_corrupt_shares">
<li n:pattern="item" n:render="share_problem"/>
<li n:pattern="empty">None</li>
</ul>
</div>
<div n:render="return" />
<div>
<table n:render="sequence" n:data="all_objects">
<tr n:pattern="header">
<td>Relative Path</td>
<td>Healthy</td>
<td>Post-Repair</td>
<td>Summary</td>
</tr>
<tr n:pattern="item" n:render="object">
<td><n:slot name="path"/></td>
<td><n:slot name="healthy_pre_repair"/></td>
<td><n:slot name="healthy_post_repair"/></td>
<td><n:slot name="summary"/></td>
</tr>
</table>
</div>
<div n:render="runtime" />
</body>
</html>

View File

@ -10,44 +10,70 @@
<h1>Deep-Check Results for root SI=<span n:render="root_storage_index" /></h1> <h1>Deep-Check Results for root SI=<span n:render="root_storage_index" /></h1>
<p>Counters:</p>
<ul> <ul>
<li>Objects Checked: <span n:render="data" n:data="objects_checked" /></li> <li>Objects Checked: <span n:render="data" n:data="objects_checked" /></li>
<li>Objects Healthy: <span n:render="data" n:data="objects_healthy" /></li> <li>Objects Healthy: <span n:render="data" n:data="objects_healthy" /></li>
<li>Objects Unhealthy: <span n:render="data" n:data="objects_unhealthy" /></li>
<li>Corrupt Shares: <span n:render="data" n:data="count_corrupt_shares" /></li>
</ul> </ul>
<h2>Problems:</h2> <div n:render="problems_p">
<h2>Files/Directories That Had Problems:</h2>
<ul n:render="sequence" n:data="problems"> <ul n:render="sequence" n:data="problems">
<li n:pattern="item" n:render="problem"/> <li n:pattern="item" n:render="problem"/>
<li n:pattern="empty">None</li> <li n:pattern="empty">None</li>
</ul> </ul>
<h2>Repair Results:</h2>
<ul>
<li>Repairs Attempted: <span n:render="data" n:data="repairs_attempted" /></li>
<li>Repairs Successful: <span n:render="data" n:data="repairs_successful" /></li>
</ul>
<h2>Objects Checked</h2>
<div>
<table n:render="sequence" n:data="all_objects" border="1">
<tr n:pattern="header">
<td>Storage Index</td>
<td>Healthy?</td>
</tr>
<tr n:pattern="item" n:render="object">
<td><n:slot name="storage_index"/></td>
<td><n:slot name="healthy"/></td>
</tr>
<tr n:pattern="empty"><td>no objects?</td></tr>
</table>
</div> </div>
<div n:render="servers_with_corrupt_shares_p">
<h2>Servers on which corrupt shares were found</h2>
<ul n:render="sequence" n:data="servers_with_corrupt_shares">
<li n:pattern="item" n:render="server_problem"/>
<li n:pattern="empty">None</li>
</ul>
</div>
<div n:render="corrupt_shares_p">
<h2>Corrupt Shares</h2>
<p>If repair fails, these shares need to be manually inspected and removed.</p>
<table n:render="sequence" n:data="corrupt_shares" border="1">
<tr n:pattern="header">
<td>Server</td>
<td>Server Nickname</td>
<td>Storage Index</td>
<td>Share Number</td>
</tr>
<tr n:pattern="item" n:render="share_problem">
<td><n:slot name="serverid"/></td>
<td><n:slot name="nickname"/></td>
<td><n:slot name="si"/></td>
<td><n:slot name="shnum"/></td>
</tr>
</table>
</div>
<div n:render="return" /> <div n:render="return" />
<div>
<h2>All Results</h2>
<table n:render="sequence" n:data="all_objects" border="1">
<tr n:pattern="header">
<td>Relative Path</td>
<td>Healthy</td>
<td>Summary</td>
</tr>
<tr n:pattern="item" n:render="object">
<td><n:slot name="path"/></td>
<td><n:slot name="healthy"/></td>
<td><n:slot name="summary"/></td>
</tr>
</table>
</div>
<div n:render="runtime" /> <div n:render="runtime" />
</body> </body>

View File

@ -21,7 +21,8 @@ from allmydata.web.common import text_plain, WebError, IClient, \
getxmlfile, RenderMixin getxmlfile, RenderMixin
from allmydata.web.filenode import ReplaceMeMixin, \ from allmydata.web.filenode import ReplaceMeMixin, \
FileNodeHandler, PlaceHolderNodeHandler FileNodeHandler, PlaceHolderNodeHandler
from allmydata.web.checker_results import CheckerResults, DeepCheckResults from allmydata.web.checker_results import CheckerResults, DeepCheckResults, \
DeepCheckAndRepairResults
class BlockingFileError(Exception): class BlockingFileError(Exception):
# TODO: catch and transform # TODO: catch and transform
@ -340,7 +341,11 @@ class DirectoryNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
# check this directory and everything reachable from it # check this directory and everything reachable from it
verify = boolean_of_arg(get_arg(req, "verify", "false")) verify = boolean_of_arg(get_arg(req, "verify", "false"))
repair = boolean_of_arg(get_arg(req, "repair", "false")) repair = boolean_of_arg(get_arg(req, "repair", "false"))
d = self.node.deep_check(verify, repair) if repair:
d = self.node.deep_check_and_repair(verify)
d.addCallback(lambda res: DeepCheckAndRepairResults(res))
else:
d = self.node.deep_check(verify)
d.addCallback(lambda res: DeepCheckResults(res)) d.addCallback(lambda res: DeepCheckResults(res))
return d return d

View File

@ -14,7 +14,7 @@ from allmydata.util import log
from allmydata.web.common import text_plain, WebError, IClient, RenderMixin, \ from allmydata.web.common import text_plain, WebError, IClient, RenderMixin, \
boolean_of_arg, get_arg, should_create_intermediate_directories boolean_of_arg, get_arg, should_create_intermediate_directories
from allmydata.web.checker_results import CheckerResults from allmydata.web.checker_results import CheckerResults, CheckAndRepairResults
class ReplaceMeMixin: class ReplaceMeMixin:
@ -256,7 +256,11 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
def _POST_check(self, req): def _POST_check(self, req):
verify = boolean_of_arg(get_arg(req, "verify", "false")) verify = boolean_of_arg(get_arg(req, "verify", "false"))
repair = boolean_of_arg(get_arg(req, "repair", "false")) repair = boolean_of_arg(get_arg(req, "repair", "false"))
d = self.node.check(verify, repair) if repair:
d = self.node.check_and_repair(verify)
d.addCallback(lambda res: CheckAndRepairResults(res))
else:
d = self.node.check(verify)
d.addCallback(lambda res: CheckerResults(res)) d.addCallback(lambda res: CheckerResults(res))
return d return d