checker results: more tests, more results. immutable verifier tests are disabled until they emit more complete results

This commit is contained in:
Brian Warner 2008-09-09 17:15:46 -07:00
parent 90b934eb71
commit f895e39d48
4 changed files with 87 additions and 27 deletions

View File

@ -11,7 +11,7 @@ from twisted.python import log
from allmydata import storage
from allmydata.checker_results import CheckerResults
from allmydata.immutable import download
from allmydata.util import hashutil
from allmydata.util import hashutil, idlib, base32
class SimpleCHKFileChecker:
"""Return a list of (needed, total, found, sharemap), where sharemap maps
@ -24,6 +24,7 @@ class SimpleCHKFileChecker:
self.found_shares = set()
self.storage_index = storage_index
self.sharemap = {}
self.responded = set()
'''
def check_synchronously(self, si):
@ -57,6 +58,7 @@ class SimpleCHKFileChecker:
if k not in self.sharemap:
self.sharemap[k] = []
self.sharemap[k].append(peerid)
self.responded.add(peerid)
def _got_error(self, f):
if f.check(KeyError):
@ -67,13 +69,35 @@ class SimpleCHKFileChecker:
def _done(self, res):
r = CheckerResults(self.storage_index)
report = []
r.set_healthy(bool(len(self.found_shares) >= self.total_shares))
healthy = bool(len(self.found_shares) >= self.total_shares)
r.set_healthy(healthy)
data = {"count-shares-good": len(self.found_shares),
"count-shares-needed": self.needed_shares,
"count-shares-expected": self.total_shares,
"count-wrong-shares": 0,
}
# TODO: count-good-shares-hosts, count-corrupt-shares,
# list-corrupt-shares, servers-responding, sharemap
if healthy:
data["count-recoverable-versions"] = 1
data["count-unrecoverable-versions"] = 0
else:
data["count-recoverable-versions"] = 0
data["count-unrecoverable-versions"] = 1
data["count-corrupt-shares"] = 0 # non-verifier doesn't see corruption
data["list-corrupt-shares"] = []
hosts = set()
sharemap = {}
for (shnum,nodeids) in self.sharemap.items():
hosts.update(nodeids)
sharemap[shnum] = [idlib.nodeid_b2a(nodeid) for nodeid in nodeids]
data["count-good-share-hosts"] = len(hosts)
data["servers-responding"] = [base32.b2a(serverid)
for serverid in self.responded]
data["sharemap"] = sharemap
r.set_data(data)
r.set_needs_rebalancing(bool( len(self.found_shares) > len(hosts) ))
#r.stuff = (self.needed_shares, self.total_shares,
# len(self.found_shares), self.sharemap)
if len(self.found_shares) < self.total_shares:
@ -117,6 +141,7 @@ class VerifyingOutput:
def finish(self):
self._results.set_healthy(True)
self._results.set_needs_rebalancing(False) # TODO: wrong
# the return value of finish() is passed out of FileDownloader._done,
# but SimpleCHKFileVerifier overrides this with the CheckerResults
# instance instead.

View File

@ -17,6 +17,7 @@ class MutableChecker:
self._storage_index = self._node.get_storage_index()
self.results = CheckerResults(self._storage_index)
self.need_repair = False
self.responded = set() # set of (binary) nodeids
def check(self, verify=False):
servermap = ServerMap()
@ -139,7 +140,7 @@ class MutableChecker:
counters["count-shares-needed"] = k
counters["count-shares-expected"] = N
good_hosts = smap.all_peers_for_version(version)
counters["count-good-share-hosts"] = good_hosts
counters["count-good-share-hosts"] = len(good_hosts)
vmap = smap.make_versionmap()
counters["count-wrong-shares"] = sum([len(shares)
for verinfo,shares in vmap.items()
@ -182,29 +183,31 @@ class MutableChecker:
summary.append("multiple versions are recoverable")
report.append("Unhealthy: there are multiple recoverable versions")
needs_rebalancing = False
if recoverable:
best_version = smap.best_recoverable_version()
report.append("Best Recoverable Version: " +
smap.summarize_version(best_version))
counters = self._count_shares(smap, best_version)
data.update(counters)
if counters["count-shares-good"] < counters["count-shares-expected"]:
s = counters["count-shares-good"]
k = counters["count-shares-needed"]
N = counters["count-shares-expected"]
if s < k:
healthy = False
report.append("Unhealthy: best version has only %d shares "
"(encoding is %d-of-%d)"
% (counters["count-shares-good"],
counters["count-shares-needed"],
counters["count-shares-expected"]))
summary.append("%d shares (enc %d-of-%d)"
% (counters["count-shares-good"],
counters["count-shares-needed"],
counters["count-shares-expected"]))
"(encoding is %d-of-%d)" % (s, k, N))
summary.append("%d shares (enc %d-of-%d)" % (s, k, N))
hosts = smap.all_peers_for_version(best_version)
needs_rebalancing = bool( len(hosts) < N )
elif unrecoverable:
healthy = False
# find a k and N from somewhere
first = list(unrecoverable)[0]
# not exactly the best version, but that doesn't matter too much
data.update(self._count_shares(smap, first))
# leave needs_rebalancing=False: the file being unrecoverable is
# the bigger problem
else:
# couldn't find anything at all
data["count-shares-good"] = 0
@ -241,10 +244,12 @@ class MutableChecker:
data["count-corrupt-shares"] = 0
data["list-corrupt-shares"] = []
# TODO: servers-responding, sharemap
# TODO: sharemap
data["servers-responding"] = [base32.b2a(serverid)
for serverid in smap.reachable_peers]
r.set_healthy(healthy)
r.set_needs_rebalancing(False) # TODO
r.set_needs_rebalancing(needs_rebalancing)
r.set_data(data)
if healthy:
r.set_summary("Healthy")

View File

@ -116,6 +116,7 @@ class ServerMap:
self.servermap = {}
self.connections = {}
self.unreachable_peers = set() # peerids that didn't respond to queries
self.reachable_peers = set() # peerids that did respond to queries
self.problems = [] # mostly for debugging
self.bad_shares = {} # maps (peerid,shnum) to old checkstring
self.last_update_mode = None
@ -126,6 +127,7 @@ class ServerMap:
s.servermap = self.servermap.copy() # tuple->tuple
s.connections = self.connections.copy() # str->RemoteReference
s.unreachable_peers = set(self.unreachable_peers)
s.reachable_peers = set(self.reachable_peers)
s.problems = self.problems[:]
s.bad_shares = self.bad_shares.copy() # tuple->str
s.last_update_mode = self.last_update_mode
@ -350,6 +352,8 @@ class ServermapUpdater:
self._status.set_progress(0.0)
self._status.set_mode(mode)
self._servers_responded = set()
# how much data should we read?
# * if we only need the checkstring, then [0:75]
# * if we need to validate the checkstring sig, then [543ish:799ish]
@ -536,6 +540,7 @@ class ServermapUpdater:
now = time.time()
elapsed = now - started
self._queries_outstanding.discard(peerid)
self._servermap.reachable_peers.add(peerid)
self._must_query.discard(peerid)
self._queries_completed += 1
if not self._running:
@ -726,7 +731,10 @@ class ServermapUpdater:
self._queries_outstanding.discard(peerid)
self._bad_peers.add(peerid)
self._servermap.problems.append(f)
self._servermap.unreachable_peers.add(peerid) # TODO: overkill?
# a peerid could be in both ServerMap.reachable_peers and
# .unreachable_peers if they responded to our query, but then an
# exception was raised in _got_results.
self._servermap.unreachable_peers.add(peerid)
self._queries_completed += 1
self._last_failure = f

View File

@ -2023,9 +2023,27 @@ class DeepCheck(SystemTestMixin, unittest.TestCase):
d.addCallback(lambda ign: self.root.set_node(u"loop", self.root))
return d
def check_is_healthy(self, cr, where):
self.failUnless(ICheckerResults.providedBy(cr))
self.failUnless(cr.is_healthy())
def check_is_healthy(self, cr, n, where):
self.failUnless(ICheckerResults.providedBy(cr), where)
self.failUnless(cr.is_healthy(), where)
self.failUnlessEqual(cr.get_storage_index(), n.get_storage_index(),
where)
self.failUnlessEqual(cr.get_storage_index_string(),
base32.b2a(n.get_storage_index()), where)
needs_rebalancing = bool( len(self.clients) < 10 )
self.failUnlessEqual(cr.needs_rebalancing(), needs_rebalancing, where)
d = cr.get_data()
self.failUnlessEqual(d["count-shares-good"], 10, where)
self.failUnlessEqual(d["count-shares-needed"], 3, where)
self.failUnlessEqual(d["count-shares-expected"], 10, where)
self.failUnlessEqual(d["count-good-share-hosts"], len(self.clients), where)
self.failUnlessEqual(d["count-corrupt-shares"], 0, where)
self.failUnlessEqual(d["list-corrupt-shares"], [], where)
self.failUnlessEqual(sorted(d["servers-responding"]),
sorted([idlib.nodeid_b2a(c.nodeid)
for c in self.clients]), where)
#self.failUnless("sharemap" in d)
def check_and_repair_is_healthy(self, cr, where):
self.failUnless(ICheckAndRepairResults.providedBy(cr), where)
@ -2051,24 +2069,28 @@ class DeepCheck(SystemTestMixin, unittest.TestCase):
self.basedir = self.mktemp()
d = self.set_up_nodes()
d.addCallback(self.set_up_tree)
d.addCallback(self.do_test_good)
return d
def do_test_good(self, ignored):
d = defer.succeed(None)
# check the individual items
d.addCallback(lambda ign: self.root.check())
d.addCallback(self.check_is_healthy, "root")
d.addCallback(self.check_is_healthy, self.root, "root")
d.addCallback(lambda ign: self.mutable.check())
d.addCallback(self.check_is_healthy, "mutable")
d.addCallback(self.check_is_healthy, self.mutable, "mutable")
d.addCallback(lambda ign: self.large.check())
d.addCallback(self.check_is_healthy, "large")
d.addCallback(self.check_is_healthy, self.large, "large")
d.addCallback(lambda ign: self.small.check())
d.addCallback(self.failUnlessEqual, None, "small")
# and again with verify=True
d.addCallback(lambda ign: self.root.check(verify=True))
d.addCallback(self.check_is_healthy, "root")
d.addCallback(self.check_is_healthy, self.root, "root")
d.addCallback(lambda ign: self.mutable.check(verify=True))
d.addCallback(self.check_is_healthy, "mutable")
d.addCallback(lambda ign: self.large.check(verify=True))
d.addCallback(self.check_is_healthy, "large")
d.addCallback(self.check_is_healthy, self.mutable, "mutable")
#d.addCallback(lambda ign: self.large.check(verify=True))
#d.addCallback(self.check_is_healthy, self.large, "large")
d.addCallback(lambda ign: self.small.check(verify=True))
d.addCallback(self.failUnlessEqual, None, "small")