Remove 'needs-rebalancing' and add 'count-happiness' to checker reports; repair tests. fixes #1784, #2105

Signed-off-by: Daira Hopwood <daira@jacaranda.org>
This commit is contained in:
Daira Hopwood 2014-03-20 16:13:57 +00:00
parent 0c7793df10
commit 0ef5939477
11 changed files with 66 additions and 63 deletions

View File

@ -1415,6 +1415,8 @@ mainly intended for developers.
this dictionary has only the 'healthy' key, which will always be
True. For distributed files, this dictionary has the following
keys:
count-happiness: the servers-of-happiness level of the file, as
defined in `docs/specifications/servers-of-happiness.rst`_.
count-shares-good: the number of good shares that were found
count-shares-needed: 'k', the number of shares required for recovery
count-shares-expected: 'N', the number of total shares generated
@ -1438,12 +1440,6 @@ mainly intended for developers.
list-corrupt-shares: a list of "share locators", one for each share
that was found to be corrupt. Each share locator
is a list of (serverid, storage_index, sharenum).
needs-rebalancing: (bool) This field is intended to be True iff
reliability could be improved for this file by
rebalancing, i.e. by moving some shares to other
servers. It may be incorrect in some cases for
Tahoe-LAFS up to and including v1.10, and its
precise definition is expected to change.
servers-responding: list of base32-encoded storage server identifiers,
one for each server which responded to the share
query.
@ -1466,6 +1462,12 @@ mainly intended for developers.
'seq%d-%s-sh%d', containing the sequence number, the
roothash, and the share number.
Before Tahoe-LAFS v1.11, the `results` dictionary also had a `needs-rebalancing`
field, but that has been removed since it was computed incorrectly.
.. _`docs/specifications/servers-of-happiness.rst`: ../specifications/servers-of-happiness.rst
``POST $URL?t=start-deep-check`` (must add &ophandle=XYZ)
This initiates a recursive walk of all files and directories reachable from

View File

@ -8,7 +8,7 @@ class CheckResults:
implements(ICheckResults)
def __init__(self, uri, storage_index,
healthy, recoverable, needs_rebalancing,
healthy, recoverable, count_happiness,
count_shares_needed, count_shares_expected,
count_shares_good, count_good_share_hosts,
count_recoverable_versions, count_unrecoverable_versions,
@ -31,8 +31,8 @@ class CheckResults:
self._recoverable = recoverable
if not self._recoverable:
assert not self._healthy
self._needs_rebalancing_p = bool(needs_rebalancing)
self._count_happiness = count_happiness
self._count_shares_needed = count_shares_needed
self._count_shares_expected = count_shares_expected
self._count_shares_good = count_shares_good
@ -78,8 +78,8 @@ class CheckResults:
def is_recoverable(self):
return self._recoverable
def needs_rebalancing(self):
return self._needs_rebalancing_p
def get_happiness(self):
return self._count_happiness
def get_encoding_needed(self):
return self._count_shares_needed
@ -120,7 +120,8 @@ class CheckResults:
for (s, SI, shnum) in self._list_corrupt_shares]
incompatible = [(s.get_serverid(), SI, shnum)
for (s, SI, shnum) in self._list_incompatible_shares]
d = {"count-shares-needed": self._count_shares_needed,
d = {"count-happiness": self._count_happiness,
"count-shares-needed": self._count_shares_needed,
"count-shares-expected": self._count_shares_expected,
"count-shares-good": self._count_shares_good,
"count-good-share-hosts": self._count_good_share_hosts,

View File

@ -12,6 +12,7 @@ from allmydata.util.hashutil import file_renewal_secret_hash, \
file_cancel_secret_hash, bucket_renewal_secret_hash, \
bucket_cancel_secret_hash, uri_extension_hash, CRYPTO_VAL_SIZE, \
block_hash
from allmydata.util.happinessutil import servers_of_happiness
from allmydata.immutable import layout
@ -775,15 +776,11 @@ class Checker(log.PrefixingLogMixin):
recoverable = 0
unrecoverable = 1
# The file needs rebalancing if the set of servers that have at least
# one share is less than the number of uniquely-numbered shares
# available.
# TODO: this may be wrong, see ticket #1115 comment:27 and ticket #1784.
needs_rebalancing = bool(good_share_hosts < len(verifiedshares))
count_happiness = servers_of_happiness(verifiedshares)
cr = CheckResults(self._verifycap, SI,
healthy=healthy, recoverable=bool(recoverable),
needs_rebalancing=needs_rebalancing,
count_happiness=count_happiness,
count_shares_needed=self._verifycap.needed_shares,
count_shares_expected=self._verifycap.total_shares,
count_shares_good=len(verifiedshares),

View File

@ -11,6 +11,7 @@ from allmydata.interfaces import IImmutableFileNode, IUploadResults
from allmydata.util import consumer
from allmydata.check_results import CheckResults, CheckAndRepairResults
from allmydata.util.dictutil import DictOfSets
from allmydata.util.happinessutil import servers_of_happiness
from pycryptopp.cipher.aes import AES
# local imports
@ -144,12 +145,11 @@ class CiphertextFileNode:
is_healthy = bool(len(sm) >= verifycap.total_shares)
is_recoverable = bool(len(sm) >= verifycap.needed_shares)
# TODO: this may be wrong, see ticket #1115 comment:27 and ticket #1784.
needs_rebalancing = bool(len(sm) >= verifycap.total_shares)
count_happiness = servers_of_happiness(sm)
prr = CheckResults(cr.get_uri(), cr.get_storage_index(),
healthy=is_healthy, recoverable=is_recoverable,
needs_rebalancing=needs_rebalancing,
count_happiness=count_happiness,
count_shares_needed=verifycap.needed_shares,
count_shares_expected=verifycap.total_shares,
count_shares_good=len(sm),

View File

@ -2156,18 +2156,16 @@ class ICheckResults(Interface):
not. Unrecoverable files are obviously unhealthy. Non-distributed LIT
files always return True."""
def needs_rebalancing():
"""Return a boolean, True if the file/dir's reliability could be
improved by moving shares to new servers. Non-distributed LIT files
always return False."""
# the following methods all return None for non-distributed LIT files
def get_happiness():
"""Return the happiness count of the file."""
def get_encoding_needed():
"""Return 'k', the number of shares required for recovery"""
"""Return 'k', the number of shares required for recovery."""
def get_encoding_expected():
"""Return 'N', the number of total shares generated"""
"""Return 'N', the number of total shares generated."""
def get_share_counter_good():
"""Return the number of distinct good shares that were found. For

View File

@ -1,6 +1,7 @@
from allmydata.uri import from_string
from allmydata.util import base32, log, dictutil
from allmydata.util.happinessutil import servers_of_happiness
from allmydata.check_results import CheckAndRepairResults, CheckResults
from allmydata.mutable.common import MODE_CHECK, MODE_WRITE, CorruptShareError
@ -152,7 +153,6 @@ class MutableChecker:
summary.append("multiple versions are recoverable")
report.append("Unhealthy: there are multiple recoverable versions")
needs_rebalancing = False
if recoverable:
best_version = smap.best_recoverable_version()
report.append("Best Recoverable Version: " +
@ -166,16 +166,12 @@ class MutableChecker:
report.append("Unhealthy: best version has only %d shares "
"(encoding is %d-of-%d)" % (s, k, N))
summary.append("%d shares (enc %d-of-%d)" % (s, k, N))
hosts = smap.all_servers_for_version(best_version)
needs_rebalancing = bool( len(hosts) < N )
elif unrecoverable:
healthy = False
# find a k and N from somewhere
first = list(unrecoverable)[0]
# not exactly the best version, but that doesn't matter too much
counters = self._count_shares(smap, first)
# leave needs_rebalancing=False: the file being unrecoverable is
# the bigger problem
else:
# couldn't find anything at all
counters = {
@ -223,10 +219,12 @@ class MutableChecker:
else:
summary = "Unhealthy: " + " ".join(summary)
count_happiness = servers_of_happiness(sharemap)
cr = CheckResults(from_string(self._node.get_uri()),
self._storage_index,
healthy=healthy, recoverable=bool(recoverable),
needs_rebalancing=needs_rebalancing,
count_happiness=count_happiness,
count_shares_needed=counters["count-shares-needed"],
count_shares_expected=counters["count-shares-expected"],
count_shares_good=counters["count-shares-good"],

View File

@ -70,7 +70,7 @@ class FakeCHKFileNode:
s = StubServer("\x00"*20)
r = CheckResults(self.my_uri, self.storage_index,
healthy=True, recoverable=True,
needs_rebalancing=False,
count_happiness=10,
count_shares_needed=3,
count_shares_expected=10,
count_shares_good=10,
@ -282,7 +282,7 @@ class FakeMutableFileNode:
s = StubServer("\x00"*20)
r = CheckResults(self.my_uri, self.storage_index,
healthy=True, recoverable=True,
needs_rebalancing=False,
count_happiness=10,
count_shares_needed=3,
count_shares_expected=10,
count_shares_good=10,

View File

@ -84,7 +84,8 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
server_1 = sb.get_stub_server(serverid_1)
server_f = sb.get_stub_server(serverid_f)
u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
data = { "count_shares_needed": 3,
data = { "count_happiness": 8,
"count_shares_needed": 3,
"count_shares_expected": 9,
"count_shares_good": 10,
"count_good_share_hosts": 11,
@ -101,7 +102,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
}
cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=True, recoverable=True,
needs_rebalancing=False,
summary="groovy",
**data)
w = web_check_results.CheckResultsRenderer(c, cr)
@ -110,6 +110,7 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
self.failUnlessIn("Healthy : groovy", s)
self.failUnlessIn("Share Counts: need 3-of-9, have 10", s)
self.failUnlessIn("Happiness Level: 8", s)
self.failUnlessIn("Hosts with good shares: 11", s)
self.failUnlessIn("Corrupt shares: none", s)
self.failUnlessIn("Wrong Shares: 0", s)
@ -119,7 +120,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=False, recoverable=True,
needs_rebalancing=False,
summary="ungroovy",
**data)
w = web_check_results.CheckResultsRenderer(c, cr)
@ -132,7 +132,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
data["list_corrupt_shares"] = [(server_1, u.get_storage_index(), 2)]
cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=False, recoverable=False,
needs_rebalancing=False,
summary="rather dead",
**data)
w = web_check_results.CheckResultsRenderer(c, cr)
@ -157,7 +156,7 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
self.failUnlessEqual(j["summary"], "rather dead")
self.failUnlessEqual(j["storage-index"],
"2k6avpjga3dho3zsjo6nnkt7n4")
expected = {'needs-rebalancing': False,
expected = {'count-happiness': 8,
'count-shares-expected': 9,
'healthy': False,
'count-unrecoverable-versions': 0,
@ -192,7 +191,8 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
serverid_f = "\xff"*20
u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
data = { "count_shares_needed": 3,
data = { "count_happiness": 5,
"count_shares_needed": 3,
"count_shares_expected": 10,
"count_shares_good": 6,
"count_good_share_hosts": 7,
@ -210,11 +210,11 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
}
pre_cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=False, recoverable=True,
needs_rebalancing=False,
summary="illing",
**data)
data = { "count_shares_needed": 3,
data = { "count_happiness": 9,
"count_shares_needed": 3,
"count_shares_expected": 10,
"count_shares_good": 10,
"count_good_share_hosts": 11,
@ -232,7 +232,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
}
post_cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=True, recoverable=True,
needs_rebalancing=False,
summary="groovy",
**data)
@ -265,7 +264,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
crr.repair_successful = False
post_cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=False, recoverable=True,
needs_rebalancing=False,
summary="better",
**data)
crr.post_repair_results = post_cr
@ -281,7 +279,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
crr.repair_successful = False
post_cr = check_results.CheckResults(u, u.get_storage_index(),
healthy=False, recoverable=False,
needs_rebalancing=False,
summary="worse",
**data)
crr.post_repair_results = post_cr

View File

@ -277,13 +277,12 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase):
self.failUnlessEqual(cr.get_storage_index_string(),
base32.b2a(n.get_storage_index()), where)
num_servers = len(self.g.all_servers)
needs_rebalancing = bool( num_servers < 10 )
if not incomplete:
self.failUnlessEqual(cr.needs_rebalancing(), needs_rebalancing,
str((where, cr, cr.as_dict())))
self.failUnlessEqual(cr.get_share_counter_good(), 10, where)
self.failUnlessEqual(num_servers, 10, where)
self.failUnlessEqual(cr.get_happiness(), num_servers, where)
self.failUnlessEqual(cr.get_share_counter_good(), num_servers, where)
self.failUnlessEqual(cr.get_encoding_needed(), 3, where)
self.failUnlessEqual(cr.get_encoding_expected(), 10, where)
self.failUnlessEqual(cr.get_encoding_expected(), num_servers, where)
if not incomplete:
self.failUnlessEqual(cr.get_host_counter_good_shares(),
num_servers, where)
@ -533,13 +532,13 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase):
r = data["results"]
self.failUnlessEqual(r["healthy"], True, where)
num_servers = len(self.g.all_servers)
needs_rebalancing = bool( num_servers < 10 )
if not incomplete:
self.failUnlessEqual(r["needs-rebalancing"], needs_rebalancing,
where)
self.failUnlessEqual(r["count-shares-good"], 10, where)
self.failUnlessEqual(num_servers, 10)
self.failIfIn("needs-rebalancing", r)
self.failUnlessEqual(r["count-happiness"], num_servers, where)
self.failUnlessEqual(r["count-shares-good"], num_servers, where)
self.failUnlessEqual(r["count-shares-needed"], 3, where)
self.failUnlessEqual(r["count-shares-expected"], 10, where)
self.failUnlessEqual(r["count-shares-expected"], num_servers, where)
if not incomplete:
self.failUnlessEqual(r["count-good-share-hosts"], num_servers,
where)

View File

@ -4580,7 +4580,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
r = simplejson.loads(res)
self.failUnlessEqual(r["summary"], "Healthy")
self.failUnless(r["results"]["healthy"])
self.failIf(r["results"]["needs-rebalancing"])
self.failIfIn("needs-rebalancing", r["results"])
self.failUnless(r["results"]["recoverable"])
d.addCallback(_got_json_good)
@ -4624,8 +4624,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessEqual(r["summary"],
"Not Healthy: 9 shares (enc 3-of-10)")
self.failIf(r["results"]["healthy"])
self.failIf(r["results"]["needs-rebalancing"])
self.failUnless(r["results"]["recoverable"])
self.failIfIn("needs-rebalancing", r["results"])
d.addCallback(_got_json_sick)
d.addCallback(self.CHECK, "dead", "t=check")
@ -4638,8 +4638,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessEqual(r["summary"],
"Not Healthy: 1 shares (enc 3-of-10)")
self.failIf(r["results"]["healthy"])
self.failIf(r["results"]["needs-rebalancing"])
self.failIf(r["results"]["recoverable"])
self.failIfIn("needs-rebalancing", r["results"])
d.addCallback(_got_json_dead)
d.addCallback(self.CHECK, "corrupt", "t=check&verify=true")
@ -4652,6 +4652,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessIn("Unhealthy: 9 shares (enc 3-of-10)", r["summary"])
self.failIf(r["results"]["healthy"])
self.failUnless(r["results"]["recoverable"])
self.failIfIn("needs-rebalancing", r["results"])
self.failUnlessReallyEqual(r["results"]["count-happiness"], 9)
self.failUnlessReallyEqual(r["results"]["count-shares-good"], 9)
self.failUnlessReallyEqual(r["results"]["count-corrupt-shares"], 1)
d.addCallback(_got_json_corrupt)
@ -5100,12 +5102,14 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessEqual(u0["type"], "directory")
self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri())
u0cr = u0["check-results"]
self.failUnlessReallyEqual(u0cr["results"]["count-happiness"], 10)
self.failUnlessReallyEqual(u0cr["results"]["count-shares-good"], 10)
ugood = [u for u in units
if u["type"] == "file" and u["path"] == [u"good"]][0]
self.failUnlessReallyEqual(to_str(ugood["cap"]), self.uris["good"])
ugoodcr = ugood["check-results"]
self.failUnlessReallyEqual(ugoodcr["results"]["count-happiness"], 10)
self.failUnlessReallyEqual(ugoodcr["results"]["count-shares-good"], 10)
stats = units[-1]
@ -5204,6 +5208,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessEqual(last_unit["path"], ["subdir"])
r = last_unit["check-results"]["results"]
self.failUnlessReallyEqual(r["count-recoverable-versions"], 0)
self.failUnlessReallyEqual(r["count-happiness"], 1)
self.failUnlessReallyEqual(r["count-shares-good"], 1)
self.failUnlessReallyEqual(r["recoverable"], False)
d.addCallback(_check_broken_deepcheck)
@ -5281,6 +5286,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri())
u0crr = u0["check-and-repair-results"]
self.failUnlessReallyEqual(u0crr["repair-attempted"], False)
self.failUnlessReallyEqual(u0crr["pre-repair-results"]["results"]["count-happiness"], 10)
self.failUnlessReallyEqual(u0crr["pre-repair-results"]["results"]["count-shares-good"], 10)
ugood = [u for u in units
@ -5288,6 +5294,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
self.failUnlessEqual(to_str(ugood["cap"]), self.uris["good"])
ugoodcrr = ugood["check-and-repair-results"]
self.failUnlessReallyEqual(ugoodcrr["repair-attempted"], False)
self.failUnlessReallyEqual(ugoodcrr["pre-repair-results"]["results"]["count-happiness"], 10)
self.failUnlessReallyEqual(ugoodcrr["pre-repair-results"]["results"]["count-shares-good"], 10)
usick = [u for u in units
@ -5296,7 +5303,9 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
usickcrr = usick["check-and-repair-results"]
self.failUnlessReallyEqual(usickcrr["repair-attempted"], True)
self.failUnlessReallyEqual(usickcrr["repair-successful"], True)
self.failUnlessReallyEqual(usickcrr["pre-repair-results"]["results"]["count-happiness"], 9)
self.failUnlessReallyEqual(usickcrr["pre-repair-results"]["results"]["count-shares-good"], 9)
self.failUnlessReallyEqual(usickcrr["post-repair-results"]["results"]["count-happiness"], 10)
self.failUnlessReallyEqual(usickcrr["post-repair-results"]["results"]["count-shares-good"], 10)
stats = units[-1]

View File

@ -8,8 +8,10 @@ from allmydata.web.operations import ReloadMixin
from allmydata.interfaces import ICheckAndRepairResults, ICheckResults
from allmydata.util import base32, dictutil
def json_check_counts(r):
d = {"count-shares-good": r.get_share_counter_good(),
d = {"count-happiness": r.get_happiness(),
"count-shares-good": r.get_share_counter_good(),
"count-shares-needed": r.get_encoding_needed(),
"count-shares-expected": r.get_encoding_expected(),
"count-good-share-hosts": r.get_host_counter_good_shares(),
@ -40,7 +42,6 @@ def json_check_results(r):
data["storage-index"] = r.get_storage_index_string()
data["summary"] = r.get_summary()
data["results"] = json_check_counts(r)
data["results"]["needs-rebalancing"] = r.needs_rebalancing()
data["results"]["healthy"] = r.is_healthy()
data["results"]["recoverable"] = r.is_recoverable()
return data
@ -86,6 +87,7 @@ class ResultsBase:
"need %d-of-%d, have %d" % (cr.get_encoding_needed(),
cr.get_encoding_expected(),
cr.get_share_counter_good()))
add("Happiness Level", cr.get_happiness())
add("Hosts with good shares", cr.get_host_counter_good_shares())
if cr.get_corrupt_shares():