2008-07-08 00:36:00 +00:00
|
|
|
|
|
|
|
from twisted.internet import defer
|
|
|
|
from twisted.python import failure
|
|
|
|
from allmydata import hashtree
|
2008-12-19 15:39:24 +00:00
|
|
|
from allmydata.uri import from_string
|
2008-08-13 03:50:20 +00:00
|
|
|
from allmydata.util import hashutil, base32, idlib, log
|
2009-01-10 01:00:52 +00:00
|
|
|
from allmydata.check_results import CheckAndRepairResults, CheckResults
|
2008-07-08 00:36:00 +00:00
|
|
|
|
|
|
|
from common import MODE_CHECK, CorruptShareError
|
|
|
|
from servermap import ServerMap, ServermapUpdater
|
2008-07-18 04:09:23 +00:00
|
|
|
from layout import unpack_share, SIGNED_PREFIX_LENGTH
|
2008-07-08 00:36:00 +00:00
|
|
|
|
|
|
|
class MutableChecker:
    """Check, and optionally verify, the shares of one mutable file.

    check() builds a fresh ServerMap in MODE_CHECK, decides whether the file
    needs repair, optionally reads back and validates every share of the
    best recoverable version, and finally records everything in the
    CheckResults object stored in ``self.results``.
    """

    def __init__(self, node, storage_broker, history, monitor):
        """Remember the collaborators and create an empty CheckResults.

        node: the mutable file node being checked; this class calls its
              get_storage_index(), get_uri(), is_readonly(), get_writekey()
              and _decrypt_privkey() methods.
        storage_broker: handed unchanged to ServermapUpdater.
        history: may be false/None; when true, notify_mapupdate() is called
                 on it with the updater's status.
        monitor: its raise_if_cancelled() is polled between phases.
        """
        self._node = node
        self._storage_broker = storage_broker
        self._history = history
        self._monitor = monitor
        self.bad_shares = [] # list of (nodeid,shnum,failure)
        self._storage_index = self._node.get_storage_index()
        self.results = CheckResults(from_string(node.get_uri()),
                                    self._storage_index)
        self.need_repair = False
        self.responded = set() # set of (binary) nodeids
        # filled in by _got_mapupdate_results(); defined here so that the
        # attribute always exists
        self.best_version = None

    def check(self, verify=False, add_lease=False):
        """Run the check and return a Deferred firing with self.results.

        verify: when true, every share of the best recoverable version is
                also read back and validated.
        add_lease: passed through to ServermapUpdater.
        """
        servermap = ServerMap()
        updater = ServermapUpdater(self._node, self._storage_broker,
                                   self._monitor, servermap, MODE_CHECK,
                                   add_lease=add_lease)
        if self._history:
            self._history.notify_mapupdate(updater.get_status())
        d = updater.update()
        d.addCallback(self._got_mapupdate_results)
        if verify:
            d.addCallback(self._verify_all_shares)
        # _verify_all_shares does not return the servermap, so put it back
        # into the callback chain before filling in the results
        d.addCallback(lambda res: servermap)
        d.addCallback(self._fill_checker_results, self.results)
        d.addCallback(lambda res: self.results)
        return d

    def _got_mapupdate_results(self, servermap):
        """Pick the best version and decide whether repair is needed.

        Sets self.best_version (None when nothing is recoverable) and may
        set self.need_repair. Returns the servermap unchanged.
        """
        # the file is healthy if there is exactly one recoverable version, it
        # has at least N distinct shares, and there are no unrecoverable
        # versions: all existing shares will be for the same version.
        self._monitor.raise_if_cancelled()
        self.best_version = None
        num_recoverable = len(servermap.recoverable_versions())
        if num_recoverable:
            self.best_version = servermap.best_recoverable_version()

        if servermap.unrecoverable_versions():
            self.need_repair = True
        if num_recoverable != 1:
            self.need_repair = True
        if self.best_version:
            available_shares = servermap.shares_available()
            (num_distinct_shares, k, N) = available_shares[self.best_version]
            if num_distinct_shares < N:
                self.need_repair = True

        return servermap

    def _verify_all_shares(self, servermap):
        """Read every byte of each share of the best version and check it.

        Returns None when there is no best version, otherwise a
        DeferredList over one read per share.
        """
        if not self.best_version:
            return
        versionmap = servermap.make_versionmap()
        shares = versionmap[self.best_version]
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.best_version
        offsets = dict(offsets_tuple)
        # a single read vector covering the share from offset 0 to "EOF"
        readv = [(0, offsets["EOF"])]
        dl = []
        for (shnum, peerid, timestamp) in shares:
            ss = servermap.connections[peerid]
            d = self._do_read(ss, peerid, self._storage_index, [shnum], readv)
            d.addCallback(self._got_answer, peerid, servermap)
            dl.append(d)
        return defer.DeferredList(dl, fireOnOneErrback=True,
                                  consumeErrors=True)

    def _do_read(self, ss, peerid, storage_index, shnums, readv):
        """Issue the remote "slot_readv" call and return its Deferred."""
        # isolate the callRemote to a separate method, so tests can subclass
        # this checker and override it
        d = ss.callRemote("slot_readv", storage_index, shnums, readv)
        return d

    def _got_answer(self, datavs, peerid, servermap):
        """Validate each share returned by one server.

        datavs maps shnum to a list of read results; only the first element
        is used, matching the single read vector that was requested. A
        share that fails validation is recorded in self.bad_shares, marked
        bad in the servermap, and reported to the server that holds it.
        """
        for shnum, datav in datavs.items():
            data = datav[0]
            try:
                self._got_results_one_share(shnum, peerid, data)
            except CorruptShareError:
                # capture the active exception as a Failure
                f = failure.Failure()
                self.need_repair = True
                self.bad_shares.append((peerid, shnum, f))
                prefix = data[:SIGNED_PREFIX_LENGTH]
                servermap.mark_bad_share(peerid, shnum, prefix)
                ss = servermap.connections[peerid]
                self.notify_server_corruption(ss, shnum, str(f.value))

    def check_prefix(self, peerid, shnum, data):
        """Raise CorruptShareError unless data starts with the best
        version's prefix."""
        (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
         offsets_tuple) = self.best_version
        got_prefix = data[:SIGNED_PREFIX_LENGTH]
        if got_prefix != prefix:
            raise CorruptShareError(peerid, shnum,
                                    "prefix mismatch: share changed while we were reading it")

    def _got_results_one_share(self, shnum, peerid, data):
        """Validate one complete share; raise CorruptShareError on failure."""
        self.check_prefix(peerid, shnum, data)

        # check_prefix() only compares the leading bytes of the share with
        # the prefix recorded in the best version.
        # NOTE(review): an earlier comment here credited a "_compare_prefix"
        # with checking the signature against the file's pubkey; no such
        # method is visible in this class -- presumably the signature is
        # validated during the servermap update. Confirm.

        (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
         share_hash_chain, block_hash_tree, share_data,
         enc_privkey) = unpack_share(data)

        # validate [share_hash_chain,block_hash_tree,share_data]

        leaves = [hashutil.block_hash(share_data)]
        t = hashtree.HashTree(leaves)
        if list(t) != block_hash_tree:
            raise CorruptShareError(peerid, shnum, "block hash tree failure")
        share_hash_leaf = t[0]
        t2 = hashtree.IncompleteHashTree(N)
        # root_hash was checked by the signature
        t2.set_hashes({0: root_hash})
        try:
            t2.set_hashes(hashes=share_hash_chain,
                          leaves={shnum: share_hash_leaf})
        except (hashtree.BadHashError, hashtree.NotEnoughHashesError,
                IndexError) as e:
            msg = "corrupt hashes: %s" % (e,)
            raise CorruptShareError(peerid, shnum, msg)

        # validate enc_privkey: only possible if we have a write-cap
        if not self._node.is_readonly():
            alleged_privkey_s = self._node._decrypt_privkey(enc_privkey)
            alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s)
            if alleged_writekey != self._node.get_writekey():
                raise CorruptShareError(peerid, shnum, "invalid privkey")

    def notify_server_corruption(self, ss, shnum, reason):
        """Send "advise_corrupt_share" to the server via callRemoteOnly."""
        ss.callRemoteOnly("advise_corrupt_share",
                          "mutable", self._storage_index, shnum, reason)

    def _count_shares(self, smap, version):
        """Return a dict of share counters for one version of the file.

        "count-wrong-shares" is the number of shares in the versionmap that
        belong to any version other than the one given.
        """
        available_shares = smap.shares_available()
        (num_distinct_shares, k, N) = available_shares[version]
        counters = {}
        counters["count-shares-good"] = num_distinct_shares
        counters["count-shares-needed"] = k
        counters["count-shares-expected"] = N
        good_hosts = smap.all_peers_for_version(version)
        counters["count-good-share-hosts"] = len(good_hosts)
        vmap = smap.make_versionmap()
        counters["count-wrong-shares"] = sum([len(shares)
                                              for verinfo, shares in vmap.items()
                                              if verinfo != version])
        return counters

    def _fill_checker_results(self, smap, r):
        """Populate the results object r from a servermap.

        Computes health, recoverability, the rebalancing flag, the counter
        dict, the one-line summary and the multi-line report, and pushes
        them into r through its set_*() methods. Corrupt shares found
        during verification are also appended to r.problems and logged.
        """
        self._monitor.raise_if_cancelled()
        r.set_servermap(smap.copy())
        healthy = True
        data = {}
        report = []
        summary = []
        vmap = smap.make_versionmap()
        recoverable = smap.recoverable_versions()
        unrecoverable = smap.unrecoverable_versions()
        data["count-recoverable-versions"] = len(recoverable)
        data["count-unrecoverable-versions"] = len(unrecoverable)

        def describe(versions):
            # "<sharecount>*<version summary>" for each version, "/"-joined
            return "/".join(["%d*%s" % (len(vmap[v]),
                                        smap.summarize_version(v))
                             for v in versions])

        if recoverable:
            report.append("Recoverable Versions: " + describe(recoverable))
        if unrecoverable:
            report.append("Unrecoverable Versions: " + describe(unrecoverable))
        if unrecoverable:
            healthy = False
            summary.append("some versions are unrecoverable")
            report.append("Unhealthy: some versions are unrecoverable")
        if len(recoverable) == 0:
            healthy = False
            summary.append("no versions are recoverable")
            report.append("Unhealthy: no versions are recoverable")
        if len(recoverable) > 1:
            healthy = False
            summary.append("multiple versions are recoverable")
            report.append("Unhealthy: there are multiple recoverable versions")

        needs_rebalancing = False
        if recoverable:
            best_version = smap.best_recoverable_version()
            report.append("Best Recoverable Version: " +
                          smap.summarize_version(best_version))
            counters = self._count_shares(smap, best_version)
            data.update(counters)
            s = counters["count-shares-good"]
            k = counters["count-shares-needed"]
            N = counters["count-shares-expected"]
            if s < N:
                healthy = False
                report.append("Unhealthy: best version has only %d shares "
                              "(encoding is %d-of-%d)" % (s, k, N))
                summary.append("%d shares (enc %d-of-%d)" % (s, k, N))
            hosts = smap.all_peers_for_version(best_version)
            needs_rebalancing = len(hosts) < N
        elif unrecoverable:
            healthy = False
            # find a k and N from somewhere
            first = list(unrecoverable)[0]
            # not exactly the best version, but that doesn't matter too much
            data.update(self._count_shares(smap, first))
            # leave needs_rebalancing=False: the file being unrecoverable is
            # the bigger problem
        else:
            # couldn't find anything at all
            data["count-shares-good"] = 0
            data["count-shares-needed"] = 3 # arbitrary defaults
            data["count-shares-expected"] = 10
            data["count-good-share-hosts"] = 0
            data["count-wrong-shares"] = 0

        if self.bad_shares:
            data["count-corrupt-shares"] = len(self.bad_shares)
            data["list-corrupt-shares"] = locators = []
            report.append("Corrupt Shares:")
            summary.append("Corrupt Shares:")
            # sort on (peerid, shnum) only: Failure objects have no useful
            # ordering, and comparing them raises TypeError on Python 3
            by_location = sorted(self.bad_shares,
                                 key=lambda entry: (entry[0], entry[1]))
            for (peerid, shnum, f) in by_location:
                locators.append((peerid, self._storage_index, shnum))
                share_label = "%s-sh%d" % (idlib.shortnodeid_b2a(peerid),
                                           shnum)
                if f.check(CorruptShareError):
                    ft = f.value.reason
                else:
                    ft = str(f)
                report.append(" %s: %s" % (share_label, ft))
                summary.append(share_label)
                p = (peerid, self._storage_index, shnum, f)
                r.problems.append(p)
                msg = ("CorruptShareError during mutable verify, "
                       "peerid=%(peerid)s, si=%(si)s, shnum=%(shnum)d, "
                       "where=%(where)s")
                log.msg(format=msg, peerid=idlib.nodeid_b2a(peerid),
                        si=base32.b2a(self._storage_index),
                        shnum=shnum,
                        where=ft,
                        level=log.WEIRD, umid="EkK8QA")
        else:
            data["count-corrupt-shares"] = 0
            data["list-corrupt-shares"] = []

        # map "<version summary>-sh<shnum>" to the peerids holding that share
        sharemap = {}
        for verinfo in vmap:
            for (shnum, peerid, timestamp) in vmap[verinfo]:
                shareid = "%s-sh%d" % (smap.summarize_version(verinfo), shnum)
                sharemap.setdefault(shareid, []).append(peerid)
        data["sharemap"] = sharemap
        data["servers-responding"] = list(smap.reachable_peers)

        r.set_healthy(healthy)
        r.set_recoverable(bool(recoverable))
        r.set_needs_rebalancing(needs_rebalancing)
        r.set_data(data)
        if healthy:
            r.set_summary("Healthy")
        else:
            r.set_summary("Unhealthy: " + " ".join(summary))
        r.set_report(report)
|
2008-08-12 03:20:33 +00:00
|
|
|
|
|
|
|
|
2008-09-07 19:44:56 +00:00
|
|
|
class MutableCheckAndRepairer(MutableChecker):
    """A MutableChecker that also attempts a repair when one is needed.

    check() fires with a CheckAndRepairResults object (self.cr_results)
    instead of the plain CheckResults produced by the base class.
    """

    def __init__(self, node, storage_broker, history, monitor):
        """Set up the base checker plus a CheckAndRepairResults whose
        pre-repair results are the base checker's CheckResults."""
        MutableChecker.__init__(self, node, storage_broker, history, monitor)
        combined = CheckAndRepairResults(self._storage_index)
        combined.pre_repair_results = self.results
        self.cr_results = combined
        self.need_repair = False

    def check(self, verify=False, add_lease=False):
        """Run the base check, then maybe repair.

        Returns a Deferred that fires with self.cr_results.
        """
        d = MutableChecker.check(self, verify, add_lease)
        d.addCallback(self._maybe_repair)
        d.addCallback(lambda res: self.cr_results)
        return d

    def _maybe_repair(self, res):
        """Repair the file if the check flagged it and the node is writable.

        Returns None when no repair is started, otherwise the Deferred
        returned by the node's repair() with result-recording callbacks
        attached.
        """
        self._monitor.raise_if_cancelled()
        outcome = self.cr_results

        if not self.need_repair:
            # nothing to fix: the post-repair state is the pre-repair state
            outcome.post_repair_results = self.results
            return

        if self._node.is_readonly():
            # ticket #625: we cannot yet repair read-only mutable files
            outcome.post_repair_results = self.results
            outcome.repair_attempted = False
            return

        outcome.repair_attempted = True
        d = self._node.repair(self.results)

        def _record_success(repair_results):
            outcome.repair_successful = True
            fresh = CheckResults(from_string(self._node.get_uri()),
                                 self._storage_index)
            outcome.post_repair_results = fresh
            # describe the post-repair state from the repairer's servermap
            self._fill_checker_results(repair_results.servermap, fresh)
            outcome.repair_results = repair_results # TODO?

        def _record_failure(f):
            # I'm not sure if I want to pass through a failure or not.
            outcome.repair_successful = False
            outcome.repair_failure = f # TODO?
            #outcome.post_repair_results = ??
            return f

        d.addCallbacks(_record_success, _record_failure)
        return d
|