2007-10-15 23:16:39 +00:00
|
|
|
|
|
|
|
"""
|
|
|
|
Given a StorageIndex, count how many shares we can find.
|
|
|
|
|
|
|
|
This does no verification of the shares whatsoever. If the peer claims to
|
|
|
|
have the share, we believe them.
|
|
|
|
"""
|
|
|
|
|
2008-07-16 00:23:25 +00:00
|
|
|
from zope.interface import implements
|
2007-10-15 23:16:39 +00:00
|
|
|
from twisted.internet import defer
|
|
|
|
from twisted.python import log
|
2008-07-16 20:14:39 +00:00
|
|
|
from allmydata import storage
|
2008-07-17 01:20:57 +00:00
|
|
|
from allmydata.interfaces import IVerifierURI, \
|
|
|
|
ICheckerResults, IDeepCheckResults
|
2008-07-16 20:14:39 +00:00
|
|
|
from allmydata.immutable import download
|
2008-07-16 00:23:25 +00:00
|
|
|
from allmydata.util import hashutil, base32
|
|
|
|
|
|
|
|
class Results:
    """Outcome of checking a single file; declared to provide ICheckerResults.

    The ``healthy`` attribute is not assigned by this class. Whoever
    creates the instance (see the checker/verifier classes in this module)
    is expected to set it before is_healthy() or to_string() is called.
    """
    implements(ICheckerResults)

    def __init__(self, storage_index):
        """Remember the storage index and a short printable form of it.

        storage_index may be None (for, say, LIT files), in which case the
        printable form is the placeholder "<none>".
        """
        self.storage_index = storage_index
        if storage_index is not None:
            # abbreviated form: first six characters of the base32 encoding
            self.storage_index_s = base32.b2a(storage_index)[:6]
        else:
            self.storage_index_s = "<none>"

    def is_healthy(self):
        """Return the ``healthy`` value assigned by the checker."""
        return self.healthy

    def get_storage_index_string(self):
        """Return the abbreviated, printable storage index."""
        return self.storage_index_s

    def get_mutability_string(self):
        """Return "immutable" when a storage index is present, else "literal"."""
        if not self.storage_index:
            return "literal"
        return "immutable"

    def to_string(self):
        """Return a one-line, newline-terminated health summary."""
        if self.healthy:
            summary = "Healthy!\n"
        else:
            summary = "Not Healthy!\n"
        return summary
|
|
|
|
|
2008-07-17 01:20:57 +00:00
|
|
|
class DeepCheckResults:
    """Running totals for a deep check; declared to provide IDeepCheckResults.

    Check results are fed in through add_check() and repair outcomes through
    add_repair(); the remaining methods are read-only accessors.
    """
    implements(IDeepCheckResults)

    def __init__(self):
        self.objects_checked = 0
        self.objects_healthy = 0
        self.repairs_attempted = 0
        self.repairs_successful = 0
        # per-object check results that reported "not healthy"
        self.problems = []
        # never written to in this class; exposed via get_server_problems()
        self.server_problems = {}

    def add_check(self, r):
        """Record one per-object check result.

        r must offer an is_healthy() method (as the Results class in this
        module does). Unhealthy results are kept in the problems list.
        """
        self.objects_checked += 1
        # is_healthy is a method and has to be *called*: the bound method
        # object itself is always truthy, which used to make every object
        # count as healthy and left the problems list permanently empty.
        if r.is_healthy():
            self.objects_healthy += 1
        else:
            self.problems.append(r)

    def add_repair(self, is_successful):
        """Record one repair attempt and whether it succeeded."""
        self.repairs_attempted += 1
        if is_successful:
            self.repairs_successful += 1

    def count_objects_checked(self):
        """Return the number of results passed to add_check()."""
        return self.objects_checked

    def count_objects_healthy(self):
        """Return the number of checked objects that reported healthy."""
        return self.objects_healthy

    def count_repairs_attempted(self):
        """Return the number of add_repair() calls."""
        return self.repairs_attempted

    def count_repairs_successful(self):
        """Return the number of repairs recorded as successful."""
        return self.repairs_successful

    def get_server_problems(self):
        """Return the server_problems dict (the live object, not a copy)."""
        return self.server_problems

    def get_problems(self):
        """Return the list of unhealthy results (the live list, not a copy)."""
        return self.problems
|
|
|
|
|
2007-10-15 23:16:39 +00:00
|
|
|
|
|
|
|
class SimpleCHKFileChecker:
    """Ask every storage peer which shares of a file it claims to hold.

    check() returns a Deferred that fires with a Results instance whose
    ``stuff`` attribute is the tuple (needed, total, found, sharemap), where
    sharemap maps each share number to the list of peerids that reported
    holding it. No share contents are fetched or validated here.
    """

    def __init__(self, peer_getter, uri_to_check):
        self.peer_getter = peer_getter
        self.uri_to_check = IVerifierURI(uri_to_check)
        # distinct share numbers reported by any peer
        self.found_shares = set()
        # share number -> list of peerids claiming to hold it
        self.sharemap = {}

    # The string below is never executed; it is a sketch of the same
    # algorithm kept for reference.
    '''
    def check_synchronously(self, si):
        # this is how we would write this class if we were using synchronous
        # messages (or if we used promises).
        found = set()
        for (pmpeerid, peerid, connection) in self.peer_getter(storage_index):
            buckets = connection.get_buckets(si)
            found.update(buckets.keys())
        return len(found)
    '''

    def check(self):
        """Query all peers; return a Deferred firing with the Results."""
        storage_index = self.uri_to_check.storage_index
        d = self._get_all_shareholders(storage_index)
        d.addCallback(self._done)
        return d

    def _get_all_shareholders(self, storage_index):
        """Send get_buckets to each peer; return a DeferredList of the queries."""
        queries = []
        for (peerid, ss) in self.peer_getter("storage", storage_index):
            query = ss.callRemote("get_buckets", storage_index)
            query.addCallbacks(self._got_response, self._got_error,
                               callbackArgs=(peerid,))
            queries.append(query)
        return defer.DeferredList(queries)

    def _got_response(self, buckets, peerid):
        """Fold one peer's answer into found_shares and sharemap."""
        # buckets is a dict: maps shnum to an rref of the server who holds it
        for shnum in buckets:
            self.found_shares.add(shnum)
            self.sharemap.setdefault(shnum, []).append(peerid)

    def _got_error(self, f):
        """Log a failed query; returning None lets the remaining queries proceed."""
        # NOTE(review): this branch has no effect -- a KeyError failure is
        # still logged just below. It looks like it was meant to skip the
        # logging; confirm the intent before changing it.
        if f.check(KeyError):
            pass
        log.err(f)

    def _done(self, res):
        """Build the Results once every query has finished (res is unused)."""
        u = self.uri_to_check
        num_found = len(self.found_shares)
        r = Results(u.storage_index)
        # healthy here only means "at least needed_shares distinct shares
        # were reported"
        r.healthy = num_found >= u.needed_shares
        r.stuff = (u.needed_shares, u.total_shares, num_found, self.sharemap)
        return r
|
2007-10-15 23:16:39 +00:00
|
|
|
|
2007-10-16 19:25:09 +00:00
|
|
|
class VerifyingOutput:
    """Output target that hashes the crypttext it is given and discards it.

    The supplied results object is marked unhealthy on construction and
    only flipped to healthy by finish().
    NOTE(review): presumably driven by download.FileDownloader via
    SimpleCHKFileVerifier -- confirm the call order against download.py.
    """

    def __init__(self, total_length, results):
        self._crypttext_hasher = hashutil.crypttext_hasher()
        self.total_length = total_length
        # number of crypttext bytes seen so far
        self.length = 0
        # index of the next segment expected by write_segment()
        self._segment_number = 0
        self._opened = False
        # stays None until setup_hashtrees() supplies a tree
        self._crypttext_hash_tree = None
        self._results = results
        # pessimistic default: only finish() declares the file healthy
        results.healthy = False

    def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
        """Keep the crypttext hash tree; the plaintext tree is ignored."""
        self._crypttext_hash_tree = crypttext_hashtree

    def write_segment(self, crypttext):
        """Account for one segment of crypttext.

        Updates the whole-file hasher and, when a crypttext hash tree has
        been supplied, hands it this segment's leaf hash.
        """
        self.length += len(crypttext)
        self._crypttext_hasher.update(crypttext)

        tree = self._crypttext_hash_tree
        if tree:
            segment_hasher = hashutil.crypttext_segment_hasher()
            segment_hasher.update(crypttext)
            tree.set_hashes(
                leaves={self._segment_number: segment_hasher.digest()})

        self._segment_number += 1

    def close(self):
        """Freeze the whole-file crypttext hash into self.crypttext_hash."""
        self.crypttext_hash = self._crypttext_hasher.digest()

    def finish(self):
        """Mark the results healthy and return them."""
        results = self._results
        results.healthy = True
        return results
|
2007-10-16 19:25:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
class SimpleCHKFileVerifier(download.FileDownloader):
    """Verify a CHK file by running the download steps into a hashing sink.

    This reconstructs the crypttext, which verifies that at least 'k' of
    the shareholders are around and have valid data. It does not check the
    remaining shareholders, and it cannot verify the plaintext.
    """
    check_plaintext_hash = False

    def __init__(self, client, u):
        # NOTE(review): the base-class __init__ is not invoked; the
        # attributes below are set by hand instead, presumably mirroring
        # what FileDownloader's methods expect -- confirm against download.py.
        self._client = client

        verifier_uri = IVerifierURI(u)
        self._storage_index = verifier_uri.storage_index
        self._uri_extension_hash = verifier_uri.uri_extension_hash
        self._total_shares = verifier_uri.total_shares
        self._size = verifier_uri.size
        self._num_needed_shares = verifier_uri.needed_shares

        self._si_s = storage.si_b2a(self._storage_index)
        self.init_logging()

        # the hashing sink receives the Results object and is what marks
        # it healthy or not
        check_results = Results(self._storage_index)
        self._output = VerifyingOutput(self._size, check_results)
        self._paused = False
        self._stopped = False

        self._results = None
        self.active_buckets = {} # k: shnum, v: bucket
        self._share_buckets = [] # list of (sharenum, bucket) tuples
        self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
        self._uri_extension_sources = []

        self._uri_extension_data = None

        # one zeroed failure counter per kind of fetch
        self._fetch_failures = dict.fromkeys(["uri_extension",
                                              "plaintext_hashroot",
                                              "plaintext_hashtree",
                                              "crypttext_hashroot",
                                              "crypttext_hashtree",
                                              ], 0)

    def init_logging(self):
        """Emit the "starting" log entry and remember its number as the parent."""
        prefix = storage.si_b2a(self._storage_index)[:5]
        self._log_prefix = prefix
        self._log_number = self._client.log(
            "SimpleCHKFileVerifier(%s): starting" % prefix)

    def log(self, msg, parent=None):
        """Log msg via the client, tagged with this verifier's prefix.

        parent defaults to the log number recorded by init_logging().
        """
        if parent is None:
            parent = self._log_number
        text = "SimpleCHKFileVerifier(%s): %s" % (self._log_prefix, msg)
        return self._client.log(text, parent=parent)

    def start(self):
        """Kick off verification; return the Deferred for the whole chain."""
        log.msg("starting download [%s]" % storage.si_b2a(self._storage_index)[:5])

        # first step: who should we download from?
        d = defer.maybeDeferred(self._get_all_shareholders)
        remaining_steps = (
            self._got_all_shareholders,
            # now get the uri_extension block from somebody and validate it
            self._obtain_uri_extension,
            self._got_uri_extension,
            self._get_hashtrees,
            self._create_validated_buckets,
            # once we know that, we can download blocks from everybody
            self._download_all_segments,
            self._done,
        )
        for step in remaining_steps:
            d.addCallback(step)
        return d
|
2007-10-22 23:19:18 +00:00
|
|
|
|