tahoe-lafs/src/allmydata/mutable/repairer.py

99 lines
4.5 KiB
Python
Raw Normal View History

from zope.interface import implements
from allmydata.interfaces import IRepairResults, ICheckerResults
class RepairResults:
implements(IRepairResults)
def __init__(self, smap):
self.servermap = smap
def to_string(self):
return ""
class MustForceRepairError(Exception):
pass
class Repairer:
def __init__(self, node, checker_results):
self.node = node
self.checker_results = ICheckerResults(checker_results)
assert checker_results.storage_index == self.node.get_storage_index()
def start(self, force=False):
# download, then re-publish. If a server had a bad share, try to
# replace it with a good one of the same shnum.
# The normal repair operation should not be used to replace
# application-specific merging of alternate versions: i.e if there
# are multiple highest seqnums with different roothashes. In this
# case, the application must use node.upload() (referencing the
# servermap that indicates the multiple-heads condition), or
# node.overwrite(). The repair() operation will refuse to run in
# these conditions unless a force=True argument is provided. If
# force=True is used, then the highest root hash will be reinforced.
# Likewise, the presence of an unrecoverable latest version is an
# unusual event, and should ideally be handled by retrying a couple
# times (spaced out over hours or days) and hoping that new shares
# will become available. If repair(force=True) is called, data will
# be lost: a new seqnum will be generated with the same contents as
# the most recent recoverable version, skipping over the lost
# version. repair(force=False) will refuse to run in a situation like
# this.
# Repair is designed to fix the following injuries:
# missing shares: add new ones to get at least N distinct ones
# old shares: replace old shares with the latest version
# bogus shares (bad sigs): replace the bad one with a good one
smap = self.checker_results.get_servermap()
if smap.unrecoverable_newer_versions():
if not force:
raise MustForceRepairError("There were unrecoverable newer "
"versions, so force=True must be "
"passed to the repair() operation")
# continuing on means that node.upload() will pick a seqnum that
# is higher than everything visible in the servermap, effectively
# discarding the unrecoverable versions.
if smap.needs_merge():
if not force:
raise MustForceRepairError("There were multiple recoverable "
"versions with identical seqnums, "
"so force=True must be passed to "
"the repair() operation")
# continuing on means that smap.best_recoverable_version() will
# pick the one with the highest roothash, and then node.upload()
# will replace all shares with its contents
# missing shares are handled during upload, which tries to find a
# home for every share
# old shares are handled during upload, which will replace any share
# that was present in the servermap
# bogus shares need to be managed here. We might notice a bogus share
# during mapupdate (whether done for a filecheck or just before a
# download) by virtue of it having an invalid signature. We might
# also notice a bad hash in the share during verify or download. In
# either case, the problem will be noted in the servermap, and the
# bad share (along with its checkstring) will be recorded in
# servermap.bad_shares . Publish knows that it should try and replace
# these.
# I chose to use the retrieve phase to ensure that the privkey is
# available, to avoid the extra roundtrip that would occur if we,
# say, added an smap.get_privkey() method.
assert self.node.get_writekey() # repair currently requires a writecap
best_version = smap.best_recoverable_version()
d = self.node.download_version(smap, best_version, fetch_privkey=True)
d.addCallback(self.node.upload, smap)
d.addCallback(self.get_results, smap)
return d
def get_results(self, res, smap):
return RepairResults(smap)