mirror of https://github.com/tahoe-lafs/tahoe-lafs.git (synced 2025-06-23 09:15:32 +00:00)

dirnode: refactor recursive-traversal methods, add stats to deep_check() method results and t=deep-check webapi
@@ -765,6 +765,8 @@ POST $URL?t=deep-check
   invoked. The 'check-results' field is the same as
   that returned by t=check&output=JSON, described
   above.
+  stats: a dictionary with the same keys as the t=deep-stats command
+         (described below)
 
 POST $URL?t=check&repair=true
 
@@ -840,6 +842,8 @@ POST $URL?t=deep-check&repair=true
   invoked. The 'check-results' field is the same as
   that returned by t=check&repair=true&output=JSON,
   described above.
+  stats: a dictionary with the same keys as the t=deep-stats command
+         (described below)
 
 GET $DIRURL?t=manifest
 
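The two webapi.txt hunks above add a "stats" field to the JSON emitted by t=deep-check and t=deep-check&repair=true. The sketch below shows how a client might read that field; it is illustrative only and not part of this commit. The gateway address, the dircap placeholder, and the /uri/<cap> URL shape are assumptions, and urllib/urllib2/simplejson are just one way to issue the POST.

    # Illustrative sketch: POST t=deep-check and read the new "stats" field.
    import urllib, urllib2, simplejson

    gateway = "http://127.0.0.1:3456"   # assumed local webapi gateway
    dircap = "URI:DIR2:..."             # placeholder directory cap
    url = "%s/uri/%s?t=deep-check&output=JSON" % (gateway, urllib.quote(dircap))

    response = urllib2.urlopen(url, data="")   # providing a body makes this a POST
    results = simplejson.loads(response.read())
    stats = results["stats"]            # same keys as the t=deep-stats command
    print stats["count-directories"], stats["count-files"]
    print stats["size-immutable-files"], stats["largest-directory"]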
@@ -91,6 +91,10 @@ class DeepResultsBase:
         self.objects_unhealthy = 0
         self.corrupt_shares = []
         self.all_results = {}
+        self.stats = {}
+
+    def update_stats(self, new_stats):
+        self.stats.update(new_stats)
 
     def get_root_storage_index_string(self):
         return self.root_storage_index_s
@@ -101,6 +105,9 @@ class DeepResultsBase:
     def get_all_results(self):
         return self.all_results
 
+    def get_stats(self):
+        return self.stats
+
 
 class DeepCheckResults(DeepResultsBase):
     implements(IDeepCheckResults)
|
@ -7,7 +7,7 @@ import simplejson
|
|||||||
from allmydata.mutable.common import NotMutableError
|
from allmydata.mutable.common import NotMutableError
|
||||||
from allmydata.mutable.node import MutableFileNode
|
from allmydata.mutable.node import MutableFileNode
|
||||||
from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
|
from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
|
||||||
IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode, \
|
IURI, IFileNode, IMutableFileURI, IFilesystemNode, \
|
||||||
ExistingChildError, ICheckable
|
ExistingChildError, ICheckable
|
||||||
from allmydata.checker_results import DeepCheckResults, \
|
from allmydata.checker_results import DeepCheckResults, \
|
||||||
DeepCheckAndRepairResults
|
DeepCheckAndRepairResults
|
||||||
@@ -446,161 +446,92 @@ class NewDirectoryNode:
         d.addCallback(lambda child: self.delete(current_child_name))
         return d
 
-    def build_manifest(self):
-        """Return a frozenset of verifier-capability strings for all nodes
-        (directories and files) reachable from this one."""
+    def deep_traverse(self, walker):
+        """Perform a recursive walk, using this dirnode as a root, notifying
+        the 'walker' instance of everything I encounter.
+
+        I call walker.enter_directory(parent, children) once for each dirnode
+        I visit, immediately after retrieving the list of children. I pass in
+        the parent dirnode and the dict of childname->(childnode,metadata).
+        This function should *not* traverse the children: I will do that.
+        enter_directory() is most useful for the deep-stats number that
+        counts how large a directory is.
+
+        I call walker.add_node(node, path) for each node (both files and
+        directories) I can reach. Most work should be done here.
+
+        I avoid loops by keeping track of verifier-caps and refusing to call
+        each() or traverse a node that I've seen before.
+
+        I return a Deferred that will fire with the value of walker.finish().
+        """
 
         # this is just a tree-walker, except that following each edge
         # requires a Deferred. We use a ConcurrencyLimiter to make sure the
         # fan-out doesn't cause problems.
 
-        manifest = set()
-        manifest.add(self.get_verifier())
-        limiter = ConcurrencyLimiter(10) # allow 10 in parallel
-
-        d = self._build_manifest_from_node(self, manifest, limiter)
-        def _done(res):
-            # LIT nodes have no verifier-capability: their data is stored
-            # inside the URI itself, so there is no need to refresh anything.
-            # They indicate this by returning None from their get_verifier
-            # method. We need to remove any such Nones from our set. We also
-            # want to convert all these caps into strings.
-            return frozenset([IVerifierURI(cap).to_string()
-                              for cap in manifest
-                              if cap is not None])
-        d.addCallback(_done)
+        found = set([self.get_verifier()])
+        limiter = ConcurrencyLimiter(10)
+        d = self._deep_traverse_dirnode(self, [], walker, found, limiter)
+        d.addCallback(lambda ignored: walker.finish())
         return d
 
-    def _build_manifest_from_node(self, node, manifest, limiter):
-        d = limiter.add(node.list)
-        def _got_list(res):
-            dl = []
-            for name, (child, metadata) in res.iteritems():
-                verifier = child.get_verifier()
-                if verifier not in manifest:
-                    manifest.add(verifier)
-                    if IDirectoryNode.providedBy(child):
-                        dl.append(self._build_manifest_from_node(child,
-                                                                 manifest,
-                                                                 limiter))
-            if dl:
-                return defer.DeferredList(dl)
-        d.addCallback(_got_list)
+    def _deep_traverse_dirnode(self, node, path, walker, found, limiter):
+        # process this directory, then walk its children
+        d = limiter.add(walker.add_node, node, path)
+        d.addCallback(lambda ignored: node.list())
+        d.addCallback(self._deep_traverse_dirnode_children, node, path,
+                      walker, found, limiter)
         return d
 
+    def _deep_traverse_dirnode_children(self, children, parent, path,
+                                        walker, found, limiter):
+        dl = [limiter.add(walker.enter_directory, parent, children)]
+        for name, (child, metadata) in children.iteritems():
+            verifier = child.get_verifier()
+            if verifier in found:
+                continue
+            found.add(verifier)
+            childpath = path + [name]
+            if IDirectoryNode.providedBy(child):
+                dl.append(self._deep_traverse_dirnode(child, childpath,
+                                                      walker, found, limiter))
+            else:
+                dl.append(limiter.add(walker.add_node, child, childpath))
+        return defer.DeferredList(dl, fireOnOneErrback=True)
+
+
+    def build_manifest(self):
+        """Return a frozenset of verifier-capability strings for all nodes
+        (directories and files) reachable from this one."""
+        return self.deep_traverse(ManifestWalker())
+
     def deep_stats(self):
-        stats = DeepStats()
-        # we track verifier caps, to avoid double-counting children for which
-        # we've got both a write-cap and a read-cap
-        found = set()
-        found.add(self.get_verifier())
-
-        limiter = ConcurrencyLimiter(10)
-
-        d = self._add_deepstats_from_node(self, found, stats, limiter)
-        d.addCallback(lambda res: stats.get_results())
-        return d
-
-    def _add_deepstats_from_node(self, node, found, stats, limiter):
-        d = limiter.add(node.list)
-        def _got_list(children):
-            dl = []
-            dirsize_bytes = node.get_size()
-            dirsize_children = len(children)
-            stats.add("count-directories")
-            stats.add("size-directories", dirsize_bytes)
-            stats.max("largest-directory", dirsize_bytes)
-            stats.max("largest-directory-children", dirsize_children)
-            for name, (child, metadata) in children.iteritems():
-                verifier = child.get_verifier()
-                if verifier in found:
-                    continue
-                found.add(verifier)
-                if IDirectoryNode.providedBy(child):
-                    dl.append(self._add_deepstats_from_node(child, found,
-                                                            stats, limiter))
-                elif IMutableFileNode.providedBy(child):
-                    stats.add("count-files")
-                    stats.add("count-mutable-files")
-                    # TODO: update the servermap, compute a size, add it to
-                    # size-mutable-files, max it into "largest-mutable-file"
-                elif IFileNode.providedBy(child): # CHK and LIT
-                    stats.add("count-files")
-                    size = child.get_size()
-                    stats.histogram("size-files-histogram", size)
-                    if child.get_uri().startswith("URI:LIT:"):
-                        stats.add("count-literal-files")
-                        stats.add("size-literal-files", size)
-                    else:
-                        stats.add("count-immutable-files")
-                        stats.add("size-immutable-files", size)
-                        stats.max("largest-immutable-file", size)
-            if dl:
-                return defer.DeferredList(dl)
-        d.addCallback(_got_list)
-        return d
+        # Since deep_traverse tracks verifier caps, we avoid double-counting
+        # children for which we've got both a write-cap and a read-cap
+        return self.deep_traverse(DeepStats())
 
     def deep_check(self, verify=False):
-        return self.deep_check_base(verify, False)
+        return self.deep_traverse(DeepChecker(self, verify, repair=False))
 
     def deep_check_and_repair(self, verify=False):
-        return self.deep_check_base(verify, True)
+        return self.deep_traverse(DeepChecker(self, verify, repair=True))
 
-    def deep_check_base(self, verify, repair):
-        # shallow-check each object first, then traverse children
-        root_si = self._node.get_storage_index()
-        self._lp = log.msg(format="deep-check starting (%(si)s),"
-                           " verify=%(verify)s, repair=%(repair)s",
-                           si=base32.b2a(root_si), verify=verify, repair=repair)
-        if repair:
-            results = DeepCheckAndRepairResults(root_si)
-        else:
-            results = DeepCheckResults(root_si)
-        found = set()
-        limiter = ConcurrencyLimiter(10)
-
-        d = self._add_deepcheck_from_node([], self, results, found, limiter,
-                                          verify, repair)
-        def _done(res):
-            log.msg("deep-check done", parent=self._lp)
-            return results
-        d.addCallback(_done)
-        return d
-
-    def _add_deepcheck_from_node(self, path, node, results, found, limiter,
-                                 verify, repair):
-        verifier = node.get_verifier()
-        if verifier in found:
-            # avoid loops
-            return None
-        found.add(verifier)
-
-        if repair:
-            d = limiter.add(node.check_and_repair, verify)
-            d.addCallback(results.add_check_and_repair, path)
-        else:
-            d = limiter.add(node.check, verify)
-            d.addCallback(results.add_check, path)
-
-        # TODO: stats: split the DeepStats.foo calls out of
-        # _add_deepstats_from_node into a separate non-recursing method, call
-        # it from both here and _add_deepstats_from_node.
-
-        if IDirectoryNode.providedBy(node):
-            d.addCallback(lambda res: node.list())
-            def _got_children(children):
-                dl = []
-                for name, (child, metadata) in children.iteritems():
-                    childpath = path + [name]
-                    d2 = self._add_deepcheck_from_node(childpath, child,
-                                                       results,
-                                                       found, limiter,
-                                                       verify, repair)
-                    if d2:
-                        dl.append(d2)
-                if dl:
-                    return defer.DeferredList(dl, fireOnOneErrback=True)
-            d.addCallback(_got_children)
-        return d
+
+class ManifestWalker:
+    def __init__(self):
+        self.manifest = set()
+    def add_node(self, node, path):
+        v = node.get_verifier()
+        # LIT files have no verify-cap, so don't add them
+        if v:
+            assert not isinstance(v, str), "ICK: %s %s" % (v, node)
+            self.manifest.add(v.to_string())
+    def enter_directory(self, parent, children):
+        pass
+    def finish(self):
+        return frozenset(self.manifest)
 
 
 class DeepStats:
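The deep_traverse() docstring in the hunk above defines the walker interface this refactoring introduces: add_node(node, path) is called for every reachable node, enter_directory(parent, children) once per dirnode right after its children are listed, and finish() supplies the value the returned Deferred fires with. The walker below is a hypothetical example written against that interface, not part of this commit; the class name and attributes are illustrative only.

    # Hypothetical walker: counts reachable nodes and records their paths.
    class CountingWalker:
        def __init__(self):
            self.count = 0
            self.paths = []

        def add_node(self, node, path):
            # called once for every reachable file or directory
            self.count += 1
            self.paths.append("/".join(path))

        def enter_directory(self, parent, children):
            # called once per dirnode, right after its children are listed;
            # must not traverse the children (deep_traverse does that)
            pass

        def finish(self):
            # the Deferred returned by deep_traverse() fires with this value
            return (self.count, self.paths)

    # usage, assuming `dirnode` is a NewDirectoryNode:
    #   d = dirnode.deep_traverse(CountingWalker())
    #   d.addCallback(lambda res: ...)   # res is (count, paths)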
@@ -627,6 +558,33 @@ class DeepStats:
         self.buckets = [ (0,0), (1,3)]
         self.root = math.sqrt(10)
 
+    def add_node(self, node, childpath):
+        if IDirectoryNode.providedBy(node):
+            self.add("count-directories")
+        elif IMutableFileNode.providedBy(node):
+            self.add("count-files")
+            self.add("count-mutable-files")
+            # TODO: update the servermap, compute a size, add it to
+            # size-mutable-files, max it into "largest-mutable-file"
+        elif IFileNode.providedBy(node): # CHK and LIT
+            self.add("count-files")
+            size = node.get_size()
+            self.histogram("size-files-histogram", size)
+            if node.get_uri().startswith("URI:LIT:"):
+                self.add("count-literal-files")
+                self.add("size-literal-files", size)
+            else:
+                self.add("count-immutable-files")
+                self.add("size-immutable-files", size)
+                self.max("largest-immutable-file", size)
+
+    def enter_directory(self, parent, children):
+        dirsize_bytes = parent.get_size()
+        dirsize_children = len(children)
+        self.add("size-directories", dirsize_bytes)
+        self.max("largest-directory", dirsize_bytes)
+        self.max("largest-directory-children", dirsize_children)
+
     def add(self, key, value=1):
         self.stats[key] += value
 
@@ -666,6 +624,42 @@ class DeepStats:
             stats[key] = out
         return stats
 
+    def finish(self):
+        return self.get_results()
+
+
+class DeepChecker:
+    def __init__(self, root, verify, repair):
+        root_si = root.get_storage_index()
+        self._lp = log.msg(format="deep-check starting (%(si)s),"
+                           " verify=%(verify)s, repair=%(repair)s",
+                           si=base32.b2a(root_si), verify=verify, repair=repair)
+        self._verify = verify
+        self._repair = repair
+        if repair:
+            self._results = DeepCheckAndRepairResults(root_si)
+        else:
+            self._results = DeepCheckResults(root_si)
+        self._stats = DeepStats()
+
+    def add_node(self, node, childpath):
+        if self._repair:
+            d = node.check_and_repair(self._verify)
+            d.addCallback(self._results.add_check_and_repair, childpath)
+        else:
+            d = node.check(self._verify)
+            d.addCallback(self._results.add_check, childpath)
+        d.addCallback(lambda ignored: self._stats.add_node(node, childpath))
+        return d
+
+    def enter_directory(self, parent, children):
+        return self._stats.enter_directory(parent, children)
+
+    def finish(self):
+        log.msg("deep-check done", parent=self._lp)
+        self._results.update_stats(self._stats.get_results())
+        return self._results
+
 
 # use client.create_dirnode() to make one of these
 
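DeepChecker.finish() above merges the walker's DeepStats results into the check results via update_stats(), which is what the new get_stats() accessor (and the webapi "stats" field) exposes. A usage sketch follows; it is not part of this commit, and the `dirnode` variable is assumed to be an existing NewDirectoryNode.

    # Illustrative only: a deep check now yields results carrying deep-stats.
    def _report(results):
        # results is a DeepCheckResults (DeepCheckAndRepairResults if repairing)
        print "root storage index:", results.get_root_storage_index_string()
        print "objects visited:   ", len(results.get_all_results())
        stats = results.get_stats()     # merged in by DeepChecker.finish()
        print "files:", stats["count-files"], "dirs:", stats["count-directories"]

    d = dirnode.deep_check(verify=False)   # `dirnode` assumed to exist
    d.addCallback(_report)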
|
@ -1648,6 +1648,10 @@ class IDeepCheckResults(Interface):
|
|||||||
be slash-joined) to an ICheckerResults instance, one for each object
|
be slash-joined) to an ICheckerResults instance, one for each object
|
||||||
that was checked."""
|
that was checked."""
|
||||||
|
|
||||||
|
def get_stats():
|
||||||
|
"""Return a dictionary with the same keys as
|
||||||
|
IDirectoryNode.deep_stats()."""
|
||||||
|
|
||||||
class IDeepCheckAndRepairResults(Interface):
|
class IDeepCheckAndRepairResults(Interface):
|
||||||
"""I contain the results of a deep-check-and-repair operation.
|
"""I contain the results of a deep-check-and-repair operation.
|
||||||
|
|
||||||
@@ -1690,6 +1694,10 @@ class IDeepCheckAndRepairResults(Interface):
                   repair)
         """
 
+    def get_stats():
+        """Return a dictionary with the same keys as
+        IDirectoryNode.deep_stats()."""
+
     def get_corrupt_shares():
         """Return a set of (serverid, storage_index, sharenum) for all shares
         that were found to be corrupt before any repair was attempted. Both
@@ -2271,6 +2271,7 @@ class DeepCheck(SystemTestMixin, unittest.TestCase):
         self.failUnlessEqual(data["count-corrupt-shares"], 0, where)
         self.failUnlessEqual(data["list-corrupt-shares"], [], where)
         self.failUnlessEqual(data["list-unhealthy-files"], [], where)
+        self.json_check_stats(data["stats"], where)
 
     def json_full_deepcheck_and_repair_is_healthy(self, data, n, where):
         self.failUnlessEqual(data["root-storage-index"],
@@ -196,6 +196,7 @@ class DeepCheckResults(rend.Page, ResultsBase):
                          for (path_t, r)
                          in self.r.get_all_results().items()
                          if not r.is_healthy() ]
+        data["stats"] = self.r.get_stats()
         return simplejson.dumps(data, indent=1)
 
     def render_root_storage_index(self, ctx, data):
@@ -344,6 +345,7 @@ class DeepCheckAndRepairResults(rend.Page, ResultsBase):
                          for (path_t, r)
                          in self.r.get_all_results().items()
                          if not r.get_pre_repair_results().is_healthy() ]
+        data["stats"] = self.r.get_stats()
        return simplejson.dumps(data, indent=1)
 
     def render_root_storage_index(self, ctx, data):