mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-23 14:52:26 +00:00
dirnode: add a deep_stats(), like deep-size but with more information. webish adds t=deep-stats too.
This commit is contained in:
parent
462ef2a0ac
commit
6c00a70dbc
@ -571,6 +571,38 @@ GET $URL?t=deep-size
|
||||
expansion or encoding overhead into account. Later versions of the code may
|
||||
improve this estimate upwards.
|
||||
|
||||
GET $URL?t=deep-stats
|
||||
|
||||
Return a JSON-encoded dictionary that lists interesting statistics about
|
||||
the set of all files and directories reachable from the given directory:
|
||||
|
||||
count-immutable-files: count of how many CHK files are in the set
|
||||
count-mutable-files: same, for mutable files (does not include directories)
|
||||
count-literal-files: same, for LIT files (data contained inside the URI)
|
||||
count-files: sum of the above three
|
||||
count-directories: count of directories
|
||||
size-immutable-files: total bytes for all CHK files in the set, =deep-size
|
||||
size-mutable-files (TODO): same, for current version of all mutable files
|
||||
size-literal-files: same, for LIT files
|
||||
size-directories: total bytes of the mutable files used by directories (includes size-literal-files)
|
||||
largest-directory: number of bytes in the largest directory
|
||||
largest-directory-children: number of children in the largest directory
|
||||
largest-immutable-file: number of bytes in the largest CHK file
|
||||
|
||||
size-mutable-files is not implemented, because it would require extra
|
||||
queries to each mutable file to get their size. This may be implemented in
|
||||
the future.
|
||||
|
||||
Assuming no sharing, the basic space consumed by a single root directory is
|
||||
the sum of size-immutable-files, size-mutable-files, and size-directories.
|
||||
The actual disk space used by the shares is larger, because of the
|
||||
following sources of overhead:
|
||||
|
||||
integrity data
|
||||
expansion due to erasure coding
|
||||
share management data (leases)
|
||||
backend (ext3) minimum block size
|
||||
|
||||
|
||||
6. XMLRPC (coming soon)
|
||||
|
||||
http://127.0.0.1:8123/xmlrpc
|
||||
|
@ -113,6 +113,8 @@ class NewDirectoryNode:
|
||||
|
||||
def __init__(self, client):
    """Create a directory node attached to `client`.

    The node starts with no cached size: `_most_recent_size` stays None
    until the directory contents have been downloaded at least once (it
    is filled in by `_set_size`).
    """
    self._client = client
    self._most_recent_size = None
|
||||
|
||||
def __repr__(self):
    """Return "<ClassName RO|RW abbrev>" for debugging output.

    If the node has no `_uri` attribute yet, the last field is rendered as
    False rather than raising AttributeError.
    """
    if self.is_readonly():
        mode = "RO"
    else:
        mode = "RW"
    # hasattr() short-circuits, so abbrev() is only called once _uri exists.
    abbrev = hasattr(self, '_uri') and self._uri.abbrev()
    return "<%s %s %s>" % (self.__class__.__name__, mode, abbrev)
|
||||
def init_from_uri(self, myuri):
|
||||
@ -137,8 +139,18 @@ class NewDirectoryNode:
|
||||
self._uri = NewDirectoryURI(IMutableFileURI(self._node.get_uri()))
|
||||
return self
|
||||
|
||||
def get_size(self):
    """Return the size in bytes of our backing mutable file, if known.

    The value is whatever `_set_size` recorded the last time the directory
    contents were downloaded; it is None if they have never been fetched.
    """
    return self._most_recent_size
|
||||
|
||||
def _set_size(self, data):
    """Record len(data) as the most recent directory size.

    Returns `data` unchanged so this can sit in a callback chain ahead of
    the step that actually parses the contents.
    """
    self._most_recent_size = len(data)
    return data
|
||||
|
||||
def _read(self):
    """Download the best version of the backing file and unpack it.

    Returns the Deferred obtained from
    `self._node.download_best_version()`, with `_set_size` and then
    `_unpack_contents` chained onto it.
    """
    d = self._node.download_best_version()
    # Record the raw size first; _set_size hands the data through untouched
    # so _unpack_contents still receives the downloaded bytes.
    d.addCallback(self._set_size)
    d.addCallback(self._unpack_contents)
    return d
|
||||
|
||||
@ -463,6 +475,76 @@ class NewDirectoryNode:
|
||||
d.addCallback(_got_list)
|
||||
return d
|
||||
|
||||
def deep_stats(self):
    """Tally statistics over everything reachable from this directory.

    Returns a Deferred that fires with a dict whose keys are the counter
    names initialised below (counts, byte totals and maxima). All values
    start at zero and are filled in by `_add_deepstats_from_node`.
    """
    # "size-mutable-files" and "largest-mutable-file" are intentionally
    # absent: nothing computes them yet (see the TODO in
    # _add_deepstats_from_node).
    stats = dict.fromkeys(["count-immutable-files",
                           "count-mutable-files",
                           "count-literal-files",
                           "count-files",
                           "count-directories",
                           "size-immutable-files",
                           "size-literal-files",
                           "size-directories",
                           "largest-directory",
                           "largest-directory-children",
                           "largest-immutable-file",
                           ], 0)

    # we track verifier caps, to avoid double-counting children for which
    # we've got both a write-cap and a read-cap
    found = set()
    found.add(self.get_verifier())

    # NOTE(review): presumably caps the number of simultaneous list()
    # calls at 10 -- confirm against ConcurrencyLimiter.
    limiter = ConcurrencyLimiter(10)

    d = self._add_deepstats_from_node(self, found, stats, limiter)
    # The traversal's own result is irrelevant; hand back the totals.
    d.addCallback(lambda res: stats)
    return d
|
||||
|
||||
def _add_deepstats_from_node(self, node, found, stats, limiter):
    """Fold `node` and every not-yet-seen node beneath it into `stats`.

    node: the directory node to list; its list() is scheduled through
        limiter.add().
    found: set of verifier caps already counted; updated in place so a
        child reachable by several paths is only counted once.
    stats: the counter dict created by deep_stats(); updated in place.
    limiter: object whose add(callable) returns a Deferred for the call.

    Returns a Deferred that fires once this directory and all of its
    unseen subdirectories have been tallied. If listing any subdirectory
    fails, the Deferred errbacks instead of reporting incomplete totals.
    """
    d = limiter.add(node.list)

    def _got_list(children):
        dirsize_bytes = node.get_size()
        dirsize_children = len(children)
        stats["count-directories"] += 1
        stats["size-directories"] += dirsize_bytes
        stats["largest-directory"] = max(stats["largest-directory"],
                                         dirsize_bytes)
        stats["largest-directory-children"] = max(
            stats["largest-directory-children"], dirsize_children)

        subdirs = []
        # Each value is a (child, metadata) pair; names are not needed.
        for child, _metadata in children.values():
            verifier = child.get_verifier()
            if verifier in found:
                continue
            found.add(verifier)
            if IDirectoryNode.providedBy(child):
                subdirs.append(self._add_deepstats_from_node(child, found,
                                                             stats, limiter))
            elif IMutableFileNode.providedBy(child):
                stats["count-files"] += 1
                stats["count-mutable-files"] += 1
                # TODO: update the servermap, compute a size, add it to
                # stats["size-mutable-files"], max it into
                # stats["largest-mutable-file"]
            elif IFileNode.providedBy(child):  # CHK and LIT
                stats["count-files"] += 1
                size = child.get_size()
                if child.get_uri().startswith("URI:LIT:"):
                    stats["count-literal-files"] += 1
                    stats["size-literal-files"] += size
                else:
                    stats["count-immutable-files"] += 1
                    stats["size-immutable-files"] += size
                    stats["largest-immutable-file"] = max(
                        stats["largest-immutable-file"], size)

        if subdirs:
            # A plain DeferredList always fires successfully, which would
            # hide a failed subdirectory and let the caller receive
            # silently incomplete totals. Propagate the first failure
            # instead, and consume the rest so they are not logged as
            # unhandled errors.
            return defer.DeferredList(subdirs, fireOnOneErrback=True,
                                      consumeErrors=True)

    d.addCallback(_got_list)
    return d
|
||||
|
||||
|
||||
|
||||
# use client.create_dirnode() to make one of these
|
||||
|
||||
|
||||
|
@ -867,8 +867,42 @@ class IDirectoryNode(IMutableFilesystemNode):
|
||||
operation finishes. The child name must be a unicode string."""
|
||||
|
||||
def build_manifest():
    """Return a Deferred that fires with a frozenset of
    verifier-capability strings for all nodes (directories and files)
    reachable from this one."""
|
||||
|
||||
def deep_stats():
    """Return a Deferred that fires with a dictionary of statistics
    computed by examining all nodes (directories and files) reachable
    from this one, with the following keys::

       count-immutable-files: count of how many CHK files are in the set
       count-mutable-files: same, for mutable files (does not include
                            directories)
       count-literal-files: same, for LIT files
       count-files: sum of the above three

       count-directories: count of directories

       size-immutable-files: total bytes for all CHK files in the set
       size-mutable-files (TODO): same, for current version of all mutable
                                  files, does not include directories
       size-literal-files: same, for LIT files
       size-directories: size of mutable files used by directories

       largest-directory: number of bytes in the largest directory
       largest-directory-children: number of children in the largest
                                   directory
       largest-immutable-file: number of bytes in the largest CHK file

    size-mutable-files is not yet implemented, because it would involve
    even more queries than deep_stats does.

    This operation will visit every directory node underneath this one,
    and can take a long time to run. On a typical workstation with good
    bandwidth, this can examine roughly 15 directories per second (and
    takes several minutes of 100% CPU for ~1700 directories).
    """
|
||||
|
||||
class ICodecEncoder(Interface):
|
||||
def set_params(data_size, required_shares, max_shares):
|
||||
|
@ -207,6 +207,29 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
|
||||
sorted(self.expected_manifest))
|
||||
d.addCallback(_check_manifest)
|
||||
|
||||
d.addCallback(lambda res: n.deep_stats())
|
||||
def _check_deepstats(stats):
|
||||
self.failUnless(isinstance(stats, dict))
|
||||
expected = {"count-immutable-files": 0,
|
||||
"count-mutable-files": 1,
|
||||
"count-literal-files": 0,
|
||||
"count-files": 1,
|
||||
"count-directories": 2,
|
||||
"size-immutable-files": 0,
|
||||
"size-literal-files": 0,
|
||||
#"size-directories": 616, # varies
|
||||
#"largest-directory": 616,
|
||||
"largest-directory-children": 2,
|
||||
"largest-immutable-file": 0,
|
||||
}
|
||||
for k,v in expected.iteritems():
|
||||
self.failUnlessEqual(stats[k], v,
|
||||
"stats[%s] was %s, not %s" %
|
||||
(k, stats[k], v))
|
||||
self.failUnless(stats["size-directories"] > 600)
|
||||
self.failUnless(stats["largest-directory"] > 600)
|
||||
d.addCallback(_check_deepstats)
|
||||
|
||||
def _add_subsubdir(res):
|
||||
return self.subdir.create_empty_directory(u"subsubdir")
|
||||
d.addCallback(_add_subsubdir)
|
||||
|
@ -768,6 +768,29 @@ class Web(WebMixin, unittest.TestCase):
|
||||
d.addCallback(_got)
|
||||
return d
|
||||
|
||||
def test_GET_DIRURL_deepstats(self):
    """GET <dir>?t=deep-stats returns JSON with the expected counters.

    Only the deterministic fields are compared against fixed values.
    """
    d = self.GET(self.public_url + "/foo?t=deep-stats", followRedirect=True)

    def _got(stats_json):
        stats = simplejson.loads(stats_json)
        # "size-directories" (around 1912) and "largest-directory" (around
        # 1590) vary, so they are deliberately not pinned here.
        expected = {"count-immutable-files": 3,
                    "count-mutable-files": 0,
                    "count-literal-files": 0,
                    "count-files": 3,
                    "count-directories": 3,
                    "size-immutable-files": 57,
                    "size-literal-files": 0,
                    "largest-directory-children": 5,
                    "largest-immutable-file": 19,
                    }
        for k, v in expected.items():
            self.failUnlessEqual(stats[k], v,
                                 "stats[%s] was %s, not %s" %
                                 (k, stats[k], v))

    d.addCallback(_got)
    return d
|
||||
|
||||
def test_GET_DIRURL_uri(self):
|
||||
d = self.GET(self.public_url + "/foo?t=uri")
|
||||
def _check(res):
|
||||
|
@ -1244,6 +1244,18 @@ class DeepSize(rend.Page):
|
||||
d.addCallback(_measure_size)
|
||||
return d
|
||||
|
||||
class DeepStats(rend.Page):
    """Page that renders a directory's deep_stats() as JSON text.

    VDrive returns this resource when a directory is requested with
    t=deep-stats.
    """

    def __init__(self, dirnode, dirpath):
        # dirnode: the directory node whose deep_stats() will be rendered.
        # dirpath: stored alongside it; not used by renderHTTP.
        self._dirnode = dirnode
        self._dirpath = dirpath

    def renderHTTP(self, ctx):
        """Return a Deferred firing with the stats dict serialized as JSON.

        The response is labelled text/plain and the JSON is indented by
        one space per level.
        """
        inevow.IRequest(ctx).setHeader("content-type", "text/plain")
        d = self._dirnode.deep_stats()
        d.addCallback(simplejson.dumps, indent=1)
        return d
|
||||
|
||||
class ChildError:
|
||||
implements(inevow.IResource)
|
||||
def renderHTTP(self, ctx):
|
||||
@ -1338,6 +1350,8 @@ class VDrive(rend.Page):
|
||||
return Manifest(node, path), ()
|
||||
elif t == "deep-size":
|
||||
return DeepSize(node, path), ()
|
||||
elif t == "deep-stats":
|
||||
return DeepStats(node, path), ()
|
||||
elif t == 'rename-form':
|
||||
return RenameForm(self.name, node, path), ()
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user