dirnode: add a deep_stats(), like deep-size but with more information. webish adds t=deep-stats too.

This commit is contained in:
Brian Warner 2008-05-08 13:21:14 -07:00
parent 462ef2a0ac
commit 6c00a70dbc
6 changed files with 210 additions and 2 deletions

View File

@ -571,6 +571,38 @@ GET $URL?t=deep-size
expansion or encoding overhead into account. Later versions of the code may
improve this estimate upwards.
GET $URL?t=deep-stats
Return a JSON-encoded dictionary that lists interesting statistics about
the set of all files and directories reachable from the given directory:
count-immutable-files: count of how many CHK files are in the set
count-mutable-files: same, for mutable files (does not include directories)
count-literal-files: same, for LIT files (data contained inside the URI)
count-files: sum of the above three
count-directories: count of directories
size-immutable-files: total bytes for all CHK files in the set, =deep-size
size-mutable-files (TODO): same, for current version of all mutable files
size-literal-files: same, for LIT files
size-directories: size of directories (includes size-literal-files)
largest-directory: number of bytes in the largest directory
largest-directory-children: number of children in the largest directory
largest-immutable-file: number of bytes in the largest CHK file
size-mutable-files is not implemented, because it would require extra
queries to each mutable file to get their size. This may be implemented in
the future.
Assuming no sharing, the basic space consumed by a single root directory is
the sum of size-immutable-files, size-mutable-files, and size-directories.
The actual disk space used by the shares is larger, because of the
following sources of overhead:
integrity data
expansion due to erasure coding
share management data (leases)
backend (ext3) minimum block size
6. XMLRPC (coming soon)
http://127.0.0.1:8123/xmlrpc

View File

@ -113,6 +113,8 @@ class NewDirectoryNode:
def __init__(self, client):
self._client = client
self._most_recent_size = None
def __repr__(self):
return "<%s %s %s>" % (self.__class__.__name__, self.is_readonly() and "RO" or "RW", hasattr(self, '_uri') and self._uri.abbrev())
def init_from_uri(self, myuri):
@ -137,8 +139,18 @@ class NewDirectoryNode:
self._uri = NewDirectoryURI(IMutableFileURI(self._node.get_uri()))
return self
def get_size(self):
    """Return the most recently recorded size, in bytes, of the mutable
    file backing this directory, or None if it has not been fetched."""
    size = self._most_recent_size
    return size
def _set_size(self, data):
self._most_recent_size = len(data)
return data
def _read(self):
d = self._node.download_best_version()
d.addCallback(self._set_size)
d.addCallback(self._unpack_contents)
return d
@ -463,6 +475,76 @@ class NewDirectoryNode:
d.addCallback(_got_list)
return d
def deep_stats(self):
    """Tally statistics over every node reachable from this directory.

    Returns a Deferred that fires with the statistics dictionary once the
    traversal started here has finished.
    """
    # "size-mutable-files" and "largest-mutable-file" are intentionally
    # omitted: mutable file sizes are not measured yet.
    stat_names = [
        "count-immutable-files",
        "count-mutable-files",
        "count-literal-files",
        "count-files",
        "count-directories",
        "size-immutable-files",
        "size-literal-files",
        "size-directories",
        "largest-directory",
        "largest-directory-children",
        "largest-immutable-file",
    ]
    stats = dict.fromkeys(stat_names, 0)
    # Verifier caps seen so far. Tracking them prevents double-counting a
    # child for which we hold both a write-cap and a read-cap.
    found = set([self.get_verifier()])
    limiter = ConcurrencyLimiter(10)
    d = self._add_deepstats_from_node(self, found, stats, limiter)
    d.addCallback(lambda ignored: stats)
    return d
def _add_deepstats_from_node(self, node, found, stats, limiter):
    """Fold the statistics for directory ``node``, and for everything
    newly reachable beneath it, into ``stats``.

    node: the directory node to list.
    found: set of verifier caps already counted; updated in place.
    stats: dictionary of counters (see deep_stats); updated in place.
    limiter: object whose add() schedules the node.list call.

    Returns a Deferred that fires once this directory and all of its
    not-yet-seen subdirectories have been processed. If listing any
    subdirectory fails, the Deferred errbacks with that failure instead
    of silently reporting incomplete statistics.
    """
    d = limiter.add(node.list)

    def _unwrap_first_error(f):
        # DeferredList(fireOnOneErrback=True) wraps the real failure in
        # FirstError; hand callers the underlying failure instead.
        f.trap(defer.FirstError)
        return f.value.subFailure

    def _got_list(children):
        dl = []
        # presumably populated by the list() call that just completed
        dirsize_bytes = node.get_size()
        dirsize_children = len(children)
        stats["count-directories"] += 1
        stats["size-directories"] += dirsize_bytes
        stats["largest-directory"] = max(stats["largest-directory"],
                                         dirsize_bytes)
        stats["largest-directory-children"] = max(
            stats["largest-directory-children"], dirsize_children)
        for name, (child, metadata) in children.iteritems():
            verifier = child.get_verifier()
            if verifier in found:
                # already counted via another path or another cap
                continue
            found.add(verifier)
            if IDirectoryNode.providedBy(child):
                dl.append(self._add_deepstats_from_node(child, found,
                                                        stats, limiter))
            elif IMutableFileNode.providedBy(child):
                stats["count-files"] += 1
                stats["count-mutable-files"] += 1
                # TODO: update the servermap, compute a size, add it to
                # stats["size-mutable-files"], max it into
                # stats["largest-mutable-file"]
            elif IFileNode.providedBy(child): # CHK and LIT
                stats["count-files"] += 1
                size = child.get_size()
                if child.get_uri().startswith("URI:LIT:"):
                    stats["count-literal-files"] += 1
                    stats["size-literal-files"] += size
                else:
                    stats["count-immutable-files"] += 1
                    stats["size-immutable-files"] += size
                    stats["largest-immutable-file"] = max(
                        stats["largest-immutable-file"], size)
        if dl:
            # A plain DeferredList never errbacks, which would hide a
            # failed subdirectory listing from the caller.
            subdirs = defer.DeferredList(dl, fireOnOneErrback=True,
                                         consumeErrors=True)
            subdirs.addErrback(_unwrap_first_error)
            return subdirs
    d.addCallback(_got_list)
    return d
# use client.create_dirnode() to make one of these

View File

@ -867,8 +867,42 @@ class IDirectoryNode(IMutableFilesystemNode):
operation finishes. The child name must be a unicode string."""
def build_manifest():
"""Return a frozenset of verifier-capability strings for all nodes
(directories and files) reachable from this one."""
"""Return a Deferred that fires with a frozenset of
verifier-capability strings for all nodes (directories and files)
reachable from this one."""
def deep_stats():
"""Return a Deferred that fires with a dictionary of statistics
computed by examining all nodes (directories and files) reachable
from this one, with the following keys::
count-immutable-files: count of how many CHK files are in the set
count-mutable-files: same, for mutable files (does not include
directories)
count-literal-files: same, for LIT files
count-files: sum of the above three
count-directories: count of directories
size-immutable-files: total bytes for all CHK files in the set
size-mutable-files (TODO): same, for current version of all mutable
files, does not include directories
size-literal-files: same, for LIT files
size-directories: size of mutable files used by directories
largest-directory: number of bytes in the largest directory
largest-directory-children: number of children in the largest
directory
largest-immutable-file: number of bytes in the largest CHK file
size-mutable-files is not yet implemented, because it would involve
even more queries than deep_stats does. Until it is implemented, the
size-mutable-files key is absent from the returned dictionary.
This operation will visit every directory node underneath this one,
and can take a long time to run. On a typical workstation with good
bandwidth, this can examine roughly 15 directories per second (and
takes several minutes of 100% CPU for ~1700 directories).
"""
class ICodecEncoder(Interface):
def set_params(data_size, required_shares, max_shares):

View File

@ -207,6 +207,29 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
sorted(self.expected_manifest))
d.addCallback(_check_manifest)
d.addCallback(lambda res: n.deep_stats())
def _check_deepstats(stats):
self.failUnless(isinstance(stats, dict))
expected = {"count-immutable-files": 0,
"count-mutable-files": 1,
"count-literal-files": 0,
"count-files": 1,
"count-directories": 2,
"size-immutable-files": 0,
"size-literal-files": 0,
#"size-directories": 616, # varies
#"largest-directory": 616,
"largest-directory-children": 2,
"largest-immutable-file": 0,
}
for k,v in expected.iteritems():
self.failUnlessEqual(stats[k], v,
"stats[%s] was %s, not %s" %
(k, stats[k], v))
self.failUnless(stats["size-directories"] > 600)
self.failUnless(stats["largest-directory"] > 600)
d.addCallback(_check_deepstats)
def _add_subsubdir(res):
return self.subdir.create_empty_directory(u"subsubdir")
d.addCallback(_add_subsubdir)

View File

@ -768,6 +768,29 @@ class Web(WebMixin, unittest.TestCase):
d.addCallback(_got)
return d
def test_GET_DIRURL_deepstats(self):
    # Fetch t=deep-stats for /foo and compare the decoded JSON against the
    # values that are stable from run to run. "size-directories" (about
    # 1912) and "largest-directory" (about 1590) vary, so they are not
    # checked here.
    expected = {"count-immutable-files": 3,
                "count-mutable-files": 0,
                "count-literal-files": 0,
                "count-files": 3,
                "count-directories": 3,
                "size-immutable-files": 57,
                "size-literal-files": 0,
                "largest-directory-children": 5,
                "largest-immutable-file": 19,
                }

    def _check_stats(body):
        stats = simplejson.loads(body)
        for key, want in expected.iteritems():
            actual = stats[key]
            message = "stats[%s] was %s, not %s" % (key, actual, want)
            self.failUnlessEqual(actual, want, message)

    d = self.GET(self.public_url + "/foo?t=deep-stats", followRedirect=True)
    d.addCallback(_check_stats)
    return d
def test_GET_DIRURL_uri(self):
d = self.GET(self.public_url + "/foo?t=uri")
def _check(res):

View File

@ -1244,6 +1244,18 @@ class DeepSize(rend.Page):
d.addCallback(_measure_size)
return d
class DeepStats(rend.Page):
    """Page that serves a directory node's deep_stats() result as JSON."""

    def __init__(self, dirnode, dirpath):
        # dirnode supplies deep_stats(); dirpath is only stored here.
        self._dirpath = dirpath
        self._dirnode = dirnode

    def renderHTTP(self, ctx):
        """Return a Deferred that fires with the JSON-encoded statistics,
        after marking the response as text/plain."""
        request = inevow.IRequest(ctx)
        request.setHeader("content-type", "text/plain")
        d = self._dirnode.deep_stats()
        d.addCallback(lambda stats: simplejson.dumps(stats, indent=1))
        return d
class ChildError:
implements(inevow.IResource)
def renderHTTP(self, ctx):
@ -1338,6 +1350,8 @@ class VDrive(rend.Page):
return Manifest(node, path), ()
elif t == "deep-size":
return DeepSize(node, path), ()
elif t == "deep-stats":
return DeepStats(node, path), ()
elif t == 'rename-form':
return RenameForm(self.name, node, path), ()
else: