mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-06-23 09:15:32 +00:00
deep-stats: add file-size histogram
This commit is contained in:
@ -585,6 +585,9 @@ GET $URL?t=deep-stats
|
|||||||
size-mutable-files (TODO): same, for current version of all mutable files
|
size-mutable-files (TODO): same, for current version of all mutable files
|
||||||
size-literal-files: same, for LIT files
|
size-literal-files: same, for LIT files
|
||||||
size-directories: size of directories (includes size-literal-files)
|
size-directories: size of directories (includes size-literal-files)
|
||||||
|
size-files-histogram: list of (minsize, maxsize, count) buckets,
|
||||||
|
with a histogram of file sizes, 5dB/bucket (two buckets per decade),
|
||||||
|
for both literal and immutable files
|
||||||
largest-directory: number of children in the largest directory
|
largest-directory: number of children in the largest directory
|
||||||
largest-immutable-file: number of bytes in the largest CHK file
|
largest-immutable-file: number of bytes in the largest CHK file
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
import os, time
|
import os, time, math
|
||||||
|
|
||||||
from zope.interface import implements
|
from zope.interface import implements
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
@ -8,7 +8,7 @@ from allmydata.mutable.common import NotMutableError
|
|||||||
from allmydata.mutable.node import MutableFileNode
|
from allmydata.mutable.node import MutableFileNode
|
||||||
from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
|
from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
|
||||||
IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode
|
IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode
|
||||||
from allmydata.util import hashutil
|
from allmydata.util import hashutil, mathutil
|
||||||
from allmydata.util.hashutil import netstring
|
from allmydata.util.hashutil import netstring
|
||||||
from allmydata.util.limiter import ConcurrencyLimiter
|
from allmydata.util.limiter import ConcurrencyLimiter
|
||||||
from allmydata.uri import NewDirectoryURI
|
from allmydata.uri import NewDirectoryURI
|
||||||
@ -514,6 +514,7 @@ class NewDirectoryNode:
|
|||||||
elif IFileNode.providedBy(child): # CHK and LIT
|
elif IFileNode.providedBy(child): # CHK and LIT
|
||||||
stats.add("count-files")
|
stats.add("count-files")
|
||||||
size = child.get_size()
|
size = child.get_size()
|
||||||
|
stats.histogram("size-files-histogram", size)
|
||||||
if child.get_uri().startswith("URI:LIT:"):
|
if child.get_uri().startswith("URI:LIT:"):
|
||||||
stats.add("count-literal-files")
|
stats.add("count-literal-files")
|
||||||
stats.add("size-literal-files", size)
|
stats.add("size-literal-files", size)
|
||||||
@ -544,6 +545,11 @@ class DeepStats:
|
|||||||
#"largest-mutable-file",
|
#"largest-mutable-file",
|
||||||
]:
|
]:
|
||||||
self.stats[k] = 0
|
self.stats[k] = 0
|
||||||
|
self.histograms = {}
|
||||||
|
for k in ["size-files-histogram"]:
|
||||||
|
self.histograms[k] = {} # maps (min,max) to count
|
||||||
|
self.buckets = [ (0,0), (1,3)]
|
||||||
|
self.root = math.sqrt(10)
|
||||||
|
|
||||||
def add(self, key, value=1):
    """Increase the counter stored at self.stats[key] by value.

    With the default value of 1 this counts one more occurrence. The key
    must already be present in self.stats; a missing key raises KeyError.
    """
    self.stats[key] += value
|
||||||
@ -551,8 +557,38 @@ class DeepStats:
|
|||||||
def max(self, key, value):
    """Keep self.stats[key] at the largest value reported so far for key.

    The stored entry is replaced by value only when value is larger. The
    key must already be present in self.stats.
    """
    # Inside the method body 'max' resolves to the builtin, not to this
    # method: class attributes are not part of a function's name scope.
    self.stats[key] = max(self.stats[key], value)
|
||||||
|
|
||||||
|
def which_bucket(self, size):
    """Return the (min, max) histogram bucket that contains size.

    Buckets are inclusive at both ends (min <= size <= max) and come from
    the sequence (0,0), (1,3), (4,10), (11,31), (32,100), (101,316),
    (317,1000), etc: two buckets per decade. self.buckets caches the
    buckets generated so far and is extended in place, on demand, until
    its last bucket reaches size.

    Raises ValueError if size is negative, or if no bucket contains it
    (for example a non-integer size that falls between two buckets).
    """
    # An explicit check rather than 'assert': asserts are removed under
    # 'python -O', and a negative size can never match a bucket, so the
    # search would otherwise keep growing self.buckets without end.
    if size < 0:
        raise ValueError("size must be non-negative, not %r" % (size,))

    # Grow the cached list until its last bucket covers size.
    while self.buckets[-1][1] < size:
        new_lower = self.buckets[-1][1] + 1
        # NOTE(review): mathutil.next_power_of_k is defined outside this
        # file; presumably it yields the power of self.root that closes
        # the bucket starting at new_lower -- confirm against mathutil.
        new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
        self.buckets.append((new_lower, new_upper))

    for bucket in self.buckets:
        if bucket[0] <= size <= bucket[1]:
            return bucket

    # Reached only for sizes that sit between two integer buckets.
    raise ValueError("no histogram bucket contains size %r" % (size,))
|
||||||
|
|
||||||
|
def histogram(self, key, size):
    """Record one file of the given size in the histogram named key.

    The size is mapped to its (min, max) bucket by self.which_bucket(),
    and that bucket's count in self.histograms[key] goes up by one. A
    bucket seen for the first time starts from zero.
    """
    slot = self.which_bucket(size)
    counts = self.histograms[key]
    counts[slot] = counts.get(slot, 0) + 1
|
||||||
|
|
||||||
def get_results(self):
    """Return all collected statistics as a new dictionary.

    The result starts as a copy of self.stats, so self.stats itself is
    not modified. Each histogram in self.histograms is then added under
    its own key as a sorted list of (minsize, maxsize, count) tuples, one
    per bucket that has been counted.
    """
    results = self.stats.copy()
    for name, counts in self.histograms.items():
        results[name] = sorted((lo, hi, seen)
                               for ((lo, hi), seen) in counts.items())
    return results
|
||||||
|
|
||||||
|
|
||||||
# use client.create_dirnode() to make one of these
|
# use client.create_dirnode() to make one of these
|
||||||
|
@ -166,6 +166,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
|
|||||||
|
|
||||||
d = self.client.create_empty_dirnode()
|
d = self.client.create_empty_dirnode()
|
||||||
def _then(n):
|
def _then(n):
|
||||||
|
# /
|
||||||
self.failUnless(n.is_mutable())
|
self.failUnless(n.is_mutable())
|
||||||
u = n.get_uri()
|
u = n.get_uri()
|
||||||
self.failUnless(u)
|
self.failUnless(u)
|
||||||
@ -186,8 +187,13 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
|
|||||||
assert isinstance(ffu_v, str)
|
assert isinstance(ffu_v, str)
|
||||||
self.expected_manifest.append(ffu_v)
|
self.expected_manifest.append(ffu_v)
|
||||||
d.addCallback(lambda res: n.set_uri(u"child", fake_file_uri))
|
d.addCallback(lambda res: n.set_uri(u"child", fake_file_uri))
|
||||||
|
# /
|
||||||
|
# /child = mutable
|
||||||
|
|
||||||
d.addCallback(lambda res: n.create_empty_directory(u"subdir"))
|
d.addCallback(lambda res: n.create_empty_directory(u"subdir"))
|
||||||
|
# /
|
||||||
|
# /child = mutable
|
||||||
|
# /subdir = directory
|
||||||
def _created(subdir):
|
def _created(subdir):
|
||||||
self.failUnless(isinstance(subdir, FakeDirectoryNode))
|
self.failUnless(isinstance(subdir, FakeDirectoryNode))
|
||||||
self.subdir = subdir
|
self.subdir = subdir
|
||||||
@ -230,6 +236,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
|
|||||||
stats["size-directories"])
|
stats["size-directories"])
|
||||||
self.failUnless(stats["largest-directory"] > 500,
|
self.failUnless(stats["largest-directory"] > 500,
|
||||||
stats["largest-directory"])
|
stats["largest-directory"])
|
||||||
|
self.failUnlessEqual(stats["size-files-histogram"], [])
|
||||||
d.addCallback(_check_deepstats)
|
d.addCallback(_check_deepstats)
|
||||||
|
|
||||||
def _add_subsubdir(res):
|
def _add_subsubdir(res):
|
||||||
@ -458,6 +465,49 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
|
|||||||
|
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
class DeepStats(unittest.TestCase):
    """Unit tests for dirnode.DeepStats: counters, maxima and histograms."""

    def test_stats(self):
        """Check add(), max() and histogram() through get_results()."""
        ds = dirnode.DeepStats()
        ds.add("count-files")
        ds.add("size-immutable-files", 123)
        ds.histogram("size-files-histogram", 123)
        ds.max("largest-directory", 444)

        s = ds.get_results()
        self.failUnlessEqual(s["count-files"], 1)
        self.failUnlessEqual(s["size-immutable-files"], 123)
        self.failUnlessEqual(s["largest-directory"], 444)
        self.failUnlessEqual(s["count-literal-files"], 0)

        # A second file: counters accumulate, while the maximum must not
        # drop when a smaller value (2) is reported.
        ds.add("count-files")
        ds.add("size-immutable-files", 321)
        ds.histogram("size-files-histogram", 321)
        ds.max("largest-directory", 2)

        s = ds.get_results()
        self.failUnlessEqual(s["count-files"], 2)
        self.failUnlessEqual(s["size-immutable-files"], 444)
        self.failUnlessEqual(s["largest-directory"], 444)
        self.failUnlessEqual(s["count-literal-files"], 0)
        # 123 and 321 land in adjacent buckets.
        self.failUnlessEqual(s["size-files-histogram"],
                             [ (101, 316, 1), (317, 1000, 1) ])

        # One file of every size from 1 to 1099, plus one 4TB file, to
        # exercise bucket generation well past the initial buckets.
        ds = dirnode.DeepStats()
        for i in range(1, 1100):
            ds.histogram("size-files-histogram", i)
        ds.histogram("size-files-histogram", 4*1000*1000*1000*1000) # 4TB
        s = ds.get_results()
        # Plain integer literals: the Python-2-only 'L' suffix is not
        # needed for the comparison and does not parse on Python 3.
        self.failUnlessEqual(s["size-files-histogram"],
                             [ (1, 3, 3),
                               (4, 10, 7),
                               (11, 31, 21),
                               (32, 100, 69),
                               (101, 316, 216),
                               (317, 1000, 684),
                               (1001, 3162, 99),
                               (3162277660169, 10000000000000, 1),
                               ])
|
||||||
|
|
||||||
|
|
||||||
netstring = hashutil.netstring
|
netstring = hashutil.netstring
|
||||||
split_netstring = dirnode.split_netstring
|
split_netstring = dirnode.split_netstring
|
||||||
|
@ -1112,6 +1112,31 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin,
|
|||||||
# P/s2-rw/mydata992 (same as P/s2-rw/mydata992)
|
# P/s2-rw/mydata992 (same as P/s2-rw/mydata992)
|
||||||
d1.addCallback(lambda manifest:
|
d1.addCallback(lambda manifest:
|
||||||
self.failUnlessEqual(len(manifest), 4))
|
self.failUnlessEqual(len(manifest), 4))
|
||||||
|
d1.addCallback(lambda res: home.deep_stats())
|
||||||
|
def _check_stats(stats):
    # Callback for home.deep_stats(): verify the statistics reported for
    # the directory tree built earlier in this test.
    exact = {"count-immutable-files": 1,
             "count-mutable-files": 0,
             "count-literal-files": 1,
             "count-files": 2,
             "count-directories": 3,
             "size-immutable-files": 112,
             "size-literal-files": 23,
             #"size-directories": 616, # varies
             #"largest-directory": 616,
             "largest-directory-children": 3,
             "largest-immutable-file": 112,
             }
    for name, want in exact.iteritems():
        self.failUnlessEqual(stats[name], want,
                             "stats[%s] was %s, not %s" %
                             (name, stats[name], want))

    # Directory sizes vary from run to run, so only lower bounds are
    # checked for them.
    total_dir_size = stats["size-directories"]
    self.failUnless(total_dir_size > 1300, total_dir_size)
    biggest_dir = stats["largest-directory"]
    self.failUnless(biggest_dir > 800, biggest_dir)

    # One file in each of two buckets, matching the two entries of
    # "count-files" above.
    histogram_rows = [ (11, 31, 1), (101, 316, 1) ]
    self.failUnlessEqual(stats["size-files-histogram"], histogram_rows)
|
||||||
|
d1.addCallback(_check_stats)
|
||||||
return d1
|
return d1
|
||||||
d.addCallback(_got_home)
|
d.addCallback(_got_home)
|
||||||
return d
|
return d
|
||||||
|
@ -788,6 +788,8 @@ class Web(WebMixin, unittest.TestCase):
|
|||||||
self.failUnlessEqual(stats[k], v,
|
self.failUnlessEqual(stats[k], v,
|
||||||
"stats[%s] was %s, not %s" %
|
"stats[%s] was %s, not %s" %
|
||||||
(k, stats[k], v))
|
(k, stats[k], v))
|
||||||
|
self.failUnlessEqual(stats["size-files-histogram"],
|
||||||
|
[ [11, 31, 3] ])
|
||||||
d.addCallback(_got)
|
d.addCallback(_got)
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user