mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-06-23 01:08:53 +00:00
Move DeepStats to separate file as dirnode is a bit too large to work with comfortably.
This commit is contained in:
committed by
Brian Warner
parent
0cea91d737
commit
73fabaec19
120
src/allmydata/deep_stats.py
Normal file
120
src/allmydata/deep_stats.py
Normal file
@ -0,0 +1,120 @@
|
||||
"""Implementation of the deep stats class."""
|
||||
|
||||
import math
|
||||
|
||||
from allmydata.interfaces import IImmutableFileNode
|
||||
from allmydata.interfaces import IMutableFileNode
|
||||
from allmydata.interfaces import IDirectoryNode
|
||||
from allmydata.unknown import UnknownNode
|
||||
from allmydata.uri import LiteralFileURI
|
||||
from allmydata.uri import from_string
|
||||
from allmydata.util import mathutil
|
||||
|
||||
class DeepStats:
    """Deep stats object.

    Holds the results of the deep-stats operation.
    Used for json generation in the API.

    Scalar counters, totals and maxima live in ``self.stats``; per-bucket
    file counts live in ``self.histograms``. ``get_results()`` merges both
    into a single dict.
    """

    def __init__(self, origin):
        """Create an empty result holder.

        :param origin: object the statistics are rooted at. It is only
            stored here; ``set_monitor()`` later calls its
            ``get_storage_index()`` method.
        """
        self.origin = origin

        # Every scalar statistic starts at zero so that add() and max() can
        # update it without checking for presence first.
        self.stats = {}
        for k in ["count-immutable-files",
                  "count-mutable-files",
                  "count-literal-files",
                  "count-files",
                  "count-directories",
                  "count-unknown",
                  "size-immutable-files",
                  # "size-mutable-files",  # not computed yet, see add_node()
                  "size-literal-files",
                  "size-directories",
                  "largest-directory",
                  "largest-directory-children",
                  "largest-immutable-file",
                  # "largest-mutable-file",  # not computed yet, see add_node()
                  ]:
            self.stats[k] = 0

        self.histograms = {}
        for k in ["size-files-histogram"]:
            self.histograms[k] = {}  # maps (min,max) to count

        # Known histogram buckets, in ascending order. which_bucket()
        # appends further ones on demand, using self.root as the growth
        # factor (two buckets per decade).
        self.buckets = [(0, 0), (1, 3)]
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach *monitor* and publish the current results to it.

        Stores the monitor on ``self.monitor``, sets its ``origin_si``
        attribute to the origin's storage index, and passes the current
        ``get_results()`` dict to ``monitor.set_status()``.
        """
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Update the counters for a single visited *node*.

        The node is classified by the first matching test, in this order:
        unknown node, directory, mutable file, immutable file. Nodes that
        match none of them leave the statistics untouched.

        :param node: the node being visited.
        :param childpath: accepted for the caller's benefit; not used here.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            # Literal files are told apart from other immutable files by
            # the type of their parsed URI.
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record the size statistics of a directory.

        :param parent: the directory; its ``get_size()`` result feeds the
            byte-size statistics unless it is ``None``.
        :param children: sized collection of the directory's children; only
            its length is used.
        """
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increase the statistic *key* by *value* (default 1)."""
        self.stats[key] += value

    def max(self, key, value):
        """Raise the statistic *key* to *value* if *value* is larger."""
        # Inside a method body the bare name ``max`` is still the builtin.
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Return the ``(min, max)`` bucket such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317, 1000), etc: two per decade. ``self.buckets`` is
        extended in place whenever *size* lies beyond the last known bucket.

        :raises AssertionError: if *size* is not >= 0.
        """
        # Raised explicitly instead of via an ``assert`` statement: asserts
        # are stripped under ``python -O``, and without this guard a
        # negative size matches no bucket, so the loop below would keep
        # appending buckets forever. The exception type is kept as
        # AssertionError so existing callers observe no change.
        if not size >= 0:
            raise AssertionError("size must be >= 0, got %r" % (size,))

        i = 0
        while True:
            if i >= len(self.buckets):
                # extend the list: the new bucket starts right after the
                # last one and ends at the next power of self.root
                new_lower = self.buckets[i-1][1] + 1
                new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
                self.buckets.append((new_lower, new_upper))
            maybe = self.buckets[i]
            if maybe[0] <= size <= maybe[1]:
                return maybe
            i += 1

    def histogram(self, key, size):
        """Count one item of *size* in the histogram named *key*."""
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        h[bucket] = h.get(bucket, 0) + 1

    def get_results(self):
        """Return a new dict holding all statistics.

        The dict is a copy of ``self.stats`` with one extra entry per
        histogram: a sorted list of ``(min, max, count)`` tuples.
        """
        stats = self.stats.copy()
        for key in self.histograms:
            h = self.histograms[key]
            stats[key] = sorted((bucket[0], bucket[1], h[bucket])
                                for bucket in h)
        return stats

    def finish(self):
        """Return the final results; same as ``get_results()``."""
        return self.get_results()
|
Reference in New Issue
Block a user