mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-01-20 11:38:52 +00:00
136 lines
4.8 KiB
Python
136 lines
4.8 KiB
Python
"""Implementation of the deep stats class."""
|
|
|
|
import math
|
|
|
|
from allmydata.interfaces import IImmutableFileNode
|
|
from allmydata.interfaces import IMutableFileNode
|
|
from allmydata.interfaces import IDirectoryNode
|
|
from allmydata.unknown import UnknownNode
|
|
from allmydata.uri import LiteralFileURI
|
|
from allmydata.uri import from_string
|
|
from allmydata.util import mathutil
|
|
|
|
class DeepStats(object):
|
|
"""Deep stats object.
|
|
|
|
Holds results of the deep-stats opetation.
|
|
Used for json generation in the API."""
|
|
|
|
# Json API version.
|
|
# Rules:
|
|
# - increment each time a field is removed or changes meaning.
|
|
# - it's ok to add a new field without incrementing the version.
|
|
API_VERSION = 1
|
|
|
|
def __init__(self, origin):
|
|
"""Initializes DeepStats object. Sets most of the fields to 0."""
|
|
self.monitor = None
|
|
self.origin = origin
|
|
self.stats = {
|
|
'api-version': self.API_VERSION
|
|
}
|
|
for k in ["count-immutable-files",
|
|
"count-mutable-files",
|
|
"count-literal-files",
|
|
"count-files",
|
|
"count-directories",
|
|
"count-unknown",
|
|
"size-immutable-files",
|
|
#"size-mutable-files",
|
|
"size-literal-files",
|
|
"size-directories",
|
|
"largest-directory",
|
|
"largest-directory-children",
|
|
"largest-immutable-file",
|
|
#"largest-mutable-file",
|
|
]:
|
|
self.stats[k] = 0
|
|
self.histograms = {}
|
|
for k in ["size-files-histogram"]:
|
|
self.histograms[k] = {} # maps (min,max) to count
|
|
self.buckets = [(0, 0), (1, 3)]
|
|
self.root = math.sqrt(10)
|
|
|
|
def set_monitor(self, monitor):
|
|
"""Sets a new monitor."""
|
|
self.monitor = monitor
|
|
monitor.origin_si = self.origin.get_storage_index()
|
|
monitor.set_status(self.get_results())
|
|
|
|
def add_node(self, node, childpath):
|
|
"""Adds a node's stats to calculation."""
|
|
if isinstance(node, UnknownNode):
|
|
self.add("count-unknown")
|
|
elif IDirectoryNode.providedBy(node):
|
|
self.add("count-directories")
|
|
elif IMutableFileNode.providedBy(node):
|
|
self.add("count-files")
|
|
self.add("count-mutable-files")
|
|
# TODO: update the servermap, compute a size, add it to
|
|
# size-mutable-files, max it into "largest-mutable-file"
|
|
elif IImmutableFileNode.providedBy(node): # CHK and LIT
|
|
self.add("count-files")
|
|
size = node.get_size()
|
|
self.histogram("size-files-histogram", size)
|
|
theuri = from_string(node.get_uri())
|
|
if isinstance(theuri, LiteralFileURI):
|
|
self.add("count-literal-files")
|
|
self.add("size-literal-files", size)
|
|
else:
|
|
self.add("count-immutable-files")
|
|
self.add("size-immutable-files", size)
|
|
self.max("largest-immutable-file", size)
|
|
|
|
def enter_directory(self, parent, children):
|
|
"""Adds directory stats."""
|
|
dirsize_bytes = parent.get_size()
|
|
if dirsize_bytes is not None:
|
|
self.add("size-directories", dirsize_bytes)
|
|
self.max("largest-directory", dirsize_bytes)
|
|
dirsize_children = len(children)
|
|
self.max("largest-directory-children", dirsize_children)
|
|
|
|
def add(self, key, value=1):
|
|
self.stats[key] += value
|
|
|
|
def max(self, key, value):
|
|
self.stats[key] = max(self.stats[key], value)
|
|
|
|
def which_bucket(self, size):
|
|
# return (min,max) such that min <= size <= max
|
|
# values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
|
|
# (101,316), (317, 1000), etc: two per decade
|
|
assert size >= 0
|
|
i = 0
|
|
while True:
|
|
if i >= len(self.buckets):
|
|
# extend the list
|
|
new_lower = self.buckets[i-1][1]+1
|
|
new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
|
|
self.buckets.append((new_lower, new_upper))
|
|
maybe = self.buckets[i]
|
|
if maybe[0] <= size <= maybe[1]:
|
|
return maybe
|
|
i += 1
|
|
|
|
def histogram(self, key, size):
|
|
bucket = self.which_bucket(size)
|
|
h = self.histograms[key]
|
|
if bucket not in h:
|
|
h[bucket] = 0
|
|
h[bucket] += 1
|
|
|
|
def get_results(self):
|
|
"""Returns deep-stats resutls."""
|
|
stats = self.stats.copy()
|
|
for key in self.histograms:
|
|
h = self.histograms[key]
|
|
out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
|
|
out.sort()
|
|
stats[key] = out
|
|
return stats
|
|
|
|
def finish(self):
|
|
"""Finishes gathering stats."""
|
|
return self.get_results()
|