tahoe-lafs/src/allmydata/deep_stats.py

136 lines
4.8 KiB
Python
Raw Normal View History

"""Implementation of the deep stats class."""
import math
from allmydata.interfaces import IImmutableFileNode
from allmydata.interfaces import IMutableFileNode
from allmydata.interfaces import IDirectoryNode
from allmydata.unknown import UnknownNode
from allmydata.uri import LiteralFileURI
from allmydata.uri import from_string
from allmydata.util import mathutil
class DeepStats(object):
    """Deep stats object.

    Holds the results of the deep-stats operation: counters, maxima and a
    file-size histogram fed through add_node() and enter_directory().
    The dictionary returned by get_results() is used for JSON generation
    in the API.
    """

    # JSON API version.
    # Rules:
    # - increment each time a field is removed or changes meaning.
    # - it's ok to add a new field without incrementing the version.
    API_VERSION = 1

    def __init__(self, origin):
        """Initialize the DeepStats object; every counter starts at 0.

        origin: the object the stats are gathered for.  It is only stored
            here; set_monitor() later calls its get_storage_index().
        """
        self.monitor = None
        self.origin = origin

        self.stats = {
            'api-version': self.API_VERSION,
        }
        # "size-mutable-files" and "largest-mutable-file" are intentionally
        # absent: add_node() does not compute mutable file sizes yet.
        for k in ["count-immutable-files",
                  "count-mutable-files",
                  "count-literal-files",
                  "count-files",
                  "count-directories",
                  "count-unknown",
                  "size-immutable-files",
                  "size-literal-files",
                  "size-directories",
                  "largest-directory",
                  "largest-directory-children",
                  "largest-immutable-file",
                  ]:
            self.stats[k] = 0

        self.histograms = {}
        for k in ["size-files-histogram"]:
            self.histograms[k] = {}  # maps (min,max) to count

        # Known histogram buckets, in ascending order.  which_bucket()
        # appends further buckets lazily as larger sizes are seen.
        self.buckets = [(0, 0), (1, 3)]
        # Growth factor between bucket upper bounds: two buckets per decade.
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Set a new monitor and publish the current results to it.

        Stores the origin's storage index on the monitor as `origin_si`
        and passes get_results() to monitor.set_status().
        """
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Add a node's stats to the calculation.

        node: the node to classify (unknown, directory, mutable file or
            immutable/literal file).  Nodes matching none of these are
            ignored.
        childpath: accepted for the caller's convenience; not used here.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Add directory stats.

        parent: the directory node; its get_size() may return None, in
            which case the byte-size stats are left untouched.
        children: the directory's children; only their count is used.
        """
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increase the counter `key` by `value` (default 1)."""
        self.stats[key] += value

    def max(self, key, value):
        """Raise the stat `key` to `value` if `value` is larger."""
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Return the bucket (min, max) such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317, 1000), etc: two per decade.  Missing buckets are
        appended to self.buckets on demand.

        Raises AssertionError if `size` is not >= 0.
        """
        # Raised explicitly instead of via `assert` so the check survives
        # `python -O`; without it a negative size never matches a bucket
        # and the extension loop below would never terminate.  The
        # `not (size >= 0)` form also rejects NaN, as the assert did.
        if not (size >= 0):
            raise AssertionError("size must be >= 0, got %r" % (size,))

        # First look through the buckets that already exist.
        for known in self.buckets:
            if known[0] <= size <= known[1]:
                return known

        # Otherwise keep extending the list until a bucket covers `size`.
        while True:
            new_lower = self.buckets[-1][1] + 1
            new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
            fresh = (new_lower, new_upper)
            self.buckets.append(fresh)
            if new_lower <= size <= new_upper:
                return fresh

    def histogram(self, key, size):
        """Count `size` in the bucket it belongs to in histogram `key`."""
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        h[bucket] = h.get(bucket, 0) + 1

    def get_results(self):
        """Return the deep-stats results.

        The result is a copy of the counters dict in which every histogram
        is added under its own key as a sorted list of (min, max, count)
        tuples.
        """
        stats = self.stats.copy()
        for key, h in self.histograms.items():
            stats[key] = sorted(
                (bucket[0], bucket[1], count) for bucket, count in h.items()
            )
        return stats

    def finish(self):
        """Finish gathering stats and return the results."""
        return self.get_results()