2020-12-29 14:38:11 +00:00
|
|
|
"""Implementation of the deep stats class.
|
|
|
|
|
|
|
|
Ported to Python 3.
|
|
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
from __future__ import print_function
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from future.utils import PY2
|
|
|
|
if PY2:
|
|
|
|
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
|
2016-11-29 14:12:27 +00:00
|
|
|
|
|
|
|
import math
|
|
|
|
|
|
|
|
from allmydata.interfaces import IImmutableFileNode
|
|
|
|
from allmydata.interfaces import IMutableFileNode
|
|
|
|
from allmydata.interfaces import IDirectoryNode
|
|
|
|
from allmydata.unknown import UnknownNode
|
|
|
|
from allmydata.uri import LiteralFileURI
|
|
|
|
from allmydata.uri import from_string
|
|
|
|
from allmydata.util import mathutil
|
|
|
|
|
2016-11-29 14:28:23 +00:00
|
|
|
class DeepStats(object):
    """Accumulator for the results of a deep-stats operation.

    Keeps a dict of counters and maxima (``self.stats``) plus a file-size
    histogram (``self.histograms``) that are updated as nodes are added.
    ``get_results()`` merges both into one plain dict, which is used for
    JSON generation in the API.
    """

    # JSON API version.
    # Rules:
    # - increment each time a field is removed or changes meaning.
    # - it's ok to add a new field without incrementing the version.
    API_VERSION = 1

    def __init__(self, origin):
        """Initialize the stats object; every counter and maximum is 0.

        :param origin: the object the operation starts from. It is only
            stored here; ``set_monitor()`` later calls its
            ``get_storage_index()`` method.
        """
        self.monitor = None
        self.origin = origin
        self.stats = {
            'api-version': self.API_VERSION,
        }
        for k in ["count-immutable-files",
                  "count-mutable-files",
                  "count-literal-files",
                  "count-files",
                  "count-directories",
                  "count-unknown",
                  "size-immutable-files",
                  # "size-mutable-files" is not tracked yet, see the TODO
                  # in add_node().
                  "size-literal-files",
                  "size-directories",
                  "largest-directory",
                  "largest-directory-children",
                  "largest-immutable-file",
                  # "largest-mutable-file" is not tracked yet, see the
                  # TODO in add_node().
                  ]:
            self.stats[k] = 0
        self.histograms = {}
        for k in ["size-files-histogram"]:
            self.histograms[k] = {}  # maps (min,max) to count
        # Known histogram buckets as inclusive (min, max) pairs, in
        # ascending order; which_bucket() extends this list on demand.
        self.buckets = [(0, 0), (1, 3)]
        # Growth factor handed to mathutil.next_power_of_k() when a new
        # bucket is created: sqrt(10), i.e. two buckets per decade.
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach a monitor and publish the current results to it.

        Stores ``monitor`` on ``self``, sets its ``origin_si`` attribute to
        the origin's storage index and passes the current results to its
        ``set_status()`` method.
        """
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Add one node's contribution to the statistics.

        The node is classified in this order: unknown node, directory,
        mutable file, immutable file (CHK and LIT). Nodes matching none of
        these are ignored.

        :param node: the node to account for.
        :param childpath: accepted for the caller's benefit; not used here.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record size statistics for a directory.

        :param parent: the directory node; its ``get_size()`` result is
            used as the directory size in bytes and is skipped when it is
            None.
        :param children: the directory's children; only ``len(children)``
            is used.
        """
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increase the statistic ``key`` by ``value`` (default 1).

        Raises KeyError if ``key`` is not an existing entry of
        ``self.stats``.
        """
        self.stats[key] += value

    def max(self, key, value):
        """Raise the statistic ``key`` to ``value`` if ``value`` is larger."""
        # Inside a method body the bare name ``max`` resolves to the
        # builtin, not to this method, so there is no recursion here.
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Return the histogram bucket ``(min, max)`` containing ``size``.

        The result satisfies ``min <= size <= max``. Values are from the
        set (0,0), (1,3), (4,10), (11,31), (32,100), (101,316),
        (317, 1000), etc: two per decade. Buckets beyond the ones already
        known are created on demand and cached in ``self.buckets``.

        ``size`` must be >= 0; this is enforced with an assert.
        NOTE(review): buckets cover integer ranges only (each lower bound
        is the previous upper bound + 1), so ``size`` is presumably always
        an integer -- confirm against callers.
        """
        assert size >= 0
        i = 0
        while True:
            if i >= len(self.buckets):
                # extend the list: the new bucket starts right after the
                # last known one and ends at the next power of self.root
                new_lower = self.buckets[i-1][1]+1
                new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
                self.buckets.append((new_lower, new_upper))
            maybe = self.buckets[i]
            if maybe[0] <= size <= maybe[1]:
                return maybe
            i += 1

    def histogram(self, key, size):
        """Count ``size`` in the histogram named ``key``.

        Raises KeyError if ``key`` is not an existing entry of
        ``self.histograms``.
        """
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        h[bucket] = h.get(bucket, 0) + 1

    def get_results(self):
        """Return the deep-stats results as a new dict.

        The dict is a shallow copy of ``self.stats`` in which every
        histogram is added under its own key as a sorted list of
        ``(min, max, count)`` tuples.
        """
        stats = self.stats.copy()
        for key, h in self.histograms.items():
            stats[key] = sorted(
                (bucket[0], bucket[1], count) for bucket, count in h.items()
            )
        return stats

    def finish(self):
        """Finish gathering stats and return the results.

        Returns the same value as ``get_results()``.
        """
        return self.get_results()
|