"""Implementation of the deep stats class. Ported to Python 3. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from future.utils import PY2 if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import math from allmydata.interfaces import IImmutableFileNode from allmydata.interfaces import IMutableFileNode from allmydata.interfaces import IDirectoryNode from allmydata.unknown import UnknownNode from allmydata.uri import LiteralFileURI from allmydata.uri import from_string from allmydata.util import mathutil class DeepStats(object): """Deep stats object. Holds results of the deep-stats operation. Used for json generation in the API.""" # Json API version. # Rules: # - increment each time a field is removed or changes meaning. # - it's ok to add a new field without incrementing the version. API_VERSION = 1 def __init__(self, origin): """Initializes DeepStats object. Sets most of the fields to 0.""" self.monitor = None self.origin = origin self.stats = { 'api-version': self.API_VERSION } for k in ["count-immutable-files", "count-mutable-files", "count-literal-files", "count-files", "count-directories", "count-unknown", "size-immutable-files", #"size-mutable-files", "size-literal-files", "size-directories", "largest-directory", "largest-directory-children", "largest-immutable-file", #"largest-mutable-file", ]: self.stats[k] = 0 self.histograms = {} for k in ["size-files-histogram"]: self.histograms[k] = {} # maps (min,max) to count self.buckets = [(0, 0), (1, 3)] self.root = math.sqrt(10) def set_monitor(self, monitor): """Sets a new monitor.""" self.monitor = monitor monitor.origin_si = self.origin.get_storage_index() monitor.set_status(self.get_results()) def add_node(self, node, childpath): """Adds a node's stats to calculation.""" if isinstance(node, UnknownNode): self.add("count-unknown") elif IDirectoryNode.providedBy(node): self.add("count-directories") elif IMutableFileNode.providedBy(node): self.add("count-files") self.add("count-mutable-files") # TODO: update the servermap, compute a size, add it to # size-mutable-files, max it into "largest-mutable-file" elif IImmutableFileNode.providedBy(node): # CHK and LIT self.add("count-files") size = node.get_size() self.histogram("size-files-histogram", size) theuri = from_string(node.get_uri()) if isinstance(theuri, LiteralFileURI): self.add("count-literal-files") self.add("size-literal-files", size) else: self.add("count-immutable-files") self.add("size-immutable-files", size) self.max("largest-immutable-file", size) def enter_directory(self, parent, children): """Adds directory stats.""" dirsize_bytes = parent.get_size() if dirsize_bytes is not None: self.add("size-directories", dirsize_bytes) self.max("largest-directory", dirsize_bytes) dirsize_children = len(children) self.max("largest-directory-children", dirsize_children) def add(self, key, value=1): self.stats[key] += value def max(self, key, value): self.stats[key] = max(self.stats[key], value) def which_bucket(self, size): # return (min,max) such that min <= size <= max # values are from the set (0,0), (1,3), (4,10), (11,31), (32,100), # (101,316), (317, 1000), etc: two per decade assert size >= 0 i = 0 while True: if i >= len(self.buckets): # extend the list new_lower = self.buckets[i-1][1]+1 new_upper = int(mathutil.next_power_of_k(new_lower, self.root)) self.buckets.append((new_lower, new_upper)) maybe = self.buckets[i] if maybe[0] <= size <= maybe[1]: return maybe i += 1 def histogram(self, key, size): bucket = self.which_bucket(size) h = self.histograms[key] if bucket not in h: h[bucket] = 0 h[bucket] += 1 def get_results(self): """Returns deep-stats results.""" stats = self.stats.copy() for key in self.histograms: h = self.histograms[key] out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ] out.sort() stats[key] = out return stats def finish(self): """Finishes gathering stats.""" return self.get_results()