Move DeepStats to a separate file, as dirnode is a bit too large to work with comfortably.

This commit is contained in:
Vladimir Rusinov
2016-11-29 14:12:27 +00:00
committed by Brian Warner
parent 0cea91d737
commit 73fabaec19
2 changed files with 125 additions and 112 deletions

120
src/allmydata/deep_stats.py Normal file
View File

@ -0,0 +1,120 @@
"""Implementation of the deep stats class."""
import math
from allmydata.interfaces import IImmutableFileNode
from allmydata.interfaces import IMutableFileNode
from allmydata.interfaces import IDirectoryNode
from allmydata.unknown import UnknownNode
from allmydata.uri import LiteralFileURI
from allmydata.uri import from_string
from allmydata.util import mathutil
class DeepStats:
    """Deep stats object.

    Holds results of the deep-stats operation.
    Used for json generation in the API.

    The object keeps two tables:

    * ``self.stats`` maps a counter name (for example "count-files" or
      "largest-directory") to an integer, updated through add() and max().
    * ``self.histograms`` maps a histogram name to a dict that maps a
      ``(min, max)`` size bucket to the number of samples seen in it.

    get_results() merges both tables into one plain dict.
    """

    def __init__(self, origin):
        """Create an accumulator with every counter set to zero.

        origin: object whose get_storage_index() is called by
            set_monitor(); it is not used anywhere else in this class.
        """
        self.origin = origin
        self.stats = dict.fromkeys([
            "count-immutable-files",
            "count-mutable-files",
            "count-literal-files",
            "count-files",
            "count-directories",
            "count-unknown",
            "size-immutable-files",
            #"size-mutable-files",
            "size-literal-files",
            "size-directories",
            "largest-directory",
            "largest-directory-children",
            "largest-immutable-file",
            #"largest-mutable-file",
        ], 0)
        self.histograms = {}
        for k in ["size-files-histogram"]:
            self.histograms[k] = {}  # maps (min,max) to count
        # Known size buckets, in ascending order; which_bucket() appends
        # further buckets on demand.
        self.buckets = [(0, 0), (1, 3)]
        # Each new bucket's upper bound is a power of sqrt(10), which
        # yields two buckets per decade.
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach a monitor and publish the current results to it.

        Stores the monitor on self, records the origin's storage index on
        the monitor as ``origin_si`` and passes get_results() to
        monitor.set_status().
        """
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Update the counters for one visited node.

        node: the visited node; it is classified as unknown, directory,
            mutable file or immutable file (CHK or LIT), in that order.
        childpath: accepted for the caller's benefit, not used here.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record the size of a directory and its number of children.

        parent: directory node; its get_size() result is skipped when it
            is None.
        children: sized container of the directory's children.
        """
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increase counter `key` by `value` (default 1)."""
        self.stats[key] += value

    def max(self, key, value):
        """Raise counter `key` to `value` if `value` is larger."""
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Return the (min, max) bucket such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317, 1000), etc: two per decade.  Missing buckets are
        appended to self.buckets as needed.

        Raises ValueError when `size` is not >= 0.
        """
        # An explicit check rather than `assert`: asserts are removed
        # under `python -O`, and a negative size matches no bucket, so the
        # loop below would extend self.buckets without ever terminating.
        # Written as `not size >= 0` so it rejects exactly the values the
        # former assertion rejected.
        if not size >= 0:
            raise ValueError("size must be >= 0, got %r" % (size,))
        i = 0
        while True:
            if i >= len(self.buckets):
                # extend the list
                new_lower = self.buckets[i - 1][1] + 1
                new_upper = int(mathutil.next_power_of_k(new_lower,
                                                         self.root))
                self.buckets.append((new_lower, new_upper))
            maybe = self.buckets[i]
            if maybe[0] <= size <= maybe[1]:
                return maybe
            i += 1

    def histogram(self, key, size):
        """Count one sample of `size` in histogram `key`."""
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        h[bucket] = h.get(bucket, 0) + 1

    def get_results(self):
        """Return a new dict with all counters and histograms.

        Each histogram is included under its own key as a sorted list of
        (min, max, count) tuples.  The counter table is copied, so later
        updates do not alter a dict returned earlier.
        """
        stats = self.stats.copy()
        for key, h in self.histograms.items():
            stats[key] = sorted((low, high, count)
                                for (low, high), count in h.items())
        return stats

    def finish(self):
        """Return the final results; same value as get_results()."""
        return self.get_results()

View File

@ -1,26 +1,26 @@
import time, unicodedata
import time, math, unicodedata
from zope.interface import implements from zope.interface import implements
from twisted.internet import defer from twisted.internet import defer
from foolscap.api import fireEventually from foolscap.api import fireEventually
import simplejson import simplejson
from allmydata.deep_stats import DeepStats
from allmydata.mutable.common import NotWriteableError from allmydata.mutable.common import NotWriteableError
from allmydata.mutable.filenode import MutableFileNode from allmydata.mutable.filenode import MutableFileNode
from allmydata.unknown import UnknownNode, strip_prefix_for_ro from allmydata.unknown import UnknownNode, strip_prefix_for_ro
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \ from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
IImmutableFileNode, IMutableFileNode, \
ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \ ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
from allmydata.check_results import DeepCheckResults, \ from allmydata.check_results import DeepCheckResults, \
DeepCheckAndRepairResults DeepCheckAndRepairResults
from allmydata.monitor import Monitor from allmydata.monitor import Monitor
from allmydata.util import hashutil, mathutil, base32, log from allmydata.util import hashutil, base32, log
from allmydata.util.encodingutil import quote_output from allmydata.util.encodingutil import quote_output
from allmydata.util.assertutil import precondition from allmydata.util.assertutil import precondition
from allmydata.util.netstring import netstring, split_netstring from allmydata.util.netstring import netstring, split_netstring
from allmydata.util.consumer import download_to_data from allmydata.util.consumer import download_to_data
from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap from allmydata.uri import wrap_dirnode_cap
from pycryptopp.cipher.aes import AES from pycryptopp.cipher.aes import AES
from allmydata.util.dictutil import AuxValueDict from allmydata.util.dictutil import AuxValueDict
@ -794,111 +794,6 @@ class DirectoryNode:
return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease)) return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
class DeepStats:
    """Accumulator for the results of a deep-stats traversal.

    ``self.stats`` maps counter names (for example "count-files" or
    "largest-directory") to integers updated through add() and max();
    ``self.histograms`` maps a histogram name to a dict from a
    ``(min, max)`` size bucket to a sample count.  get_results() merges
    both into one plain dict.
    """

    def __init__(self, origin):
        """Create an accumulator with every counter set to zero.

        origin: object whose get_storage_index() is called by
            set_monitor(); it is not used anywhere else in this class.
        """
        self.origin = origin
        self.stats = dict.fromkeys([
            "count-immutable-files",
            "count-mutable-files",
            "count-literal-files",
            "count-files",
            "count-directories",
            "count-unknown",
            "size-immutable-files",
            #"size-mutable-files",
            "size-literal-files",
            "size-directories",
            "largest-directory",
            "largest-directory-children",
            "largest-immutable-file",
            #"largest-mutable-file",
        ], 0)
        self.histograms = {}
        for k in ["size-files-histogram"]:
            self.histograms[k] = {}  # maps (min,max) to count
        # Known size buckets, in ascending order; which_bucket() appends
        # further buckets on demand.
        self.buckets = [(0, 0), (1, 3)]
        # Each new bucket's upper bound is a power of sqrt(10), which
        # yields two buckets per decade.
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach a monitor and publish the current results to it.

        Stores the monitor on self, records the origin's storage index on
        the monitor as ``origin_si`` and passes get_results() to
        monitor.set_status().
        """
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Update the counters for one visited node.

        node: the visited node; it is classified as unknown, directory,
            mutable file or immutable file (CHK or LIT), in that order.
        childpath: accepted for the caller's benefit, not used here.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record the size of a directory and its number of children.

        parent: directory node; its get_size() result is skipped when it
            is None.
        children: sized container of the directory's children.
        """
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increase counter `key` by `value` (default 1)."""
        self.stats[key] += value

    def max(self, key, value):
        """Raise counter `key` to `value` if `value` is larger."""
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Return the (min, max) bucket such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317, 1000), etc: two per decade.  Missing buckets are
        appended to self.buckets as needed.

        Raises ValueError when `size` is not >= 0.
        """
        # An explicit check rather than `assert`: asserts are removed
        # under `python -O`, and a negative size matches no bucket, so the
        # loop below would extend self.buckets without ever terminating.
        # Written as `not size >= 0` so it rejects exactly the values the
        # former assertion rejected.
        if not size >= 0:
            raise ValueError("size must be >= 0, got %r" % (size,))
        i = 0
        while True:
            if i >= len(self.buckets):
                # extend the list
                new_lower = self.buckets[i - 1][1] + 1
                new_upper = int(mathutil.next_power_of_k(new_lower,
                                                         self.root))
                self.buckets.append((new_lower, new_upper))
            maybe = self.buckets[i]
            if maybe[0] <= size <= maybe[1]:
                return maybe
            i += 1

    def histogram(self, key, size):
        """Count one sample of `size` in histogram `key`."""
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        h[bucket] = h.get(bucket, 0) + 1

    def get_results(self):
        """Return a new dict with all counters and histograms.

        Each histogram is included under its own key as a sorted list of
        (min, max, count) tuples.  The counter table is copied, so later
        updates do not alter a dict returned earlier.
        """
        stats = self.stats.copy()
        for key, h in self.histograms.items():
            stats[key] = sorted((low, high, count)
                                for (low, high), count in h.items())
        return stats

    def finish(self):
        """Return the final results; same value as get_results()."""
        return self.get_results()
class ManifestWalker(DeepStats): class ManifestWalker(DeepStats):
def __init__(self, origin): def __init__(self, origin):
DeepStats.__init__(self, origin) DeepStats.__init__(self, origin)
@ -968,5 +863,3 @@ class DeepChecker:
# use client.create_dirnode() to make one of these # use client.create_dirnode() to make one of these