mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-06-21 16:39:38 +00:00
Move DeepStats to separate file as dirnode is a bit too large to work with comfortably.
This commit is contained in:
committed by
Brian Warner
parent
0cea91d737
commit
73fabaec19
120
src/allmydata/deep_stats.py
Normal file
120
src/allmydata/deep_stats.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
"""Implementation of the deep stats class."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from allmydata.interfaces import IImmutableFileNode
|
||||||
|
from allmydata.interfaces import IMutableFileNode
|
||||||
|
from allmydata.interfaces import IDirectoryNode
|
||||||
|
from allmydata.unknown import UnknownNode
|
||||||
|
from allmydata.uri import LiteralFileURI
|
||||||
|
from allmydata.uri import from_string
|
||||||
|
from allmydata.util import mathutil
|
||||||
|
|
||||||
|
class DeepStats:
    """Accumulator for the results of a deep-stats operation.

    Counters, maxima and a file-size histogram are collected through
    add_node() and enter_directory(); get_results() flattens them into a
    single dict. Used for json generation in the API.
    """

    def __init__(self, origin):
        """Start with all counters at zero and an empty size histogram.

        origin is stored as-is; set_monitor() later asks it for its
        storage index.
        """
        self.origin = origin
        counter_names = [
            "count-immutable-files",
            "count-mutable-files",
            "count-literal-files",
            "count-files",
            "count-directories",
            "count-unknown",
            "size-immutable-files",
            #"size-mutable-files",
            "size-literal-files",
            "size-directories",
            "largest-directory",
            "largest-directory-children",
            "largest-immutable-file",
            #"largest-mutable-file",
        ]
        self.stats = dict.fromkeys(counter_names, 0)
        # each histogram maps (min,max) to count
        self.histograms = dict((name, {}) for name in ["size-files-histogram"])
        # Known bucket boundaries; which_bucket() appends more on demand.
        self.buckets = [(0, 0), (1, 3)]
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach a monitor and hand it the current results."""
        self.monitor = monitor
        storage_index = self.origin.get_storage_index()
        monitor.origin_si = storage_index
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Update the counters for one node.

        childpath is accepted but not used here. Nodes matching none of
        the checks below leave the counters untouched.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
            return
        if IDirectoryNode.providedBy(node):
            self.add("count-directories")
            return
        if IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
            return
        if not IImmutableFileNode.providedBy(node):
            return

        # Immutable files: CHK and LIT
        self.add("count-files")
        size = node.get_size()
        self.histogram("size-files-histogram", size)
        parsed_uri = from_string(node.get_uri())
        if isinstance(parsed_uri, LiteralFileURI):
            self.add("count-literal-files")
            self.add("size-literal-files", size)
            return
        self.add("count-immutable-files")
        self.add("size-immutable-files", size)
        self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record the size of a directory, in bytes and in children.

        The byte-size statistics are skipped when parent.get_size()
        returns None; the child count is always recorded.
        """
        byte_size = parent.get_size()
        if byte_size is not None:
            self.add("size-directories", byte_size)
            self.max("largest-directory", byte_size)
        self.max("largest-directory-children", len(children))

    def add(self, key, value=1):
        """Increase the counter named key by value (default 1)."""
        self.stats[key] = self.stats[key] + value

    def max(self, key, value):
        """Raise the statistic named key to value if value is larger."""
        current = self.stats[key]
        # Inside a method body the bare name max is still the builtin.
        self.stats[key] = max(current, value)

    def which_bucket(self, size):
        """Return the (min,max) bucket such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317, 1000), etc: two per decade. Buckets beyond the
        ones already known are appended to self.buckets as needed.
        """
        assert size >= 0
        # First try the buckets that have already been computed.
        for known in self.buckets:
            if known[0] <= size <= known[1]:
                return known
        # Not found: keep extending the list until a bucket holds size.
        while True:
            lower = self.buckets[-1][1] + 1
            upper = int(mathutil.next_power_of_k(lower, self.root))
            fresh = (lower, upper)
            self.buckets.append(fresh)
            if lower <= size <= upper:
                return fresh

    def histogram(self, key, size):
        """Count one item of the given size in the histogram named key."""
        bucket = self.which_bucket(size)
        counts = self.histograms[key]
        counts[bucket] = counts.get(bucket, 0) + 1

    def get_results(self):
        """Return a copy of the stats plus the histograms.

        Each histogram is rendered as a sorted list of (min, max, count)
        tuples stored under the histogram's own key.
        """
        results = self.stats.copy()
        for name in self.histograms:
            counts = self.histograms[name]
            results[name] = sorted(
                (bucket[0], bucket[1], counts[bucket]) for bucket in counts
            )
        return results

    def finish(self):
        """Return the final results; same as get_results()."""
        return self.get_results()
|
@ -1,26 +1,26 @@
|
|||||||
|
import time, unicodedata
|
||||||
import time, math, unicodedata
|
|
||||||
|
|
||||||
from zope.interface import implements
|
from zope.interface import implements
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
from foolscap.api import fireEventually
|
from foolscap.api import fireEventually
|
||||||
import simplejson
|
import simplejson
|
||||||
|
|
||||||
|
from allmydata.deep_stats import DeepStats
|
||||||
from allmydata.mutable.common import NotWriteableError
|
from allmydata.mutable.common import NotWriteableError
|
||||||
from allmydata.mutable.filenode import MutableFileNode
|
from allmydata.mutable.filenode import MutableFileNode
|
||||||
from allmydata.unknown import UnknownNode, strip_prefix_for_ro
|
from allmydata.unknown import UnknownNode, strip_prefix_for_ro
|
||||||
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
|
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
|
||||||
IImmutableFileNode, IMutableFileNode, \
|
|
||||||
ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
|
ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
|
||||||
MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
|
MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
|
||||||
from allmydata.check_results import DeepCheckResults, \
|
from allmydata.check_results import DeepCheckResults, \
|
||||||
DeepCheckAndRepairResults
|
DeepCheckAndRepairResults
|
||||||
from allmydata.monitor import Monitor
|
from allmydata.monitor import Monitor
|
||||||
from allmydata.util import hashutil, mathutil, base32, log
|
from allmydata.util import hashutil, base32, log
|
||||||
from allmydata.util.encodingutil import quote_output
|
from allmydata.util.encodingutil import quote_output
|
||||||
from allmydata.util.assertutil import precondition
|
from allmydata.util.assertutil import precondition
|
||||||
from allmydata.util.netstring import netstring, split_netstring
|
from allmydata.util.netstring import netstring, split_netstring
|
||||||
from allmydata.util.consumer import download_to_data
|
from allmydata.util.consumer import download_to_data
|
||||||
from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap
|
from allmydata.uri import wrap_dirnode_cap
|
||||||
from pycryptopp.cipher.aes import AES
|
from pycryptopp.cipher.aes import AES
|
||||||
from allmydata.util.dictutil import AuxValueDict
|
from allmydata.util.dictutil import AuxValueDict
|
||||||
|
|
||||||
@ -794,111 +794,6 @@ class DirectoryNode:
|
|||||||
return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
|
return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DeepStats:
    """Accumulator for the results of a deep-stats operation.

    Counters, maxima and a file-size histogram are collected through
    add_node() and enter_directory(); get_results() flattens them into a
    single dict.
    """

    def __init__(self, origin):
        """Start with all counters at zero and an empty size histogram.

        origin is stored as-is; set_monitor() later asks it for its
        storage index.
        """
        self.origin = origin
        counter_names = [
            "count-immutable-files",
            "count-mutable-files",
            "count-literal-files",
            "count-files",
            "count-directories",
            "count-unknown",
            "size-immutable-files",
            #"size-mutable-files",
            "size-literal-files",
            "size-directories",
            "largest-directory",
            "largest-directory-children",
            "largest-immutable-file",
            #"largest-mutable-file",
        ]
        self.stats = dict.fromkeys(counter_names, 0)
        # each histogram maps (min,max) to count
        self.histograms = dict((name, {}) for name in ["size-files-histogram"])
        # Known bucket boundaries; which_bucket() appends more on demand.
        self.buckets = [(0, 0), (1, 3)]
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach a monitor and hand it the current results."""
        self.monitor = monitor
        storage_index = self.origin.get_storage_index()
        monitor.origin_si = storage_index
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Update the counters for one node.

        childpath is accepted but not used here. Nodes matching none of
        the checks below leave the counters untouched.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
            return
        if IDirectoryNode.providedBy(node):
            self.add("count-directories")
            return
        if IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
            return
        if not IImmutableFileNode.providedBy(node):
            return

        # Immutable files: CHK and LIT
        self.add("count-files")
        size = node.get_size()
        self.histogram("size-files-histogram", size)
        parsed_uri = from_string(node.get_uri())
        if isinstance(parsed_uri, LiteralFileURI):
            self.add("count-literal-files")
            self.add("size-literal-files", size)
            return
        self.add("count-immutable-files")
        self.add("size-immutable-files", size)
        self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record the size of a directory, in bytes and in children.

        The byte-size statistics are skipped when parent.get_size()
        returns None; the child count is always recorded.
        """
        byte_size = parent.get_size()
        if byte_size is not None:
            self.add("size-directories", byte_size)
            self.max("largest-directory", byte_size)
        self.max("largest-directory-children", len(children))

    def add(self, key, value=1):
        """Increase the counter named key by value (default 1)."""
        self.stats[key] = self.stats[key] + value

    def max(self, key, value):
        """Raise the statistic named key to value if value is larger."""
        current = self.stats[key]
        # Inside a method body the bare name max is still the builtin.
        self.stats[key] = max(current, value)

    def which_bucket(self, size):
        """Return the (min,max) bucket such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317, 1000), etc: two per decade. Buckets beyond the
        ones already known are appended to self.buckets as needed.
        """
        assert size >= 0
        # First try the buckets that have already been computed.
        for known in self.buckets:
            if known[0] <= size <= known[1]:
                return known
        # Not found: keep extending the list until a bucket holds size.
        while True:
            lower = self.buckets[-1][1] + 1
            upper = int(mathutil.next_power_of_k(lower, self.root))
            fresh = (lower, upper)
            self.buckets.append(fresh)
            if lower <= size <= upper:
                return fresh

    def histogram(self, key, size):
        """Count one item of the given size in the histogram named key."""
        bucket = self.which_bucket(size)
        counts = self.histograms[key]
        counts[bucket] = counts.get(bucket, 0) + 1

    def get_results(self):
        """Return a copy of the stats plus the histograms.

        Each histogram is rendered as a sorted list of (min, max, count)
        tuples stored under the histogram's own key.
        """
        results = self.stats.copy()
        for name in self.histograms:
            counts = self.histograms[name]
            results[name] = sorted(
                (bucket[0], bucket[1], counts[bucket]) for bucket in counts
            )
        return results

    def finish(self):
        """Return the final results; same as get_results()."""
        return self.get_results()
|
|
||||||
|
|
||||||
class ManifestWalker(DeepStats):
|
class ManifestWalker(DeepStats):
|
||||||
def __init__(self, origin):
|
def __init__(self, origin):
|
||||||
DeepStats.__init__(self, origin)
|
DeepStats.__init__(self, origin)
|
||||||
@ -968,5 +863,3 @@ class DeepChecker:
|
|||||||
|
|
||||||
|
|
||||||
# use client.create_dirnode() to make one of these
|
# use client.create_dirnode() to make one of these
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user