Merge PR382

refs ticket:567
Closes #382
Brian Warner 2016-12-23 23:07:43 -05:00
commit 51eae34f02
5 changed files with 161 additions and 126 deletions

docs/frontends/webapi.rst

@@ -1751,6 +1751,9 @@ incorrectly.
keys may be missing until 'finished' is True)::
finished: (bool) True if the operation has finished, else False
api-version: (int), version number of the deep-stats API. Will be increased every
time a backwards-incompatible change is introduced.
Current version is 1.
count-immutable-files: count of how many CHK files are in the set
count-mutable-files: same, for mutable files (does not include directories)
count-literal-files: same, for LIT files (data contained inside the URI)
@@ -2192,4 +2195,3 @@ URLs and HTTP and UTF-8
.. _RFC2231#4: https://tools.ietf.org/html/rfc2231#section-4
.. _some developers have reported: http://markmail.org/message/dsjyokgl7hv64ig3
.. _RFC2616#19.5.1: https://tools.ietf.org/html/rfc2616#section-19.5.1
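
The new 'api-version' field documented above gives clients a way to notice a
backwards-incompatible change before trusting the rest of the payload. A minimal
client-side sketch (not part of this commit; read_deep_stats and
SUPPORTED_API_VERSION are illustrative names, and 'stats' is assumed to be the
already-parsed JSON body of a deep-stats operation):

SUPPORTED_API_VERSION = 1

def read_deep_stats(stats):
    # 'stats' is the parsed JSON returned by a deep-stats operation.
    version = stats.get("api-version", 1)
    if version > SUPPORTED_API_VERSION:
        raise ValueError("unsupported deep-stats api-version: %d" % version)
    if not stats["finished"]:
        # Operation still running; other keys may be missing.
        return None
    return (stats["count-files"], stats["count-directories"])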

src/allmydata/deep_stats.py (new file, 135 lines)

@@ -0,0 +1,135 @@
"""Implementation of the deep stats class."""
import math
from allmydata.interfaces import IImmutableFileNode
from allmydata.interfaces import IMutableFileNode
from allmydata.interfaces import IDirectoryNode
from allmydata.unknown import UnknownNode
from allmydata.uri import LiteralFileURI
from allmydata.uri import from_string
from allmydata.util import mathutil
class DeepStats(object):
"""Deep stats object.
Holds results of the deep-stats operation.
Used for JSON generation in the API."""
# Json API version.
# Rules:
# - increment each time a field is removed or changes meaning.
# - it's ok to add a new field without incrementing the version.
API_VERSION = 1
def __init__(self, origin):
"""Initializes DeepStats object. Sets most of the fields to 0."""
self.monitor = None
self.origin = origin
self.stats = {
'api-version': self.API_VERSION
}
for k in ["count-immutable-files",
"count-mutable-files",
"count-literal-files",
"count-files",
"count-directories",
"count-unknown",
"size-immutable-files",
#"size-mutable-files",
"size-literal-files",
"size-directories",
"largest-directory",
"largest-directory-children",
"largest-immutable-file",
#"largest-mutable-file",
]:
self.stats[k] = 0
self.histograms = {}
for k in ["size-files-histogram"]:
self.histograms[k] = {} # maps (min,max) to count
self.buckets = [(0, 0), (1, 3)]
self.root = math.sqrt(10)
def set_monitor(self, monitor):
"""Sets a new monitor."""
self.monitor = monitor
monitor.origin_si = self.origin.get_storage_index()
monitor.set_status(self.get_results())
def add_node(self, node, childpath):
"""Adds a node's stats to calculation."""
if isinstance(node, UnknownNode):
self.add("count-unknown")
elif IDirectoryNode.providedBy(node):
self.add("count-directories")
elif IMutableFileNode.providedBy(node):
self.add("count-files")
self.add("count-mutable-files")
# TODO: update the servermap, compute a size, add it to
# size-mutable-files, max it into "largest-mutable-file"
elif IImmutableFileNode.providedBy(node): # CHK and LIT
self.add("count-files")
size = node.get_size()
self.histogram("size-files-histogram", size)
theuri = from_string(node.get_uri())
if isinstance(theuri, LiteralFileURI):
self.add("count-literal-files")
self.add("size-literal-files", size)
else:
self.add("count-immutable-files")
self.add("size-immutable-files", size)
self.max("largest-immutable-file", size)
def enter_directory(self, parent, children):
"""Adds directory stats."""
dirsize_bytes = parent.get_size()
if dirsize_bytes is not None:
self.add("size-directories", dirsize_bytes)
self.max("largest-directory", dirsize_bytes)
dirsize_children = len(children)
self.max("largest-directory-children", dirsize_children)
def add(self, key, value=1):
self.stats[key] += value
def max(self, key, value):
self.stats[key] = max(self.stats[key], value)
def which_bucket(self, size):
# return (min,max) such that min <= size <= max
# values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
# (101,316), (317, 1000), etc: two per decade
assert size >= 0
i = 0
while True:
if i >= len(self.buckets):
# extend the list
new_lower = self.buckets[i-1][1]+1
new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
self.buckets.append((new_lower, new_upper))
maybe = self.buckets[i]
if maybe[0] <= size <= maybe[1]:
return maybe
i += 1
def histogram(self, key, size):
bucket = self.which_bucket(size)
h = self.histograms[key]
if bucket not in h:
h[bucket] = 0
h[bucket] += 1
def get_results(self):
"""Returns deep-stats resutls."""
stats = self.stats.copy()
for key in self.histograms:
h = self.histograms[key]
out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
out.sort()
stats[key] = out
return stats
def finish(self):
"""Finishes gathering stats."""
return self.get_results()
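
As a quick illustration of the bucket scheme described in which_bucket (two
buckets per decade), the following sketch exercises the new class directly. It is
not part of the diff; the origin argument is only used by set_monitor(), so None
is enough here:

from allmydata.deep_stats import DeepStats

stats = DeepStats(None)   # origin is only needed once set_monitor() is called
for size in [0, 2, 7, 19, 150]:
    stats.histogram("size-files-histogram", size)

print stats.which_bucket(19)
# (11, 31)
print stats.get_results()["size-files-histogram"]
# [(0, 0, 1), (1, 3, 1), (4, 10, 1), (11, 31, 1), (101, 316, 1)]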

src/allmydata/dirnode.py

@@ -1,33 +1,36 @@
+"""Directory Node implementation."""
-import time, math, unicodedata
+import time, unicodedata
from zope.interface import implements
from twisted.internet import defer
from foolscap.api import fireEventually
import simplejson
+from allmydata.deep_stats import DeepStats
from allmydata.mutable.common import NotWriteableError
from allmydata.mutable.filenode import MutableFileNode
from allmydata.unknown import UnknownNode, strip_prefix_for_ro
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
-IImmutableFileNode, IMutableFileNode, \
ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
from allmydata.check_results import DeepCheckResults, \
DeepCheckAndRepairResults
from allmydata.monitor import Monitor
-from allmydata.util import hashutil, mathutil, base32, log
+from allmydata.util import hashutil, base32, log
from allmydata.util.encodingutil import quote_output
from allmydata.util.assertutil import precondition
from allmydata.util.netstring import netstring, split_netstring
from allmydata.util.consumer import download_to_data
-from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap
+from allmydata.uri import wrap_dirnode_cap
from pycryptopp.cipher.aes import AES
from allmydata.util.dictutil import AuxValueDict
def update_metadata(metadata, new_metadata, now):
"""Updates 'metadata' in-place with the information in 'new_metadata'.
-Timestamps are set according to the time 'now'."""
+Timestamps are set according to the time 'now'.
+"""
if metadata is None:
metadata = {}
@@ -364,8 +367,8 @@ class DirectoryNode:
children.set_with_aux(name, (child, metadata), auxilliary=entry)
else:
log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
name=quote_output(name, encoding='utf-8'),
facility="tahoe.webish", level=log.UNUSUAL)
except CapConstraintError, e:
log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
"%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
@@ -794,111 +797,6 @@ class DirectoryNode:
return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
class DeepStats:
def __init__(self, origin):
self.origin = origin
self.stats = {}
for k in ["count-immutable-files",
"count-mutable-files",
"count-literal-files",
"count-files",
"count-directories",
"count-unknown",
"size-immutable-files",
#"size-mutable-files",
"size-literal-files",
"size-directories",
"largest-directory",
"largest-directory-children",
"largest-immutable-file",
#"largest-mutable-file",
]:
self.stats[k] = 0
self.histograms = {}
for k in ["size-files-histogram"]:
self.histograms[k] = {} # maps (min,max) to count
self.buckets = [ (0,0), (1,3)]
self.root = math.sqrt(10)
def set_monitor(self, monitor):
self.monitor = monitor
monitor.origin_si = self.origin.get_storage_index()
monitor.set_status(self.get_results())
def add_node(self, node, childpath):
if isinstance(node, UnknownNode):
self.add("count-unknown")
elif IDirectoryNode.providedBy(node):
self.add("count-directories")
elif IMutableFileNode.providedBy(node):
self.add("count-files")
self.add("count-mutable-files")
# TODO: update the servermap, compute a size, add it to
# size-mutable-files, max it into "largest-mutable-file"
elif IImmutableFileNode.providedBy(node): # CHK and LIT
self.add("count-files")
size = node.get_size()
self.histogram("size-files-histogram", size)
theuri = from_string(node.get_uri())
if isinstance(theuri, LiteralFileURI):
self.add("count-literal-files")
self.add("size-literal-files", size)
else:
self.add("count-immutable-files")
self.add("size-immutable-files", size)
self.max("largest-immutable-file", size)
def enter_directory(self, parent, children):
dirsize_bytes = parent.get_size()
if dirsize_bytes is not None:
self.add("size-directories", dirsize_bytes)
self.max("largest-directory", dirsize_bytes)
dirsize_children = len(children)
self.max("largest-directory-children", dirsize_children)
def add(self, key, value=1):
self.stats[key] += value
def max(self, key, value):
self.stats[key] = max(self.stats[key], value)
def which_bucket(self, size):
# return (min,max) such that min <= size <= max
# values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
# (101,316), (317, 1000), etc: two per decade
assert size >= 0
i = 0
while True:
if i >= len(self.buckets):
# extend the list
new_lower = self.buckets[i-1][1]+1
new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
self.buckets.append( (new_lower, new_upper) )
maybe = self.buckets[i]
if maybe[0] <= size <= maybe[1]:
return maybe
i += 1
def histogram(self, key, size):
bucket = self.which_bucket(size)
h = self.histograms[key]
if bucket not in h:
h[bucket] = 0
h[bucket] += 1
def get_results(self):
stats = self.stats.copy()
for key in self.histograms:
h = self.histograms[key]
out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
out.sort()
stats[key] = out
return stats
def finish(self):
return self.get_results()
class ManifestWalker(DeepStats):
def __init__(self, origin):
DeepStats.__init__(self, origin)
@@ -968,5 +866,3 @@ class DeepChecker:
# use client.create_dirnode() to make one of these
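
Because dirnode.py now imports DeepStats from allmydata.deep_stats (see the
import hunk above), subclasses such as ManifestWalker keep working without
further changes. A hedged sketch of the same extension pattern; the subclass
name and its extra bookkeeping below are illustrative, not the real
ManifestWalker internals:

from allmydata.deep_stats import DeepStats

class PathRecordingStats(DeepStats):
    """Illustrative subclass: records the path of every node it sees."""
    def __init__(self, origin):
        DeepStats.__init__(self, origin)
        self.paths = []

    def add_node(self, node, childpath):
        # Record the path, then let DeepStats update its counters.
        self.paths.append(tuple(childpath))
        return DeepStats.add_node(self, node, childpath)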

src/allmydata/test/test_dirnode.py

@@ -1,3 +1,5 @@
"""Tests for the dirnode module."""
import time
import unicodedata
from zope.interface import implements

src/allmydata/test/test_web.py

@@ -1815,6 +1815,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
#"largest-directory": 1590,
"largest-directory-children": 8,
"largest-immutable-file": 19,
"api-version": 1,
}
for k,v in expected.iteritems():
self.failUnlessReallyEqual(stats[k], v,
@@ -4442,4 +4443,3 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
# doesn't reveal anything. This addresses #1720.
d.addCallback(lambda e: self.assertEquals(str(e), "404 Not Found"))
return d