Merge PR382

refs ticket:567
Closes #382
This commit is contained in:
Brian Warner 2016-12-23 23:07:43 -05:00
commit 51eae34f02
5 changed files with 161 additions and 126 deletions

View File

@ -539,7 +539,7 @@ Creating a New Directory
checking that they are immutable. The "imm." prefix must not be stripped checking that they are immutable. The "imm." prefix must not be stripped
off without performing this check. (Future versions of the web-API server off without performing this check. (Future versions of the web-API server
will perform it where necessary.) will perform it where necessary.)
The cap for each child may be given either in the "rw_uri" or "ro_uri" The cap for each child may be given either in the "rw_uri" or "ro_uri"
field of the PROPDICT (not both). If a cap is given in the "rw_uri" field, field of the PROPDICT (not both). If a cap is given in the "rw_uri" field,
then the web-API server will check that it is an immutable read-cap of a then the web-API server will check that it is an immutable read-cap of a
@ -595,7 +595,7 @@ Creating a New Directory
format of the named target directory; intermediate directories, if created, format of the named target directory; intermediate directories, if created,
are created using the default mutable type setting, as configured on the are created using the default mutable type setting, as configured on the
Tahoe-LAFS server responding to the request. Tahoe-LAFS server responding to the request.
This operation will return an error if a blocking file is present at any of This operation will return an error if a blocking file is present at any of
the parent names, preventing the server from creating the necessary parent the parent names, preventing the server from creating the necessary parent
directory; or if it would require changing an immutable directory; or if directory; or if it would require changing an immutable directory; or if
@ -655,7 +655,7 @@ Creating a New Directory
the immediate parent directory already has a child named NAME. the immediate parent directory already has a child named NAME.
Note that the name= argument must be passed as a queryarg, because the POST Note that the name= argument must be passed as a queryarg, because the POST
request body is used for the initial children JSON. request body is used for the initial children JSON.
``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir-immutable&name=NAME`` ``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir-immutable&name=NAME``
@ -770,7 +770,7 @@ Getting Information About a File Or Directory (as JSON)
if and only if you have read-write access to that directory. The verify_uri if and only if you have read-write access to that directory. The verify_uri
field will be present if and only if the object has a verify-cap field will be present if and only if the object has a verify-cap
(non-distributed LIT files do not have verify-caps). (non-distributed LIT files do not have verify-caps).
If the cap is of an unknown format, then the file size and verify_uri will If the cap is of an unknown format, then the file size and verify_uri will
not be available:: not be available::
@ -861,7 +861,7 @@ When an edge is created or updated by "tahoe backup", the 'mtime' and
the UNIX "ctime" of the local file, which means the last time that the UNIX "ctime" of the local file, which means the last time that
either the contents or the metadata of the local file was changed. either the contents or the metadata of the local file was changed.
There are several ways that the 'ctime' field could be confusing: There are several ways that the 'ctime' field could be confusing:
1. You might be confused about whether it reflects the time of the creation 1. You might be confused about whether it reflects the time of the creation
of a link in the Tahoe filesystem (by a version of Tahoe < v1.7.0) or a of a link in the Tahoe filesystem (by a version of Tahoe < v1.7.0) or a
@ -917,7 +917,7 @@ Attaching an Existing File or Directory by its read- or write-cap
command). Note that "true", "t", and "1" are all synonyms for "True", and command). Note that "true", "t", and "1" are all synonyms for "True", and
"false", "f", and "0" are synonyms for "False", and the parameter is "false", "f", and "0" are synonyms for "False", and the parameter is
case-insensitive. case-insensitive.
Note that this operation does not take its child cap in the form of Note that this operation does not take its child cap in the form of
separate "rw_uri" and "ro_uri" fields. Therefore, it cannot accept a separate "rw_uri" and "ro_uri" fields. Therefore, it cannot accept a
child cap in a format unknown to the web-API server, unless its URI child cap in a format unknown to the web-API server, unless its URI
@ -966,7 +966,7 @@ Adding Multiple Files or Directories to a Parent Directory at Once
currently placed here are "linkcrtime" and "linkmotime". For details, see currently placed here are "linkcrtime" and "linkmotime". For details, see
the section above entitled "Getting Information About a File Or Directory (as the section above entitled "Getting Information About a File Or Directory (as
JSON)", in the "About the metadata" subsection. JSON)", in the "About the metadata" subsection.
Note that this command was introduced with the name "set_children", which Note that this command was introduced with the name "set_children", which
uses an underscore rather than a hyphen as other multi-word command names uses an underscore rather than a hyphen as other multi-word command names
do. The variant with a hyphen is now accepted, but clients that desire do. The variant with a hyphen is now accepted, but clients that desire
@ -1155,7 +1155,7 @@ Uploading a File
The file must be provided as the "file" field of an HTML encoded form body, The file must be provided as the "file" field of an HTML encoded form body,
produced in response to an HTML form like this:: produced in response to an HTML form like this::
<form action="/uri" method="POST" enctype="multipart/form-data"> <form action="/uri" method="POST" enctype="multipart/form-data">
<input type="hidden" name="t" value="upload" /> <input type="hidden" name="t" value="upload" />
<input type="file" name="file" /> <input type="file" name="file" />
@ -1182,7 +1182,7 @@ Uploading a File
This uploads a file, and attaches it as a new child of the given directory, This uploads a file, and attaches it as a new child of the given directory,
which must be mutable. The file must be provided as the "file" field of an which must be mutable. The file must be provided as the "file" field of an
HTML-encoded form body, produced in response to an HTML form like this:: HTML-encoded form body, produced in response to an HTML form like this::
<form action="." method="POST" enctype="multipart/form-data"> <form action="." method="POST" enctype="multipart/form-data">
<input type="hidden" name="t" value="upload" /> <input type="hidden" name="t" value="upload" />
<input type="file" name="file" /> <input type="file" name="file" />
@ -1751,6 +1751,9 @@ incorrectly.
keys may be missing until 'finished' is True):: keys may be missing until 'finished' is True)::
finished: (bool) True if the operation has finished, else False finished: (bool) True if the operation has finished, else False
api-version: (int), version number of the deep-stats API. It will be increased every
time a backwards-incompatible change is introduced.
The current version is 1.
count-immutable-files: count of how many CHK files are in the set count-immutable-files: count of how many CHK files are in the set
count-mutable-files: same, for mutable files (does not include directories) count-mutable-files: same, for mutable files (does not include directories)
count-literal-files: same, for LIT files (data contained inside the URI) count-literal-files: same, for LIT files (data contained inside the URI)
@ -2192,4 +2195,3 @@ URLs and HTTP and UTF-8
.. _RFC2231#4: https://tools.ietf.org/html/rfc2231#section-4 .. _RFC2231#4: https://tools.ietf.org/html/rfc2231#section-4
.. _some developers have reported: http://markmail.org/message/dsjyokgl7hv64ig3 .. _some developers have reported: http://markmail.org/message/dsjyokgl7hv64ig3
.. _RFC2616#19.5.1: https://tools.ietf.org/html/rfc2616#section-19.5.1 .. _RFC2616#19.5.1: https://tools.ietf.org/html/rfc2616#section-19.5.1

135
src/allmydata/deep_stats.py Normal file
View File

@ -0,0 +1,135 @@
"""Implementation of the deep stats class."""
import math
from allmydata.interfaces import IImmutableFileNode
from allmydata.interfaces import IMutableFileNode
from allmydata.interfaces import IDirectoryNode
from allmydata.unknown import UnknownNode
from allmydata.uri import LiteralFileURI
from allmydata.uri import from_string
from allmydata.util import mathutil
class DeepStats(object):
    """Deep stats object.

    Holds the results of the deep-stats operation.
    Used for JSON generation in the API.
    """

    # JSON API version.
    # Rules:
    # - increment each time a field is removed or changes meaning.
    # - it's ok to add a new field without incrementing the version.
    API_VERSION = 1

    def __init__(self, origin):
        """Initializes DeepStats object. Sets most of the fields to 0.

        'origin' is the node at which the deep traversal starts; it is
        only used later to obtain its storage index for the monitor.
        """
        self.monitor = None
        self.origin = origin
        self.stats = {
            'api-version': self.API_VERSION,
        }
        for k in ["count-immutable-files",
                  "count-mutable-files",
                  "count-literal-files",
                  "count-files",
                  "count-directories",
                  "count-unknown",
                  "size-immutable-files",
                  #"size-mutable-files",
                  "size-literal-files",
                  "size-directories",
                  "largest-directory",
                  "largest-directory-children",
                  "largest-immutable-file",
                  #"largest-mutable-file",
                  ]:
            self.stats[k] = 0
        self.histograms = {}
        for k in ["size-files-histogram"]:
            self.histograms[k] = {}  # maps (min, max) to count
        self.buckets = [(0, 0), (1, 3)]
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Sets a new monitor and publishes the current results on it."""
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Adds a node's stats to the calculation.

        'childpath' is accepted for compatibility with other traversal
        visitors but is not used here.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Adds directory stats (total/largest size, most children)."""
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            # get_size() may return None (e.g. size not yet known); skip
            # the byte-size stats in that case but still count children.
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increments the counter 'key' by 'value' (default 1)."""
        self.stats[key] += value

    def max(self, key, value):
        """Records 'value' into 'key' if it exceeds the current maximum."""
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Returns (min, max) such that min <= size <= max.

        Values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        (101,316), (317,1000), etc: two per decade. The bucket list is
        extended lazily as larger sizes are seen.
        """
        assert size >= 0
        i = 0
        while True:
            if i >= len(self.buckets):
                # extend the list
                new_lower = self.buckets[i - 1][1] + 1
                new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
                self.buckets.append((new_lower, new_upper))
            maybe = self.buckets[i]
            if maybe[0] <= size <= maybe[1]:
                return maybe
            i += 1

    def histogram(self, key, size):
        """Adds one occurrence of 'size' to the histogram named 'key'."""
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        h[bucket] = h.get(bucket, 0) + 1

    def get_results(self):
        """Returns deep-stats results as a JSON-friendly dict."""
        stats = self.stats.copy()
        for key, h in self.histograms.items():
            # flatten each histogram into a sorted list of
            # (bucket-min, bucket-max, count) triples
            stats[key] = sorted((bucket[0], bucket[1], count)
                                for bucket, count in h.items())
        return stats

    def finish(self):
        """Finishes gathering stats and returns the results."""
        return self.get_results()

View File

@ -1,33 +1,36 @@
"""Directory Node implementation."""
import time, math, unicodedata import time, unicodedata
from zope.interface import implements from zope.interface import implements
from twisted.internet import defer from twisted.internet import defer
from foolscap.api import fireEventually from foolscap.api import fireEventually
import simplejson import simplejson
from allmydata.deep_stats import DeepStats
from allmydata.mutable.common import NotWriteableError from allmydata.mutable.common import NotWriteableError
from allmydata.mutable.filenode import MutableFileNode from allmydata.mutable.filenode import MutableFileNode
from allmydata.unknown import UnknownNode, strip_prefix_for_ro from allmydata.unknown import UnknownNode, strip_prefix_for_ro
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \ from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
IImmutableFileNode, IMutableFileNode, \
ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \ ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
from allmydata.check_results import DeepCheckResults, \ from allmydata.check_results import DeepCheckResults, \
DeepCheckAndRepairResults DeepCheckAndRepairResults
from allmydata.monitor import Monitor from allmydata.monitor import Monitor
from allmydata.util import hashutil, mathutil, base32, log from allmydata.util import hashutil, base32, log
from allmydata.util.encodingutil import quote_output from allmydata.util.encodingutil import quote_output
from allmydata.util.assertutil import precondition from allmydata.util.assertutil import precondition
from allmydata.util.netstring import netstring, split_netstring from allmydata.util.netstring import netstring, split_netstring
from allmydata.util.consumer import download_to_data from allmydata.util.consumer import download_to_data
from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap from allmydata.uri import wrap_dirnode_cap
from pycryptopp.cipher.aes import AES from pycryptopp.cipher.aes import AES
from allmydata.util.dictutil import AuxValueDict from allmydata.util.dictutil import AuxValueDict
def update_metadata(metadata, new_metadata, now): def update_metadata(metadata, new_metadata, now):
"""Updates 'metadata' in-place with the information in 'new_metadata'. """Updates 'metadata' in-place with the information in 'new_metadata'.
Timestamps are set according to the time 'now'."""
Timestamps are set according to the time 'now'.
"""
if metadata is None: if metadata is None:
metadata = {} metadata = {}
@ -364,8 +367,8 @@ class DirectoryNode:
children.set_with_aux(name, (child, metadata), auxilliary=entry) children.set_with_aux(name, (child, metadata), auxilliary=entry)
else: else:
log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory", log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
name=quote_output(name, encoding='utf-8'), name=quote_output(name, encoding='utf-8'),
facility="tahoe.webish", level=log.UNUSUAL) facility="tahoe.webish", level=log.UNUSUAL)
except CapConstraintError, e: except CapConstraintError, e:
log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n" log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
"%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'), "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
@ -794,111 +797,6 @@ class DirectoryNode:
return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease)) return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
class DeepStats:
    """Accumulates aggregate statistics during a deep directory traversal."""

    def __init__(self, origin):
        self.origin = origin
        counter_names = [
            "count-immutable-files",
            "count-mutable-files",
            "count-literal-files",
            "count-files",
            "count-directories",
            "count-unknown",
            "size-immutable-files",
            #"size-mutable-files",
            "size-literal-files",
            "size-directories",
            "largest-directory",
            "largest-directory-children",
            "largest-immutable-file",
            #"largest-mutable-file",
        ]
        self.stats = dict.fromkeys(counter_names, 0)
        # each histogram maps a (min, max) bucket to an occurrence count
        self.histograms = {"size-files-histogram": {}}
        self.buckets = [(0, 0), (1, 3)]
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach a monitor and publish the current results on it."""
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Fold one node's contribution into the running statistics."""
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            filesize = node.get_size()
            self.histogram("size-files-histogram", filesize)
            cap = from_string(node.get_uri())
            if isinstance(cap, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", filesize)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", filesize)
                self.max("largest-immutable-file", filesize)

    def enter_directory(self, parent, children):
        """Fold one directory's size and child count into the statistics."""
        nbytes = parent.get_size()
        if nbytes is not None:
            self.add("size-directories", nbytes)
            self.max("largest-directory", nbytes)
        self.max("largest-directory-children", len(children))

    def add(self, key, value=1):
        """Increment counter 'key' by 'value'."""
        self.stats[key] += value

    def max(self, key, value):
        """Record 'value' under 'key' if it beats the current maximum."""
        if value > self.stats[key]:
            self.stats[key] = value

    def which_bucket(self, size):
        # Return the (min, max) bucket with min <= size <= max. Buckets are
        # (0,0), (1,3), (4,10), (11,31), (32,100), (101,316), (317,1000),
        # etc: two per decade, grown lazily as larger sizes appear.
        assert size >= 0
        index = 0
        while True:
            if index == len(self.buckets):
                lower = self.buckets[-1][1] + 1
                upper = int(mathutil.next_power_of_k(lower, self.root))
                self.buckets.append((lower, upper))
            candidate = self.buckets[index]
            if candidate[0] <= size <= candidate[1]:
                return candidate
            index += 1

    def histogram(self, key, size):
        """Count one occurrence of 'size' in the histogram named 'key'."""
        bucket = self.which_bucket(size)
        counts = self.histograms[key]
        counts[bucket] = counts.get(bucket, 0) + 1

    def get_results(self):
        """Return a copy of the stats with histograms flattened to sorted
        (bucket-min, bucket-max, count) triples."""
        results = self.stats.copy()
        for key, counts in self.histograms.items():
            results[key] = sorted((lo, hi, n)
                                  for (lo, hi), n in counts.items())
        return results

    def finish(self):
        """Traversal is complete; return the final results."""
        return self.get_results()
class ManifestWalker(DeepStats): class ManifestWalker(DeepStats):
def __init__(self, origin): def __init__(self, origin):
DeepStats.__init__(self, origin) DeepStats.__init__(self, origin)
@ -968,5 +866,3 @@ class DeepChecker:
# use client.create_dirnode() to make one of these # use client.create_dirnode() to make one of these

View File

@ -1,3 +1,5 @@
"""Tests for the dirnode module."""
import time import time
import unicodedata import unicodedata
from zope.interface import implements from zope.interface import implements

View File

@ -1815,6 +1815,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
#"largest-directory": 1590, #"largest-directory": 1590,
"largest-directory-children": 8, "largest-directory-children": 8,
"largest-immutable-file": 19, "largest-immutable-file": 19,
"api-version": 1,
} }
for k,v in expected.iteritems(): for k,v in expected.iteritems():
self.failUnlessReallyEqual(stats[k], v, self.failUnlessReallyEqual(stats[k], v,
@ -4442,4 +4443,3 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
# doesn't reveal anything. This addresses #1720. # doesn't reveal anything. This addresses #1720.
d.addCallback(lambda e: self.assertEquals(str(e), "404 Not Found")) d.addCallback(lambda e: self.assertEquals(str(e), "404 Not Found"))
return d return d