Merge PR382

refs ticket:567
Closes #382
This commit is contained in:
Brian Warner 2016-12-23 23:07:43 -05:00
commit 51eae34f02
5 changed files with 161 additions and 126 deletions

View File

@ -539,7 +539,7 @@ Creating a New Directory
checking that they are immutable. The "imm." prefix must not be stripped
off without performing this check. (Future versions of the web-API server
will perform it where necessary.)
The cap for each child may be given either in the "rw_uri" or "ro_uri"
field of the PROPDICT (not both). If a cap is given in the "rw_uri" field,
then the web-API server will check that it is an immutable read-cap of a
@ -595,7 +595,7 @@ Creating a New Directory
format of the named target directory; intermediate directories, if created,
are created using the default mutable type setting, as configured on the
Tahoe-LAFS server responding to the request.
This operation will return an error if a blocking file is present at any of
the parent names, preventing the server from creating the necessary parent
directory; or if it would require changing an immutable directory; or if
@ -655,7 +655,7 @@ Creating a New Directory
the immediate parent directory already has a child named NAME.
Note that the name= argument must be passed as a queryarg, because the POST
request body is used for the initial children JSON.
request body is used for the initial children JSON.
``POST /uri/$DIRCAP/[SUBDIRS../]?t=mkdir-immutable&name=NAME``
@ -770,7 +770,7 @@ Getting Information About a File Or Directory (as JSON)
if and only if you have read-write access to that directory. The verify_uri
field will be present if and only if the object has a verify-cap
(non-distributed LIT files do not have verify-caps).
If the cap is of an unknown format, then the file size and verify_uri will
not be available::
@ -861,7 +861,7 @@ When an edge is created or updated by "tahoe backup", the 'mtime' and
the UNIX "ctime" of the local file, which means the last time that
either the contents or the metadata of the local file was changed.
There are several ways that the 'ctime' field could be confusing:
There are several ways that the 'ctime' field could be confusing:
1. You might be confused about whether it reflects the time of the creation
of a link in the Tahoe filesystem (by a version of Tahoe < v1.7.0) or a
@ -917,7 +917,7 @@ Attaching an Existing File or Directory by its read- or write-cap
command). Note that "true", "t", and "1" are all synonyms for "True", and
"false", "f", and "0" are synonyms for "False", and the parameter is
case-insensitive.
Note that this operation does not take its child cap in the form of
separate "rw_uri" and "ro_uri" fields. Therefore, it cannot accept a
child cap in a format unknown to the web-API server, unless its URI
@ -966,7 +966,7 @@ Adding Multiple Files or Directories to a Parent Directory at Once
currently placed here are "linkcrtime" and "linkmotime". For details, see
the section above entitled "Getting Information About a File Or Directory (as
JSON)", in the "About the metadata" subsection.
Note that this command was introduced with the name "set_children", which
uses an underscore rather than a hyphen as other multi-word command names
do. The variant with a hyphen is now accepted, but clients that desire
@ -1155,7 +1155,7 @@ Uploading a File
The file must be provided as the "file" field of an HTML encoded form body,
produced in response to an HTML form like this::
<form action="/uri" method="POST" enctype="multipart/form-data">
<input type="hidden" name="t" value="upload" />
<input type="file" name="file" />
@ -1182,7 +1182,7 @@ Uploading a File
This uploads a file, and attaches it as a new child of the given directory,
which must be mutable. The file must be provided as the "file" field of an
HTML-encoded form body, produced in response to an HTML form like this::
<form action="." method="POST" enctype="multipart/form-data">
<input type="hidden" name="t" value="upload" />
<input type="file" name="file" />
@ -1751,6 +1751,9 @@ incorrectly.
keys may be missing until 'finished' is True)::
finished: (bool) True if the operation has finished, else False
api-version: (int) version number of the deep-stats API. It is increased every
time a backwards-incompatible change is introduced.
The current version is 1.
count-immutable-files: count of how many CHK files are in the set
count-mutable-files: same, for mutable files (does not include directories)
count-literal-files: same, for LIT files (data contained inside the URI)
@ -2192,4 +2195,3 @@ URLs and HTTP and UTF-8
.. _RFC2231#4: https://tools.ietf.org/html/rfc2231#section-4
.. _some developers have reported: http://markmail.org/message/dsjyokgl7hv64ig3
.. _RFC2616#19.5.1: https://tools.ietf.org/html/rfc2616#section-19.5.1

135
src/allmydata/deep_stats.py Normal file
View File

@ -0,0 +1,135 @@
"""Implementation of the deep stats class."""
import math
from allmydata.interfaces import IImmutableFileNode
from allmydata.interfaces import IMutableFileNode
from allmydata.interfaces import IDirectoryNode
from allmydata.unknown import UnknownNode
from allmydata.uri import LiteralFileURI
from allmydata.uri import from_string
from allmydata.util import mathutil
class DeepStats(object):
"""Deep stats object.
Holds results of the deep-stats opetation.
Used for json generation in the API."""
# Json API version.
# Rules:
# - increment each time a field is removed or changes meaning.
# - it's ok to add a new field without incrementing the version.
API_VERSION = 1
def __init__(self, origin):
"""Initializes DeepStats object. Sets most of the fields to 0."""
self.monitor = None
self.origin = origin
self.stats = {
'api-version': self.API_VERSION
}
for k in ["count-immutable-files",
"count-mutable-files",
"count-literal-files",
"count-files",
"count-directories",
"count-unknown",
"size-immutable-files",
#"size-mutable-files",
"size-literal-files",
"size-directories",
"largest-directory",
"largest-directory-children",
"largest-immutable-file",
#"largest-mutable-file",
]:
self.stats[k] = 0
self.histograms = {}
for k in ["size-files-histogram"]:
self.histograms[k] = {} # maps (min,max) to count
self.buckets = [(0, 0), (1, 3)]
self.root = math.sqrt(10)
def set_monitor(self, monitor):
"""Sets a new monitor."""
self.monitor = monitor
monitor.origin_si = self.origin.get_storage_index()
monitor.set_status(self.get_results())
def add_node(self, node, childpath):
"""Adds a node's stats to calculation."""
if isinstance(node, UnknownNode):
self.add("count-unknown")
elif IDirectoryNode.providedBy(node):
self.add("count-directories")
elif IMutableFileNode.providedBy(node):
self.add("count-files")
self.add("count-mutable-files")
# TODO: update the servermap, compute a size, add it to
# size-mutable-files, max it into "largest-mutable-file"
elif IImmutableFileNode.providedBy(node): # CHK and LIT
self.add("count-files")
size = node.get_size()
self.histogram("size-files-histogram", size)
theuri = from_string(node.get_uri())
if isinstance(theuri, LiteralFileURI):
self.add("count-literal-files")
self.add("size-literal-files", size)
else:
self.add("count-immutable-files")
self.add("size-immutable-files", size)
self.max("largest-immutable-file", size)
def enter_directory(self, parent, children):
"""Adds directory stats."""
dirsize_bytes = parent.get_size()
if dirsize_bytes is not None:
self.add("size-directories", dirsize_bytes)
self.max("largest-directory", dirsize_bytes)
dirsize_children = len(children)
self.max("largest-directory-children", dirsize_children)
def add(self, key, value=1):
self.stats[key] += value
def max(self, key, value):
self.stats[key] = max(self.stats[key], value)
def which_bucket(self, size):
# return (min,max) such that min <= size <= max
# values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
# (101,316), (317, 1000), etc: two per decade
assert size >= 0
i = 0
while True:
if i >= len(self.buckets):
# extend the list
new_lower = self.buckets[i-1][1]+1
new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
self.buckets.append((new_lower, new_upper))
maybe = self.buckets[i]
if maybe[0] <= size <= maybe[1]:
return maybe
i += 1
def histogram(self, key, size):
bucket = self.which_bucket(size)
h = self.histograms[key]
if bucket not in h:
h[bucket] = 0
h[bucket] += 1
def get_results(self):
"""Returns deep-stats resutls."""
stats = self.stats.copy()
for key in self.histograms:
h = self.histograms[key]
out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
out.sort()
stats[key] = out
return stats
def finish(self):
"""Finishes gathering stats."""
return self.get_results()

View File

@ -1,33 +1,36 @@
import time, math, unicodedata
"""Directory Node implementation."""
import time, unicodedata
from zope.interface import implements
from twisted.internet import defer
from foolscap.api import fireEventually
import simplejson
from allmydata.deep_stats import DeepStats
from allmydata.mutable.common import NotWriteableError
from allmydata.mutable.filenode import MutableFileNode
from allmydata.unknown import UnknownNode, strip_prefix_for_ro
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
IImmutableFileNode, IMutableFileNode, \
ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
from allmydata.check_results import DeepCheckResults, \
DeepCheckAndRepairResults
from allmydata.monitor import Monitor
from allmydata.util import hashutil, mathutil, base32, log
from allmydata.util import hashutil, base32, log
from allmydata.util.encodingutil import quote_output
from allmydata.util.assertutil import precondition
from allmydata.util.netstring import netstring, split_netstring
from allmydata.util.consumer import download_to_data
from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap
from allmydata.uri import wrap_dirnode_cap
from pycryptopp.cipher.aes import AES
from allmydata.util.dictutil import AuxValueDict
def update_metadata(metadata, new_metadata, now):
"""Updates 'metadata' in-place with the information in 'new_metadata'.
Timestamps are set according to the time 'now'."""
Timestamps are set according to the time 'now'.
"""
if metadata is None:
metadata = {}
@ -364,8 +367,8 @@ class DirectoryNode:
children.set_with_aux(name, (child, metadata), auxilliary=entry)
else:
log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
name=quote_output(name, encoding='utf-8'),
facility="tahoe.webish", level=log.UNUSUAL)
name=quote_output(name, encoding='utf-8'),
facility="tahoe.webish", level=log.UNUSUAL)
except CapConstraintError, e:
log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
"%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
@ -794,111 +797,6 @@ class DirectoryNode:
return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
class DeepStats:
def __init__(self, origin):
self.origin = origin
self.stats = {}
for k in ["count-immutable-files",
"count-mutable-files",
"count-literal-files",
"count-files",
"count-directories",
"count-unknown",
"size-immutable-files",
#"size-mutable-files",
"size-literal-files",
"size-directories",
"largest-directory",
"largest-directory-children",
"largest-immutable-file",
#"largest-mutable-file",
]:
self.stats[k] = 0
self.histograms = {}
for k in ["size-files-histogram"]:
self.histograms[k] = {} # maps (min,max) to count
self.buckets = [ (0,0), (1,3)]
self.root = math.sqrt(10)
def set_monitor(self, monitor):
self.monitor = monitor
monitor.origin_si = self.origin.get_storage_index()
monitor.set_status(self.get_results())
def add_node(self, node, childpath):
if isinstance(node, UnknownNode):
self.add("count-unknown")
elif IDirectoryNode.providedBy(node):
self.add("count-directories")
elif IMutableFileNode.providedBy(node):
self.add("count-files")
self.add("count-mutable-files")
# TODO: update the servermap, compute a size, add it to
# size-mutable-files, max it into "largest-mutable-file"
elif IImmutableFileNode.providedBy(node): # CHK and LIT
self.add("count-files")
size = node.get_size()
self.histogram("size-files-histogram", size)
theuri = from_string(node.get_uri())
if isinstance(theuri, LiteralFileURI):
self.add("count-literal-files")
self.add("size-literal-files", size)
else:
self.add("count-immutable-files")
self.add("size-immutable-files", size)
self.max("largest-immutable-file", size)
def enter_directory(self, parent, children):
dirsize_bytes = parent.get_size()
if dirsize_bytes is not None:
self.add("size-directories", dirsize_bytes)
self.max("largest-directory", dirsize_bytes)
dirsize_children = len(children)
self.max("largest-directory-children", dirsize_children)
def add(self, key, value=1):
self.stats[key] += value
def max(self, key, value):
self.stats[key] = max(self.stats[key], value)
def which_bucket(self, size):
# return (min,max) such that min <= size <= max
# values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
# (101,316), (317, 1000), etc: two per decade
assert size >= 0
i = 0
while True:
if i >= len(self.buckets):
# extend the list
new_lower = self.buckets[i-1][1]+1
new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
self.buckets.append( (new_lower, new_upper) )
maybe = self.buckets[i]
if maybe[0] <= size <= maybe[1]:
return maybe
i += 1
def histogram(self, key, size):
bucket = self.which_bucket(size)
h = self.histograms[key]
if bucket not in h:
h[bucket] = 0
h[bucket] += 1
def get_results(self):
stats = self.stats.copy()
for key in self.histograms:
h = self.histograms[key]
out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
out.sort()
stats[key] = out
return stats
def finish(self):
return self.get_results()
class ManifestWalker(DeepStats):
def __init__(self, origin):
DeepStats.__init__(self, origin)
@ -968,5 +866,3 @@ class DeepChecker:
# use client.create_dirnode() to make one of these

View File

@ -1,3 +1,5 @@
"""Tests for the dirnode module."""
import time
import unicodedata
from zope.interface import implements

View File

@ -1815,6 +1815,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
#"largest-directory": 1590,
"largest-directory-children": 8,
"largest-immutable-file": 19,
"api-version": 1,
}
for k,v in expected.iteritems():
self.failUnlessReallyEqual(stats[k], v,
@ -4442,4 +4443,3 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
# doesn't reveal anything. This addresses #1720.
d.addCallback(lambda e: self.assertEquals(str(e), "404 Not Found"))
return d