Provisional patch to NFC-normalize filenames going in and out of Tahoe directories.

This commit is contained in:
david-sarah
2010-06-15 20:14:50 -07:00
parent 82d0f0c59e
commit c8d99b77a3

View File

@ -1,5 +1,5 @@
import time, math import time, math, unicodedata
from zope.interface import implements from zope.interface import implements
from twisted.internet import defer from twisted.internet import defer
@ -16,6 +16,7 @@ from allmydata.check_results import DeepCheckResults, \
DeepCheckAndRepairResults DeepCheckAndRepairResults
from allmydata.monitor import Monitor from allmydata.monitor import Monitor
from allmydata.util import hashutil, mathutil, base32, log from allmydata.util import hashutil, mathutil, base32, log
from allmydata.util.stringutils import quote_output
from allmydata.util.assertutil import precondition from allmydata.util.assertutil import precondition
from allmydata.util.netstring import netstring, split_netstring from allmydata.util.netstring import netstring, split_netstring
from allmydata.util.consumer import download_to_data from allmydata.util.consumer import download_to_data
@ -75,15 +76,17 @@ def update_metadata(metadata, new_metadata, now):
return metadata return metadata
def normalize(namex):
return unicodedata.normalize('NFC', namex)
# TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the # TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the
# contents and end by repacking them. It might be better to apply them to # contents and end by repacking them. It might be better to apply them to
# the unpacked contents. # the unpacked contents.
class Deleter: class Deleter:
def __init__(self, node, name, must_exist=True, must_be_directory=False, must_be_file=False): def __init__(self, node, namex, must_exist=True, must_be_directory=False, must_be_file=False):
self.node = node self.node = node
self.name = name self.name = normalize(namex)
self.must_exist = must_exist self.must_exist = must_exist
self.must_be_directory = must_be_directory self.must_be_directory = must_be_directory
self.must_be_file = must_be_file self.must_be_file = must_be_file
@ -109,9 +112,9 @@ class Deleter:
class MetadataSetter: class MetadataSetter:
def __init__(self, node, name, metadata, create_readonly_node=None): def __init__(self, node, namex, metadata, create_readonly_node=None):
self.node = node self.node = node
self.name = name self.name = normalize(namex)
self.metadata = metadata self.metadata = metadata
self.create_readonly_node = create_readonly_node self.create_readonly_node = create_readonly_node
@ -139,20 +142,20 @@ class Adder:
if entries is None: if entries is None:
entries = {} entries = {}
precondition(isinstance(entries, dict), entries) precondition(isinstance(entries, dict), entries)
# keys of 'entries' may not be normalized.
self.entries = entries self.entries = entries
self.overwrite = overwrite self.overwrite = overwrite
self.create_readonly_node = create_readonly_node self.create_readonly_node = create_readonly_node
def set_node(self, name, node, metadata): def set_node(self, namex, node, metadata):
precondition(isinstance(name, unicode), name)
precondition(IFilesystemNode.providedBy(node), node) precondition(IFilesystemNode.providedBy(node), node)
self.entries[name] = (node, metadata) self.entries[namex] = (node, metadata)
def modify(self, old_contents, servermap, first_time): def modify(self, old_contents, servermap, first_time):
children = self.node._unpack_contents(old_contents) children = self.node._unpack_contents(old_contents)
now = time.time() now = time.time()
for (name, (child, new_metadata)) in self.entries.iteritems(): for (namex, (child, new_metadata)) in self.entries.iteritems():
precondition(isinstance(name, unicode), name) name = normalize(namex)
precondition(IFilesystemNode.providedBy(child), child) precondition(IFilesystemNode.providedBy(child), child)
# Strictly speaking this is redundant because we would raise the # Strictly speaking this is redundant because we would raise the
@ -162,10 +165,10 @@ class Adder:
metadata = None metadata = None
if name in children: if name in children:
if not self.overwrite: if not self.overwrite:
raise ExistingChildError("child '%s' already exists" % name) raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8'))
if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]): if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]):
raise ExistingChildError("child '%s' already exists" % name) raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8'))
metadata = children[name][1].copy() metadata = children[name][1].copy()
metadata = update_metadata(metadata, new_metadata, now) metadata = update_metadata(metadata, new_metadata, now)
@ -176,6 +179,7 @@ class Adder:
new_contents = self.node._pack_contents(children) new_contents = self.node._pack_contents(children)
return new_contents return new_contents
def _encrypt_rw_uri(filenode, rw_uri): def _encrypt_rw_uri(filenode, rw_uri):
assert isinstance(rw_uri, str) assert isinstance(rw_uri, str)
writekey = filenode.get_writekey() writekey = filenode.get_writekey()
@ -212,7 +216,8 @@ def pack_children(filenode, children, deep_immutable=False):
(child, metadata) = children[name] (child, metadata) = children[name]
child.raise_error() child.raise_error()
if deep_immutable and not child.is_allowed_in_immutable_directory(): if deep_immutable and not child.is_allowed_in_immutable_directory():
raise MustBeDeepImmutableError("child '%s' is not allowed in an immutable directory" % (name,), name) raise MustBeDeepImmutableError("child %s is not allowed in an immutable directory" %
quote_output(name, encoding='utf-8'), name)
if has_aux: if has_aux:
entry = children.get_aux(name) entry = children.get_aux(name)
if not entry: if not entry:
@ -284,6 +289,7 @@ class DirectoryNode:
return plaintext return plaintext
def _create_and_validate_node(self, rw_uri, ro_uri, name): def _create_and_validate_node(self, rw_uri, ro_uri, name):
# name is just for error reporting
node = self._nodemaker.create_from_cap(rw_uri, ro_uri, node = self._nodemaker.create_from_cap(rw_uri, ro_uri,
deep_immutable=not self.is_mutable(), deep_immutable=not self.is_mutable(),
name=name) name=name)
@ -291,6 +297,7 @@ class DirectoryNode:
return node return node
def _create_readonly_node(self, node, name): def _create_readonly_node(self, node, name):
# name is just for error reporting
if not node.is_unknown() and node.is_readonly(): if not node.is_unknown() and node.is_readonly():
return node return node
return self._create_and_validate_node(None, node.get_readonly_uri(), name=name) return self._create_and_validate_node(None, node.get_readonly_uri(), name=name)
@ -299,7 +306,8 @@ class DirectoryNode:
# the directory is serialized as a list of netstrings, one per child. # the directory is serialized as a list of netstrings, one per child.
# Each child is serialized as a list of four netstrings: (name, ro_uri, # Each child is serialized as a list of four netstrings: (name, ro_uri,
# rwcapdata, metadata), in which the name, ro_uri, metadata are in # rwcapdata, metadata), in which the name, ro_uri, metadata are in
# cleartext. The 'name' is UTF-8 encoded. The rwcapdata is formatted as: # cleartext. The 'name' is UTF-8 encoded, and should be normalized to NFC.
# The rwcapdata is formatted as:
# pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac) # pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac)
assert isinstance(data, str), (repr(data), type(data)) assert isinstance(data, str), (repr(data), type(data))
# an empty directory is serialized as an empty string # an empty directory is serialized as an empty string
@ -312,10 +320,15 @@ class DirectoryNode:
while position < len(data): while position < len(data):
entries, position = split_netstring(data, 1, position) entries, position = split_netstring(data, 1, position)
entry = entries[0] entry = entries[0]
(name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) (namex_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4)
if not mutable and len(rwcapdata) > 0: if not mutable and len(rwcapdata) > 0:
raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty") raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty")
name = name_utf8.decode("utf-8")
# A name containing characters that are unassigned in one version of Unicode might
# not be normalized wrt a later version. Therefore we normalize names going both in
# and out of directories.
name = normalize(namex_utf8.decode("utf-8"))
rw_uri = "" rw_uri = ""
if writeable: if writeable:
rw_uri = self._decrypt_rwcapdata(rwcapdata) rw_uri = self._decrypt_rwcapdata(rwcapdata)
@ -338,12 +351,12 @@ class DirectoryNode:
children[name] = (child, metadata) children[name] = (child, metadata)
children.set_with_aux(name, (child, metadata), auxilliary=entry) children.set_with_aux(name, (child, metadata), auxilliary=entry)
else: else:
log.msg(format="mutable cap for child '%(name)s' unpacked from an immutable directory", log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
name=name_utf8, name=quote_output(name, encoding='utf-8'),
facility="tahoe.webish", level=log.UNUSUAL) facility="tahoe.webish", level=log.UNUSUAL)
except CapConstraintError, e: except CapConstraintError, e:
log.msg(format="unmet constraint on cap for child '%(name)s' unpacked from a directory:\n" log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
"%(message)s", message=e.args[0], name=name_utf8, "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
facility="tahoe.webish", level=log.UNUSUAL) facility="tahoe.webish", level=log.UNUSUAL)
return children return children
@ -406,10 +419,10 @@ class DirectoryNode:
name to a tuple of (IFilesystemNode, metadata).""" name to a tuple of (IFilesystemNode, metadata)."""
return self._read() return self._read()
def has_child(self, name): def has_child(self, namex):
"""I return a Deferred that fires with a boolean, True if there """I return a Deferred that fires with a boolean, True if there
exists a child of the given name, False if not.""" exists a child of the given name, False if not."""
assert isinstance(name, unicode) name = normalize(namex)
d = self._read() d = self._read()
d.addCallback(lambda children: children.has_key(name)) d.addCallback(lambda children: children.has_key(name))
return d return d
@ -426,31 +439,31 @@ class DirectoryNode:
raise NoSuchChildError(name) raise NoSuchChildError(name)
return child return child
def get(self, name): def get(self, namex):
"""I return a Deferred that fires with the named child node, """I return a Deferred that fires with the named child node,
which is an IFilesystemNode.""" which is an IFilesystemNode."""
assert isinstance(name, unicode) name = normalize(namex)
d = self._read() d = self._read()
d.addCallback(self._get, name) d.addCallback(self._get, name)
return d return d
def get_child_and_metadata(self, name): def get_child_and_metadata(self, namex):
"""I return a Deferred that fires with the (node, metadata) pair for """I return a Deferred that fires with the (node, metadata) pair for
the named child. The node is an IFilesystemNode, and the metadata the named child. The node is an IFilesystemNode, and the metadata
is a dictionary.""" is a dictionary."""
assert isinstance(name, unicode) name = normalize(namex)
d = self._read() d = self._read()
d.addCallback(self._get_with_metadata, name) d.addCallback(self._get_with_metadata, name)
return d return d
def get_metadata_for(self, name): def get_metadata_for(self, namex):
assert isinstance(name, unicode) name = normalize(namex)
d = self._read() d = self._read()
d.addCallback(lambda children: children[name][1]) d.addCallback(lambda children: children[name][1])
return d return d
def set_metadata_for(self, name, metadata): def set_metadata_for(self, namex, metadata):
assert isinstance(name, unicode) name = normalize(namex)
if self.is_readonly(): if self.is_readonly():
return defer.fail(NotWriteableError()) return defer.fail(NotWriteableError())
assert isinstance(metadata, dict) assert isinstance(metadata, dict)
@ -460,7 +473,7 @@ class DirectoryNode:
d.addCallback(lambda res: self) d.addCallback(lambda res: self)
return d return d
def get_child_at_path(self, path): def get_child_at_path(self, pathx):
"""Transform a child path into an IFilesystemNode. """Transform a child path into an IFilesystemNode.
I perform a recursive series of 'get' operations to find the named I perform a recursive series of 'get' operations to find the named
@ -470,42 +483,41 @@ class DirectoryNode:
The path can be either a single string (slash-separated) or a list of The path can be either a single string (slash-separated) or a list of
path-name elements. path-name elements.
""" """
d = self.get_child_and_metadata_at_path(path) d = self.get_child_and_metadata_at_path(pathx)
d.addCallback(lambda (node, metadata): node) d.addCallback(lambda (node, metadata): node)
return d return d
def get_child_and_metadata_at_path(self, path): def get_child_and_metadata_at_path(self, pathx):
"""Transform a child path into an IFilesystemNode and """Transform a child path into an IFilesystemNode and
a metadata dictionary from the last edge that was traversed. a metadata dictionary from the last edge that was traversed.
""" """
if not path: if not pathx:
return defer.succeed((self, {})) return defer.succeed((self, {}))
if isinstance(path, (list, tuple)): if isinstance(pathx, (list, tuple)):
pass pass
else: else:
path = path.split("/") pathx = pathx.split("/")
for p in path: for p in pathx:
assert isinstance(p, unicode) assert isinstance(p, unicode), p
childname = path[0] childnamex = pathx[0]
remaining_path = path[1:] remaining_pathx = pathx[1:]
if remaining_path: if remaining_pathx:
d = self.get(childname) d = self.get(childnamex)
d.addCallback(lambda node: d.addCallback(lambda node:
node.get_child_and_metadata_at_path(remaining_path)) node.get_child_and_metadata_at_path(remaining_pathx))
return d return d
d = self.get_child_and_metadata(childname) d = self.get_child_and_metadata(childnamex)
return d return d
def set_uri(self, name, writecap, readcap, metadata=None, overwrite=True): def set_uri(self, namex, writecap, readcap, metadata=None, overwrite=True):
precondition(isinstance(name, unicode), name)
precondition(isinstance(writecap, (str,type(None))), writecap) precondition(isinstance(writecap, (str,type(None))), writecap)
precondition(isinstance(readcap, (str,type(None))), readcap) precondition(isinstance(readcap, (str,type(None))), readcap)
# We now allow packing unknown nodes, provided they are valid # We now allow packing unknown nodes, provided they are valid
# for this type of directory. # for this type of directory.
child_node = self._create_and_validate_node(writecap, readcap, name) child_node = self._create_and_validate_node(writecap, readcap, namex)
d = self.set_node(name, child_node, metadata, overwrite) d = self.set_node(namex, child_node, metadata, overwrite)
d.addCallback(lambda res: child_node) d.addCallback(lambda res: child_node)
return d return d
@ -513,8 +525,8 @@ class DirectoryNode:
# this takes URIs # this takes URIs
a = Adder(self, overwrite=overwrite, a = Adder(self, overwrite=overwrite,
create_readonly_node=self._create_readonly_node) create_readonly_node=self._create_readonly_node)
for (name, e) in entries.iteritems(): for (namex, e) in entries.iteritems():
assert isinstance(name, unicode) assert isinstance(namex, unicode), namex
if len(e) == 2: if len(e) == 2:
writecap, readcap = e writecap, readcap = e
metadata = None metadata = None
@ -526,13 +538,13 @@ class DirectoryNode:
# We now allow packing unknown nodes, provided they are valid # We now allow packing unknown nodes, provided they are valid
# for this type of directory. # for this type of directory.
child_node = self._create_and_validate_node(writecap, readcap, name) child_node = self._create_and_validate_node(writecap, readcap, namex)
a.set_node(name, child_node, metadata) a.set_node(namex, child_node, metadata)
d = self._node.modify(a.modify) d = self._node.modify(a.modify)
d.addCallback(lambda ign: self) d.addCallback(lambda ign: self)
return d return d
def set_node(self, name, child, metadata=None, overwrite=True): def set_node(self, namex, child, metadata=None, overwrite=True):
"""I add a child at the specific name. I return a Deferred that fires """I add a child at the specific name. I return a Deferred that fires
when the operation finishes. This Deferred will fire with the child when the operation finishes. This Deferred will fire with the child
node that was just added. I will replace any existing child of the node that was just added. I will replace any existing child of the
@ -545,11 +557,10 @@ class DirectoryNode:
if self.is_readonly(): if self.is_readonly():
return defer.fail(NotWriteableError()) return defer.fail(NotWriteableError())
assert isinstance(name, unicode)
assert IFilesystemNode.providedBy(child), child assert IFilesystemNode.providedBy(child), child
a = Adder(self, overwrite=overwrite, a = Adder(self, overwrite=overwrite,
create_readonly_node=self._create_readonly_node) create_readonly_node=self._create_readonly_node)
a.set_node(name, child, metadata) a.set_node(namex, child, metadata)
d = self._node.modify(a.modify) d = self._node.modify(a.modify)
d.addCallback(lambda res: child) d.addCallback(lambda res: child)
return d return d
@ -565,12 +576,12 @@ class DirectoryNode:
return d return d
def add_file(self, name, uploadable, metadata=None, overwrite=True): def add_file(self, namex, uploadable, metadata=None, overwrite=True):
"""I upload a file (using the given IUploadable), then attach the """I upload a file (using the given IUploadable), then attach the
resulting FileNode to the directory at the given name. I return a resulting FileNode to the directory at the given name. I return a
Deferred that fires (with the IFileNode of the uploaded file) when Deferred that fires (with the IFileNode of the uploaded file) when
the operation completes.""" the operation completes."""
assert isinstance(name, unicode) name = normalize(namex)
if self.is_readonly(): if self.is_readonly():
return defer.fail(NotWriteableError()) return defer.fail(NotWriteableError())
d = self._uploader.upload(uploadable) d = self._uploader.upload(uploadable)
@ -580,21 +591,20 @@ class DirectoryNode:
self.set_node(name, node, metadata, overwrite)) self.set_node(name, node, metadata, overwrite))
return d return d
def delete(self, name, must_exist=True, must_be_directory=False, must_be_file=False): def delete(self, namex, must_exist=True, must_be_directory=False, must_be_file=False):
"""I remove the child at the specific name. I return a Deferred that """I remove the child at the specific name. I return a Deferred that
fires (with the node just removed) when the operation finishes.""" fires (with the node just removed) when the operation finishes."""
assert isinstance(name, unicode)
if self.is_readonly(): if self.is_readonly():
return defer.fail(NotWriteableError()) return defer.fail(NotWriteableError())
deleter = Deleter(self, name, must_exist=must_exist, deleter = Deleter(self, namex, must_exist=must_exist,
must_be_directory=must_be_directory, must_be_file=must_be_file) must_be_directory=must_be_directory, must_be_file=must_be_file)
d = self._node.modify(deleter.modify) d = self._node.modify(deleter.modify)
d.addCallback(lambda res: deleter.old_child) d.addCallback(lambda res: deleter.old_child)
return d return d
def create_subdirectory(self, name, initial_children={}, overwrite=True, def create_subdirectory(self, namex, initial_children={}, overwrite=True,
mutable=True, metadata=None): mutable=True, metadata=None):
assert isinstance(name, unicode) name = normalize(namex)
if self.is_readonly(): if self.is_readonly():
return defer.fail(NotWriteableError()) return defer.fail(NotWriteableError())
if mutable: if mutable:
@ -611,21 +621,22 @@ class DirectoryNode:
d.addCallback(_created) d.addCallback(_created)
return d return d
def move_child_to(self, current_child_name, new_parent, def move_child_to(self, current_child_namex, new_parent,
new_child_name=None, overwrite=True): new_child_namex=None, overwrite=True):
"""I take one of my children and move them to a new parent. The child """I take one of my children and move them to a new parent. The child
is referenced by name. On the new parent, the child will live under is referenced by name. On the new parent, the child will live under
'new_child_name', which defaults to 'current_child_name'. I return a 'new_child_name', which defaults to 'current_child_name'. I return a
Deferred that fires when the operation finishes.""" Deferred that fires when the operation finishes."""
assert isinstance(current_child_name, unicode)
if self.is_readonly() or new_parent.is_readonly(): if self.is_readonly() or new_parent.is_readonly():
return defer.fail(NotWriteableError()) return defer.fail(NotWriteableError())
if new_child_name is None:
new_child_name = current_child_name current_child_name = normalize(current_child_namex)
assert isinstance(new_child_name, unicode) if new_child_namex is None:
new_child_namex = current_child_name
d = self.get(current_child_name) d = self.get(current_child_name)
def sn(child): def sn(child):
return new_parent.set_node(new_child_name, child, return new_parent.set_node(new_child_namex, child,
overwrite=overwrite) overwrite=overwrite)
d.addCallback(sn) d.addCallback(sn)
d.addCallback(lambda child: self.delete(current_child_name)) d.addCallback(lambda child: self.delete(current_child_name))