Implementation, tests and docs for blacklists. This version allows listing directories containing a blacklisted child. Inclusion of blacklist.py fixed. fixes #1425

This commit is contained in:
david-sarah 2011-08-24 08:59:28 -07:00
parent 505ebeb353
commit 3d7a32647c
11 changed files with 410 additions and 16 deletions

View File

@ -606,6 +606,13 @@ Other files
"which peers am I connected to" list), and the shortened form (the first
few characters) is recorded in various log messages.
``access.blacklist``
Gateway nodes may find it necessary to prohibit access to certain files. The
web-API has a facility to block access to filecaps by their storage index,
returning a 403 "Forbidden" error instead of the original file. For more
details, see the "Access Blacklist" section of `<frontends/webapi.rst>`_.
Example
=======

View File

@ -36,6 +36,7 @@ The Tahoe REST-ful Web API
8. `Static Files in /public_html`_
9. `Safety and Security Issues -- Names vs. URIs`_
10. `Concurrency Issues`_
11. `Access Blacklist`_
Enabling the web-API port
@ -1955,6 +1956,51 @@ For more details, please see the "Consistency vs Availability" and "The Prime
Coordination Directive" sections of `mutable.rst <../specifications/mutable.rst>`_.
Access Blacklist
================
Gateway nodes may find it necessary to prohibit access to certain files. The
web-API has a facility to block access to filecaps by their storage index,
returning a 403 "Forbidden" error instead of the original file.
This blacklist is recorded in $NODEDIR/access.blacklist, and contains one
blocked file per line. Comment lines (starting with ``#``) are ignored. Each
line consists of the storage-index (in the usual base32 format as displayed
by the "More Info" page, or by the "tahoe debug dump-cap" command), followed
by whitespace, followed by a reason string, which will be included in the 403
error message. This could hold a URL to a page that explains why the file is
blocked, for example.
So for example, if you found a need to block access to a file with filecap
``URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861``,
you could do the following::
tahoe debug dump-cap URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861
-> storage index: whpepioyrnff7orecjolvbudeu
echo "whpepioyrnff7orecjolvbudeu my puppy told me to" >>$NODEDIR/access.blacklist
tahoe restart $NODEDIR
tahoe get URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861
-> error, 403 Access Prohibited: my puppy told me to
The ``access.blacklist`` file will be checked each time a file or directory
is accessed: the file's ``mtime`` is used to decide whether it need to be
reloaded. Therefore no node restart is necessary when creating the initial
blacklist, nor when adding second, third, or additional entries to the list.
When modifying the file, be careful to update it atomically, otherwise a
request may arrive while the file is only halfway written, and the partial
file may be incorrectly parsed.
The blacklist is applied to all access paths (including FTP, SFTP, and CLI
operations), not just the web-API. The blacklist also applies to directories.
If a directory is blacklisted, the gateway will refuse access to both that
directory and any child files/directories underneath it, when accessed via
"DIRCAP/SUBDIR/FILENAME" -style URLs. Users who go directly to the child
file/dir will bypass the blacklist.
The node will log the SI of the file being blocked, and the reason code, into
the ``logs/twistd.log`` file.
.. [1] URLs and HTTP and UTF-8, Oh My
HTTP does not provide a mechanism to specify the character set used to

155
src/allmydata/blacklist.py Normal file
View File

@ -0,0 +1,155 @@
import os
from zope.interface import implements
from twisted.internet import defer
from twisted.python import log as twisted_log
from allmydata.interfaces import IFileNode, IFilesystemNode
from allmydata.util import base32
from allmydata.util.encodingutil import quote_output
class FileProhibited(Exception):
"""This client has been configured to prohibit access to this object."""
def __init__(self, reason):
Exception.__init__(self, "Access Prohibited: %s" % quote_output(reason, encoding='utf-8', quotemarks=False))
self.reason = reason
class Blacklist:
def __init__(self, blacklist_fn):
self.blacklist_fn = blacklist_fn
self.last_mtime = None
self.entries = {}
self.read_blacklist() # sets .last_mtime and .entries
def read_blacklist(self):
try:
current_mtime = os.stat(self.blacklist_fn).st_mtime
except EnvironmentError:
# unreadable blacklist file means no blacklist
self.entries.clear()
return
try:
if self.last_mtime is None or current_mtime > self.last_mtime:
self.entries.clear()
for line in open(self.blacklist_fn, "r").readlines():
line = line.strip()
if not line or line.startswith("#"):
continue
si_s, reason = line.split(None, 1)
si = base32.a2b(si_s) # must be valid base32
self.entries[si] = reason
self.last_mtime = current_mtime
except Exception, e:
twisted_log.err(e, "unparseable blacklist file")
raise
def check_storageindex(self, si):
self.read_blacklist()
reason = self.entries.get(si, None)
if reason is not None:
# log this to logs/twistd.log, since web logs go there too
twisted_log.msg("blacklist prohibited access to SI %s: %s" %
(base32.b2a(si), reason))
return reason
class ProhibitedNode:
implements(IFileNode)
def __init__(self, wrapped_node, reason):
assert IFilesystemNode.providedBy(wrapped_node), wrapped_node
self.wrapped_node = wrapped_node
self.reason = reason
def get_cap(self):
return self.wrapped_node.get_cap()
def get_readcap(self):
return self.wrapped_node.get_readcap()
def is_readonly(self):
return self.wrapped_node.is_readonly()
def is_mutable(self):
return self.wrapped_node.is_mutable()
def is_unknown(self):
return self.wrapped_node.is_unknown()
def is_allowed_in_immutable_directory(self):
return self.wrapped_node.is_allowed_in_immutable_directory()
def is_alleged_immutable(self):
return self.wrapped_node.is_alleged_immutable()
def raise_error(self):
# We don't raise an exception here because that would prevent the node from being listed.
pass
def get_uri(self):
return self.wrapped_node.get_uri()
def get_write_uri(self):
return self.wrapped_node.get_write_uri()
def get_readonly_uri(self):
return self.wrapped_node.get_readonly_uri()
def get_storage_index(self):
return self.wrapped_node.get_storage_index()
def get_verify_cap(self):
return self.wrapped_node.get_verify_cap()
def get_repair_cap(self):
return self.wrapped_node.get_repair_cap()
def get_size(self):
return None
def get_current_size(self):
return defer.succeed(None)
def get_size_of_best_version(self):
return defer.succeed(None)
def check(self, monitor, verify, add_lease):
return defer.succeed(None)
def check_and_repair(self, monitor, verify, add_lease):
return defer.succeed(None)
def get_version(self):
return None
# Omitting any of these methods would fail safe; they are just to ensure correct error reporting.
def get_best_readable_version(self):
raise FileProhibited(self.reason)
def download_best_version(self):
raise FileProhibited(self.reason)
def get_best_mutable_version(self):
raise FileProhibited(self.reason)
def overwrite(self, new_contents):
raise FileProhibited(self.reason)
def modify(self, modifier_cb):
raise FileProhibited(self.reason)
def get_servermap(self, mode):
raise FileProhibited(self.reason)
def download_version(self, servermap, version):
raise FileProhibited(self.reason)
def upload(self, new_contents, servermap):
raise FileProhibited(self.reason)
def get_writekey(self):
raise FileProhibited(self.reason)

View File

@ -25,6 +25,7 @@ from allmydata.history import History
from allmydata.interfaces import IStatsProducer, RIStubClient, \
SDMF_VERSION, MDMF_VERSION
from allmydata.nodemaker import NodeMaker
from allmydata.blacklist import Blacklist
KiB=1024
@ -279,6 +280,7 @@ class Client(node.Node, pollmixin.PollMixin):
self.terminator.setServiceParent(self)
self.add_service(Uploader(helper_furl, self.stats_provider))
self.init_stub_client()
self.init_blacklist()
self.init_nodemaker()
def init_client_storage_broker(self):
@ -331,6 +333,10 @@ class Client(node.Node, pollmixin.PollMixin):
d.addErrback(log.err, facility="tahoe.init",
level=log.BAD, umid="OEHq3g")
def init_blacklist(self):
fn = os.path.join(self.basedir, "access.blacklist")
self.blacklist = Blacklist(fn)
def init_nodemaker(self):
self.nodemaker = NodeMaker(self.storage_broker,
self._secret_holder,
@ -338,7 +344,8 @@ class Client(node.Node, pollmixin.PollMixin):
self.getServiceNamed("uploader"),
self.terminator,
self.get_encoding_parameters(),
self._key_generator)
self._key_generator,
self.blacklist)
default = self.get_config("client", "mutable.format", default="sdmf")
if default == "mdmf":
self.mutable_file_default = MDMF_VERSION

View File

@ -784,7 +784,7 @@ class IFilesystemNode(Interface):
read-only access with others, use get_readonly_uri().
"""
def get_write_uri(n):
def get_write_uri():
"""Return the URI string that can be used by others to get write
access to this node, if it is writeable. If this is a read-only node,
return None."""

View File

@ -9,14 +9,17 @@ from allmydata.mutable.filenode import MutableFileNode
from allmydata.mutable.publish import MutableData
from allmydata.dirnode import DirectoryNode, pack_children
from allmydata.unknown import UnknownNode
from allmydata.blacklist import ProhibitedNode
from allmydata import uri
class NodeMaker:
implements(INodeMaker)
def __init__(self, storage_broker, secret_holder, history,
uploader, terminator,
default_encoding_parameters, key_generator):
default_encoding_parameters, key_generator,
blacklist=None):
self.storage_broker = storage_broker
self.secret_holder = secret_holder
self.history = history
@ -24,6 +27,7 @@ class NodeMaker:
self.terminator = terminator
self.default_encoding_parameters = default_encoding_parameters
self.key_generator = key_generator
self.blacklist = blacklist
self._node_cache = weakref.WeakValueDictionary() # uri -> node
@ -62,14 +66,27 @@ class NodeMaker:
else:
memokey = "M" + bigcap
if memokey in self._node_cache:
return self._node_cache[memokey]
cap = uri.from_string(bigcap, deep_immutable=deep_immutable, name=name)
node = self._create_from_single_cap(cap)
if node:
self._node_cache[memokey] = node # note: WeakValueDictionary
node = self._node_cache[memokey]
else:
# don't cache UnknownNode
node = UnknownNode(writecap, readcap, deep_immutable=deep_immutable, name=name)
cap = uri.from_string(bigcap, deep_immutable=deep_immutable,
name=name)
node = self._create_from_single_cap(cap)
if node:
self._node_cache[memokey] = node # note: WeakValueDictionary
else:
# don't cache UnknownNode
node = UnknownNode(writecap, readcap,
deep_immutable=deep_immutable, name=name)
if self.blacklist:
si = node.get_storage_index()
# if this node is blacklisted, return the reason, otherwise return None
reason = self.blacklist.check_storageindex(si)
if reason is not None:
# The original node object is cached above, not the ProhibitedNode wrapper.
# This ensures that removing the blacklist entry will make the node
# accessible if create_from_cap is called again.
node = ProhibitedNode(node, reason)
return node
def _create_from_single_cap(self, cap):

View File

@ -170,6 +170,7 @@ class FakeClient(Client):
self.history = FakeHistory()
self.uploader = FakeUploader()
self.uploader.setServiceParent(self)
self.blacklist = None
self.nodemaker = FakeNodeMaker(None, self._secret_holder, None,
self.uploader, None,
None, None)
@ -5262,6 +5263,146 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
return d
def test_blacklist(self):
# download from a blacklisted URI, get an error
self.basedir = "web/Grid/blacklist"
self.set_up_grid()
c0 = self.g.clients[0]
c0_basedir = c0.basedir
fn = os.path.join(c0_basedir, "access.blacklist")
self.uris = {}
DATA = "off-limits " * 50
d = c0.upload(upload.Data(DATA, convergence=""))
def _stash_uri_and_create_dir(ur):
self.uri = ur.uri
self.url = "uri/"+self.uri
u = uri.from_string_filenode(self.uri)
self.si = u.get_storage_index()
childnode = c0.create_node_from_uri(self.uri, None)
return c0.create_dirnode({u"blacklisted.txt": (childnode,{}) })
d.addCallback(_stash_uri_and_create_dir)
def _stash_dir(node):
self.dir_node = node
self.dir_uri = node.get_uri()
self.dir_url = "uri/"+self.dir_uri
d.addCallback(_stash_dir)
d.addCallback(lambda ign: self.GET(self.dir_url, followRedirect=True))
def _check_dir_html(body):
self.failUnlessIn("<html>", body)
self.failUnlessIn("blacklisted.txt</a>", body)
d.addCallback(_check_dir_html)
d.addCallback(lambda ign: self.GET(self.url))
d.addCallback(lambda body: self.failUnlessEqual(DATA, body))
def _blacklist(ign):
f = open(fn, "w")
f.write(" # this is a comment\n")
f.write(" \n")
f.write("\n") # also exercise blank lines
f.write("%s %s\n" % (base32.b2a(self.si), "off-limits to you"))
f.close()
# clients should be checking the blacklist each time, so we don't
# need to restart the client
d.addCallback(_blacklist)
d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_uri",
403, "Forbidden",
"Access Prohibited: off-limits",
self.GET, self.url))
# We should still be able to list the parent directory, in HTML...
d.addCallback(lambda ign: self.GET(self.dir_url, followRedirect=True))
def _check_dir_html2(body):
self.failUnlessIn("<html>", body)
self.failUnlessIn("blacklisted.txt</strike>", body)
d.addCallback(_check_dir_html2)
# ... and in JSON (used by CLI).
d.addCallback(lambda ign: self.GET(self.dir_url+"?t=json", followRedirect=True))
def _check_dir_json(res):
data = simplejson.loads(res)
self.failUnless(isinstance(data, list), data)
self.failUnlessEqual(data[0], "dirnode")
self.failUnless(isinstance(data[1], dict), data)
self.failUnlessIn("children", data[1])
self.failUnlessIn("blacklisted.txt", data[1]["children"])
childdata = data[1]["children"]["blacklisted.txt"]
self.failUnless(isinstance(childdata, list), data)
self.failUnlessEqual(childdata[0], "filenode")
self.failUnless(isinstance(childdata[1], dict), data)
d.addCallback(_check_dir_json)
def _unblacklist(ign):
open(fn, "w").close()
# the Blacklist object watches mtime to tell when the file has
# changed, but on windows this test will run faster than the
# filesystem's mtime resolution. So we edit Blacklist.last_mtime
# to force a reload.
self.g.clients[0].blacklist.last_mtime -= 2.0
d.addCallback(_unblacklist)
# now a read should work
d.addCallback(lambda ign: self.GET(self.url))
d.addCallback(lambda body: self.failUnlessEqual(DATA, body))
# read again to exercise the blacklist-is-unchanged logic
d.addCallback(lambda ign: self.GET(self.url))
d.addCallback(lambda body: self.failUnlessEqual(DATA, body))
# now add a blacklisted directory, and make sure files under it are
# refused too
def _add_dir(ign):
childnode = c0.create_node_from_uri(self.uri, None)
return c0.create_dirnode({u"child": (childnode,{}) })
d.addCallback(_add_dir)
def _get_dircap(dn):
self.dir_si_b32 = base32.b2a(dn.get_storage_index())
self.dir_url_base = "uri/"+dn.get_write_uri()
self.dir_url_json1 = "uri/"+dn.get_write_uri()+"?t=json"
self.dir_url_json2 = "uri/"+dn.get_write_uri()+"/?t=json"
self.dir_url_json_ro = "uri/"+dn.get_readonly_uri()+"/?t=json"
self.child_url = "uri/"+dn.get_readonly_uri()+"/child"
d.addCallback(_get_dircap)
d.addCallback(lambda ign: self.GET(self.dir_url_base, followRedirect=True))
d.addCallback(lambda body: self.failUnlessIn("<html>", body))
d.addCallback(lambda ign: self.GET(self.dir_url_json1))
d.addCallback(lambda res: simplejson.loads(res)) # just check it decodes
d.addCallback(lambda ign: self.GET(self.dir_url_json2))
d.addCallback(lambda res: simplejson.loads(res)) # just check it decodes
d.addCallback(lambda ign: self.GET(self.dir_url_json_ro))
d.addCallback(lambda res: simplejson.loads(res)) # just check it decodes
d.addCallback(lambda ign: self.GET(self.child_url))
d.addCallback(lambda body: self.failUnlessEqual(DATA, body))
def _block_dir(ign):
f = open(fn, "w")
f.write("%s %s\n" % (self.dir_si_b32, "dir-off-limits to you"))
f.close()
self.g.clients[0].blacklist.last_mtime -= 2.0
d.addCallback(_block_dir)
d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir base",
403, "Forbidden",
"Access Prohibited: dir-off-limits",
self.GET, self.dir_url_base))
d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir json1",
403, "Forbidden",
"Access Prohibited: dir-off-limits",
self.GET, self.dir_url_json1))
d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir json2",
403, "Forbidden",
"Access Prohibited: dir-off-limits",
self.GET, self.dir_url_json2))
d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir json_ro",
403, "Forbidden",
"Access Prohibited: dir-off-limits",
self.GET, self.dir_url_json_ro))
d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir child",
403, "Forbidden",
"Access Prohibited: dir-off-limits",
self.GET, self.child_url))
return d
class CompletelyUnhandledError(Exception):
pass
class ErrorBoom(rend.Page):

View File

@ -6,6 +6,7 @@ from zope.interface import Interface
from nevow import loaders, appserver
from nevow.inevow import IRequest
from nevow.util import resource_filename
from allmydata import blacklist
from allmydata.interfaces import ExistingChildError, NoSuchChildError, \
FileTooLargeError, NotEnoughSharesError, NoSharesError, \
EmptyPathnameComponentError, MustBeDeepImmutableError, \
@ -257,6 +258,9 @@ def humanize_failure(f):
"The cap is being passed in a read slot (ro_uri), or was retrieved "
"from a read slot as an unknown cap.") % quoted_name
return (t, http.BAD_REQUEST)
if f.check(blacklist.FileProhibited):
t = "Access Prohibited: %s" % quote_output(f.value.reason, encoding="utf-8", quotemarks=False)
return (t, http.FORBIDDEN)
if f.check(WebError):
return (f.value.text, f.value.code)
if f.check(FileTooLargeError):

View File

@ -17,6 +17,7 @@ from allmydata.uri import from_string_dirnode
from allmydata.interfaces import IDirectoryNode, IFileNode, IFilesystemNode, \
IImmutableFileNode, IMutableFileNode, ExistingChildError, \
NoSuchChildError, EmptyPathnameComponentError, SDMF_VERSION, MDMF_VERSION
from allmydata.blacklist import ProhibitedNode
from allmydata.monitor import Monitor, OperationCancelledError
from allmydata import dirnode
from allmydata.web.common import text_plain, WebError, \
@ -764,6 +765,17 @@ class DirectoryAsHTML(rend.Page):
ctx.fillSlots("size", "-")
info_link = "%s/uri/%s/?t=info" % (root, quoted_uri)
elif isinstance(target, ProhibitedNode):
ctx.fillSlots("filename", T.strike[name])
if IDirectoryNode.providedBy(target.wrapped_node):
blacklisted_type = "DIR-BLACKLISTED"
else:
blacklisted_type = "BLACKLISTED"
ctx.fillSlots("type", blacklisted_type)
ctx.fillSlots("size", "-")
info_link = None
ctx.fillSlots("info", ["Access Prohibited:", T.br, target.reason])
else:
# unknown
ctx.fillSlots("filename", html.escape(name))
@ -779,7 +791,8 @@ class DirectoryAsHTML(rend.Page):
# writecap and the readcap
info_link = "%s?t=info" % urllib.quote(name)
ctx.fillSlots("info", T.a(href=info_link)["More Info"])
if info_link:
ctx.fillSlots("info", T.a(href=info_link)["More Info"])
return ctx.tag

View File

@ -12,6 +12,8 @@ from allmydata.immutable.upload import FileHandle
from allmydata.mutable.publish import MutableFileHandle
from allmydata.mutable.common import MODE_READ
from allmydata.util import log, base32
from allmydata.util.encodingutil import quote_output
from allmydata.blacklist import FileProhibited, ProhibitedNode
from allmydata.web.common import text_plain, WebError, RenderMixin, \
boolean_of_arg, get_arg, should_create_intermediate_directories, \
@ -152,11 +154,13 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
def childFactory(self, ctx, name):
req = IRequest(ctx)
if isinstance(self.node, ProhibitedNode):
raise FileProhibited(self.node.reason)
if should_create_intermediate_directories(req):
raise WebError("Cannot create directory '%s', because its "
"parent is a file, not a directory" % name)
raise WebError("Files have no children, certainly not named '%s'"
% name)
raise WebError("Cannot create directory %s, because its "
"parent is a file, not a directory" % quote_output(name, encoding='utf-8'))
raise WebError("Files have no children, certainly not named %s"
% quote_output(name, encoding='utf-8'))
def render_GET(self, ctx):
req = IRequest(ctx)

View File

@ -129,7 +129,7 @@ class WebishServer(service.MultiService):
name = "webish"
def __init__(self, client, webport, nodeurl_path=None, staticdir=None,
clock=None):
clock=None):
service.MultiService.__init__(self)
# the 'data' argument to all render() methods default to the Client
# the 'clock' argument to root.Root is, if set, a