Hook into Twisted Web to control where request bodies are written

This commit is contained in:
Jean-Paul Calderone 2020-11-18 15:47:06 -05:00
parent b1244543f2
commit 46955202e2
4 changed files with 163 additions and 20 deletions

View File

@ -33,6 +33,7 @@ from allmydata.introducer.client import IntroducerClient
from allmydata.util import (
hashutil, base32, pollmixin, log, idlib,
yamlutil, configutil,
fileutil,
)
from allmydata.util.encodingutil import get_filesystem_encoding
from allmydata.util.abbreviate import parse_abbreviated_size
@ -1043,6 +1044,21 @@ class _Client(node.Node, pollmixin.PollMixin):
def set_default_mutable_keysize(self, keysize):
self._key_generator.set_default_keysize(keysize)
def _get_tempdir(self):
"""
Determine the path to the directory where temporary files for this node
should be written.
:return bytes: The path which will exist and be a directory.
"""
tempdir_config = self.config.get_config("node", "tempdir", "tmp")
if isinstance(tempdir_config, bytes):
tempdir_config = tempdir_config.decode('utf-8')
tempdir = self.config.get_config_path(tempdir_config)
if not os.path.exists(tempdir):
fileutil.make_dirs(tempdir)
return tempdir
def init_web(self, webport):
self.log("init_web(webport=%s)", args=(webport,))
@ -1050,7 +1066,13 @@ class _Client(node.Node, pollmixin.PollMixin):
nodeurl_path = self.config.get_config_path("node.url")
staticdir_config = self.config.get_config("node", "web.static", "public_html")
staticdir = self.config.get_config_path(staticdir_config)
ws = WebishServer(self, webport, nodeurl_path, staticdir)
ws = WebishServer(
self,
webport,
self._get_tempdir(),
nodeurl_path,
staticdir,
)
ws.setServiceParent(self)
def init_ftp_server(self):

View File

@ -316,8 +316,14 @@ class WebMixin(TimezoneMixin):
self.staticdir = self.mktemp()
self.clock = Clock()
self.fakeTime = 86460 # 1d 0h 1m 0s
self.ws = webish.WebishServer(self.s, "0", staticdir=self.staticdir,
clock=self.clock, now_fn=lambda:self.fakeTime)
self.ws = webish.WebishServer(
self.s,
"0",
tempdir=self.mktemp(),
staticdir=self.staticdir,
clock=self.clock,
now_fn=lambda:self.fakeTime,
)
self.ws.setServiceParent(self.s)
self.webish_port = self.ws.getPortnum()
self.webish_url = self.ws.getURL()

View File

@ -5,6 +5,19 @@ Tests for ``allmydata.webish``.
from uuid import (
uuid4,
)
from errno import (
EACCES,
)
from io import (
BytesIO,
)
from hypothesis import (
given,
)
from hypothesis.strategies import (
integers,
)
from testtools.matchers import (
AfterPreprocessing,
@ -12,6 +25,7 @@ from testtools.matchers import (
Equals,
MatchesAll,
Not,
IsInstance,
)
from twisted.python.filepath import (
@ -30,7 +44,7 @@ from ..common import (
from ...webish import (
TahoeLAFSRequest,
tahoe_lafs_site,
TahoeLAFSSite,
)
@ -96,7 +110,7 @@ class TahoeLAFSRequestTests(SyncTestCase):
class TahoeLAFSSiteTests(SyncTestCase):
"""
Tests for the ``Site`` created by ``tahoe_lafs_site``.
Tests for ``TahoeLAFSSite``.
"""
def _test_censoring(self, path, censored):
"""
@ -112,7 +126,7 @@ class TahoeLAFSSiteTests(SyncTestCase):
"""
logPath = self.mktemp()
site = tahoe_lafs_site(Resource(), logPath=logPath)
site = TahoeLAFSSite(self.mktemp(), Resource(), logPath=logPath)
site.startFactory()
channel = DummyChannel()
@ -170,6 +184,90 @@ class TahoeLAFSSiteTests(SyncTestCase):
b"/uri?uri=[CENSORED]",
)
def _get_request(self, tempdir):
"""
Create and return a new ``TahoeLAFSRequest`` hooked up to a
``TahoeLAFSSite``.
:param bytes tempdir: The temporary directory to give to the site.
:return TahoeLAFSRequest: The new request instance.
"""
site = TahoeLAFSSite(tempdir.path, Resource(), logPath=self.mktemp())
site.startFactory()
channel = DummyChannel()
channel.site = site
request = TahoeLAFSRequest(channel)
return request
@given(integers(min_value=0, max_value=1024 * 1024 - 1))
def test_small_content(self, request_body_size):
"""
A request body smaller than 1 MiB is kept in memory.
"""
tempdir = FilePath(self.mktemp())
request = self._get_request(tempdir)
request.gotLength(request_body_size)
self.assertThat(
request.content,
IsInstance(BytesIO),
)
def _large_request_test(self, request_body_size):
"""
Assert that when a request with a body of of the given size is received
its content is written to the directory the ``TahoeLAFSSite`` is
configured with.
"""
tempdir = FilePath(self.mktemp())
tempdir.makedirs()
request = self._get_request(tempdir)
# So. Bad news. The temporary file for the uploaded content is
# unnamed (and this isn't even necessarily a bad thing since it is how
# you get automatic on-process-exit cleanup behavior on POSIX). It's
# not visible by inspecting the filesystem. It has no name we can
# discover. Then how do we verify it is written to the right place?
# The question itself is meaningless if we try to be too precise. It
# *has* no filesystem location. However, it is still stored *on* some
# filesystem. We still want to make sure it is on the filesystem we
# specified because otherwise it might be on a filesystem that's too
# small or undesirable in some other way.
#
# I don't know of any way to ask a file descriptor which filesystem
# it's on, either, though. It might be the case that the [f]statvfs()
# result could be compared somehow to infer the filesystem but
# ... it's not clear what the failure modes might be there, across
# different filesystems and runtime environments.
#
# Another approach is to make the temp directory unwriteable and
# observe the failure when an attempt is made to create a file there.
# This is hardly a lovely solution but at least it's kind of simple.
tempdir.chmod(0o550)
with self.assertRaises(OSError) as ctx:
request.gotLength(request_body_size)
self.assertThat(
ctx.exception.errno,
Equals(EACCES),
)
def test_unknown_request_size(self):
"""
A request body with an unknown size is written to a file in the temporary
directory passed to ``TahoeLAFSSite``.
"""
self._large_request_test(None)
@given(integers(min_value=1024 * 1024))
def test_large_request(self, request_body_size):
"""
A request body of 1 MiB or more is written to a file in the temporary
directory passed to ``TahoeLAFSSite``.
"""
self._large_request_test(request_body_size)
def param(name, value):
return u"; {}={}".format(name, value)

View File

@ -1,13 +1,13 @@
from six import ensure_str
import re, time
import re, time, tempfile
from functools import (
partial,
)
from cgi import (
FieldStorage,
)
from io import (
BytesIO,
)
from twisted.application import service, strports, internet
from twisted.web import static
@ -150,17 +150,34 @@ def _logFormatter(logDateTime, request):
)
tahoe_lafs_site = partial(
Site,
requestFactory=TahoeLAFSRequest,
logFormatter=_logFormatter,
)
class TahoeLAFSSite(Site):
"""
The HTTP protocol factory used by Tahoe-LAFS.
Among the behaviors provided:
* A configurable temporary directory where large request bodies can be
written so they don't stay in memory.
* A log formatter that writes some access logs but omits capability
strings to help keep them secret.
"""
requestFactory = TahoeLAFSRequest
def __init__(self, tempdir, *args, **kwargs):
Site.__init__(self, *args, logFormatter=_logFormatter, **kwargs)
self._tempdir = tempdir
def getContentFile(self, length):
if length is None or length >= 1024 * 1024:
return tempfile.TemporaryFile(dir=self._tempdir)
return BytesIO()
class WebishServer(service.MultiService):
name = "webish"
def __init__(self, client, webport, nodeurl_path=None, staticdir=None,
def __init__(self, client, webport, tempdir, nodeurl_path=None, staticdir=None,
clock=None, now_fn=time.time):
service.MultiService.__init__(self)
# the 'data' argument to all render() methods default to the Client
@ -170,7 +187,7 @@ class WebishServer(service.MultiService):
# time in a deterministic manner.
self.root = root.Root(client, clock, now_fn)
self.buildServer(webport, nodeurl_path, staticdir)
self.buildServer(webport, tempdir, nodeurl_path, staticdir)
# If set, clock is a twisted.internet.task.Clock that the tests
# use to test ophandle expiration.
@ -180,9 +197,9 @@ class WebishServer(service.MultiService):
self.root.putChild(b"storage-plugins", StoragePlugins(client))
def buildServer(self, webport, nodeurl_path, staticdir):
def buildServer(self, webport, tempdir, nodeurl_path, staticdir):
self.webport = webport
self.site = tahoe_lafs_site(self.root)
self.site = TahoeLAFSSite(tempdir, self.root)
self.staticdir = staticdir # so tests can check
if staticdir:
self.root.putChild("static", static.File(staticdir))
@ -260,4 +277,4 @@ class IntroducerWebishServer(WebishServer):
def __init__(self, introducer, webport, nodeurl_path=None, staticdir=None):
service.MultiService.__init__(self)
self.root = introweb.IntroducerRoot(introducer)
self.buildServer(webport, nodeurl_path, staticdir)
self.buildServer(webport, tempfile.tempdir, nodeurl_path, staticdir)