Merge pull request #902 from tahoe-lafs/3512.localized-tempdir

Per-node web temp directory

Fixes: ticket:3512
This commit is contained in:
Jean-Paul Calderone 2020-11-25 18:18:36 -05:00 committed by GitHub
commit 152c04e48c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 261 additions and 46 deletions

View File

@ -0,0 +1 @@
Tahoe-LAFS now requires Twisted 19.10.0 or newer. As a result, it now has a transitive dependency on bcrypt.

View File

@ -15,6 +15,9 @@ self: super: {
# Need version of pyutil that supports Python 3. The version in 19.09
# is too old.
pyutil = python-super.callPackage ./pyutil.nix { };
# Need a newer version of Twisted, too.
twisted = python-super.callPackage ./twisted.nix { };
};
};
}

63
nix/twisted.nix Normal file
View File

@ -0,0 +1,63 @@
# Nix derivation that builds Twisted 19.10.0 from the PyPI source tarball.
{ stdenv
, buildPythonPackage
, fetchPypi
, python
, zope_interface
, incremental
, automat
, constantly
, hyperlink
, pyhamcrest
, attrs
, pyopenssl
, service-identity
, setuptools
, idna
, bcrypt
}:
buildPythonPackage rec {
  pname = "Twisted";
  version = "19.10.0";

  src = fetchPypi {
    inherit pname version;
    extension = "tar.bz2";
    sha256 = "7394ba7f272ae722a74f3d969dcf599bc4ef093bc392038748a490f1724a515d";
  };

  propagatedBuildInputs = [ zope_interface incremental automat constantly hyperlink pyhamcrest attrs setuptools bcrypt ];

  # The dependencies of the "tls" extra are exposed through passthru instead
  # of being propagated unconditionally.
  passthru.extras.tls = [ pyopenssl service-identity idna ];

  # Patch t.p._inotify to point to libc. Without this,
  # twisted.python.runtime.platform.supportsINotify() == False
  patchPhase = stdenv.lib.optionalString stdenv.isLinux ''
    substituteInPlace src/twisted/python/_inotify.py --replace \
      "ctypes.util.find_library('c')" "'${stdenv.glibc.out}/lib/libc.so.6'"
  '';

  # Generate Twisted's plug-in cache. Twisted users must do it as well. See
  # http://twistedmatrix.com/documents/current/core/howto/plugin.html#auto3
  # and http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=477103 for
  # details.
  postFixup = ''
    $out/bin/twistd --help > /dev/null
  '';

  # Only used if doCheck is turned on; it is disabled below.
  checkPhase = ''
    ${python.interpreter} -m unittest discover -s twisted/test
  '';

  # Tests require network
  doCheck = false;

  meta = with stdenv.lib; {
    # Quoted string rather than a bare URL literal: bare URL literals are
    # deprecated in Nix, and both forms evaluate to the same string.
    homepage = "https://twistedmatrix.com/";
    description = "Twisted, an event-driven networking engine written in Python";
    longDescription = ''
      Twisted is an event-driven networking engine written in Python
      and licensed under the MIT license.
    '';
    license = licenses.mit;
    maintainers = [ ];
  };
}

View File

@ -98,7 +98,9 @@ install_requires = [
# `pip install tahoe-lafs[sftp]` would not install requirements
# specified by Twisted[conch]. Since this would be the *whole point* of
# an sftp extra in Tahoe-LAFS, there is no point in having one.
"Twisted[tls,conch] >= 18.4.0",
# * Twisted 19.10 introduces Site.getContentFile which we use to get
# temporary upload files placed into a per-node temporary directory.
"Twisted[tls,conch] >= 19.10.0",
"PyYAML >= 3.11",

View File

@ -33,6 +33,7 @@ from allmydata.introducer.client import IntroducerClient
from allmydata.util import (
hashutil, base32, pollmixin, log, idlib,
yamlutil, configutil,
fileutil,
)
from allmydata.util.encodingutil import get_filesystem_encoding
from allmydata.util.abbreviate import parse_abbreviated_size
@ -1042,6 +1043,21 @@ class _Client(node.Node, pollmixin.PollMixin):
def set_default_mutable_keysize(self, keysize):
    """
    Set the default key size on this node's key generator.

    :param keysize: The new default key size.  It is handed unchanged to
        the key generator's ``set_default_keysize``.
    """
    self._key_generator.set_default_keysize(keysize)
def _get_tempdir(self):
    """
    Work out where this node's temporary files belong and make sure that
    location exists.

    The location is read from the ``tempdir`` item of the ``[node]``
    configuration section (default ``tmp``) and resolved with the
    configuration's ``get_config_path``.

    :return: The resolved path.  If nothing existed at that path it has
        been created as a directory.  (The exact string type is whatever
        ``get_config_path`` returns -- TODO confirm.)
    """
    configured = self.config.get_config("node", "tempdir", "tmp")
    if isinstance(configured, bytes):
        # Normalize a bytes configuration value to text before resolving it.
        configured = configured.decode('utf-8')
    path = self.config.get_config_path(configured)
    if os.path.exists(path):
        return path
    fileutil.make_dirs(path)
    return path
def init_web(self, webport):
self.log("init_web(webport=%s)", args=(webport,))
@ -1049,7 +1065,13 @@ class _Client(node.Node, pollmixin.PollMixin):
nodeurl_path = self.config.get_config_path("node.url")
staticdir_config = self.config.get_config("node", "web.static", "public_html")
staticdir = self.config.get_config_path(staticdir_config)
ws = WebishServer(self, webport, nodeurl_path, staticdir)
ws = WebishServer(
self,
webport,
self._get_tempdir(),
nodeurl_path,
staticdir,
)
ws.setServiceParent(self)
def init_ftp_server(self):

View File

@ -19,7 +19,6 @@ import os.path
import re
import types
import errno
import tempfile
from base64 import b32decode, b32encode
# On Python 2 this will be the backported package.
@ -34,7 +33,6 @@ import foolscap.logging.log
from allmydata.util import log
from allmydata.util import fileutil, iputil
from allmydata.util.assertutil import _assert
from allmydata.util.fileutil import abspath_expanduser_unicode
from allmydata.util.encodingutil import get_filesystem_encoding, quote_output
from allmydata.util import configutil
@ -733,8 +731,6 @@ class Node(service.MultiService):
self._i2p_provider = i2p_provider
self._tor_provider = tor_provider
self.init_tempdir()
self.create_log_tub()
self.logSource = "Node"
self.setup_logging()
@ -761,25 +757,6 @@ class Node(service.MultiService):
"""
return len(self.tub.getListeners()) > 0
def init_tempdir(self):
    """
    Initialize/create a directory for temporary files.

    The directory is taken from the ``tempdir`` item of the ``[node]``
    configuration section (default ``tmp``), created if nothing exists at
    that path, and then installed as the process-wide ``tempfile.tempdir``.

    A throw-away file is created to check that ``tempfile`` really uses
    the configured directory.  The file descriptor is closed and the file
    removed again whether or not that check passes.
    """
    tempdir_config = self.config.get_config("node", "tempdir", "tmp")
    if isinstance(tempdir_config, bytes):
        tempdir_config = tempdir_config.decode('utf-8')
    tempdir = self.config.get_config_path(tempdir_config)
    if not os.path.exists(tempdir):
        fileutil.make_dirs(tempdir)
    tempfile.tempdir = tempdir
    # this should cause twisted.web.http (which uses
    # tempfile.TemporaryFile) to put large request bodies in the given
    # directory. Without this, the default temp dir is usually /tmp/,
    # which is frequently too small.
    temp_fd, test_name = tempfile.mkstemp()
    try:
        _assert(os.path.dirname(test_name) == tempdir, test_name, tempdir)
    finally:
        os.close(temp_fd)  # avoid leak of unneeded fd
        # mkstemp() never deletes the file it creates; remove the probe
        # so it does not pile up in the temporary directory.
        os.remove(test_name)
# pull this outside of Node's __init__ too, see:
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2948
def create_log_tub(self):

View File

@ -1150,8 +1150,9 @@ class _TestCaseMixin(object):
test (including setUp and tearDown messages).
* trial-compatible mktemp method
* unittest2-compatible assertRaises helper
* Automatic cleanup of tempfile.tempdir mutation (pervasive through the
Tahoe-LAFS test suite).
* Automatic cleanup of tempfile.tempdir mutation (once pervasive through
the Tahoe-LAFS test suite, perhaps gone now but someone should verify
this).
"""
def setUp(self):
# Restore the original temporary directory. Node ``init_tempdir``

View File

@ -6,6 +6,9 @@ import treq
from bs4 import BeautifulSoup
from twisted.python.filepath import (
FilePath,
)
from twisted.application import service
from twisted.internet import defer
from twisted.internet.defer import inlineCallbacks, returnValue
@ -316,8 +319,16 @@ class WebMixin(TimezoneMixin):
self.staticdir = self.mktemp()
self.clock = Clock()
self.fakeTime = 86460 # 1d 0h 1m 0s
self.ws = webish.WebishServer(self.s, "0", staticdir=self.staticdir,
clock=self.clock, now_fn=lambda:self.fakeTime)
tempdir = FilePath(self.mktemp())
tempdir.makedirs()
self.ws = webish.WebishServer(
self.s,
"0",
tempdir=tempdir.path,
staticdir=self.staticdir,
clock=self.clock,
now_fn=lambda:self.fakeTime,
)
self.ws.setServiceParent(self.s)
self.webish_port = self.ws.getPortnum()
self.webish_url = self.ws.getURL()

View File

@ -5,6 +5,19 @@ Tests for ``allmydata.webish``.
from uuid import (
uuid4,
)
from errno import (
EACCES,
)
from io import (
BytesIO,
)
from hypothesis import (
given,
)
from hypothesis.strategies import (
integers,
)
from testtools.matchers import (
AfterPreprocessing,
@ -12,8 +25,13 @@ from testtools.matchers import (
Equals,
MatchesAll,
Not,
IsInstance,
HasLength,
)
from twisted.python.runtime import (
platform,
)
from twisted.python.filepath import (
FilePath,
)
@ -30,7 +48,7 @@ from ..common import (
from ...webish import (
TahoeLAFSRequest,
tahoe_lafs_site,
TahoeLAFSSite,
)
@ -96,7 +114,7 @@ class TahoeLAFSRequestTests(SyncTestCase):
class TahoeLAFSSiteTests(SyncTestCase):
"""
Tests for the ``Site`` created by ``tahoe_lafs_site``.
Tests for ``TahoeLAFSSite``.
"""
def _test_censoring(self, path, censored):
"""
@ -112,7 +130,7 @@ class TahoeLAFSSiteTests(SyncTestCase):
"""
logPath = self.mktemp()
site = tahoe_lafs_site(Resource(), logPath=logPath)
site = TahoeLAFSSite(self.mktemp(), Resource(), logPath=logPath)
site.startFactory()
channel = DummyChannel()
@ -170,6 +188,106 @@ class TahoeLAFSSiteTests(SyncTestCase):
b"/uri?uri=[CENSORED]",
)
def _create_request(self, tempdir):
    """
    Build a ``TahoeLAFSRequest`` whose channel is attached to a freshly
    started ``TahoeLAFSSite``.

    :param tempdir: A path object whose ``path`` attribute names the
        temporary directory to give to the site (callers in this class
        pass a ``FilePath``).

    :return TahoeLAFSRequest: The new request instance.
    """
    factory = TahoeLAFSSite(tempdir.path, Resource(), logPath=self.mktemp())
    factory.startFactory()
    fake_channel = DummyChannel()
    fake_channel.site = factory
    return TahoeLAFSRequest(fake_channel)
@given(integers(min_value=0, max_value=1024 * 1024 - 1))
def test_small_content(self, request_body_size):
    """
    When the announced body size is below 1 MiB the request content is
    held in a ``BytesIO``.
    """
    # The temporary directory is not created here; only its path is handed
    # to the site.
    request = self._create_request(FilePath(self.mktemp()))
    request.gotLength(request_body_size)
    self.assertThat(request.content, IsInstance(BytesIO))
def _large_request_test(self, request_body_size):
    """
    Assert that when a request with a body of the given size is received
    its content is written to the directory the ``TahoeLAFSSite`` is
    configured with.

    :param request_body_size: The value passed to the request's
        ``gotLength``.  Callers in this class pass ``None`` or an integer
        of at least 1 MiB.
    """
    tempdir = FilePath(self.mktemp())
    tempdir.makedirs()
    request = self._create_request(tempdir)
    # So. Bad news. The temporary file for the uploaded content is
    # unnamed (and this isn't even necessarily a bad thing since it is how
    # you get automatic on-process-exit cleanup behavior on POSIX). It's
    # not visible by inspecting the filesystem. It has no name we can
    # discover. Then how do we verify it is written to the right place?
    # The question itself is meaningless if we try to be too precise. It
    # *has* no filesystem location. However, it is still stored *on* some
    # filesystem. We still want to make sure it is on the filesystem we
    # specified because otherwise it might be on a filesystem that's too
    # small or undesirable in some other way.
    #
    # I don't know of any way to ask a file descriptor which filesystem
    # it's on, either, though. It might be the case that the [f]statvfs()
    # result could be compared somehow to infer the filesystem but
    # ... it's not clear what the failure modes might be there, across
    # different filesystems and runtime environments.
    #
    # Another approach is to make the temp directory unwriteable and
    # observe the failure when an attempt is made to create a file there.
    # This is hardly a lovely solution but at least it's kind of simple.
    #
    # It would be nice if it worked consistently cross-platform but on
    # Windows os.chmod is more or less broken.
    if platform.isWindows():
        # On Windows, check instead that exactly one entry shows up in the
        # directory once the length has been announced.
        request.gotLength(request_body_size)
        self.assertThat(
            tempdir.children(),
            HasLength(1),
        )
    else:
        # Drop write permission so that creating a file in the directory
        # is expected to fail.
        tempdir.chmod(0o550)
        with self.assertRaises(OSError) as ctx:
            request.gotLength(request_body_size)
            # Only reached when gotLength did not raise.  This is a plain
            # Exception rather than an OSError, so assertRaises does not
            # swallow it and the test fails with the directory listing in
            # the message.
            raise Exception(
                "OSError not raised, instead tempdir.children() = {}".format(
                    tempdir.children(),
                ),
            )
        self.assertThat(
            ctx.exception.errno,
            Equals(EACCES),
        )
def test_unknown_request_size(self):
    """
    When the size of a request body is not known (``None``) the body is
    written to a file in the temporary directory given to
    ``TahoeLAFSSite``.
    """
    self._large_request_test(request_body_size=None)
@given(integers(min_value=1024 * 1024))
def test_large_request(self, request_body_size):
    """
    When a request body is 1 MiB or larger it is written to a file in the
    temporary directory given to ``TahoeLAFSSite``.
    """
    self._large_request_test(request_body_size=request_body_size)
def param(name, value):
    """
    Format a ``name=value`` pair preceded by ``"; "``.

    :param name: The parameter name.
    :param value: The parameter value.

    :return: The text ``; <name>=<value>``.
    """
    return u"; {name}={value}".format(name=name, value=value)

View File

@ -1,13 +1,13 @@
from six import ensure_str
import re, time
import re, time, tempfile
from functools import (
partial,
)
from cgi import (
FieldStorage,
)
from io import (
BytesIO,
)
from twisted.application import service, strports, internet
from twisted.web import static
@ -150,17 +150,34 @@ def _logFormatter(logDateTime, request):
)
tahoe_lafs_site = partial(
Site,
requestFactory=TahoeLAFSRequest,
logFormatter=_logFormatter,
)
class TahoeLAFSSite(Site, object):
    """
    The HTTP protocol factory used by Tahoe-LAFS.

    On top of ``Site`` it provides:

    * A configurable temporary directory where large request bodies can be
      written so they don't stay in memory.

    * A log formatter that writes some access logs but omits capability
      strings to help keep them secret.
    """
    requestFactory = TahoeLAFSRequest

    def __init__(self, tempdir, *args, **kwargs):
        """
        :param tempdir: The directory in which temporary files for large
            request bodies are created.

        All other arguments are passed on to ``Site.__init__``, together
        with ``logFormatter=_logFormatter``.
        """
        Site.__init__(self, *args, logFormatter=_logFormatter, **kwargs)
        self._tempdir = tempdir

    def getContentFile(self, length):
        """
        Choose the file-like object that will hold a request body.

        :param length: The announced body size in bytes, or ``None`` if it
            is not known.

        :return: A ``BytesIO`` for a known size below 1 MiB; otherwise a
            ``tempfile.TemporaryFile`` created in the configured temporary
            directory.
        """
        if length is not None and length < 1024 * 1024:
            return BytesIO()
        return tempfile.TemporaryFile(dir=self._tempdir)
class WebishServer(service.MultiService):
name = "webish"
def __init__(self, client, webport, nodeurl_path=None, staticdir=None,
def __init__(self, client, webport, tempdir, nodeurl_path=None, staticdir=None,
clock=None, now_fn=time.time):
service.MultiService.__init__(self)
# the 'data' argument to all render() methods default to the Client
@ -170,7 +187,7 @@ class WebishServer(service.MultiService):
# time in a deterministic manner.
self.root = root.Root(client, clock, now_fn)
self.buildServer(webport, nodeurl_path, staticdir)
self.buildServer(webport, tempdir, nodeurl_path, staticdir)
# If set, clock is a twisted.internet.task.Clock that the tests
# use to test ophandle expiration.
@ -180,9 +197,9 @@ class WebishServer(service.MultiService):
self.root.putChild(b"storage-plugins", StoragePlugins(client))
def buildServer(self, webport, nodeurl_path, staticdir):
def buildServer(self, webport, tempdir, nodeurl_path, staticdir):
self.webport = webport
self.site = tahoe_lafs_site(self.root)
self.site = TahoeLAFSSite(tempdir, self.root)
self.staticdir = staticdir # so tests can check
if staticdir:
self.root.putChild("static", static.File(staticdir))
@ -260,4 +277,4 @@ class IntroducerWebishServer(WebishServer):
def __init__(self, introducer, webport, nodeurl_path=None, staticdir=None):
service.MultiService.__init__(self)
self.root = introweb.IntroducerRoot(introducer)
self.buildServer(webport, nodeurl_path, staticdir)
self.buildServer(webport, tempfile.tempdir, nodeurl_path, staticdir)