2020-12-29 17:39:00 +00:00
|
|
|
"""
|
|
|
|
Ported to Python 3.
|
|
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
from __future__ import print_function
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from future.utils import PY2
|
|
|
|
if PY2:
|
|
|
|
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
|
|
|
|
|
2020-10-26 16:12:49 +00:00
|
|
|
from six import ensure_str
|
|
|
|
|
2020-11-18 20:47:06 +00:00
|
|
|
import re, time, tempfile
|
2020-10-21 10:59:59 +00:00
|
|
|
|
|
|
|
from cgi import (
|
|
|
|
FieldStorage,
|
|
|
|
)
|
2020-11-18 20:47:06 +00:00
|
|
|
from io import (
|
|
|
|
BytesIO,
|
|
|
|
)
|
2020-10-21 10:59:59 +00:00
|
|
|
|
2008-01-08 01:04:56 +00:00
|
|
|
from twisted.application import service, strports, internet
|
2020-10-21 13:45:11 +00:00
|
|
|
from twisted.web import static
|
|
|
|
from twisted.web.http import (
|
|
|
|
parse_qs,
|
|
|
|
)
|
|
|
|
from twisted.web.server import (
|
|
|
|
Request,
|
|
|
|
Site,
|
|
|
|
)
|
2008-05-19 19:57:04 +00:00
|
|
|
from twisted.internet import defer
|
2018-04-20 20:03:19 +00:00
|
|
|
from twisted.internet.address import (
|
|
|
|
IPv4Address,
|
|
|
|
IPv6Address,
|
|
|
|
)
|
2011-01-17 23:31:52 +00:00
|
|
|
from allmydata.util import log, fileutil
|
2006-12-04 11:06:09 +00:00
|
|
|
|
2008-05-19 19:57:04 +00:00
|
|
|
from allmydata.web import introweb, root
|
2019-08-08 01:14:15 +00:00
|
|
|
from allmydata.web.operations import OphandleTable
|
2006-12-05 02:54:35 +00:00
|
|
|
|
2019-07-24 19:37:24 +00:00
|
|
|
from .web.storage_plugins import (
|
|
|
|
StoragePlugins,
|
|
|
|
)
|
|
|
|
|
2021-02-01 15:43:01 +00:00
|
|
|
|
|
|
|
if not PY2:
    class FileUploadFieldStorage(FieldStorage):
        """
        A ``FieldStorage`` that keeps uploaded file contents as bytes.

        Python 2 always handed uploaded files back as bytes.  Python 3
        instead guesses: a field that carries a filename is treated as a file
        upload (bytes), and a field without one is treated as Unicode text.

        Tahoe-LAFS always wants bytes, and it also lets the filename be
        supplied through a separate field called "name" instead of the MIME
        filename, so the guess goes wrong for us.  Hence this workaround.

        Source for idea:
        https://mail.python.org/pipermail/python-dev/2017-February/147402.html
        """

        @property
        def filename(self):
            # The "file" field is what carries uploads (see directory.py's
            # _POST_upload).  Any other field, or a "file" field that did
            # come with a MIME filename, reports the stored value unchanged.
            if self.name != "file" or self._mime_filename:
                return self._mime_filename
            # No MIME filename on the upload field: pretend there is one so
            # that FieldStorage returns bytes.
            return "unknown-filename"

        @filename.setter
        def filename(self, value):
            # Remember what FieldStorage parsed; the getter decides what to
            # report back.
            self._mime_filename = value
else:
    # On Python 2 the stock class is used as-is.
    FileUploadFieldStorage = FieldStorage
|
|
|
|
|
|
|
|
|
2020-10-21 13:45:11 +00:00
|
|
|
class TahoeLAFSRequest(Request, object):
    """
    A Twisted Web ``Request`` extended with behavior Tahoe-LAFS relies on.

    :ivar NoneType|FieldStorage fields: For POST requests, a structured
        representation of the contents of the request body.  For anything
        else, ``None``.
    """
    fields = None

    def requestReceived(self, command, path, version):
        """
        Called by channel when all data has been received.

        Replaces the base implementation in order to apply site-wide policy
        and to parse multipart/form-post bodies in a way that uses less
        memory for large file uploads.

        :param command: the request method; stored as ``self.method``.
        :param path: the request URI, including any query string; stored as
            ``self.uri``.
        :param version: the client protocol; stored as ``self.clientproto``.
        """
        self.content.seek(0)
        self.args = {}
        self.stack = []

        self.method, self.uri = command, path
        self.clientproto = version

        # Everything before the first "?" is the path; anything after it is
        # the query string.
        self.path, separator, query = self.uri.partition(b'?')
        if separator:
            self.args = parse_qs(query, 1)

        if self.method == b'POST':
            # We use FieldStorage here because it performs better than
            # cgi.parse_multipart(self.content, pdict) which is what
            # twisted.web.http.Request uses.
            headers = {}
            for header_name, header_values in self.requestHeaders.getAllRawHeaders():
                # Only the last value of each header is kept.
                headers[ensure_str(header_name.lower())] = ensure_str(header_values[-1])

            if 'content-length' not in headers:
                # Python 3's cgi module would really, really like us to set
                # Content-Length, so measure the body: seek to the end
                # (whence=2), read the offset, then rewind.
                self.content.seek(0, 2)
                headers['content-length'] = str(self.content.tell())
                self.content.seek(0)

            self.fields = FileUploadFieldStorage(
                self.content, headers, environ={'REQUEST_METHOD': 'POST'})
            # Rewind after the body has been handed to the parser.
            self.content.seek(0)

        self._tahoeLAFSSecurityPolicy()

        self.processing_started_timestamp = time.time()
        self.process()

    def _tahoeLAFSSecurityPolicy(self):
        """
        Set the response headers that make up the Tahoe-LAFS security policy.

        ``requestReceived`` calls this for every request, so the policy is
        imposed on all HTTP requests received by the Tahoe-LAFS HTTP server,
        regardless of other implementation details.
        """
        # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
        self.responseHeaders.setRawHeaders("X-Frame-Options", ["DENY"])
        # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy
        self.setHeader("Referrer-Policy", "no-referrer")
|
|
|
|
|
2007-08-15 22:21:38 +00:00
|
|
|
|
2018-04-20 20:03:19 +00:00
|
|
|
def _get_client_ip(request):
|
|
|
|
try:
|
|
|
|
get = request.getClientAddress
|
|
|
|
except AttributeError:
|
|
|
|
return request.getClientIP()
|
|
|
|
else:
|
|
|
|
client_addr = get()
|
|
|
|
if isinstance(client_addr, (IPv4Address, IPv6Address)):
|
|
|
|
return client_addr.host
|
|
|
|
return None
|
2008-02-13 02:31:23 +00:00
|
|
|
|
2007-08-15 22:21:38 +00:00
|
|
|
|
2020-10-21 18:42:30 +00:00
|
|
|
def _logFormatter(logDateTime, request):
    """
    Build the access-log line for ``request`` with capabilities censored.

    The line hides most of the cap to preserve user privacy, but keeps the
    query args so that things like ``t=json`` can still be identified.  No
    attempt is made to match apache formatting.

    TODO: when we move to DSA dirnodes and shorter caps, consider exposing a
    few characters of the cap, or maybe a few characters of its hash.

    :param logDateTime: accepted as part of the log formatter signature; not
        used here.
    :param request: the request to describe.  Its ``uri``, ``method``,
        ``code`` and ``sentLength`` are read, and its client address is
        looked up with ``_get_client_ip``.

    :return: a text string of the form
        ``web: <clientip> <method> <uri> <code> <length>``.
    """
    def _as_text(value):
        # The method and URI are bytes.  Interpolating bytes with %s into a
        # text template on Python 3 yields "b'GET'" rather than "GET", so
        # decode first.  Undecodable bytes are replaced rather than allowed
        # to make logging fail.
        if isinstance(value, bytes):
            return value.decode("utf-8", "replace")
        return value

    # Split at the first "?" only; with no "?" both the separator and the
    # query args come back empty.
    path, separator, queryargs = request.uri.partition(b"?")
    if separator:
        # there is a form handler which redirects POST /uri?uri=FOO into
        # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make
        # sure we censor these too.
        if queryargs.startswith(b"uri="):
            queryargs = b"uri=[CENSORED]"
        queryargs = b"?" + queryargs

    # Paths under these prefixes contain a cap; keep only the prefix.
    for prefix in (b"/uri/", b"/file/", b"/named/"):
        if path.startswith(prefix):
            path = prefix + b"[CENSORED]"
            break

    uri = path + queryargs

    template = "web: %(clientip)s %(method)s %(uri)s %(code)s %(length)s"
    return template % dict(
        clientip=_get_client_ip(request),
        method=_as_text(request.method),
        uri=_as_text(uri),
        code=request.code,
        length=(request.sentLength or "-"),
        # The two entries below are not referenced by the template; they are
        # retained so the mapping itself is unchanged.
        facility="tahoe.webish",
        level=log.OPERATIONAL,
    )
|
|
|
|
|
|
|
|
|
2020-11-18 21:53:28 +00:00
|
|
|
class TahoeLAFSSite(Site, object):
    """
    The HTTP protocol factory used by Tahoe-LAFS.

    On top of ``Site`` it provides:

    * A configurable temporary directory where large request bodies can be
      written so they don't stay in memory.

    * A log formatter that writes some access logs but omits capability
      strings to help keep them secret.
    """
    requestFactory = TahoeLAFSRequest

    def __init__(self, tempdir, *args, **kwargs):
        """
        :param tempdir: directory in which temporary files for large request
            bodies are created.

        All remaining arguments are passed through to ``Site.__init__``,
        together with ``logFormatter=_logFormatter``.
        """
        Site.__init__(self, *args, logFormatter=_logFormatter, **kwargs)
        self._tempdir = tempdir

    def getContentFile(self, length):
        """
        Return a file-like object to hold a request body.

        :param length: the size of the body, or ``None``.

        :return: an in-memory ``BytesIO`` for bodies known to be smaller
            than 1 MiB; otherwise (including when ``length`` is ``None``) a
            temporary file created in the configured directory.
        """
        in_memory_limit = 1024 * 1024
        if length is not None and length < in_memory_limit:
            return BytesIO()
        return tempfile.TemporaryFile(dir=self._tempdir)
|
2020-10-21 18:42:30 +00:00
|
|
|
|
|
|
|
|
2006-12-07 21:48:37 +00:00
|
|
|
class WebishServer(service.MultiService):
    """
    A ``MultiService`` that serves a web resource tree on a strports
    endpoint and records the URL and port number it ends up listening on.
    """
    name = "webish"

    def __init__(self, client, webport, tempdir, nodeurl_path=None, staticdir=None,
                 clock=None, now_fn=time.time):
        """
        :param client: passed to ``root.Root`` and ``StoragePlugins``.
        :param webport: strports description, or a bare port number string.
        :param tempdir: passed to ``buildServer`` for the site's temporary
            files.
        :param nodeurl_path: if set, the node URL is written to this path
            once the listening port is known.
        :param staticdir: if set, served as the ``static`` child of the root.
        :param clock: if set, a twisted.internet.task.Clock that the tests
            use to control the passage of time deterministically.
        :param now_fn: passed to ``root.Root``.
        """
        service.MultiService.__init__(self)

        # the 'data' argument to all render() methods default to the Client
        # the 'clock' argument to root.Root is, if set, a
        # twisted.internet.task.Clock that is provided by the unit tests
        # so that they can test features that involve the passage of
        # time in a deterministic manner.
        self.root = root.Root(client, clock, now_fn)
        self.buildServer(webport, tempdir, nodeurl_path, staticdir)

        # If set, clock is a twisted.internet.task.Clock that the tests
        # use to test ophandle expiration.
        self._operations = OphandleTable(clock)
        self._operations.setServiceParent(self)

        for child_name, child_resource in (
                (b"operations", self._operations),
                (b"storage-plugins", StoragePlugins(client)),
        ):
            self.root.putChild(child_name, child_resource)

    def buildServer(self, webport, tempdir, nodeurl_path, staticdir):
        """
        Create the site and its listening service around ``self.root``.

        ``self.root`` must already be set.  The listening service is made a
        child of this service, and ``self._started`` is created as a
        Deferred that ``startService`` fires once the port is known.
        """
        self.webport = webport
        self.site = TahoeLAFSSite(tempdir, self.root)
        self.staticdir = staticdir  # so tests can check
        if staticdir:
            self.root.putChild(b"static", static.File(staticdir))

        if re.match(r'\d', webport):
            # twisted warns about bare "0" or "3456"
            webport = "tcp:" + webport
        # strports must be native strings.
        listener = strports.service(ensure_str(webport), self.site)
        listener.setServiceParent(self)

        # Filled in by startService once the listening port is known.
        self._scheme = None
        self._portnum = None
        self._url = None
        self._listener = listener  # stash it so we can query for the portnum

        self._started = defer.Deferred()
        if nodeurl_path:
            def _write_nodeurl_file(ign):
                # this file will be created with default permissions
                fileutil.write_atomically(
                    nodeurl_path, self.getURL() + "\n", mode="")
            self._started.addCallback(_write_nodeurl_file)

    def getURL(self):
        """
        Return the URL computed by ``startService``; asserts that it is set.
        """
        assert self._url
        return self._url

    def getPortnum(self):
        """
        Return the port number recorded by ``startService``; asserts that it
        is set.
        """
        assert self._portnum
        return self._portnum

    def startService(self):
        """
        Start child services, then work out the scheme and port number.

        ``self._started`` fires with ``None`` once the URL is known, or
        errbacks if the port cannot be obtained or the listener is of an
        unrecognised kind.
        """
        def _got_port(lp):
            self._portnum = lp.getHost().port
            # what is our webport?
            assert self._scheme
            self._url = "%s://127.0.0.1:%d/" % (self._scheme, self._portnum)
            self._started.callback(None)
            return lp

        def _fail(f):
            self._started.errback(f)
            return f

        service.MultiService.startService(self)
        listener = self._listener

        if hasattr(listener, 'endpoint') and hasattr(listener, '_waitingForPort'):
            # Twisted 10.2 gives us a StreamServerEndpointService. This is
            # ugly but should do for now.
            endpoint_class_name = listener.endpoint.__class__.__name__
            self._scheme = 'https' if endpoint_class_name.startswith('SSL') else 'http'
            listener._waitingForPort.addCallbacks(_got_port, _fail)
            return

        # Twisted <= 10.1
        legacy_servers = (
            (internet.TCPServer, 'http'),
            (internet.SSLServer, 'https'),
        )
        for server_type, scheme in legacy_servers:
            if isinstance(listener, server_type):
                self._scheme = scheme
                _got_port(listener._port)
                return

        # who knows, probably some weirdo future version of Twisted
        self._started.errback(AssertionError("couldn't find out the scheme or port for the web-API server"))

    def get_operations(self):
        """
        :return: a reference to our "active operations" tracker
        """
        return self._operations
|
|
|
|
|
2011-01-17 07:47:51 +00:00
|
|
|
|
2008-03-12 00:36:25 +00:00
|
|
|
class IntroducerWebishServer(WebishServer):
    """
    A ``WebishServer`` whose root resource is an ``IntroducerRoot``.
    """

    def __init__(self, introducer, webport, nodeurl_path=None, staticdir=None):
        """
        :param introducer: passed to ``introweb.IntroducerRoot``.
        :param webport: see ``buildServer``.
        :param nodeurl_path: see ``buildServer``.
        :param staticdir: see ``buildServer``.
        """
        # MultiService is initialised directly; WebishServer.__init__ is not
        # called, so only the root and the server are set up here.
        service.MultiService.__init__(self)
        self.root = introweb.IntroducerRoot(introducer)

        # Use whatever tempfile.tempdir currently holds as the site's
        # temporary directory.
        default_tempdir = tempfile.tempdir
        self.buildServer(webport, default_tempdir, nodeurl_path, staticdir)
|