2011-01-16 23:47:51 -08:00
|
|
|
import re, time
|
2008-01-07 18:04:56 -07:00
|
|
|
from twisted.application import service, strports, internet
|
2016-04-28 00:35:52 -07:00
|
|
|
from twisted.web import http, static
|
2008-05-19 12:57:04 -07:00
|
|
|
from twisted.internet import defer
|
2018-04-20 16:03:19 -04:00
|
|
|
from twisted.internet.address import (
|
|
|
|
IPv4Address,
|
|
|
|
IPv6Address,
|
|
|
|
)
|
2016-04-28 00:35:52 -07:00
|
|
|
from nevow import appserver, inevow
|
2011-01-17 15:31:52 -08:00
|
|
|
from allmydata.util import log, fileutil
|
2006-12-04 04:06:09 -07:00
|
|
|
|
2008-05-19 12:57:04 -07:00
|
|
|
from allmydata.web import introweb, root
|
2019-08-07 17:47:52 -06:00
|
|
|
from allmydata.web.common import MyExceptionHandler
|
2019-08-07 19:14:15 -06:00
|
|
|
from allmydata.web.operations import OphandleTable
|
2006-12-04 19:54:35 -07:00
|
|
|
|
2019-07-24 15:37:24 -04:00
|
|
|
from .web.storage_plugins import (
|
|
|
|
StoragePlugins,
|
|
|
|
)
|
|
|
|
|
2007-08-10 17:25:33 -07:00
|
|
|
# we must override twisted.web.http.Request.requestReceived with a version
|
|
|
|
# that doesn't use cgi.parse_multipart() . Since we actually use Nevow, we
|
|
|
|
# override the nevow-specific subclass, nevow.appserver.NevowRequest . This
|
|
|
|
# is an exact copy of twisted.web.http.Request (from SVN HEAD on 10-Aug-2007)
|
|
|
|
# that modifies the way form arguments are parsed. Note that this sort of
|
|
|
|
# surgery may induce a dependency upon a particular version of twisted.web
|
|
|
|
|
|
|
|
# Module-level alias for twisted.web.http.parse_qs. MyRequest.requestReceived
# uses it to turn the part of the request URI after '?' into self.args.
parse_qs = http.parse_qs
|
2019-08-13 16:55:40 -04:00
|
|
|
class MyRequest(appserver.NevowRequest, object):
    """
    Request class installed as the ``requestFactory`` of the web-API site.

    It differs from the stock Nevow/Twisted request in three ways, all of
    which are visible below:

    * ``requestReceived`` only parses the URI query string into
      ``self.args``; it never parses the request body into ``self.args``.
    * every response gets ``Referrer-Policy: no-referrer`` and
      ``X-Frame-Options: DENY`` headers.
    * ``_logger`` emits an access-log event in which capability strings are
      censored.
    """
    # Form fields parsed from the request body. Per the notes in
    # requestReceived, Nevow is expected to put them here rather than in
    # self.args.
    fields = None
    # Read by _logger, which appends " [ERROR]" to the log line when this is
    # true. NOTE(review): it is set by code outside this class -- confirm
    # where.
    _tahoe_request_had_error = None

    def requestReceived(self, command, path, version):
        """Called by channel when all data has been received.

        This method is not intended for users.

        :param command: the HTTP method; stored as ``self.method``.
        :param path: the request URI, including any ``?query`` part; stored
            as ``self.uri`` and split into ``self.path`` and ``self.args``.
        :param version: the client's protocol version; stored as
            ``self.clientproto``.
        """
        self.content.seek(0,0)
        self.args = {}
        self.stack = []
        self.setHeader("Referrer-Policy", "no-referrer")

        self.method, self.uri = command, path
        self.clientproto = version
        x = self.uri.split('?', 1)

        if len(x) == 1:
            self.path = self.uri
        else:
            self.path, argstring = x
            self.args = parse_qs(argstring, 1)

        # Adding security headers. These will be sent for *all* HTTP requests.
        # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
        self.responseHeaders.setRawHeaders("X-Frame-Options", ["DENY"])

        # Argument processing.

        ##      The original twisted.web.http.Request.requestReceived code parsed the
        ##      content and added the form fields it found there to self.args . It
        ##      did this with cgi.parse_multipart, which holds the arguments in RAM
        ##      and is thus unsuitable for large file uploads. The Nevow subclass
        ##      (nevow.appserver.NevowRequest) uses cgi.FieldStorage instead (putting
        ##      the results in self.fields), which is much more memory-efficient.
        ##      Since we know we're using Nevow, we can anticipate these arguments
        ##      appearing in self.fields instead of self.args, and thus skip the
        ##      parse-content-into-self.args step.

        ## args = self.args
        ## ctype = self.getHeader('content-type')
        ## if self.method == "POST" and ctype:
        ##     mfd = 'multipart/form-data'
        ##     key, pdict = cgi.parse_header(ctype)
        ##     if key == 'application/x-www-form-urlencoded':
        ##         args.update(parse_qs(self.content.read(), 1))
        ##     elif key == mfd:
        ##         try:
        ##             args.update(cgi.parse_multipart(self.content, pdict))
        ##         except KeyError, e:
        ##             if e.args[0] == 'content-disposition':
        ##                 # Parse_multipart can't cope with missing
        ##                 # content-disposition headers in multipart/form-data
        ##                 # parts, so we catch the exception and tell the client
        ##                 # it was a bad request.
        ##                 self.channel.transport.write(
        ##                     "HTTP/1.1 400 Bad Request\r\n\r\n")
        ##                 self.channel.transport.loseConnection()
        ##                 return
        ##             raise

        # remember when we started, then hand the request off for processing
        self.processing_started_timestamp = time.time()
        self.process()

    def _logger(self):
        """
        Emit one access-log event for this request to the "tahoe.webish"
        facility.

        The logged line has the shape
        ``web: <clientip> <method> <uri> <code> <length>[ [ERROR]]``, where
        ``<uri>`` has had capability strings removed from both the path and
        the query arguments.
        """
        # we build up a log string that hides most of the cap, to preserve
        # user privacy. We retain the query args so we can identify things
        # like t=json. Then we send it to the flog. We make no attempt to
        # match apache formatting. TODO: when we move to DSA dirnodes and
        # shorter caps, consider exposing a few characters of the cap, or
        # maybe a few characters of its hash.
        x = self.uri.split("?", 1)
        if len(x) == 1:
            # no query args
            path = self.uri
            queryargs = ""
        else:
            path, queryargs = x
            # there is a form handler which redirects POST /uri?uri=FOO into
            # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make
            # sure we censor these too. The uri= argument is not guaranteed
            # to come first (e.g. "?t=json&uri=FOO"), so look at every
            # argument, not just the start of the query string. Arguments
            # are split on both "&" and ";" to match how the query string is
            # parsed into self.args.
            if any(arg.startswith("uri=")
                   for arg in re.split(r"[&;]", queryargs)):
                queryargs = "[uri=CENSORED]"
            queryargs = "?" + queryargs
        if path.startswith("/uri"):
            path = "/uri/[CENSORED].."
        elif path.startswith("/file"):
            path = "/file/[CENSORED].."
        elif path.startswith("/named"):
            path = "/named/[CENSORED].."

        uri = path + queryargs

        error = ""
        if self._tahoe_request_had_error:
            error = " [ERROR]"

        log.msg(
            format=(
                "web: %(clientip)s %(method)s %(uri)s %(code)s "
                "%(length)s%(error)s"
            ),
            clientip=_get_client_ip(self),
            method=self.method,
            uri=uri,
            code=self.code,
            # a zero/unset sentLength is logged as "-"
            length=(self.sentLength or "-"),
            error=error,
            facility="tahoe.webish",
            level=log.OPERATIONAL,
        )
|
2008-02-12 19:31:23 -07:00
|
|
|
|
2007-08-15 15:21:38 -07:00
|
|
|
|
2018-04-20 16:03:19 -04:00
|
|
|
def _get_client_ip(request):
    """
    Return the client's IP address for ``request``.

    If the request object has a ``getClientAddress`` method, it is called
    and the ``host`` of the returned address is returned, provided that
    address is an ``IPv4Address`` or ``IPv6Address``; for any other kind of
    address the result is ``None``. If the request has no
    ``getClientAddress`` attribute, the result of ``request.getClientIP()``
    is returned instead.
    """
    try:
        address_getter = request.getClientAddress
    except AttributeError:
        # presumably an older Twisted request without getClientAddress --
        # use the only API it offers
        return request.getClientIP()

    address = address_getter()
    if not isinstance(address, (IPv4Address, IPv6Address)):
        # not an IP address, so there is no host to report
        return None
    return address.host
|
2008-02-12 19:31:23 -07:00
|
|
|
|
2007-08-15 15:21:38 -07:00
|
|
|
|
2006-12-07 14:48:37 -07:00
|
|
|
class WebishServer(service.MultiService):
    """
    The web-API service: it builds the resource tree rooted at ``root.Root``,
    serves it through a Nevow site listening on ``webport``, and records the
    scheme, port number and URL it ends up listening on.
    """
    name = "webish"

    def __init__(self, client, webport, nodeurl_path=None, staticdir=None,
                 clock=None, now_fn=time.time):
        """
        :param client: passed to ``root.Root`` and ``StoragePlugins``.
        :param webport: strports description (or bare port number string) to
            listen on.
        :param nodeurl_path: if true, the file to which our URL is written
            once the listening port is known.
        :param staticdir: if true, a directory served under ``/static``.
        :param clock: if set, a twisted.internet.task.Clock provided by the
            unit tests so that time-dependent features can be tested
            deterministically.
        :param now_fn: passed through to ``root.Root``.
        """
        service.MultiService.__init__(self)

        # the 'data' argument to all render() methods default to the Client.
        # 'clock' is handed to root.Root for the benefit of the unit tests
        # (see above).
        self.root = root.Root(client, clock, now_fn)
        self.buildServer(webport, nodeurl_path, staticdir)

        # The same (optional) test clock drives ophandle expiration.
        ophandles = OphandleTable(clock)
        self._operations = ophandles
        ophandles.setServiceParent(self)
        self.root.putChild("operations", ophandles)

        self.root.putChild(b"storage-plugins", StoragePlugins(client))

    def buildServer(self, webport, nodeurl_path, staticdir):
        """
        Create the Nevow site around ``self.root`` and attach a listener for
        ``webport`` as a child service. Nothing is known about the actual
        port until ``startService`` runs.
        """
        self.webport = webport

        site = appserver.NevowSite(self.root)
        self.site = site
        site.requestFactory = MyRequest
        site.remember(MyExceptionHandler(), inevow.ICanHandleException)

        # kept as an attribute so tests can check
        self.staticdir = staticdir
        if staticdir:
            self.root.putChild("static", static.File(staticdir))

        if re.search(r'^\d', webport):
            # twisted warns about bare "0" or "3456"
            webport = "tcp:" + webport
        listener = strports.service(webport, site)
        listener.setServiceParent(self)

        # filled in by startService once the port is listening
        self._scheme = self._portnum = self._url = None
        # stash it so we can query for the portnum
        self._listener = listener

        self._started = defer.Deferred()
        if not nodeurl_path:
            return

        def _write_nodeurl_file(ign):
            # this file will be created with default permissions
            fileutil.write_atomically(nodeurl_path, self.getURL() + "\n",
                                      mode="")
        self._started.addCallback(_write_nodeurl_file)

    def getURL(self):
        """
        Return our URL. Asserts that it is already known, which is only the
        case once the listening port has been obtained in startService.
        """
        url = self._url
        assert url
        return url

    def getPortnum(self):
        """
        Return the port number we are listening on. Asserts that it is
        already known (see getURL).
        """
        portnum = self._portnum
        assert portnum
        return portnum

    def startService(self):
        """
        Start the child services, work out the scheme ('http' or 'https') and
        port of the listener, and fire (or errback) ``self._started``.
        """
        def _port_ready(listening_port):
            portnum = listening_port.getHost().port
            self._portnum = portnum
            # what is our webport? The scheme must have been set by the
            # caller before we get here.
            assert self._scheme
            self._url = "%s://127.0.0.1:%d/" % (self._scheme, portnum)
            self._started.callback(None)
            return listening_port

        def _port_failed(failure):
            self._started.errback(failure)
            return failure

        service.MultiService.startService(self)
        listener = self._listener

        if hasattr(listener, 'endpoint') and hasattr(listener, '_waitingForPort'):
            # Twisted 10.2 gives us a StreamServerEndpointService. This is
            # ugly but should do for now.
            endpoint_class_name = listener.endpoint.__class__.__name__
            self._scheme = ('https' if endpoint_class_name.startswith('SSL')
                            else 'http')
            listener._waitingForPort.addCallbacks(_port_ready, _port_failed)
            return

        if isinstance(listener, internet.TCPServer):
            # Twisted <= 10.1
            self._scheme = 'http'
            _port_ready(listener._port)
            return

        if isinstance(listener, internet.SSLServer):
            # Twisted <= 10.1
            self._scheme = 'https'
            _port_ready(listener._port)
            return

        # who knows, probably some weirdo future version of Twisted
        self._started.errback(AssertionError("couldn't find out the scheme or port for the web-API server"))

    def get_operations(self):
        """
        :return: a reference to our "active operations" tracker
        """
        return self._operations
|
|
|
|
|
2011-01-16 23:47:51 -08:00
|
|
|
|
2008-03-11 17:36:25 -07:00
|
|
|
class IntroducerWebishServer(WebishServer):
    """
    WebishServer variant whose resource tree is rooted at
    ``introweb.IntroducerRoot`` instead of ``root.Root``.

    It reuses ``buildServer`` (and everything else) from WebishServer but
    deliberately does not run ``WebishServer.__init__``.
    """
    def __init__(self, introducer, webport, nodeurl_path=None, staticdir=None):
        # Initialise the MultiService base directly rather than going through
        # WebishServer.__init__, which would build a root.Root and add the
        # "operations" and "storage-plugins" children.
        # NOTE(review): as a result self._operations is never set here, so
        # the inherited get_operations() would raise AttributeError on this
        # class -- confirm nothing calls it for introducers.
        service.MultiService.__init__(self)
        self.root = introweb.IntroducerRoot(introducer)
        self.buildServer(webport, nodeurl_path, staticdir)
|