From d1599a924eb2df7e360c9783da32afdb578db9f0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 21 Oct 2020 14:42:30 -0400 Subject: [PATCH] Test and fix cap censoring in HTTP access logs --- src/allmydata/test/web/test_webish.py | 77 +++++++++++++++++++ src/allmydata/webish.py | 102 +++++++++++++------------- 2 files changed, 129 insertions(+), 50 deletions(-) diff --git a/src/allmydata/test/web/test_webish.py b/src/allmydata/test/web/test_webish.py index 33d1b48c3..45519fd33 100644 --- a/src/allmydata/test/web/test_webish.py +++ b/src/allmydata/test/web/test_webish.py @@ -8,12 +8,21 @@ from uuid import ( from testtools.matchers import ( AfterPreprocessing, + Contains, Equals, + MatchesAll, + Not, ) +from twisted.python.filepath import ( + FilePath, +) from twisted.web.test.requesthelper import ( DummyChannel, ) +from twisted.web.resource import ( + Resource, +) from ..common import ( SyncTestCase, @@ -21,6 +30,7 @@ from ..common import ( from ...webish import ( TahoeLAFSRequest, + tahoe_lafs_site, ) @@ -84,6 +94,73 @@ class TahoeLAFSRequestTests(SyncTestCase): ) +class TahoeLAFSSiteTests(SyncTestCase): + """ + Tests for the ``Site`` created by ``tahoe_lafs_site``. + """ + def _test_censoring(self, path, censored): + """ + """ + logPath = self.mktemp() + + site = tahoe_lafs_site(Resource(), logPath=logPath) + site.startFactory() + + channel = DummyChannel() + channel.factory = site + request = TahoeLAFSRequest(channel) + + request.gotLength(None) + request.requestReceived(b"GET", path, b"HTTP/1.1") + + self.assertThat( + FilePath(logPath).getContent(), + MatchesAll( + Contains(censored), + Not(Contains(path)), + ), + ) + + def test_uri_censoring(self): + """ + The log event for a request for **/uri/** has the capability value + censored. + """ + self._test_censoring( + b"/uri/URI:CHK:aaa:bbb", + b"/uri/[CENSORED]", + ) + + def test_file_censoring(self): + """ + The log event for a request for **/file/** has the capability value + censored. + """ + self._test_censoring( + b"/file/URI:CHK:aaa:bbb", + b"/file/[CENSORED]", + ) + + def test_named_censoring(self): + """ + The log event for a request for **/named/** has the capability value + censored. + """ + self._test_censoring( + b"/named/URI:CHK:aaa:bbb", + b"/named/[CENSORED]", + ) + + def test_uri_queryarg_censoring(self): + """ + The log event for a request for **/uri?cap=** has the capability + value censored. + """ + self._test_censoring( + b"/uri?uri=URI:CHK:aaa:bbb", + b"/uri?uri=[CENSORED]", + ) + def param(name, value): return u"; {}={}".format(name, value) diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index bd00b4094..75f0d03dc 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -1,5 +1,8 @@ import re, time +from functools import ( + partial, +) from cgi import ( FieldStorage, ) @@ -84,54 +87,6 @@ class TahoeLAFSRequest(Request, object): # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy self.setHeader("Referrer-Policy", "no-referrer") - def _logger(self): - # we build up a log string that hides most of the cap, to preserve - # user privacy. We retain the query args so we can identify things - # like t=json. Then we send it to the flog. We make no attempt to - # match apache formatting. TODO: when we move to DSA dirnodes and - # shorter caps, consider exposing a few characters of the cap, or - # maybe a few characters of its hash. - x = self.uri.split("?", 1) - if len(x) == 1: - # no query args - path = self.uri - queryargs = "" - else: - path, queryargs = x - # there is a form handler which redirects POST /uri?uri=FOO into - # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make - # sure we censor these too. - if queryargs.startswith("uri="): - queryargs = "[uri=CENSORED]" - queryargs = "?" + queryargs - if path.startswith("/uri"): - path = "/uri/[CENSORED].." - elif path.startswith("/file"): - path = "/file/[CENSORED].." - elif path.startswith("/named"): - path = "/named/[CENSORED].." - - uri = path + queryargs - - error = "" - if self._tahoe_request_had_error: - error = " [ERROR]" - - log.msg( - format=( - "web: %(clientip)s %(method)s %(uri)s %(code)s " - "%(length)s%(error)s" - ), - clientip=_get_client_ip(self), - method=self.method, - uri=uri, - code=self.code, - length=(self.sentLength or "-"), - error=error, - facility="tahoe.webish", - level=log.OPERATIONAL, - ) - def _get_client_ip(request): try: @@ -145,6 +100,54 @@ def _get_client_ip(request): return None +def _logFormatter(logDateTime, request): + # we build up a log string that hides most of the cap, to preserve + # user privacy. We retain the query args so we can identify things + # like t=json. Then we send it to the flog. We make no attempt to + # match apache formatting. TODO: when we move to DSA dirnodes and + # shorter caps, consider exposing a few characters of the cap, or + # maybe a few characters of its hash. + x = request.uri.split("?", 1) + if len(x) == 1: + # no query args + path = request.uri + queryargs = "" + else: + path, queryargs = x + # there is a form handler which redirects POST /uri?uri=FOO into + # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make + # sure we censor these too. + if queryargs.startswith("uri="): + queryargs = "uri=[CENSORED]" + queryargs = "?" + queryargs + if path.startswith("/uri/"): + path = "/uri/[CENSORED]" + elif path.startswith("/file/"): + path = "/file/[CENSORED]" + elif path.startswith("/named/"): + path = "/named/[CENSORED]" + + uri = path + queryargs + + template = "web: %(clientip)s %(method)s %(uri)s %(code)s %(length)s" + return template % dict( + clientip=_get_client_ip(request), + method=request.method, + uri=uri, + code=request.code, + length=(request.sentLength or "-"), + facility="tahoe.webish", + level=log.OPERATIONAL, + ) + + +tahoe_lafs_site = partial( + Site, + requestFactory=TahoeLAFSRequest, + logFormatter=_logFormatter, +) + + class WebishServer(service.MultiService): name = "webish" @@ -170,8 +173,7 @@ class WebishServer(service.MultiService): def buildServer(self, webport, nodeurl_path, staticdir): self.webport = webport - self.site = Site(self.root) - self.site.requestFactory = TahoeLAFSRequest + self.site = tahoe_lafs_site(self.root) self.staticdir = staticdir # so tests can check if staticdir: self.root.putChild("static", static.File(staticdir))