diff --git a/src/allmydata/test/web/test_webish.py b/src/allmydata/test/web/test_webish.py index 4a77d21ae..050f77d1c 100644 --- a/src/allmydata/test/web/test_webish.py +++ b/src/allmydata/test/web/test_webish.py @@ -202,6 +202,16 @@ class TahoeLAFSSiteTests(SyncTestCase): ), ) + def test_private_key_censoring(self): + """ + The log event for a request including a **private-key** query + argument has the private key value censored. + """ + self._test_censoring( + b"/uri?uri=URI:CHK:aaa:bbb&private-key=AAAAaaaabbbb==", + b"/uri?uri=[CENSORED]&private-key=[CENSORED]", + ) + def test_uri_censoring(self): """ The log event for a request for **/uri/** has the capability value diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 519b3e1f0..1b2b8192a 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -1,18 +1,12 @@ """ -Ported to Python 3. +General web server-related utilities. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations from six import ensure_str import re, time, tempfile +from urllib.parse import parse_qsl, urlencode from cgi import ( FieldStorage, @@ -45,40 +39,37 @@ from .web.storage_plugins import ( ) -if PY2: - FileUploadFieldStorage = FieldStorage -else: - class FileUploadFieldStorage(FieldStorage): - """ - Do terrible things to ensure files are still bytes. +class FileUploadFieldStorage(FieldStorage): + """ + Do terrible things to ensure files are still bytes. - On Python 2, uploaded files were always bytes. On Python 3, there's a - heuristic: if the filename is set on a field, it's assumed to be a file - upload and therefore bytes. If no filename is set, it's Unicode. + On Python 2, uploaded files were always bytes. On Python 3, there's a + heuristic: if the filename is set on a field, it's assumed to be a file + upload and therefore bytes. If no filename is set, it's Unicode. - Unfortunately, we always want it to be bytes, and Tahoe-LAFS also - enables setting the filename not via the MIME filename, but via a - separate field called "name". + Unfortunately, we always want it to be bytes, and Tahoe-LAFS also + enables setting the filename not via the MIME filename, but via a + separate field called "name". - Thus we need to do this ridiculous workaround. Mypy doesn't like it - either, thus the ``# type: ignore`` below. + Thus we need to do this ridiculous workaround. Mypy doesn't like it + either, thus the ``# type: ignore`` below. - Source for idea: - https://mail.python.org/pipermail/python-dev/2017-February/147402.html - """ - @property # type: ignore - def filename(self): - if self.name == "file" and not self._mime_filename: - # We use the file field to upload files, see directory.py's - # _POST_upload. Lack of _mime_filename means we need to trick - # FieldStorage into thinking there is a filename so it'll - # return bytes. - return "unknown-filename" - return self._mime_filename + Source for idea: + https://mail.python.org/pipermail/python-dev/2017-February/147402.html + """ + @property # type: ignore + def filename(self): + if self.name == "file" and not self._mime_filename: + # We use the file field to upload files, see directory.py's + # _POST_upload. Lack of _mime_filename means we need to trick + # FieldStorage into thinking there is a filename so it'll + # return bytes. + return "unknown-filename" + return self._mime_filename - @filename.setter - def filename(self, value): - self._mime_filename = value + @filename.setter + def filename(self, value): + self._mime_filename = value class TahoeLAFSRequest(Request, object): @@ -180,12 +171,7 @@ def _logFormatter(logDateTime, request): queryargs = b"" else: path, queryargs = x - # there is a form handler which redirects POST /uri?uri=FOO into - # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make - # sure we censor these too. - if queryargs.startswith(b"uri="): - queryargs = b"uri=[CENSORED]" - queryargs = b"?" + queryargs + queryargs = b"?" + censor(queryargs) if path.startswith(b"/uri/"): path = b"/uri/[CENSORED]" elif path.startswith(b"/file/"): @@ -207,6 +193,30 @@ def _logFormatter(logDateTime, request): ) +def censor(queryargs: bytes) -> bytes: + """ + Replace potentially sensitive values in query arguments with a + constant string. + """ + args = parse_qsl(queryargs.decode("ascii"), keep_blank_values=True, encoding="utf8") + result = [] + for k, v in args: + if k == "uri": + # there is a form handler which redirects POST /uri?uri=FOO into + # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make + # sure we censor these. + v = "[CENSORED]" + elif k == "private-key": + # Likewise, sometimes a private key is supplied with mutable + # creation. + v = "[CENSORED]" + + result.append((k, v)) + + # Customize safe to try to leave our markers intact. + return urlencode(result, safe="[]").encode("ascii") + + class TahoeLAFSSite(Site, object): """ The HTTP protocol factory used by Tahoe-LAFS.