diff --git a/setup.py b/setup.py index 5285b5d08..c84d0ecde 100644 --- a/setup.py +++ b/setup.py @@ -135,7 +135,8 @@ install_requires = [ "klein", "werkzeug", "treq", - "cbor2" + "cbor2", + "pycddl", ] setup_requires = [ diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 99275ae24..e9a593a3e 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -47,7 +47,9 @@ def _decode_cbor(response): else: raise ClientException(-1, "Server didn't send CBOR") else: - return fail(ClientException(response.code, response.phrase)) + return treq.content(response).addCallback( + lambda data: fail(ClientException(response.code, response.phrase, data)) + ) @attr.s diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index f648d8331..4bf552fc5 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -21,7 +21,7 @@ from werkzeug.datastructures import ContentRange # TODO Make sure to use pure Python versions? from cbor2 import dumps, loads - +from pycddl import Schema, ValidationError as CDDLValidationError from .server import StorageServer from .http_common import swissnum_auth_header, Secrets, get_content_type, CBOR_MIME_TYPE from .common import si_a2b @@ -215,6 +215,25 @@ class _HTTPError(Exception): self.code = code +# CDDL schemas. +# +# Tags are of the form #6.nnn, where the number is documented at +# https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258 +# indicates a set. +_SCHEMAS = { + "allocate_buckets": Schema(""" + message = { + share-numbers: #6.258([* uint]) + allocated-size: uint + } + """), + "advise_corrupt_share": Schema(""" + message = { + reason: tstr + } + """) +} + class HTTPServer(object): """ A HTTP interface to the storage server. @@ -229,6 +248,12 @@ class HTTPServer(object): request.setResponseCode(failure.value.code) return b"" + @_app.handle_errors(CDDLValidationError) + def _cddl_validation_error(self, request, failure): + """Handle CDDL validation errors.""" + request.setResponseCode(http.BAD_REQUEST) + return str(failure.value).encode("utf-8") + def __init__( self, storage_server, swissnum ): # type: (StorageServer, bytes) -> None @@ -268,7 +293,7 @@ class HTTPServer(object): # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861 raise _HTTPError(http.NOT_ACCEPTABLE) - def _read_encoded(self, request) -> Any: + def _read_encoded(self, request, schema: Schema) -> Any: """ Read encoded request body data, decoding it with CBOR by default. """ @@ -276,7 +301,10 @@ class HTTPServer(object): if content_type == CBOR_MIME_TYPE: # TODO limit memory usage, client could send arbitrarily large data... # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - return loads(request.content.read()) + message = request.content.read() + schema.validate_cbor(message) + result = loads(message) + return result else: raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE) @@ -298,7 +326,7 @@ class HTTPServer(object): def allocate_buckets(self, request, authorization, storage_index): """Allocate buckets.""" upload_secret = authorization[Secrets.UPLOAD] - info = self._read_encoded(request) + info = self._read_encoded(request, _SCHEMAS["allocate_buckets"]) # We do NOT validate the upload secret for existing bucket uploads. # Another upload may be happening in parallel, with a different upload @@ -498,6 +526,6 @@ class HTTPServer(object): except KeyError: raise _HTTPError(http.NOT_FOUND) - info = self._read_encoded(request) + info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"]) bucket.advise_corrupt_share(info["reason"].encode("utf-8")) return b"" diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index af90d58a9..af868ddce 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -413,6 +413,36 @@ class GenericHTTPAPITests(SyncTestCase): ) self.assertEqual(version, expected_version) + def test_schema_validation(self): + """ + Ensure that schema validation is happening: invalid CBOR should result + in bad request response code (error 400). + + We don't bother checking every single request, the API on the + server-side is designed to require a schema, so it validates + everywhere. But we check at least one to ensure we get correct + response code on bad input, so we know validation happened. + """ + upload_secret = urandom(32) + lease_secret = urandom(32) + storage_index = urandom(16) + url = self.http.client.relative_url( + "/v1/immutable/" + _encode_si(storage_index) + ) + message = {"bad-message": "missing expected keys"} + + response = result_of( + self.http.client.request( + "POST", + url, + lease_renew_secret=lease_secret, + lease_cancel_secret=lease_secret, + upload_secret=upload_secret, + message_to_serialize=message, + ) + ) + self.assertEqual(response.code, http.BAD_REQUEST) + class ImmutableHTTPAPITests(SyncTestCase): """