Server-side schema validation of CBOR.

This commit is contained in:
Itamar Turner-Trauring 2022-04-11 13:11:45 -04:00
parent d4afa15d20
commit e5b0e51f72
4 changed files with 68 additions and 7 deletions

View File

@ -135,7 +135,8 @@ install_requires = [
"klein",
"werkzeug",
"treq",
"cbor2"
"cbor2",
"pycddl",
]
setup_requires = [

View File

@ -47,7 +47,9 @@ def _decode_cbor(response):
else:
raise ClientException(-1, "Server didn't send CBOR")
else:
return fail(ClientException(response.code, response.phrase))
return treq.content(response).addCallback(
lambda data: fail(ClientException(response.code, response.phrase, data))
)
@attr.s

View File

@ -21,7 +21,7 @@ from werkzeug.datastructures import ContentRange
# TODO Make sure to use pure Python versions?
from cbor2 import dumps, loads
from pycddl import Schema, ValidationError as CDDLValidationError
from .server import StorageServer
from .http_common import swissnum_auth_header, Secrets, get_content_type, CBOR_MIME_TYPE
from .common import si_a2b
@ -215,6 +215,25 @@ class _HTTPError(Exception):
self.code = code
# CDDL schemas.
#
# Tags are of the form #6.nnn, where the number is documented at
# https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258
# indicates a set.
_SCHEMAS = {
"allocate_buckets": Schema("""
message = {
share-numbers: #6.258([* uint])
allocated-size: uint
}
"""),
"advise_corrupt_share": Schema("""
message = {
reason: tstr
}
""")
}
class HTTPServer(object):
"""
A HTTP interface to the storage server.
@ -229,6 +248,12 @@ class HTTPServer(object):
request.setResponseCode(failure.value.code)
return b""
@_app.handle_errors(CDDLValidationError)
def _cddl_validation_error(self, request, failure):
"""Handle CDDL validation errors."""
request.setResponseCode(http.BAD_REQUEST)
return str(failure.value).encode("utf-8")
def __init__(
self, storage_server, swissnum
): # type: (StorageServer, bytes) -> None
@ -268,7 +293,7 @@ class HTTPServer(object):
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861
raise _HTTPError(http.NOT_ACCEPTABLE)
def _read_encoded(self, request) -> Any:
def _read_encoded(self, request, schema: Schema) -> Any:
"""
Read encoded request body data, decoding it with CBOR by default.
"""
@ -276,7 +301,10 @@ class HTTPServer(object):
if content_type == CBOR_MIME_TYPE:
# TODO limit memory usage, client could send arbitrarily large data...
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
return loads(request.content.read())
message = request.content.read()
schema.validate_cbor(message)
result = loads(message)
return result
else:
raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE)
@ -298,7 +326,7 @@ class HTTPServer(object):
def allocate_buckets(self, request, authorization, storage_index):
"""Allocate buckets."""
upload_secret = authorization[Secrets.UPLOAD]
info = self._read_encoded(request)
info = self._read_encoded(request, _SCHEMAS["allocate_buckets"])
# We do NOT validate the upload secret for existing bucket uploads.
# Another upload may be happening in parallel, with a different upload
@ -498,6 +526,6 @@ class HTTPServer(object):
except KeyError:
raise _HTTPError(http.NOT_FOUND)
info = self._read_encoded(request)
info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"])
bucket.advise_corrupt_share(info["reason"].encode("utf-8"))
return b""

View File

@ -413,6 +413,36 @@ class GenericHTTPAPITests(SyncTestCase):
)
self.assertEqual(version, expected_version)
def test_schema_validation(self):
"""
Ensure that schema validation is happening: invalid CBOR should result
in bad request response code (error 400).
We don't bother checking every single request, the API on the
server-side is designed to require a schema, so it validates
everywhere. But we check at least one to ensure we get correct
response code on bad input, so we know validation happened.
"""
upload_secret = urandom(32)
lease_secret = urandom(32)
storage_index = urandom(16)
url = self.http.client.relative_url(
"/v1/immutable/" + _encode_si(storage_index)
)
message = {"bad-message": "missing expected keys"}
response = result_of(
self.http.client.request(
"POST",
url,
lease_renew_secret=lease_secret,
lease_cancel_secret=lease_secret,
upload_secret=upload_secret,
message_to_serialize=message,
)
)
self.assertEqual(response.code, http.BAD_REQUEST)
class ImmutableHTTPAPITests(SyncTestCase):
"""