mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-23 23:02:25 +00:00
Merge pull request #1190 from tahoe-lafs/3802-cddl
CDDL-based schema validation for the HTTP storage server Fixes ticket:3802
This commit is contained in:
commit
0c18dca1ad
@ -350,6 +350,9 @@ Because of the simple types used throughout
|
||||
and the equivalence described in `RFC 7049`_
|
||||
these examples should be representative regardless of which of these two encodings is chosen.
|
||||
|
||||
For CBOR messages, any sequence that is semantically a set (i.e. no repeated values allowed, order doesn't matter, and elements are hashable in Python) should be sent as a set.
|
||||
Tag 6.258 is used to indicate sets in CBOR; see `the CBOR registry <https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml>`_ for more details.
|
||||
|
||||
HTTP Design
|
||||
~~~~~~~~~~~
|
||||
|
||||
|
0
newsfragments/3802.minor
Normal file
0
newsfragments/3802.minor
Normal file
3
setup.py
3
setup.py
@ -135,7 +135,8 @@ install_requires = [
|
||||
"klein",
|
||||
"werkzeug",
|
||||
"treq",
|
||||
"cbor2"
|
||||
"cbor2",
|
||||
"pycddl",
|
||||
]
|
||||
|
||||
setup_requires = [
|
||||
|
@ -11,6 +11,7 @@ import attr
|
||||
|
||||
# TODO Make sure to import Python version?
|
||||
from cbor2 import loads, dumps
|
||||
from pycddl import Schema
|
||||
from collections_extended import RangeMap
|
||||
from werkzeug.datastructures import Range, ContentRange
|
||||
from twisted.web.http_headers import Headers
|
||||
@ -36,18 +37,69 @@ class ClientException(Exception):
|
||||
self.code = code
|
||||
|
||||
|
||||
def _decode_cbor(response):
|
||||
# Schemas for server responses.
|
||||
#
|
||||
# Tags are of the form #6.nnn, where the number is documented at
|
||||
# https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258
|
||||
# indicates a set.
|
||||
_SCHEMAS = {
|
||||
"get_version": Schema(
|
||||
"""
|
||||
message = {'http://allmydata.org/tahoe/protocols/storage/v1' => {
|
||||
'maximum-immutable-share-size' => uint
|
||||
'maximum-mutable-share-size' => uint
|
||||
'available-space' => uint
|
||||
'tolerates-immutable-read-overrun' => bool
|
||||
'delete-mutable-shares-with-zero-length-writev' => bool
|
||||
'fills-holes-with-zero-bytes' => bool
|
||||
'prevents-read-past-end-of-share-data' => bool
|
||||
}
|
||||
'application-version' => bstr
|
||||
}
|
||||
"""
|
||||
),
|
||||
"allocate_buckets": Schema(
|
||||
"""
|
||||
message = {
|
||||
already-have: #6.258([* uint])
|
||||
allocated: #6.258([* uint])
|
||||
}
|
||||
"""
|
||||
),
|
||||
"immutable_write_share_chunk": Schema(
|
||||
"""
|
||||
message = {
|
||||
required: [* {begin: uint, end: uint}]
|
||||
}
|
||||
"""
|
||||
),
|
||||
"list_shares": Schema(
|
||||
"""
|
||||
message = #6.258([* uint])
|
||||
"""
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _decode_cbor(response, schema: Schema):
|
||||
"""Given HTTP response, return decoded CBOR body."""
|
||||
|
||||
def got_content(data):
|
||||
schema.validate_cbor(data)
|
||||
return loads(data)
|
||||
|
||||
if response.code > 199 and response.code < 300:
|
||||
content_type = get_content_type(response.headers)
|
||||
if content_type == CBOR_MIME_TYPE:
|
||||
# TODO limit memory usage
|
||||
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
|
||||
return treq.content(response).addCallback(loads)
|
||||
return treq.content(response).addCallback(got_content)
|
||||
else:
|
||||
raise ClientException(-1, "Server didn't send CBOR")
|
||||
else:
|
||||
return fail(ClientException(response.code, response.phrase))
|
||||
return treq.content(response).addCallback(
|
||||
lambda data: fail(ClientException(response.code, response.phrase, data))
|
||||
)
|
||||
|
||||
|
||||
@attr.s
|
||||
@ -149,7 +201,7 @@ class StorageClientGeneral(object):
|
||||
"""
|
||||
url = self._client.relative_url("/v1/version")
|
||||
response = yield self._client.request("GET", url)
|
||||
decoded_response = yield _decode_cbor(response)
|
||||
decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"])
|
||||
returnValue(decoded_response)
|
||||
|
||||
|
||||
@ -207,7 +259,7 @@ class StorageClientImmutables(object):
|
||||
upload_secret=upload_secret,
|
||||
message_to_serialize=message,
|
||||
)
|
||||
decoded_response = yield _decode_cbor(response)
|
||||
decoded_response = yield _decode_cbor(response, _SCHEMAS["allocate_buckets"])
|
||||
returnValue(
|
||||
ImmutableCreateResult(
|
||||
already_have=decoded_response["already-have"],
|
||||
@ -279,7 +331,7 @@ class StorageClientImmutables(object):
|
||||
raise ClientException(
|
||||
response.code,
|
||||
)
|
||||
body = yield _decode_cbor(response)
|
||||
body = yield _decode_cbor(response, _SCHEMAS["immutable_write_share_chunk"])
|
||||
remaining = RangeMap()
|
||||
for chunk in body["required"]:
|
||||
remaining.set(True, chunk["begin"], chunk["end"])
|
||||
@ -332,7 +384,7 @@ class StorageClientImmutables(object):
|
||||
url,
|
||||
)
|
||||
if response.code == http.OK:
|
||||
body = yield _decode_cbor(response)
|
||||
body = yield _decode_cbor(response, _SCHEMAS["list_shares"])
|
||||
returnValue(set(body))
|
||||
else:
|
||||
raise ClientException(response.code)
|
||||
|
@ -21,7 +21,7 @@ from werkzeug.datastructures import ContentRange
|
||||
|
||||
# TODO Make sure to use pure Python versions?
|
||||
from cbor2 import dumps, loads
|
||||
|
||||
from pycddl import Schema, ValidationError as CDDLValidationError
|
||||
from .server import StorageServer
|
||||
from .http_common import swissnum_auth_header, Secrets, get_content_type, CBOR_MIME_TYPE
|
||||
from .common import si_a2b
|
||||
@ -86,8 +86,8 @@ def _authorization_decorator(required_secrets):
|
||||
try:
|
||||
secrets = _extract_secrets(authorization, required_secrets)
|
||||
except ClientSecretsException:
|
||||
request.setResponseCode(400)
|
||||
return b""
|
||||
request.setResponseCode(http.BAD_REQUEST)
|
||||
return b"Missing required secrets"
|
||||
return f(self, request, secrets, *args, **kwargs)
|
||||
|
||||
return route
|
||||
@ -215,6 +215,25 @@ class _HTTPError(Exception):
|
||||
self.code = code
|
||||
|
||||
|
||||
# CDDL schemas.
|
||||
#
|
||||
# Tags are of the form #6.nnn, where the number is documented at
|
||||
# https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258
|
||||
# indicates a set.
|
||||
_SCHEMAS = {
|
||||
"allocate_buckets": Schema("""
|
||||
message = {
|
||||
share-numbers: #6.258([* uint])
|
||||
allocated-size: uint
|
||||
}
|
||||
"""),
|
||||
"advise_corrupt_share": Schema("""
|
||||
message = {
|
||||
reason: tstr
|
||||
}
|
||||
""")
|
||||
}
|
||||
|
||||
class HTTPServer(object):
|
||||
"""
|
||||
A HTTP interface to the storage server.
|
||||
@ -229,6 +248,12 @@ class HTTPServer(object):
|
||||
request.setResponseCode(failure.value.code)
|
||||
return b""
|
||||
|
||||
@_app.handle_errors(CDDLValidationError)
|
||||
def _cddl_validation_error(self, request, failure):
|
||||
"""Handle CDDL validation errors."""
|
||||
request.setResponseCode(http.BAD_REQUEST)
|
||||
return str(failure.value).encode("utf-8")
|
||||
|
||||
def __init__(
|
||||
self, storage_server, swissnum
|
||||
): # type: (StorageServer, bytes) -> None
|
||||
@ -268,7 +293,7 @@ class HTTPServer(object):
|
||||
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861
|
||||
raise _HTTPError(http.NOT_ACCEPTABLE)
|
||||
|
||||
def _read_encoded(self, request) -> Any:
|
||||
def _read_encoded(self, request, schema: Schema) -> Any:
|
||||
"""
|
||||
Read encoded request body data, decoding it with CBOR by default.
|
||||
"""
|
||||
@ -276,7 +301,10 @@ class HTTPServer(object):
|
||||
if content_type == CBOR_MIME_TYPE:
|
||||
# TODO limit memory usage, client could send arbitrarily large data...
|
||||
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
|
||||
return loads(request.content.read())
|
||||
message = request.content.read()
|
||||
schema.validate_cbor(message)
|
||||
result = loads(message)
|
||||
return result
|
||||
else:
|
||||
raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE)
|
||||
|
||||
@ -298,7 +326,7 @@ class HTTPServer(object):
|
||||
def allocate_buckets(self, request, authorization, storage_index):
|
||||
"""Allocate buckets."""
|
||||
upload_secret = authorization[Secrets.UPLOAD]
|
||||
info = self._read_encoded(request)
|
||||
info = self._read_encoded(request, _SCHEMAS["allocate_buckets"])
|
||||
|
||||
# We do NOT validate the upload secret for existing bucket uploads.
|
||||
# Another upload may be happening in parallel, with a different upload
|
||||
@ -408,7 +436,7 @@ class HTTPServer(object):
|
||||
"""
|
||||
List shares for the given storage index.
|
||||
"""
|
||||
share_numbers = list(self._storage_server.get_buckets(storage_index).keys())
|
||||
share_numbers = set(self._storage_server.get_buckets(storage_index).keys())
|
||||
return self._send_encoded(request, share_numbers)
|
||||
|
||||
@_authorized_route(
|
||||
@ -498,6 +526,6 @@ class HTTPServer(object):
|
||||
except KeyError:
|
||||
raise _HTTPError(http.NOT_FOUND)
|
||||
|
||||
info = self._read_encoded(request)
|
||||
info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"])
|
||||
bucket.advise_corrupt_share(info["reason"].encode("utf-8"))
|
||||
return b""
|
||||
|
@ -18,6 +18,8 @@ from base64 import b64encode
|
||||
from contextlib import contextmanager
|
||||
from os import urandom
|
||||
|
||||
from cbor2 import dumps
|
||||
from pycddl import ValidationError as CDDLValidationError
|
||||
from hypothesis import assume, given, strategies as st
|
||||
from fixtures import Fixture, TempDir
|
||||
from treq.testing import StubTreq
|
||||
@ -49,7 +51,7 @@ from ..storage.http_client import (
|
||||
StorageClientGeneral,
|
||||
_encode_si,
|
||||
)
|
||||
from ..storage.http_common import get_content_type
|
||||
from ..storage.http_common import get_content_type, CBOR_MIME_TYPE
|
||||
from ..storage.common import si_b2a
|
||||
|
||||
|
||||
@ -239,6 +241,12 @@ class TestApp(object):
|
||||
else:
|
||||
return "BAD: {}".format(authorization)
|
||||
|
||||
@_authorized_route(_app, set(), "/v1/version", methods=["GET"])
|
||||
def bad_version(self, request, authorization):
|
||||
"""Return version result that violates the expected schema."""
|
||||
request.setHeader("content-type", CBOR_MIME_TYPE)
|
||||
return dumps({"garbage": 123})
|
||||
|
||||
|
||||
def result_of(d):
|
||||
"""
|
||||
@ -257,15 +265,15 @@ def result_of(d):
|
||||
)
|
||||
|
||||
|
||||
class RoutingTests(SyncTestCase):
|
||||
class CustomHTTPServerTests(SyncTestCase):
|
||||
"""
|
||||
Tests for the HTTP routing infrastructure.
|
||||
Tests that use a custom HTTP server.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
if PY2:
|
||||
self.skipTest("Not going to bother supporting Python 2")
|
||||
super(RoutingTests, self).setUp()
|
||||
super(CustomHTTPServerTests, self).setUp()
|
||||
# Could be a fixture, but will only be used in this test class so not
|
||||
# going to bother:
|
||||
self._http_server = TestApp()
|
||||
@ -277,8 +285,8 @@ class RoutingTests(SyncTestCase):
|
||||
|
||||
def test_authorization_enforcement(self):
|
||||
"""
|
||||
The requirement for secrets is enforced; if they are not given, a 400
|
||||
response code is returned.
|
||||
The requirement for secrets is enforced by the ``_authorized_route``
|
||||
decorator; if they are not given, a 400 response code is returned.
|
||||
"""
|
||||
# Without secret, get a 400 error.
|
||||
response = result_of(
|
||||
@ -298,6 +306,14 @@ class RoutingTests(SyncTestCase):
|
||||
self.assertEqual(response.code, 200)
|
||||
self.assertEqual(result_of(response.content()), b"GOOD SECRET")
|
||||
|
||||
def test_client_side_schema_validation(self):
|
||||
"""
|
||||
The client validates returned CBOR message against a schema.
|
||||
"""
|
||||
client = StorageClientGeneral(self.client)
|
||||
with self.assertRaises(CDDLValidationError):
|
||||
result_of(client.get_version())
|
||||
|
||||
|
||||
class HttpTestFixture(Fixture):
|
||||
"""
|
||||
@ -413,6 +429,36 @@ class GenericHTTPAPITests(SyncTestCase):
|
||||
)
|
||||
self.assertEqual(version, expected_version)
|
||||
|
||||
def test_server_side_schema_validation(self):
|
||||
"""
|
||||
Ensure that schema validation is happening: invalid CBOR should result
|
||||
in bad request response code (error 400).
|
||||
|
||||
We don't bother checking every single request, the API on the
|
||||
server-side is designed to require a schema, so it validates
|
||||
everywhere. But we check at least one to ensure we get correct
|
||||
response code on bad input, so we know validation happened.
|
||||
"""
|
||||
upload_secret = urandom(32)
|
||||
lease_secret = urandom(32)
|
||||
storage_index = urandom(16)
|
||||
url = self.http.client.relative_url(
|
||||
"/v1/immutable/" + _encode_si(storage_index)
|
||||
)
|
||||
message = {"bad-message": "missing expected keys"}
|
||||
|
||||
response = result_of(
|
||||
self.http.client.request(
|
||||
"POST",
|
||||
url,
|
||||
lease_renew_secret=lease_secret,
|
||||
lease_cancel_secret=lease_secret,
|
||||
upload_secret=upload_secret,
|
||||
message_to_serialize=message,
|
||||
)
|
||||
)
|
||||
self.assertEqual(response.code, http.BAD_REQUEST)
|
||||
|
||||
|
||||
class ImmutableHTTPAPITests(SyncTestCase):
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user