From bf2451bbcdbde50c72b90a20354ae4bc281b7b81 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 25 Jul 2023 15:31:18 -0400 Subject: [PATCH 1/6] Correct type. --- src/allmydata/test/test_storage_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 30f6a527d..52c47c6c3 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -172,7 +172,7 @@ class ExtractSecretsTests(SyncTestCase): ``ClientSecretsException``. """ with self.assertRaises(ClientSecretsException): - _extract_secrets(["FOO eA=="], {}) + _extract_secrets(["FOO eA=="], set()) def test_bad_secret_not_base64(self): """ From 46d10a6281d145f974bb5120120845e1eb6339e9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 25 Jul 2023 15:31:30 -0400 Subject: [PATCH 2/6] Ensure and test (and necessary refactor) that lack of content-type is same as CBOR content-type, as per spec. --- src/allmydata/storage/http_server.py | 119 ++++++++++++------------ src/allmydata/test/test_storage_http.py | 46 +++++++++ 2 files changed, 108 insertions(+), 57 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index c63a4ca08..3ff98e933 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -530,6 +530,60 @@ def _add_error_handling(app: Klein): return str(failure.value).encode("utf-8") +async def read_encoded( + reactor, request, schema: Schema, max_size: int = 1024 * 1024 +) -> Any: + """ + Read encoded request body data, decoding it with CBOR by default. + + Somewhat arbitrarily, limit body size to 1MiB by default. + """ + content_type = get_content_type(request.requestHeaders) + if content_type is None: + content_type = CBOR_MIME_TYPE + if content_type != CBOR_MIME_TYPE: + raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE) + + # Make sure it's not too large: + request.content.seek(0, SEEK_END) + size = request.content.tell() + if size > max_size: + raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE) + request.content.seek(0, SEEK_SET) + + # We don't want to load the whole message into memory, cause it might + # be quite large. The CDDL validator takes a read-only bytes-like + # thing. Luckily, for large request bodies twisted.web will buffer the + # data in a file, so we can use mmap() to get a memory view. The CDDL + # validator will not make a copy, so it won't increase memory usage + # beyond that. + try: + fd = request.content.fileno() + except (ValueError, OSError): + fd = -1 + if fd >= 0: + # It's a file, so we can use mmap() to save memory. + message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + else: + message = request.content.read() + + # Pycddl will release the GIL when validating larger documents, so + # let's take advantage of multiple CPUs: + if size > 10_000: + await defer_to_thread(reactor, schema.validate_cbor, message) + else: + schema.validate_cbor(message) + + # The CBOR parser will allocate more memory, but at least we can feed + # it the file-like object, so that if it's large it won't be make two + # copies. + request.content.seek(SEEK_SET, 0) + # Typically deserialization to Python will not release the GIL, and + # indeed as of Jan 2023 cbor2 didn't have any code to release the GIL + # in the decode path. As such, running it in a different thread has no benefit. + return cbor2.load(request.content) + + class HTTPServer(object): """ A HTTP interface to the storage server. @@ -587,56 +641,6 @@ class HTTPServer(object): # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861 raise _HTTPError(http.NOT_ACCEPTABLE) - async def _read_encoded( - self, request, schema: Schema, max_size: int = 1024 * 1024 - ) -> Any: - """ - Read encoded request body data, decoding it with CBOR by default. - - Somewhat arbitrarily, limit body size to 1MiB by default. - """ - content_type = get_content_type(request.requestHeaders) - if content_type != CBOR_MIME_TYPE: - raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE) - - # Make sure it's not too large: - request.content.seek(0, SEEK_END) - size = request.content.tell() - if size > max_size: - raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE) - request.content.seek(0, SEEK_SET) - - # We don't want to load the whole message into memory, cause it might - # be quite large. The CDDL validator takes a read-only bytes-like - # thing. Luckily, for large request bodies twisted.web will buffer the - # data in a file, so we can use mmap() to get a memory view. The CDDL - # validator will not make a copy, so it won't increase memory usage - # beyond that. - try: - fd = request.content.fileno() - except (ValueError, OSError): - fd = -1 - if fd >= 0: - # It's a file, so we can use mmap() to save memory. - message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ) - else: - message = request.content.read() - - # Pycddl will release the GIL when validating larger documents, so - # let's take advantage of multiple CPUs: - if size > 10_000: - await defer_to_thread(self._reactor, schema.validate_cbor, message) - else: - schema.validate_cbor(message) - - # The CBOR parser will allocate more memory, but at least we can feed - # it the file-like object, so that if it's large it won't be make two - # copies. - request.content.seek(SEEK_SET, 0) - # Typically deserialization to Python will not release the GIL, and - # indeed as of Jan 2023 cbor2 didn't have any code to release the GIL - # in the decode path. As such, running it in a different thread has no benefit. - return cbor2.load(request.content) ##### Generic APIs ##### @@ -677,8 +681,8 @@ class HTTPServer(object): """Allocate buckets.""" upload_secret = authorization[Secrets.UPLOAD] # It's just a list of up to ~256 shares, shouldn't use many bytes. - info = await self._read_encoded( - request, _SCHEMAS["allocate_buckets"], max_size=8192 + info = await read_encoded( + self._reactor, request, _SCHEMAS["allocate_buckets"], max_size=8192 ) # We do NOT validate the upload secret for existing bucket uploads. @@ -849,7 +853,8 @@ class HTTPServer(object): # The reason can be a string with explanation, so in theory it could be # longish? - info = await self._read_encoded( + info = await read_encoded( + self._reactor, request, _SCHEMAS["advise_corrupt_share"], max_size=32768, @@ -868,8 +873,8 @@ class HTTPServer(object): @async_to_deferred async def mutable_read_test_write(self, request, authorization, storage_index): """Read/test/write combined operation for mutables.""" - rtw_request = await self._read_encoded( - request, _SCHEMAS["mutable_read_test_write"], max_size=2**48 + rtw_request = await read_encoded( + self._reactor, request, _SCHEMAS["mutable_read_test_write"], max_size=2**48 ) secrets = ( authorization[Secrets.WRITE_ENABLER], @@ -955,8 +960,8 @@ class HTTPServer(object): # The reason can be a string with explanation, so in theory it could be # longish? - info = await self._read_encoded( - request, _SCHEMAS["advise_corrupt_share"], max_size=32768 + info = await read_encoded( + self._reactor, request, _SCHEMAS["advise_corrupt_share"], max_size=32768 ) self._storage_server.advise_corrupt_share( b"mutable", storage_index, share_number, info["reason"].encode("utf-8") diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 52c47c6c3..48ca072bc 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -42,6 +42,7 @@ from werkzeug.exceptions import NotFound as WNotFound from testtools.matchers import Equals from zope.interface import implementer +from ..util.deferredutil import async_to_deferred from .common import SyncTestCase from ..storage.http_common import ( get_content_type, @@ -59,6 +60,8 @@ from ..storage.http_server import ( _authorized_route, StorageIndexConverter, _add_error_handling, + read_encoded, + _SCHEMAS as SERVER_SCHEMAS, ) from ..storage.http_client import ( StorageClient, @@ -303,6 +306,14 @@ class TestApp(object): request.transport.loseConnection() return Deferred() + @_authorized_route(_app, set(), "/read_body", methods=["POST"]) + @async_to_deferred + async def read_body(self, request, authorization): + data = await read_encoded( + self.clock, request, SERVER_SCHEMAS["advise_corrupt_share"] + ) + return data["reason"] + def result_of(d): """ @@ -320,6 +331,7 @@ def result_of(d): + "This is probably a test design issue." ) + class CustomHTTPServerTests(SyncTestCase): """ Tests that use a custom HTTP server. @@ -504,6 +516,40 @@ class CustomHTTPServerTests(SyncTestCase): result_of(d) self.assertEqual(len(self._http_server.clock.getDelayedCalls()), 0) + def test_request_with_no_content_type_same_as_cbor(self): + """ + If no ``Content-Type`` header is set when sending a body, it is assumed + to be CBOR. + """ + response = result_of( + self.client.request( + "POST", + DecodedURL.from_text("http://127.0.0.1/read_body"), + data=dumps({"reason": "test"}), + ) + ) + self.assertEqual( + result_of(limited_content(response, self._http_server.clock, 100)).read(), + b"test", + ) + + def test_request_with_wrong_content(self): + """ + If a non-CBOR ``Content-Type`` header is set when sending a body, the + server complains appropriatly. + """ + headers = Headers() + headers.setRawHeaders("content-type", ["some/value"]) + response = result_of( + self.client.request( + "POST", + DecodedURL.from_text("http://127.0.0.1/read_body"), + data=dumps({"reason": "test"}), + headers=headers, + ) + ) + self.assertEqual(response.code, http.UNSUPPORTED_MEDIA_TYPE) + @implementer(IReactorFromThreads) class Reactor(Clock): From 7bac6996d18dc9d5f5b48686990a66a50a51021a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 25 Jul 2023 15:32:14 -0400 Subject: [PATCH 3/6] Updates based on changing specs. --- docs/proposed/http-storage-node-protocol.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 5009a992e..fed5bb5dc 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -279,7 +279,7 @@ Such an announcement will resemble this:: { "anonymous-storage-FURL": "pb://...", # The old key - "gbs-anonymous-storage-url": "pb://...#v=1" # The new key + "anonymous-storage-NURLs": ["pb://...#v=1"] # The new keys } The transition process will proceed in three stages: @@ -320,12 +320,7 @@ The follow sequence of events is likely: Ideally, the client would not rely on an update from the introducer to give it the GBS NURL for the updated storage server. -Therefore, -when an updated client connects to a storage server using Foolscap, -it should request the server's version information. -If this information indicates that GBS is supported then the client should cache this GBS information. -On subsequent connection attempts, -it should make use of this GBS information. +In practice, we have decided not to implement this functionality. Server Details -------------- From 792af1c9189e0d9c93d69eb450c680eb8eaa7de1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 25 Jul 2023 15:32:30 -0400 Subject: [PATCH 4/6] News fragment --- newsfragments/4042.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/4042.minor diff --git a/newsfragments/4042.minor b/newsfragments/4042.minor new file mode 100644 index 000000000..e69de29bb From ffe2e9773916e32a8a64e9eb0d0e1ae0939c7215 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Aug 2023 10:54:46 -0400 Subject: [PATCH 5/6] Better phrasing --- docs/proposed/http-storage-node-protocol.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index fed5bb5dc..db400fb2b 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -278,8 +278,8 @@ This NURL will be announced alongside their existing Foolscap-based server's fUR Such an announcement will resemble this:: { - "anonymous-storage-FURL": "pb://...", # The old key - "anonymous-storage-NURLs": ["pb://...#v=1"] # The new keys + "anonymous-storage-FURL": "pb://...", # The old entry + "anonymous-storage-NURLs": ["pb://...#v=1"] # The new, additional entry } The transition process will proceed in three stages: From 341a32708b718caab6311ed2e517c548e493df35 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Aug 2023 10:55:45 -0400 Subject: [PATCH 6/6] Docstring. --- src/allmydata/test/test_storage_http.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 48ca072bc..1f17621d7 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -309,6 +309,11 @@ class TestApp(object): @_authorized_route(_app, set(), "/read_body", methods=["POST"]) @async_to_deferred async def read_body(self, request, authorization): + """ + Accept an advise_corrupt_share message, return the reason. + + I.e. exercise codepaths used for reading CBOR from the body. + """ data = await read_encoded( self.clock, request, SERVER_SCHEMAS["advise_corrupt_share"] )