From f09aa8c7969d8455a958c278d3fdb889aae15d71 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 20 Jan 2022 11:16:06 -0500 Subject: [PATCH] Use pre-existing parser for Range and Content-Range headers. --- docs/proposed/http-storage-node-protocol.rst | 2 +- nix/tahoe-lafs.nix | 4 +- setup.py | 1 + src/allmydata/storage/http_client.py | 2 +- src/allmydata/storage/http_server.py | 41 ++++++++++---------- src/allmydata/test/test_storage_http.py | 2 +- 6 files changed, 27 insertions(+), 25 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index bb1db750c..560220d00 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -640,7 +640,7 @@ For example:: Read a contiguous sequence of bytes from one share in one bucket. The response body is the raw share data (i.e., ``application/octet-stream``). -The ``Range`` header may be used to request exactly one ``bytes`` range. +The ``Range`` header may be used to request exactly one ``bytes`` range, in which case the response code will be 206 (partial content). Interpretation and response behavior is as specified in RFC 7233 ยง 4.1. Multiple ranges in a single request are *not* supported. diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index 04d6c4163..1885dd9ca 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -4,7 +4,7 @@ , setuptools, setuptoolsTrial, pyasn1, zope_interface , service-identity, pyyaml, magic-wormhole, treq, appdirs , beautifulsoup4, eliot, autobahn, cryptography, netifaces -, html5lib, pyutil, distro, configparser, klein, cbor2 +, html5lib, pyutil, distro, configparser, klein, werkzeug, cbor2 }: python.pkgs.buildPythonPackage rec { # Most of the time this is not exactly the release version (eg 1.17.0). @@ -98,7 +98,7 @@ EOF service-identity pyyaml magic-wormhole eliot autobahn cryptography netifaces setuptools future pyutil distro configparser collections-extended - klein cbor2 treq + klein werkzeug cbor2 treq ]; checkInputs = with python.pkgs; [ diff --git a/setup.py b/setup.py index 7e7a955c6..36e82a2b2 100644 --- a/setup.py +++ b/setup.py @@ -143,6 +143,7 @@ install_requires = [ # HTTP server and client "klein", + "werkzeug", "treq", "cbor2" ] diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 8fea86396..cf453fcfc 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -261,7 +261,7 @@ class StorageClientImmutables(object): } ), ) - if response.code == 200: + if response.code == http.PARTIAL_CONTENT: body = yield response.content() returnValue(body) else: diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 71c34124a..bbb42dbe1 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -22,6 +22,7 @@ from base64 import b64decode from klein import Klein from twisted.web import http import attr +from werkzeug.http import parse_range_header, parse_content_range_header # TODO Make sure to use pure Python versions? from cbor2 import dumps, loads @@ -218,11 +219,12 @@ class HTTPServer(object): def write_share_data(self, request, authorization, storage_index, share_number): """Write data to an in-progress immutable upload.""" storage_index = si_a2b(storage_index.encode("ascii")) - content_range = request.getHeader("content-range") - if content_range is None: - offset = 0 - else: - offset = int(content_range.split()[1].split("-")[0]) + content_range = parse_content_range_header(request.getHeader("content-range")) + # TODO in https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3860 + # 1. Malformed header should result in error + # 2. Non-bytes unit should result in error + # 3. Missing header means full upload in one request + offset = content_range.start # TODO basic checks on validity of start, offset, and content-range in general. also of share_number. # TODO basic check that body isn't infinite. require content-length? or maybe we should require content-range (it's optional now)? if so, needs to be rflected in protocol spec. @@ -256,22 +258,21 @@ class HTTPServer(object): ) def read_share_chunk(self, request, authorization, storage_index, share_number): """Read a chunk for an already uploaded immutable.""" - # TODO basic checks on validity + # TODO in https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3860 + # 1. basic checks on validity on storage index, share number + # 2. missing range header should have response code 200 and return whole thing + # 3. malformed range header should result in error? or return everything? + # 4. non-bytes range results in error + # 5. ranges make sense semantically (positive, etc.) + # 6. multiple ranges fails with error + # 7. missing end of range means "to the end of share" storage_index = si_a2b(storage_index.encode("ascii")) - range_header = request.getHeader("range") - if range_header is None: - offset = 0 - inclusive_end = None - else: - parts = range_header.split("=")[1].split("-") - offset = int(parts[0]) # TODO make sure valid - if len(parts) > 0: - inclusive_end = int(parts[1]) # TODO make sure valid - else: - inclusive_end = None - - assert inclusive_end != None # TODO support this case + range_header = parse_range_header(request.getHeader("range")) + offset, end = range_header.ranges[0] + assert end != None # TODO support this case # TODO if not found, 404 bucket = self._storage_server.get_buckets(storage_index)[share_number] - return bucket.read(offset, inclusive_end - offset + 1) + data = bucket.read(offset, end - offset) + request.setResponseCode(http.PARTIAL_CONTENT) + return data diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index a7aad608e..540e40c16 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -339,7 +339,7 @@ class ImmutableHTTPAPITests(AsyncTestCase): self.assertTrue(finished) # We can now read: - for offset, length in [(0, 100), (10, 19), (99, 0), (49, 200)]: + for offset, length in [(0, 100), (10, 19), (99, 1), (49, 200)]: downloaded = yield im_client.read_share_chunk( storage_index, 1, offset, length )