More memory usage reductions.

Author: Itamar Turner-Trauring
Date:   2022-06-29 11:48:54 -04:00
Commit: 0e8f2aa702 (parent 520456bdc0)

2 changed files with 34 additions and 13 deletions

@@ -245,6 +245,8 @@ class _HTTPError(Exception):
 # Tags are of the form #6.nnn, where the number is documented at
 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258
 # indicates a set.
+#
+# TODO 3872 length limits in the schema.
 _SCHEMAS = {
     "allocate_buckets": Schema(
         """
@@ -485,12 +487,18 @@ class HTTPServer(object):
     def _read_encoded(self, request, schema: Schema) -> Any:
         """
         Read encoded request body data, decoding it with CBOR by default.
+
+        Somewhat arbitrarily, limit body size to 1MB; this may be too low, we
+        may want to customize per query type, but this is the starting point
+        for now.
         """
         content_type = get_content_type(request.requestHeaders)
         if content_type == CBOR_MIME_TYPE:
-            # TODO limit memory usage, client could send arbitrarily large data...
-            # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
-            message = request.content.read()
+            # Read 1 byte more than 1MB. We expect length to be 1MB or
+            # less; if it's more assume it's not a legitimate message.
+            message = request.content.read(1024 * 1024 + 1)
+            if len(message) > 1024 * 1024:
+                raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE)
             schema.validate_cbor(message)
             result = loads(message)
             return result
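
The pattern here is worth isolating: ask for one byte more than the cap, and treat actually receiving that extra byte as proof the body is too large, so an oversized body is rejected without unbounded buffering. A self-contained restatement (the helper name and MAX_SIZE constant are illustrative, not part of the codebase):

from twisted.web import http

class _HTTPError(Exception):
    """Raised with a status code to abort handling, as in the server above."""
    def __init__(self, code):
        self.code = code

MAX_SIZE = 1024 * 1024  # the 1MB cap chosen in this commit

def read_bounded_body(request, max_size=MAX_SIZE):
    """Return the request body, rejecting anything over max_size bytes."""
    # read() returns at most max_size + 1 bytes; getting all of them
    # means the client sent more than the cap allows.
    body = request.content.read(max_size + 1)
    if len(body) > max_size:
        raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE)
    return body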
@@ -586,20 +594,24 @@ class HTTPServer(object):
             request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE)
             return b""
-        offset = content_range.start
-
-        # TODO limit memory usage
-        # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
-        data = request.content.read(content_range.stop - content_range.start + 1)
         bucket = self._uploads.get_write_bucket(
             storage_index, share_number, authorization[Secrets.UPLOAD]
         )
-        try:
-            finished = bucket.write(offset, data)
-        except ConflictingWriteError:
-            request.setResponseCode(http.CONFLICT)
-            return b""
+        offset = content_range.start
+        remaining = content_range.stop - content_range.start
+        finished = False
+
+        while remaining > 0:
+            data = request.content.read(min(remaining, 65536))
+            assert data, "uploaded data length doesn't match range"
+
+            try:
+                finished = bucket.write(offset, data)
+            except ConflictingWriteError:
+                request.setResponseCode(http.CONFLICT)
+                return b""
+
+            remaining -= len(data)
+            offset += len(data)
         if finished:
             bucket.close()
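
Twisted spools large request bodies into a file-like request.content (a temporary file once they pass a size threshold), so reading in fixed-size chunks keeps peak memory at the chunk size instead of the full upload size. The new loop, restated as a standalone sketch with the HTTP error handling elided (the function name is illustrative):

CHUNK_SIZE = 65536  # 64 KiB, matching the commit

def stream_range_into_bucket(content, bucket, start, stop):
    """Copy bytes [start, stop) from a file-like body into a bucket,
    one CHUNK_SIZE piece at a time."""
    offset = start
    remaining = stop - start
    finished = False
    while remaining > 0:
        data = content.read(min(remaining, CHUNK_SIZE))
        assert data, "uploaded data length doesn't match range"
        finished = bucket.write(offset, data)
        remaining -= len(data)
        offset += len(data)
    return finished

Each iteration holds at most 64 KiB in memory, so a multi-megabyte upload no longer requires a buffer of its own size.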

@@ -1139,6 +1139,15 @@ class MutableHTTPAPIsTests(SyncTestCase):
             b"aXYZef-0",
         )
 
+    def test_too_large_write(self):
+        """
+        Writing too large of a chunk results in a REQUEST ENTITY TOO LARGE http
+        error.
+        """
+        with self.assertRaises(ClientException) as e:
+            self.create_upload(b"0123456789" * 1024 * 1024)
+        self.assertEqual(e.exception.code, http.REQUEST_ENTITY_TOO_LARGE)
+
     def test_list_shares(self):
         """``list_shares()`` returns the shares for a given storage index."""
         storage_index, _, _ = self.create_upload()