Switch to using pycddl for CBOR decoding.

2025-02-25 19:21:36 +00:00 · 2024-01-24 13:50:55 -05:00 · 2024-01-24 13:50:55 -05:00 · fced1ab01b
commit fced1ab01b
parent 68d63fde27
5 changed files with 15 additions and 39 deletions
--- a/setup.py
+++ b/setup.py
@@ -146,9 +146,8 @@ install_requires = [
    # 5.6.0 excluded because https://github.com/agronholm/cbor2/issues/208
    "cbor2 != 5.6.0",
-    # 0.4 adds the ability to pass in mmap() values which greatly reduces the
+    # 0.6 adds the ability to decode CBOR.
-    # amount of copying involved.
+    "pycddl >= 0.6",
-    "pycddl >= 0.4",
    # Command-line parsing
    "click >= 8.1.1",
--- a/src/allmydata/storage/http_client.py
+++ b/src/allmydata/storage/http_client.py
@@ -63,7 +63,7 @@ from ..util.hashutil import timing_safe_compare
 from ..util.deferredutil import async_to_deferred
 from ..util.tor_provider import _Provider as TorProvider
 from ..util.cputhreadpool import defer_to_thread
-from ..util.cbor import dumps, loads
+from ..util.cbor import dumps
 try:
    from txtorcon import Tor  # type: ignore
@@ -560,8 +560,7 @@ class StorageClient(object):
                    data = f.read()
                    def validate_and_decode():
-                        schema.validate_cbor(data)
+                        return schema.validate_cbor(data, True)
-                        return loads(data)
                    return await defer_to_thread(validate_and_decode)
                else:
--- a/src/allmydata/storage/http_server.py
+++ b/src/allmydata/storage/http_server.py
@@ -637,17 +637,8 @@ async def read_encoded(
    # Pycddl will release the GIL when validating larger documents, so
    # let's take advantage of multiple CPUs:
-    await defer_to_thread(schema.validate_cbor, message)
+    decoded = await defer_to_thread(schema.validate_cbor, message, True)
-
+    return decoded
-    # The CBOR parser will allocate more memory, but at least we can feed
-    # it the file-like object, so that if it's large it won't be make two
-    # copies.
-    request.content.seek(SEEK_SET, 0)
-    # Typically deserialization to Python will not release the GIL, and
-    # indeed as of Jan 2023 cbor2 didn't have any code to release the GIL
-    # in the decode path. As such, running it in a different thread has no benefit.
-    return cbor.load(request.content)
 class HTTPServer(BaseApp):
    """
--- a/src/allmydata/test/test_storage_http.py
+++ b/src/allmydata/test/test_storage_http.py
@@ -41,7 +41,7 @@ from werkzeug.exceptions import NotFound as WNotFound
 from testtools.matchers import Equals
 from zope.interface import implementer
-from ..util.cbor import dumps, loads
+from ..util.cbor import dumps
 from ..util.deferredutil import async_to_deferred
 from ..util.cputhreadpool import disable_thread_pool_for_test
 from .common import SyncTestCase
@@ -1835,14 +1835,3 @@ class MutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase):
        A read with no range returns the whole mutable.
        """
        return self._read_with_no_range_test(data_length)
-    def test_roundtrip_cbor2_encoding_issue(self):
-        """
-        Some versions of cbor2 (5.6.0) don't correctly encode bytestrings
-        bigger than 65535
-        """
-        for size in range(0, 65535*2, 17):
-            self.assertEqual(
-                size,
-                len(loads(dumps(b"\12" * size)))
-            )
--- a/src/allmydata/util/cbor.py
+++ b/src/allmydata/util/cbor.py
@@ -1,21 +1,19 @@
 """
 Unified entry point for CBOR encoding and decoding.
 """
-import sys
+Makes it less likely to use ``cbor2.loads()`` by mistake, which we want to avoid.
 """
 # We don't want to use the C extension for loading, at least for now, but using
 # it for dumping should be fine.
 from cbor2 import dumps, dump
-# Now, override the C extension so we can import the Python versions of loading
+def load(*args, **kwargs):
-# functions.
+    """
-del sys.modules["cbor2"]
+    Don't use this!  Here just in case someone uses it by mistake.
-sys.modules["_cbor2"] = None  # type: ignore[assignment]
+    """
-from cbor2 import load, loads
+    raise RuntimeError("Use pycddl for decoding CBOR")
-# Quick validation that we got the Python version, not the C version.
+loads = load
-assert type(load) == type(lambda: None), repr(load)   # type: ignore[comparison-overlap]
-assert type(loads) == type(lambda: None), repr(loads)   # type: ignore[comparison-overlap]
 __all__ = ["dumps", "loads", "dump", "load"]