Switch to using pycddl for CBOR decoding.

This commit is contained in:
Itamar Turner-Trauring 2024-01-24 13:50:55 -05:00
parent 68d63fde27
commit fced1ab01b
5 changed files with 15 additions and 39 deletions

View File

@ -146,9 +146,8 @@ install_requires = [
# 5.6.0 excluded because https://github.com/agronholm/cbor2/issues/208
"cbor2 != 5.6.0",
# 0.4 adds the ability to pass in mmap() values which greatly reduces the
# amount of copying involved.
"pycddl >= 0.4",
# 0.6 adds the ability to decode CBOR.
"pycddl >= 0.6",
# Command-line parsing
"click >= 8.1.1",

View File

@ -63,7 +63,7 @@ from ..util.hashutil import timing_safe_compare
from ..util.deferredutil import async_to_deferred
from ..util.tor_provider import _Provider as TorProvider
from ..util.cputhreadpool import defer_to_thread
from ..util.cbor import dumps, loads
from ..util.cbor import dumps
try:
from txtorcon import Tor # type: ignore
@ -560,8 +560,7 @@ class StorageClient(object):
data = f.read()
def validate_and_decode():
schema.validate_cbor(data)
return loads(data)
return schema.validate_cbor(data, True)
return await defer_to_thread(validate_and_decode)
else:

View File

@ -637,17 +637,8 @@ async def read_encoded(
# Pycddl will release the GIL when validating larger documents, so
# let's take advantage of multiple CPUs:
await defer_to_thread(schema.validate_cbor, message)
# The CBOR parser will allocate more memory, but at least we can feed
# it the file-like object, so that if it's large it won't be make two
# copies.
request.content.seek(SEEK_SET, 0)
# Typically deserialization to Python will not release the GIL, and
# indeed as of Jan 2023 cbor2 didn't have any code to release the GIL
# in the decode path. As such, running it in a different thread has no benefit.
return cbor.load(request.content)
decoded = await defer_to_thread(schema.validate_cbor, message, True)
return decoded
class HTTPServer(BaseApp):
"""

View File

@ -41,7 +41,7 @@ from werkzeug.exceptions import NotFound as WNotFound
from testtools.matchers import Equals
from zope.interface import implementer
from ..util.cbor import dumps, loads
from ..util.cbor import dumps
from ..util.deferredutil import async_to_deferred
from ..util.cputhreadpool import disable_thread_pool_for_test
from .common import SyncTestCase
@ -1835,14 +1835,3 @@ class MutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase):
A read with no range returns the whole mutable.
"""
return self._read_with_no_range_test(data_length)
def test_roundtrip_cbor2_encoding_issue(self):
"""
Some versions of cbor2 (5.6.0) don't correctly encode bytestrings
bigger than 65535
"""
for size in range(0, 65535*2, 17):
self.assertEqual(
size,
len(loads(dumps(b"\12" * size)))
)

View File

@ -1,21 +1,19 @@
"""
Unified entry point for CBOR encoding and decoding.
"""
import sys
Makes it less likely to use ``cbor2.loads()`` by mistake, which we want to avoid.
"""
# We don't want to use the C extension for loading, at least for now, but using
# it for dumping should be fine.
from cbor2 import dumps, dump
# Now, override the C extension so we can import the Python versions of loading
# functions.
del sys.modules["cbor2"]
sys.modules["_cbor2"] = None # type: ignore[assignment]
from cbor2 import load, loads
def load(*args, **kwargs):
"""
Don't use this! Here just in case someone uses it by mistake.
"""
raise RuntimeError("Use pycddl for decoding CBOR")
# Quick validation that we got the Python version, not the C version.
assert type(load) == type(lambda: None), repr(load) # type: ignore[comparison-overlap]
assert type(loads) == type(lambda: None), repr(loads) # type: ignore[comparison-overlap]
loads = load
__all__ = ["dumps", "loads", "dump", "load"]