Switch to using pycddl for CBOR decoding.

This commit is contained in:
Itamar Turner-Trauring 2024-01-24 13:50:55 -05:00
parent 68d63fde27
commit fced1ab01b
5 changed files with 15 additions and 39 deletions

View File

@ -146,9 +146,8 @@ install_requires = [
# 5.6.0 excluded because https://github.com/agronholm/cbor2/issues/208 # 5.6.0 excluded because https://github.com/agronholm/cbor2/issues/208
"cbor2 != 5.6.0", "cbor2 != 5.6.0",
# 0.4 adds the ability to pass in mmap() values which greatly reduces the # 0.6 adds the ability to decode CBOR.
# amount of copying involved. "pycddl >= 0.6",
"pycddl >= 0.4",
# Command-line parsing # Command-line parsing
"click >= 8.1.1", "click >= 8.1.1",

View File

@ -63,7 +63,7 @@ from ..util.hashutil import timing_safe_compare
from ..util.deferredutil import async_to_deferred from ..util.deferredutil import async_to_deferred
from ..util.tor_provider import _Provider as TorProvider from ..util.tor_provider import _Provider as TorProvider
from ..util.cputhreadpool import defer_to_thread from ..util.cputhreadpool import defer_to_thread
from ..util.cbor import dumps, loads from ..util.cbor import dumps
try: try:
from txtorcon import Tor # type: ignore from txtorcon import Tor # type: ignore
@ -560,8 +560,7 @@ class StorageClient(object):
data = f.read() data = f.read()
def validate_and_decode(): def validate_and_decode():
schema.validate_cbor(data) return schema.validate_cbor(data, True)
return loads(data)
return await defer_to_thread(validate_and_decode) return await defer_to_thread(validate_and_decode)
else: else:

View File

@ -637,17 +637,8 @@ async def read_encoded(
# Pycddl will release the GIL when validating larger documents, so # Pycddl will release the GIL when validating larger documents, so
# let's take advantage of multiple CPUs: # let's take advantage of multiple CPUs:
await defer_to_thread(schema.validate_cbor, message) decoded = await defer_to_thread(schema.validate_cbor, message, True)
return decoded
# The CBOR parser will allocate more memory, but at least we can feed
# it the file-like object, so that if it's large it won't make two
# copies.
request.content.seek(SEEK_SET, 0)
# Typically deserialization to Python will not release the GIL, and
# indeed as of Jan 2023 cbor2 didn't have any code to release the GIL
# in the decode path. As such, running it in a different thread has no benefit.
return cbor.load(request.content)
class HTTPServer(BaseApp): class HTTPServer(BaseApp):
""" """

View File

@ -41,7 +41,7 @@ from werkzeug.exceptions import NotFound as WNotFound
from testtools.matchers import Equals from testtools.matchers import Equals
from zope.interface import implementer from zope.interface import implementer
from ..util.cbor import dumps, loads from ..util.cbor import dumps
from ..util.deferredutil import async_to_deferred from ..util.deferredutil import async_to_deferred
from ..util.cputhreadpool import disable_thread_pool_for_test from ..util.cputhreadpool import disable_thread_pool_for_test
from .common import SyncTestCase from .common import SyncTestCase
@ -1835,14 +1835,3 @@ class MutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase):
A read with no range returns the whole mutable. A read with no range returns the whole mutable.
""" """
return self._read_with_no_range_test(data_length) return self._read_with_no_range_test(data_length)
def test_roundtrip_cbor2_encoding_issue(self):
"""
Some versions of cbor2 (5.6.0) don't correctly encode bytestrings
bigger than 65535 bytes.
"""
for size in range(0, 65535*2, 17):
self.assertEqual(
size,
len(loads(dumps(b"\12" * size)))
)

View File

@ -1,21 +1,19 @@
""" """
Unified entry point for CBOR encoding and decoding. Unified entry point for CBOR encoding and decoding.
"""
import sys Makes it less likely to use ``cbor2.loads()`` by mistake, which we want to avoid.
"""
# We don't want to use the C extension for loading, at least for now, but using # We don't want to use the C extension for loading, at least for now, but using
# it for dumping should be fine. # it for dumping should be fine.
from cbor2 import dumps, dump from cbor2 import dumps, dump
# Now, override the C extension so we can import the Python versions of loading def load(*args, **kwargs):
# functions. """
del sys.modules["cbor2"] Don't use this! Here just in case someone uses it by mistake.
sys.modules["_cbor2"] = None # type: ignore[assignment] """
from cbor2 import load, loads raise RuntimeError("Use pycddl for decoding CBOR")
# Quick validation that we got the Python version, not the C version. loads = load
assert type(load) == type(lambda: None), repr(load) # type: ignore[comparison-overlap]
assert type(loads) == type(lambda: None), repr(loads) # type: ignore[comparison-overlap]
__all__ = ["dumps", "loads", "dump", "load"] __all__ = ["dumps", "loads", "dump", "load"]