Support byte keys in internal dictionaries.

2025-04-09 03:44:23 +00:00 · 2021-01-26 11:01:50 -05:00 · 2021-01-26 11:01:50 -05:00 · 8493d70cab
commit 8493d70cab
parent e271cd1b64
2 changed files with 21 additions and 14 deletions
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@@ -493,10 +493,10 @@ class JSONBytes(unittest.TestCase):
    def test_encode_bytes(self):
        """BytesJSONEncoder can encode bytes."""
        data = {
-            b"hello": [1, b"cd"],
+            b"hello": [1, b"cd", {b"abc": 123}],
        }
        expected = {
-            u"hello": [1, u"cd"],
+            u"hello": [1, u"cd", {u"abc": 123}],
        }
        # Bytes get passed through as if they were UTF-8 Unicode:
        encoded = jsonbytes.dumps(data)
--- a/src/allmydata/util/jsonbytes.py
+++ b/src/allmydata/util/jsonbytes.py
@@ -13,20 +13,34 @@ from future.utils import PY2
 if PY2:
    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401

-
 import json


+def _bytes_to_unicode(obj):
+    """Recursively decode bytes (dict keys/values, list/set/tuple items) as UTF-8; sets and tuples become lists."""
+    if isinstance(obj, bytes):
+        return obj.decode("utf-8")
+    if isinstance(obj, dict):
+        new_obj = {}
+        for k, v in obj.items():
+            if isinstance(k, bytes):
+                k = k.decode("utf-8")
+            v = _bytes_to_unicode(v)
+            new_obj[k] = v
+        return new_obj
+    if isinstance(obj, (list, set, tuple)):
+        return [_bytes_to_unicode(i) for i in obj]
+    return obj
+
+
 class BytesJSONEncoder(json.JSONEncoder):
    """
    A JSON encoder than can also encode bytes.

    The bytes are assumed to be UTF-8 encoded Unicode strings.
    """
-    def default(self, o):
-        if isinstance(o, bytes):
-            return o.decode("utf-8")
-        return json.JSONEncoder.default(self, o)
+    def iterencode(self, o, **kwargs):
+        return json.JSONEncoder.iterencode(self, _bytes_to_unicode(o), **kwargs)


 def dumps(obj, *args, **kwargs):
@@ -34,13 +48,6 @@ def dumps(obj, *args, **kwargs):

    The bytes are assumed to be UTF-8 encoded Unicode strings.
    """
-    if isinstance(obj, dict):
-        new_obj = {}
-        for k, v in obj.items():
-            if isinstance(k, bytes):
-                k = k.decode("utf-8")
-            new_obj[k] = v
-        obj = new_obj
    return json.dumps(obj, cls=BytesJSONEncoder, *args, **kwargs)