From 8493d70cab3d5eb86cba3f26ab2c6fbb9ebb916d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 26 Jan 2021 11:01:50 -0500 Subject: [PATCH] Support byte keys in internal dictionaries. --- src/allmydata/test/test_util.py | 4 ++-- src/allmydata/util/jsonbytes.py | 31 +++++++++++++++++++------------ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index c556eb4b9..58de96d1c 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -493,10 +493,10 @@ class JSONBytes(unittest.TestCase): def test_encode_bytes(self): """BytesJSONEncoder can encode bytes.""" data = { - b"hello": [1, b"cd"], + b"hello": [1, b"cd", {b"abc": 123}], } expected = { - u"hello": [1, u"cd"], + u"hello": [1, u"cd", {u"abc": 123}], } # Bytes get passed through as if they were UTF-8 Unicode: encoded = jsonbytes.dumps(data) diff --git a/src/allmydata/util/jsonbytes.py b/src/allmydata/util/jsonbytes.py index 406a471a0..ab9d5fac0 100644 --- a/src/allmydata/util/jsonbytes.py +++ b/src/allmydata/util/jsonbytes.py @@ -13,20 +13,34 @@ from future.utils import PY2 if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - import json +def _bytes_to_unicode(obj): + """Convert keys of dicts from bytes to unicode, recursively.""" + if isinstance(obj, bytes): + return obj.decode("utf-8") + if isinstance(obj, dict): + new_obj = {} + for k, v in obj.items(): + if isinstance(k, bytes): + k = k.decode("utf-8") + v = _bytes_to_unicode(v) + new_obj[k] = v + return new_obj + if isinstance(obj, (list, set, tuple)): + return [_bytes_to_unicode(i) for i in obj] + return obj + + class BytesJSONEncoder(json.JSONEncoder): """ A JSON encoder than can also encode bytes. The bytes are assumed to be UTF-8 encoded Unicode strings. """ - def default(self, o): - if isinstance(o, bytes): - return o.decode("utf-8") - return json.JSONEncoder.default(self, o) + def iterencode(self, o, **kwargs): + return json.JSONEncoder.iterencode(self, _bytes_to_unicode(o), **kwargs) def dumps(obj, *args, **kwargs): @@ -34,13 +48,6 @@ def dumps(obj, *args, **kwargs): The bytes are assumed to be UTF-8 encoded Unicode strings. """ - if isinstance(obj, dict): - new_obj = {} - for k, v in obj.items(): - if isinstance(k, bytes): - k = k.decode("utf-8") - new_obj[k] = v - obj = new_obj return json.dumps(obj, cls=BytesJSONEncoder, *args, **kwargs)