mirror of https://github.com/tahoe-lafs/tahoe-lafs.git (synced 2025-04-07 10:56:49 +00:00)
Merge pull request #1043 from tahoe-lafs/3672.non-utf-8-bytes-in-logs
Support logging non-UTF-8 bytes in logs. Fixes ticket:3672.
This commit is contained in: commit 1c2ba6b33c
newsfragments/3672.minor | 0 (new, empty file)
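
For orientation, a minimal sketch (not part of the patch) of the behaviour this change adds, using the allmydata.util.jsonbytes helpers introduced in the hunks below: UTF-8 bytes decode to text as before, while non-UTF-8 bytes either raise or, with any_bytes=True, are backslash-quoted so that encoding a log message cannot fail.

    from allmydata.util import jsonbytes

    # UTF-8 bytes are decoded to text, as before.
    jsonbytes.dumps({b"hello": b"world"})                # JSON text: {"hello": "world"}

    # Non-UTF-8 bytes still raise by default...
    # jsonbytes.dumps({b"k": b"abc\xff"})                # UnicodeDecodeError

    # ...but are quoted for human consumption with any_bytes=True.
    jsonbytes.dumps({b"k": b"abc\xff"}, any_bytes=True)  # JSON text: {"k": "abc\\xff"}
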
@@ -125,5 +125,5 @@ if sys.platform == "win32":
 initialize()
 
 from eliot import to_file
-from allmydata.util.jsonbytes import BytesJSONEncoder
-to_file(open("eliot.log", "wb"), encoder=BytesJSONEncoder)
+from allmydata.util.jsonbytes import AnyBytesJSONEncoder
+to_file(open("eliot.log", "wb"), encoder=AnyBytesJSONEncoder)
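
Not part of the patch, but for context: each line that to_file/FileDestination writes is a single JSON object, so a log containing non-UTF-8 bytes can still be read back with the standard json module; the offending bytes simply come back as backslash-quoted text. A minimal sketch, assuming the "eliot.log" path used in the snippet above:

    import json

    with open("eliot.log", "rb") as f:
        for line in f:
            message = json.loads(line)
            print(message.get("action_type"), message.get("task_uuid"))
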
@@ -54,7 +54,7 @@ from twisted.python.monkey import (
     MonkeyPatcher,
 )
 
-from ..util.jsonbytes import BytesJSONEncoder
+from ..util.jsonbytes import AnyBytesJSONEncoder
 
 
 _NAME = Field.for_types(
@@ -76,7 +76,7 @@ RUN_TEST = ActionType(
 if PY2:
     _memory_logger = MemoryLogger
 else:
-    _memory_logger = lambda: MemoryLogger(encoder=BytesJSONEncoder)
+    _memory_logger = lambda: MemoryLogger(encoder=AnyBytesJSONEncoder)
 
 
 @attr.s
@@ -69,7 +69,7 @@ from ..util.eliotutil import (
     _parse_destination_description,
     _EliotLogging,
 )
-from ..util.jsonbytes import BytesJSONEncoder
+from ..util.jsonbytes import AnyBytesJSONEncoder
 
 from .common import (
     SyncTestCase,
@@ -109,7 +109,7 @@ class ParseDestinationDescriptionTests(SyncTestCase):
         reactor = object()
         self.assertThat(
             _parse_destination_description("file:-")(reactor),
-            Equals(FileDestination(stdout, encoder=BytesJSONEncoder)),
+            Equals(FileDestination(stdout, encoder=AnyBytesJSONEncoder)),
         )
 
 
@@ -495,10 +495,10 @@ class YAML(unittest.TestCase):
 
 
 class JSONBytes(unittest.TestCase):
-    """Tests for BytesJSONEncoder."""
+    """Tests for jsonbytes module."""
 
     def test_encode_bytes(self):
-        """BytesJSONEncoder can encode bytes.
+        """jsonbytes.dumps() encodes bytes.
 
         Bytes are presumed to be UTF-8 encoded.
         """
@@ -515,7 +515,7 @@ class JSONBytes(unittest.TestCase):
         self.assertEqual(jsonbytes.loads(encoded), expected)
 
     def test_encode_unicode(self):
-        """BytesJSONEncoder encodes Unicode string as usual."""
+        """jsonbytes.dumps() encodes Unicode string as usual."""
         expected = {
             u"hello": [1, u"cd"],
         }
@@ -529,6 +529,37 @@ class JSONBytes(unittest.TestCase):
         self.assertIsInstance(encoded, bytes)
         self.assertEqual(json.loads(encoded, encoding="utf-8"), x)
 
+    def test_any_bytes_unsupported_by_default(self):
+        """By default non-UTF-8 bytes raise error."""
+        bytestring = b"abc\xff\x00"
+        with self.assertRaises(UnicodeDecodeError):
+            jsonbytes.dumps(bytestring)
+        with self.assertRaises(UnicodeDecodeError):
+            jsonbytes.dumps_bytes(bytestring)
+        with self.assertRaises(UnicodeDecodeError):
+            json.dumps(bytestring, cls=jsonbytes.UTF8BytesJSONEncoder)
+
+    def test_any_bytes(self):
+        """If any_bytes is True, non-UTF-8 bytes don't break encoding."""
+        bytestring = b"abc\xff\xff123"
+        o = {bytestring: bytestring}
+        expected = {"abc\\xff\\xff123": "abc\\xff\\xff123"}
+        self.assertEqual(
+            json.loads(jsonbytes.dumps(o, any_bytes=True)),
+            expected,
+        )
+        self.assertEqual(
+            json.loads(json.dumps(
+                o, cls=jsonbytes.AnyBytesJSONEncoder)),
+            expected,
+        )
+        self.assertEqual(
+            json.loads(jsonbytes.dumps(o, any_bytes=True),
+                       encoding="utf-8"),
+            expected,
+        )
+
+
 
 class FakeGetVersion(object):
     """Emulate an object with a get_version."""
@@ -92,7 +92,7 @@ class TestStreamingLogs(unittest.TestCase):
     @inlineCallbacks
     def test_one_log(self):
         """
-        Write a single Eliot log actin and see it streamed via websocket.
+        Write a single Eliot log action and see it streamed via websocket.
         """
 
         proto = yield self.agent.open(
@@ -109,7 +109,7 @@ class TestStreamingLogs(unittest.TestCase):
         def do_a_thing(arguments):
             pass
 
-        do_a_thing(arguments=[u"hello", b"good-day", 123, {"a": 35}, [None]])
+        do_a_thing(arguments=[u"hello", b"good-\xff-day", 123, {"a": 35}, [None]])
 
         proto.transport.loseConnection()
         yield proto.is_closed
@@ -117,7 +117,7 @@ class TestStreamingLogs(unittest.TestCase):
         self.assertEqual(len(messages), 2)
         self.assertEqual(messages[0]["action_type"], "test:cli:some-exciting-action")
         self.assertEqual(messages[0]["arguments"],
-                         ["hello", "good-day", 123, {"a": 35}, [None]])
+                         ["hello", "good-\\xff-day", 123, {"a": 35}, [None]])
         self.assertEqual(messages[1]["action_type"], "test:cli:some-exciting-action")
         self.assertEqual("started", messages[0]["action_status"])
         self.assertEqual("succeeded", messages[1]["action_status"])
@@ -87,7 +87,7 @@ from twisted.internet.defer import (
 )
 from twisted.application.service import Service
 
-from .jsonbytes import BytesJSONEncoder
+from .jsonbytes import AnyBytesJSONEncoder
 
 
 def validateInstanceOf(t):
@@ -306,7 +306,7 @@ class _DestinationParser(object):
                 rotateLength=rotate_length,
                 maxRotatedFiles=max_rotated_files,
             )
-        return lambda reactor: FileDestination(get_file(), BytesJSONEncoder)
+        return lambda reactor: FileDestination(get_file(), AnyBytesJSONEncoder)
 
 
 _parse_destination_description = _DestinationParser().parse
@@ -333,4 +333,4 @@ def log_call_deferred(action_type):
 if PY2:
     capture_logging = eliot_capture_logging
 else:
-    capture_logging = partial(eliot_capture_logging, encoder_=BytesJSONEncoder)
+    capture_logging = partial(eliot_capture_logging, encoder_=AnyBytesJSONEncoder)
@@ -14,45 +14,100 @@ if PY2:
     from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
 
 import json
+import codecs
 
+if PY2:
+    def backslashreplace_py2(ex):
+        """
+        On Python 2 'backslashreplace' error handler doesn't work, so write our
+        own.
+        """
+        return ''.join('\\x{:02x}'.format(ord(c))
+                       for c in ex.object[ex.start:ex.end]), ex.end
+
+    codecs.register_error("backslashreplace_tahoe_py2", backslashreplace_py2)
+
 
-def _bytes_to_unicode(obj):
-    """Convert any bytes objects to unicode, recursively."""
-    if isinstance(obj, bytes):
-        return obj.decode("utf-8")
-    if isinstance(obj, dict):
-        new_obj = {}
-        for k, v in obj.items():
-            if isinstance(k, bytes):
-                k = k.decode("utf-8")
-            v = _bytes_to_unicode(v)
-            new_obj[k] = v
-        return new_obj
-    if isinstance(obj, (list, set, tuple)):
-        return [_bytes_to_unicode(i) for i in obj]
-    return obj
-
-
-class BytesJSONEncoder(json.JSONEncoder):
-    """
-    A JSON encoder than can also encode bytes.
-
-    The bytes are assumed to be UTF-8 encoded Unicode strings.
-    """
-    def iterencode(self, o, **kwargs):
-        return json.JSONEncoder.iterencode(self, _bytes_to_unicode(o), **kwargs)
+def _bytes_to_unicode(any_bytes, obj):
+    """Create a function that recursively converts bytes to unicode.
+
+    :param any_bytes: If True, also support non-UTF-8-encoded bytes.
+    :param obj: Object to de-byte-ify.
+    """
+    errors = "backslashreplace" if any_bytes else "strict"
+    if PY2 and errors == "backslashreplace":
+        errors = "backslashreplace_tahoe_py2"
+
+    def doit(obj):
+        """Convert any bytes objects to unicode, recursively."""
+        if isinstance(obj, bytes):
+            return obj.decode("utf-8", errors=errors)
+        if isinstance(obj, dict):
+            new_obj = {}
+            for k, v in obj.items():
+                if isinstance(k, bytes):
+                    k = k.decode("utf-8", errors=errors)
+                v = doit(v)
+                new_obj[k] = v
+            return new_obj
+        if isinstance(obj, (list, set, tuple)):
+            return [doit(i) for i in obj]
+        return obj
+
+    return doit(obj)
+
+
+class UTF8BytesJSONEncoder(json.JSONEncoder):
+    """
+    A JSON encoder than can also encode UTF-8 encoded strings.
+    """
+    def encode(self, o, **kwargs):
+        return json.JSONEncoder.encode(
+            self, _bytes_to_unicode(False, o), **kwargs)
+
+    def iterencode(self, o, **kwargs):
+        return json.JSONEncoder.iterencode(
+            self, _bytes_to_unicode(False, o), **kwargs)
+
+
+class AnyBytesJSONEncoder(json.JSONEncoder):
+    """
+    A JSON encoder than can also encode bytes of any sort.
+
+    Bytes are decoded to strings using UTF-8, if that fails to decode then the
+    bytes are quoted.
+    """
+    def encode(self, o, **kwargs):
+        return json.JSONEncoder.encode(
+            self, _bytes_to_unicode(True, o), **kwargs)
+
+    def iterencode(self, o, **kwargs):
+        return json.JSONEncoder.iterencode(
+            self, _bytes_to_unicode(True, o), **kwargs)
 
 
 def dumps(obj, *args, **kwargs):
     """Encode to JSON, supporting bytes as keys or values.
 
-    The bytes are assumed to be UTF-8 encoded Unicode strings.
+    :param bool any_bytes: If False (the default) the bytes are assumed to be
+        UTF-8 encoded Unicode strings. If True, non-UTF-8 bytes are quoted for
+        human consumption.
     """
-    return json.dumps(obj, cls=BytesJSONEncoder, *args, **kwargs)
+    any_bytes = kwargs.pop("any_bytes", False)
+    if any_bytes:
+        cls = AnyBytesJSONEncoder
+    else:
+        cls = UTF8BytesJSONEncoder
+    return json.dumps(obj, cls=cls, *args, **kwargs)
 
 
 def dumps_bytes(obj, *args, **kwargs):
-    """Encode to JSON, then encode as bytes."""
+    """Encode to JSON, then encode as bytes.
+
+    :param bool any_bytes: If False (the default) the bytes are assumed to be
+        UTF-8 encoded Unicode strings. If True, non-UTF-8 bytes are quoted for
+        human consumption.
+    """
     result = dumps(obj, *args, **kwargs)
     if PY3:
        result = result.encode("utf-8")
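
The quoting above rests on the "backslashreplace" decode error handler (re-implemented for Python 2, where the built-in handler does not support decoding). A small illustration, not from the patch, of what the two modes do on Python 3, plus the encoder classes that expose them to json.dumps():

    # "strict" refuses undecodable bytes; "backslashreplace" escapes them.
    try:
        b"abc\xff".decode("utf-8")  # strict: raises
    except UnicodeDecodeError:
        pass
    assert b"abc\xff".decode("utf-8", errors="backslashreplace") == "abc\\xff"

    import json
    from allmydata.util.jsonbytes import AnyBytesJSONEncoder

    json.dumps({b"k": b"abc\xff"}, cls=AnyBytesJSONEncoder)  # JSON text: {"k": "abc\\xff"}
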
@@ -47,10 +47,7 @@ class TokenAuthenticatedWebSocketServerProtocol(WebSocketServerProtocol):
         """
         # probably want a try/except around here? what do we do if
         # transmission fails or anything else bad happens?
-        encoded = json.dumps(message)
-        if isinstance(encoded, str):
-            # On Python 3 dumps() returns Unicode...
-            encoded = encoded.encode("utf-8")
+        encoded = json.dumps_bytes(message, any_bytes=True)
         self.sendMessage(encoded)
 
     def onOpen(self):
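
One practical consequence, sketched here rather than taken from the patch: dumps_bytes() already returns bytes on both Python 2 and Python 3, so the explicit str-to-bytes conversion the old code needed goes away, and any_bytes=True keeps a single malformed log field from breaking the websocket log stream.

    from allmydata.util import jsonbytes

    payload = jsonbytes.dumps_bytes(
        {"arguments": ["hello", b"good-\xff-day"]}, any_bytes=True)
    assert isinstance(payload, bytes)  # ready to hand to sendMessage()
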