From b56a95684310c0897c90ad2f7243c3a4d3e6bcb7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 15:42:21 -0400 Subject: [PATCH] Sort out this gross error reporting encoding/decoding mess A little, anyway --- src/allmydata/scripts/runner.py | 29 +++++++++++++++--- src/allmydata/test/test_runner.py | 50 +++++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index d9cd6e720..454c42c85 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -167,10 +167,31 @@ def parse_or_exit(config, argv, stdout, stderr): while hasattr(c, 'subOptions'): c = c.subOptions print(str(c), file=stdout) - # On Python 2 the string may turn into a unicode string, e.g. the error - # may be unicode, in which case it will print funny. Once we're on - # Python 3 we can just drop the ensure_str(). - print(six.ensure_str("%s: %s\n" % (argv[0], e)), file=stdout) + # On Python 2 the exception may hold non-ascii in a byte string. This + # makes it impossible to convert the exception to any kind of string + # using str() or unicode(). So, reach inside and get what we need. + # + # Then, since we are on Python 2, turn it into some entirely safe + # ascii that will survive being written to stdout without causing too + # much damage in the process. + # + # As a result, non-ascii will not be rendered correctly but instead as + # escape sequences. At least this can go away when we're done with + # Python 2 support. + if PY2: + exc_text = e.args[0].decode( + "utf-8", + ).encode( + "ascii", + errors="backslashreplace", + ).decode( + "ascii", + ) + else: + exc_text = unicode(e) + exc_bytes = six.ensure_binary(exc_text, "utf-8") + msg_bytes = b"%s: %s\n" % (six.ensure_binary(argv[0]), exc_bytes) + print(six.ensure_text(msg_bytes, "utf-8"), file=stdout) sys.exit(1) return config diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index f7fe538f1..a420581e9 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -24,6 +24,16 @@ import six from testtools import ( skipUnless, ) +from testtools.matchers import ( + MatchesListwise, + MatchesAny, + Contains, + Equals, + Always, +) +from testtools.twistedsupport import ( + succeeded, +) from eliot import ( log_call, ) @@ -55,6 +65,7 @@ from .common import ( from .common_util import ( parse_cli, run_cli, + run_cli_unicode, ) from .cli_node_api import ( CLINodeAPI, @@ -97,7 +108,7 @@ class ParseOptionsTests(SyncTestCase): does not exist and which also contains non-ascii characters, the exception it raises includes the subcommand encoded as UTF-8. """ - tricky = u"\u2621" + tricky = u"\u00F6" try: parse_options([tricky]) except usage.error as e: @@ -107,6 +118,35 @@ class ParseOptionsTests(SyncTestCase): ) +class ParseOrExitTests(SyncTestCase): + """ + Tests for ``parse_or_exit``. + """ + def test_nonascii_error_content(self): + """ + ``parse_or_exit`` can report errors that include non-ascii content. + """ + tricky = u"\u00F6" + self.assertThat( + run_cli_unicode(tricky, [], encoding="utf-8"), + succeeded( + MatchesListwise([ + # returncode + Equals(1), + # stdout + MatchesAny( + # Python 2 + Contains(u"Unknown command: \\xf6"), + # Python 3 + Contains(u"Unknown command: \xf6"), + ), + # stderr, + Always() + ]), + ), + ) + + @log_call(action_type="run-bin-tahoe") def run_bintahoe(extra_argv, python_options=None): """ @@ -143,11 +183,15 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): """ tricky = u"\u00F6" out, err, returncode = run_bintahoe([tricky]) + if PY2: + expected = u"Unknown command: \\xf6" + else: + expected = u"Unknown command: \xf6" self.assertEqual(returncode, 1) self.assertIn( - u"Unknown command: " + tricky, + expected, out, - "stdout: {!r}\nstderr: {!r}".format(out, err), + "expected {!r} not found in {!r}\nstderr: {!r}".format(expected, out, err), ) def test_with_python_options(self):