From dea42c474f7e5c87d512e3a1feadb02935c7b8fc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 11:03:31 -0500 Subject: [PATCH 01/31] news fragment --- newsfragments/3522.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3522.minor diff --git a/newsfragments/3522.minor b/newsfragments/3522.minor new file mode 100644 index 000000000..e69de29bb From 238590d7fd7929e68b7d218d02dbf676b5b911cb Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 11:03:44 -0500 Subject: [PATCH 02/31] Remove mock by removing a bunch of unicode shenanigans --- src/allmydata/scripts/tahoe_add_alias.py | 45 ++++++------- src/allmydata/test/cli/test_alias.py | 85 +++++------------------- 2 files changed, 34 insertions(+), 96 deletions(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index ddef46db6..c7215d1db 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -112,33 +112,26 @@ def _get_alias_details(nodedir): def list_aliases(options): - nodedir = options['node-directory'] - stdout = options.stdout - stderr = options.stderr - - data = _get_alias_details(nodedir) - - max_width = max([len(quote_output(name)) for name in data.keys()] + [0]) - fmt = "%" + str(max_width) + "s: %s" - rc = 0 + data = _get_alias_details(options['node-directory']) if options['json']: - try: - # XXX why are we presuming utf-8 output? - print(json.dumps(data, indent=4).decode('utf-8'), file=stdout) - except (UnicodeEncodeError, UnicodeDecodeError): - print(json.dumps(data, indent=4), file=stderr) - rc = 1 + output = json.dumps(data, indent=4) else: - for name, details in data.items(): - dircap = details['readonly'] if options['readonly-uri'] else details['readwrite'] - try: - print(fmt % (unicode_to_output(name), unicode_to_output(dircap.decode('utf-8'))), file=stdout) - except (UnicodeEncodeError, UnicodeDecodeError): - print(fmt % (quote_output(name), quote_output(dircap)), file=stderr) - rc = 1 + def dircap(details): + return ( + details['readonly'] + if options['readonly-uri'] + else details['readwrite'] + ).decode("utf-8") - if rc == 1: - print("\nThis listing included aliases or caps that could not be converted to the terminal" \ - "\noutput encoding. These are shown using backslash escapes and in quotes.", file=stderr) - return rc + max_width = max([len(quote_output(name)) for name in data.keys()] + [0]) + fmt = "%" + str(max_width) + "s: %s" + output = u"\n".join(list( + fmt % (name, dircap(details)) + for name, details + in data.items() + )) + + print(output, file=options.stdout) + + return 0 diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 6542d154f..6eaa82e3e 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -1,5 +1,4 @@ import json -from mock import patch from twisted.trial import unittest from twisted.internet.defer import inlineCallbacks @@ -15,91 +14,37 @@ from ..common_util import skip_if_cannot_represent_argv class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @inlineCallbacks - def test_list(self): + def _test_list(self, alias): self.basedir = "cli/ListAlias/test_list" self.set_up_grid(oneshare=True) rc, stdout, stderr = yield self.do_cli( "create-alias", - unicode_to_argv(u"tahoe"), + unicode_to_argv(alias), ) - self.failUnless(unicode_to_argv(u"Alias 'tahoe' created") in stdout) - self.failIf(stderr) + self.assertIn( + unicode_to_argv(u"Alias '{}' created".format(alias)), + stdout, + ) + self.assertEqual("", stderr) aliases = get_aliases(self.get_clientdir()) - self.failUnless(u"tahoe" in aliases) - self.failUnless(aliases[u"tahoe"].startswith("URI:DIR2:")) + self.assertIn(alias, aliases) + self.assertTrue(aliases[alias].startswith("URI:DIR2:")) rc, stdout, stderr = yield self.do_cli("list-aliases", "--json") self.assertEqual(0, rc) data = json.loads(stdout) - self.assertIn(u"tahoe", data) - data = data[u"tahoe"] + self.assertIn(alias, data) + data = data[alias] self.assertIn("readwrite", data) self.assertIn("readonly", data) - @inlineCallbacks - def test_list_unicode_mismatch_json(self): - """ - pretty hack-y test, but we want to cover the 'except' on Unicode - errors paths and I can't come up with a nicer way to trigger - this - """ - self.basedir = "cli/ListAlias/test_list_unicode_mismatch_json" - skip_if_cannot_represent_argv(u"tahoe\u263A") - self.set_up_grid(oneshare=True) - rc, stdout, stderr = yield self.do_cli( - "create-alias", - unicode_to_argv(u"tahoe\u263A"), - ) + def test_list(self): + return self._test_list(u"tahoe") - self.failUnless(unicode_to_argv(u"Alias 'tahoe\u263A' created") in stdout) - self.failIf(stderr) - booms = [] - - def boom(out, indent=4): - if not len(booms): - booms.append(out) - raise UnicodeEncodeError("foo", u"foo", 3, 5, "foo") - return str(out) - - with patch("allmydata.scripts.tahoe_add_alias.json.dumps", boom): - aliases = get_aliases(self.get_clientdir()) - self.failUnless(u"tahoe\u263A" in aliases) - self.failUnless(aliases[u"tahoe\u263A"].startswith("URI:DIR2:")) - - rc, stdout, stderr = yield self.do_cli("list-aliases", "--json") - - self.assertEqual(1, rc) - self.assertIn("could not be converted", stderr) - - @inlineCallbacks - def test_list_unicode_mismatch(self): - self.basedir = "cli/ListAlias/test_list_unicode_mismatch" - skip_if_cannot_represent_argv(u"tahoe\u263A") - self.set_up_grid(oneshare=True) - - rc, stdout, stderr = yield self.do_cli( - "create-alias", - unicode_to_argv(u"tahoe\u263A"), - ) - - def boom(out): - print("boom {}".format(out)) - return out - raise UnicodeEncodeError("foo", u"foo", 3, 5, "foo") - - with patch("allmydata.scripts.tahoe_add_alias.unicode_to_output", boom): - self.failUnless(unicode_to_argv(u"Alias 'tahoe\u263A' created") in stdout) - self.failIf(stderr) - aliases = get_aliases(self.get_clientdir()) - self.failUnless(u"tahoe\u263A" in aliases) - self.failUnless(aliases[u"tahoe\u263A"].startswith("URI:DIR2:")) - - rc, stdout, stderr = yield self.do_cli("list-aliases") - - self.assertEqual(1, rc) - self.assertIn("could not be converted", stderr) + def test_list_unicode(self): + return self._test_list(u"tahoe\{SNOWMAN}") From d29210a140145bb4445a3d12f3d227f236b30a29 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 11:04:05 -0500 Subject: [PATCH 03/31] unused import --- src/allmydata/scripts/tahoe_add_alias.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index c7215d1db..63fa47c44 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -10,7 +10,7 @@ from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place -from allmydata.util.encodingutil import unicode_to_output, quote_output +from allmydata.util.encodingutil import quote_output def add_line_to_aliasfile(aliasfile, alias, cap): From c4b58fe00b74fc8fef2670222c31d5d8781861d3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 11:04:19 -0500 Subject: [PATCH 04/31] unused import --- src/allmydata/test/cli/test_alias.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 6eaa82e3e..f2d65bb73 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -7,7 +7,6 @@ from allmydata.util.encodingutil import unicode_to_argv from allmydata.scripts.common import get_aliases from allmydata.test.no_network import GridTestMixin from .common import CLITestMixin -from ..common_util import skip_if_cannot_represent_argv # see also test_create_alias From 77bebb99161b729247c02c44a1ae80be81b18081 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 17:00:34 -0500 Subject: [PATCH 05/31] [wip] remove mock from test_alias, along with a bunch of encoding-related changes :/ --- src/allmydata/scripts/admin.py | 2 +- src/allmydata/scripts/tahoe_add_alias.py | 34 ++++++-- src/allmydata/test/cli/common.py | 16 +++- src/allmydata/test/cli/test_alias.py | 45 +++++++---- src/allmydata/test/cli/test_cp.py | 2 +- src/allmydata/test/cli/test_create_alias.py | 12 ++- src/allmydata/test/common_util.py | 88 ++++++++++++++++++--- src/allmydata/util/encodingutil.py | 7 ++ 8 files changed, 165 insertions(+), 41 deletions(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index e472ffd8c..30862a4ae 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -22,7 +22,7 @@ def print_keypair(options): class DerivePubkeyOptions(BaseOptions): def parseArgs(self, privkey): - self.privkey = privkey + self.privkey = privkey.encode("ascii") def getSynopsis(self): return "Usage: tahoe [global-options] admin derive-pubkey PRIVKEY" diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 63fa47c44..10e687f32 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -1,4 +1,5 @@ from __future__ import print_function +from __future__ import unicode_literals import os.path import codecs @@ -10,7 +11,7 @@ from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place -from allmydata.util.encodingutil import quote_output +from allmydata.util.encodingutil import quote_output, quote_output_u def add_line_to_aliasfile(aliasfile, alias, cap): @@ -48,14 +49,13 @@ def add_alias(options): old_aliases = get_aliases(nodedir) if alias in old_aliases: - print("Alias %s already exists!" % quote_output(alias), file=stderr) + print("Alias %s already exists!" % quote_output_u(alias), file=stderr) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") cap = uri.from_string_dirnode(cap).to_string() add_line_to_aliasfile(aliasfile, alias, cap) - - print("Alias %s added" % quote_output(alias), file=stdout) + show_output(stdout, "Alias {alias} added", alias=alias) return 0 def create_alias(options): @@ -93,11 +93,26 @@ def create_alias(options): # probably check for others.. add_line_to_aliasfile(aliasfile, alias, new_uri) - - print("Alias %s created" % (quote_output(alias),), file=stdout) + show_output(stdout, "Alias {alias} created", alias=alias) return 0 +def show_output(fp, template, **kwargs): + assert isinstance(template, unicode) + + # On Python 2 and Python 3 fp has an encoding attribute under real usage + # but the test suite passes StringIO in many places which has no such + # attribute. Make allowances for this until the test suite is fixed. + encoding = getattr(fp, "encoding", "utf-8") + output = template.format(**{ + k: quote_output_u(v, encoding=encoding) + for (k, v) + in kwargs.items() + }) + safe_output = output.encode(encoding, "namereplace").decode(encoding) + print(safe_output, file=fp) + + def _get_alias_details(nodedir): aliases = get_aliases(nodedir) alias_names = sorted(aliases.keys()) @@ -115,7 +130,7 @@ def list_aliases(options): data = _get_alias_details(options['node-directory']) if options['json']: - output = json.dumps(data, indent=4) + output = json.dumps(data, indent=4).decode("utf-8") else: def dircap(details): return ( @@ -132,6 +147,9 @@ def list_aliases(options): in data.items() )) - print(output, file=options.stdout) + if output: + # Show whatever we computed. Skip this if there is no output to avoid + # a spurious blank line. + print(output, file=options.stdout) return 0 diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 852dce52c..e4c96bab8 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,6 +1,6 @@ from ...util.encodingutil import unicode_to_argv from ...scripts import runner -from ..common_util import ReallyEqualMixin, run_cli +from ..common_util import ReallyEqualMixin, run_cli, run_cli_ex def parse_options(basedir, command, args): o = runner.Options() @@ -10,10 +10,18 @@ def parse_options(basedir, command, args): return o class CLITestMixin(ReallyEqualMixin): + def do_cli_ex(self, verb, argv, client_num=0, **kwargs): + # client_num is used to execute client CLI commands on a specific + # client. + client_dir = self.get_clientdir(i=client_num) + nodeargs = [ u"--node-directory", client_dir ] + return run_cli_ex(verb, argv, nodeargs=nodeargs, **kwargs) + + def do_cli(self, verb, *args, **kwargs): # client_num is used to execute client CLI commands on a specific # client. - client_num = kwargs.get("client_num", 0) + client_num = kwargs.pop("client_num", 0) client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) - nodeargs = [ "--node-directory", client_dir ] - return run_cli(verb, nodeargs=nodeargs, *args, **kwargs) + nodeargs = [ b"--node-directory", client_dir ] + return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index f2d65bb73..9064a8c2b 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -3,47 +3,60 @@ import json from twisted.trial import unittest from twisted.internet.defer import inlineCallbacks -from allmydata.util.encodingutil import unicode_to_argv from allmydata.scripts.common import get_aliases from allmydata.test.no_network import GridTestMixin from .common import CLITestMixin +from allmydata.util.encodingutil import quote_output # see also test_create_alias class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @inlineCallbacks - def _test_list(self, alias): - self.basedir = "cli/ListAlias/test_list" + def _test_list(self, alias, encoding): + self.basedir = self.mktemp() self.set_up_grid(oneshare=True) - rc, stdout, stderr = yield self.do_cli( - "create-alias", - unicode_to_argv(alias), + rc, stdout, stderr = yield self.do_cli_ex( + u"create-alias", + [alias], + encoding=encoding, ) self.assertIn( - unicode_to_argv(u"Alias '{}' created".format(alias)), - stdout, + b"Alias {} created".format(quote_output(alias, encoding=encoding)), + stdout.encode(encoding), ) self.assertEqual("", stderr) aliases = get_aliases(self.get_clientdir()) self.assertIn(alias, aliases) - self.assertTrue(aliases[alias].startswith("URI:DIR2:")) + self.assertTrue(aliases[alias].startswith(u"URI:DIR2:")) - rc, stdout, stderr = yield self.do_cli("list-aliases", "--json") + rc, stdout, stderr = yield self.do_cli_ex( + u"list-aliases", + [u"--json"], + encoding=encoding, + ) self.assertEqual(0, rc) data = json.loads(stdout) self.assertIn(alias, data) data = data[alias] - self.assertIn("readwrite", data) - self.assertIn("readonly", data) + self.assertIn(u"readwrite", data) + self.assertIn(u"readonly", data) - def test_list(self): - return self._test_list(u"tahoe") + def test_list_ascii(self): + return self._test_list(u"tahoe", encoding="ascii") - def test_list_unicode(self): - return self._test_list(u"tahoe\{SNOWMAN}") + def test_list_nonascii_ascii(self): + return self._test_list(u"tahoe\N{SNOWMAN}", encoding="ascii") + + + def test_list_utf_8(self): + return self._test_list(u"tahoe", encoding="utf-8") + + + def test_list_nonascii_utf_8(self): + return self._test_list(u"tahoe\N{SNOWMAN}", encoding="utf-8") diff --git a/src/allmydata/test/cli/test_cp.py b/src/allmydata/test/cli/test_cp.py index ba1894f1c..6cebec4a5 100644 --- a/src/allmydata/test/cli/test_cp.py +++ b/src/allmydata/test/cli/test_cp.py @@ -661,7 +661,7 @@ starting copy, 2 files, 1 directories # This test ensures that tahoe will copy a file from the grid to # a local directory without a specified file name. # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2027 - self.basedir = "cli/Cp/cp_verbose" + self.basedir = "cli/Cp/ticket_2027" self.set_up_grid(oneshare=True) # Write a test file, which we'll copy to the grid. diff --git a/src/allmydata/test/cli/test_create_alias.py b/src/allmydata/test/cli/test_create_alias.py index ea3200e2e..324b2364a 100644 --- a/src/allmydata/test/cli/test_create_alias.py +++ b/src/allmydata/test/cli/test_create_alias.py @@ -6,7 +6,7 @@ from allmydata.util import fileutil from allmydata.scripts.common import get_aliases from allmydata.scripts import cli, runner from ..no_network import GridTestMixin -from allmydata.util.encodingutil import quote_output, get_io_encoding +from allmydata.util.encodingutil import quote_output_u, get_io_encoding from .common import CLITestMixin class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @@ -171,7 +171,15 @@ class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): (rc, out, err) = args self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") - self.failUnlessIn("Alias %s created" % quote_output(u"\u00E9tudes"), out) + self.assertIn( + u"Alias %s created" % ( + quote_output_u( + u"\u00E9tudes", + encoding=get_io_encoding(), + ), + ), + out.decode(get_io_encoding()), + ) aliases = get_aliases(self.get_clientdir()) self.failUnless(aliases[u"\u00E9tudes"].startswith("URI:DIR2:")) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index e3f5cf750..48e89a851 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -5,6 +5,10 @@ import time import signal from random import randrange from six.moves import StringIO +from io import ( + TextIOWrapper, + BytesIO, +) from twisted.internet import reactor, defer from twisted.python import failure @@ -35,24 +39,90 @@ def skip_if_cannot_represent_argv(u): except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII argv could not be encoded on this platform.") + +def _getvalue(io): + """ + Read out the complete contents of a file-like object. + """ + io.seek(0) + return io.read() + + def run_cli(verb, *args, **kwargs): - precondition(not [True for arg in args if not isinstance(arg, str)], - "arguments to do_cli must be strs -- convert using unicode_to_argv", args=args) - nodeargs = kwargs.get("nodeargs", []) - argv = nodeargs + [verb] + list(args) - stdin = kwargs.get("stdin", "") - stdout = StringIO() - stderr = StringIO() + """ + Run some CLI command using Python 2 stdout/stderr semantics. + """ + nodeargs = kwargs.pop("nodeargs", []) + stdin = kwargs.pop("stdin", None) + precondition( + all(isinstance(arg, bytes) for arg in [verb] + (nodeargs or []) + list(args)), + "arguments to run_cli must be bytes -- convert using unicode_to_argv", + verb=verb, + args=args, + nodeargs=nodeargs, + ) + encoding = "utf-8" + d = run_cli_ex( + verb=verb.decode(encoding), + argv=list(arg.decode(encoding) for arg in args), + nodeargs=list(nodearg.decode(encoding) for nodearg in nodeargs), + stdin=stdin, + ) + def maybe_encode(result): + code, stdout, stderr = result + # Make sure we produce bytes output since that's what all the code + # written to use this interface expects. If you don't like that, use + # run_cli_ex instead. We use get_io_encoding here to make sure that + # whatever was written can actually be encoded that way, otherwise it + # wouldn't really be writeable under real usage. + if isinstance(stdout, unicode): + stdout = stdout.encode(encoding) + if isinstance(stderr, unicode): + stderr = stderr.encode(encoding) + return code, stdout, stderr + d.addCallback(maybe_encode) + return d + + +def run_cli_ex(verb, argv, nodeargs=None, stdin=None, encoding=None): + precondition( + all(isinstance(arg, unicode) for arg in [verb] + (nodeargs or []) + argv), + "arguments to run_cli_ex must be unicode", + verb=verb, + nodeargs=nodeargs, + argv=argv, + ) + if nodeargs is None: + nodeargs = [] + argv = nodeargs + [verb] + list(argv) + if stdin is None: + stdin = "" + if encoding is None: + # The original behavior, the Python 2 behavior, is to accept either + # bytes or unicode and try to automatically encode or decode as + # necessary. This works okay for ASCII and if LANG is set + # appropriately. These aren't great constraints so we should move + # away from this behavior. + stdout = StringIO() + stderr = StringIO() + else: + # The new behavior, the Python 3 behavior, is to accept unicode and + # encode it using a specific encoding. For older versions of Python + # 3, the encoding is determined from LANG (bad) but for newer Python + # 3, the encoding is always utf-8 (good). Tests can pass in different + # encodings to exercise different behaviors. + stdout = TextIOWrapper(BytesIO(), encoding) + stderr = TextIOWrapper(BytesIO(), encoding) d = defer.succeed(argv) d.addCallback(runner.parse_or_exit_with_explanation, stdout=stdout) d.addCallback(runner.dispatch, stdin=StringIO(stdin), stdout=stdout, stderr=stderr) def _done(rc): - return 0, stdout.getvalue(), stderr.getvalue() + return 0, _getvalue(stdout), _getvalue(stderr) def _err(f): f.trap(SystemExit) - return f.value.code, stdout.getvalue(), stderr.getvalue() + return f.value.code, _getvalue(stdout), _getvalue(stderr) d.addCallbacks(_done, _err) return d diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 17a7a2f38..8ffd7f2f5 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -252,6 +252,13 @@ ESCAPABLE_UNICODE = re.compile(u'([\uD800-\uDBFF][\uDC00-\uDFFF])|' # valid sur ESCAPABLE_8BIT = re.compile( br'[^ !#\x25-\x5B\x5D-\x5F\x61-\x7E]', re.DOTALL) +def quote_output_u(*args, **kwargs): + result = quote_output(*args, **kwargs) + if isinstance(result, unicode): + return result + return result.decode("utf-8") + + def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): """ Encode either a Unicode string or a UTF-8-encoded bytestring for representation From b464fa6483025bdf8124a4880f00f63bb094e208 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 18:28:11 -0500 Subject: [PATCH 06/31] docstring --- src/allmydata/util/encodingutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 8ffd7f2f5..cab5cd114 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -253,6 +253,9 @@ ESCAPABLE_UNICODE = re.compile(u'([\uD800-\uDBFF][\uDC00-\uDFFF])|' # valid sur ESCAPABLE_8BIT = re.compile( br'[^ !#\x25-\x5B\x5D-\x5F\x61-\x7E]', re.DOTALL) def quote_output_u(*args, **kwargs): + """ + Like ``quote_output`` but always return ``unicode``. + """ result = quote_output(*args, **kwargs) if isinstance(result, unicode): return result From 2955d22f723901612e62cd6411eea945a46bd3e0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 18:38:51 -0500 Subject: [PATCH 07/31] note a problem with test_system --- src/allmydata/test/test_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 7a7fe117b..d64c56f09 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -2618,7 +2618,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def _run_in_subprocess(ignored, verb, *args, **kwargs): stdin = kwargs.get("stdin") - env = kwargs.get("env", os.environ) + env = kwargs.get("env", os.environ) # XXX Gets mutated below, great. # Python warnings from the child process don't matter. env["PYTHONWARNINGS"] = "ignore" newargs = ["--node-directory", self.getdir("client0"), verb] + list(args) From 5aee8b422d8f03c24a3b08a7ea13b8ba382be93b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 18:39:09 -0500 Subject: [PATCH 08/31] Oops there's another case --- src/allmydata/scripts/tahoe_add_alias.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 10e687f32..1252b0e72 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -100,10 +100,12 @@ def create_alias(options): def show_output(fp, template, **kwargs): assert isinstance(template, unicode) - # On Python 2 and Python 3 fp has an encoding attribute under real usage - # but the test suite passes StringIO in many places which has no such - # attribute. Make allowances for this until the test suite is fixed. - encoding = getattr(fp, "encoding", "utf-8") + # On Python 3 fp has an encoding attribute under all real usage. On + # Python 2, the encoding attribute is None if stdio is not a tty. The + # test suite often passes StringIO which has no such attribute. Make + # allowances for this until the test suite is fixed and Python 2 is no + # more. + encoding = getattr(fp, "encoding", None) or "utf-8" output = template.format(**{ k: quote_output_u(v, encoding=encoding) for (k, v) From 613777d16619859570bd1f405508e988577aeff6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 19:23:13 -0500 Subject: [PATCH 09/31] Make sure this one is bytes too --- src/allmydata/scripts/debug.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index fd3f2b87c..b6d10c438 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -607,7 +607,7 @@ class FindSharesOptions(BaseOptions): def parseArgs(self, storage_index_s, *nodedirs): from allmydata.util.encodingutil import argv_to_abspath - self.si_s = storage_index_s + self.si_s = storage_index_s.encode("ascii") self.nodedirs = map(argv_to_abspath, nodedirs) description = """ From c12b082fa7362d92237952636f4afb8adbe96064 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 6 Dec 2020 20:37:28 -0500 Subject: [PATCH 10/31] Put run_cli back largely how it was Also deal with StringIO better in show_output --- src/allmydata/scripts/tahoe_add_alias.py | 27 ++++++- src/allmydata/test/cli/common.py | 6 +- src/allmydata/test/cli/test_alias.py | 20 ++++-- src/allmydata/test/cli/test_create_alias.py | 12 +--- src/allmydata/test/common_util.py | 79 ++++++++++----------- 5 files changed, 82 insertions(+), 62 deletions(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 1252b0e72..51d48e5a3 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -98,6 +98,20 @@ def create_alias(options): def show_output(fp, template, **kwargs): + """ + Print to just about anything. + + :param fp: A file-like object to which to print. This handles the case + where ``fp`` declares a support encoding with the ``encoding`` + attribute (eg sys.stdout on Python 3). It handles the case where + ``fp`` declares no supported encoding via ``None`` for its + ``encoding`` attribute (eg sys.stdout on Python 2 when stdout is not a + tty). It handles the case where ``fp`` declares an encoding that does + not support all of the characters in the output by forcing the + "namereplace" error handler. It handles the case where there is no + ``encoding`` attribute at all (eg StringIO.StringIO) by writing + utf-8-encoded bytes. + """ assert isinstance(template, unicode) # On Python 3 fp has an encoding attribute under all real usage. On @@ -105,13 +119,22 @@ def show_output(fp, template, **kwargs): # test suite often passes StringIO which has no such attribute. Make # allowances for this until the test suite is fixed and Python 2 is no # more. - encoding = getattr(fp, "encoding", None) or "utf-8" + try: + encoding = fp.encoding or "utf-8" + except AttributeError: + has_encoding = False + encoding = "utf-8" + else: + has_encoding = True + output = template.format(**{ k: quote_output_u(v, encoding=encoding) for (k, v) in kwargs.items() }) - safe_output = output.encode(encoding, "namereplace").decode(encoding) + safe_output = output.encode(encoding, "namereplace") + if has_encoding: + safe_output = safe_output.decode(encoding) print(safe_output, file=fp) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index e4c96bab8..3da959604 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,6 +1,6 @@ from ...util.encodingutil import unicode_to_argv from ...scripts import runner -from ..common_util import ReallyEqualMixin, run_cli, run_cli_ex +from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode def parse_options(basedir, command, args): o = runner.Options() @@ -10,12 +10,12 @@ def parse_options(basedir, command, args): return o class CLITestMixin(ReallyEqualMixin): - def do_cli_ex(self, verb, argv, client_num=0, **kwargs): + def do_cli_unicode(self, verb, argv, client_num=0, **kwargs): # client_num is used to execute client CLI commands on a specific # client. client_dir = self.get_clientdir(i=client_num) nodeargs = [ u"--node-directory", client_dir ] - return run_cli_ex(verb, argv, nodeargs=nodeargs, **kwargs) + return run_cli_unicode(verb, argv, nodeargs=nodeargs, **kwargs) def do_cli(self, verb, *args, **kwargs): diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 9064a8c2b..9a2b35c8c 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -17,22 +17,24 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): self.basedir = self.mktemp() self.set_up_grid(oneshare=True) - rc, stdout, stderr = yield self.do_cli_ex( + rc, stdout, stderr = yield self.do_cli_unicode( u"create-alias", [alias], encoding=encoding, ) - self.assertIn( - b"Alias {} created".format(quote_output(alias, encoding=encoding)), - stdout.encode(encoding), + self.assertEqual( + b"Alias {} created\n".format( + quote_output(alias, encoding=encoding), + ), + stdout, ) self.assertEqual("", stderr) aliases = get_aliases(self.get_clientdir()) self.assertIn(alias, aliases) self.assertTrue(aliases[alias].startswith(u"URI:DIR2:")) - rc, stdout, stderr = yield self.do_cli_ex( + rc, stdout, stderr = yield self.do_cli_unicode( u"list-aliases", [u"--json"], encoding=encoding, @@ -60,3 +62,11 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list_nonascii_utf_8(self): return self._test_list(u"tahoe\N{SNOWMAN}", encoding="utf-8") + + + def test_list_none(self): + return self._test_list(u"tahoe", encoding=None) + + + def test_list_nonascii_none(self): + return self._test_list(u"tahoe\N{SNOWMAN}", encoding=None) diff --git a/src/allmydata/test/cli/test_create_alias.py b/src/allmydata/test/cli/test_create_alias.py index 324b2364a..ea3200e2e 100644 --- a/src/allmydata/test/cli/test_create_alias.py +++ b/src/allmydata/test/cli/test_create_alias.py @@ -6,7 +6,7 @@ from allmydata.util import fileutil from allmydata.scripts.common import get_aliases from allmydata.scripts import cli, runner from ..no_network import GridTestMixin -from allmydata.util.encodingutil import quote_output_u, get_io_encoding +from allmydata.util.encodingutil import quote_output, get_io_encoding from .common import CLITestMixin class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @@ -171,15 +171,7 @@ class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): (rc, out, err) = args self.failUnlessReallyEqual(rc, 0) self.failUnlessReallyEqual(err, "") - self.assertIn( - u"Alias %s created" % ( - quote_output_u( - u"\u00E9tudes", - encoding=get_io_encoding(), - ), - ), - out.decode(get_io_encoding()), - ) + self.failUnlessIn("Alias %s created" % quote_output(u"\u00E9tudes"), out) aliases = get_aliases(self.get_clientdir()) self.failUnless(aliases[u"\u00E9tudes"].startswith("URI:DIR2:")) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 48e89a851..8b4a33197 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -48,55 +48,18 @@ def _getvalue(io): return io.read() -def run_cli(verb, *args, **kwargs): - """ - Run some CLI command using Python 2 stdout/stderr semantics. - """ +def run_cli_bytes(verb, *args, **kwargs): nodeargs = kwargs.pop("nodeargs", []) - stdin = kwargs.pop("stdin", None) + encoding = kwargs.pop("encoding", None) precondition( - all(isinstance(arg, bytes) for arg in [verb] + (nodeargs or []) + list(args)), + all(isinstance(arg, bytes) for arg in [verb] + nodeargs + list(args)), "arguments to run_cli must be bytes -- convert using unicode_to_argv", verb=verb, args=args, nodeargs=nodeargs, ) - encoding = "utf-8" - d = run_cli_ex( - verb=verb.decode(encoding), - argv=list(arg.decode(encoding) for arg in args), - nodeargs=list(nodearg.decode(encoding) for nodearg in nodeargs), - stdin=stdin, - ) - def maybe_encode(result): - code, stdout, stderr = result - # Make sure we produce bytes output since that's what all the code - # written to use this interface expects. If you don't like that, use - # run_cli_ex instead. We use get_io_encoding here to make sure that - # whatever was written can actually be encoded that way, otherwise it - # wouldn't really be writeable under real usage. - if isinstance(stdout, unicode): - stdout = stdout.encode(encoding) - if isinstance(stderr, unicode): - stderr = stderr.encode(encoding) - return code, stdout, stderr - d.addCallback(maybe_encode) - return d - - -def run_cli_ex(verb, argv, nodeargs=None, stdin=None, encoding=None): - precondition( - all(isinstance(arg, unicode) for arg in [verb] + (nodeargs or []) + argv), - "arguments to run_cli_ex must be unicode", - verb=verb, - nodeargs=nodeargs, - argv=argv, - ) - if nodeargs is None: - nodeargs = [] - argv = nodeargs + [verb] + list(argv) - if stdin is None: - stdin = "" + argv = nodeargs + [verb] + list(args) + stdin = kwargs.get("stdin", "") if encoding is None: # The original behavior, the Python 2 behavior, is to accept either # bytes or unicode and try to automatically encode or decode as @@ -126,6 +89,38 @@ def run_cli_ex(verb, argv, nodeargs=None, stdin=None, encoding=None): d.addCallbacks(_done, _err) return d + +def run_cli_unicode(verb, argv, nodeargs=None, stdin=None, encoding=None): + if nodeargs is None: + nodeargs = [] + precondition( + all(isinstance(arg, unicode) for arg in [verb] + nodeargs + argv), + "arguments to run_cli_unicode must be unicode", + verb=verb, + nodeargs=nodeargs, + argv=argv, + ) + d = run_cli_bytes( + verb.encode("utf-8"), + nodeargs=list(arg.encode("utf-8") for arg in nodeargs), + stdin=stdin, + encoding=encoding, + *list(arg.encode("utf-8") for arg in argv) + ) + def maybe_decode(result): + code, stdout, stderr = result + if isinstance(stdout, unicode): + stdout = stdout.encode("utf-8") + if isinstance(stderr, unicode): + stderr = stderr.encode("utf-8") + return code, stdout, stderr + d.addCallback(maybe_decode) + return d + + +run_cli = run_cli_bytes + + def parse_cli(*argv): # This parses the CLI options (synchronously), and returns the Options # argument, or throws usage.UsageError if something went wrong. From 8ca98bb8caf4602918b160cb361ca82bc8507c1f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:06:00 -0500 Subject: [PATCH 11/31] using run_cli_unicode, better expect unicode result --- src/allmydata/test/cli/test_alias.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 9a2b35c8c..150cf3fa2 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -6,7 +6,7 @@ from twisted.internet.defer import inlineCallbacks from allmydata.scripts.common import get_aliases from allmydata.test.no_network import GridTestMixin from .common import CLITestMixin -from allmydata.util.encodingutil import quote_output +from allmydata.util.encodingutil import quote_output_u # see also test_create_alias @@ -24,8 +24,8 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): ) self.assertEqual( - b"Alias {} created\n".format( - quote_output(alias, encoding=encoding), + u"Alias {} created\n".format( + quote_output_u(alias, encoding=encoding), ), stdout, ) From 93b30d0ddecf4f5ce3857e54a670fd9b6317cb23 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:06:21 -0500 Subject: [PATCH 12/31] The implementation can't reliably see the encoding we're faking without this --- src/allmydata/test/cli/test_alias.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 150cf3fa2..0cad819e7 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -7,6 +7,7 @@ from allmydata.scripts.common import get_aliases from allmydata.test.no_network import GridTestMixin from .common import CLITestMixin from allmydata.util.encodingutil import quote_output_u +from allmydata.util import encodingutil # see also test_create_alias @@ -17,6 +18,8 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): self.basedir = self.mktemp() self.set_up_grid(oneshare=True) + self.patch(encodingutil, "io_encoding", encoding) + rc, stdout, stderr = yield self.do_cli_unicode( u"create-alias", [alias], From 72a5b571caa91a88d9c18f0cf3ed2e0d85069563 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:10:59 -0500 Subject: [PATCH 13/31] Only test the cases we can make work everywhere These tests previously (in this branch) tried to exercise more ``show_output`` logic than they can actually reach due to the requirement that argv be interpretable. Shrink the test suite down to just what we can squeeze through argv and deal with fully testing ``show_output`` elsewhere. --- src/allmydata/test/cli/test_alias.py | 46 ++++++++++++++++++---------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 0cad819e7..a04727db5 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -52,24 +52,38 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list_ascii(self): - return self._test_list(u"tahoe", encoding="ascii") + """ + An alias composed of all ASCII-encodeable code points can be created when + the active encoding is ASCII. + """ + return self._test_list( + u"tahoe", + encoding="ascii", + ) - def test_list_nonascii_ascii(self): - return self._test_list(u"tahoe\N{SNOWMAN}", encoding="ascii") + def test_list_latin_1(self): + """ + An alias composed of all Latin-1-encodeable code points can be created + when the active encoding is Latin-1. + + This is very similar to ``test_list_utf_8`` but the assumption of + UTF-8 is nearly ubiquitous and explicitly exercising the codepaths + with a UTF-8-incompatible encoding helps flush out unintentional UTF-8 + assumptions. + """ + return self._test_list( + u"taho\N{LATIN SMALL LETTER E WITH ACUTE}", + encoding="latin-1", + ) def test_list_utf_8(self): - return self._test_list(u"tahoe", encoding="utf-8") - - - def test_list_nonascii_utf_8(self): - return self._test_list(u"tahoe\N{SNOWMAN}", encoding="utf-8") - - - def test_list_none(self): - return self._test_list(u"tahoe", encoding=None) - - - def test_list_nonascii_none(self): - return self._test_list(u"tahoe\N{SNOWMAN}", encoding=None) + """ + An alias composed of all UTF-8-encodeable code points can be created when + the active encoding is UTF-8. + """ + return self._test_list( + u"tahoe\N{SNOWMAN}", + encoding="utf-8", + ) From 56f141e170b59db25279583a397bb945ad01d569 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:12:04 -0500 Subject: [PATCH 14/31] decode instead of encoding in maybe_decode legacy from when the bytes/unicode tower was upsidedown compared to how it is now --- src/allmydata/test/common_util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 8b4a33197..4f07dda7c 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -109,10 +109,10 @@ def run_cli_unicode(verb, argv, nodeargs=None, stdin=None, encoding=None): ) def maybe_decode(result): code, stdout, stderr = result - if isinstance(stdout, unicode): - stdout = stdout.encode("utf-8") - if isinstance(stderr, unicode): - stderr = stderr.encode("utf-8") + if isinstance(stdout, bytes): + stdout = stdout.decode(encoding) + if isinstance(stderr, bytes): + stderr = stderr.decode(encoding) return code, stdout, stderr d.addCallback(maybe_decode) return d From f4432d3f23987c0e32b8f2f223e7a7ef530558d4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:12:38 -0500 Subject: [PATCH 15/31] Respect the provided encoding UTF-8 is great but if we're claiming the encoding is something else everywhere else we can't just make it UTF-8 here. --- src/allmydata/test/common_util.py | 6 +++--- src/allmydata/util/encodingutil.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 4f07dda7c..c6491d72e 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -101,11 +101,11 @@ def run_cli_unicode(verb, argv, nodeargs=None, stdin=None, encoding=None): argv=argv, ) d = run_cli_bytes( - verb.encode("utf-8"), - nodeargs=list(arg.encode("utf-8") for arg in nodeargs), + verb.encode(encoding), + nodeargs=list(arg.encode(encoding) for arg in nodeargs), stdin=stdin, encoding=encoding, - *list(arg.encode("utf-8") for arg in argv) + *list(arg.encode(encoding) for arg in argv) ) def maybe_decode(result): code, stdout, stderr = result diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index cab5cd114..f13dc5b8e 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -259,7 +259,7 @@ def quote_output_u(*args, **kwargs): result = quote_output(*args, **kwargs) if isinstance(result, unicode): return result - return result.decode("utf-8") + return result.decode(kwargs.get("encoding", None) or io_encoding) def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): From 7b3a5aceb82f3b28f198de485933f8823e9592c5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:21:56 -0500 Subject: [PATCH 16/31] These tests can't reach any of the codepaths where quote_output matters So simplify --- src/allmydata/test/cli/test_alias.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index a04727db5..335239d5f 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -6,7 +6,6 @@ from twisted.internet.defer import inlineCallbacks from allmydata.scripts.common import get_aliases from allmydata.test.no_network import GridTestMixin from .common import CLITestMixin -from allmydata.util.encodingutil import quote_output_u from allmydata.util import encodingutil # see also test_create_alias @@ -15,6 +14,21 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @inlineCallbacks def _test_list(self, alias, encoding): + """ + Assert that ``tahoe create-alias`` can be used to create an alias named + ``alias`` when argv is encoded using ``encoding``. + + :param unicode alias: The alias to try to create. + + :param str encoding: The name of an encoding to force the + ``create-alias`` implementation to use. This simulates the + effects of setting LANG and doing other locale-foolishness without + actually having to mess with this process's global locale state. + + :return Deferred: A Deferred that fires with success if the alias can + be created and that creation is reported on stdout appropriately + encoded or with failure if something goes wrong. + """ self.basedir = self.mktemp() self.set_up_grid(oneshare=True) @@ -27,9 +41,7 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): ) self.assertEqual( - u"Alias {} created\n".format( - quote_output_u(alias, encoding=encoding), - ), + u"Alias '{}' created\n".format(alias), stdout, ) self.assertEqual("", stderr) From 05d271c7c89a03e78c32d5bf94da8cbcdace1ac9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:26:58 -0500 Subject: [PATCH 17/31] a little more exposition --- src/allmydata/test/cli/test_alias.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 335239d5f..67f438fa5 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -32,6 +32,12 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): self.basedir = self.mktemp() self.set_up_grid(oneshare=True) + # We can pass an encoding into the test utilities to invoke the code + # under test but we can't pass such a parameter directly to the code + # under test. Instead, that code looks at io_encoding. So, + # monkey-patch that value to our desired value here. This is the code + # that most directly takes the place of messing with LANG or the + # locale module. self.patch(encodingutil, "io_encoding", encoding) rc, stdout, stderr = yield self.do_cli_unicode( @@ -40,15 +46,20 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): encoding=encoding, ) - self.assertEqual( - u"Alias '{}' created\n".format(alias), - stdout, - ) + # Make sure the result of the create-alias command is as we want it to + # be. + self.assertEqual(u"Alias '{}' created\n".format(alias), stdout) self.assertEqual("", stderr) + self.assertEqual(0, rc) + + # Make sure it had the intended side-effect, too - an alias created in + # the node filesystem state. aliases = get_aliases(self.get_clientdir()) self.assertIn(alias, aliases) self.assertTrue(aliases[alias].startswith(u"URI:DIR2:")) + # And inspect the state via the user interface list-aliases command + # too. rc, stdout, stderr = yield self.do_cli_unicode( u"list-aliases", [u"--json"], From 72744c9464309bc3fd402303c8f080801591e2ea Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:47:48 -0500 Subject: [PATCH 18/31] more docstrings and properly support (and use) encoding=None throughout --- src/allmydata/test/cli/common.py | 21 ++++++++++++++++++ src/allmydata/test/cli/test_alias.py | 18 ++++++++++++++-- src/allmydata/test/common_util.py | 32 ++++++++++++++++++++++------ 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 3da959604..d90b6a39f 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -10,7 +10,24 @@ def parse_options(basedir, command, args): return o class CLITestMixin(ReallyEqualMixin): + """ + A mixin for use with ``GridTestMixin`` to execute CLI commands against + nodes created by methods of that mixin. + """ def do_cli_unicode(self, verb, argv, client_num=0, **kwargs): + """ + Run a Tahoe-LAFS CLI command. + + :param verb: See ``run_cli_unicode``. + + :param argv: See ``run_cli_unicode``. + + :param int client_num: The number of the ``GridTestMixin``-created + node against which to execute the command. + + :param kwargs: Additional keyword arguments to pass to + ``run_cli_unicode``. + """ # client_num is used to execute client CLI commands on a specific # client. client_dir = self.get_clientdir(i=client_num) @@ -19,6 +36,10 @@ class CLITestMixin(ReallyEqualMixin): def do_cli(self, verb, *args, **kwargs): + """ + Like ``do_cli_unicode`` but work with ``bytes`` everywhere instead of + ``unicode``. + """ # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 67f438fa5..635ed0aba 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -20,10 +20,13 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): :param unicode alias: The alias to try to create. - :param str encoding: The name of an encoding to force the + :param NoneType|str encoding: The name of an encoding to force the ``create-alias`` implementation to use. This simulates the effects of setting LANG and doing other locale-foolishness without actually having to mess with this process's global locale state. + If this is ``None`` then the encoding used will be ascii but the + stdio objects given to the code under test will not declare any + encoding (this is like Python 2 when stdio is not a tty). :return Deferred: A Deferred that fires with success if the alias can be created and that creation is reported on stdout appropriately @@ -38,7 +41,7 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): # monkey-patch that value to our desired value here. This is the code # that most directly takes the place of messing with LANG or the # locale module. - self.patch(encodingutil, "io_encoding", encoding) + self.patch(encodingutil, "io_encoding", encoding or "ascii") rc, stdout, stderr = yield self.do_cli_unicode( u"create-alias", @@ -74,6 +77,17 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): self.assertIn(u"readonly", data) + def test_list_none(self): + """ + An alias composed of all ASCII-encodeable code points can be created when + stdio aren't clearly marked with an encoding. + """ + return self._test_list( + u"tahoe", + encoding=None, + ) + + def test_list_ascii(self): """ An alias composed of all ASCII-encodeable code points can be created when diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index c6491d72e..c08dc03d5 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -91,6 +91,24 @@ def run_cli_bytes(verb, *args, **kwargs): def run_cli_unicode(verb, argv, nodeargs=None, stdin=None, encoding=None): + """ + Run a Tahoe-LAFS CLI command. + + :param unicode verb: The command to run. For example, ``u"create-node"``. + + :param [unicode] argv: The arguments to pass to the command. For example, + ``[u"--hostname=localhost"]``. + + :param [unicode] nodeargs: Extra arguments to pass to the Tahoe executable + before ``verb``. + + :param unicode stdin: Text to pass to the command via stdin. + + :param NoneType|str encoding: The name of an encoding to use for all + bytes/unicode conversions necessary *and* the encoding to cause stdio + to declare with its ``encoding`` attribute. ``None`` means ASCII will + be used and no declaration will be made at all. + """ if nodeargs is None: nodeargs = [] precondition( @@ -100,19 +118,21 @@ def run_cli_unicode(verb, argv, nodeargs=None, stdin=None, encoding=None): nodeargs=nodeargs, argv=argv, ) + codec = encoding or "ascii" + encode = lambda t: None if t is None else t.encode(codec) d = run_cli_bytes( - verb.encode(encoding), - nodeargs=list(arg.encode(encoding) for arg in nodeargs), - stdin=stdin, + encode(verb), + nodeargs=list(encode(arg) for arg in nodeargs), + stdin=encode(stdin), encoding=encoding, - *list(arg.encode(encoding) for arg in argv) + *list(encode(arg) for arg in argv) ) def maybe_decode(result): code, stdout, stderr = result if isinstance(stdout, bytes): - stdout = stdout.decode(encoding) + stdout = stdout.decode(codec) if isinstance(stderr, bytes): - stderr = stderr.decode(encoding) + stderr = stderr.decode(codec) return code, stdout, stderr d.addCallback(maybe_decode) return d From d2664121b98bbb9300f1504d6ea84cd35a483bea Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:51:34 -0500 Subject: [PATCH 19/31] backout no-longer required unrelated change --- src/allmydata/scripts/admin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index 30862a4ae..e472ffd8c 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -22,7 +22,7 @@ def print_keypair(options): class DerivePubkeyOptions(BaseOptions): def parseArgs(self, privkey): - self.privkey = privkey.encode("ascii") + self.privkey = privkey def getSynopsis(self): return "Usage: tahoe [global-options] admin derive-pubkey PRIVKEY" From a8e3424ef635afbc5e2432e8cbef253c16cccb2b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 09:55:27 -0500 Subject: [PATCH 20/31] remove another unrelated change that's no longer required --- src/allmydata/scripts/debug.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index b6d10c438..fd3f2b87c 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -607,7 +607,7 @@ class FindSharesOptions(BaseOptions): def parseArgs(self, storage_index_s, *nodedirs): from allmydata.util.encodingutil import argv_to_abspath - self.si_s = storage_index_s.encode("ascii") + self.si_s = storage_index_s self.nodedirs = map(argv_to_abspath, nodedirs) description = """ From c7358e66396642f0acf08c20d2ab06a338205ed2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 10:16:48 -0500 Subject: [PATCH 21/31] Switch over to the helper in the two functions that matter for this PR --- src/allmydata/scripts/tahoe_add_alias.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 51d48e5a3..4451f7fad 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -49,7 +49,7 @@ def add_alias(options): old_aliases = get_aliases(nodedir) if alias in old_aliases: - print("Alias %s already exists!" % quote_output_u(alias), file=stderr) + show_output(stderr, "Alias {alias} already exists!", alias=alias) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") cap = uri.from_string_dirnode(cap).to_string() @@ -75,7 +75,7 @@ def create_alias(options): old_aliases = get_aliases(nodedir) if alias in old_aliases: - print("Alias %s already exists!" % quote_output(alias), file=stderr) + show_output(stderr, "Alias {alias} already exists!", alias=alias) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") From 87e808b3927981321a4aa4441d5f65c1ad824834 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 10:18:05 -0500 Subject: [PATCH 22/31] one more switch --- src/allmydata/scripts/tahoe_add_alias.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 4451f7fad..df53c95a2 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -152,6 +152,9 @@ def _get_alias_details(nodedir): def list_aliases(options): + """ + Show aliases that exist. + """ data = _get_alias_details(options['node-directory']) if options['json']: @@ -175,6 +178,6 @@ def list_aliases(options): if output: # Show whatever we computed. Skip this if there is no output to avoid # a spurious blank line. - print(output, file=options.stdout) + show_output(options.stdout, output) return 0 From d6d64f6b2759be794e758370ef36395601eb3738 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 10:37:22 -0500 Subject: [PATCH 23/31] fix the json case --- src/allmydata/scripts/tahoe_add_alias.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index df53c95a2..581fb653a 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -151,6 +151,15 @@ def _get_alias_details(nodedir): return data +def _escape_format(t): + """ + _escape_format(t).format() == t + + :param unicode t: The text to escape. + """ + return t.replace("{", "{{").replace("}", "}}") + + def list_aliases(options): """ Show aliases that exist. @@ -158,7 +167,7 @@ def list_aliases(options): data = _get_alias_details(options['node-directory']) if options['json']: - output = json.dumps(data, indent=4).decode("utf-8") + output = _escape_format(json.dumps(data, indent=4).decode("utf-8")) else: def dircap(details): return ( From 1a77ba5698420ea75f7c1014e89bbc3d6cb8d94f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 10:37:25 -0500 Subject: [PATCH 24/31] remove redundant u prefix --- src/allmydata/scripts/tahoe_add_alias.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 581fb653a..442fecf71 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -178,7 +178,7 @@ def list_aliases(options): max_width = max([len(quote_output(name)) for name in data.keys()] + [0]) fmt = "%" + str(max_width) + "s: %s" - output = u"\n".join(list( + output = "\n".join(list( fmt % (name, dircap(details)) for name, details in data.items() From 61ee26fb00f39efb78633f320d51d883aeeaf950 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 7 Dec 2020 10:46:20 -0500 Subject: [PATCH 25/31] ticket reference --- src/allmydata/test/test_system.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index d64c56f09..7b9ec5849 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -2618,7 +2618,8 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def _run_in_subprocess(ignored, verb, *args, **kwargs): stdin = kwargs.get("stdin") - env = kwargs.get("env", os.environ) # XXX Gets mutated below, great. + # XXX https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3548 + env = kwargs.get("env", os.environ) # Python warnings from the child process don't matter. env["PYTHONWARNINGS"] = "ignore" newargs = ["--node-directory", self.getdir("client0"), verb] + list(args) From c39f7721af8a4359c49efdb1357331a4ec45e8f2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 10 Dec 2020 07:03:24 -0500 Subject: [PATCH 26/31] run_cli_bytes docstring --- src/allmydata/test/common_util.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index c08dc03d5..2a8aad0e2 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -49,6 +49,27 @@ def _getvalue(io): def run_cli_bytes(verb, *args, **kwargs): + """ + Run a Tahoe-LAFS CLI command specified as bytes. + + Most code should prefer ``run_cli_unicode`` which deals with all the + necessary encoding considerations. + + :param bytes verb: The command to run. For example, ``b"create-node"``. + + :param [bytes] args: The arguments to pass to the command. For example, + ``(b"--hostname=localhost",)``. + + :param [bytes] nodeargs: Extra arguments to pass to the Tahoe executable + before ``verb``. + + :param bytes stdin: Text to pass to the command via stdin. + + :param NoneType|str encoding: The name of an encoding which stdout and + stderr will be configured to use. ``None`` means stdout and stderr + will accept bytes and unicode and use the default system encoding for + translating between them. + """ nodeargs = kwargs.pop("nodeargs", []) encoding = kwargs.pop("encoding", None) precondition( From 4bb28cadcb7c2963aa1a84f78533d5e2dc80243e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 10 Dec 2020 07:04:28 -0500 Subject: [PATCH 27/31] motivate its existence a bit more --- src/allmydata/test/common_util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 2a8aad0e2..341d383c1 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -53,7 +53,9 @@ def run_cli_bytes(verb, *args, **kwargs): Run a Tahoe-LAFS CLI command specified as bytes. Most code should prefer ``run_cli_unicode`` which deals with all the - necessary encoding considerations. + necessary encoding considerations. This helper still exists so that novel + misconfigurations can be explicitly tested (for example, receiving UTF-8 + bytes when the system encoding claims to be ASCII). :param bytes verb: The command to run. For example, ``b"create-node"``. From 2f532257650fcf651c7220a95811bc49d97eb3a3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 10 Dec 2020 07:06:01 -0500 Subject: [PATCH 28/31] better helper name --- src/allmydata/test/cli/test_alias.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 635ed0aba..72b634608 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -13,9 +13,9 @@ from allmydata.util import encodingutil class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @inlineCallbacks - def _test_list(self, alias, encoding): + def _check_create_alias(self, alias, encoding): """ - Assert that ``tahoe create-alias`` can be used to create an alias named + Verify that ``tahoe create-alias`` can be used to create an alias named ``alias`` when argv is encoded using ``encoding``. :param unicode alias: The alias to try to create. @@ -82,7 +82,7 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): An alias composed of all ASCII-encodeable code points can be created when stdio aren't clearly marked with an encoding. """ - return self._test_list( + return self._check_create_alias( u"tahoe", encoding=None, ) @@ -93,7 +93,7 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): An alias composed of all ASCII-encodeable code points can be created when the active encoding is ASCII. """ - return self._test_list( + return self._check_create_alias( u"tahoe", encoding="ascii", ) @@ -109,7 +109,7 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): with a UTF-8-incompatible encoding helps flush out unintentional UTF-8 assumptions. """ - return self._test_list( + return self._check_create_alias( u"taho\N{LATIN SMALL LETTER E WITH ACUTE}", encoding="latin-1", ) @@ -120,7 +120,7 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): An alias composed of all UTF-8-encodeable code points can be created when the active encoding is UTF-8. """ - return self._test_list( + return self._check_create_alias( u"tahoe\N{SNOWMAN}", encoding="utf-8", ) From d0c22a529ee1cf0734f131ecda4180ad8aa2c9a8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 10 Dec 2020 07:16:00 -0500 Subject: [PATCH 29/31] json.dumps output should always be ascii --- src/allmydata/scripts/tahoe_add_alias.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 442fecf71..44d08fb21 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -167,7 +167,7 @@ def list_aliases(options): data = _get_alias_details(options['node-directory']) if options['json']: - output = _escape_format(json.dumps(data, indent=4).decode("utf-8")) + output = _escape_format(json.dumps(data, indent=4).decode("ascii")) else: def dircap(details): return ( From 066e98874be50343ab2bbb03760d0d7887c88333 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 10 Dec 2020 07:17:24 -0500 Subject: [PATCH 30/31] Point at do_cli_unicode here too --- src/allmydata/test/cli/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index d90b6a39f..bf175de44 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -39,6 +39,8 @@ class CLITestMixin(ReallyEqualMixin): """ Like ``do_cli_unicode`` but work with ``bytes`` everywhere instead of ``unicode``. + + Where possible, prefer ``do_cli_unicode``. """ # client_num is used to execute client CLI commands on a specific # client. From 6f80862ec582eb0c66ebbb99c9b3486a9b4abf58 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 10 Dec 2020 07:19:27 -0500 Subject: [PATCH 31/31] Slightly clean up formatting implementation --- src/allmydata/scripts/tahoe_add_alias.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 44d08fb21..6f931556d 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -176,10 +176,13 @@ def list_aliases(options): else details['readwrite'] ).decode("utf-8") + def format_dircap(name, details): + return fmt % (name, dircap(details)) + max_width = max([len(quote_output(name)) for name in data.keys()] + [0]) fmt = "%" + str(max_width) + "s: %s" output = "\n".join(list( - fmt % (name, dircap(details)) + format_dircap(name, details) for name, details in data.items() ))