From 73110f48da854b00ea2c3cb25c15152ed584ab04 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:56:46 -0500 Subject: [PATCH 01/43] Banish getProcessOutputAndValue from test_runner It cannot do the right thing on Windows for non-ASCII because Twisted uses pywin32 and on Python 2 pywin32 binds CreateProcessA. --- src/allmydata/test/test_runner.py | 147 +++++++++++++----------------- src/allmydata/test/test_system.py | 22 ++++- 2 files changed, 85 insertions(+), 84 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ef2b99a19..64afca939 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -5,6 +5,14 @@ from __future__ import ( import os.path, re, sys from os import linesep +from subprocess import ( + PIPE, + Popen, +) + +from eliot import ( + log_call, +) from twisted.trial import unittest @@ -19,7 +27,6 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin -from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output from allmydata.test import common_util import allmydata from .common_util import parse_cli, run_cli @@ -29,12 +36,8 @@ from .cli_node_api import ( on_stdout, on_stdout_and_stderr, ) -from ._twisted_9607 import ( - getProcessOutputAndValue, -) from ..util.eliotutil import ( inline_callbacks, - log_call_deferred, ) def get_root_from_file(src): @@ -54,93 +57,72 @@ srcfile = allmydata.__file__ rootdir = get_root_from_file(srcfile) -class RunBinTahoeMixin(object): - @log_call_deferred(action_type="run-bin-tahoe") - def run_bintahoe(self, args, stdin=None, python_options=[], env=None): - command = sys.executable - argv = python_options + ["-m", "allmydata.scripts.runner"] + args +@log_call(action_type="run-bin-tahoe") +def run_bintahoe(extra_argv): + """ + Run the main Tahoe entrypoint in a child process with the given additional + arguments. 
- if env is None: - env = os.environ + :param [unicode] extra_argv: More arguments for the child process argv. - d = getProcessOutputAndValue(command, argv, env, stdinBytes=stdin) - def fix_signal(result): - # Mirror subprocess.Popen.returncode structure - (out, err, signal) = result - return (out, err, -signal) - d.addErrback(fix_signal) - return d + :return: A three-tuple of stdout (unicode), stderr (unicode), and the + child process "returncode" (int). + """ + argv = [sys.executable, u"-m", u"allmydata.scripts.runner"] + extra_argv + p = Popen(argv, stdout=PIPE, stderr=PIPE) + out = p.stdout.read().decode("utf-8") + err = p.stderr.read().decode("utf-8") + returncode = p.wait() + return (out, err, returncode) -class BinTahoe(common_util.SignalMixin, unittest.TestCase, RunBinTahoeMixin): +class BinTahoe(common_util.SignalMixin, unittest.TestCase): def test_unicode_arguments_and_output(self): + """ + The runner script receives unmangled non-ASCII values in argv. + """ tricky = u"\u2621" - try: - tricky_arg = unicode_to_argv(tricky, mangle=True) - tricky_out = unicode_to_output(tricky) - except UnicodeEncodeError: - raise unittest.SkipTest("A non-ASCII argument/output could not be encoded on this platform.") + out, err, returncode = run_bintahoe([tricky]) + self.assertEqual(returncode, 1) + self.assertIn(u"Unknown command: " + tricky, out) - d = self.run_bintahoe([tricky_arg]) - def _cb(res): - out, err, rc_or_sig = res - self.failUnlessEqual(rc_or_sig, 1, str(res)) - self.failUnlessIn("Unknown command: "+tricky_out, out) - d.addCallback(_cb) - return d - - def test_run_with_python_options(self): - # -t is a harmless option that warns about tabs. 
- d = self.run_bintahoe(["--version"], python_options=["-t"]) - def _cb(res): - out, err, rc_or_sig = res - self.assertEqual(rc_or_sig, 0, str(res)) - self.assertTrue(out.startswith(allmydata.__appname__ + '/'), str(res)) - d.addCallback(_cb) - return d - - @inlineCallbacks def test_help_eliot_destinations(self): - out, err, rc_or_sig = yield self.run_bintahoe(["--help-eliot-destinations"]) - self.assertIn("\tfile:", out) - self.assertEqual(rc_or_sig, 0) + out, err, returncode = run_bintahoe([u"--help-eliot-destinations"]) + self.assertIn(u"\tfile:", out) + self.assertEqual(returncode, 0) - @inlineCallbacks def test_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ + out, err, returncode = run_bintahoe([ # Proves little but maybe more than nothing. - "--eliot-destination=file:-", + u"--eliot-destination=file:-", # Throw in *some* command or the process exits with error, making # it difficult for us to see if the previous arg was accepted or # not. - "--help", + u"--help", ]) - self.assertEqual(rc_or_sig, 0) + self.assertEqual(returncode, 0) - @inlineCallbacks def test_unknown_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=invalid:more", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=invalid:more", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("Unknown destination description", out) - self.assertIn("invalid:more", out) + self.assertEqual(1, returncode) + self.assertIn(u"Unknown destination description", out) + self.assertIn(u"invalid:more", out) - @inlineCallbacks def test_malformed_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=invalid", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=invalid", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("must be formatted like", out) + self.assertEqual(1, returncode) + self.assertIn(u"must be formatted like", out) - @inlineCallbacks def 
test_escape_in_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=file:@foo", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=file:@foo", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("Unsupported escape character", out) + self.assertEqual(1, returncode) + self.assertIn(u"Unsupported escape character", out) class CreateNode(unittest.TestCase): @@ -250,8 +232,7 @@ class CreateNode(unittest.TestCase): ) -class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, - RunBinTahoeMixin): +class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): """ exercise "tahoe run" for both introducer and client node, by spawning "tahoe run" as a subprocess. This doesn't get us line-level coverage, but @@ -271,18 +252,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, The introducer furl is stable across restarts. """ basedir = self.workdir("test_introducer") - c1 = os.path.join(basedir, "c1") + c1 = os.path.join(basedir, u"c1") tahoe = CLINodeAPI(reactor, FilePath(c1)) self.addCleanup(tahoe.stop_and_wait) - out, err, rc_or_sig = yield self.run_bintahoe([ - "--quiet", - "create-introducer", - "--basedir", c1, - "--hostname", "127.0.0.1", + out, err, returncode = run_bintahoe([ + u"--quiet", + u"create-introducer", + u"--basedir", c1, + u"--hostname", u"127.0.0.1", ]) - self.assertEqual(rc_or_sig, 0) + self.assertEqual(returncode, 0) # This makes sure that node.url is written, which allows us to # detect when the introducer restarts in _node_has_restarted below. @@ -350,18 +331,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, 3) Verify that the pid file is removed after SIGTERM (on POSIX). """ basedir = self.workdir("test_client") - c1 = os.path.join(basedir, "c1") + c1 = os.path.join(basedir, u"c1") tahoe = CLINodeAPI(reactor, FilePath(c1)) # Set this up right now so we don't forget later. 
self.addCleanup(tahoe.cleanup) - out, err, rc_or_sig = yield self.run_bintahoe([ - "--quiet", "create-node", "--basedir", c1, - "--webport", "0", - "--hostname", "localhost", + out, err, returncode = run_bintahoe([ + u"--quiet", u"create-node", u"--basedir", c1, + u"--webport", u"0", + u"--hostname", u"localhost", ]) - self.failUnlessEqual(rc_or_sig, 0) + self.failUnlessEqual(returncode, 0) # Check that the --webport option worked. config = fileutil.read(tahoe.config_file.path) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 235361cf8..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -51,6 +51,10 @@ from twisted.python.filepath import ( FilePath, ) +from ._twisted_9607 import ( + getProcessOutputAndValue, +) + from .common import ( TEST_RSA_KEY_SIZE, SameProcessStreamEndpointAssigner, @@ -61,13 +65,29 @@ from .web.common import ( ) # TODO: move this to common or common_util -from allmydata.test.test_runner import RunBinTahoeMixin from . import common_util as testutil from .common_util import run_cli_unicode from ..scripts.common import ( write_introducer, ) +class RunBinTahoeMixin(object): + def run_bintahoe(self, args, stdin=None, python_options=[], env=None): + command = sys.executable + argv = python_options + ["-m", "allmydata.scripts.runner"] + args + + if env is None: + env = os.environ + + d = getProcessOutputAndValue(command, argv, env, stdinBytes=stdin) + def fix_signal(result): + # Mirror subprocess.Popen.returncode structure + (out, err, signal) = result + return (out, err, -signal) + d.addErrback(fix_signal) + return d + + def run_cli(*args, **kwargs): """ Run a Tahoe-LAFS CLI utility, but inline. 
From c6d108ddb25b367b041d97a6110a0666d9307062 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:07:37 -0500 Subject: [PATCH 02/43] Make test_runner and test_windows both use the good Popen --- src/allmydata/test/common.py | 22 ++++++++++++++++++++++ src/allmydata/test/test_runner.py | 13 ++++++++----- src/allmydata/test/test_windows.py | 14 ++------------ 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index f1dbf651d..a49a79ec0 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -9,6 +9,10 @@ __all__ = [ "flush_logged_errors", "skip", "skipIf", + + # Selected based on platform and re-exported for convenience. + "Popen", + "PIPE", ] from past.builtins import chr as byteschr, unicode @@ -48,6 +52,9 @@ from testtools.twistedsupport import ( flush_logged_errors, ) +from twisted.python.runtime import ( + platform, +) from twisted.application import service from twisted.plugin import IPlugin from twisted.internet import defer @@ -101,6 +108,21 @@ from .eliotutil import ( ) from .common_util import ShouldFailMixin # noqa: F401 +if platform.isWindows(): + # Python 2.7 doesn't have good options for launching a process with + # non-ASCII in its command line. So use this alternative that does a + # better job. However, only use it on Windows because it doesn't work + # anywhere else. 
+ from ._win_subprocess import ( + PIPE, + Popen, + ) +else: + from subprocess import ( + PIPE, + Popen, + ) + TEST_RSA_KEY_SIZE = 522 diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 64afca939..c98d4e376 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -5,10 +5,6 @@ from __future__ import ( import os.path, re, sys from os import linesep -from subprocess import ( - PIPE, - Popen, -) from eliot import ( log_call, @@ -29,7 +25,14 @@ from twisted.python.runtime import ( from allmydata.util import fileutil, pollmixin from allmydata.test import common_util import allmydata -from .common_util import parse_cli, run_cli +from .common import ( + PIPE, + Popen, +) +from .common_util import ( + parse_cli, + run_cli, +) from .cli_node_api import ( CLINodeAPI, Expect, diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index f2c1318c5..01e4a57c1 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -29,11 +29,6 @@ from json import ( from textwrap import ( dedent, ) -from subprocess import ( - PIPE, - Popen, -) - from twisted.python.filepath import ( FilePath, ) @@ -66,6 +61,8 @@ from hypothesis.strategies import ( ) from .common import ( + PIPE, + Popen, SyncTestCase, ) @@ -132,13 +129,6 @@ class GetArgvTests(SyncTestCase): ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. """ - # Python 2.7 doesn't have good options for launching a process with - # non-ASCII in its command line. So use this alternative that does a - # better job. Bury the import here because it only works on Windows. 
- from ._win_subprocess import ( - Popen - ) - working_path = FilePath(self.mktemp()) working_path.makedirs() save_argv_path = working_path.child("script.py") From 834abfe6bf8736d5c96477aca7028a297e689717 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:09:25 -0500 Subject: [PATCH 03/43] _win_subprocess didn't actually export this --- src/allmydata/test/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index a49a79ec0..2b1adebd9 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -114,15 +114,15 @@ if platform.isWindows(): # better job. However, only use it on Windows because it doesn't work # anywhere else. from ._win_subprocess import ( - PIPE, Popen, ) else: from subprocess import ( - PIPE, Popen, ) - +from subprocess import ( + PIPE, +) TEST_RSA_KEY_SIZE = 522 From 5df86b46084355cdc92d875e527f9ca08281d0a1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:26:12 -0500 Subject: [PATCH 04/43] restore test_with_python_options now that I see what it's testing --- src/allmydata/test/test_runner.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index c98d4e376..ad03bd391 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -61,7 +61,7 @@ rootdir = get_root_from_file(srcfile) @log_call(action_type="run-bin-tahoe") -def run_bintahoe(extra_argv): +def run_bintahoe(extra_argv, python_options=None): """ Run the main Tahoe entrypoint in a child process with the given additional arguments. @@ -71,7 +71,11 @@ def run_bintahoe(extra_argv): :return: A three-tuple of stdout (unicode), stderr (unicode), and the child process "returncode" (int). 
""" - argv = [sys.executable, u"-m", u"allmydata.scripts.runner"] + extra_argv + argv = [sys.executable] + if python_options is not None: + argv.extend(python_options) + argv.extend([u"-m", u"allmydata.scripts.runner"]) + argv.extend(extra_argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") @@ -89,6 +93,21 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): self.assertEqual(returncode, 1) self.assertIn(u"Unknown command: " + tricky, out) + def test_with_python_options(self): + """ + Additional options for the Python interpreter don't prevent the runner + script from receiving the arguments meant for it. + """ + # This seems like a redundant test for someone else's functionality + # but on Windows we parse the whole command line string ourselves so + # we have to have our own implementation of skipping these options. + + # -t is a harmless option that warns about tabs so we can add it + # -without impacting other behavior noticably. + out, err, returncode = run_bintahoe(["--version"], python_options=["-t"]) + self.assertEqual(returncode, 0) + self.assertTrue(out.startswith(allmydata.__appname__ + '/')) + def test_help_eliot_destinations(self): out, err, returncode = run_bintahoe([u"--help-eliot-destinations"]) self.assertIn(u"\tfile:", out) From b81d57779a1b9a3a09c6de44e9d3e68d56eaed63 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:29:12 -0500 Subject: [PATCH 05/43] Tahoe's .pyscript is ancient history --- src/allmydata/windows/fixups.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index c71b85681..237f96ca5 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -185,8 +185,6 @@ def initialize(): # as in the case of a frozen executable created by bb-freeze or similar. 
sys.argv = argv[-len(sys.argv):] - if sys.argv[0].endswith('.pyscript'): - sys.argv[0] = sys.argv[0][:-9] def a_console(handle): From 1639aef197db54a3522293f16ed0acc34e0e955f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:29:32 -0500 Subject: [PATCH 06/43] Get rid of the argv unmangling that we no longer do --- src/allmydata/windows/fixups.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 237f96ca5..d34404aed 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -161,29 +161,13 @@ def initialize(): except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) - # This works around . - - # Because of (and similar limitations in - # twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments. - # Instead it "mangles" or escapes them using \x7F as an escape character, which we - # unescape here. - def unmangle(s): - return re.sub(u'\\x7F[0-9a-fA-F]*\\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s) - - argv_unicode = get_argv() - try: - argv = [unmangle(argv_u).encode('utf-8') for argv_u in argv_unicode] - except Exception as e: - _complain("%s: could not unmangle Unicode arguments.\n%r" - % (sys.argv[0], argv_unicode)) - raise + argv = list(arg.encode("utf-8") for arg in get_argv()) # Take only the suffix with the same number of arguments as sys.argv. # This accounts for anything that can cause initial arguments to be stripped, # for example, the Python interpreter or any options passed to it, or runner # scripts such as 'coverage run'. It works even if there are no such arguments, # as in the case of a frozen executable created by bb-freeze or similar. 
- sys.argv = argv[-len(sys.argv):] From 2306819db1829da1eb839fa38da8b2dd4cf4970a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:45:39 -0500 Subject: [PATCH 07/43] Get rid of unicode_to_argv and argv_to_unicode --- src/allmydata/scripts/cli.py | 42 ++++++++++++------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 +-- src/allmydata/test/cli/test_backup.py | 5 +-- src/allmydata/test/cli/test_put.py | 7 +++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_encodingutil.py | 35 ++------------------- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 38 +--------------------- 9 files changed, 42 insertions(+), 102 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 379e1d212..bad96a252 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class 
CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = argv_to_unicode(arg1) + self.from_file = unicode(arg1, "utf-8") self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else argv_to_unicode(arg2) + self.to_file = None if arg2 is None else unicode(arg2, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = map(argv_to_unicode, args[:-1]) - self.destination = argv_to_unicode(args[-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." 
class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = argv_to_unicode(topath) + self.to_dir = unicode(topath, "utf-8") synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = argv_to_unicode(pattern).strip() + g = unicode(pattern, "utf-8").strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class 
CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ac17cf445..ed4f0c71d 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = argv_to_unicode(config.get("nickname") or "") + nickname = unicode(config.get("nickname") or "", "utf-8") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = argv_to_unicode(config.get("webport") or "none") + webport = unicode(config.get("webport") or "none", "utf-8") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git 
a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..13445ef0a 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index ceecbd662..6aecd0af6 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,4 +1,5 @@ import os.path +from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -9,7 +10,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -413,7 +414,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) 
self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..2deafb784 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,4 +1,7 @@ import os.path + +from six import ensure_str + from twisted.trial import unittest from twisted.python import usage @@ -7,7 +10,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -47,7 +50,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 2a70cff3a..7b3194d3f 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using unicode_to_argv", + "arguments to run_cli must be a native string -- convert using UTF-8", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index cbc9143b7..5f6700cd6 100644 --- 
a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -81,12 +81,12 @@ from allmydata.test.common_util import ( ReallyEqualMixin, skip_if_cannot_represent_filename, ) from allmydata.util import encodingutil, fileutil -from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ +from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ - unicode_to_argv + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from + from twisted.python import usage @@ -138,12 +138,6 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): _reload() self.assertEqual(get_io_encoding(), 'utf-8') - def test_argv_to_unicode(self): - encodingutil.io_encoding = 'utf-8' - self.failUnlessRaises(usage.UsageError, - argv_to_unicode, - lumiere_nfc.encode('latin1')) - @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' @@ -213,19 +207,6 @@ class EncodingUtil(ReallyEqualMixin): sys.platform = self.original_platform _reload() - def test_argv_to_unicode(self): - if 'argv' not in dir(self): - return - - mock_stdout = MockStdout() - mock_stdout.encoding = self.io_encoding - self.patch(sys, 'stdout', mock_stdout) - - argu = lumiere_nfc - argv = self.argv - _reload() - self.failUnlessReallyEqual(argv_to_unicode(argv), argu) - def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") @@ -245,16 +226,6 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_argv_py2(self): - 
"""unicode_to_argv() converts to bytes on Python 2.""" - self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding)) - - @skipIf(PY2, "Python 3 only.") - def test_unicode_to_argv_py3(self): - """unicode_to_argv() is noop on Python 3.""" - self.assertEqual(unicode_to_argv("abc"), "abc") - @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..03b9ba2de 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output, unicode_to_argv +from allmydata.util.encodingutil import quote_output from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - unicode_to_argv(filename)) + ensure_str(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index f13dc5b8e..5cc3b8d19 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -107,53 +107,17 @@ def get_io_encoding(): """ return io_encoding -def argv_to_unicode(s): - """ - Decode given argv element to unicode. If this fails, raise a UsageError. 
- """ - if isinstance(s, unicode): - return s - - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = argv_to_unicode(s) + decoded = unicode(s, "utf-8") if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) -def unicode_to_argv(s, mangle=False): - """ - Encode the given Unicode argument as a bytestring. - If the argument is to be passed to a different process, then the 'mangle' argument - should be true; on Windows, this uses a mangled encoding that will be reversed by - code in runner.py. - - On Python 3, just return the string unchanged, since argv is unicode. - """ - precondition(isinstance(s, unicode), s) - if PY3: - warnings.warn("This will be unnecessary once Python 2 is dropped.", - DeprecationWarning) - return s - - if mangle and sys.platform == "win32": - # This must be the same as 'mangle' in bin/tahoe-script.template. - return bytes(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s), io_encoding) - else: - return s.encode(io_encoding) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. 
From 260706d33015a3df608db94d9fbd0de4cea481cc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:00:42 -0500 Subject: [PATCH 08/43] Fix the collision with the builtin list --- src/allmydata/scripts/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index bad96a252..310cb20fc 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,4 +1,5 @@ from __future__ import print_function +import __builtin__ import os.path, re, fnmatch from twisted.python import usage @@ -218,7 +219,7 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.sources = __builtin__.list(unicode(a, "utf-8") for a in args[:-1]) self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -429,7 +430,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +447,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ From b8abec607335f3b4242f44ea7bf832cc7df8bef6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:00:48 -0500 Subject: [PATCH 09/43] Get rid of the Latin-1 case Here's a supposition: UTF-8 or bust --- src/allmydata/test/cli/test_alias.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git 
a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 72b634608..07f42b29d 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -99,22 +99,6 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): ) - def test_list_latin_1(self): - """ - An alias composed of all Latin-1-encodeable code points can be created - when the active encoding is Latin-1. - - This is very similar to ``test_list_utf_8`` but the assumption of - UTF-8 is nearly ubiquitous and explicitly exercising the codepaths - with a UTF-8-incompatible encoding helps flush out unintentional UTF-8 - assumptions. - """ - return self._check_create_alias( - u"taho\N{LATIN SMALL LETTER E WITH ACUTE}", - encoding="latin-1", - ) - - def test_list_utf_8(self): """ An alias composed of all UTF-8-encodeable code points can be created when From ec6c036f87fb74937cc4a246fcb4916575809ba6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:14:34 -0500 Subject: [PATCH 10/43] less cheesy list collision fix --- src/allmydata/scripts/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 310cb20fc..c00917022 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,5 +1,4 @@ from __future__ import print_function -import __builtin__ import os.path, re, fnmatch from twisted.python import usage @@ -219,7 +218,7 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = __builtin__.list(unicode(a, "utf-8") for a in args[:-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. 
TO" @@ -430,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -447,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -496,7 +495,7 @@ def list_aliases(options): rc = tahoe_add_alias.list_aliases(options) return rc -def list(options): +def list_(options): from allmydata.scripts import tahoe_ls rc = tahoe_ls.list(options) return rc @@ -582,7 +581,7 @@ dispatch = { "add-alias": add_alias, "create-alias": create_alias, "list-aliases": list_aliases, - "ls": list, + "ls": list_, "get": get, "put": put, "cp": cp, From de9bcc7ea85e0e0ab7da11aabd4b2d6a4ecdf07e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:21:20 -0500 Subject: [PATCH 11/43] encode Popen argv as UTF-8 on POSIX so we ignore locale --- src/allmydata/test/test_runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ad03bd391..4054dc289 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -76,6 +76,11 @@ def run_bintahoe(extra_argv, python_options=None): argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) + if not platform.isWindows(): + # On POSIX Popen (via execvp) will encode argv using the "filesystem" + # encoding. Depending on LANG this may make our unicode arguments + # unencodable. Do our own UTF-8 encoding here instead. 
+ argv = list(arg.encode("utf-8") for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") From 3d02545006ed08187be9cb9ad6902d1b6b543aa9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:29:15 -0500 Subject: [PATCH 12/43] Remove tests based on locale behavior We don't like locale behavior --- src/allmydata/test/test_encodingutil.py | 65 +------------------------ 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 5f6700cd6..d49abafb3 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -70,7 +70,7 @@ if __name__ == "__main__": sys.exit(0) -import os, sys, locale +import os, sys from unittest import skipIf from twisted.trial import unittest @@ -93,69 +93,6 @@ from twisted.python import usage class MockStdout(object): pass -class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): - def test_get_io_encoding(self): - mock_stdout = MockStdout() - self.patch(sys, 'stdout', mock_stdout) - - mock_stdout.encoding = 'UTF-8' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') - - mock_stdout.encoding = 'cp65001' - _reload() - self.assertEqual(get_io_encoding(), 'utf-8') - - mock_stdout.encoding = 'koi8-r' - expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - mock_stdout.encoding = 'nonexistent_encoding' - if sys.platform == "win32": - _reload() - self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') - else: - self.failUnlessRaises(AssertionError, _reload) - - def test_get_io_encoding_not_from_stdout(self): - preferredencoding = 'koi8-r' - def call_locale_getpreferredencoding(): - return preferredencoding - self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding) - mock_stdout = MockStdout() - 
self.patch(sys, 'stdout', mock_stdout) - - expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - mock_stdout.encoding = None - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - preferredencoding = None - _reload() - self.assertEqual(get_io_encoding(), 'utf-8') - - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_output(self): - encodingutil.io_encoding = 'koi8-r' - self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) - - def test_no_unicode_normalization(self): - # Pretend to run on a Unicode platform. - # listdir_unicode normalized to NFC in 1.7beta, but now doesn't. - - def call_os_listdir(path): - return [Artonwall_nfd] - self.patch(os, 'listdir', call_os_listdir) - self.patch(sys, 'platform', 'darwin') - - _reload() - self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) - - # The following tests apply only to platforms that don't store filenames as # Unicode entities on the filesystem. 
class EncodingUtilNonUnicodePlatform(unittest.TestCase): From 23c34004a74ed9e95e5d25f04d3410286e5a1cac Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:29:49 -0500 Subject: [PATCH 13/43] Get rid of tests for bad io_encoding values We don't like bad io_encoding values --- src/allmydata/test/test_encodingutil.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index d49abafb3..a3c92d41c 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -84,11 +84,9 @@ from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ - get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ + get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from -from twisted.python import usage - class MockStdout(object): pass @@ -371,13 +369,6 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_default(self): - self.patch(encodingutil, 'io_encoding', 'ascii') - self.test_quote_output_ascii(None) - - self.patch(encodingutil, 'io_encoding', 'latin1') - self.test_quote_output_latin1(None) - - self.patch(encodingutil, 'io_encoding', 'utf-8') self.test_quote_output_utf8(None) From 60a44b99e69e33395daf48c46dfb0d4dc1ea3981 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:30:15 -0500 Subject: [PATCH 14/43] improve fixtures --- src/allmydata/test/test_encodingutil.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py 
b/src/allmydata/test/test_encodingutil.py index a3c92d41c..d9d6cfeed 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -97,12 +97,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): # Mock sys.platform because unicode_platform() uses it - self.original_platform = sys.platform - sys.platform = 'linux' - - def tearDown(self): - sys.platform = self.original_platform - _reload() + self.patch(sys, "platform", "linux") + self.addCleanup(_reload) def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 @@ -135,12 +131,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): class EncodingUtil(ReallyEqualMixin): def setUp(self): - self.original_platform = sys.platform - sys.platform = self.platform - - def tearDown(self): - sys.platform = self.original_platform - _reload() + self.patch(sys, "platform", self.platform) + self.addCleanup(_reload) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") From 70d2fd66729789e548903618c566aebca82f6105 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:31:22 -0500 Subject: [PATCH 15/43] Don't have a Latin-1 io_encoding It's bad --- src/allmydata/test/test_encodingutil.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index d9d6cfeed..992ebd690 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -472,14 +472,6 @@ class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase): io_encoding = 'UTF-8' dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt'] -class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): - uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' - argv = b'lumi\xe8re' - platform = 
'linux2' - filesystem_encoding = 'ISO-8859-1' - io_encoding = 'ISO-8859-1' - dirlist = [b'test_file', b'Blah blah.txt', b'\xc4rtonwall.mp3'] - class Windows(EncodingUtil, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' argv = b'lumi\xc3\xa8re' From 1810f4e99b5d06869bef8d050d4614f93ed4a2f4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:31:41 -0500 Subject: [PATCH 16/43] Force the encoding to utf-8 more often --- src/allmydata/util/encodingutil.py | 38 +++++++----------------------- 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 5cc3b8d19..289874213 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -44,41 +44,19 @@ def canonical_encoding(encoding): return encoding -def check_encoding(encoding): - # sometimes Python returns an encoding name that it doesn't support for conversion - # fail early if this happens - try: - u"test".encode(encoding) - except (LookupError, AttributeError): - raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,)) -filesystem_encoding = None -io_encoding = None +# On Windows we install UTF-8 stream wrappers for sys.stdout and +# sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). +# +# On POSIX, we are moving towards a UTF-8-everything and ignore the locale. 
+io_encoding = "utf-8" + is_unicode_platform = False use_unicode_filepath = False +filesystem_encoding = "mbcs" if sys.platform == "win32" else "utf-8" def _reload(): - global filesystem_encoding, io_encoding, is_unicode_platform, use_unicode_filepath - - filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) - check_encoding(filesystem_encoding) - - if sys.platform == 'win32': - # On Windows we install UTF-8 stream wrappers for sys.stdout and - # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). - io_encoding = 'utf-8' - else: - ioenc = None - if hasattr(sys.stdout, 'encoding'): - ioenc = sys.stdout.encoding - if ioenc is None: - try: - ioenc = locale.getpreferredencoding() - except Exception: - pass # work around - io_encoding = canonical_encoding(ioenc) - - check_encoding(io_encoding) + global is_unicode_platform, use_unicode_filepath, filesystem_encoding is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] From 15c46924ce8200ed88f45cb20116417f794c60d5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:27:20 -0500 Subject: [PATCH 17/43] unused import --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 289874213..35bf26e0c 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -19,7 +19,7 @@ if PY2: from past.builtins import unicode -import sys, os, re, locale +import sys, os, re import unicodedata import warnings From 2889922a080771e0a1bb2dd28959929773df5eab Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:27:23 -0500 Subject: [PATCH 18/43] rein in scope - don't mess with filesystem encoding here It is a separate can of worms from argv --- src/allmydata/util/encodingutil.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/encodingutil.py
b/src/allmydata/util/encodingutil.py index 35bf26e0c..32049b57f 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -32,6 +32,16 @@ from allmydata.util.fileutil import abspath_expanduser_unicode NoneType = type(None) +def check_encoding(encoding): + # sometimes Python returns an encoding name that it doesn't support for conversion + # fail early if this happens + try: + u"test".encode(encoding) + except (LookupError, AttributeError): + raise AssertionError( + "The character encoding '%s' is not supported for conversion." % (encoding,), + ) + def canonical_encoding(encoding): if encoding is None: log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) @@ -53,11 +63,12 @@ io_encoding = "utf-8" is_unicode_platform = False use_unicode_filepath = False -filesystem_encoding = "mbcs" if sys.platform == "win32" else "utf-8" +filesystem_encoding = None def _reload(): - global is_unicode_platform, use_unicode_filepath, filesystem_encoding + global filesystem_encoding, is_unicode_platform, use_unicode_filepath + filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] # Despite the Unicode-mode FilePath support added to Twisted in From a9a60857b2c5ee2a811eda6562e6e3c31c0b727c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:28:50 -0500 Subject: [PATCH 19/43] attempt to reduce diff noise --- src/allmydata/util/encodingutil.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 32049b57f..168f40a58 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -32,16 +32,6 @@ from allmydata.util.fileutil import abspath_expanduser_unicode NoneType = type(None) -def check_encoding(encoding): - # sometimes Python returns an encoding name that it doesn't support for conversion - # fail 
early if this happens - try: - u"test".encode(encoding) - except (LookupError, AttributeError): - raise AssertionError( - "The character encoding '%s' is not supported for conversion." % (encoding,), - ) - def canonical_encoding(encoding): if encoding is None: log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) @@ -54,6 +44,15 @@ def canonical_encoding(encoding): return encoding +def check_encoding(encoding): + # sometimes Python returns an encoding name that it doesn't support for conversion + # fail early if this happens + try: + u"test".encode(encoding) + except (LookupError, AttributeError): + raise AssertionError( + "The character encoding '%s' is not supported for conversion." % (encoding,), + ) # On Windows we install UTF-8 stream wrappers for sys.stdout and # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). From 7c0d2e3cd5cd3cc780882b6c583050f2fbe49e4e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:29:24 -0500 Subject: [PATCH 20/43] another un-re-shuffling --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 168f40a58..1c884a88d 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -60,9 +60,9 @@ def check_encoding(encoding): # On POSIX, we are moving towards a UTF-8-everything and ignore the locale. 
io_encoding = "utf-8" +filesystem_encoding = None is_unicode_platform = False use_unicode_filepath = False -filesystem_encoding = None def _reload(): global filesystem_encoding, is_unicode_platform, use_unicode_filepath From ae1a0c591bd5d3b1d2f69604a16dd77c568d863b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:58:34 -0500 Subject: [PATCH 21/43] Prefer to fix unicode_to_argv/argv_to_unicode instead of callers --- src/allmydata/scripts/cli.py | 42 +++++++++++++-------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 ++-- src/allmydata/test/cli/test_backup.py | 5 ++-- src/allmydata/test/cli/test_put.py | 6 ++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_runner.py | 11 +++---- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 27 +++++++++++++++++ 9 files changed, 64 insertions(+), 44 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index c00917022..eeae20fe1 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = unicode(alias, "utf-8") + 
self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = unicode(arg1, "utf-8") + self.from_file = argv_to_unicode(arg1) self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else unicode(arg2, "utf-8") + self.to_file = None if arg2 is None else argv_to_unicode(arg2) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) - self.destination = unicode(args[-1], "utf-8") + self.sources = map(argv_to_unicode, args[:-1]) + self.destination = argv_to_unicode(args[-1]) synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." 
class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = unicode(topath, "utf-8") + self.to_dir = argv_to_unicode(topath) synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = unicode(pattern, "utf-8").strip() + g = argv_to_unicode(pattern).strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class 
CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ed4f0c71d..ac17cf445 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = unicode(config.get("nickname") or "", "utf-8") + nickname = argv_to_unicode(config.get("nickname") or "") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = unicode(config.get("webport") or "none", "utf-8") + webport = argv_to_unicode(config.get("webport") or "none") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git 
a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 13445ef0a..bf175de44 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,5 +1,4 @@ -from six import ensure_str - +from ...util.encodingutil import unicode_to_argv from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -46,6 +45,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = ensure_str(self.get_clientdir(i=client_num)) + client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index 6aecd0af6..ceecbd662 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,5 +1,4 @@ import os.path -from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -10,7 +9,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -414,7 +413,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) 
self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 2deafb784..31eb671bb 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,7 +1,5 @@ import os.path -from six import ensure_str - from twisted.trial import unittest from twisted.python import usage @@ -10,7 +8,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -50,7 +48,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 7b3194d3f..2a70cff3a 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using UTF-8", + "arguments to run_cli must be a native string -- convert using unicode_to_argv", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 4054dc289..2f0ac0cbe 100644 --- a/src/allmydata/test/test_runner.py +++ 
b/src/allmydata/test/test_runner.py @@ -23,6 +23,7 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin +from allmydata.util.encodingutil import unicode_to_argv, get_filesystem_encoding from allmydata.test import common_util import allmydata from .common import ( @@ -71,16 +72,12 @@ def run_bintahoe(extra_argv, python_options=None): :return: A three-tuple of stdout (unicode), stderr (unicode), and the child process "returncode" (int). """ - argv = [sys.executable] + argv = [sys.executable.decode(get_filesystem_encoding())] if python_options is not None: argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) - if not platform.isWindows(): - # On POSIX Popen (via execvp) will encode argv using the "filesystem" - # encoding. Depending on LANG this may make our unicode arguments - # unencodable. Do our own UTF-8 encoding here instead. - argv = list(arg.encode("utf-8") for arg in argv) + argv = list(unicode_to_argv(arg) for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") @@ -109,7 +106,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # -t is a harmless option that warns about tabs so we can add it # -without impacting other behavior noticably. 
- out, err, returncode = run_bintahoe(["--version"], python_options=["-t"]) + out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 03b9ba2de..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output +from allmydata.util.encodingutil import quote_output, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - ensure_str(filename)) + unicode_to_argv(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 1c884a88d..c5a8639e8 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -18,6 +18,7 @@ if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 from past.builtins import unicode +from six import ensure_str import sys, os, re import unicodedata @@ -106,6 +107,32 @@ def argv_to_abspath(s, **kwargs): % (quote_output(s), quote_output(os.path.join('.', s)))) return 
abspath_expanduser_unicode(decoded, **kwargs) + +def unicode_to_argv(s, mangle=False): + """ + Make the given unicode string suitable for use in an argv list. + + On Python 2, this encodes using UTF-8. On Python 3, this returns the + input unmodified. + """ + precondition(isinstance(s, unicode), s) + return ensure_str(s) + + +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. From 3dadd47416cc3338e7b17e035c7c8b9fcc179f8e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:11:20 -0500 Subject: [PATCH 22/43] unused import --- src/allmydata/windows/fixups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index d34404aed..b4204b5d3 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -1,6 +1,6 @@ from __future__ import print_function -import codecs, re +import codecs from functools import partial from ctypes import WINFUNCTYPE, windll, POINTER, c_int, WinError, byref, get_last_error From 8f498437cf22976f7033be7eed4731478b4baa7c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:11:23 -0500 Subject: [PATCH 23/43] whitespace --- src/allmydata/test/cli/test_put.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 31eb671bb..08a66f98d 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,5 +1,4 @@ import os.path - from twisted.trial import unittest from twisted.python import usage From db31d2bc1a85fa454aff57de8f390bed48826485 Mon Sep 17 
00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:14:38 -0500 Subject: [PATCH 24/43] news fragment --- newsfragments/3588.incompat | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3588.incompat diff --git a/newsfragments/3588.incompat b/newsfragments/3588.incompat new file mode 100644 index 000000000..402ae8479 --- /dev/null +++ b/newsfragments/3588.incompat @@ -0,0 +1 @@ +The Tahoe command line now always uses UTF-8 to decode its arguments, regardless of locale. From 82d24bfaf7662c989816765e52d2b3fe962762dc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:46:04 -0500 Subject: [PATCH 25/43] one more --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index c5a8639e8..48d5cc7b4 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -101,7 +101,7 @@ def argv_to_abspath(s, **kwargs): Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = unicode(s, "utf-8") + decoded = argv_to_unicode(s) if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." 
% (quote_output(s), quote_output(os.path.join('.', s)))) From aa4f1130270191ed3b8992e370ba684b5d5d5136 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:11:43 -0500 Subject: [PATCH 26/43] Get the monkey patching right --- src/allmydata/test/test_encodingutil.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 992ebd690..da8ee8618 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -96,10 +96,14 @@ class MockStdout(object): class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): - # Mock sys.platform because unicode_platform() uses it - self.patch(sys, "platform", "linux") + # Make sure everything goes back to the way it was at the end of the + # test. self.addCleanup(_reload) + # Mock sys.platform because unicode_platform() uses it. Cleanups run + # in reverse order so we do this second so it gets undone first. + self.patch(sys, "platform", "linux") + def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 # filesystem? 
@@ -131,8 +135,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): class EncodingUtil(ReallyEqualMixin): def setUp(self): - self.patch(sys, "platform", self.platform) self.addCleanup(_reload) + self.patch(sys, "platform", self.platform) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") From 46d3ffb2e287217e2afb7977bf3e41e6521ddab1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:20:50 -0500 Subject: [PATCH 27/43] diff shrink --- src/allmydata/util/encodingutil.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 48d5cc7b4..6d4cd3a8f 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -96,6 +96,20 @@ def get_io_encoding(): """ return io_encoding +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. @@ -119,20 +133,6 @@ def unicode_to_argv(s, mangle=False): return ensure_str(s) -def argv_to_unicode(s): - """ - Perform the inverse of ``unicode_to_argv``. - """ - if isinstance(s, unicode): - return s - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. 
From 99f00818a8eecec76717d977d051c1b0bdac5cb6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:21:32 -0500 Subject: [PATCH 28/43] diff shrink --- src/allmydata/util/encodingutil.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 6d4cd3a8f..679ad2055 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -98,10 +98,13 @@ def get_io_encoding(): def argv_to_unicode(s): """ - Perform the inverse of ``unicode_to_argv``. + Decode given argv element to unicode. If this fails, raise a UsageError. + + This is the inverse of ``unicode_to_argv``. """ if isinstance(s, unicode): return s + precondition(isinstance(s, bytes), s) try: From 7ca3c86a3501b7ad7c702cd76c2d81d9cd9328c0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:35:03 -0500 Subject: [PATCH 29/43] debug nonsense --- src/allmydata/scripts/runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 1f993fda1..2a41d5cf5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -119,7 +119,8 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) + for f in stdout, open("debug.txt", "wt"): + print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) sys.exit(1) return config From ec92f0362d178fd125daaeb35e386502c5712eb2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:36:42 -0500 Subject: [PATCH 30/43] this? 
--- src/allmydata/scripts/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 2a41d5cf5..5af4083cb 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -119,7 +119,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - for f in stdout, open("debug.txt", "wt"): + for f in stdout, open("debug.txt", "wb"): print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) sys.exit(1) return config From 183ee10035cf59e427bee91ced6a66f9fb2a276e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:39:56 -0500 Subject: [PATCH 31/43] probably more useful debug info --- src/allmydata/scripts/runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 5af4083cb..ee0811ea5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,6 +107,8 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation(argv, stdout=sys.stdout): + with open("argv-debug.txt", "wt") as f: + print(repr(argv), file=f) config = Options() try: parse_options(argv, config=config) @@ -119,8 +121,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - for f in stdout, open("debug.txt", "wb"): - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) + print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) sys.exit(1) return config From e3a805caa724f5f3c9c9948c95bd71f995712dff Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:44:00 -0500 Subject: [PATCH 32/43] unicode_to_argv == id on win32 --- src/allmydata/util/encodingutil.py | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 679ad2055..20fecf4a1 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -129,10 +129,12 @@ def unicode_to_argv(s, mangle=False): """ Make the given unicode string suitable for use in an argv list. - On Python 2, this encodes using UTF-8. On Python 3, this returns the - input unmodified. + On Python 2 on POSIX, this encodes using UTF-8. On Python 3 and on + Windows, this returns the input unmodified. """ precondition(isinstance(s, unicode), s) + if sys.platform == "win32": + return s return ensure_str(s) From 622d67c9b937ed9ea8605c0ddd6149b255067726 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:55:40 -0500 Subject: [PATCH 33/43] done with this, thanks --- src/allmydata/scripts/runner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index ee0811ea5..1f993fda1 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,8 +107,6 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation(argv, stdout=sys.stdout): - with open("argv-debug.txt", "wt") as f: - print(repr(argv), file=f) config = Options() try: parse_options(argv, config=config) From 522f96b150cb11d0f6ddf0c00c42744d262b01b6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:56:37 -0500 Subject: [PATCH 34/43] may as well leave(/restore) this --- src/allmydata/util/encodingutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 20fecf4a1..28458c9dc 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -133,6 +133,9 @@ def unicode_to_argv(s, mangle=False): Windows, this returns the input unmodified. 
""" precondition(isinstance(s, unicode), s) + if PY3: + warnings.warn("This will be unnecessary once Python 2 is dropped.", + DeprecationWarning) if sys.platform == "win32": return s return ensure_str(s) From 5a145e74ef59c02cf72c36efb90b180eb49c913c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:59:16 -0500 Subject: [PATCH 35/43] a mild warning/suggestion here --- src/allmydata/test/test_system.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..bf115f127 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -73,6 +73,9 @@ from ..scripts.common import ( class RunBinTahoeMixin(object): def run_bintahoe(self, args, stdin=None, python_options=[], env=None): + # test_runner.run_bintahoe has better unicode support but doesn't + # support env yet and is also synchronous. If we could get rid of + # this in favor of that, though, it would probably be an improvement. command = sys.executable argv = python_options + ["-m", "allmydata.scripts.runner"] + args From 44d76cb159b4d75312c1bcd22cd0bfa9010e620d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:00:02 -0500 Subject: [PATCH 36/43] fix formatting mistake --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 2f0ac0cbe..cf56e8baa 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -105,7 +105,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # we have to have our own implementation of skipping these options. # -t is a harmless option that warns about tabs so we can add it - # -without impacting other behavior noticably. + # without impacting other behavior noticably. 
out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) From 9c63703efc23161bda63f863963dc4f5f75cdc64 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:15:42 -0500 Subject: [PATCH 37/43] no effort being made to support these locales --- src/allmydata/test/test_encodingutil.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index da8ee8618..06340496b 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -492,20 +492,6 @@ class MacOSXLeopard(EncodingUtil, unittest.TestCase): io_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] -class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase): - uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' - platform = 'darwin' - filesystem_encoding = 'utf-8' - io_encoding = 'US-ASCII' - dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] - -class OpenBSD(EncodingUtil, unittest.TestCase): - uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' - platform = 'openbsd4' - filesystem_encoding = '646' - io_encoding = '646' - # Oops, I cannot write filenames containing non-ascii characters - class TestToFromStr(ReallyEqualMixin, unittest.TestCase): def test_to_bytes(self): From 6c430bd4e60a0e2f42a66ecf024edf0ae5b430de Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:16:18 -0500 Subject: [PATCH 38/43] re-add a direct unicode_to_argv test harder to express the conditional in skips so the two tests become one --- src/allmydata/test/test_encodingutil.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git 
a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 06340496b..f7987d466 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -85,8 +85,8 @@ from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from - + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ + unicode_to_argv class MockStdout(object): pass @@ -157,6 +157,20 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) + def test_unicode_to_argv(self): + """ + unicode_to_argv() returns its unicode argument on Windows and Python 3 and + converts to bytes using UTF-8 elsewhere.
+ """ + result = unicode_to_argv(lumiere_nfc) + if PY3 or self.platform == "win32": + expected_value = lumiere_nfc + else: + expected_value = lumiere_nfc.encode(self.io_encoding) + + self.assertIsInstance(result, type(expected_value)) + self.assertEqual(result, expected_value) + @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { From 6984f2be3ffc709aff663e69291d3ed998dd2599 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:58:07 -0500 Subject: [PATCH 39/43] Try to get the Python 2 / Windows case working --- src/allmydata/test/cli/common.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..f1c48d1af 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,12 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + # If we were really going to launch a child process then + # `unicode_to_argv` would be the right thing to do here. However, + # we're just going to call some Python functions directly and those + # Python functions want native strings. So ignore the requirements + # for passing arguments to another process and make sure this argument + # is a native string. 
+ client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) From 43dc85501f7ba57f38c33eb73a5346e5cf670e44 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 16:03:28 -0500 Subject: [PATCH 40/43] is this api less troublesome? --- src/allmydata/test/cli/test_put.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..fadc09c06 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -46,21 +46,21 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.basedir = "cli/Put/unlinked_immutable_from_file" self.set_up_grid(oneshare=True) - rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + rel_fn = unicode(os.path.join(self.basedir, "DATAFILE")) + abs_fn = abspath_expanduser_unicode(rel_fn) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") - d = self.do_cli("put", rel_fn) + d = self.do_cli_unicode(u"put", [rel_fn]) def _uploaded(args): (rc, out, err) = args readcap = out self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.readcap = readcap d.addCallback(_uploaded) - d.addCallback(lambda res: self.do_cli("put", "./" + rel_fn)) + d.addCallback(lambda res: self.do_cli_unicode(u"put", [u"./" + rel_fn])) d.addCallback(lambda rc_stdout_stderr: self.failUnlessReallyEqual(rc_stdout_stderr[1], self.readcap)) - d.addCallback(lambda res: self.do_cli("put", abs_fn)) + d.addCallback(lambda res: self.do_cli_unicode(u"put", [abs_fn])) d.addCallback(lambda rc_stdout_stderr: self.failUnlessReallyEqual(rc_stdout_stderr[1], self.readcap)) # we just have to assume that ~ is handled properly From 216efb2aed019fe893b760dd4e40780da4a202e3 Mon Sep 17 00:00:00 2001 From: Jean-Paul 
Calderone Date: Tue, 12 Jan 2021 16:52:43 -0500 Subject: [PATCH 41/43] unused import --- src/allmydata/test/cli/test_put.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index fadc09c06..3392e67b4 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -7,7 +7,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin From 9b2a9e14ae1b1ceaab4f08de634d5cfe1d97a41d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 9 Feb 2021 21:21:31 -0500 Subject: [PATCH 42/43] Re-add the check so we still get early failure if this ever happens --- src/allmydata/util/encodingutil.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 28458c9dc..483871b5d 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -69,6 +69,7 @@ def _reload(): global filesystem_encoding, is_unicode_platform, use_unicode_filepath filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) + check_encoding(filesystem_encoding) is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] # Despite the Unicode-mode FilePath support added to Twisted in From 11e1fabbe4b1d9241223983ba3cb8e4416c984a0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 13:10:31 -0500 Subject: [PATCH 43/43] Change the platform check to one mypy can recognize :/ --- src/allmydata/test/common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/common.py 
b/src/allmydata/test/common.py index 3c9bfa7aa..230bca648 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -17,6 +17,7 @@ __all__ = [ from past.builtins import chr as byteschr, unicode +import sys import os, random, struct import six import tempfile @@ -52,9 +53,6 @@ from testtools.twistedsupport import ( flush_logged_errors, ) -from twisted.python.runtime import ( - platform, -) from twisted.application import service from twisted.plugin import IPlugin from twisted.internet import defer @@ -108,7 +106,7 @@ from .eliotutil import ( ) from .common_util import ShouldFailMixin # noqa: F401 -if platform.isWindows(): +if sys.platform == "win32": # Python 2.7 doesn't have good options for launching a process with # non-ASCII in its command line. So use this alternative that does a # better job. However, only use it on Windows because it doesn't work