From ae1a0c591bd5d3b1d2f69604a16dd77c568d863b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:58:34 -0500 Subject: [PATCH] Prefer to fix unicode_to_argv/argv_to_unicode instead of callers --- src/allmydata/scripts/cli.py | 42 +++++++++++++-------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 ++-- src/allmydata/test/cli/test_backup.py | 5 ++-- src/allmydata/test/cli/test_put.py | 6 ++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_runner.py | 11 +++---- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 27 +++++++++++++++++ 9 files changed, 64 insertions(+), 44 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index c00917022..eeae20fe1 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = unicode(arg1, "utf-8") + self.from_file = argv_to_unicode(arg1) self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else unicode(arg2, "utf-8") + self.to_file = None if arg2 is None else argv_to_unicode(arg2) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) - self.destination = unicode(args[-1], "utf-8") + self.sources = map(argv_to_unicode, args[:-1]) + self.destination = argv_to_unicode(args[-1]) synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = unicode(topath, "utf-8") + self.to_dir = argv_to_unicode(topath) synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = unicode(pattern, "utf-8").strip() + g = argv_to_unicode(pattern).strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ed4f0c71d..ac17cf445 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = unicode(config.get("nickname") or "", "utf-8") + nickname = argv_to_unicode(config.get("nickname") or "") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = unicode(config.get("webport") or "none", "utf-8") + webport = argv_to_unicode(config.get("webport") or "none") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 13445ef0a..bf175de44 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,5 +1,4 @@ -from six import ensure_str - +from ...util.encodingutil import unicode_to_argv from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -46,6 +45,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = ensure_str(self.get_clientdir(i=client_num)) + client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index 6aecd0af6..ceecbd662 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,5 +1,4 @@ import os.path -from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -10,7 +9,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -414,7 +413,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 2deafb784..31eb671bb 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,7 +1,5 @@ import os.path -from six import ensure_str - from twisted.trial import unittest from twisted.python import usage @@ -10,7 +8,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -50,7 +48,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 7b3194d3f..2a70cff3a 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using UTF-8", + "arguments to run_cli must be a native string -- convert using unicode_to_argv", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 4054dc289..2f0ac0cbe 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -23,6 +23,7 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin +from allmydata.util.encodingutil import unicode_to_argv, get_filesystem_encoding from allmydata.test import common_util import allmydata from .common import ( @@ -71,16 +72,12 @@ def run_bintahoe(extra_argv, python_options=None): :return: A three-tuple of stdout (unicode), stderr (unicode), and the child process "returncode" (int). """ - argv = [sys.executable] + argv = [sys.executable.decode(get_filesystem_encoding())] if python_options is not None: argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) - if not platform.isWindows(): - # On POSIX Popen (via execvp) will encode argv using the "filesystem" - # encoding. Depending on LANG this may make our unicode arguments - # unencodable. Do our own UTF-8 encoding here instead. - argv = list(arg.encode("utf-8") for arg in argv) + argv = list(unicode_to_argv(arg) for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") @@ -109,7 +106,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # -t is a harmless option that warns about tabs so we can add it # -without impacting other behavior noticably. - out, err, returncode = run_bintahoe(["--version"], python_options=["-t"]) + out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 03b9ba2de..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output +from allmydata.util.encodingutil import quote_output, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - ensure_str(filename)) + unicode_to_argv(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 1c884a88d..c5a8639e8 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -18,6 +18,7 @@ if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 from past.builtins import unicode +from six import ensure_str import sys, os, re import unicodedata @@ -106,6 +107,32 @@ def argv_to_abspath(s, **kwargs): % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) + +def unicode_to_argv(s, mangle=False): + """ + Make the given unicode string suitable for use in an argv list. + + On Python 2, this encodes using UTF-8. On Python 3, this returns the + input unmodified. + """ + precondition(isinstance(s, unicode), s) + return ensure_str(s) + + +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes.