From 2306819db1829da1eb839fa38da8b2dd4cf4970a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:45:39 -0500 Subject: [PATCH] Get rid of unicode_to_argv and argv_to_unicode --- src/allmydata/scripts/cli.py | 42 ++++++++++++------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 +-- src/allmydata/test/cli/test_backup.py | 5 +-- src/allmydata/test/cli/test_put.py | 7 +++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_encodingutil.py | 35 ++------------------- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 38 +--------------------- 9 files changed, 42 insertions(+), 102 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 379e1d212..bad96a252 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = argv_to_unicode(arg1) + self.from_file = unicode(arg1, "utf-8") self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else argv_to_unicode(arg2) + self.to_file = None if arg2 is None else unicode(arg2, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = map(argv_to_unicode, args[:-1]) - self.destination = argv_to_unicode(args[-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = argv_to_unicode(topath) + self.to_dir = unicode(topath, "utf-8") synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = argv_to_unicode(pattern).strip() + g = unicode(pattern, "utf-8").strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ac17cf445..ed4f0c71d 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = argv_to_unicode(config.get("nickname") or "") + nickname = unicode(config.get("nickname") or "", "utf-8") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = argv_to_unicode(config.get("webport") or "none") + webport = unicode(config.get("webport") or "none", "utf-8") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..13445ef0a 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index ceecbd662..6aecd0af6 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,4 +1,5 @@ import os.path +from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -9,7 +10,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -413,7 +414,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..2deafb784 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,4 +1,7 @@ import os.path + +from six import ensure_str + from twisted.trial import unittest from twisted.python import usage @@ -7,7 +10,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -47,7 +50,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 2a70cff3a..7b3194d3f 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using unicode_to_argv", + "arguments to run_cli must be a native string -- convert using UTF-8", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index cbc9143b7..5f6700cd6 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -81,12 +81,12 @@ from allmydata.test.common_util import ( ReallyEqualMixin, skip_if_cannot_represent_filename, ) from allmydata.util import encodingutil, fileutil -from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ +from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ - unicode_to_argv + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from + from twisted.python import usage @@ -138,12 +138,6 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): _reload() self.assertEqual(get_io_encoding(), 'utf-8') - def test_argv_to_unicode(self): - encodingutil.io_encoding = 'utf-8' - self.failUnlessRaises(usage.UsageError, - argv_to_unicode, - lumiere_nfc.encode('latin1')) - @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' @@ -213,19 +207,6 @@ class EncodingUtil(ReallyEqualMixin): sys.platform = self.original_platform _reload() - def test_argv_to_unicode(self): - if 'argv' not in dir(self): - return - - mock_stdout = MockStdout() - mock_stdout.encoding = self.io_encoding - self.patch(sys, 'stdout', mock_stdout) - - argu = lumiere_nfc - argv = self.argv - _reload() - self.failUnlessReallyEqual(argv_to_unicode(argv), argu) - def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") @@ -245,16 +226,6 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_argv_py2(self): - """unicode_to_argv() converts to bytes on Python 2.""" - self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding)) - - @skipIf(PY2, "Python 3 only.") - def test_unicode_to_argv_py3(self): - """unicode_to_argv() is noop on Python 3.""" - self.assertEqual(unicode_to_argv("abc"), "abc") - @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..03b9ba2de 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output, unicode_to_argv +from allmydata.util.encodingutil import quote_output from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - unicode_to_argv(filename)) + ensure_str(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index f13dc5b8e..5cc3b8d19 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -107,53 +107,17 @@ def get_io_encoding(): """ return io_encoding -def argv_to_unicode(s): - """ - Decode given argv element to unicode. If this fails, raise a UsageError. - """ - if isinstance(s, unicode): - return s - - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = argv_to_unicode(s) + decoded = unicode(s, "utf-8") if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) -def unicode_to_argv(s, mangle=False): - """ - Encode the given Unicode argument as a bytestring. - If the argument is to be passed to a different process, then the 'mangle' argument - should be true; on Windows, this uses a mangled encoding that will be reversed by - code in runner.py. - - On Python 3, just return the string unchanged, since argv is unicode. - """ - precondition(isinstance(s, unicode), s) - if PY3: - warnings.warn("This will be unnecessary once Python 2 is dropped.", - DeprecationWarning) - return s - - if mangle and sys.platform == "win32": - # This must be the same as 'mangle' in bin/tahoe-script.template. - return bytes(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s), io_encoding) - else: - return s.encode(io_encoding) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes.