Prefer to fix unicode_to_argv/argv_to_unicode instead of callers

This commit is contained in:
Jean-Paul Calderone 2021-01-12 09:58:34 -05:00
parent 7c0d2e3cd5
commit ae1a0c591b
9 changed files with 64 additions and 44 deletions

View File

@ -4,7 +4,7 @@ import os.path, re, fnmatch
from twisted.python import usage
from allmydata.scripts.common import get_aliases, get_default_nodedir, \
DEFAULT_ALIAS, BaseOptions
from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path
from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path
from .tahoe_status import TahoeStatusCommand
NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")
@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions):
]
def parseArgs(self, where=""):
self.where = unicode(where, "utf-8")
self.where = argv_to_unicode(where)
if self['format']:
if self['format'].upper() not in ("SDMF", "MDMF"):
@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions):
class AddAliasOptions(FileStoreOptions):
def parseArgs(self, alias, cap):
self.alias = unicode(alias, "utf-8")
self.alias = argv_to_unicode(alias)
if self.alias.endswith(u':'):
self.alias = self.alias[:-1]
self.cap = cap
@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions):
class CreateAliasOptions(FileStoreOptions):
def parseArgs(self, alias):
self.alias = unicode(alias, "utf-8")
self.alias = argv_to_unicode(alias)
if self.alias.endswith(u':'):
self.alias = self.alias[:-1]
@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions):
("json", None, "Show the raw JSON output."),
]
def parseArgs(self, where=""):
self.where = unicode(where, "utf-8")
self.where = argv_to_unicode(where)
synopsis = "[options] [PATH]"
@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions):
if arg2 == "-":
arg2 = None
self.from_file = unicode(arg1, "utf-8")
self.from_file = argv_to_unicode(arg1)
self.to_file = None if arg2 is None else argv_to_abspath(arg2)
synopsis = "[options] REMOTE_FILE LOCAL_FILE"
@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions):
arg1 = None
self.from_file = None if arg1 is None else argv_to_abspath(arg1)
self.to_file = None if arg2 is None else unicode(arg2, "utf-8")
self.to_file = None if arg2 is None else argv_to_unicode(arg2)
if self['format']:
if self['format'].upper() not in ("SDMF", "MDMF", "CHK"):
@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions):
def parseArgs(self, *args):
if len(args) < 2:
raise usage.UsageError("cp requires at least two arguments")
self.sources = list(unicode(a, "utf-8") for a in args[:-1])
self.destination = unicode(args[-1], "utf-8")
self.sources = map(argv_to_unicode, args[:-1])
self.destination = argv_to_unicode(args[-1])
synopsis = "[options] FROM.. TO"
@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions):
class UnlinkOptions(FileStoreOptions):
def parseArgs(self, where):
self.where = unicode(where, "utf-8")
self.where = argv_to_unicode(where)
synopsis = "[options] REMOTE_FILE"
description = "Remove a named file from its parent directory."
class MvOptions(FileStoreOptions):
def parseArgs(self, frompath, topath):
self.from_file = unicode(frompath, "utf-8")
self.to_file = unicode(topath, "utf-8")
self.from_file = argv_to_unicode(frompath)
self.to_file = argv_to_unicode(topath)
synopsis = "[options] FROM TO"
@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions):
class LnOptions(FileStoreOptions):
def parseArgs(self, frompath, topath):
self.from_file = unicode(frompath, "utf-8")
self.to_file = unicode(topath, "utf-8")
self.from_file = argv_to_unicode(frompath)
self.to_file = argv_to_unicode(topath)
synopsis = "[options] FROM_LINK TO_LINK"
@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions):
def parseArgs(self, localdir, topath):
self.from_dir = argv_to_abspath(localdir)
self.to_dir = unicode(topath, "utf-8")
self.to_dir = argv_to_unicode(topath)
synopsis = "[options] FROM ALIAS:TO"
def opt_exclude(self, pattern):
"""Ignore files matching a glob pattern. You may give multiple
'--exclude' options."""
g = unicode(pattern, "utf-8").strip()
g = argv_to_unicode(pattern).strip()
if g:
exclude = self['exclude']
exclude.add(g)
@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions):
("info", "i", "Open the t=info page for the file"),
]
def parseArgs(self, where=''):
self.where = unicode(where, "utf-8")
self.where = argv_to_unicode(where)
synopsis = "[options] [ALIAS:PATH]"
@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions):
("raw", "r", "Display raw JSON data instead of parsed."),
]
def parseArgs(self, where=''):
self.where = unicode(where, "utf-8")
self.where = argv_to_unicode(where)
synopsis = "[options] [ALIAS:PATH]"
description = """
@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions):
("raw", "r", "Display raw JSON data instead of parsed"),
]
def parseArgs(self, where=''):
self.where = unicode(where, "utf-8")
self.where = argv_to_unicode(where)
synopsis = "[options] [ALIAS:PATH]"
description = """
@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions):
("add-lease", None, "Add/renew lease on all shares."),
]
def parseArgs(self, *locations):
self.locations = list(unicode(a, "utf-8") for a in locations)
self.locations = map(argv_to_unicode, locations)
synopsis = "[options] [ALIAS:PATH]"
description = """
@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions):
("verbose", "v", "Be noisy about what is happening."),
]
def parseArgs(self, *locations):
self.locations = list(unicode(a, "utf-8") for a in locations)
self.locations = map(argv_to_unicode, locations)
synopsis = "[options] [ALIAS:PATH]"
description = """

View File

@ -16,7 +16,7 @@ from allmydata.scripts.common import (
)
from allmydata.scripts.default_nodedir import _default_nodedir
from allmydata.util.assertutil import precondition
from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding
from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding
from allmydata.util import fileutil, i2p_provider, iputil, tor_provider
from wormhole import wormhole
@ -238,7 +238,7 @@ def write_node_config(c, config):
c.write("\n")
c.write("[node]\n")
nickname = unicode(config.get("nickname") or "", "utf-8")
nickname = argv_to_unicode(config.get("nickname") or "")
c.write("nickname = %s\n" % (nickname.encode('utf-8'),))
if config["hide-ip"]:
c.write("reveal-IP-address = false\n")
@ -246,7 +246,7 @@ def write_node_config(c, config):
c.write("reveal-IP-address = true\n")
# TODO: validate webport
webport = unicode(config.get("webport") or "none", "utf-8")
webport = argv_to_unicode(config.get("webport") or "none")
if webport.lower() == "none":
webport = ""
c.write("web.port = %s\n" % (webport.encode('utf-8'),))

View File

@ -1,5 +1,4 @@
from six import ensure_str
from ...util.encodingutil import unicode_to_argv
from ...scripts import runner
from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode
@ -46,6 +45,6 @@ class CLITestMixin(ReallyEqualMixin):
# client_num is used to execute client CLI commands on a specific
# client.
client_num = kwargs.pop("client_num", 0)
client_dir = ensure_str(self.get_clientdir(i=client_num))
client_dir = unicode_to_argv(self.get_clientdir(i=client_num))
nodeargs = [ b"--node-directory", client_dir ]
return run_cli(verb, *args, nodeargs=nodeargs, **kwargs)

View File

@ -1,5 +1,4 @@
import os.path
from six import ensure_str
from six.moves import cStringIO as StringIO
from datetime import timedelta
import re
@ -10,7 +9,7 @@ from twisted.python.monkey import MonkeyPatcher
import __builtin__
from allmydata.util import fileutil
from allmydata.util.fileutil import abspath_expanduser_unicode
from allmydata.util.encodingutil import get_io_encoding
from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
from allmydata.util.namespace import Namespace
from allmydata.scripts import cli, backupdb
from ..common_util import StallMixin
@ -414,7 +413,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
return StringIO()
patcher = MonkeyPatcher((__builtin__, 'file', call_file))
patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to'])
patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to'])
self.failUnless(ns.called)
def test_ignore_symlinks(self):

View File

@ -1,7 +1,5 @@
import os.path
from six import ensure_str
from twisted.trial import unittest
from twisted.python import usage
@ -10,7 +8,7 @@ from allmydata.scripts.common import get_aliases
from allmydata.scripts import cli
from ..no_network import GridTestMixin
from ..common_util import skip_if_cannot_represent_filename
from allmydata.util.encodingutil import get_io_encoding
from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
from allmydata.util.fileutil import abspath_expanduser_unicode
from .common import CLITestMixin
@ -50,7 +48,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
self.set_up_grid(oneshare=True)
rel_fn = os.path.join(self.basedir, "DATAFILE")
abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn)))
abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn)))
# we make the file small enough to fit in a LIT file, for speed
fileutil.write(rel_fn, "short file")
d = self.do_cli("put", rel_fn)

View File

@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs):
encoding = kwargs.pop("encoding", None)
precondition(
all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)),
"arguments to run_cli must be a native string -- convert using UTF-8",
"arguments to run_cli must be a native string -- convert using unicode_to_argv",
verb=verb,
args=args,
nodeargs=nodeargs,

View File

@ -23,6 +23,7 @@ from twisted.python.runtime import (
platform,
)
from allmydata.util import fileutil, pollmixin
from allmydata.util.encodingutil import unicode_to_argv, get_filesystem_encoding
from allmydata.test import common_util
import allmydata
from .common import (
@ -71,16 +72,12 @@ def run_bintahoe(extra_argv, python_options=None):
:return: A three-tuple of stdout (unicode), stderr (unicode), and the
child process "returncode" (int).
"""
argv = [sys.executable]
argv = [sys.executable.decode(get_filesystem_encoding())]
if python_options is not None:
argv.extend(python_options)
argv.extend([u"-m", u"allmydata.scripts.runner"])
argv.extend(extra_argv)
if not platform.isWindows():
# On POSIX Popen (via execvp) will encode argv using the "filesystem"
# encoding. Depending on LANG this may make our unicode arguments
# unencodable. Do our own UTF-8 encoding here instead.
argv = list(arg.encode("utf-8") for arg in argv)
argv = list(unicode_to_argv(arg) for arg in argv)
p = Popen(argv, stdout=PIPE, stderr=PIPE)
out = p.stdout.read().decode("utf-8")
err = p.stderr.read().decode("utf-8")
@ -109,7 +106,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase):
# -t is a harmless option that warns about tabs so we can add it
# -without impacting other behavior noticably.
out, err, returncode = run_bintahoe(["--version"], python_options=["-t"])
out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"])
self.assertEqual(returncode, 0)
self.assertTrue(out.startswith(allmydata.__appname__ + '/'))

View File

@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode
from allmydata.immutable.filenode import ImmutableFileNode
from allmydata.util import idlib, mathutil, pollmixin, fileutil
from allmydata.util import log, base32
from allmydata.util.encodingutil import quote_output
from allmydata.util.encodingutil import quote_output, unicode_to_argv
from allmydata.util.fileutil import abspath_expanduser_unicode
from allmydata.util.consumer import MemoryConsumer, download_to_data
from allmydata.interfaces import IDirectoryNode, IFileNode, \
@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
log.msg("test_system.SystemTest._test_runner using %r" % filename)
rc,output,err = yield run_cli("debug", "dump-share", "--offsets",
ensure_str(filename))
unicode_to_argv(filename))
self.failUnlessEqual(rc, 0)
# we only upload a single file, so we can assert some things about

View File

@ -18,6 +18,7 @@ if PY2:
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401
from past.builtins import unicode
from six import ensure_str
import sys, os, re
import unicodedata
@ -106,6 +107,32 @@ def argv_to_abspath(s, **kwargs):
% (quote_output(s), quote_output(os.path.join('.', s))))
return abspath_expanduser_unicode(decoded, **kwargs)
def unicode_to_argv(s, mangle=False):
"""
Make the given unicode string suitable for use in an argv list.
On Python 2, this encodes using UTF-8. On Python 3, this returns the
input unmodified.
"""
precondition(isinstance(s, unicode), s)
return ensure_str(s)
def argv_to_unicode(s):
"""
Perform the inverse of ``unicode_to_argv``.
"""
if isinstance(s, unicode):
return s
precondition(isinstance(s, bytes), s)
try:
return unicode(s, io_encoding)
except UnicodeDecodeError:
raise usage.UsageError("Argument %s cannot be decoded as %s." %
(quote_output(s), io_encoding))
def unicode_to_url(s):
"""
Encode an unicode object used in an URL to bytes.