Merged masted

Signed-off-by: fenn-cs <fenn25.fn@gmail.com>
2025-04-19 08:16:19 +00:00 · 2021-08-31 12:17:20 +01:00 · 2021-08-31 12:17:20 +01:00 · cb81e13462
commit cb81e13462
parent 342a1c2c31 bbc860fdb1
10 changed files with 345 additions and 101 deletions
--- a/newsfragments/3528.minor
+++ b/newsfragments/3528.minor
--- a/src/allmydata/scripts/runner.py
+++ b/src/allmydata/scripts/runner.py
@ -9,6 +9,7 @@ if PY2:

 import os, sys
 from six.moves import StringIO
+from past.builtins import unicode
 import six

 try:
@ -115,23 +116,75 @@ for module in (create_node,):
 def parse_options(argv, config=None):
    if not config:
        config = Options()
-    config.parseOptions(argv) # may raise usage.error
+    try:
+        config.parseOptions(argv)
+    except usage.error as e:
+        if six.PY2:
+            # On Python 2 the exception may hold non-ascii in a byte string.
+            # This makes it impossible to convert the exception to any kind of
+            # string using str() or unicode().  It could also hold non-ascii
+            # in a unicode string which still makes it difficult to convert it
+            # to a byte string later.
+            #
+            # So, reach inside and turn it into some entirely safe ascii byte
+            # strings that will survive being written to stdout without
+            # causing too much damage in the process.
+            #
+            # As a result, non-ascii will not be rendered correctly but
+            # instead as escape sequences.  At least this can go away when
+            # we're done with Python 2 support.
+            raise usage.error(*(
+                arg.encode("ascii", errors="backslashreplace")
+                if isinstance(arg, unicode)
+                else arg.decode("utf-8").encode("ascii", errors="backslashreplace")
+                for arg
+                in e.args
+            ))
+        raise
    return config

+def parse_or_exit(config, argv, stdout, stderr):
+    """
+    Parse Tahoe-LAFS CLI arguments and return a configuration object if they
+    are valid.

-def parse_or_exit_with_explanation(argv, stdout=sys.stdout):
-    config = Options()
+    If they are invalid, write an explanation to ``stdout`` and exit.
+
+    :param allmydata.scripts.runner.Options config: An instance of the
+        argument-parsing class to use.
+
+    :param [unicode] argv: The argument list to parse, including the name of the
+        program being run as ``argv[0]``.
+
+    :param stdout: The file-like object to use as stdout.
+    :param stderr: The file-like object to use as stderr.
+
+    :raise SystemExit: If there is an argument-parsing problem.
+
+    :return: ``config``, after using it to parse the argument list.
+    """
    try:
-        parse_options(argv, config=config)
+        parse_options(argv[1:], config=config)
    except usage.error as e:
+        # `parse_options` may have the side-effect of initializing a
+        # "sub-option" of the given configuration, even if it ultimately
+        # raises an exception.  For example, `tahoe run --invalid-option` will
+        # set `config.subOptions` to an instance of
+        # `allmydata.scripts.tahoe_run.RunOptions` and then raise a
+        # `usage.error` because `RunOptions` does not recognize
+        # `--invalid-option`.  If `run` itself had a sub-options then the same
+        # thing could happen but with another layer of nesting.  We can
+        # present the user with the most precise information about their usage
+        # error possible by finding the most "sub" of the sub-options and then
+        # showing that to the user along with the usage error.
        c = config
        while hasattr(c, 'subOptions'):
            c = c.subOptions
        print(str(c), file=stdout)
-        # On Python 2 the string may turn into a unicode string, e.g. the error
-        # may be unicode, in which case it will print funny. Once we're on
-        # Python 3 we can just drop the ensure_str().
-        print(six.ensure_str("%s:  %s\n" % (sys.argv[0], e)), file=stdout)
+        exc_str = str(e)
+        exc_bytes = six.ensure_binary(exc_str, "utf-8")
+        msg_bytes = b"%s:  %s\n" % (six.ensure_binary(argv[0]), exc_bytes)
+        print(six.ensure_text(msg_bytes, "utf-8"), file=stdout)
        sys.exit(1)
    return config

@ -184,25 +237,64 @@ def _maybe_enable_eliot_logging(options, reactor):
    return options


-def run():
+def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stderr):
+    """
+    Run a Tahoe-LAFS node.
+
+    :param configFactory: A zero-argument callable which creates the config
+        object to use to parse the argument list.
+
+    :param [str] argv: The argument list to use to configure the run.
+
+    :param stdout: The file-like object to use for stdout.
+    :param stderr: The file-like object to use for stderr.
+
+    :raise SystemExit: Always raised after the run is complete.
+    """
    if sys.platform == "win32":
        from allmydata.windows.fixups import initialize
        initialize()
    # doesn't return: calls sys.exit(rc)
-    task.react(_run_with_reactor)
+    task.react(
+        lambda reactor: _run_with_reactor(
+            reactor,
+            configFactory(),
+            argv,
+            stdout,
+            stderr,
+        ),
+    )


-def _setup_coverage(reactor):
+def _setup_coverage(reactor, argv):
    """
-    Arrange for coverage to be collected if the 'coverage' package is
-    installed
+    If coverage measurement was requested, start collecting coverage
+    measurements and arrange to record those measurements when the process is
+    done.
+
+    Coverage measurement is considered requested if ``"--coverage"`` is in
+    ``argv`` (and it will be removed from ``argv`` if it is found).  There
+    should be a ``.coveragerc`` file in the working directory if coverage
+    measurement is requested.
+
+    This is only necessary to support multi-process coverage measurement,
+    typically when the test suite is running, and with the pytest-based
+    *integration* test suite (at ``integration/`` in the root of the source
+    tree) foremost in mind.  The idea is that if you are running Tahoe-LAFS in
+    a configuration where multiple processes are involved - for example, a
+    test process and a client node process, if you only measure coverage from
+    the test process then you will fail to observe most Tahoe-LAFS code that
+    is being run.
+
+    This function arranges to have any Tahoe-LAFS process (such as that
+    client node process) collect and report coverage measurements as well.
    """
    # can we put this _setup_coverage call after we hit
    # argument-parsing?
    # ensure_str() only necessary on Python 2.
    if six.ensure_str('--coverage') not in sys.argv:
        return
-    sys.argv.remove('--coverage')
+    argv.remove('--coverage')

    try:
        import coverage
@ -233,14 +325,37 @@ def _setup_coverage(reactor):
    reactor.addSystemEventTrigger('after', 'shutdown', write_coverage_data)


-def _run_with_reactor(reactor):
+def _run_with_reactor(reactor, config, argv, stdout, stderr):
+    """
+    Run a Tahoe-LAFS node using the given reactor.

-    _setup_coverage(reactor)
+    :param reactor: The reactor to use.  This implementation largely ignores
+        this and lets the rest of the implementation pick its own reactor.
+        Oops.

-    argv = list(map(argv_to_unicode, sys.argv[1:]))
-    d = defer.maybeDeferred(parse_or_exit_with_explanation, argv)
+    :param twisted.python.usage.Options config: The config object to use to
+        parse the argument list.
+
+    :param [str] argv: The argument list to parse, *excluding* the name of the
+        program being run.
+
+    :param stdout: See ``run``.
+    :param stderr: See ``run``.
+
+    :return: A ``Deferred`` that fires when the run is complete.
+    """
+    _setup_coverage(reactor, argv)
+
+    argv = list(map(argv_to_unicode, argv))
+    d = defer.maybeDeferred(
+        parse_or_exit,
+        config,
+        argv,
+        stdout,
+        stderr,
+    )
    d.addCallback(_maybe_enable_eliot_logging, reactor)
-    d.addCallback(dispatch)
+    d.addCallback(dispatch, stdout=stdout, stderr=stderr)
    def _show_exception(f):
        # when task.react() notices a non-SystemExit exception, it does
        # log.err() with the failure and then exits with rc=1. We want this
@ -248,7 +363,7 @@ def _run_with_reactor(reactor):
        # weren't using react().
        if f.check(SystemExit):
            return f # dispatch function handled it
-        f.printTraceback(file=sys.stderr)
+        f.printTraceback(file=stderr)
        sys.exit(1)
    d.addErrback(_show_exception)
    return d
--- a/src/allmydata/scripts/tahoe_run.py
+++ b/src/allmydata/scripts/tahoe_run.py
@ -192,7 +192,7 @@ class DaemonizeTahoeNodePlugin(object):
        return DaemonizeTheRealService(self.nodetype, self.basedir, so)


-def run(config):
+def run(config, runApp=twistd.runApp):
    """
    Runs a Tahoe-LAFS node in the foreground.

@ -212,10 +212,11 @@ def run(config):
    if not nodetype:
        print("%s is not a recognizable node directory" % quoted_basedir, file=err)
        return 1
-    # Now prepare to turn into a twistd process. This os.chdir is the point
-    # of no return.
-    os.chdir(basedir)
-    twistd_args = ["--nodaemon"]
+
+    twistd_args = ["--nodaemon", "--rundir", basedir]
+    if sys.platform != "win32":
+        pidfile = get_pidfile(basedir)
+        twistd_args.extend(["--pidfile", pidfile])
    twistd_args.extend(config.twistd_args)
    twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin

@ -232,12 +233,11 @@ def run(config):
    twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)}

    # handle invalid PID file (twistd might not start otherwise)
-    pidfile = get_pidfile(basedir)
-    if get_pid_from_pidfile(pidfile) == -1:
+    if sys.platform != "win32" and get_pid_from_pidfile(pidfile) == -1:
        print("found invalid PID file in %s - deleting it" % basedir, file=err)
        os.remove(pidfile)

    # We always pass --nodaemon so twistd.runApp does not daemonize.
    print("running node in %s" % (quoted_basedir,), file=out)
-    twistd.runApp(twistd_config)
+    runApp(twistd_config)
    return 0
--- a/src/allmydata/test/cli/test_cli.py
+++ b/src/allmydata/test/cli/test_cli.py
@ -11,23 +11,22 @@ if PY2:
    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401

 from six.moves import cStringIO as StringIO
-from six import ensure_text, ensure_str
+import re
+from six import ensure_text

 import os.path
-import sys
-import re
-from mock import patch, Mock
 from urllib.parse import quote as url_quote

 from twisted.trial import unittest
-from twisted.python.monkey import MonkeyPatcher
-from twisted.internet import task
-from twisted.python.filepath import FilePath
-
+from twisted.internet.testing import (
+    MemoryReactor,
+)
+from twisted.internet.test.modulehelpers import (
+    AlternateReactor,
+)
 import allmydata
 from allmydata.crypto import ed25519
 from allmydata.util import fileutil, hashutil, base32
-from allmydata.util.namespace import Namespace
 from allmydata import uri
 from allmydata.immutable import upload
 from allmydata.dirnode import normalize
@ -524,42 +523,34 @@ class CLI(CLITestMixin, unittest.TestCase):
            self.failUnlessIn(normalize(file), filenames)

    def test_exception_catcher(self):
+        """
+        An exception that is otherwise unhandled during argument dispatch is
+        written to stderr and causes the process to exit with code 1.
+        """
        self.basedir = "cli/exception_catcher"

-        stderr = StringIO()
        exc = Exception("canary")
-        ns = Namespace()
+        class BrokenOptions(object):
+            def parseOptions(self, argv):
+                raise exc

-        ns.parse_called = False
-        def call_parse_or_exit(args):
-            ns.parse_called = True
-            raise exc
+        stderr = StringIO()

-        ns.sys_exit_called = False
-        def call_sys_exit(exitcode):
-            ns.sys_exit_called = True
-            self.failUnlessEqual(exitcode, 1)
+        reactor = MemoryReactor()

-        def fake_react(f):
-            reactor = Mock()
-            d = f(reactor)
-            # normally this Deferred would be errbacked with SystemExit, but
-            # since we mocked out sys.exit, it will be fired with None. So
-            # it's safe to drop it on the floor.
-            del d
+        with AlternateReactor(reactor):
+            with self.assertRaises(SystemExit) as ctx:
+                runner.run(
+                    configFactory=BrokenOptions,
+                    argv=["tahoe"],
+                    stderr=stderr,
+                )

-        patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation',
-                                 call_parse_or_exit),
-                                (sys, 'argv', ["tahoe"]),
-                                (sys, 'exit', call_sys_exit),
-                                (sys, 'stderr', stderr),
-                                (task, 'react', fake_react),
-                                )
-        patcher.runWithPatches(runner.run)
+        self.assertTrue(reactor.hasRun)
+        self.assertFalse(reactor.running)

-        self.failUnless(ns.parse_called)
-        self.failUnless(ns.sys_exit_called)
        self.failUnlessIn(str(exc), stderr.getvalue())
+        self.assertEqual(1, ctx.exception.code)


 class Help(unittest.TestCase):
@ -1331,30 +1322,3 @@ class Options(ReallyEqualMixin, unittest.TestCase):
                              ["--node-directory=there", "run", some_twistd_option])
        self.failUnlessRaises(usage.UsageError, self.parse,
                              ["run", "--basedir=here", some_twistd_option])
-
-
-class Run(unittest.TestCase):
-
-    @patch('allmydata.scripts.tahoe_run.os.chdir')
-    @patch('allmydata.scripts.tahoe_run.twistd')
-    def test_non_numeric_pid(self, mock_twistd, chdir):
-        """
-        If the pidfile exists but does not contain a numeric value, a complaint to
-        this effect is written to stderr.
-        """
-        basedir = FilePath(ensure_str(self.mktemp()))
-        basedir.makedirs()
-        basedir.child(u"twistd.pid").setContent(b"foo")
-        basedir.child(u"tahoe-client.tac").setContent(b"")
-
-        config = tahoe_run.RunOptions()
-        config.stdout = StringIO()
-        config.stderr = StringIO()
-        config['basedir'] = ensure_text(basedir.path)
-        config.twistd_args = []
-
-        result_code = tahoe_run.run(config)
-        self.assertIn("invalid PID file", config.stderr.getvalue())
-        self.assertTrue(len(mock_twistd.mock_calls), 1)
-        self.assertEqual(mock_twistd.mock_calls[0][0], 'runApp')
-        self.assertEqual(0, result_code)
--- a/src/allmydata/test/cli/test_run.py
+++ b/src/allmydata/test/cli/test_run.py
@ -16,11 +16,19 @@ from six.moves import (
    StringIO,
 )

+from testtools import (
+    skipIf,
+)
+
 from testtools.matchers import (
    Contains,
    Equals,
+    HasLength,
 )

+from twisted.python.runtime import (
+    platform,
+)
 from twisted.python.filepath import (
    FilePath,
 )
@ -33,6 +41,8 @@ from twisted.internet.test.modulehelpers import (

 from ...scripts.tahoe_run import (
    DaemonizeTheRealService,
+    RunOptions,
+    run,
 )

 from ...scripts.runner import (
@ -135,3 +145,40 @@ class DaemonizeTheRealServiceTests(SyncTestCase):
            """,
            "Privacy requested",
        )
+
+
+class RunTests(SyncTestCase):
+    """
+    Tests for ``run``.
+    """
+    @skipIf(platform.isWindows(), "There are no PID files on Windows.")
+    def test_non_numeric_pid(self):
+        """
+        If the pidfile exists but does not contain a numeric value, a complaint to
+        this effect is written to stderr.
+        """
+        basedir = FilePath(self.mktemp()).asTextMode()
+        basedir.makedirs()
+        basedir.child(u"twistd.pid").setContent(b"foo")
+        basedir.child(u"tahoe-client.tac").setContent(b"")
+
+        config = RunOptions()
+        config.stdout = StringIO()
+        config.stderr = StringIO()
+        config['basedir'] = basedir.path
+        config.twistd_args = []
+
+        runs = []
+        result_code = run(config, runApp=runs.append)
+        self.assertThat(
+            config.stderr.getvalue(),
+            Contains("found invalid PID file in"),
+        )
+        self.assertThat(
+            runs,
+            HasLength(1),
+        )
+        self.assertThat(
+            result_code,
+            Equals(0),
+        )
--- a/src/allmydata/test/cli_node_api.py
+++ b/src/allmydata/test/cli_node_api.py
@ -35,6 +35,9 @@ from twisted.internet.error import (
 from twisted.internet.interfaces import (
    IProcessProtocol,
 )
+from twisted.python.log import (
+    msg,
+)
 from twisted.python.filepath import (
    FilePath,
 )
@ -99,7 +102,10 @@ class _ProcessProtocolAdapter(ProcessProtocol, object):
        try:
            proto = self._fds[childFD]
        except KeyError:
-            pass
+            msg(format="Received unhandled output on %(fd)s: %(output)s",
+                fd=childFD,
+                output=data,
+            )
        else:
            proto.dataReceived(data)

@ -158,6 +164,9 @@ class CLINodeAPI(object):
            u"-m",
            u"allmydata.scripts.runner",
        ] + argv
+        msg(format="Executing %(argv)s",
+            argv=argv,
+        )
        return self.reactor.spawnProcess(
            processProtocol=process_protocol,
            executable=exe,
--- a/src/allmydata/test/common_util.py
+++ b/src/allmydata/test/common_util.py
@ -15,6 +15,9 @@ import os
 import sys
 import time
 import signal
+from functools import (
+    partial,
+)
 from random import randrange
 if PY2:
    from StringIO import StringIO
@ -98,7 +101,7 @@ def run_cli_native(verb, *args, **kwargs):
        args=args,
        nodeargs=nodeargs,
    )
-    argv = nodeargs + [verb] + list(args)
+    argv = ["tahoe"] + nodeargs + [verb] + list(args)
    stdin = kwargs.get("stdin", "")
    if PY2:
        # The original behavior, the Python 2 behavior, is to accept either
@ -128,10 +131,20 @@ def run_cli_native(verb, *args, **kwargs):
        stdout = TextIOWrapper(BytesIO(), encoding)
        stderr = TextIOWrapper(BytesIO(), encoding)
    d = defer.succeed(argv)
-    d.addCallback(runner.parse_or_exit_with_explanation, stdout=stdout)
-    d.addCallback(runner.dispatch,
-                  stdin=stdin,
-                  stdout=stdout, stderr=stderr)
+    d.addCallback(
+        partial(
+            runner.parse_or_exit,
+            runner.Options(),
+        ),
+        stdout=stdout,
+        stderr=stderr,
+    )
+    d.addCallback(
+        runner.dispatch,
+        stdin=stdin,
+        stdout=stdout,
+        stderr=stderr,
+    )
    def _done(rc, stdout=stdout, stderr=stderr):
        if return_bytes and PY3:
            stdout = stdout.buffer
--- a/src/allmydata/test/test_runner.py
+++ b/src/allmydata/test/test_runner.py
@ -19,6 +19,21 @@ import os.path, re, sys
 from os import linesep
 import locale

+import six
+
+from testtools import (
+    skipUnless,
+)
+from testtools.matchers import (
+    MatchesListwise,
+    MatchesAny,
+    Contains,
+    Equals,
+    Always,
+)
+from testtools.twistedsupport import (
+    succeeded,
+)
 from eliot import (
    log_call,
 )
@ -39,6 +54,10 @@ from allmydata.util import fileutil, pollmixin
 from allmydata.util.encodingutil import unicode_to_argv
 from allmydata.test import common_util
 import allmydata
+from allmydata.scripts.runner import (
+    parse_options,
+)
+
 from .common import (
    PIPE,
    Popen,
@ -46,6 +65,7 @@ from .common import (
 from .common_util import (
    parse_cli,
    run_cli,
+    run_cli_unicode,
 )
 from .cli_node_api import (
    CLINodeAPI,
@ -56,6 +76,9 @@ from .cli_node_api import (
 from ..util.eliotutil import (
    inline_callbacks,
 )
+from .common import (
+    SyncTestCase,
+)

 def get_root_from_file(src):
    srcdir = os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(src))))
@ -74,6 +97,56 @@ srcfile = allmydata.__file__
 rootdir = get_root_from_file(srcfile)


+class ParseOptionsTests(SyncTestCase):
+    """
+    Tests for ``parse_options``.
+    """
+    @skipUnless(six.PY2, "Only Python 2 exceptions must stringify to bytes.")
+    def test_nonascii_unknown_subcommand_python2(self):
+        """
+        When ``parse_options`` is called with an argv indicating a subcommand that
+        does not exist and which also contains non-ascii characters, the
+        exception it raises includes the subcommand encoded as UTF-8.
+        """
+        tricky = u"\u00F6"
+        try:
+            parse_options([tricky])
+        except usage.error as e:
+            self.assertEqual(
+                b"Unknown command: \\xf6",
+                b"{}".format(e),
+            )
+
+
+class ParseOrExitTests(SyncTestCase):
+    """
+    Tests for ``parse_or_exit``.
+    """
+    def test_nonascii_error_content(self):
+        """
+        ``parse_or_exit`` can report errors that include non-ascii content.
+        """
+        tricky = u"\u00F6"
+        self.assertThat(
+            run_cli_unicode(tricky, [], encoding="utf-8"),
+            succeeded(
+                MatchesListwise([
+                    # returncode
+                    Equals(1),
+                    # stdout
+                    MatchesAny(
+                        # Python 2
+                        Contains(u"Unknown command: \\xf6"),
+                        # Python 3
+                        Contains(u"Unknown command: \xf6"),
+                    ),
+                    # stderr,
+                    Always()
+                ]),
+            ),
+        )
+
+
@log_call(action_type="run-bin-tahoe")
 def run_bintahoe(extra_argv, python_options=None):
    """
@ -110,8 +183,16 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase):
        """
        tricky = u"\u00F6"
        out, err, returncode = run_bintahoe([tricky])
+        if PY2:
+            expected = u"Unknown command: \\xf6"
+        else:
+            expected = u"Unknown command: \xf6"
        self.assertEqual(returncode, 1)
-        self.assertIn(u"Unknown command: " + tricky, out)
+        self.assertIn(
+            expected,
+            out,
+            "expected {!r} not found in {!r}\nstderr: {!r}".format(expected, out, err),
+        )

    def test_with_python_options(self):
        """
@ -305,7 +386,12 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin):
            u"--hostname", u"127.0.0.1",
        ])

-        self.assertEqual(returncode, 0)
+        self.assertEqual(
+            returncode,
+            0,
+            "stdout: {!r}\n"
+            "stderr: {!r}\n",
+        )

        # This makes sure that node.url is written, which allows us to
        # detect when the introducer restarts in _node_has_restarted below.
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@ -127,7 +127,7 @@ def argv_to_abspath(s, **kwargs):
    return abspath_expanduser_unicode(decoded, **kwargs)


-def unicode_to_argv(s, mangle=False):
+def unicode_to_argv(s):
    """
    Make the given unicode string suitable for use in an argv list.

--- a/src/allmydata/windows/fixups.py
+++ b/src/allmydata/windows/fixups.py
@ -188,7 +188,17 @@ def initialize():
    # for example, the Python interpreter or any options passed to it, or runner
    # scripts such as 'coverage run'. It works even if there are no such arguments,
    # as in the case of a frozen executable created by bb-freeze or similar.
-    sys.argv = argv[-len(sys.argv):]
+    #
+    # Also, modify sys.argv in place.  If any code has already taken a
+    # reference to the original argument list object then this ensures that
+    # code sees the new values.  This reliance on mutation of shared state is,
+    # of course, awful.  Why does this function even modify sys.argv?  Why not
+    # have a function that *returns* the properly initialized argv as a new
+    # list?  I don't know.
+    #
+    # At least Python 3 gets sys.argv correct so before very much longer we
+    # should be able to fix this bad design by deleting it.
+    sys.argv[:] = argv[-len(sys.argv):]


 def a_console(handle):