diff --git a/newsfragments/3528.minor b/newsfragments/3528.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index a42b30624..145ee6464 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -9,6 +9,7 @@ if PY2: import os, sys from six.moves import StringIO +from past.builtins import unicode import six try: @@ -115,23 +116,75 @@ for module in (create_node,): def parse_options(argv, config=None): if not config: config = Options() - config.parseOptions(argv) # may raise usage.error + try: + config.parseOptions(argv) + except usage.error as e: + if six.PY2: + # On Python 2 the exception may hold non-ascii in a byte string. + # This makes it impossible to convert the exception to any kind of + # string using str() or unicode(). It could also hold non-ascii + # in a unicode string which still makes it difficult to convert it + # to a byte string later. + # + # So, reach inside and turn it into some entirely safe ascii byte + # strings that will survive being written to stdout without + # causing too much damage in the process. + # + # As a result, non-ascii will not be rendered correctly but + # instead as escape sequences. At least this can go away when + # we're done with Python 2 support. + raise usage.error(*( + arg.encode("ascii", errors="backslashreplace") + if isinstance(arg, unicode) + else arg.decode("utf-8").encode("ascii", errors="backslashreplace") + for arg + in e.args + )) + raise return config +def parse_or_exit(config, argv, stdout, stderr): + """ + Parse Tahoe-LAFS CLI arguments and return a configuration object if they + are valid. -def parse_or_exit_with_explanation(argv, stdout=sys.stdout): - config = Options() + If they are invalid, write an explanation to ``stdout`` and exit. + + :param allmydata.scripts.runner.Options config: An instance of the + argument-parsing class to use. + + :param [unicode] argv: The argument list to parse, including the name of the + program being run as ``argv[0]``. + + :param stdout: The file-like object to use as stdout. + :param stderr: The file-like object to use as stderr. + + :raise SystemExit: If there is an argument-parsing problem. + + :return: ``config``, after using it to parse the argument list. + """ try: - parse_options(argv, config=config) + parse_options(argv[1:], config=config) except usage.error as e: + # `parse_options` may have the side-effect of initializing a + # "sub-option" of the given configuration, even if it ultimately + # raises an exception. For example, `tahoe run --invalid-option` will + # set `config.subOptions` to an instance of + # `allmydata.scripts.tahoe_run.RunOptions` and then raise a + # `usage.error` because `RunOptions` does not recognize + # `--invalid-option`. If `run` itself had a sub-options then the same + # thing could happen but with another layer of nesting. We can + # present the user with the most precise information about their usage + # error possible by finding the most "sub" of the sub-options and then + # showing that to the user along with the usage error. c = config while hasattr(c, 'subOptions'): c = c.subOptions print(str(c), file=stdout) - # On Python 2 the string may turn into a unicode string, e.g. the error - # may be unicode, in which case it will print funny. Once we're on - # Python 3 we can just drop the ensure_str(). - print(six.ensure_str("%s: %s\n" % (sys.argv[0], e)), file=stdout) + exc_str = str(e) + exc_bytes = six.ensure_binary(exc_str, "utf-8") + msg_bytes = b"%s: %s\n" % (six.ensure_binary(argv[0]), exc_bytes) + print(six.ensure_text(msg_bytes, "utf-8"), file=stdout) sys.exit(1) return config @@ -184,25 +237,64 @@ def _maybe_enable_eliot_logging(options, reactor): return options -def run(): +def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stderr): + """ + Run a Tahoe-LAFS node. + + :param configFactory: A zero-argument callable which creates the config + object to use to parse the argument list. + + :param [str] argv: The argument list to use to configure the run. + + :param stdout: The file-like object to use for stdout. + :param stderr: The file-like object to use for stderr. + + :raise SystemExit: Always raised after the run is complete. + """ if sys.platform == "win32": from allmydata.windows.fixups import initialize initialize() # doesn't return: calls sys.exit(rc) - task.react(_run_with_reactor) + task.react( + lambda reactor: _run_with_reactor( + reactor, + configFactory(), + argv, + stdout, + stderr, + ), + ) -def _setup_coverage(reactor): +def _setup_coverage(reactor, argv): """ - Arrange for coverage to be collected if the 'coverage' package is - installed + If coverage measurement was requested, start collecting coverage + measurements and arrange to record those measurements when the process is + done. + + Coverage measurement is considered requested if ``"--coverage"`` is in + ``argv`` (and it will be removed from ``argv`` if it is found). There + should be a ``.coveragerc`` file in the working directory if coverage + measurement is requested. + + This is only necessary to support multi-process coverage measurement, + typically when the test suite is running, and with the pytest-based + *integration* test suite (at ``integration/`` in the root of the source + tree) foremost in mind. The idea is that if you are running Tahoe-LAFS in + a configuration where multiple processes are involved - for example, a + test process and a client node process, if you only measure coverage from + the test process then you will fail to observe most Tahoe-LAFS code that + is being run. + + This function arranges to have any Tahoe-LAFS process (such as that + client node process) collect and report coverage measurements as well. """ # can we put this _setup_coverage call after we hit # argument-parsing? # ensure_str() only necessary on Python 2. if six.ensure_str('--coverage') not in sys.argv: return - sys.argv.remove('--coverage') + argv.remove('--coverage') try: import coverage @@ -233,14 +325,37 @@ def _setup_coverage(reactor): reactor.addSystemEventTrigger('after', 'shutdown', write_coverage_data) -def _run_with_reactor(reactor): +def _run_with_reactor(reactor, config, argv, stdout, stderr): + """ + Run a Tahoe-LAFS node using the given reactor. - _setup_coverage(reactor) + :param reactor: The reactor to use. This implementation largely ignores + this and lets the rest of the implementation pick its own reactor. + Oops. - argv = list(map(argv_to_unicode, sys.argv[1:])) - d = defer.maybeDeferred(parse_or_exit_with_explanation, argv) + :param twisted.python.usage.Options config: The config object to use to + parse the argument list. + + :param [str] argv: The argument list to parse, *excluding* the name of the + program being run. + + :param stdout: See ``run``. + :param stderr: See ``run``. + + :return: A ``Deferred`` that fires when the run is complete. + """ + _setup_coverage(reactor, argv) + + argv = list(map(argv_to_unicode, argv)) + d = defer.maybeDeferred( + parse_or_exit, + config, + argv, + stdout, + stderr, + ) d.addCallback(_maybe_enable_eliot_logging, reactor) - d.addCallback(dispatch) + d.addCallback(dispatch, stdout=stdout, stderr=stderr) def _show_exception(f): # when task.react() notices a non-SystemExit exception, it does # log.err() with the failure and then exits with rc=1. We want this @@ -248,7 +363,7 @@ def _run_with_reactor(reactor): # weren't using react(). if f.check(SystemExit): return f # dispatch function handled it - f.printTraceback(file=sys.stderr) + f.printTraceback(file=stderr) sys.exit(1) d.addErrback(_show_exception) return d diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 218d8d458..01f1a354c 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -192,7 +192,7 @@ class DaemonizeTahoeNodePlugin(object): return DaemonizeTheRealService(self.nodetype, self.basedir, so) -def run(config): +def run(config, runApp=twistd.runApp): """ Runs a Tahoe-LAFS node in the foreground. @@ -212,10 +212,11 @@ def run(config): if not nodetype: print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - # Now prepare to turn into a twistd process. This os.chdir is the point - # of no return. - os.chdir(basedir) - twistd_args = ["--nodaemon"] + + twistd_args = ["--nodaemon", "--rundir", basedir] + if sys.platform != "win32": + pidfile = get_pidfile(basedir) + twistd_args.extend(["--pidfile", pidfile]) twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -232,12 +233,11 @@ def run(config): twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} # handle invalid PID file (twistd might not start otherwise) - pidfile = get_pidfile(basedir) - if get_pid_from_pidfile(pidfile) == -1: + if sys.platform != "win32" and get_pid_from_pidfile(pidfile) == -1: print("found invalid PID file in %s - deleting it" % basedir, file=err) os.remove(pidfile) # We always pass --nodaemon so twistd.runApp does not daemonize. print("running node in %s" % (quoted_basedir,), file=out) - twistd.runApp(twistd_config) + runApp(twistd_config) return 0 diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 8a9b4dfd6..72eb011d0 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -11,23 +11,22 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from six.moves import cStringIO as StringIO -from six import ensure_text, ensure_str +import re +from six import ensure_text import os.path -import sys -import re -from mock import patch, Mock from urllib.parse import quote as url_quote from twisted.trial import unittest -from twisted.python.monkey import MonkeyPatcher -from twisted.internet import task -from twisted.python.filepath import FilePath - +from twisted.internet.testing import ( + MemoryReactor, +) +from twisted.internet.test.modulehelpers import ( + AlternateReactor, +) import allmydata from allmydata.crypto import ed25519 from allmydata.util import fileutil, hashutil, base32 -from allmydata.util.namespace import Namespace from allmydata import uri from allmydata.immutable import upload from allmydata.dirnode import normalize @@ -524,42 +523,34 @@ class CLI(CLITestMixin, unittest.TestCase): self.failUnlessIn(normalize(file), filenames) def test_exception_catcher(self): + """ + An exception that is otherwise unhandled during argument dispatch is + written to stderr and causes the process to exit with code 1. + """ self.basedir = "cli/exception_catcher" - stderr = StringIO() exc = Exception("canary") - ns = Namespace() + class BrokenOptions(object): + def parseOptions(self, argv): + raise exc - ns.parse_called = False - def call_parse_or_exit(args): - ns.parse_called = True - raise exc + stderr = StringIO() - ns.sys_exit_called = False - def call_sys_exit(exitcode): - ns.sys_exit_called = True - self.failUnlessEqual(exitcode, 1) + reactor = MemoryReactor() - def fake_react(f): - reactor = Mock() - d = f(reactor) - # normally this Deferred would be errbacked with SystemExit, but - # since we mocked out sys.exit, it will be fired with None. So - # it's safe to drop it on the floor. - del d + with AlternateReactor(reactor): + with self.assertRaises(SystemExit) as ctx: + runner.run( + configFactory=BrokenOptions, + argv=["tahoe"], + stderr=stderr, + ) - patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation', - call_parse_or_exit), - (sys, 'argv', ["tahoe"]), - (sys, 'exit', call_sys_exit), - (sys, 'stderr', stderr), - (task, 'react', fake_react), - ) - patcher.runWithPatches(runner.run) + self.assertTrue(reactor.hasRun) + self.assertFalse(reactor.running) - self.failUnless(ns.parse_called) - self.failUnless(ns.sys_exit_called) self.failUnlessIn(str(exc), stderr.getvalue()) + self.assertEqual(1, ctx.exception.code) class Help(unittest.TestCase): @@ -1331,30 +1322,3 @@ class Options(ReallyEqualMixin, unittest.TestCase): ["--node-directory=there", "run", some_twistd_option]) self.failUnlessRaises(usage.UsageError, self.parse, ["run", "--basedir=here", some_twistd_option]) - - -class Run(unittest.TestCase): - - @patch('allmydata.scripts.tahoe_run.os.chdir') - @patch('allmydata.scripts.tahoe_run.twistd') - def test_non_numeric_pid(self, mock_twistd, chdir): - """ - If the pidfile exists but does not contain a numeric value, a complaint to - this effect is written to stderr. - """ - basedir = FilePath(ensure_str(self.mktemp())) - basedir.makedirs() - basedir.child(u"twistd.pid").setContent(b"foo") - basedir.child(u"tahoe-client.tac").setContent(b"") - - config = tahoe_run.RunOptions() - config.stdout = StringIO() - config.stderr = StringIO() - config['basedir'] = ensure_text(basedir.path) - config.twistd_args = [] - - result_code = tahoe_run.run(config) - self.assertIn("invalid PID file", config.stderr.getvalue()) - self.assertTrue(len(mock_twistd.mock_calls), 1) - self.assertEqual(mock_twistd.mock_calls[0][0], 'runApp') - self.assertEqual(0, result_code) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 6100d2568..28613e8c1 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -16,11 +16,19 @@ from six.moves import ( StringIO, ) +from testtools import ( + skipIf, +) + from testtools.matchers import ( Contains, Equals, + HasLength, ) +from twisted.python.runtime import ( + platform, +) from twisted.python.filepath import ( FilePath, ) @@ -33,6 +41,8 @@ from twisted.internet.test.modulehelpers import ( from ...scripts.tahoe_run import ( DaemonizeTheRealService, + RunOptions, + run, ) from ...scripts.runner import ( @@ -135,3 +145,40 @@ class DaemonizeTheRealServiceTests(SyncTestCase): """, "Privacy requested", ) + + +class RunTests(SyncTestCase): + """ + Tests for ``run``. + """ + @skipIf(platform.isWindows(), "There are no PID files on Windows.") + def test_non_numeric_pid(self): + """ + If the pidfile exists but does not contain a numeric value, a complaint to + this effect is written to stderr. + """ + basedir = FilePath(self.mktemp()).asTextMode() + basedir.makedirs() + basedir.child(u"twistd.pid").setContent(b"foo") + basedir.child(u"tahoe-client.tac").setContent(b"") + + config = RunOptions() + config.stdout = StringIO() + config.stderr = StringIO() + config['basedir'] = basedir.path + config.twistd_args = [] + + runs = [] + result_code = run(config, runApp=runs.append) + self.assertThat( + config.stderr.getvalue(), + Contains("found invalid PID file in"), + ) + self.assertThat( + runs, + HasLength(1), + ) + self.assertThat( + result_code, + Equals(0), + ) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index be0381e11..410796be2 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -35,6 +35,9 @@ from twisted.internet.error import ( from twisted.internet.interfaces import ( IProcessProtocol, ) +from twisted.python.log import ( + msg, +) from twisted.python.filepath import ( FilePath, ) @@ -99,7 +102,10 @@ class _ProcessProtocolAdapter(ProcessProtocol, object): try: proto = self._fds[childFD] except KeyError: - pass + msg(format="Received unhandled output on %(fd)s: %(output)s", + fd=childFD, + output=data, + ) else: proto.dataReceived(data) @@ -158,6 +164,9 @@ class CLINodeAPI(object): u"-m", u"allmydata.scripts.runner", ] + argv + msg(format="Executing %(argv)s", + argv=argv, + ) return self.reactor.spawnProcess( processProtocol=process_protocol, executable=exe, diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 21b8817a4..b5229ca11 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -15,6 +15,9 @@ import os import sys import time import signal +from functools import ( + partial, +) from random import randrange if PY2: from StringIO import StringIO @@ -98,7 +101,7 @@ def run_cli_native(verb, *args, **kwargs): args=args, nodeargs=nodeargs, ) - argv = nodeargs + [verb] + list(args) + argv = ["tahoe"] + nodeargs + [verb] + list(args) stdin = kwargs.get("stdin", "") if PY2: # The original behavior, the Python 2 behavior, is to accept either @@ -128,10 +131,20 @@ def run_cli_native(verb, *args, **kwargs): stdout = TextIOWrapper(BytesIO(), encoding) stderr = TextIOWrapper(BytesIO(), encoding) d = defer.succeed(argv) - d.addCallback(runner.parse_or_exit_with_explanation, stdout=stdout) - d.addCallback(runner.dispatch, - stdin=stdin, - stdout=stdout, stderr=stderr) + d.addCallback( + partial( + runner.parse_or_exit, + runner.Options(), + ), + stdout=stdout, + stderr=stderr, + ) + d.addCallback( + runner.dispatch, + stdin=stdin, + stdout=stdout, + stderr=stderr, + ) def _done(rc, stdout=stdout, stderr=stderr): if return_bytes and PY3: stdout = stdout.buffer diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index a9ec83524..44c7e1bee 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -19,6 +19,21 @@ import os.path, re, sys from os import linesep import locale +import six + +from testtools import ( + skipUnless, +) +from testtools.matchers import ( + MatchesListwise, + MatchesAny, + Contains, + Equals, + Always, +) +from testtools.twistedsupport import ( + succeeded, +) from eliot import ( log_call, ) @@ -39,6 +54,10 @@ from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv from allmydata.test import common_util import allmydata +from allmydata.scripts.runner import ( + parse_options, +) + from .common import ( PIPE, Popen, @@ -46,6 +65,7 @@ from .common import ( from .common_util import ( parse_cli, run_cli, + run_cli_unicode, ) from .cli_node_api import ( CLINodeAPI, @@ -56,6 +76,9 @@ from .cli_node_api import ( from ..util.eliotutil import ( inline_callbacks, ) +from .common import ( + SyncTestCase, +) def get_root_from_file(src): srcdir = os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(src)))) @@ -74,6 +97,56 @@ srcfile = allmydata.__file__ rootdir = get_root_from_file(srcfile) +class ParseOptionsTests(SyncTestCase): + """ + Tests for ``parse_options``. + """ + @skipUnless(six.PY2, "Only Python 2 exceptions must stringify to bytes.") + def test_nonascii_unknown_subcommand_python2(self): + """ + When ``parse_options`` is called with an argv indicating a subcommand that + does not exist and which also contains non-ascii characters, the + exception it raises includes the subcommand encoded as UTF-8. + """ + tricky = u"\u00F6" + try: + parse_options([tricky]) + except usage.error as e: + self.assertEqual( + b"Unknown command: \\xf6", + b"{}".format(e), + ) + + +class ParseOrExitTests(SyncTestCase): + """ + Tests for ``parse_or_exit``. + """ + def test_nonascii_error_content(self): + """ + ``parse_or_exit`` can report errors that include non-ascii content. + """ + tricky = u"\u00F6" + self.assertThat( + run_cli_unicode(tricky, [], encoding="utf-8"), + succeeded( + MatchesListwise([ + # returncode + Equals(1), + # stdout + MatchesAny( + # Python 2 + Contains(u"Unknown command: \\xf6"), + # Python 3 + Contains(u"Unknown command: \xf6"), + ), + # stderr, + Always() + ]), + ), + ) + + @log_call(action_type="run-bin-tahoe") def run_bintahoe(extra_argv, python_options=None): """ @@ -110,8 +183,16 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): """ tricky = u"\u00F6" out, err, returncode = run_bintahoe([tricky]) + if PY2: + expected = u"Unknown command: \\xf6" + else: + expected = u"Unknown command: \xf6" self.assertEqual(returncode, 1) - self.assertIn(u"Unknown command: " + tricky, out) + self.assertIn( + expected, + out, + "expected {!r} not found in {!r}\nstderr: {!r}".format(expected, out, err), + ) def test_with_python_options(self): """ @@ -305,7 +386,12 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): u"--hostname", u"127.0.0.1", ]) - self.assertEqual(returncode, 0) + self.assertEqual( + returncode, + 0, + "stdout: {!r}\n" + "stderr: {!r}\n", + ) # This makes sure that node.url is written, which allows us to # detect when the introducer restarts in _node_has_restarted below. diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 576091539..f32710688 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -127,7 +127,7 @@ def argv_to_abspath(s, **kwargs): return abspath_expanduser_unicode(decoded, **kwargs) -def unicode_to_argv(s, mangle=False): +def unicode_to_argv(s): """ Make the given unicode string suitable for use in an argv list. diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index e8d05a659..53eb14d53 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -188,7 +188,17 @@ def initialize(): # for example, the Python interpreter or any options passed to it, or runner # scripts such as 'coverage run'. It works even if there are no such arguments, # as in the case of a frozen executable created by bb-freeze or similar. - sys.argv = argv[-len(sys.argv):] + # + # Also, modify sys.argv in place. If any code has already taken a + # reference to the original argument list object then this ensures that + # code sees the new values. This reliance on mutation of shared state is, + # of course, awful. Why does this function even modify sys.argv? Why not + # have a function that *returns* the properly initialized argv as a new + # list? I don't know. + # + # At least Python 3 gets sys.argv correct so before very much longer we + # should be able to fix this bad design by deleting it. + sys.argv[:] = argv[-len(sys.argv):] def a_console(handle):