From 96b54e8f621475a73ac2f0699fc5ca386b36a449 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 20:50:08 -0500 Subject: [PATCH 01/73] news fragment --- newsfragments/3528.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3528.minor diff --git a/newsfragments/3528.minor b/newsfragments/3528.minor new file mode 100644 index 000000000..e69de29bb From d5bff458b6ffcc220ea5b2624f9b7c151ec99707 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 20:51:01 -0500 Subject: [PATCH 02/73] Parameterize argv to allmydata.scripts.runner.run --- src/allmydata/scripts/runner.py | 20 ++++++++++---------- src/allmydata/test/cli/test_cli.py | 9 ++++++--- src/allmydata/test/common_util.py | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 1f993fda1..c50b6ae99 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -109,7 +109,7 @@ def parse_options(argv, config=None): def parse_or_exit_with_explanation(argv, stdout=sys.stdout): config = Options() try: - parse_options(argv, config=config) + parse_options(argv[1:], config=config) except usage.error as e: c = config while hasattr(c, 'subOptions'): @@ -119,7 +119,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) + print("%s: %s\n" % (argv[0], quote_output(msg, quotemarks=False)), file=stdout) sys.exit(1) return config @@ -171,7 +171,7 @@ def _maybe_enable_eliot_logging(options, reactor): # Pass on the options so we can dispatch the subcommand. return options -def run(): +def run(argv=sys.argv): # TODO(3035): Remove tox-check when error becomes a warning if 'TOX_ENV_NAME' not in os.environ: assert sys.version_info < (3,), u"Tahoe-LAFS does not run under Python 3. Please use Python 2.7.x." @@ -180,19 +180,19 @@ def run(): from allmydata.windows.fixups import initialize initialize() # doesn't return: calls sys.exit(rc) - task.react(_run_with_reactor) + task.react(_run_with_reactor, argv) -def _setup_coverage(reactor): +def _setup_coverage(reactor, argv): """ Arrange for coverage to be collected if the 'coverage' package is installed """ # can we put this _setup_coverage call after we hit # argument-parsing? 
- if '--coverage' not in sys.argv: + if '--coverage' not in argv: return - sys.argv.remove('--coverage') + argv.remove('--coverage') try: import coverage @@ -223,11 +223,11 @@ def _setup_coverage(reactor): reactor.addSystemEventTrigger('after', 'shutdown', write_coverage_data) -def _run_with_reactor(reactor): +def _run_with_reactor(reactor, argv): - _setup_coverage(reactor) + _setup_coverage(reactor, argv) - d = defer.maybeDeferred(parse_or_exit_with_explanation, sys.argv[1:]) + d = defer.maybeDeferred(parse_or_exit_with_explanation, argv) d.addCallback(_maybe_enable_eliot_logging, reactor) d.addCallback(dispatch) def _show_exception(f): diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 2b1bc1c86..2a19e65f7 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -524,7 +524,7 @@ class CLI(CLITestMixin, unittest.TestCase): ns.sys_exit_called = True self.failUnlessEqual(exitcode, 1) - def fake_react(f): + def fake_react(f, *args): reactor = Mock() d = f(reactor) # normally this Deferred would be errbacked with SystemExit, but @@ -534,12 +534,15 @@ class CLI(CLITestMixin, unittest.TestCase): patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation', call_parse_or_exit), - (sys, 'argv', ["tahoe"]), (sys, 'exit', call_sys_exit), (sys, 'stderr', stderr), (task, 'react', fake_react), ) - patcher.runWithPatches(runner.run) + patcher.runWithPatches( + lambda: runner.run( + ["tahoe"], + ), + ) self.failUnless(ns.parse_called) self.failUnless(ns.sys_exit_called) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 341d383c1..6a221f509 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -81,7 +81,7 @@ def run_cli_bytes(verb, *args, **kwargs): args=args, nodeargs=nodeargs, ) - argv = nodeargs + [verb] + list(args) + argv = ["tahoe"] + nodeargs + [verb] + list(args) stdin = kwargs.get("stdin", "") if encoding is None: # The original behavior, the Python 2 behavior, is to accept either From 1f229ce9f6571bdd972cd75ff361a3a65c209058 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 20:51:11 -0500 Subject: [PATCH 03/73] All you have to do to drop it is not save it in the first place Also it would have been dropped as soon as this function returned, anyway. --- src/allmydata/test/cli/test_cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 2a19e65f7..b8eab763e 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -526,11 +526,10 @@ class CLI(CLITestMixin, unittest.TestCase): def fake_react(f, *args): reactor = Mock() - d = f(reactor) # normally this Deferred would be errbacked with SystemExit, but # since we mocked out sys.exit, it will be fired with None. So # it's safe to drop it on the floor. 
- del d + f(reactor, *args) patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation', call_parse_or_exit), From a4b0b4a01ab964e286e593d373480674ab629beb Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 20:55:00 -0500 Subject: [PATCH 04/73] Parameterize stderr to allmydata.scripts.runner.run --- src/allmydata/scripts/runner.py | 8 ++++---- src/allmydata/test/cli/test_cli.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index c50b6ae99..9843edb6e 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -171,7 +171,7 @@ def _maybe_enable_eliot_logging(options, reactor): # Pass on the options so we can dispatch the subcommand. return options -def run(argv=sys.argv): +def run(argv=sys.argv, stderr=sys.stderr): # TODO(3035): Remove tox-check when error becomes a warning if 'TOX_ENV_NAME' not in os.environ: assert sys.version_info < (3,), u"Tahoe-LAFS does not run under Python 3. Please use Python 2.7.x." @@ -180,7 +180,7 @@ def run(argv=sys.argv): from allmydata.windows.fixups import initialize initialize() # doesn't return: calls sys.exit(rc) - task.react(_run_with_reactor, argv) + task.react(_run_with_reactor, argv, stderr) def _setup_coverage(reactor, argv): @@ -223,7 +223,7 @@ def _setup_coverage(reactor, argv): reactor.addSystemEventTrigger('after', 'shutdown', write_coverage_data) -def _run_with_reactor(reactor, argv): +def _run_with_reactor(reactor, argv, stderr): _setup_coverage(reactor, argv) @@ -237,7 +237,7 @@ def _run_with_reactor(reactor, argv): # weren't using react(). if f.check(SystemExit): return f # dispatch function handled it - f.printTraceback(file=sys.stderr) + f.printTraceback(file=stderr) sys.exit(1) d.addErrback(_show_exception) return d diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index b8eab763e..9c3958704 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -534,12 +534,12 @@ class CLI(CLITestMixin, unittest.TestCase): patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation', call_parse_or_exit), (sys, 'exit', call_sys_exit), - (sys, 'stderr', stderr), (task, 'react', fake_react), ) patcher.runWithPatches( lambda: runner.run( - ["tahoe"], + argv=["tahoe"], + stderr=stderr, ), ) From 2746eb9ae1919084218ae66fb439d20e106996ec Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 20:58:27 -0500 Subject: [PATCH 05/73] Fix the broken fake_react by not using the argv feature --- src/allmydata/scripts/runner.py | 2 +- src/allmydata/test/cli/test_cli.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 9843edb6e..8b730d9cd 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -180,7 +180,7 @@ def run(argv=sys.argv, stderr=sys.stderr): from allmydata.windows.fixups import initialize initialize() # doesn't return: calls sys.exit(rc) - task.react(_run_with_reactor, argv, stderr) + task.react(lambda reactor: _run_with_reactor(reactor, argv, stderr)) def _setup_coverage(reactor, argv): diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 9c3958704..d1bcfc128 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -524,12 +524,12 @@ class CLI(CLITestMixin, unittest.TestCase): ns.sys_exit_called = True 
self.failUnlessEqual(exitcode, 1) - def fake_react(f, *args): + def fake_react(f): reactor = Mock() # normally this Deferred would be errbacked with SystemExit, but # since we mocked out sys.exit, it will be fired with None. So # it's safe to drop it on the floor. - f(reactor, *args) + f(reactor) patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation', call_parse_or_exit), From a04a915628f6a532eebcccdc0d8208e10a964965 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 21:15:24 -0500 Subject: [PATCH 06/73] Parameterize the Options class so we can synthesize an unhandled exception --- src/allmydata/scripts/runner.py | 27 ++++++++++++++++++++------- src/allmydata/test/cli/test_cli.py | 17 +++++++---------- src/allmydata/test/common_util.py | 21 +++++++++++++++++---- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 8b730d9cd..bc887e11a 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -106,8 +106,7 @@ def parse_options(argv, config=None): config.parseOptions(argv) # may raise usage.error return config -def parse_or_exit_with_explanation(argv, stdout=sys.stdout): - config = Options() +def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): try: parse_options(argv[1:], config=config) except usage.error as e: @@ -171,7 +170,7 @@ def _maybe_enable_eliot_logging(options, reactor): # Pass on the options so we can dispatch the subcommand. return options -def run(argv=sys.argv, stderr=sys.stderr): +def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stderr): # TODO(3035): Remove tox-check when error becomes a warning if 'TOX_ENV_NAME' not in os.environ: assert sys.version_info < (3,), u"Tahoe-LAFS does not run under Python 3. Please use Python 2.7.x." @@ -180,7 +179,15 @@ def run(argv=sys.argv, stderr=sys.stderr): from allmydata.windows.fixups import initialize initialize() # doesn't return: calls sys.exit(rc) - task.react(lambda reactor: _run_with_reactor(reactor, argv, stderr)) + task.react( + lambda reactor: _run_with_reactor( + reactor, + configFactory(), + argv, + stdout, + stderr, + ), + ) def _setup_coverage(reactor, argv): @@ -223,13 +230,19 @@ def _setup_coverage(reactor, argv): reactor.addSystemEventTrigger('after', 'shutdown', write_coverage_data) -def _run_with_reactor(reactor, argv, stderr): +def _run_with_reactor(reactor, config, argv, stdout, stderr): _setup_coverage(reactor, argv) - d = defer.maybeDeferred(parse_or_exit_with_explanation, argv) + d = defer.maybeDeferred( + parse_or_exit_with_explanation_with_config, + config, + argv, + stdout, + stderr, + ) d.addCallback(_maybe_enable_eliot_logging, reactor) - d.addCallback(dispatch) + d.addCallback(dispatch, stdout=stdout, stderr=stderr) def _show_exception(f): # when task.react() notices a non-SystemExit exception, it does # log.err() with the failure and then exits with rc=1. 
We want this diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index d1bcfc128..f0e6fd2e4 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -510,14 +510,13 @@ class CLI(CLITestMixin, unittest.TestCase): def test_exception_catcher(self): self.basedir = "cli/exception_catcher" - stderr = StringIO() exc = Exception("canary") - ns = Namespace() + class BrokenOptions(object): + def parseOptions(self, argv): + raise exc - ns.parse_called = False - def call_parse_or_exit(args): - ns.parse_called = True - raise exc + stderr = StringIO() + ns = Namespace() ns.sys_exit_called = False def call_sys_exit(exitcode): @@ -531,19 +530,17 @@ class CLI(CLITestMixin, unittest.TestCase): # it's safe to drop it on the floor. f(reactor) - patcher = MonkeyPatcher((runner, 'parse_or_exit_with_explanation', - call_parse_or_exit), - (sys, 'exit', call_sys_exit), + patcher = MonkeyPatcher((sys, 'exit', call_sys_exit), (task, 'react', fake_react), ) patcher.runWithPatches( lambda: runner.run( + configFactory=BrokenOptions, argv=["tahoe"], stderr=stderr, ), ) - self.failUnless(ns.parse_called) self.failUnless(ns.sys_exit_called) self.failUnlessIn(str(exc), stderr.getvalue()) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 6a221f509..8885e067e 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -3,6 +3,9 @@ from __future__ import print_function import os import time import signal +from functools import ( + partial, +) from random import randrange from six.moves import StringIO from io import ( @@ -100,10 +103,20 @@ def run_cli_bytes(verb, *args, **kwargs): stdout = TextIOWrapper(BytesIO(), encoding) stderr = TextIOWrapper(BytesIO(), encoding) d = defer.succeed(argv) - d.addCallback(runner.parse_or_exit_with_explanation, stdout=stdout) - d.addCallback(runner.dispatch, - stdin=StringIO(stdin), - stdout=stdout, stderr=stderr) + d.addCallback( + partial( + runner.parse_or_exit_with_explanation_with_config, + runner.Options(), + ), + stdout=stdout, + stderr=stderr, + ) + d.addCallback( + runner.dispatch, + stdin=StringIO(stdin), + stdout=stdout, + stderr=stderr, + ) def _done(rc): return 0, _getvalue(stdout), _getvalue(stderr) def _err(f): From faf8da82dd2c6bca84a0c4541ed86b53183c1d8b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 21:20:16 -0500 Subject: [PATCH 07/73] Get rid of the sys.exit monkey-patch It's just an exception. Let it get logged and then check after. --- src/allmydata/test/cli/test_cli.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index f0e6fd2e4..520461232 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -516,12 +516,6 @@ class CLI(CLITestMixin, unittest.TestCase): raise exc stderr = StringIO() - ns = Namespace() - - ns.sys_exit_called = False - def call_sys_exit(exitcode): - ns.sys_exit_called = True - self.failUnlessEqual(exitcode, 1) def fake_react(f): reactor = Mock() @@ -530,8 +524,7 @@ class CLI(CLITestMixin, unittest.TestCase): # it's safe to drop it on the floor. 
f(reactor) - patcher = MonkeyPatcher((sys, 'exit', call_sys_exit), - (task, 'react', fake_react), + patcher = MonkeyPatcher((task, 'react', fake_react), ) patcher.runWithPatches( lambda: runner.run( @@ -541,8 +534,9 @@ class CLI(CLITestMixin, unittest.TestCase): ), ) - self.failUnless(ns.sys_exit_called) self.failUnlessIn(str(exc), stderr.getvalue()) + [exit_exc] = self.flushLoggedErrors(SystemExit) + self.assertEqual(1, exit_exc.value.code) class Help(unittest.TestCase): From 240d5d11643f8af352e3d7a41ec0464695027eaf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 21:25:50 -0500 Subject: [PATCH 08/73] Remove react monkey patching by supplying an alternate reactor Let react run and do its thing. This gives us an even nicer way to check the exit code. --- src/allmydata/test/cli/test_cli.py | 40 ++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 520461232..7720d555c 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -8,7 +8,12 @@ from twisted.trial import unittest from twisted.python.monkey import MonkeyPatcher from twisted.internet import task from twisted.python.filepath import FilePath - +from twisted.internet.testing import ( + MemoryReactor, +) +from twisted.internet.test.modulehelpers import ( + AlternateReactor, +) import allmydata from allmydata.crypto import ed25519 from allmydata.util import fileutil, hashutil, base32 @@ -508,6 +513,10 @@ class CLI(CLITestMixin, unittest.TestCase): self.failUnlessIn(normalize(file), filenames) def test_exception_catcher(self): + """ + An exception that is otherwise unhandled during argument dispatch is + written to stderr and causes the process to exit with code 1. + """ self.basedir = "cli/exception_catcher" exc = Exception("canary") @@ -517,26 +526,21 @@ class CLI(CLITestMixin, unittest.TestCase): stderr = StringIO() - def fake_react(f): - reactor = Mock() - # normally this Deferred would be errbacked with SystemExit, but - # since we mocked out sys.exit, it will be fired with None. So - # it's safe to drop it on the floor. 
- f(reactor) + reactor = MemoryReactor() - patcher = MonkeyPatcher((task, 'react', fake_react), - ) - patcher.runWithPatches( - lambda: runner.run( - configFactory=BrokenOptions, - argv=["tahoe"], - stderr=stderr, - ), - ) + with AlternateReactor(reactor): + with self.assertRaises(SystemExit) as ctx: + runner.run( + configFactory=BrokenOptions, + argv=["tahoe"], + stderr=stderr, + ) + + self.assertTrue(reactor.hasRun) + self.assertFalse(reactor.running) self.failUnlessIn(str(exc), stderr.getvalue()) - [exit_exc] = self.flushLoggedErrors(SystemExit) - self.assertEqual(1, exit_exc.value.code) + self.assertEqual(1, ctx.exception.code) class Help(unittest.TestCase): From bb495b6dc5c339fff553466f4b58d4644577f5eb Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 16 Dec 2020 21:26:59 -0500 Subject: [PATCH 09/73] unused imports --- src/allmydata/test/cli/test_cli.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 7720d555c..8d9c60a6c 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -1,12 +1,10 @@ import os.path from six.moves import cStringIO as StringIO -import urllib, sys +import urllib import re -from mock import patch, Mock +from mock import patch from twisted.trial import unittest -from twisted.python.monkey import MonkeyPatcher -from twisted.internet import task from twisted.python.filepath import FilePath from twisted.internet.testing import ( MemoryReactor, @@ -17,7 +15,6 @@ from twisted.internet.test.modulehelpers import ( import allmydata from allmydata.crypto import ed25519 from allmydata.util import fileutil, hashutil, base32 -from allmydata.util.namespace import Namespace from allmydata import uri from allmydata.immutable import upload from allmydata.dirnode import normalize From a363c8de67eaa3aefd85370be35afd7f2c8a7857 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 17 Dec 2020 09:11:29 -0500 Subject: [PATCH 10/73] Fix test_non_numeric_pid (and put it in a better place too) --- src/allmydata/scripts/tahoe_run.py | 9 +++---- src/allmydata/test/cli/test_cli.py | 29 ---------------------- src/allmydata/test/cli/test_run.py | 39 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index bc4ba27d1..071dfac60 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -182,7 +182,7 @@ class DaemonizeTahoeNodePlugin(object): return DaemonizeTheRealService(self.nodetype, self.basedir, so) -def run(config): +def run(config, runApp=twistd.runApp): """ Runs a Tahoe-LAFS node in the foreground. @@ -202,10 +202,7 @@ def run(config): if not nodetype: print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - # Now prepare to turn into a twistd process. This os.chdir is the point - # of no return. - os.chdir(basedir) - twistd_args = ["--nodaemon"] + twistd_args = ["--nodaemon", "--rundir", basedir] twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -229,5 +226,5 @@ def run(config): # We always pass --nodaemon so twistd.runApp does not daemonize. 
print("running node in %s" % (quoted_basedir,), file=out) - twistd.runApp(twistd_config) + runApp(twistd_config) return 0 diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 8d9c60a6c..a851790e6 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -2,10 +2,8 @@ import os.path from six.moves import cStringIO as StringIO import urllib import re -from mock import patch from twisted.trial import unittest -from twisted.python.filepath import FilePath from twisted.internet.testing import ( MemoryReactor, ) @@ -1308,30 +1306,3 @@ class Options(ReallyEqualMixin, unittest.TestCase): ["--node-directory=there", "run", some_twistd_option]) self.failUnlessRaises(usage.UsageError, self.parse, ["run", "--basedir=here", some_twistd_option]) - - -class Run(unittest.TestCase): - - @patch('allmydata.scripts.tahoe_run.os.chdir') - @patch('allmydata.scripts.tahoe_run.twistd') - def test_non_numeric_pid(self, mock_twistd, chdir): - """ - If the pidfile exists but does not contain a numeric value, a complaint to - this effect is written to stderr. - """ - basedir = FilePath(self.mktemp().decode("ascii")) - basedir.makedirs() - basedir.child(u"twistd.pid").setContent(b"foo") - basedir.child(u"tahoe-client.tac").setContent(b"") - - config = tahoe_run.RunOptions() - config.stdout = StringIO() - config.stderr = StringIO() - config['basedir'] = basedir.path - config.twistd_args = [] - - result_code = tahoe_run.run(config) - self.assertIn("invalid PID file", config.stderr.getvalue()) - self.assertTrue(len(mock_twistd.mock_calls), 1) - self.assertEqual(mock_twistd.mock_calls[0][0], 'runApp') - self.assertEqual(0, result_code) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index d27791f34..e4bcdfbcd 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -9,6 +9,7 @@ from six.moves import ( from testtools.matchers import ( Contains, Equals, + HasLength, ) from twisted.python.filepath import ( @@ -23,6 +24,8 @@ from twisted.internet.test.modulehelpers import ( from ...scripts.tahoe_run import ( DaemonizeTheRealService, + RunOptions, + run, ) from ...scripts.runner import ( @@ -125,3 +128,39 @@ class DaemonizeTheRealServiceTests(SyncTestCase): """, "Privacy requested", ) + + +class RunTests(SyncTestCase): + """ + Tests for ``run``. + """ + def test_non_numeric_pid(self): + """ + If the pidfile exists but does not contain a numeric value, a complaint to + this effect is written to stderr. 
+ """ + basedir = FilePath(self.mktemp().decode("ascii")) + basedir.makedirs() + basedir.child(u"twistd.pid").setContent(b"foo") + basedir.child(u"tahoe-client.tac").setContent(b"") + + config = RunOptions() + config.stdout = StringIO() + config.stderr = StringIO() + config['basedir'] = basedir.path + config.twistd_args = [] + + runs = [] + result_code = run(config, runApp=runs.append) + self.assertThat( + config.stderr.getvalue(), + Contains("found invalid PID file in"), + ) + self.assertThat( + runs, + HasLength(1), + ) + self.assertThat( + result_code, + Equals(0), + ) From f88061e31c5f643441b558ed8c72329cd9f54f0b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 17 Dec 2020 09:16:05 -0500 Subject: [PATCH 11/73] docstring --- src/allmydata/scripts/runner.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index bc887e11a..4d8601828 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,6 +107,25 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): + """ + Parse Tahoe-LAFS CLI arguments and return a configuration object if they + are valid. + + If they are invalid, write an explanation to ``stdout`` and exit. + + :param twisted.python.usage.Options config: An instance of the + argument-parsing class to use. + + :param [str] argv: The argument list to parse, including the name of the + program being run as ``argv[0]``. + + :param stdout: The file-like object to use as stdout. + :param stderr: The file-like object to use as stderr. + + :raise SystemExit: If there is an argument-parsing problem. + + :return: ``config``, after using it to parse the argument list. + """ try: parse_options(argv[1:], config=config) except usage.error as e: From 70305131f15c195a9522ee7692dbd30609b28de9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 17 Dec 2020 09:22:43 -0500 Subject: [PATCH 12/73] docstrings --- src/allmydata/scripts/runner.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 4d8601828..5b8e2bd4a 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -190,6 +190,19 @@ def _maybe_enable_eliot_logging(options, reactor): return options def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stderr): + """ + Run a Tahoe-LAFS node. + + :param configFactory: A zero-argument callable which creates the config + object to use to parse the argument list. + + :param [str] argv: The argument list to use to configure the run. + + :param stdout: The file-like object to use for stdout. + :param stderr: The file-like object to use for stderr. + + :raise SystemExit: Always raised after the run is complete. + """ # TODO(3035): Remove tox-check when error becomes a warning if 'TOX_ENV_NAME' not in os.environ: assert sys.version_info < (3,), u"Tahoe-LAFS does not run under Python 3. Please use Python 2.7.x." @@ -250,7 +263,23 @@ def _setup_coverage(reactor, argv): def _run_with_reactor(reactor, config, argv, stdout, stderr): + """ + Run a Tahoe-LAFS node using the given reactor. + :param reactor: The reactor to use. This implementation largely ignores + this and lets the rest of the implementation pick its own reactor. + Oops. + + :param twisted.python.usage.Options config: The config object to use to + parse the argument list. 
+ + :param argv: See ``run``. + + :param stdout: See ``run``. + :param stderr: See ``run``. + + :return: A ``Deferred`` that fires when the run is complete. + """ _setup_coverage(reactor, argv) d = defer.maybeDeferred( From 6e152daf05f01191c88a1515f313c060baacdee8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 17 Dec 2020 09:37:45 -0500 Subject: [PATCH 13/73] Put the pidfile in the right place Seems we relied on the chdir for that to happen, previously. --- src/allmydata/scripts/tahoe_run.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 071dfac60..80c867202 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -202,7 +202,9 @@ def run(config, runApp=twistd.runApp): if not nodetype: print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - twistd_args = ["--nodaemon", "--rundir", basedir] + + pidfile = get_pidfile(basedir) + twistd_args = ["--nodaemon", "--rundir", basedir, "--pidfile", pidfile] twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -219,7 +221,6 @@ def run(config, runApp=twistd.runApp): twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} # handle invalid PID file (twistd might not start otherwise) - pidfile = get_pidfile(basedir) if get_pid_from_pidfile(pidfile) == -1: print("found invalid PID file in %s - deleting it" % basedir, file=err) os.remove(pidfile) From c9b3ccedb4e91b728c449ab504eddabc197f410a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 4 Jan 2021 11:59:58 -0500 Subject: [PATCH 14/73] explain this while loop --- src/allmydata/scripts/runner.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 5b8e2bd4a..9e6aa0bcc 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -113,7 +113,7 @@ def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): If they are invalid, write an explanation to ``stdout`` and exit. - :param twisted.python.usage.Options config: An instance of the + :param allmydata.scripts.runner.Options config: An instance of the argument-parsing class to use. :param [str] argv: The argument list to parse, including the name of the @@ -129,6 +129,17 @@ def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): try: parse_options(argv[1:], config=config) except usage.error as e: + # `parse_options` may have the side-effect of initializing a + # "sub-option" of the given configuration, even if it ultimately + # raises an exception. For example, `tahoe run --invalid-option` will + # set `config.subOptions` to an instance of + # `allmydata.scripts.tahoe_run.RunOptions` and then raise a + # `usage.error` because `RunOptions` does not recognize + # `--invalid-option`. If `run` itself had a sub-options then the same + # thing could happen but with another layer of nesting. We can + # present the user with the most precise information about their usage + # error possible by finding the most "sub" of the sub-options and then + # showing that to the user along with the usage error. 
c = config while hasattr(c, 'subOptions'): c = c.subOptions From 9958236c3192bda57b8f7f08866f3b2a0b2ba582 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 4 Jan 2021 12:06:03 -0500 Subject: [PATCH 15/73] explain the extra coverage stuff --- src/allmydata/scripts/runner.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 9e6aa0bcc..6d3696d9b 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -235,11 +235,28 @@ def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stde def _setup_coverage(reactor, argv): """ - Arrange for coverage to be collected if the 'coverage' package is - installed + If coverage measurement was requested, start collecting coverage + measurements and arrange to record those measurements when the process is + done. + + Coverage measurement is considered requested if ``"--coverage"`` is in + ``argv`` (and it will be removed from ``argv`` if it is found). There + should be a ``.coveragerc`` file in the working directory if coverage + measurement is requested. + + This is only necessary to support multi-process coverage measurement, + typically when the test suite is running, and with the pytest-based + *integration* test suite (at ``integration/`` in the root of the source + tree) foremost in mind. The idea is that if you are running Tahoe-LAFS in + a configuration where multiple processes are involved - for example, a + test process and a client node process, if you only measure coverage from + the test process then you will fail to observe most Tahoe-LAFS code that + is being run. + + This function arranges to have any Tahoe-LAFS process (such as that + client node process) collect and report coverage measurements as well. """ - # can we put this _setup_coverage call after we hit - # argument-parsing? + # can we put this _setup_coverage call after we hit argument-parsing? if '--coverage' not in argv: return argv.remove('--coverage') From 190d9a7319e155ee72918e1cda714e327b27ddc4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 4 Jan 2021 12:13:53 -0500 Subject: [PATCH 16/73] Skip the pidfile test on Windows where there are no pidfiles --- src/allmydata/test/cli/test_run.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index e4bcdfbcd..84befafc1 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -6,12 +6,19 @@ from six.moves import ( StringIO, ) +from testtools import ( + skipIf, +) + from testtools.matchers import ( Contains, Equals, HasLength, ) +from twisted.python.runtime import ( + platform, +) from twisted.python.filepath import ( FilePath, ) @@ -134,6 +141,7 @@ class RunTests(SyncTestCase): """ Tests for ``run``. """ + @skipIf(platform.isWindows(), "There are no PID files on Windows.") def test_non_numeric_pid(self): """ If the pidfile exists but does not contain a numeric value, a complaint to From 916ddd590ea58f48811780d3adf8e8233f4feee5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 4 Jan 2021 14:13:34 -0500 Subject: [PATCH 17/73] Maybe a useful test to demonstrate the lower-level behavior? Or maybe trash. I don't know. 
--- src/allmydata/test/test_runner.py | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ef2b99a19..2c8e65fff 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -6,6 +6,12 @@ from __future__ import ( import os.path, re, sys from os import linesep +import six + +from testtools import ( + skipUnless, +) + from twisted.trial import unittest from twisted.internet import reactor @@ -22,6 +28,10 @@ from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output from allmydata.test import common_util import allmydata +from allmydata.scripts.runner import ( + parse_options, +) + from .common_util import parse_cli, run_cli from .cli_node_api import ( CLINodeAPI, @@ -36,6 +46,9 @@ from ..util.eliotutil import ( inline_callbacks, log_call_deferred, ) +from .common import ( + SyncTestCase, +) def get_root_from_file(src): srcdir = os.path.dirname(os.path.dirname(os.path.normcase(os.path.realpath(src)))) @@ -72,6 +85,27 @@ class RunBinTahoeMixin(object): return d +class ParseOptionsTests(SyncTestCase): + """ + Tests for ``parse_options``. + """ + @skipUnless(six.PY2, "Only Python 2 exceptions must stringify to bytes.") + def test_nonascii_unknown_subcommand_python2(self): + """ + When ``parse_options`` is called with an argv indicating a subcommand that + does not exist and which also contains non-ascii characters, the + exception it raises includes the subcommand encoded as UTF-8. + """ + tricky = u"\u2621" + try: + parse_options([unicode_to_argv(tricky, mangle=True)]) + except usage.error as e: + self.assertEqual( + b"Unknown command: " + tricky.encode("utf-8"), + str(e) + ) + + class BinTahoe(common_util.SignalMixin, unittest.TestCase, RunBinTahoeMixin): def test_unicode_arguments_and_output(self): tricky = u"\u2621" From 680a5a0575adbb237e050478bfffaaa0452b3878 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 14:26:08 -0500 Subject: [PATCH 18/73] mangling no longer a thing --- src/allmydata/test/test_runner.py | 2 +- src/allmydata/util/encodingutil.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 6ed9a65c6..0a71f45a3 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -86,7 +86,7 @@ class ParseOptionsTests(SyncTestCase): """ tricky = u"\u2621" try: - parse_options([unicode_to_argv(tricky, mangle=True)]) + parse_options([unicode_to_argv(tricky)]) except usage.error as e: self.assertEqual( b"Unknown command: " + tricky.encode("utf-8"), diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 483871b5d..4e1cf57ba 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -126,7 +126,7 @@ def argv_to_abspath(s, **kwargs): return abspath_expanduser_unicode(decoded, **kwargs) -def unicode_to_argv(s, mangle=False): +def unicode_to_argv(s): """ Make the given unicode string suitable for use in an argv list. 
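Taken together, the patches up to this point apply one refactoring again and again: entry points that used to reach for process-wide globals (sys.argv, sys.stdout, sys.stderr, twistd.runApp) now accept those objects as parameters that merely default to the globals, so tests can hand in in-memory substitutes instead of monkey-patching. What follows is a minimal, self-contained sketch of that pattern; it uses a hypothetical main() rather than the Tahoe-LAFS runner itself, so the details differ from the real code.

    import sys
    from io import StringIO

    def main(argv=sys.argv, stdout=sys.stdout, stderr=sys.stderr):
        # Normal invocations fall back to the process globals by default.
        if len(argv) < 2:
            print("%s: missing command" % (argv[0],), file=stderr)
            return 1
        print("running %s" % (argv[1],), file=stdout)
        return 0

    # A test can now drive main() without touching sys.argv or sys.stdout:
    out, err = StringIO(), StringIO()
    rc = main(argv=["tahoe", "run"], stdout=out, stderr=err)
    assert rc == 0 and "running run" in out.getvalue()
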
From 4575deb27cb320ed22614542fb4b46a4e808c852 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 14:29:04 -0500 Subject: [PATCH 19/73] Attempt to address non-ascii exceptions from the option parser --- src/allmydata/scripts/runner.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index e83b2b38d..1f326a461 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -108,7 +108,17 @@ for module in (create_node,): def parse_options(argv, config=None): if not config: config = Options() - config.parseOptions(argv) # may raise usage.error + try: + config.parseOptions(argv) + except usage.error as e: + if six.PY2: + # Exceptions must stringify to bytes on Python 2. + raise usage.error(*( + arg.encode("utf-8") if isinstance(arg, unicode) else arg + for arg + in e.args + )) + raise return config def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): From 50e033f263cf85c91ed9ba807d700e7f31c5337f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 14:37:53 -0500 Subject: [PATCH 20/73] Log unhandled output from the tahoe runner helper --- src/allmydata/test/cli_node_api.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index 34d73a199..aaa280371 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -24,6 +24,9 @@ from twisted.internet.error import ( from twisted.internet.interfaces import ( IProcessProtocol, ) +from twisted.python.log import ( + msg, +) from twisted.python.filepath import ( FilePath, ) @@ -88,7 +91,10 @@ class _ProcessProtocolAdapter(ProcessProtocol, object): try: proto = self._fds[childFD] except KeyError: - pass + msg("Received unhandled output on {fd}: {output}", + fd=childFD, + output=data, + ) else: proto.dataReceived(data) From 6458183df2531a3e80f3e62baf8080ba77317664 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 14:38:43 -0500 Subject: [PATCH 21/73] maybe it's this --- src/allmydata/test/cli_node_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index aaa280371..fbfa55287 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -91,7 +91,7 @@ class _ProcessProtocolAdapter(ProcessProtocol, object): try: proto = self._fds[childFD] except KeyError: - msg("Received unhandled output on {fd}: {output}", + msg("Received unhandled output on %(fd)s: %(output)s", fd=childFD, output=data, ) From 47b60c0faa0ba0304b241332914e0dbba337b272 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 14:39:28 -0500 Subject: [PATCH 22/73] oh yea I think it's this --- src/allmydata/test/cli_node_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index fbfa55287..f43fce148 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -91,7 +91,7 @@ class _ProcessProtocolAdapter(ProcessProtocol, object): try: proto = self._fds[childFD] except KeyError: - msg("Received unhandled output on %(fd)s: %(output)s", + msg(format="Received unhandled output on %(fd)s: %(output)s", fd=childFD, output=data, ) From e9adccd43238ec900435ae3e03854ff8f01ad7e5 Mon Sep 17 00:00:00 2001 From: Jean-Paul 
Calderone Date: Fri, 12 Feb 2021 14:42:30 -0500 Subject: [PATCH 23/73] more logs --- src/allmydata/test/cli_node_api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index f43fce148..a5c877b0b 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -152,6 +152,9 @@ class CLINodeAPI(object): u"-m", u"allmydata.scripts.runner", ] + argv + msg(format="Executing %(argv)s", + argv=argv, + ) return self.reactor.spawnProcess( processProtocol=process_protocol, executable=exe, From f0ac092109388c49c70a5586e1680e5199dc420f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 14:49:20 -0500 Subject: [PATCH 24/73] Avoid the pidfile stuff on Windows --- src/allmydata/scripts/tahoe_run.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 80c867202..a1524ab35 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -203,8 +203,10 @@ def run(config, runApp=twistd.runApp): print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - pidfile = get_pidfile(basedir) - twistd_args = ["--nodaemon", "--rundir", basedir, "--pidfile", pidfile] + twistd_args = ["--nodaemon", "--rundir", basedir] + if sys.platform != "win32": + pidfile = get_pidfile(basedir) + twistd_args.extend(["--pidfile", pidfile]) twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -221,7 +223,7 @@ def run(config, runApp=twistd.runApp): twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} # handle invalid PID file (twistd might not start otherwise) - if get_pid_from_pidfile(pidfile) == -1: + if sys.platform != "win32" and get_pid_from_pidfile(pidfile) == -1: print("found invalid PID file in %s - deleting it" % basedir, file=err) os.remove(pidfile) From 15312009cecc2355cd1a180b45af6bf760a22de5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 12:58:51 -0400 Subject: [PATCH 25/73] Fix mis-merge: This test moved to test_run.py --- src/allmydata/test/cli/test_cli.py | 31 +----------------------------- 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index 7d8ad2046..72eb011d0 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -12,13 +12,11 @@ if PY2: from six.moves import cStringIO as StringIO import re -from six import ensure_text, ensure_str +from six import ensure_text import os.path -from mock import patch from urllib.parse import quote as url_quote -from twisted.python.filepath import FilePath from twisted.trial import unittest from twisted.internet.testing import ( MemoryReactor, @@ -1324,30 +1322,3 @@ class Options(ReallyEqualMixin, unittest.TestCase): ["--node-directory=there", "run", some_twistd_option]) self.failUnlessRaises(usage.UsageError, self.parse, ["run", "--basedir=here", some_twistd_option]) - - -class Run(unittest.TestCase): - - @patch('allmydata.scripts.tahoe_run.os.chdir') - @patch('allmydata.scripts.tahoe_run.twistd') - def test_non_numeric_pid(self, mock_twistd, chdir): - """ - If the pidfile exists but does not contain a numeric value, a complaint to - this effect is written to stderr. 
- """ - basedir = FilePath(ensure_str(self.mktemp())) - basedir.makedirs() - basedir.child(u"twistd.pid").setContent(b"foo") - basedir.child(u"tahoe-client.tac").setContent(b"") - - config = tahoe_run.RunOptions() - config.stdout = StringIO() - config.stderr = StringIO() - config['basedir'] = ensure_text(basedir.path) - config.twistd_args = [] - - result_code = tahoe_run.run(config) - self.assertIn("invalid PID file", config.stderr.getvalue()) - self.assertTrue(len(mock_twistd.mock_calls), 1) - self.assertEqual(mock_twistd.mock_calls[0][0], 'runApp') - self.assertEqual(0, result_code) From 9632b35abebc0e57ae851a8fb2c2a899c9009b4f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 13:19:15 -0400 Subject: [PATCH 26/73] Fix mismerge: Put Py3 warning back --- src/allmydata/scripts/runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index d48dd1395..ba979fdfa 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -221,6 +221,9 @@ def _maybe_enable_eliot_logging(options, reactor): # Pass on the options so we can dispatch the subcommand. return options +PYTHON_3_WARNING = ("Support for Python 3 is an incomplete work-in-progress." + " Use at your own risk.") + def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stderr): """ Run a Tahoe-LAFS node. @@ -235,6 +238,8 @@ def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stde :raise SystemExit: Always raised after the run is complete. """ + if six.PY3: + print(PYTHON_3_WARNING, file=sys.stderr) if sys.platform == "win32": from allmydata.windows.fixups import initialize initialize() From ffbcbf78a77917d44fd2300a41cd088aa38a08cd Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 13:30:39 -0400 Subject: [PATCH 27/73] Send the warning to the parameterized stderr --- src/allmydata/scripts/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index ba979fdfa..89265590c 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -239,7 +239,7 @@ def run(configFactory=Options, argv=sys.argv, stdout=sys.stdout, stderr=sys.stde :raise SystemExit: Always raised after the run is complete. """ if six.PY3: - print(PYTHON_3_WARNING, file=sys.stderr) + print(PYTHON_3_WARNING, file=stderr) if sys.platform == "win32": from allmydata.windows.fixups import initialize initialize() From 5f6ae1f8f5ccdd62eb427697e6404e0b3854b6e1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 13:30:52 -0400 Subject: [PATCH 28/73] Fix mis-merge: don't try to stripe argv[0] twice --- src/allmydata/scripts/runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 89265590c..d6c1bfdc7 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -325,7 +325,8 @@ def _run_with_reactor(reactor, config, argv, stdout, stderr): :param twisted.python.usage.Options config: The config object to use to parse the argument list. - :param argv: See ``run``. + :param [str] argv: The argument list to parse, *excluding* the name of the + program being run. :param stdout: See ``run``. :param stderr: See ``run``. 
@@ -334,7 +335,7 @@ def _run_with_reactor(reactor, config, argv, stdout, stderr): """ _setup_coverage(reactor, argv) - argv = list(map(argv_to_unicode, argv[1:])) + argv = list(map(argv_to_unicode, argv)) d = defer.maybeDeferred( parse_or_exit_with_explanation_with_config, config, From 2244f0374e8887fd258c98f63f8863891ccdea27 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 13:31:07 -0400 Subject: [PATCH 29/73] fail more informatively --- src/allmydata/test/test_runner.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 48e3f4f21..4ec7a2153 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -338,7 +338,12 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): u"--hostname", u"127.0.0.1", ]) - self.assertEqual(returncode, 0) + self.assertEqual( + returncode, + 0, + "stdout: {!r}\n" + "stderr: {!r}\n", + ) # This makes sure that node.url is written, which allows us to # detect when the introducer restarts in _node_has_restarted below. From 0176583e7537a2eae0fa3b4a2d29c2828638406c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 13:32:04 -0400 Subject: [PATCH 30/73] Get a text-mode FilePath in a py3 compatible way --- src/allmydata/test/cli/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index c4342b3fe..28613e8c1 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -157,7 +157,7 @@ class RunTests(SyncTestCase): If the pidfile exists but does not contain a numeric value, a complaint to this effect is written to stderr. """ - basedir = FilePath(self.mktemp().decode("ascii")) + basedir = FilePath(self.mktemp()).asTextMode() basedir.makedirs() basedir.child(u"twistd.pid").setContent(b"foo") basedir.child(u"tahoe-client.tac").setContent(b"") From d56c218586b79393afe296b54bd35ce99a69a381 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:25:24 -0400 Subject: [PATCH 31/73] Options are defined with unicode now; argv better be unicode. --- src/allmydata/scripts/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index d6c1bfdc7..5c966b531 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -138,7 +138,7 @@ def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): :param allmydata.scripts.runner.Options config: An instance of the argument-parsing class to use. - :param [str] argv: The argument list to parse, including the name of the + :param [unicode] argv: The argument list to parse, including the name of the program being run as ``argv[0]``. :param stdout: The file-like object to use as stdout. 
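Two of the later patches in this series ([PATCH 38/73] and [PATCH 39/73]) wrestle with reporting a usage.error whose message contains non-ASCII text on Python 2, and the escaping they settle on is the standard "backslashreplace" codec error handler. As a short illustration of what that handler produces (ordinary Python, not the project's code):

    tricky = u"\u00F6"  # the character used by the tests in this series
    print(tricky.encode("ascii", errors="backslashreplace"))
    # prints b'\\xf6' -- the unencodable character becomes its backslash escape

    message = u"Unknown command: " + tricky
    print(message.encode("ascii", errors="backslashreplace"))
    # prints b'Unknown command: \\xf6', matching the escaped form that the
    # Python 2 assertions added later in the series look for
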
From 4d41e30ce9c3c2bc15f8e6af93a0afc49b002e6d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:25:40 -0400 Subject: [PATCH 32/73] Just pass unicode at this layer --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 4ec7a2153..c60dd4f01 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -99,7 +99,7 @@ class ParseOptionsTests(SyncTestCase): """ tricky = u"\u2621" try: - parse_options([unicode_to_argv(tricky)]) + parse_options([tricky]) except usage.error as e: self.assertEqual( b"Unknown command: " + tricky.encode("utf-8"), From 1d75bbfd72f3a4c0ad84946a6e7f1a07fa39a0e4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:25:47 -0400 Subject: [PATCH 33/73] `str` is a kind of weird future thing; coerce another way --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index c60dd4f01..c9ad4d97c 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -103,7 +103,7 @@ class ParseOptionsTests(SyncTestCase): except usage.error as e: self.assertEqual( b"Unknown command: " + tricky.encode("utf-8"), - str(e) + b"{}".format(e), ) From fd3d3bc68823c78f36b14f3c610e2182051a9d91 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:27:21 -0400 Subject: [PATCH 34/73] Give the py3 static checker something to resolve `unicode` to It's pretty much just a bug in the static checker. :/ --- src/allmydata/scripts/runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 5c966b531..93875a167 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -9,6 +9,7 @@ if PY2: import os, sys from six.moves import StringIO +from past.builtins import unicode import six try: From 975f268d8dd20505d075a4815eab7b2480471ed6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:39:32 -0400 Subject: [PATCH 35/73] Provide enough output to debug the failure --- src/allmydata/test/test_runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index c9ad4d97c..f7fe538f1 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -144,7 +144,11 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): tricky = u"\u00F6" out, err, returncode = run_bintahoe([tricky]) self.assertEqual(returncode, 1) - self.assertIn(u"Unknown command: " + tricky, out) + self.assertIn( + u"Unknown command: " + tricky, + out, + "stdout: {!r}\nstderr: {!r}".format(out, err), + ) def test_with_python_options(self): """ From 6931d10ace1a7415395918067ab3996f004e17f8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:40:22 -0400 Subject: [PATCH 36/73] Fix mis-merge: use argv parameter instead of sys.argv --- src/allmydata/scripts/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 93875a167..a818aafe8 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -170,7 +170,7 @@ def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): # On Python 2 the string may 
turn into a unicode string, e.g. the error # may be unicode, in which case it will print funny. Once we're on # Python 3 we can just drop the ensure_str(). - print(six.ensure_str("%s: %s\n" % (sys.argv[0], e)), file=stdout) + print(six.ensure_str("%s: %s\n" % (argv[0], e)), file=stdout) sys.exit(1) return config From 13dae392cba3cef3d83213a0d15b7d8417f64763 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 14:41:18 -0400 Subject: [PATCH 37/73] Go with a shorter name --- src/allmydata/scripts/runner.py | 4 ++-- src/allmydata/test/common_util.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index a818aafe8..d9cd6e720 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -129,7 +129,7 @@ def parse_options(argv, config=None): raise return config -def parse_or_exit_with_explanation_with_config(config, argv, stdout, stderr): +def parse_or_exit(config, argv, stdout, stderr): """ Parse Tahoe-LAFS CLI arguments and return a configuration object if they are valid. @@ -338,7 +338,7 @@ def _run_with_reactor(reactor, config, argv, stdout, stderr): argv = list(map(argv_to_unicode, argv)) d = defer.maybeDeferred( - parse_or_exit_with_explanation_with_config, + parse_or_exit, config, argv, stdout, diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 4db217705..b5229ca11 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -133,7 +133,7 @@ def run_cli_native(verb, *args, **kwargs): d = defer.succeed(argv) d.addCallback( partial( - runner.parse_or_exit_with_explanation_with_config, + runner.parse_or_exit, runner.Options(), ), stdout=stdout, From b56a95684310c0897c90ad2f7243c3a4d3e6bcb7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 15:42:21 -0400 Subject: [PATCH 38/73] Sort out this gross error reporting encoding/decoding mess A little, anyway --- src/allmydata/scripts/runner.py | 29 +++++++++++++++--- src/allmydata/test/test_runner.py | 50 +++++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index d9cd6e720..454c42c85 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -167,10 +167,31 @@ def parse_or_exit(config, argv, stdout, stderr): while hasattr(c, 'subOptions'): c = c.subOptions print(str(c), file=stdout) - # On Python 2 the string may turn into a unicode string, e.g. the error - # may be unicode, in which case it will print funny. Once we're on - # Python 3 we can just drop the ensure_str(). - print(six.ensure_str("%s: %s\n" % (argv[0], e)), file=stdout) + # On Python 2 the exception may hold non-ascii in a byte string. This + # makes it impossible to convert the exception to any kind of string + # using str() or unicode(). So, reach inside and get what we need. + # + # Then, since we are on Python 2, turn it into some entirely safe + # ascii that will survive being written to stdout without causing too + # much damage in the process. + # + # As a result, non-ascii will not be rendered correctly but instead as + # escape sequences. At least this can go away when we're done with + # Python 2 support. 
+ if PY2: + exc_text = e.args[0].decode( + "utf-8", + ).encode( + "ascii", + errors="backslashreplace", + ).decode( + "ascii", + ) + else: + exc_text = unicode(e) + exc_bytes = six.ensure_binary(exc_text, "utf-8") + msg_bytes = b"%s: %s\n" % (six.ensure_binary(argv[0]), exc_bytes) + print(six.ensure_text(msg_bytes, "utf-8"), file=stdout) sys.exit(1) return config diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index f7fe538f1..a420581e9 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -24,6 +24,16 @@ import six from testtools import ( skipUnless, ) +from testtools.matchers import ( + MatchesListwise, + MatchesAny, + Contains, + Equals, + Always, +) +from testtools.twistedsupport import ( + succeeded, +) from eliot import ( log_call, ) @@ -55,6 +65,7 @@ from .common import ( from .common_util import ( parse_cli, run_cli, + run_cli_unicode, ) from .cli_node_api import ( CLINodeAPI, @@ -97,7 +108,7 @@ class ParseOptionsTests(SyncTestCase): does not exist and which also contains non-ascii characters, the exception it raises includes the subcommand encoded as UTF-8. """ - tricky = u"\u2621" + tricky = u"\u00F6" try: parse_options([tricky]) except usage.error as e: @@ -107,6 +118,35 @@ class ParseOptionsTests(SyncTestCase): ) +class ParseOrExitTests(SyncTestCase): + """ + Tests for ``parse_or_exit``. + """ + def test_nonascii_error_content(self): + """ + ``parse_or_exit`` can report errors that include non-ascii content. + """ + tricky = u"\u00F6" + self.assertThat( + run_cli_unicode(tricky, [], encoding="utf-8"), + succeeded( + MatchesListwise([ + # returncode + Equals(1), + # stdout + MatchesAny( + # Python 2 + Contains(u"Unknown command: \\xf6"), + # Python 3 + Contains(u"Unknown command: \xf6"), + ), + # stderr, + Always() + ]), + ), + ) + + @log_call(action_type="run-bin-tahoe") def run_bintahoe(extra_argv, python_options=None): """ @@ -143,11 +183,15 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): """ tricky = u"\u00F6" out, err, returncode = run_bintahoe([tricky]) + if PY2: + expected = u"Unknown command: \\xf6" + else: + expected = u"Unknown command: \xf6" self.assertEqual(returncode, 1) self.assertIn( - u"Unknown command: " + tricky, + expected, out, - "stdout: {!r}\nstderr: {!r}".format(out, err), + "expected {!r} not found in {!r}\nstderr: {!r}".format(expected, out, err), ) def test_with_python_options(self): From 893d21fcbbbe330434a8ad96e3076e1746bb381c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 11 Aug 2021 16:46:29 -0400 Subject: [PATCH 39/73] Fix the UsageError closer in the Py2 codepath we already have for it --- src/allmydata/scripts/runner.py | 43 +++++++++++++------------------ src/allmydata/test/test_runner.py | 2 +- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 454c42c85..185af2322 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -120,9 +120,23 @@ def parse_options(argv, config=None): config.parseOptions(argv) except usage.error as e: if six.PY2: - # Exceptions must stringify to bytes on Python 2. + # On Python 2 the exception may hold non-ascii in a byte string. + # This makes it impossible to convert the exception to any kind of + # string using str() or unicode(). It could also hold non-ascii + # in a unicode string which still makes it difficult to convert it + # to a byte string later. 
+ # + # So, reach inside and turn it into some entirely safe ascii byte + # strings that will survive being written to stdout without + # causing too much damage in the process. + # + # As a result, non-ascii will not be rendered correctly but + # instead as escape sequences. At least this can go away when + # we're done with Python 2 support. raise usage.error(*( - arg.encode("utf-8") if isinstance(arg, unicode) else arg + arg.encode("ascii", errors="backslashreplace") + if isinstance(arg, unicode) + else arg.decode("utf-8").encode("ascii", errors="backslashreplace") for arg in e.args )) @@ -167,29 +181,8 @@ def parse_or_exit(config, argv, stdout, stderr): while hasattr(c, 'subOptions'): c = c.subOptions print(str(c), file=stdout) - # On Python 2 the exception may hold non-ascii in a byte string. This - # makes it impossible to convert the exception to any kind of string - # using str() or unicode(). So, reach inside and get what we need. - # - # Then, since we are on Python 2, turn it into some entirely safe - # ascii that will survive being written to stdout without causing too - # much damage in the process. - # - # As a result, non-ascii will not be rendered correctly but instead as - # escape sequences. At least this can go away when we're done with - # Python 2 support. - if PY2: - exc_text = e.args[0].decode( - "utf-8", - ).encode( - "ascii", - errors="backslashreplace", - ).decode( - "ascii", - ) - else: - exc_text = unicode(e) - exc_bytes = six.ensure_binary(exc_text, "utf-8") + exc_str = str(e) + exc_bytes = six.ensure_binary(exc_str, "utf-8") msg_bytes = b"%s: %s\n" % (six.ensure_binary(argv[0]), exc_bytes) print(six.ensure_text(msg_bytes, "utf-8"), file=stdout) sys.exit(1) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index a420581e9..44c7e1bee 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -113,7 +113,7 @@ class ParseOptionsTests(SyncTestCase): parse_options([tricky]) except usage.error as e: self.assertEqual( - b"Unknown command: " + tricky.encode("utf-8"), + b"Unknown command: \\xf6", b"{}".format(e), ) From 85ba6567ba8798d78e6950283d7a53d8c2c30c1e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 12 Aug 2021 12:11:39 -0400 Subject: [PATCH 40/73] Try to make sure fixed argv is used on Py27+Windows Previous version that rebound sys.argv didn't work so well with early binding used by some some functions for default argument values. --- src/allmydata/windows/fixups.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index e8d05a659..53eb14d53 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -188,7 +188,17 @@ def initialize(): # for example, the Python interpreter or any options passed to it, or runner # scripts such as 'coverage run'. It works even if there are no such arguments, # as in the case of a frozen executable created by bb-freeze or similar. - sys.argv = argv[-len(sys.argv):] + # + # Also, modify sys.argv in place. If any code has already taken a + # reference to the original argument list object then this ensures that + # code sees the new values. This reliance on mutation of shared state is, + # of course, awful. Why does this function even modify sys.argv? Why not + # have a function that *returns* the properly initialized argv as a new + # list? I don't know. 
+ # + # At least Python 3 gets sys.argv correct so before very much longer we + # should be able to fix this bad design by deleting it. + sys.argv[:] = argv[-len(sys.argv):] def a_console(handle): From dfff187ad0ce631e175c1eb1a4e0b4ad16290d5f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 19 Aug 2021 13:23:01 -0400 Subject: [PATCH 41/73] Make time pluggable to support better testing. --- src/allmydata/storage/server.py | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 6cf6f6672..fb0cf1201 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -62,7 +62,8 @@ class StorageServer(service.MultiService, Referenceable): expiration_mode="age", expiration_override_lease_duration=None, expiration_cutoff_date=None, - expiration_sharetypes=("mutable", "immutable")): + expiration_sharetypes=("mutable", "immutable"), + get_current_time=time.time): service.MultiService.__init__(self) assert isinstance(nodeid, bytes) assert len(nodeid) == 20 @@ -114,6 +115,7 @@ class StorageServer(service.MultiService, Referenceable): expiration_cutoff_date, expiration_sharetypes) self.lease_checker.setServiceParent(self) + self._get_current_time = get_current_time def __repr__(self): return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) @@ -264,7 +266,7 @@ class StorageServer(service.MultiService, Referenceable): # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated # to a particular owner. - start = time.time() + start = self._get_current_time() self.count("allocate") alreadygot = set() bucketwriters = {} # k: shnum, v: BucketWriter @@ -277,7 +279,7 @@ class StorageServer(service.MultiService, Referenceable): # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. 
- expire_time = time.time() + 31*24*60*60 + expire_time = self._get_current_time() + 31*24*60*60 lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) @@ -331,7 +333,7 @@ class StorageServer(service.MultiService, Referenceable): if bucketwriters: fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) - self.add_latency("allocate", time.time() - start) + self.add_latency("allocate", self._get_current_time() - start) return alreadygot, bucketwriters def _iter_share_files(self, storage_index): @@ -351,26 +353,26 @@ class StorageServer(service.MultiService, Referenceable): def remote_add_lease(self, storage_index, renew_secret, cancel_secret, owner_num=1): - start = time.time() + start = self._get_current_time() self.count("add-lease") - new_expire_time = time.time() + 31*24*60*60 + new_expire_time = self._get_current_time() + 31*24*60*60 lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, new_expire_time, self.my_nodeid) for sf in self._iter_share_files(storage_index): sf.add_or_renew_lease(lease_info) - self.add_latency("add-lease", time.time() - start) + self.add_latency("add-lease", self._get_current_time() - start) return None def remote_renew_lease(self, storage_index, renew_secret): - start = time.time() + start = self._get_current_time() self.count("renew") - new_expire_time = time.time() + 31*24*60*60 + new_expire_time = self._get_current_time() + 31*24*60*60 found_buckets = False for sf in self._iter_share_files(storage_index): found_buckets = True sf.renew_lease(renew_secret, new_expire_time) - self.add_latency("renew", time.time() - start) + self.add_latency("renew", self._get_current_time() - start) if not found_buckets: raise IndexError("no such lease to renew") @@ -394,7 +396,7 @@ class StorageServer(service.MultiService, Referenceable): pass def remote_get_buckets(self, storage_index): - start = time.time() + start = self._get_current_time() self.count("get") si_s = si_b2a(storage_index) log.msg("storage: get_buckets %r" % si_s) @@ -402,7 +404,7 @@ class StorageServer(service.MultiService, Referenceable): for shnum, filename in self._get_bucket_shares(storage_index): bucketreaders[shnum] = BucketReader(self, filename, storage_index, shnum) - self.add_latency("get", time.time() - start) + self.add_latency("get", self._get_current_time() - start) return bucketreaders def get_leases(self, storage_index): @@ -563,7 +565,7 @@ class StorageServer(service.MultiService, Referenceable): :return LeaseInfo: Information for a new lease for a share. """ ownerid = 1 # TODO - expire_time = time.time() + 31*24*60*60 # one month + expire_time = self._get_current_time() + 31*24*60*60 # one month lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret, expire_time, self.my_nodeid) @@ -599,7 +601,7 @@ class StorageServer(service.MultiService, Referenceable): See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. 
""" - start = time.time() + start = self._get_current_time() self.count("writev") si_s = si_b2a(storage_index) log.msg("storage: slot_writev %r" % si_s) @@ -640,7 +642,7 @@ class StorageServer(service.MultiService, Referenceable): self._add_or_renew_leases(remaining_shares, lease_info) # all done - self.add_latency("writev", time.time() - start) + self.add_latency("writev", self._get_current_time() - start) return (testv_is_good, read_data) def remote_slot_testv_and_readv_and_writev(self, storage_index, @@ -666,7 +668,7 @@ class StorageServer(service.MultiService, Referenceable): return share def remote_slot_readv(self, storage_index, shares, readv): - start = time.time() + start = self._get_current_time() self.count("readv") si_s = si_b2a(storage_index) lp = log.msg("storage: slot_readv %r %r" % (si_s, shares), @@ -675,7 +677,7 @@ class StorageServer(service.MultiService, Referenceable): # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) if not os.path.isdir(bucketdir): - self.add_latency("readv", time.time() - start) + self.add_latency("readv", self._get_current_time() - start) return {} datavs = {} for sharenum_s in os.listdir(bucketdir): @@ -689,7 +691,7 @@ class StorageServer(service.MultiService, Referenceable): datavs[sharenum] = msf.readv(readv) log.msg("returning shares %s" % (list(datavs.keys()),), facility="tahoe.storage", level=log.NOISY, parent=lp) - self.add_latency("readv", time.time() - start) + self.add_latency("readv", self._get_current_time() - start) return datavs def remote_advise_corrupt_share(self, share_type, storage_index, shnum, From d599568c7988b2769d72c9e63ab4a8368fb9867c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 19 Aug 2021 13:56:13 -0400 Subject: [PATCH 42/73] Tests validating that the operation for adding a lease will renew the lease instead if it already exists. --- src/allmydata/storage/server.py | 5 +- src/allmydata/test/test_storage.py | 116 ++++++++++++++++++++++------- 2 files changed, 92 insertions(+), 29 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index fb0cf1201..4615c9ec9 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -49,6 +49,9 @@ from allmydata.storage.expirer import LeaseCheckingCrawler NUM_RE=re.compile("^[0-9]+$") +# Number of seconds to add to expiration time on lease renewal: +DEFAULT_RENEWAL_TIME = 31 * 24 * 60 * 60 + @implementer(RIStorageServer, IStatsProducer) class StorageServer(service.MultiService, Referenceable): @@ -279,7 +282,7 @@ class StorageServer(service.MultiService, Referenceable): # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. 
- expire_time = self._get_current_time() + 31*24*60*60 + expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index d4de0c18b..8ad99a7ab 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -24,11 +24,12 @@ import gc from twisted.trial import unittest from twisted.internet import defer +from twisted.internet.task import Clock import itertools from allmydata import interfaces from allmydata.util import fileutil, hashutil, base32 -from allmydata.storage.server import StorageServer +from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile @@ -168,7 +169,7 @@ class Bucket(unittest.TestCase): assert len(renewsecret) == 32 cancelsecret = b'THIS LETS ME KILL YOUR FILE HAHA' assert len(cancelsecret) == 32 - expirationtime = struct.pack('>L', 60*60*24*31) # 31 days in seconds + expirationtime = struct.pack('>L', DEFAULT_RENEWAL_TIME) # 31 days in seconds lease_data = ownernumber + renewsecret + cancelsecret + expirationtime @@ -354,10 +355,11 @@ class Server(unittest.TestCase): basedir = os.path.join("storage", "Server", name) return basedir - def create(self, name, reserved_space=0, klass=StorageServer): + def create(self, name, reserved_space=0, klass=StorageServer, get_current_time=time.time): workdir = self.workdir(name) ss = klass(workdir, b"\x00" * 20, reserved_space=reserved_space, - stats_provider=FakeStatsProvider()) + stats_provider=FakeStatsProvider(), + get_current_time=get_current_time) ss.setServiceParent(self.sparent) return ss @@ -646,6 +648,25 @@ class Server(unittest.TestCase): f2 = open(filename, "rb") self.failUnlessEqual(f2.read(5), b"start") + def create_bucket(self, ss, storage_index, expected_already=0, expected_writers=5): + """ + Given a StorageServer, create a bucket and return renewal and + cancellation secrets. 
+ """ + canary = FakeCanary() + sharenums = list(range(5)) + size = 100 + + # Creating a bucket also creates a lease: + rs, cs = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), + hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) + already, writers = ss.remote_allocate_buckets(storage_index, rs, cs, + sharenums, size, canary) + self.failUnlessEqual(len(already), expected_already) + self.failUnlessEqual(len(writers), expected_writers) + for wb in writers.values(): + wb.remote_close() + return rs, cs def test_leases(self): ss = self.create("test_leases") @@ -653,34 +674,16 @@ class Server(unittest.TestCase): sharenums = list(range(5)) size = 100 - rs0,cs0 = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) - already,writers = ss.remote_allocate_buckets(b"si0", rs0, cs0, - sharenums, size, canary) - self.failUnlessEqual(len(already), 0) - self.failUnlessEqual(len(writers), 5) - for wb in writers.values(): - wb.remote_close() - + # Create a bucket: + rs0, cs0 = self.create_bucket(ss, b"si0") leases = list(ss.get_leases(b"si0")) self.failUnlessEqual(len(leases), 1) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs0])) - rs1,cs1 = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) - already,writers = ss.remote_allocate_buckets(b"si1", rs1, cs1, - sharenums, size, canary) - for wb in writers.values(): - wb.remote_close() + rs1, cs1 = self.create_bucket(ss, b"si1") # take out a second lease on si1 - rs2,cs2 = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) - already,writers = ss.remote_allocate_buckets(b"si1", rs2, cs2, - sharenums, size, canary) - self.failUnlessEqual(len(already), 5) - self.failUnlessEqual(len(writers), 0) - + rs2, cs2 = self.create_bucket(ss, b"si1", 5, 0) leases = list(ss.get_leases(b"si1")) self.failUnlessEqual(len(leases), 2) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2])) @@ -741,6 +744,27 @@ class Server(unittest.TestCase): leases = list(ss.get_leases(b"si3")) self.failUnlessEqual(len(leases), 2) + def test_immutable_add_lease_renews(self): + """ + Adding a lease on an already leased immutable just renews it. 
+ """ + clock = Clock() + clock.advance(123) + ss = self.create("test_immutable_add_lease_renews", get_current_time=clock.seconds) + + # Start out with single lease created with bucket: + renewal_secret, cancel_secret = self.create_bucket(ss, b"si0") + [lease] = ss.get_leases(b"si0") + self.assertEqual(lease.expiration_time, 123 + DEFAULT_RENEWAL_TIME) + + # Time passes: + clock.advance(123456) + + # Adding a lease with matching renewal secret just renews it: + ss.remote_add_lease(b"si0", renewal_secret, cancel_secret) + [lease] = ss.get_leases(b"si0") + self.assertEqual(lease.expiration_time, 123 + 123456 + DEFAULT_RENEWAL_TIME) + def test_have_shares(self): """By default the StorageServer has no shares.""" workdir = self.workdir("test_have_shares") @@ -840,9 +864,10 @@ class MutableServer(unittest.TestCase): basedir = os.path.join("storage", "MutableServer", name) return basedir - def create(self, name): + def create(self, name, get_current_time=time.time): workdir = self.workdir(name) - ss = StorageServer(workdir, b"\x00" * 20) + ss = StorageServer(workdir, b"\x00" * 20, + get_current_time=get_current_time) ss.setServiceParent(self.sparent) return ss @@ -1379,6 +1404,41 @@ class MutableServer(unittest.TestCase): {0: ([], [(500, b"make me really bigger")], None)}, []) self.compare_leases_without_timestamps(all_leases, list(s0.get_leases())) + def test_mutable_add_lease_renews(self): + """ + Adding a lease on an already leased mutable just renews it. + """ + clock = Clock() + clock.advance(235) + ss = self.create("test_mutable_add_lease_renews", + get_current_time=clock.seconds) + def secrets(n): + return ( self.write_enabler(b"we1"), + self.renew_secret(b"we1-%d" % n), + self.cancel_secret(b"we1-%d" % n) ) + data = b"".join([ (b"%d" % i) * 10 for i in range(10) ]) + write = ss.remote_slot_testv_and_readv_and_writev + read = ss.remote_slot_readv + write_enabler, renew_secret, cancel_secret = secrets(0) + rc = write(b"si1", (write_enabler, renew_secret, cancel_secret), + {0: ([], [(0,data)], None)}, []) + self.failUnlessEqual(rc, (True, {})) + + bucket_dir = os.path.join(self.workdir("test_mutable_add_lease_renews"), + "shares", storage_index_to_dir(b"si1")) + s0 = MutableShareFile(os.path.join(bucket_dir, "0")) + [lease] = s0.get_leases() + self.assertEqual(lease.expiration_time, 235 + DEFAULT_RENEWAL_TIME) + + # Time passes... + clock.advance(835) + + # Adding a lease renews it: + ss.remote_add_lease(b"si1", renew_secret, cancel_secret) + [lease] = s0.get_leases() + self.assertEqual(lease.expiration_time, + 235 + 835 + DEFAULT_RENEWAL_TIME) + def test_remove(self): ss = self.create("test_remove") self.allocate(ss, b"si1", b"we1", next(self._lease_secret), From 00163690258c5dd8242a470775211109bcdffa37 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 19 Aug 2021 16:07:03 -0400 Subject: [PATCH 43/73] News file. --- newsfragments/3773.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3773.minor diff --git a/newsfragments/3773.minor b/newsfragments/3773.minor new file mode 100644 index 000000000..e69de29bb From 59fab99d9d52c4d172087256f6cba1dd5b9f9a7e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 19 Aug 2021 16:40:45 -0400 Subject: [PATCH 44/73] Nothing uses RIStorageServer.renew_lease, so removing it is simple. 
--- src/allmydata/interfaces.py | 24 ------------------------ src/allmydata/storage_client.py | 11 ----------- 2 files changed, 35 deletions(-) diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 2e055a888..1c64bce8a 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -154,25 +154,9 @@ class RIStorageServer(RemoteInterface): """ return Any() # returns None now, but future versions might change - def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret): - """ - Renew the lease on a given bucket, resetting the timer to 31 days. - Some networks will use this, some will not. If there is no bucket for - the given storage_index, IndexError will be raised. - - For mutable shares, if the given renew_secret does not match an - existing lease, IndexError will be raised with a note listing the - server-nodeids on the existing leases, so leases on migrated shares - can be renewed. For immutable shares, IndexError (without the note) - will be raised. - """ - return Any() - def get_buckets(storage_index=StorageIndex): return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS) - - def slot_readv(storage_index=StorageIndex, shares=ListOf(int), readv=ReadVector): """Read a vector from the numbered shares associated with the given @@ -343,14 +327,6 @@ class IStorageServer(Interface): :see: ``RIStorageServer.add_lease`` """ - def renew_lease( - storage_index, - renew_secret, - ): - """ - :see: ``RIStorageServer.renew_lease`` - """ - def get_buckets( storage_index, ): diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index b0bcc6835..22dd09bcb 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -965,17 +965,6 @@ class _StorageServer(object): cancel_secret, ) - def renew_lease( - self, - storage_index, - renew_secret, - ): - return self._rref.callRemote( - "renew_lease", - storage_index, - renew_secret, - ) - def get_buckets( self, storage_index, From 442d61da7db3776955241b4c85af3e52af363ba9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 19 Aug 2021 16:44:05 -0400 Subject: [PATCH 45/73] Get rid of separate renewal of leases in HTTP API. --- docs/proposed/http-storage-node-protocol.rst | 28 +------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 41a0a0fea..d1f63afed 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -369,7 +369,7 @@ For example:: ``PUT /v1/lease/:storage_index`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Create a new lease that applies to all shares for the given storage index. +Either renew or create a new lease that applies to all shares for the given storage index. The details of the lease are encoded in the request body. For example:: @@ -394,37 +394,11 @@ Several behaviors here are blindly copied from the Foolscap-based storage server * There is a cancel secret but there is no API to use it to cancel a lease. * The lease period is hard-coded at 31 days. * There is no way to differentiate between success and an unknown **storage index**. -* There are separate **add** and **renew** lease APIs. These are not necessarily ideal behaviors but they are adopted to avoid any *semantic* changes between the Foolscap- and HTTP-based protocols. It is expected that some or all of these behaviors may change in a future revision of the HTTP-based protocol. 
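As a minimal client-side sketch of the renew-or-create behaviour described above, assuming a Python client built on ``requests``, an already-known ``base_url`` for the storage service, and ignoring authorization (which is specified later in this series)::

    import requests

    def renew_or_add_lease(base_url, storage_index, renew_secret, cancel_secret):
        # PUT /v1/lease/:storage_index either renews the lease matching
        # renew-secret or creates a new lease; success is 204 NO CONTENT.
        response = requests.put(
            "{}/v1/lease/{}".format(base_url, storage_index),
            json={"renew-secret": renew_secret, "cancel-secret": cancel_secret},
        )
        response.raise_for_status()
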
-``POST /v1/lease/:storage_index`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -Renew an existing lease for all shares for the given storage index. -The details of the lease are encoded in the request body. -For example:: - - {"renew-secret": "abcd"} - -If there are no shares for the given ``storage_index`` -then ``NOT FOUND`` is returned. - -If there is no lease with a matching ``renew-secret`` value on the given storage index -then ``NOT FOUND`` is returned. -In this case, -if the storage index refers to mutable data -then the response also includes a list of nodeids where the lease can be renewed. -For example:: - - {"nodeids": ["aaa...", "bbb..."]} - -Othewise, -the matching lease's expiration time is changed to be 31 days from the time of this operation -and ``NO CONTENT`` is returned. - Immutable --------- From 1b5a3c9cb17b987e687dfe88910cd500b263afdd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 19 Aug 2021 16:49:01 -0400 Subject: [PATCH 46/73] Bad merge, I think. --- docs/proposed/http-storage-node-protocol.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 99840620f..30ccaa9d2 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -400,7 +400,6 @@ Several behaviors here are blindly copied from the Foolscap-based storage server * There is a cancel secret but there is no API to use it to cancel a lease (see ticket:3768). * The lease period is hard-coded at 31 days. -* There is no way to differentiate between success and an unknown **storage index**. These are not necessarily ideal behaviors but they are adopted to avoid any *semantic* changes between the Foolscap- and HTTP-based protocols. From 370d1ddafe0a7267a456d6107b854a08fee3af05 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 Aug 2021 08:20:24 -0400 Subject: [PATCH 47/73] Fix flake. --- src/allmydata/test/test_storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 8ad99a7ab..32c2b25b8 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1418,7 +1418,6 @@ class MutableServer(unittest.TestCase): self.cancel_secret(b"we1-%d" % n) ) data = b"".join([ (b"%d" % i) * 10 for i in range(10) ]) write = ss.remote_slot_testv_and_readv_and_writev - read = ss.remote_slot_readv write_enabler, renew_secret, cancel_secret = secrets(0) rc = write(b"si1", (write_enabler, renew_secret, cancel_secret), {0: ([], [(0,data)], None)}, []) From 11331ddf8209249ebba6280e17db52d1534b3459 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 Aug 2021 11:17:20 -0400 Subject: [PATCH 48/73] Update examples to lack of separate renewal endpoint. --- docs/proposed/http-storage-node-protocol.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 30ccaa9d2..dd696c340 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -650,8 +650,8 @@ Immutable Data #. Renew the lease on all immutable shares in bucket ``AAAAAAAAAAAAAAAA``:: - POST /v1/lease/AAAAAAAAAAAAAAAA - {"renew-secret": "efgh"} + PUT /v1/lease/AAAAAAAAAAAAAAAA + {"renew-secret": "efgh", "cancel-secret": "ijkl"} 204 NO CONTENT @@ -731,8 +731,8 @@ Mutable Data #. 
Renew the lease on previously uploaded mutable share in slot ``BBBBBBBBBBBBBBBB``:: - POST /v1/lease/BBBBBBBBBBBBBBBB - {"renew-secret": "efgh"} + PUT /v1/lease/BBBBBBBBBBBBBBBB + {"renew-secret": "efgh", "cancel-secret": "ijkl"} 204 NO CONTENT From 7d32335353e4960b3be9d7b8ad91214e493e9b4f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 30 Aug 2021 16:49:11 -0400 Subject: [PATCH 49/73] Clarify and remove duplication. --- src/allmydata/storage/server.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 4615c9ec9..56b47ae89 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -49,7 +49,8 @@ from allmydata.storage.expirer import LeaseCheckingCrawler NUM_RE=re.compile("^[0-9]+$") -# Number of seconds to add to expiration time on lease renewal: +# Number of seconds to add to expiration time on lease renewal. +# For now it's not actually configurable, but maybe someday. DEFAULT_RENEWAL_TIME = 31 * 24 * 60 * 60 @@ -358,7 +359,7 @@ class StorageServer(service.MultiService, Referenceable): owner_num=1): start = self._get_current_time() self.count("add-lease") - new_expire_time = self._get_current_time() + 31*24*60*60 + new_expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, new_expire_time, self.my_nodeid) @@ -370,7 +371,7 @@ class StorageServer(service.MultiService, Referenceable): def remote_renew_lease(self, storage_index, renew_secret): start = self._get_current_time() self.count("renew") - new_expire_time = self._get_current_time() + 31*24*60*60 + new_expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME found_buckets = False for sf in self._iter_share_files(storage_index): found_buckets = True @@ -568,7 +569,7 @@ class StorageServer(service.MultiService, Referenceable): :return LeaseInfo: Information for a new lease for a share. """ ownerid = 1 # TODO - expire_time = self._get_current_time() + 31*24*60*60 # one month + expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret, expire_time, self.my_nodeid) From e408322c3da5c6b93d5eab5755985c0b4323c3ae Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 30 Aug 2021 16:51:36 -0400 Subject: [PATCH 50/73] Use the correct APIs. 
--- src/allmydata/test/test_storage.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 32c2b25b8..4b76fed31 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -386,8 +386,8 @@ class Server(unittest.TestCase): self.failUnlessIn(b'available-space', sv1) def allocate(self, ss, storage_index, sharenums, size, canary=None): - renew_secret = hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)) - cancel_secret = hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)) + renew_secret = hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)) + cancel_secret = hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret)) if not canary: canary = FakeCanary() return ss.remote_allocate_buckets(storage_index, @@ -658,8 +658,8 @@ class Server(unittest.TestCase): size = 100 # Creating a bucket also creates a lease: - rs, cs = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) + rs, cs = (hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)), + hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret))) already, writers = ss.remote_allocate_buckets(storage_index, rs, cs, sharenums, size, canary) self.failUnlessEqual(len(already), expected_already) @@ -689,8 +689,8 @@ class Server(unittest.TestCase): self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2])) # and a third lease, using add-lease - rs2a,cs2a = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) + rs2a,cs2a = (hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)), + hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret))) ss.remote_add_lease(b"si1", rs2a, cs2a) leases = list(ss.get_leases(b"si1")) self.failUnlessEqual(len(leases), 3) @@ -718,10 +718,10 @@ class Server(unittest.TestCase): "ss should not have a 'remote_cancel_lease' method/attribute") # test overlapping uploads - rs3,cs3 = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) - rs4,cs4 = (hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret)), - hashutil.tagged_hash(b"blah", b"%d" % next(self._lease_secret))) + rs3,cs3 = (hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)), + hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret))) + rs4,cs4 = (hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)), + hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret))) already,writers = ss.remote_allocate_buckets(b"si3", rs3, cs3, sharenums, size, canary) self.failUnlessEqual(len(already), 0) From e6803670d1aae43d349af240b586d4aefffef86e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 30 Aug 2021 16:54:44 -0400 Subject: [PATCH 51/73] Improve explanations. 
--- src/allmydata/test/test_storage.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 4b76fed31..bd0ab80f3 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -648,10 +648,12 @@ class Server(unittest.TestCase): f2 = open(filename, "rb") self.failUnlessEqual(f2.read(5), b"start") - def create_bucket(self, ss, storage_index, expected_already=0, expected_writers=5): + def create_bucket_5_shares( + self, ss, storage_index, expected_already=0, expected_writers=5 + ): """ - Given a StorageServer, create a bucket and return renewal and - cancellation secrets. + Given a StorageServer, create a bucket with 5 shares and return renewal + and cancellation secrets. """ canary = FakeCanary() sharenums = list(range(5)) @@ -675,15 +677,15 @@ class Server(unittest.TestCase): size = 100 # Create a bucket: - rs0, cs0 = self.create_bucket(ss, b"si0") + rs0, cs0 = self.create_bucket_5_shares(ss, b"si0") leases = list(ss.get_leases(b"si0")) self.failUnlessEqual(len(leases), 1) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs0])) - rs1, cs1 = self.create_bucket(ss, b"si1") + rs1, cs1 = self.create_bucket_5_shares(ss, b"si1") # take out a second lease on si1 - rs2, cs2 = self.create_bucket(ss, b"si1", 5, 0) + rs2, cs2 = self.create_bucket_5_shares(ss, b"si1", 5, 0) leases = list(ss.get_leases(b"si1")) self.failUnlessEqual(len(leases), 2) self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2])) @@ -746,14 +748,15 @@ class Server(unittest.TestCase): def test_immutable_add_lease_renews(self): """ - Adding a lease on an already leased immutable just renews it. + Adding a lease on an already leased immutable with the same secret just + renews it. """ clock = Clock() clock.advance(123) ss = self.create("test_immutable_add_lease_renews", get_current_time=clock.seconds) # Start out with single lease created with bucket: - renewal_secret, cancel_secret = self.create_bucket(ss, b"si0") + renewal_secret, cancel_secret = self.create_bucket_5_shares(ss, b"si0") [lease] = ss.get_leases(b"si0") self.assertEqual(lease.expiration_time, 123 + DEFAULT_RENEWAL_TIME) @@ -1406,7 +1409,8 @@ class MutableServer(unittest.TestCase): def test_mutable_add_lease_renews(self): """ - Adding a lease on an already leased mutable just renews it. + Adding a lease on an already leased mutable with the same secret just + renews it. """ clock = Clock() clock.advance(235) From 16af282c193430c38287f475c8a85b3830c90f1a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 2 Sep 2021 13:11:51 -0400 Subject: [PATCH 52/73] Clarify chunking; lift ordering requirement; document response status --- docs/proposed/http-storage-node-protocol.rst | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index dd696c340..5012d4143 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -458,17 +458,21 @@ The response includes ``already-have`` and ``allocated`` for two reasons: Write data for the indicated share. The share number must belong to the storage index. The request body is the raw share data (i.e., ``application/octet-stream``). -*Content-Range* requests are encouraged for large transfers. 
+*Content-Range* requests are encouraged for large transfers to allow partially complete uploads to be resumed. For example, -for a 1MiB share the data can be broken in to 8 128KiB chunks. -Each chunk can be *PUT* separately with the appropriate *Content-Range* header. +a 1MiB share can be divided in to eight separate 128KiB chunks. +Each chunk can be uploaded in a separate request. +Each request can include a *Content-Range* value indicating its placement within the complete share. +If any one of these requests fails then at most 128KiB of upload work needs to be retried. + The server must recognize when all of the data has been received and mark the share as complete (which it can do because it was informed of the size when the storage index was initialized). Clients should upload chunks in re-assembly order. -Servers may reject out-of-order chunks for implementation simplicity. -If an individual *PUT* fails then only a limited amount of effort is wasted on the necessary retry. -.. think about copying https://developers.google.com/drive/api/v2/resumable-upload + +When a chunk that does not complete the share is successfully uploaded the response is ``OK``. +When the chunk that completes the share is successfully uploaded the response is ``CREATED``. +If the *Content-Range* for a request covers part of the share that has already been uploaded the response is ``CONFLICT``. ``POST /v1/immutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! From 61b5c8873834d458e58bdbab7ee0fb7fb385211b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 2 Sep 2021 13:16:20 -0400 Subject: [PATCH 53/73] reveal to clients what data is still required This lets a client recover from an upload that completes but for which the response is lost (eg because network error or client restart) --- docs/proposed/http-storage-node-protocol.rst | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 5012d4143..d7a41e827 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -469,10 +469,21 @@ The server must recognize when all of the data has been received and mark the sh (which it can do because it was informed of the size when the storage index was initialized). Clients should upload chunks in re-assembly order. +* When a chunk that does not complete the share is successfully uploaded the response is ``OK``. +* When the chunk that completes the share is successfully uploaded the response is ``CREATED``. +* If the *Content-Range* for a request covers part of the share that has already been uploaded the response is ``CONFLICT``. + The response body indicates the range of share data that has yet to be uploaded. + That is:: + + { "required": + [ { "begin": + , "end": + } + , + ... + ] + } -When a chunk that does not complete the share is successfully uploaded the response is ``OK``. -When the chunk that completes the share is successfully uploaded the response is ``CREATED``. -If the *Content-Range* for a request covers part of the share that has already been uploaded the response is ``CONFLICT``. ``POST /v1/immutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
From ae9ec48e131b90dfc5118551cf7e8888bd650b32 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 2 Sep 2021 13:19:49 -0400 Subject: [PATCH 54/73] news fragment --- newsfragments/3769.documentation | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3769.documentation diff --git a/newsfragments/3769.documentation b/newsfragments/3769.documentation new file mode 100644 index 000000000..3d4ef7d4c --- /dev/null +++ b/newsfragments/3769.documentation @@ -0,0 +1 @@ +The Great Black Swamp specification now allows parallel upload of immutable share data. From 78f70d6bdc1853537d7cc865f69368be541f746d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 2 Sep 2021 15:53:14 -0400 Subject: [PATCH 55/73] write some words about lease renewal secrets --- docs/proposed/http-storage-node-protocol.rst | 5 +++ docs/specifications/index.rst | 1 + docs/specifications/lease.rst | 41 ++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 docs/specifications/lease.rst diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index d7a41e827..09c193c65 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -380,6 +380,11 @@ then the expiration time of that lease will be changed to 31 days after the time If it does not match an existing lease then a new lease will be created with this ``renew-secret`` which expires 31 days after the time of this operation. +The renew and cancel secrets must be 32 bytes long +(or in the case of JSON encoding they must UTF-8 encode to 32 bytes). +The server treats them as opaque values. +See `leases`_ for details about how the Tahoe-LAFS storage client constructs these values. + In these cases the response is ``NO CONTENT`` with an empty body. It is possible that the storage server will have no shares for the given ``storage_index`` because: diff --git a/docs/specifications/index.rst b/docs/specifications/index.rst index 2029c9e5a..e813acf07 100644 --- a/docs/specifications/index.rst +++ b/docs/specifications/index.rst @@ -14,5 +14,6 @@ the data formats used by Tahoe. URI-extension mutable dirnodes + lease servers-of-happiness backends/raic diff --git a/docs/specifications/lease.rst b/docs/specifications/lease.rst new file mode 100644 index 000000000..fd5535701 --- /dev/null +++ b/docs/specifications/lease.rst @@ -0,0 +1,41 @@ +.. -*- coding: utf-8 -*- + +Share Leases +============ + +A lease is a marker attached to a share indicating that some client has asked for that share to be retained for some amount of time. +The intent is to allow clients and servers to collaborate to determine which data should still be retained and which can be discarded to reclaim storage space. +Zero or more leases may be attached to any particular share. + +Renewal Secrets +--------------- + +Each lease is uniquely identified by its **renewal secret**. +This is a 32 byte string which can be used to extend the validity period of that lease. + +To a storage server a renewal secret is an opaque value which is only ever compared to other renewal secrets to determine equality. + +Storage clients will typically want to follow a scheme to deterministically derive the renewal secret for a particular share from information the client already holds about that share. +This allows a client to maintain and renew single long-lived lease without maintaining additional local state. + +The scheme in use in Tahoe-LAFS as of 1.16.0 is as follows. 
+ +* The **netstring encoding** of a byte string is the concatenation of: + * the base 10 representation of the length of the string + * ":" + * the string + * "," +* The **sha256d digest** is the **sha256 digest** of the **sha256 digest** of a string. +* The **sha256d tagged digest** is the **sha256d digest** of the concatenation of the **netstring encoding** of one string with one other unmodified string. +* The **sha256d tagged pair digest** the **sha256d digest** of the concatenation of the **netstring encodings** of each of three strings. +* The **bucket renewal tag** is ``allmydata_bucket_renewal_secret_v1``. +* The **file renewal tag** is ``allmydata_file_renewal_secret_v1``. +* The **client renewal tag** is ``allmydata_client_renewal_secret_v1``. +* The **lease secret** is a 32 byte string, typically randomly generated once and then persisted for all future uses. +* The **client renewal secret** is the **sha256d tagged digest** of (**lease secret**, **client renewal tag**). +* The **storage index** is constructed using a capability-type-specific scheme. + See ``storage_index_hash`` and ``ssk_storage_index_hash`` calls in ``src/allmydata/uri.py``. +* The **file renewal secret** is the **sha256d tagged pair digest** of (**file renewal tag**, **client renewal secret**, **storage index**). +* The **base32 encoding** is ``base64.b32encode`` lowercased and with trailing ``=`` stripped. +* The **peer id** is the **base32 encoding** of the SHA1 digest of the server's x509 certificate. +* The **renewal secret** is the **sha256d tagged pair digest** of (**bucket renewal tag**, **file renewal secret**, **peer id**). From b6173eea839a21cdefedb6117cb5edf9b0fbee02 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 2 Sep 2021 16:42:27 -0400 Subject: [PATCH 56/73] news fragment --- docs/3774.documentation | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/3774.documentation diff --git a/docs/3774.documentation b/docs/3774.documentation new file mode 100644 index 000000000..d58105966 --- /dev/null +++ b/docs/3774.documentation @@ -0,0 +1 @@ +There is now a specification for the scheme which Tahoe-LAFS storage clients use to derive their lease renewal secrets. From 11a0b8d20979b1d0340819bd94b6b614496abde0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 2 Sep 2021 16:44:42 -0400 Subject: [PATCH 57/73] attempt to appease rst --- docs/specifications/lease.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/specifications/lease.rst b/docs/specifications/lease.rst index fd5535701..bf8e8f181 100644 --- a/docs/specifications/lease.rst +++ b/docs/specifications/lease.rst @@ -21,10 +21,12 @@ This allows a client to maintain and renew single long-lived lease without maint The scheme in use in Tahoe-LAFS as of 1.16.0 is as follows. * The **netstring encoding** of a byte string is the concatenation of: + * the base 10 representation of the length of the string * ":" * the string * "," + * The **sha256d digest** is the **sha256 digest** of the **sha256 digest** of a string. * The **sha256d tagged digest** is the **sha256d digest** of the concatenation of the **netstring encoding** of one string with one other unmodified string. * The **sha256d tagged pair digest** the **sha256d digest** of the concatenation of the **netstring encodings** of each of three strings. 
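The derivation above can be written out as a short, self-contained Python sketch; it follows the definitions in this specification directly and is only an illustration (a fuller reference implementation, ``derive_renewal_secret.py``, is added later in this series)::

    import hashlib

    def netstring(s):
        # "<base 10 length>" + ":" + <the string> + ","
        return b"%d:%s," % (len(s), s)

    def sha256d(data):
        # the sha256 digest of the sha256 digest
        return hashlib.sha256(hashlib.sha256(data).digest()).digest()

    def tagged_digest(first, second):
        # netstring-encode the first string, leave the second unmodified
        return sha256d(netstring(first) + second)

    def tagged_pair_digest(tag, left, right):
        # netstring-encode all three strings
        return sha256d(netstring(tag) + netstring(left) + netstring(right))

    def derive_renewal_secret(lease_secret, storage_index, peer_id):
        # lease_secret: 32 random bytes persisted by the client
        # storage_index: derived by a capability-type-specific scheme
        # peer_id: base32 encoding of the SHA1 digest of the server certificate
        client_renewal_secret = tagged_digest(
            lease_secret, b"allmydata_client_renewal_secret_v1")
        file_renewal_secret = tagged_pair_digest(
            b"allmydata_file_renewal_secret_v1",
            client_renewal_secret, storage_index)
        return tagged_pair_digest(
            b"allmydata_bucket_renewal_secret_v1",
            file_renewal_secret, peer_id)
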
From bb63331720f3e3d32fc6e2775e057064a1cc28b7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 09:04:08 -0400 Subject: [PATCH 58/73] put the newsfragment in the right place --- {docs => newsfragments}/3774.documentation | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {docs => newsfragments}/3774.documentation (100%) diff --git a/docs/3774.documentation b/newsfragments/3774.documentation similarity index 100% rename from docs/3774.documentation rename to newsfragments/3774.documentation From 3ba379ce7e90736adbc2b325f9965117bec09690 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 09:06:27 -0400 Subject: [PATCH 59/73] some formatting improvements --- docs/specifications/lease.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/specifications/lease.rst b/docs/specifications/lease.rst index bf8e8f181..a8bee427e 100644 --- a/docs/specifications/lease.rst +++ b/docs/specifications/lease.rst @@ -22,17 +22,17 @@ The scheme in use in Tahoe-LAFS as of 1.16.0 is as follows. * The **netstring encoding** of a byte string is the concatenation of: - * the base 10 representation of the length of the string - * ":" - * the string - * "," + * the ascii encoding of the base 10 representation of the length of the string + * ``":"`` + * the string itself + * ``","`` * The **sha256d digest** is the **sha256 digest** of the **sha256 digest** of a string. * The **sha256d tagged digest** is the **sha256d digest** of the concatenation of the **netstring encoding** of one string with one other unmodified string. * The **sha256d tagged pair digest** the **sha256d digest** of the concatenation of the **netstring encodings** of each of three strings. -* The **bucket renewal tag** is ``allmydata_bucket_renewal_secret_v1``. -* The **file renewal tag** is ``allmydata_file_renewal_secret_v1``. -* The **client renewal tag** is ``allmydata_client_renewal_secret_v1``. +* The **bucket renewal tag** is ``"allmydata_bucket_renewal_secret_v1"``. +* The **file renewal tag** is ``"allmydata_file_renewal_secret_v1"``. +* The **client renewal tag** is ``"allmydata_client_renewal_secret_v1"``. * The **lease secret** is a 32 byte string, typically randomly generated once and then persisted for all future uses. * The **client renewal secret** is the **sha256d tagged digest** of (**lease secret**, **client renewal tag**). * The **storage index** is constructed using a capability-type-specific scheme. From 8fe9532faf64c9d65f46f0ecf92d38af2634568e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 09:17:34 -0400 Subject: [PATCH 60/73] get the cross-reference right --- docs/proposed/http-storage-node-protocol.rst | 2 +- docs/specifications/lease.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 09c193c65..47f94db49 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -383,7 +383,7 @@ then a new lease will be created with this ``renew-secret`` which expires 31 day The renew and cancel secrets must be 32 bytes long (or in the case of JSON encoding they must UTF-8 encode to 32 bytes). The server treats them as opaque values. -See `leases`_ for details about how the Tahoe-LAFS storage client constructs these values. +:ref:`Share Leases` gives details about how the Tahoe-LAFS storage client constructs these values. 
In these cases the response is ``NO CONTENT`` with an empty body. diff --git a/docs/specifications/lease.rst b/docs/specifications/lease.rst index a8bee427e..54ee5cd28 100644 --- a/docs/specifications/lease.rst +++ b/docs/specifications/lease.rst @@ -1,5 +1,7 @@ .. -*- coding: utf-8 -*- +.. _share leases: + Share Leases ============ From 78a1d65b784b50486c8c59f8cdc5bad706405797 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 12:33:07 -0400 Subject: [PATCH 61/73] RFC 7049, section 4.1 describes correct JSON encoding for byte strings --- docs/proposed/http-storage-node-protocol.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 47f94db49..bc44468a6 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -380,8 +380,7 @@ then the expiration time of that lease will be changed to 31 days after the time If it does not match an existing lease then a new lease will be created with this ``renew-secret`` which expires 31 days after the time of this operation. -The renew and cancel secrets must be 32 bytes long -(or in the case of JSON encoding they must UTF-8 encode to 32 bytes). +The renew and cancel secrets must be 32 bytes long. The server treats them as opaque values. :ref:`Share Leases` gives details about how the Tahoe-LAFS storage client constructs these values. From a864bd51323f4e1378ced63a50516bad645e559f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 12:44:23 -0400 Subject: [PATCH 62/73] more precision --- docs/proposed/http-storage-node-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index bc44468a6..ade0bc167 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -380,7 +380,7 @@ then the expiration time of that lease will be changed to 31 days after the time If it does not match an existing lease then a new lease will be created with this ``renew-secret`` which expires 31 days after the time of this operation. -The renew and cancel secrets must be 32 bytes long. +``renew-secret`` and ``cancel-secret`` values must be 32 bytes long. The server treats them as opaque values. :ref:`Share Leases` gives details about how the Tahoe-LAFS storage client constructs these values. From bb57fcfb50d4e01bbc4de2e23dbbf7a60c004031 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 12:45:45 -0400 Subject: [PATCH 63/73] words about the cancel secret --- docs/specifications/lease.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/specifications/lease.rst b/docs/specifications/lease.rst index 54ee5cd28..8fc7c42eb 100644 --- a/docs/specifications/lease.rst +++ b/docs/specifications/lease.rst @@ -43,3 +43,21 @@ The scheme in use in Tahoe-LAFS as of 1.16.0 is as follows. * The **base32 encoding** is ``base64.b32encode`` lowercased and with trailing ``=`` stripped. * The **peer id** is the **base32 encoding** of the SHA1 digest of the server's x509 certificate. * The **renewal secret** is the **sha256d tagged pair digest** of (**bucket renewal tag**, **file renewal secret**, **peer id**). + +Cancel Secrets +-------------- + +Lease cancellation is unimplemented. 
+Nevertheless, +a cancel secret is sent by storage clients to storage servers and stored in lease records. + +The scheme for deriving **cancel secret** in use in Tahoe-LAFS as of 1.16.0 is similar to that used to derive the **renewal secret**. + +The differences are: + +* Use of **client renewal tag** is replaced by use of **client cancel tag**. +* Use of **file renewal secret** is replaced by use of **file cancel tag**. +* Use of **bucket renewal tag** is replaced by use of **bucket cancel tag**. +* **client cancel tag** is ``"allmydata_client_cancel_secret_v1"``. +* **file cancel tag** is ``"allmydata_file_cancel_secret_v1"``. +* **bucket cancel tag** is ``"allmydata_bucket_cancel_secret_v1"``. From f0fe323fa1e32555aeb21d9332ad30ff319084b7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 15:14:09 -0400 Subject: [PATCH 64/73] Simplify the immutable share reading interface --- docs/proposed/http-storage-node-protocol.rst | 25 +++++++------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index d7a41e827..fcf1e43fe 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -509,28 +509,21 @@ For example:: [1, 5] -``GET /v1/immutable/:storage_index?share=:s0&share=:sN&offset=o1&size=z0&offset=oN&size=zN`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +``GET /v1/immutable/:storage_index/:share_number?offset=:offset&length=:length +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Read data from the indicated immutable shares. -If ``share`` query parameters are given, selecte only those shares for reading. -Otherwise, select all shares present. -If ``size`` and ``offset`` query parameters are given, -only the portions thus identified of the selected shares are returned. -Otherwise, all data is from the selected shares is returned. +Read a contiguous sequence of bytes from one share in one bucket. +If the ``offset`` query parameter is given then it is interpreted as a base 10 representation of an integer giving the position at which to begin reading. +If it is not given then begin reading at the beginning of the share. +If the ``length`` query parameter is given then it is interpreted as a base 10 representation of an integer giving the maximum number of bytes to read and return. +If it is not given then bytes will be read until the end of the share is reached. -The response body contains a mapping giving the read data. -For example:: - - { - 3: ["foo", "bar"], - 7: ["baz", "quux"] - } +The response body is the raw share data (i.e., ``application/octet-stream``). Discussion `````````` -Offset and size of the requested data are specified here as query arguments. +Offset and length of the requested data are specified here as query arguments. Instead, this information could be present in a ``Range`` header in the request. This is the more obvious choice and leverages an HTTP feature built for exactly this use-case. However, HTTP requires that the ``Content-Type`` of the response to "range requests" be ``multipart/...``. 
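As a rough sketch of how a client might drive this endpoint, assuming a ``requests``-based client, a known ``base_url``, no authorization header, and treating a short read as the end of the share (a convention the text above does not pin down)::

    import requests

    def read_share(base_url, storage_index, share_number, chunk_size=128 * 1024):
        # Read one immutable share with successive GETs, using the offset
        # and length query parameters (both base 10 integers).
        data = b""
        while True:
            response = requests.get(
                "{}/v1/immutable/{}/{}".format(base_url, storage_index, share_number),
                params={"offset": len(data), "length": chunk_size},
            )
            response.raise_for_status()
            chunk = response.content  # raw application/octet-stream bytes
            data += chunk
            if len(chunk) < chunk_size:
                # Assumed: a short (or empty) read means the share is exhausted.
                return data
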
From 895c77fecf9559a31a7770ae98841846124e995e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 15:15:48 -0400 Subject: [PATCH 65/73] news fragment --- newsfragments/3777.documentation | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3777.documentation diff --git a/newsfragments/3777.documentation b/newsfragments/3777.documentation new file mode 100644 index 000000000..7635cc1e6 --- /dev/null +++ b/newsfragments/3777.documentation @@ -0,0 +1 @@ +The Great Black Swamp proposed specification now has a simplified interface for reading data from immutable shares. From 18cbff1789c6f3525c3f8127b629f534bc5c9a9f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 15:22:29 -0400 Subject: [PATCH 66/73] minimal discussion about this change --- docs/proposed/http-storage-node-protocol.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index fcf1e43fe..fb327b0a8 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -534,6 +534,15 @@ There are many drawbacks to this framing technique: 2. It is resource-intensive to parse. 3. It is complex to parse safely [#]_ [#]_ [#]_ [#]_. +A previous revision of this specification allowed requesting one or more contiguous sequences from one or more shares. +This *superficially* mirrored the Foolscap based interface somewhat closely. +The interface was simplified to this version because this version is all that is required to let clients retrieve any desired information. +It only requires that the client issue multiple requests. +This can be done with pipelining or parallel requests to avoid an additional latency penalty. +In the future, +if there are performance goals, +benchmarks can demonstrate whether they are achieved by a more complicated interface or some other change. 
+ Mutable ------- From 8d15a0d5ebcbf16979a984484807a0c54f6b4a24 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 16:27:57 -0400 Subject: [PATCH 67/73] words about authorization --- docs/proposed/http-storage-node-protocol.rst | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index d7a41e827..0daa48cab 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -24,11 +24,21 @@ Glossary storage server a Tahoe-LAFS process configured to offer storage and reachable over the network for store and retrieve operations + storage service + a Python object held in memory in the storage server which provides the implementation of the storage protocol + introducer a Tahoe-LAFS process at a known location configured to re-publish announcements about the location of storage servers fURL a self-authenticating URL-like string which can be used to locate a remote object using the Foolscap protocol + (the storage service is an example of such an object) + + NURL + a self-authenticating URL-like string almost exactly like a fURL but without being tied to Foolscap + + swissnum + a short random string which is part of a fURL and which acts as a shared secret to authorize clients to use a storage service lease state associated with a share informing a storage server of the duration of storage desired by a client @@ -128,6 +138,8 @@ The Foolscap-based protocol offers: * A careful configuration of the TLS connection parameters *may* also offer **forward secrecy**. However, Tahoe-LAFS' use of Foolscap takes no steps to ensure this is the case. +* **Storage authorization** by way of a capability contained in the fURL addressing a storage service. + Discussion !!!!!!!!!! @@ -158,6 +170,10 @@ there is no way to write data which appears legitimate to a legitimate client). Therefore, **message confidentiality** is necessary when exchanging these secrets. **Forward secrecy** is preferred so that an attacker recording an exchange today cannot launch this attack at some future point after compromising the necessary keys. +A storage service offers service only to some clients. +A client proves their authorization to use the storage service by presenting a shared secret taken from the fURL. +In this way **storage authorization** is performed to prevent disallowed parties from consuming all available storage resources. + Functionality ------------- @@ -214,6 +230,10 @@ Additionally, by continuing to interact using TLS, Bob's client and Alice's storage node are assured of both **message authentication** and **message confidentiality**. +Bob's client further inspects the fURL for the *swissnum*. +When Bob's client issues HTTP requests to Alice's storage node it includes the *swissnum* in its requests. +**Storage authorization** has been achieved. + .. note:: Foolscap TubIDs are 20 bytes (SHA1 digest of the certificate). @@ -343,6 +363,12 @@ one branch contains all of the share data; another branch contains all of the lease data; etc. +Authorization is required for all endpoints. +The standard HTTP authorization protocol is used. +The authentication *type* used is ``Tahoe-LAFS``. +The swissnum from the NURL used to locate the storage service is used as the *credentials*. 
+If credentials are not presented or the swissnum is not associated with a storage service then no storage processing is performed and the request receives an ``UNAUTHORIZED`` response. + General ~~~~~~~ From a4334b35a0bb6f669429306187cc0fa93cef2518 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 3 Sep 2021 16:28:27 -0400 Subject: [PATCH 68/73] news fragment --- newsfragments/3785.documentation | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3785.documentation diff --git a/newsfragments/3785.documentation b/newsfragments/3785.documentation new file mode 100644 index 000000000..4eb268f79 --- /dev/null +++ b/newsfragments/3785.documentation @@ -0,0 +1 @@ +The Great Black Swamp specification now describes the required authorization scheme. From e2b483e093b11cd31a513b2069755df7a0bf13ed Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 7 Sep 2021 08:13:03 -0400 Subject: [PATCH 69/73] an even stronger prevention is provided --- docs/proposed/http-storage-node-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 0daa48cab..0c6186bc1 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -172,7 +172,7 @@ Therefore, **message confidentiality** is necessary when exchanging these secret A storage service offers service only to some clients. A client proves their authorization to use the storage service by presenting a shared secret taken from the fURL. -In this way **storage authorization** is performed to prevent disallowed parties from consuming all available storage resources. +In this way **storage authorization** is performed to prevent disallowed parties from consuming any storage resources. Functionality ------------- From 7219291343299263050ddaf7e39ac1f6af2c62b7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 7 Sep 2021 13:30:21 -0400 Subject: [PATCH 70/73] add a reference implementation for lease renewal secret derivation --- docs/proposed/http-storage-node-protocol.rst | 2 +- docs/specifications/derive_renewal_secret.py | 87 ++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 docs/specifications/derive_renewal_secret.py diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index ade0bc167..3a09ccae0 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -45,7 +45,7 @@ Glossary (sometimes "slot" is considered a synonym for "storage index of a slot") storage index - a short string which can address a slot or a bucket + a 16 byte string which can address a slot or a bucket (in practice, derived by hashing the encryption key associated with contents of that slot or bucket) write enabler diff --git a/docs/specifications/derive_renewal_secret.py b/docs/specifications/derive_renewal_secret.py new file mode 100644 index 000000000..75009eda4 --- /dev/null +++ b/docs/specifications/derive_renewal_secret.py @@ -0,0 +1,87 @@ + +""" +This is a reference implementation of the lease renewal secret derivation +protocol in use by Tahoe-LAFS clients as of 1.16.0. 
+""" + +from allmydata.util.base32 import ( + a2b as b32decode, + b2a as b32encode, +) +from allmydata.util.hashutil import ( + tagged_hash, + tagged_pair_hash, +) + + +def derive_renewal_secret(lease_secret: bytes, storage_index: bytes, tubid: bytes) -> bytes: + assert len(lease_secret) == 32 + assert len(storage_index) == 16 + assert len(tubid) == 20 + + bucket_renewal_tag = b"allmydata_bucket_renewal_secret_v1" + file_renewal_tag = b"allmydata_file_renewal_secret_v1" + client_renewal_tag = b"allmydata_client_renewal_secret_v1" + + client_renewal_secret = tagged_hash(lease_secret, client_renewal_tag) + file_renewal_secret = tagged_pair_hash( + file_renewal_tag, + client_renewal_secret, + storage_index, + ) + peer_id = tubid + + return tagged_pair_hash(bucket_renewal_tag, file_renewal_secret, peer_id) + +def demo(): + secret = b32encode(derive_renewal_secret( + b"lease secretxxxxxxxxxxxxxxxxxxxx", + b"storage indexxxx", + b"tub idxxxxxxxxxxxxxx", + )).decode("ascii") + print("An example renewal secret: {}".format(secret)) + +def test(): + # These test vectors created by intrumenting Tahoe-LAFS + # bb57fcfb50d4e01bbc4de2e23dbbf7a60c004031 to emit `self.renew_secret` in + # allmydata.immutable.upload.ServerTracker.query and then uploading a + # couple files to a couple different storage servers. + test_vector = [ + dict(lease_secret=b"boity2cdh7jvl3ltaeebuiobbspjmbuopnwbde2yeh4k6x7jioga", + storage_index=b"vrttmwlicrzbt7gh5qsooogr7u", + tubid=b"v67jiisoty6ooyxlql5fuucitqiok2ic", + expected=b"osd6wmc5vz4g3ukg64sitmzlfiaaordutrez7oxdp5kkze7zp5zq", + ), + dict(lease_secret=b"boity2cdh7jvl3ltaeebuiobbspjmbuopnwbde2yeh4k6x7jioga", + storage_index=b"75gmmfts772ww4beiewc234o5e", + tubid=b"v67jiisoty6ooyxlql5fuucitqiok2ic", + expected=b"35itmusj7qm2pfimh62snbyxp3imreofhx4djr7i2fweta75szda", + ), + dict(lease_secret=b"boity2cdh7jvl3ltaeebuiobbspjmbuopnwbde2yeh4k6x7jioga", + storage_index=b"75gmmfts772ww4beiewc234o5e", + tubid=b"lh5fhobkjrmkqjmkxhy3yaonoociggpz", + expected=b"srrlruge47ws3lm53vgdxprgqb6bz7cdblnuovdgtfkqrygrjm4q", + ), + dict(lease_secret=b"vacviff4xfqxsbp64tdr3frg3xnkcsuwt5jpyat2qxcm44bwu75a", + storage_index=b"75gmmfts772ww4beiewc234o5e", + tubid=b"lh5fhobkjrmkqjmkxhy3yaonoociggpz", + expected=b"b4jledjiqjqekbm2erekzqumqzblegxi23i5ojva7g7xmqqnl5pq", + ), + ] + + for n, item in enumerate(test_vector): + derived = b32encode(derive_renewal_secret( + b32decode(item["lease_secret"]), + b32decode(item["storage_index"]), + b32decode(item["tubid"]), + )) + assert derived == item["expected"] , \ + "Test vector {} failed: {} (expected) != {} (derived)".format( + n, + item["expected"], + derived, + ) + print("{} test vectors validated".format(len(test_vector))) + +test() +demo() From ee224305b7a64f071103d387a7eb646f197710b7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 7 Sep 2021 13:37:12 -0400 Subject: [PATCH 71/73] link to the reference implementation --- docs/specifications/lease.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/specifications/lease.rst b/docs/specifications/lease.rst index 8fc7c42eb..16adef0a7 100644 --- a/docs/specifications/lease.rst +++ b/docs/specifications/lease.rst @@ -44,6 +44,12 @@ The scheme in use in Tahoe-LAFS as of 1.16.0 is as follows. * The **peer id** is the **base32 encoding** of the SHA1 digest of the server's x509 certificate. * The **renewal secret** is the **sha256d tagged pair digest** of (**bucket renewal tag**, **file renewal secret**, **peer id**). +A reference implementation is available. + +.. 
literalinclude:: derive_renewal_secret.py
+   :language: python
+   :linenos:
+
 Cancel Secrets
 --------------
 
From 82f94ae5af4791ff8d322f908432ae8c1fc7b2d0 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Tue, 7 Sep 2021 14:17:38 -0400
Subject: [PATCH 72/73] Yay use the Range request header

---
 docs/proposed/http-storage-node-protocol.rst | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst
index eb5154e45..5f67fcaa6 100644
--- a/docs/proposed/http-storage-node-protocol.rst
+++ b/docs/proposed/http-storage-node-protocol.rst
@@ -539,24 +539,20 @@ For example::
 
     [1, 5]
 
-``GET /v1/immutable/:storage_index/:share_number?offset=:offset&length=:length
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+``GET /v1/immutable/:storage_index/:share_number
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 Read a contiguous sequence of bytes from one share in one bucket.
-If the ``offset`` query parameter is given then it is interpreted as a base 10 representation of an integer giving the position at which to begin reading.
-If it is not given then begin reading at the beginning of the share.
-If the ``length`` query parameter is given then it is interpreted as a base 10 representation of an integer giving the maximum number of bytes to read and return.
-If it is not given then bytes will be read until the end of the share is reached.
-
 The response body is the raw share data (i.e., ``application/octet-stream``).
+The ``Range`` header may be used to request exactly one ``bytes`` range.
+Interpretation and response behavior is as specified in RFC 7233 § 4.1.
+Multiple ranges in a single request are *not* supported.
 
 Discussion
 ``````````
 
-Offset and length of the requested data are specified here as query arguments.
-Instead, this information could be present in a ``Range`` header in the request.
-This is the more obvious choice and leverages an HTTP feature built for exactly this use-case.
-However, HTTP requires that the ``Content-Type`` of the response to "range requests" be ``multipart/...``.
+Multiple ``bytes`` ranges are not supported.
+HTTP requires that the ``Content-Type`` of the response in that case be ``multipart/...``.
 The ``multipart`` major type brings along string sentinel delimiting as a means to frame the different response parts.
 There are many drawbacks to this framing technique:
 
From a988f126bee53252c0692dda40b3c0262000cba4 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Tue, 7 Sep 2021 16:12:01 -0400
Subject: [PATCH 73/73] fix markup error

---
 docs/proposed/http-storage-node-protocol.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst
index 5f67fcaa6..a84d62176 100644
--- a/docs/proposed/http-storage-node-protocol.rst
+++ b/docs/proposed/http-storage-node-protocol.rst
@@ -539,8 +539,8 @@ For example::
 
     [1, 5]
 
-``GET /v1/immutable/:storage_index/:share_number
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+``GET /v1/immutable/:storage_index/:share_number``
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 Read a contiguous sequence of bytes from one share in one bucket.
 The response body is the raw share data (i.e., ``application/octet-stream``).
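With the ``Range`` header and the ``Tahoe-LAFS`` authorization scheme described above, the same kind of read can be sketched as follows. This is a hypothetical illustration, assuming Python's ``requests`` package and placeholder ``base_url`` and ``swissnum`` values; it does not show the NURL-based server certificate check that a real client would perform::

    import requests

    def read_share_range(base_url, swissnum, storage_index, share_number, first, last):
        """
        Read the inclusive byte range [first, last] from one immutable share
        using a ``Range`` request authorized with the swissnum.
        """
        response = requests.get(
            "{}/v1/immutable/{}/{}".format(base_url, storage_index, share_number),
            headers={
                # The swissnum from the NURL serves as the credentials.
                "Authorization": "Tahoe-LAFS {}".format(swissnum),
                # Exactly one bytes range, per RFC 7233.
                "Range": "bytes={}-{}".format(first, last),
            },
        )
        response.raise_for_status()
        # A satisfiable range is answered with 206 Partial Content and the
        # requested bytes as application/octet-stream.
        return response.content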