From 492bcbbd128b5c3a3d4c4c9f9898e9ddac522713 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Fri, 13 Aug 2021 18:22:10 +0100 Subject: [PATCH 001/220] Refactored test_logs to be consistent with base testcases Signed-off-by: fenn-cs --- newsfragments/3758.other | 1 + src/allmydata/test/web/test_logs.py | 27 ++++++++++++++++----------- src/allmydata/util/eliotutil.py | 9 +++++++-- 3 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 newsfragments/3758.other diff --git a/newsfragments/3758.other b/newsfragments/3758.other new file mode 100644 index 000000000..d0eb1d4c1 --- /dev/null +++ b/newsfragments/3758.other @@ -0,0 +1 @@ +Refactored test_logs, test_grid and test_root in web tests to use custom base test cases diff --git a/src/allmydata/test/web/test_logs.py b/src/allmydata/test/web/test_logs.py index 89ec7ba42..043541690 100644 --- a/src/allmydata/test/web/test_logs.py +++ b/src/allmydata/test/web/test_logs.py @@ -17,10 +17,8 @@ if PY2: import json -from twisted.trial import unittest from twisted.internet.defer import inlineCallbacks -from eliot import log_call from autobahn.twisted.testing import create_memory_agent, MemoryReactorClockResolver, create_pumper @@ -48,6 +46,7 @@ from .matchers import ( from ..common import ( SyncTestCase, + AsyncTestCase, ) from ...web.logs import ( @@ -55,6 +54,10 @@ from ...web.logs import ( TokenAuthenticatedWebSocketServerProtocol, ) +from ...util.eliotutil import ( + log_call_deferred +) + class StreamingEliotLogsTests(SyncTestCase): """ Tests for the log streaming resources created by ``create_log_resources``. @@ -75,18 +78,20 @@ class StreamingEliotLogsTests(SyncTestCase): ) -class TestStreamingLogs(unittest.TestCase): +class TestStreamingLogs(AsyncTestCase): """ Test websocket streaming of logs """ def setUp(self): + super(TestStreamingLogs, self).setUp() self.reactor = MemoryReactorClockResolver() self.pumper = create_pumper() self.agent = create_memory_agent(self.reactor, self.pumper, TokenAuthenticatedWebSocketServerProtocol) return self.pumper.start() def tearDown(self): + super(TestStreamingLogs, self).tearDown() return self.pumper.stop() @inlineCallbacks @@ -105,7 +110,7 @@ class TestStreamingLogs(unittest.TestCase): messages.append(json.loads(msg)) proto.on("message", got_message) - @log_call(action_type=u"test:cli:some-exciting-action") + @log_call_deferred(action_type=u"test:cli:some-exciting-action") def do_a_thing(arguments): pass @@ -114,10 +119,10 @@ class TestStreamingLogs(unittest.TestCase): proto.transport.loseConnection() yield proto.is_closed - self.assertEqual(len(messages), 2) - self.assertEqual(messages[0]["action_type"], "test:cli:some-exciting-action") - self.assertEqual(messages[0]["arguments"], - ["hello", "good-\\xff-day", 123, {"a": 35}, [None]]) - self.assertEqual(messages[1]["action_type"], "test:cli:some-exciting-action") - self.assertEqual("started", messages[0]["action_status"]) - self.assertEqual("succeeded", messages[1]["action_status"]) + self.assertThat(len(messages), Equals(3)) + self.assertThat(messages[0]["action_type"], Equals("test:cli:some-exciting-action")) + self.assertThat(messages[0]["arguments"], + Equals(["hello", "good-\\xff-day", 123, {"a": 35}, [None]])) + self.assertThat(messages[1]["action_type"], Equals("test:cli:some-exciting-action")) + self.assertThat("started", Equals(messages[0]["action_status"])) + self.assertThat("succeeded", Equals(messages[1]["action_status"])) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index 4e48fbb9f..ec4c0bf97 100644 
--- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -87,7 +87,11 @@ from twisted.internet.defer import ( ) from twisted.application.service import Service -from .jsonbytes import AnyBytesJSONEncoder +from .jsonbytes import ( + AnyBytesJSONEncoder, + bytes_to_unicode +) + def validateInstanceOf(t): @@ -320,7 +324,8 @@ def log_call_deferred(action_type): def logged_f(*a, **kw): # Use the action's context method to avoid ending the action when # the `with` block ends. - with start_action(action_type=action_type).context(): + args = bytes_to_unicode(True, kw['arguments']) + with start_action(action_type=action_type, arguments=args).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. d = maybeDeferred(f, *a, **kw) From 27c8e62cf648a1186a91e84aa0cd84e62774c5e9 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Sat, 14 Aug 2021 00:09:34 +0100 Subject: [PATCH 002/220] Replaced fixed arg with dynamic args in log_call_deferred Signed-off-by: fenn-cs --- src/allmydata/util/eliotutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index ec4c0bf97..d989c9e2a 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -324,8 +324,8 @@ def log_call_deferred(action_type): def logged_f(*a, **kw): # Use the action's context method to avoid ending the action when # the `with` block ends. - args = bytes_to_unicode(True, kw['arguments']) - with start_action(action_type=action_type, arguments=args).context(): + args = {k: bytes_to_unicode(True, kw[k]) for k in kw} + with start_action(action_type=action_type, **args).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. 
d = maybeDeferred(f, *a, **kw) From f7f08c93f9088b187bbef8de225c0e8352a6cd36 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Mon, 16 Aug 2021 12:57:24 +0100 Subject: [PATCH 003/220] Refactored test_root to be consistent with base testcases Signed-off-by: fenn-cs --- src/allmydata/test/web/test_root.py | 39 ++++++++++++++++++----------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/allmydata/test/web/test_root.py b/src/allmydata/test/web/test_root.py index ca3cc695d..1d5e45ba4 100644 --- a/src/allmydata/test/web/test_root.py +++ b/src/allmydata/test/web/test_root.py @@ -20,10 +20,11 @@ from bs4 import ( BeautifulSoup, ) -from twisted.trial import unittest from twisted.web.template import Tag from twisted.web.test.requesthelper import DummyRequest from twisted.application import service +from testtools.twistedsupport import succeeded +from twisted.internet.defer import inlineCallbacks from ...storage_client import ( NativeStorageServer, @@ -44,7 +45,17 @@ from ..common import ( EMPTY_CLIENT_CONFIG, ) -class RenderSlashUri(unittest.TestCase): +from ..common import ( + SyncTestCase, +) + +from testtools.matchers import ( + Equals, + Contains, + AfterPreprocessing, +) + +class RenderSlashUri(SyncTestCase): """ Ensure that URIs starting with /uri?uri= only accept valid capabilities """ def setUp(self): self.client = object() self.res = URIHandler(self.client) + super(RenderSlashUri, self).setUp() + @inlineCallbacks def test_valid_query_redirect(self): """ A syntactically valid capability given in the ``uri`` query argument """ cap = ( b"mukesarwdjxiyqsjinbfiiro6q7kgmmekocxfjcngh23oxwyxtzq:2:5:5874882" ) query_args = {b"uri": [cap]} - response_body = self.successResultOf( - render(self.res, query_args), - ) + response_body = yield render(self.res, query_args) soup = BeautifulSoup(response_body, 'html5lib') tag = assert_soup_has_tag_with_attributes( self, soup, u"meta", {u"http-equiv": "refresh"}, ) - self.assertIn( - quote(cap, safe=""), + self.assertThat( tag.attrs.get(u"content"), + Contains(quote(cap, safe="")), ) def test_invalid(self): """ A syntactically invalid capability results in an error. """ query_args = {b"uri": [b"not a capability"]} - response_body = self.successResultOf( - render(self.res, query_args), - ) + response_body = render(self.res, query_args) - self.assertEqual( + self.assertThat( response_body, - b"Invalid capability", + succeeded(AfterPreprocessing(bytes, Equals(b"Invalid capability"))), ) -class RenderServiceRow(unittest.TestCase): +class RenderServiceRow(SyncTestCase): def test_missing(self): """ minimally-defined static servers just need anonymous-storage-FURL """ @@ -127,5 +136,5 @@ class RenderServiceRow(unittest.TestCase): # Coerce `items` to list and pick the first item from it. 
item = list(items)[0] - self.assertEqual(item.slotData.get("version"), "") - self.assertEqual(item.slotData.get("nickname"), "") + self.assertThat(item.slotData.get("version"), Equals("")) + self.assertThat(item.slotData.get("nickname"), Equals("")) From bef2413e4b9bfbfe3553be3b56c9d3a57cf4f623 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 17 Aug 2021 13:11:54 +0100 Subject: [PATCH 004/220] Refactored test_grid to be consistent with base testcases Signed-off-by: fenn-cs --- src/allmydata/test/web/test_grid.py | 204 +++++++++++++++------------- 1 file changed, 107 insertions(+), 97 deletions(-) diff --git a/src/allmydata/test/web/test_grid.py b/src/allmydata/test/web/test_grid.py index edcf32268..54aa13941 100644 --- a/src/allmydata/test/web/test_grid.py +++ b/src/allmydata/test/web/test_grid.py @@ -18,7 +18,6 @@ from six.moves import StringIO from bs4 import BeautifulSoup from twisted.web import resource -from twisted.trial import unittest from allmydata import uri, dirnode from allmydata.util import base32 from allmydata.util.encodingutil import to_bytes @@ -43,6 +42,20 @@ from .common import ( unknown_rwcap, ) +from ..common import ( + AsyncTestCase, +) + +from testtools.matchers import ( + Equals, + Contains, + Is, + Not, +) + +from testtools.twistedsupport import flush_logged_errors + + DIR_HTML_TAG = '<html lang="en">' class CompletelyUnhandledError(Exception): pass class ErrorBoom(resource.Resource, object): def render(self, req): raise CompletelyUnhandledError("whoops") -class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMixin, unittest.TestCase): +class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMixin, AsyncTestCase): def CHECK(self, ign, which, args, clientnum=0): fileurl = self.fileurls[which] @@ -117,37 +130,37 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "good", "t=check") def _got_html_good(res): - self.failUnlessIn("Healthy", res) - self.failIfIn("Not Healthy", res) + self.assertThat(res, Contains("Healthy")) + self.assertThat(res, Not(Contains("Not Healthy", ))) soup = BeautifulSoup(res, 'html5lib') assert_soup_has_favicon(self, soup) d.addCallback(_got_html_good) d.addCallback(self.CHECK, "good", "t=check&return_to=somewhere") def _got_html_good_return_to(res): - self.failUnlessIn("Healthy", res) - self.failIfIn("Not Healthy", res) - self.failUnlessIn('<a href="somewhere">Return to file', res) + self.assertThat(res, Contains("Healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) + self.assertThat(res, Contains('<a href="somewhere">Return to file')) d.addCallback(_got_html_good_return_to) d.addCallback(self.CHECK, "good", "t=check&output=json") def _got_json_good(res): r = json.loads(res) self.failUnlessEqual(r["summary"], "Healthy") self.failUnless(r["results"]["healthy"]) - self.failIfIn("needs-rebalancing", r["results"]) + self.assertThat(r["results"], Not(Contains("needs-rebalancing",))) self.failUnless(r["results"]["recoverable"]) d.addCallback(_got_json_good) d.addCallback(self.CHECK, "small", "t=check") def _got_html_small(res): - self.failUnlessIn("Literal files are always healthy", res) - self.failIfIn("Not Healthy", res) + self.assertThat(res, Contains("Literal files are always healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) d.addCallback(_got_html_small) d.addCallback(self.CHECK, "small", "t=check&return_to=somewhere") def _got_html_small_return_to(res): - self.failUnlessIn("Literal files are always healthy", res) - self.failIfIn("Not Healthy", res) - 
self.failUnlessIn('<a href="somewhere">Return to file', res) + self.assertThat(res, Contains("Literal files are always healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) + self.assertThat(res, Contains('<a href="somewhere">Return to file')) d.addCallback(_got_html_small_return_to) d.addCallback(self.CHECK, "small", "t=check&output=json") def _got_json_small(res): @@ -158,8 +171,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "smalldir", "t=check") def _got_html_smalldir(res): - self.failUnlessIn("Literal files are always healthy", res) - self.failIfIn("Not Healthy", res) + self.assertThat(res, Contains("Literal files are always healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) d.addCallback(_got_html_smalldir) d.addCallback(self.CHECK, "smalldir", "t=check&output=json") def _got_json_smalldir(res): @@ -170,43 +183,43 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "sick", "t=check") def _got_html_sick(res): - self.failUnlessIn("Not Healthy", res) + self.assertThat(res, Contains("Not Healthy")) d.addCallback(_got_html_sick) d.addCallback(self.CHECK, "sick", "t=check&output=json") def _got_json_sick(res): r = json.loads(res) self.failUnlessEqual(r["summary"], "Not Healthy: 9 shares (enc 3-of-10)") - self.failIf(r["results"]["healthy"]) + self.assertThat(r["results"]["healthy"], Is(False)) self.failUnless(r["results"]["recoverable"]) - self.failIfIn("needs-rebalancing", r["results"]) + self.assertThat(r["results"], Not(Contains("needs-rebalancing"))) d.addCallback(_got_json_sick) d.addCallback(self.CHECK, "dead", "t=check") def _got_html_dead(res): - self.failUnlessIn("Not Healthy", res) + self.assertThat(res, Contains("Not Healthy")) d.addCallback(_got_html_dead) d.addCallback(self.CHECK, "dead", "t=check&output=json") def _got_json_dead(res): r = json.loads(res) self.failUnlessEqual(r["summary"], "Not Healthy: 1 shares (enc 3-of-10)") - self.failIf(r["results"]["healthy"]) - self.failIf(r["results"]["recoverable"]) - self.failIfIn("needs-rebalancing", r["results"]) + self.assertThat(r["results"]["healthy"], Is(False)) + self.assertThat(r["results"]["recoverable"], Is(False)) + self.assertThat(r["results"], Not(Contains("needs-rebalancing"))) d.addCallback(_got_json_dead) d.addCallback(self.CHECK, "corrupt", "t=check&verify=true") def _got_html_corrupt(res): - self.failUnlessIn("Not Healthy! : Unhealthy", res) + self.assertThat(res, Contains("Not Healthy! 
: Unhealthy")) d.addCallback(_got_html_corrupt) d.addCallback(self.CHECK, "corrupt", "t=check&verify=true&output=json") def _got_json_corrupt(res): r = json.loads(res) - self.failUnlessIn("Unhealthy: 9 shares (enc 3-of-10)", r["summary"]) - self.failIf(r["results"]["healthy"]) + self.assertThat(r["summary"], Contains("Unhealthy: 9 shares (enc 3-of-10)")) + self.assertThat(r["results"]["healthy"], Is(False)) self.failUnless(r["results"]["recoverable"]) - self.failIfIn("needs-rebalancing", r["results"]) + self.assertThat(r["results"], Not(Contains("needs-rebalancing"))) self.failUnlessReallyEqual(r["results"]["count-happiness"], 9) self.failUnlessReallyEqual(r["results"]["count-shares-good"], 9) self.failUnlessReallyEqual(r["results"]["count-corrupt-shares"], 1) @@ -261,9 +274,9 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "good", "t=check&repair=true") def _got_html_good(res): - self.failUnlessIn("Healthy", res) - self.failIfIn("Not Healthy", res) - self.failUnlessIn("No repair necessary", res) + self.assertThat(res, Contains("Healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) + self.assertThat(res, Contains("No repair necessary", )) soup = BeautifulSoup(res, 'html5lib') assert_soup_has_favicon(self, soup) @@ -271,9 +284,9 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "sick", "t=check&repair=true") def _got_html_sick(res): - self.failUnlessIn("Healthy : healthy", res) - self.failIfIn("Not Healthy", res) - self.failUnlessIn("Repair successful", res) + self.assertThat(res, Contains("Healthy : healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) + self.assertThat(res, Contains("Repair successful")) d.addCallback(_got_html_sick) # repair of a dead file will fail, of course, but it isn't yet @@ -290,9 +303,9 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "corrupt", "t=check&verify=true&repair=true") def _got_html_corrupt(res): - self.failUnlessIn("Healthy : Healthy", res) - self.failIfIn("Not Healthy", res) - self.failUnlessIn("Repair successful", res) + self.assertThat(res, Contains("Healthy : Healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) + self.assertThat(res, Contains("Repair successful")) d.addCallback(_got_html_corrupt) d.addErrback(self.explain_web_error) @@ -392,31 +405,31 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi if expect_rw_uri: self.failUnlessReallyEqual(to_bytes(f[1]["rw_uri"]), unknown_rwcap, data) else: - self.failIfIn("rw_uri", f[1]) + self.assertThat(f[1], Not(Contains("rw_uri"))) if immutable: self.failUnlessReallyEqual(to_bytes(f[1]["ro_uri"]), unknown_immcap, data) else: self.failUnlessReallyEqual(to_bytes(f[1]["ro_uri"]), unknown_rocap, data) - self.failUnlessIn("metadata", f[1]) + self.assertThat(f[1], Contains("metadata")) d.addCallback(_check_directory_json, expect_rw_uri=not immutable) def _check_info(res, expect_rw_uri, expect_ro_uri): if expect_rw_uri: - self.failUnlessIn(unknown_rwcap, res) + self.assertThat(res, Contains(unknown_rwcap)) if expect_ro_uri: if immutable: - self.failUnlessIn(unknown_immcap, res) + self.assertThat(res, Contains(unknown_immcap)) else: - self.failUnlessIn(unknown_rocap, res) + self.assertThat(res, Contains(unknown_rocap)) else: - self.failIfIn(unknown_rocap, res) + self.assertThat(res, Not(Contains(unknown_rocap))) res = str(res, "utf-8") - self.failUnlessIn("Object 
Type: unknown", res) - self.failIfIn("Raw data as", res) - self.failIfIn("Directory writecap", res) - self.failIfIn("Checker Operations", res) - self.failIfIn("Mutable File Operations", res) - self.failIfIn("Directory Operations", res) + self.assertThat(res, Contains("Object Type: unknown")) + self.assertThat(res, Not(Contains("Raw data as"))) + self.assertThat(res, Not(Contains("Directory writecap"))) + self.assertThat(res, Not(Contains("Checker Operations"))) + self.assertThat(res, Not(Contains("Mutable File Operations"))) + self.assertThat(res, Not(Contains("Directory Operations"))) # FIXME: these should have expect_rw_uri=not immutable; I don't know # why they fail. Possibly related to ticket #922. @@ -432,7 +445,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi if expect_rw_uri: self.failUnlessReallyEqual(to_bytes(data[1]["rw_uri"]), unknown_rwcap, data) else: - self.failIfIn("rw_uri", data[1]) + self.assertThat(data[1], Not(Contains("rw_uri"))) if immutable: self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_immcap, data) @@ -442,10 +455,10 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessReallyEqual(data[1]["mutable"], True) else: self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_rocap, data) - self.failIfIn("mutable", data[1]) + self.assertThat(data[1], Not(Contains("mutable"))) # TODO: check metadata contents - self.failUnlessIn("metadata", data[1]) + self.assertThat(data[1], Contains("metadata")) d.addCallback(lambda ign: self.GET("%s/%s?t=json" % (self.rooturl, str(name)))) d.addCallback(_check_json, expect_rw_uri=not immutable) @@ -519,14 +532,14 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi def _created(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) - self.failIf(dn.is_mutable()) + self.assertThat(dn.is_mutable(), Is(False)) self.failUnless(dn.is_readonly()) # This checks that if we somehow ended up calling dn._decrypt_rwcapdata, it would fail. 
- self.failIf(hasattr(dn._node, 'get_writekey')) + self.assertThat(hasattr(dn._node, 'get_writekey'), Is(False)) rep = str(dn) - self.failUnlessIn("RO-IMM", rep) + self.assertThat(rep, Contains("RO-IMM")) cap = dn.get_cap() - self.failUnlessIn(b"CHK", cap.to_string()) + self.assertThat(cap.to_string(), Contains(b"CHK")) self.cap = cap self.rootnode = dn self.rooturl = "uri/" + url_quote(dn.get_uri()) @@ -546,7 +559,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi (name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) name = name_utf8.decode("utf-8") self.failUnlessEqual(rwcapdata, b"") - self.failUnlessIn(name, kids) + self.assertThat(kids, Contains(name)) (expected_child, ign) = kids[name] self.failUnlessReallyEqual(ro_uri, expected_child.get_readonly_uri()) numkids += 1 @@ -572,27 +585,27 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(lambda ign: self.GET(self.rooturl)) def _check_html(res): soup = BeautifulSoup(res, 'html5lib') - self.failIfIn(b"URI:SSK", res) + self.assertThat(res, Not(Contains(b"URI:SSK"))) found = False for td in soup.find_all(u"td"): if td.text != u"FILE": continue a = td.findNextSibling()(u"a")[0] - self.assertIn(url_quote(lonely_uri), a[u"href"]) - self.assertEqual(u"lonely", a.text) - self.assertEqual(a[u"rel"], [u"noreferrer"]) - self.assertEqual(u"{}".format(len("one")), td.findNextSibling().findNextSibling().text) + self.assertThat(a[u"href"], Contains(url_quote(lonely_uri))) + self.assertThat(a.text, Equals(u"lonely")) + self.assertThat(a[u"rel"], Equals([u"noreferrer"])) + self.assertThat(td.findNextSibling().findNextSibling().text, Equals(u"{}".format(len("one")))) found = True break - self.assertTrue(found) + self.assertThat(found, Is(True)) infos = list( a[u"href"] for a in soup.find_all(u"a") if a.text == u"More Info" ) - self.assertEqual(1, len(infos)) - self.assertTrue(infos[0].endswith(url_quote(lonely_uri) + "?t=info")) + self.assertThat(len(infos), Equals(1)) + self.assertThat(infos[0].endswith(url_quote(lonely_uri) + "?t=info"), Is(True)) d.addCallback(_check_html) # ... and in JSON. 
@@ -604,7 +617,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessReallyEqual(sorted(listed_children.keys()), [u"lonely"]) ll_type, ll_data = listed_children[u"lonely"] self.failUnlessEqual(ll_type, "filenode") - self.failIfIn("rw_uri", ll_data) + self.assertThat(ll_data, Not(Contains("rw_uri"))) self.failUnlessReallyEqual(to_bytes(ll_data["ro_uri"]), lonely_uri) d.addCallback(_check_json) return d @@ -744,8 +757,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi error_line = lines[first_error] error_msg = lines[first_error+1:] error_msg_s = "\n".join(error_msg) + "\n" - self.failUnlessIn("ERROR: UnrecoverableFileError(no recoverable versions)", - error_line) + self.assertThat(error_line, Contains("ERROR: UnrecoverableFileError(no recoverable versions)")) self.failUnless(len(error_msg) > 2, error_msg_s) # some traceback units = [json.loads(line) for line in lines[:first_error]] self.failUnlessReallyEqual(len(units), 6) # includes subdir @@ -765,8 +777,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi error_line = lines[first_error] error_msg = lines[first_error+1:] error_msg_s = "\n".join(error_msg) + "\n" - self.failUnlessIn("ERROR: UnrecoverableFileError(no recoverable versions)", - error_line) + self.assertThat(error_line, Contains("ERROR: UnrecoverableFileError(no recoverable versions)")) self.failUnless(len(error_msg) > 2, error_msg_s) # some traceback units = [json.loads(line) for line in lines[:first_error]] self.failUnlessReallyEqual(len(units), 6) # includes subdir @@ -936,8 +947,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(self.CHECK, "one", "t=check") # no add-lease def _got_html_good(res): - self.failUnlessIn("Healthy", res) - self.failIfIn("Not Healthy", res) + self.assertThat(res, Contains("Healthy")) + self.assertThat(res, Not(Contains("Not Healthy"))) d.addCallback(_got_html_good) d.addCallback(self._count_leases, "one") @@ -1111,7 +1122,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.GET, self.fileurls["0shares"])) def _check_zero_shares(body): body = str(body, "utf-8") - self.failIfIn("<html>", body) + self.assertThat(body, Not(Contains("<html>"))) body = " ".join(body.strip().split()) exp = ("NoSharesError: no shares could be found. 
" "Zero shares usually indicates a corrupt URI, or that " @@ -1129,7 +1140,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.GET, self.fileurls["1share"])) def _check_one_share(body): body = str(body, "utf-8") - self.failIfIn("", body) + self.assertThat(body, Not(Contains(""))) body = " ".join(body.strip().split()) msgbase = ("NotEnoughSharesError: This indicates that some " "servers were unavailable, or that shares have been " @@ -1154,17 +1165,16 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.GET, self.fileurls["imaginary"])) def _missing_child(body): body = str(body, "utf-8") - self.failUnlessIn("No such child: imaginary", body) + self.assertThat(body, Contains("No such child: imaginary")) d.addCallback(_missing_child) d.addCallback(lambda ignored: self.GET_unicode(self.fileurls["dir-0share"])) def _check_0shares_dir_html(body): - self.failUnlessIn(DIR_HTML_TAG, body) + self.assertThat(body, Contains(DIR_HTML_TAG)) # we should see the regular page, but without the child table or # the dirops forms body = " ".join(body.strip().split()) - self.failUnlessIn('href="?t=info">More info on this directory', - body) + self.assertThat(body, Contains('href="?t=info">More info on this directory')) exp = ("UnrecoverableFileError: the directory (or mutable file) " "could not be retrieved, because there were insufficient " "good shares. This might indicate that no servers were " @@ -1172,8 +1182,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi "was corrupt, or that shares have been lost due to server " "departure, hard drive failure, or disk corruption. You " "should perform a filecheck on this object to learn more.") - self.failUnlessIn(exp, body) - self.failUnlessIn("No upload forms: directory is unreadable", body) + self.assertThat(body, Contains(exp)) + self.assertThat(body, Contains("No upload forms: directory is unreadable")) d.addCallback(_check_0shares_dir_html) d.addCallback(lambda ignored: self.GET_unicode(self.fileurls["dir-1share"])) @@ -1182,10 +1192,9 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi # and some-shares like we did for immutable files (since there # are different sorts of advice to offer in each case). For now, # they present the same way. - self.failUnlessIn(DIR_HTML_TAG, body) + self.assertThat(body, Contains(DIR_HTML_TAG)) body = " ".join(body.strip().split()) - self.failUnlessIn('href="?t=info">More info on this directory', - body) + self.assertThat(body, Contains('href="?t=info">More info on this directory')) exp = ("UnrecoverableFileError: the directory (or mutable file) " "could not be retrieved, because there were insufficient " "good shares. This might indicate that no servers were " @@ -1193,8 +1202,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi "was corrupt, or that shares have been lost due to server " "departure, hard drive failure, or disk corruption. 
You " "should perform a filecheck on this object to learn more.") - self.failUnlessIn(exp, body) - self.failUnlessIn("No upload forms: directory is unreadable", body) + self.assertThat(body, Contains(exp)) + self.assertThat(body, Contains("No upload forms: directory is unreadable")) d.addCallback(_check_1shares_dir_html) d.addCallback(lambda ignored: @@ -1204,7 +1213,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.fileurls["dir-0share-json"])) def _check_unrecoverable_file(body): body = str(body, "utf-8") - self.failIfIn("<html>", body) + self.assertThat(body, Not(Contains("<html>"))) body = " ".join(body.strip().split()) exp = ("UnrecoverableFileError: the directory (or mutable file) " "could not be retrieved, because there were insufficient " "good shares. This might indicate that no servers were " "connected, insufficient shares were granted, the dirnode " "was corrupt, or that shares have been lost due to server " "departure, hard drive failure, or disk corruption. You " "should perform a filecheck on this object to learn more.") - self.failUnlessIn(exp, body) + self.assertThat(body, Contains(exp)) d.addCallback(_check_unrecoverable_file) d.addCallback(lambda ignored: @@ -1245,7 +1254,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi headers={"accept": "*/*"})) def _internal_error_html1(body): body = str(body, "utf-8") - self.failUnlessIn("<html>", "expected HTML, not '%s'" % body) + self.assertThat("expected HTML, not '%s'" % body, Contains("<html>")) d.addCallback(_internal_error_html1) d.addCallback(lambda ignored: headers={"accept": "text/plain"})) def _internal_error_text2(body): body = str(body, "utf-8") - self.failIfIn("<html>", body) + self.assertThat(body, Not(Contains("<html>"))) self.failUnless(body.startswith("Traceback "), body) + d.addCallback(_internal_error_text2) CLI_accepts = "text/plain, application/octet-stream" d.addCallback(lambda ignored: headers={"accept": CLI_accepts})) def _internal_error_text3(body): body = str(body, "utf-8") - self.failIfIn("<html>", body) + self.assertThat(body, Not(Contains("<html>"))) self.failUnless(body.startswith("Traceback "), body) d.addCallback(_internal_error_text3) d.addCallback(lambda ignored: 500, "Internal Server Error", None, self.GET, "ERRORBOOM")) def _internal_error_html4(body): - self.failUnlessIn(b"<html>", body) + self.assertThat(body, Contains(b"<html>")) d.addCallback(_internal_error_html4) def _flush_errors(res): # Trial: please ignore the CompletelyUnhandledError in the logs - self.flushLoggedErrors(CompletelyUnhandledError) + flush_logged_errors(CompletelyUnhandledError) return res d.addBoth(_flush_errors) @@ -1312,8 +1322,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi d.addCallback(_stash_dir) d.addCallback(lambda ign: self.GET_unicode(self.dir_url, followRedirect=True)) def _check_dir_html(body): - self.failUnlessIn(DIR_HTML_TAG, body) - self.failUnlessIn("blacklisted.txt", body) + self.assertThat(body, Contains(DIR_HTML_TAG)) + self.assertThat(body, Contains("blacklisted.txt")) d.addCallback(_check_dir_html) d.addCallback(lambda ign: self.GET(self.url)) d.addCallback(lambda body: self.failUnlessEqual(DATA, body)) @@ -1336,8 +1346,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi # We 
should still be able to list the parent directory, in HTML... d.addCallback(lambda ign: self.GET_unicode(self.dir_url, followRedirect=True)) def _check_dir_html2(body): - self.failUnlessIn(DIR_HTML_TAG, body) - self.failUnlessIn("blacklisted.txt", body) + self.assertThat(body, Contains(DIR_HTML_TAG)) + self.assertThat(body, Contains("blacklisted.txt")) d.addCallback(_check_dir_html2) # ... and in JSON (used by CLI). @@ -1347,8 +1357,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnless(isinstance(data, list), data) self.failUnlessEqual(data[0], "dirnode") self.failUnless(isinstance(data[1], dict), data) - self.failUnlessIn("children", data[1]) - self.failUnlessIn("blacklisted.txt", data[1]["children"]) + self.assertThat(data[1], Contains("children")) + self.assertThat(data[1]["children"], Contains("blacklisted.txt")) childdata = data[1]["children"]["blacklisted.txt"] self.failUnless(isinstance(childdata, list), data) self.failUnlessEqual(childdata[0], "filenode") @@ -1387,7 +1397,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.child_url = b"uri/"+dn.get_readonly_uri()+b"/child" d.addCallback(_get_dircap) d.addCallback(lambda ign: self.GET(self.dir_url_base, followRedirect=True)) - d.addCallback(lambda body: self.failUnlessIn(DIR_HTML_TAG, str(body, "utf-8"))) + d.addCallback(lambda body: self.assertThat(str(body, "utf-8"), Contains(DIR_HTML_TAG))) d.addCallback(lambda ign: self.GET(self.dir_url_json1)) d.addCallback(lambda res: json.loads(res)) # just check it decodes d.addCallback(lambda ign: self.GET(self.dir_url_json2)) From b4cdf7f96915943be85ee2b48c6954a1b2c128e7 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Wed, 8 Sep 2021 00:08:37 +0100 Subject: [PATCH 005/220] changed fragment to minor, improved test_grid.py refactor Signed-off-by: fenn-cs --- newsfragments/3758.minor | 0 newsfragments/3758.other | 1 - src/allmydata/test/web/test_grid.py | 21 +++++++++++---------- 3 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 newsfragments/3758.minor delete mode 100644 newsfragments/3758.other diff --git a/newsfragments/3758.minor b/newsfragments/3758.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3758.other b/newsfragments/3758.other deleted file mode 100644 index d0eb1d4c1..000000000 --- a/newsfragments/3758.other +++ /dev/null @@ -1 +0,0 @@ -Refactored test_logs, test_grid and test_root in web tests to use custom base test cases diff --git a/src/allmydata/test/web/test_grid.py b/src/allmydata/test/web/test_grid.py index 54aa13941..1ebe3a90f 100644 --- a/src/allmydata/test/web/test_grid.py +++ b/src/allmydata/test/web/test_grid.py @@ -49,8 +49,9 @@ from ..common import ( from testtools.matchers import ( Equals, Contains, - Is, Not, + HasLength, + EndsWith, ) from testtools.twistedsupport import flush_logged_errors @@ -190,7 +191,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi r = json.loads(res) self.failUnlessEqual(r["summary"], "Not Healthy: 9 shares (enc 3-of-10)") - self.assertThat(r["results"]["healthy"], Is(False)) + self.assertThat(r["results"]["healthy"], Equals(False)) self.failUnless(r["results"]["recoverable"]) self.assertThat(r["results"], Not(Contains("needs-rebalancing"))) d.addCallback(_got_json_sick) @@ -204,8 +205,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi r = json.loads(res) self.failUnlessEqual(r["summary"], "Not Healthy: 1 shares (enc 3-of-10)") - 
self.assertThat(r["results"]["healthy"], Is(False)) - self.assertThat(r["results"]["recoverable"], Is(False)) + self.assertThat(r["results"]["healthy"], Equals(False)) + self.assertThat(r["results"]["recoverable"], Equals(False)) self.assertThat(r["results"], Not(Contains("needs-rebalancing"))) d.addCallback(_got_json_dead) @@ -217,7 +218,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi def _got_json_corrupt(res): r = json.loads(res) self.assertThat(r["summary"], Contains("Unhealthy: 9 shares (enc 3-of-10)")) - self.assertThat(r["results"]["healthy"], Is(False)) + self.assertThat(r["results"]["healthy"], Equals(False)) self.failUnless(r["results"]["recoverable"]) self.assertThat(r["results"], Not(Contains("needs-rebalancing"))) self.failUnlessReallyEqual(r["results"]["count-happiness"], 9) @@ -532,10 +533,10 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi def _created(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) - self.assertThat(dn.is_mutable(), Is(False)) + self.assertThat(dn.is_mutable(), Equals(False)) self.failUnless(dn.is_readonly()) # This checks that if we somehow ended up calling dn._decrypt_rwcapdata, it would fail. - self.assertThat(hasattr(dn._node, 'get_writekey'), Is(False)) + self.assertThat(hasattr(dn._node, 'get_writekey'), Equals(False)) rep = str(dn) self.assertThat(rep, Contains("RO-IMM")) cap = dn.get_cap() @@ -597,15 +598,15 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.assertThat(td.findNextSibling().findNextSibling().text, Equals(u"{}".format(len("one")))) found = True break - self.assertThat(found, Is(True)) + self.assertThat(found, Equals(True)) infos = list( a[u"href"] for a in soup.find_all(u"a") if a.text == u"More Info" ) - self.assertThat(len(infos), Equals(1)) - self.assertThat(infos[0].endswith(url_quote(lonely_uri) + "?t=info"), Is(True)) + self.assertThat(infos, HasLength(1)) + self.assertThat(infos[0], EndsWith(url_quote(lonely_uri) + "?t=info")) d.addCallback(_check_html) # ... and in JSON. 
From 5bd5ee580acd3d0a95b190074d2da1fc5d98975e Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Sat, 18 Sep 2021 23:50:34 +0100 Subject: [PATCH 006/220] layout for tests that check if log_call_deferred decorates parametrized functions correctly Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 3f915ecd2..7db23ce9b 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -281,3 +281,44 @@ class LogCallDeferredTests(TestCase): ), ), ) + + @capture_logging( + lambda self, logger: + assertHasAction(self, logger, u"the-action", succeeded=True), + ) + def test_gets_positional_arguments(self, logger): + """ + Check that positional arguments are logged when using ``log_call_deferred`` + """ + @log_call_deferred(action_type=u"the-action") + def f(a): + return a ** 2 + self.assertThat( + f(4), succeeded(Equals(16))) + + @capture_logging( + lambda self, logger: + assertHasAction(self, logger, u"the-action", succeeded=True), + ) + def test_gets_keyword_arguments(self, logger): + """ + Check that keyword arguments are logged when using ``log_call_deferred`` + """ + @log_call_deferred(action_type=u"the-action") + def f(base, exp): + return base ** exp + self.assertThat(f(exp=2,base=10), succeeded(Equals(100))) + + + @capture_logging( + lambda self, logger: + assertHasAction(self, logger, u"the-action", succeeded=True), + ) + def test_gets_keyword_and_positional_arguments(self, logger): + """ + Check that both keyword and positional arguments are logged when using ``log_call_deferred`` + """ + @log_call_deferred(action_type=u"the-action") + def f(base, exp, message): + return base ** exp + self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) \ No newline at end of file From dd8aa8a66648a9582a8732afde704cebdd4b16fa Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Wed, 22 Sep 2021 23:37:33 +0100 Subject: [PATCH 007/220] test if log_call_deferred decorates parametrized functions correctly Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 9 ++++++++- src/allmydata/util/eliotutil.py | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 7db23ce9b..7edd4e780 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -56,6 +56,7 @@ from eliot.testing import ( capture_logging, assertHasAction, swap_logger, + assertContainsFields, ) from twisted.internet.defer import ( @@ -295,6 +296,8 @@ class LogCallDeferredTests(TestCase): return a ** 2 self.assertThat( f(4), succeeded(Equals(16))) + msg = logger.messages[0] + assertContainsFields(self, msg, {"args": (4,)}) @capture_logging( lambda self, logger: assertHasAction(self, logger, u"the-action", succeeded=True), ) @@ -308,6 +311,8 @@ class LogCallDeferredTests(TestCase): def f(base, exp): return base ** exp self.assertThat(f(exp=2,base=10), succeeded(Equals(100))) + msg = logger.messages[0] + assertContainsFields(self, msg, {"base": 10, "exp": 2}) @capture_logging( @@ -321,4 +326,6 @@ class LogCallDeferredTests(TestCase): @log_call_deferred(action_type=u"the-action") def f(base, exp, message): return base ** exp - self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) \ No newline at end of file + self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) + msg = logger.messages[0] + 
assertContainsFields(self, msg, {"args": (10, 2), "message": "an exponential function"}) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index d989c9e2a..ac2d3e4e0 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -324,8 +324,8 @@ def log_call_deferred(action_type): def logged_f(*a, **kw): # Use the action's context method to avoid ending the action when # the `with` block ends. - args = {k: bytes_to_unicode(True, kw[k]) for k in kw} - with start_action(action_type=action_type, **args).context(): + kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} + with start_action(action_type=action_type, args=a, **kwargs).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. d = maybeDeferred(f, *a, **kw) From 759d4c85a295cfd620aacbc55225ee1d8aa236b2 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 28 Sep 2021 09:56:14 +0100 Subject: [PATCH 008/220] avoid argument collision in call of start_action in eliotutil Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 5 +++-- src/allmydata/test/web/test_logs.py | 2 +- src/allmydata/util/eliotutil.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 7edd4e780..1fbb9ec8d 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -312,7 +312,7 @@ class LogCallDeferredTests(TestCase): return base ** exp self.assertThat(f(exp=2,base=10), succeeded(Equals(100))) msg = logger.messages[0] - assertContainsFields(self, msg, {"base": 10, "exp": 2}) + assertContainsFields(self, msg, {"kwargs": {"base": 10, "exp": 2}}) @capture_logging( @@ -328,4 +328,5 @@ class LogCallDeferredTests(TestCase): return base ** exp self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) msg = logger.messages[0] - assertContainsFields(self, msg, {"args": (10, 2), "message": "an exponential function"}) + assertContainsFields(self, msg, {"args": (10, 2)}) + assertContainsFields(self, msg, {"kwargs": {"message": "an exponential function"}}) diff --git a/src/allmydata/test/web/test_logs.py b/src/allmydata/test/web/test_logs.py index 043541690..fe0a0445d 100644 --- a/src/allmydata/test/web/test_logs.py +++ b/src/allmydata/test/web/test_logs.py @@ -121,7 +121,7 @@ class TestStreamingLogs(AsyncTestCase): self.assertThat(len(messages), Equals(3)) self.assertThat(messages[0]["action_type"], Equals("test:cli:some-exciting-action")) - self.assertThat(messages[0]["arguments"], + self.assertThat(messages[0]["kwargs"]["arguments"], Equals(["hello", "good-\\xff-day", 123, {"a": 35}, [None]])) self.assertThat(messages[1]["action_type"], Equals("test:cli:some-exciting-action")) self.assertThat("started", Equals(messages[0]["action_status"])) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index ac2d3e4e0..e0c2fd8ae 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -325,7 +325,7 @@ def log_call_deferred(action_type): # Use the action's context method to avoid ending the action when # the `with` block ends. kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} - with start_action(action_type=action_type, args=a, **kwargs).context(): + with start_action(action_type=action_type, args=a, kwargs=kwargs).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. 
d = maybeDeferred(f, *a, **kw) From 5803d9999d3763e1cc7ac746273cf2873cede646 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Mon, 11 Oct 2021 13:49:29 +0100 Subject: [PATCH 009/220] remove unserializable args in log_call_deferred passed to start_action Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 4 ++-- src/allmydata/util/eliotutil.py | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 1fbb9ec8d..3d7b2bd42 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -297,7 +297,7 @@ class LogCallDeferredTests(TestCase): self.assertThat( f(4), succeeded(Equals(16))) msg = logger.messages[0] - assertContainsFields(self, msg, {"args": (4,)}) + assertContainsFields(self, msg, {"args": {'arg_0': 4}}) @capture_logging( lambda self, logger: assertHasAction(self, logger, u"the-action", succeeded=True), ) @@ -328,5 +328,5 @@ class LogCallDeferredTests(TestCase): return base ** exp self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) msg = logger.messages[0] - assertContainsFields(self, msg, {"args": (10, 2)}) + assertContainsFields(self, msg, {"args": {'arg_0': 10, 'arg_1': 2}}) assertContainsFields(self, msg, {"kwargs": {"message": "an exponential function"}}) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index e0c2fd8ae..fb18ed332 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -91,7 +91,7 @@ from .jsonbytes import ( AnyBytesJSONEncoder, bytes_to_unicode ) - +import json def validateInstanceOf(t): @@ -315,6 +315,14 @@ class _DestinationParser(object): _parse_destination_description = _DestinationParser().parse +def is_json_serializable(object): + try: + json.dumps(object) + return True + except (TypeError, OverflowError): + return False + + def log_call_deferred(action_type): """ Like ``eliot.log_call`` but for functions which return ``Deferred``. """ @@ -325,7 +333,14 @@ def log_call_deferred(action_type): def logged_f(*a, **kw): # Use the action's context method to avoid ending the action when # the `with` block ends. kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} - with start_action(action_type=action_type, args=a, kwargs=kwargs).context(): + # Remove complex (unserializable) objects from positional args to + # prevent eliot from throwing errors when it attempts serialization + args = { + "arg_" + str(pos): bytes_to_unicode(True, a[pos]) + for pos in range(len(a)) + if is_json_serializable(a[pos]) + } + with start_action(action_type=action_type, args=args, kwargs=kwargs).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. 
d = maybeDeferred(f, *a, **kw) @@ -339,3 +354,5 @@ if PY2: capture_logging = eliot_capture_logging else: capture_logging = partial(eliot_capture_logging, encoder_=AnyBytesJSONEncoder) + + From 57a0f76e1f6ed5116c1defa221ff93241f60291d Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Wed, 13 Oct 2021 23:41:42 +0100 Subject: [PATCH 010/220] maintain list of positional arguments as tuple Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 4 ++-- src/allmydata/util/eliotutil.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 3d7b2bd42..1fbb9ec8d 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -297,7 +297,7 @@ class LogCallDeferredTests(TestCase): self.assertThat( f(4), succeeded(Equals(16))) msg = logger.messages[0] - assertContainsFields(self, msg, {"args": {'arg_0': 4}}) + assertContainsFields(self, msg, {"args": (4,)}) @capture_logging( lambda self, logger: @@ -328,5 +328,5 @@ class LogCallDeferredTests(TestCase): return base ** exp self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) msg = logger.messages[0] - assertContainsFields(self, msg, {"args": {'arg_0': 10, 'arg_1': 2}}) + assertContainsFields(self, msg, {"args": (10, 2)}) assertContainsFields(self, msg, {"kwargs": {"message": "an exponential function"}}) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index fb18ed332..997dadb8d 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -335,11 +335,7 @@ def log_call_deferred(action_type): kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} # Remove complex (unserializable) objects from positional args to # prevent eliot from throwing errors when it attempts serialization - args = { - "arg_" + str(pos): bytes_to_unicode(True, a[pos]) - for pos in range(len(a)) - if is_json_serializable(a[pos]) - } + args = tuple([a[pos] for pos in range(len(a)) if is_json_serializable(a[pos])]) with start_action(action_type=action_type, args=args, kwargs=kwargs).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. From 1a12a8acdffea5491e223e82cafc557b1d8dbda6 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Fri, 15 Oct 2021 00:50:11 +0100 Subject: [PATCH 011/220] don't throw away unserializable parameter Signed-off-by: fenn-cs --- src/allmydata/util/eliotutil.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index 997dadb8d..f2272a731 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -335,7 +335,12 @@ def log_call_deferred(action_type): kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} # Remove complex (unserializable) objects from positional args to # prevent eliot from throwing errors when it attempts serialization - args = tuple([a[pos] for pos in range(len(a)) if is_json_serializable(a[pos])]) + args = tuple( + a[pos] + if is_json_serializable(a[pos]) + else str(a[pos]) + for pos in range(len(a)) + ) with start_action(action_type=action_type, args=args, kwargs=kwargs).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. 
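Filtering out unserializable positional arguments, as patch 010 above does, keeps Eliot's JSON serializer happy but silently shrinks the logged ``args`` tuple, so logged positions no longer line up with the decorated function's actual call. The next patch keeps every position by falling back to ``str()``. A small sketch of the difference (``Opaque`` is a stand-in class invented for this example):

    import json

    def is_json_serializable(obj):
        try:
            json.dumps(obj)
            return True
        except (TypeError, OverflowError):
            return False

    class Opaque(object):
        pass

    a = (1, Opaque(), 3)
    # Patch 010: the offending value is dropped and positions shift.
    dropped = tuple(x for x in a if is_json_serializable(x))           # (1, 3)
    # Patch 011: arity is preserved; the value is logged as its str() form.
    kept = tuple(x if is_json_serializable(x) else str(x) for x in a)
    # (1, '<__main__.Opaque object at 0x...>', 3)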
From 1c347c593130f606cfdc7d0e2f52a0ec1db5b20a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 15 Oct 2021 15:05:21 -0400 Subject: [PATCH 012/220] replace sensitive introducer fURL with path where it can be found --- src/allmydata/introducer/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/introducer/server.py b/src/allmydata/introducer/server.py index 1e28f511b..aa0ae8336 100644 --- a/src/allmydata/introducer/server.py +++ b/src/allmydata/introducer/server.py @@ -136,7 +136,7 @@ class _IntroducerNode(node.Node): os.rename(old_public_fn, private_fn) furl = self.tub.registerReference(introducerservice, furlFile=private_fn) - self.log(" introducer is at %s" % furl, umid="qF2L9A") + self.log(" introducer can be found in {!r}".format(private_fn), umid="qF2L9A") self.introducer_url = furl # for tests def init_web(self, webport): From f2ef72e935126f55be103ac856836ebf4b2c140e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 08:14:42 -0400 Subject: [PATCH 013/220] newsfragment in temporary location --- newsfragments/LFS-01-001.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/LFS-01-001.security diff --git a/newsfragments/LFS-01-001.security b/newsfragments/LFS-01-001.security new file mode 100644 index 000000000..975fd0035 --- /dev/null +++ b/newsfragments/LFS-01-001.security @@ -0,0 +1 @@ +The introducer server no longer writes the sensitive introducer fURL value to its log at startup time. Instead it writes the well-known path of the file from which this value can be read. From 7d04e6ab8613010e98f807fa95826451d79b2d1d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 10:45:10 -0400 Subject: [PATCH 014/220] news fragment --- newsfragments/LFS-01-007.security | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 newsfragments/LFS-01-007.security diff --git a/newsfragments/LFS-01-007.security b/newsfragments/LFS-01-007.security new file mode 100644 index 000000000..75d9904a2 --- /dev/null +++ b/newsfragments/LFS-01-007.security @@ -0,0 +1,2 @@ +The storage protocol operation ``add_lease`` now safely rejects an attempt to add a 4,294,967,296th lease to an immutable share. +Previously this failed with an error after recording the new lease in the share file, resulting in the share file losing track of a one previous lease. From f60bbbd3e201b1b49598b7b2b6a05ad8db8a3dfd Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 10:45:58 -0400 Subject: [PATCH 015/220] make it possible to test this behavior of `add_lease` --- src/allmydata/storage/immutable.py | 66 ++++++++++++++++++++++++++++-- src/allmydata/test/test_storage.py | 59 +++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index b8b18f140..acd09854f 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -53,13 +53,64 @@ from allmydata.storage.common import UnknownImmutableContainerVersionError # then the value stored in this field will be the actual share data length # modulo 2**32. +def _fix_lease_count_format(lease_count_format): + """ + Turn a single character struct format string into a format string suitable + for use in encoding and decoding the lease count value inside a share + file, if possible. + + :param str lease_count_format: A single character format string like + ``"B"`` or ``"L"``. 
+ + :raise ValueError: If the given format string is not suitable for use + encoding and decoding a lease count. + + :return str: A complete format string which can safely be used to encode + and decode lease counts in a share file. + """ + if len(lease_count_format) != 1: + raise ValueError( + "Cannot construct ShareFile with lease_count_format={!r}; " + "format must accept a single value".format( + lease_count_format, + ), + ) + # Make it big-endian with standard size so all platforms agree on the + # result. + fixed = ">" + lease_count_format + if struct.calcsize(fixed) > 4: + # There is only room for at most 4 bytes in the share file format so + # we can't allow any larger formats. + raise ValueError( + "Cannot construct ShareFile with lease_count_format={!r}; " + "size must not be larger than size of '>L'".format( + lease_count_format, + ), + ) + return fixed + + class ShareFile(object): + """ + Support interaction with persistent storage of a share. + + :ivar str _lease_count_format: The format string which is used to encode + and decode the lease count inside the share file. As stated in the + comment in this module there is room for at most 4 bytes in this part + of the file. A format string that works on fewer bytes is allowed to + restrict the number of leases allowed in the share file to a smaller + number than could be supported by using the full 4 bytes. This is + mostly of interest for testing. + """ LEASE_SIZE = struct.calcsize(">L32s32sL") sharetype = "immutable" - def __init__(self, filename, max_size=None, create=False): + def __init__(self, filename, max_size=None, create=False, lease_count_format="L"): """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True then max_size must not be None. """ precondition((max_size is not None) or (not create), max_size, create) + + self._lease_count_format = _fix_lease_count_format(lease_count_format) + self._lease_count_size = struct.calcsize(self._lease_count_format) self.home = filename self._max_size = max_size if create: @@ -126,12 +177,21 @@ class ShareFile(object): def _read_num_leases(self, f): f.seek(0x08) - (num_leases,) = struct.unpack(">L", f.read(4)) + (num_leases,) = struct.unpack( + self._lease_count_format, + f.read(self._lease_count_size), + ) return num_leases def _write_num_leases(self, f, num_leases): + self._write_encoded_num_leases( + f, + struct.pack(self._lease_count_format, num_leases), + ) + + def _write_encoded_num_leases(self, f, encoded_num_leases): f.seek(0x08) - f.write(struct.pack(">L", num_leases)) + f.write(encoded_num_leases) def _truncate_leases(self, f, num_leases): f.truncate(self._lease_offset + num_leases * self.LEASE_SIZE) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index d18960a1e..0a37dffc2 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -19,6 +19,7 @@ import platform import stat import struct import shutil +from functools import partial from uuid import uuid4 from twisted.trial import unittest @@ -3009,8 +3010,8 @@ class Stats(unittest.TestCase): class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" - def get_sharefile(self): - sf = ShareFile(self.mktemp(), max_size=1000, create=True) + def get_sharefile(self, **kwargs): + sf = ShareFile(self.mktemp(), max_size=1000, create=True, **kwargs) sf.write_share_data(0, b"abc") sf.write_share_data(2, b"DEF") # Should be b'abDEF' now. 
@@ -3039,3 +3040,57 @@ class ShareFileTests(unittest.TestCase): sf = self.get_sharefile() with self.assertRaises(IndexError): sf.cancel_lease(b"garbage") + + def test_long_lease_count_format(self): + """ + ``ShareFile.__init__`` raises ``ValueError`` if the lease count format + given is longer than one character. + """ + with self.assertRaises(ValueError): + self.get_sharefile(lease_count_format="BB") + + def test_large_lease_count_format(self): + """ + ``ShareFile.__init__`` raises ``ValueError`` if the lease count format + encodes to a size larger than 4 bytes. + """ + with self.assertRaises(ValueError): + self.get_sharefile(lease_count_format="Q") + + def test_avoid_lease_overflow(self): + """ + If the share file already has the maximum number of leases supported then + ``ShareFile.add_lease`` raises ``struct.error`` and makes no changes + to the share file contents. + """ + make_lease = partial( + LeaseInfo, + renew_secret=b"r" * 32, + cancel_secret=b"c" * 32, + expiration_time=2 ** 31, + ) + # Make it a little easier to reach the condition by limiting the + # number of leases to only 255. + sf = self.get_sharefile(lease_count_format="B") + + # Add the leases. + for i in range(2 ** 8 - 1): + lease = make_lease(owner_num=i) + sf.add_lease(lease) + + # Capture the state of the share file at this point so we can + # determine whether the next operation modifies it or not. + with open(sf.home, "rb") as f: + before_data = f.read() + + # It is not possible to add a 256th lease. + lease = make_lease(owner_num=256) + with self.assertRaises(struct.error): + sf.add_lease(lease) + + # Compare the share file state to what we captured earlier. Any + # change is a bug. + with open(sf.home, "rb") as f: + after_data = f.read() + + self.assertEqual(before_data, after_data) From df64bbb1e443cbbad272067e7716b4d9a3f3408d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 10:50:28 -0400 Subject: [PATCH 016/220] fail to encode the lease count *before* changing anything This preserves the failure behavior - `struct.error` is raised - but leaves the actual share file contents untouched if the new lease count cannot be encoded. There are still two separate write operations so it is conceivable that some other problem could cause `write_lease_record` to happen but `write_encoded_num_leases` not to happen. As far as I can tell we have severely limited options for addressing that problem in general as long as share files are backed by a POSIX filesystem. However, by removing the failure mode that depends on user input, it may be that this is all that is needed to close the *security* hole. --- src/allmydata/storage/immutable.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index acd09854f..887ccc931 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -209,8 +209,11 @@ class ShareFile(object): def add_lease(self, lease_info): with open(self.home, 'rb+') as f: num_leases = self._read_num_leases(f) + # Before we write the new lease record, make sure we can encode + # the new lease count. 
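+            # If the incremented count does not fit in the configured format,
+            # struct.error is raised here, before any bytes are written, so
+            # the share file is left unchanged.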
+ new_lease_count = struct.pack(self._lease_count_format, num_leases + 1) self._write_lease_record(f, num_leases, lease_info) - self._write_num_leases(f, num_leases+1) + self._write_encoded_num_leases(f, new_lease_count) def renew_lease(self, renew_secret, new_expire_time): for i,lease in enumerate(self.get_leases()): From 4a5e4be0069ed41eb25deeb09828de76e1db041d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 14:35:11 -0400 Subject: [PATCH 017/220] news fragment --- newsfragments/LFS-01-008.security | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 newsfragments/LFS-01-008.security diff --git a/newsfragments/LFS-01-008.security b/newsfragments/LFS-01-008.security new file mode 100644 index 000000000..5d6c07ab5 --- /dev/null +++ b/newsfragments/LFS-01-008.security @@ -0,0 +1,2 @@ +The storage protocol operation ``readv`` now safely rejects attempts to read negative lengths. +Previously these read requests were satisfied with the complete contents of the share file (including trailing metadata) starting from the specified offset. From 5e58b62979b7ad2c813f95e1f50c550da1f69f36 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 14:36:24 -0400 Subject: [PATCH 018/220] Add a test for negative offset or length to MutableShareFile.readv --- src/allmydata/test/strategies.py | 15 ++++ src/allmydata/test/test_storage.py | 117 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/src/allmydata/test/strategies.py b/src/allmydata/test/strategies.py index c0f558ef6..2bb23a373 100644 --- a/src/allmydata/test/strategies.py +++ b/src/allmydata/test/strategies.py @@ -16,6 +16,7 @@ from hypothesis.strategies import ( one_of, builds, binary, + integers, ) from ..uri import ( @@ -119,3 +120,17 @@ def dir2_mdmf_capabilities(): MDMFDirectoryURI, mdmf_capabilities(), ) + +def offsets(min_value=0, max_value=2 ** 16): + """ + Build ``int`` values that could be used as valid offsets into a sequence + (such as share data in a share file). + """ + return integers(min_value, max_value) + +def lengths(min_value=1, max_value=2 ** 16): + """ + Build ``int`` values that could be used as valid lengths of data (such as + share data in a share file). + """ + return integers(min_value, max_value) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 0a37dffc2..f19073f3e 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -13,6 +13,9 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from six import ensure_str +from io import ( + BytesIO, +) import time import os.path import platform @@ -59,6 +62,10 @@ from allmydata.storage_client import ( ) from .common import LoggingServiceParent, ShouldFailMixin from .common_util import FakeCanary +from .strategies import ( + offsets, + lengths, +) class UtilTests(unittest.TestCase): @@ -3094,3 +3101,113 @@ class ShareFileTests(unittest.TestCase): after_data = f.read() self.assertEqual(before_data, after_data) + + +class MutableShareFileTests(unittest.TestCase): + """ + Tests for allmydata.storage.mutable.MutableShareFile. + """ + def get_sharefile(self): + return MutableShareFile(self.mktemp()) + + @given( + nodeid=strategies.just(b"x" * 20), + write_enabler=strategies.just(b"y" * 32), + datav=strategies.lists( + # Limit the max size of these so we don't write *crazy* amounts of + # data to disk. 
+            strategies.tuples(offsets(), strategies.binary(max_size=2 ** 8)),
+            max_size=2 ** 8,
+        ),
+        new_length=offsets(),
+    )
+    def test_readv_reads_share_data(self, nodeid, write_enabler, datav, new_length):
+        """
+        ``MutableShareFile.readv`` returns bytes from the share data portion
+        of the share file.
+        """
+        sf = self.get_sharefile()
+        sf.create(my_nodeid=nodeid, write_enabler=write_enabler)
+        sf.writev(datav=datav, new_length=new_length)
+
+        # Apply all of the writes to a simple in-memory buffer so we can
+        # resolve the final state of the share data.  In particular, this
+        # helps deal with overlapping writes which otherwise make it tricky to
+        # figure out what data to expect to be able to read back.
+        buf = BytesIO()
+        for (offset, data) in datav:
+            buf.seek(offset)
+            buf.write(data)
+        buf.truncate(new_length)
+
+        # Using that buffer, determine the expected result of a readv for all
+        # of the data just written.
+        def read_from_buf(offset, length):
+            buf.seek(offset)
+            return buf.read(length)
+        expected_data = list(
+            read_from_buf(offset, len(data))
+            for (offset, data)
+            in datav
+        )
+
+        # Perform a read that gives back all of the data written to the share
+        # file.
+        read_vectors = list((offset, len(data)) for (offset, data) in datav)
+        read_data = sf.readv(read_vectors)
+
+        # Make sure the read reproduces the value we computed using our local
+        # buffer.
+        self.assertEqual(expected_data, read_data)
+
+    @given(
+        nodeid=strategies.just(b"x" * 20),
+        write_enabler=strategies.just(b"y" * 32),
+        readv=strategies.lists(strategies.tuples(offsets(), lengths()), min_size=1),
+        random=strategies.randoms(),
+    )
+    def test_readv_rejects_negative_length(self, nodeid, write_enabler, readv, random):
+        """
+        If a negative offset or length is given to ``MutableShareFile.readv``
+        in a read vector then ``AssertionError`` is raised.
+        """
+        # Pick a read vector to break with a negative value
+        readv_index = random.randrange(len(readv))
+        # Decide on whether we're breaking offset or length
+        offset_or_length = random.randrange(2)
+
+        # A helper function that will take a valid offset and length and break
+        # one of them.
+        def corrupt(break_length, offset, length):
+            if break_length:
+                # length must not be 0 or flipping the sign does nothing
+                # length must not be negative or flipping the sign *fixes* it
+                assert length > 0
+                return (offset, -length)
+            else:
+                if offset > 0:
+                    # We can break offset just by flipping the sign.
+                    return (-offset, length)
+                else:
+                    # Otherwise it has to be zero.  If it was negative, what's
+                    # going on?
+                    assert offset == 0
+                    # Since we can't just flip the sign on 0 to break things,
+                    # replace a 0 offset with a simple negative value.  All
+                    # other negative values will be tested by the `offset > 0`
+                    # case above.
+                    return (-1, length)
+
+        # Break the read vector very slightly!
+        broken_readv = readv[:]
+        broken_readv[readv_index] = corrupt(
+            offset_or_length,
+            *broken_readv[readv_index]
+        )
+
+        sf = self.get_sharefile()
+        sf.create(my_nodeid=nodeid, write_enabler=write_enabler)
+
+        # A read with a broken read vector is an error.
+ with self.assertRaises(AssertionError): + sf.readv(broken_readv) From 3cd9a02c810f6aa1dba9dbd664980b49bec39048 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 18 Oct 2021 20:13:24 -0400 Subject: [PATCH 019/220] Reject negative lengths in MutableShareFile._read_share_data and readv --- src/allmydata/storage/mutable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 2ef0c3215..cdb4faeaf 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -120,6 +120,7 @@ class MutableShareFile(object): def _read_share_data(self, f, offset, length): precondition(offset >= 0) + precondition(length >= 0) data_length = self._read_data_length(f) if offset+length > data_length: # reads beyond the end of the data are truncated. Reads that @@ -454,4 +455,3 @@ def create_mutable_sharefile(filename, my_nodeid, write_enabler, parent): ms.create(my_nodeid, write_enabler) del ms return MutableShareFile(filename, parent) - From 20ad6cd9e79cc62b23c6de4c4dba8ff9300f7a2c Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 19 Oct 2021 23:57:52 +0100 Subject: [PATCH 020/220] iterate over args directly without indexing Signed-off-by: fenn-cs --- src/allmydata/util/eliotutil.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index f2272a731..fe431568f 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -335,12 +335,7 @@ def log_call_deferred(action_type): kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} # Remove complex (unserializable) objects from positional args to # prevent eliot from throwing errors when it attempts serialization - args = tuple( - a[pos] - if is_json_serializable(a[pos]) - else str(a[pos]) - for pos in range(len(a)) - ) + args = tuple(arg if is_json_serializable(arg) else str(arg) for arg in a) with start_action(action_type=action_type, args=args, kwargs=kwargs).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. From a8d3555ebb6eae1d65cf4cfc928357de8d9a2268 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 21 Oct 2021 15:24:53 -0400 Subject: [PATCH 021/220] reference the eventually-public ticket number --- newsfragments/{LFS-01-001.security => 3819.security} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename newsfragments/{LFS-01-001.security => 3819.security} (100%) diff --git a/newsfragments/LFS-01-001.security b/newsfragments/3819.security similarity index 100% rename from newsfragments/LFS-01-001.security rename to newsfragments/3819.security From 61a20e245029ecfa626aa5f522af2eb08b7e19d3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Oct 2021 10:10:53 -0400 Subject: [PATCH 022/220] Add concept of upload secret to immutable uploads. --- docs/proposed/http-storage-node-protocol.rst | 33 +++++++++++++++++--- newsfragments/3820.minor | 0 2 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 newsfragments/3820.minor diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 521bf476d..16db0fed9 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -451,6 +451,7 @@ Details of the buckets to create are encoded in the request body. 
For example::

    {"renew-secret": "efgh", "cancel-secret": "ijkl",
+    "upload-secret": "xyzf",
     "share-numbers": [1, 7, ...], "allocated-size": 12345}

 The response body includes encoded information about the created buckets.
@@ -458,6 +459,8 @@ For example::

    {"already-have": [1, ...], "allocated": [7, ...]}

+The session secret is an opaque _byte_ string.
+
 Discussion
 ``````````

@@ -482,6 +485,13 @@ The response includes ``already-have`` and ``allocated`` for two reasons:
   This might be because a server has become unavailable and a remaining server needs to store more shares for the upload.
   It could also just be that the client's preferred servers have changed.

+Regarding upload secrets,
+the goal is for uploading and aborting (see next sections) to be authenticated by more than just the storage index.
+In the future, we will want to generate them in a way that allows resuming/canceling when the client has issues.
+In the short term, they can just be a random byte string.
+The key security constraint is that each upload to each server has its own, unique upload key,
+tied to uploading that particular storage index to this particular server.
+
 ``PATCH /v1/immutable/:storage_index/:share_number``
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

@@ -498,6 +508,12 @@ If any one of these requests fails then at most 128KiB of upload work needs to b
 The server must recognize when all of the data has been received and mark the share as complete
 (which it can do because it was informed of the size when the storage index was initialized).

+The request body looks like this, with data and upload secret being bytes::
+
+    { "upload-secret": "xyzf", "data": "thedata" }
+
+Responses:
+
 * When a chunk that does not complete the share is successfully uploaded the response is ``OK``.
   The response body indicates the range of share data that has yet to be uploaded.
   That is::
@@ -522,6 +538,10 @@ The server must recognize when all of the data has been received and mark the sh

 This cancels an *in-progress* upload.

+The request body looks like this::
+
+    { "upload-secret": "xyzf" }
+
 The response code:

 * When the upload is still in progress and therefore the abort has succeeded,
@@ -695,6 +715,7 @@ Immutable Data

     POST /v1/immutable/AAAAAAAAAAAAAAAA
     {"renew-secret": "efgh", "cancel-secret": "ijkl",
+     "upload-secret": "xyzf",
      "share-numbers": [1, 7], "allocated-size": 48}

     200 OK
@@ -704,25 +725,29 @@ Immutable Data

     PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
     Content-Range: bytes 0-15/48
-
+
+    {"upload-secret": "xyzf", "data": "first 16 bytes!!"}

     200 OK

     PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
     Content-Range: bytes 16-31/48
-
+
+    {"upload-secret": "xyzf", "data": "second 16 bytes!"}

     200 OK

     PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
     Content-Range: bytes 32-47/48
-
+
+    {"upload-secret": "xyzf", "data": "final 16 bytes!!"}

     201 CREATED

 #. Download the content of the previously uploaded immutable share ``7``::

-    GET /v1/immutable/AAAAAAAAAAAAAAAA?share=7&offset=0&size=48
+    GET /v1/immutable/AAAAAAAAAAAAAAAA?share=7
+    Range: bytes=0-47

     200 OK
diff --git a/newsfragments/3820.minor b/newsfragments/3820.minor
new file mode 100644
index 000000000..e69de29bb

From e0c8bab5d7a97d539a5364c962cd5861430432a0 Mon Sep 17 00:00:00 2001
From: Itamar Turner-Trauring
Date: Fri, 22 Oct 2021 10:32:44 -0400
Subject: [PATCH 023/220] Add proposal on how to generate upload secret.
---
 docs/proposed/http-storage-node-protocol.rst | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst
index 16db0fed9..d5b6653be 100644
--- a/docs/proposed/http-storage-node-protocol.rst
+++ b/docs/proposed/http-storage-node-protocol.rst
@@ -459,7 +459,13 @@ For example::

    {"already-have": [1, ...], "allocated": [7, ...]}

-The session secret is an opaque _byte_ string.
+The upload secret is an opaque _byte_ string.
+It will be generated by hashing a combination of:
+
+1. A tag.
+2. The storage index, so it's unique across different source files.
+3. The server ID, so it's unique across different servers.
+4. The convergence secret, so that servers can't guess the upload secret for other servers.

 Discussion
 ``````````
@@ -492,6 +498,13 @@ In the short term, they can just be a random byte string.
 The key security constraint is that each upload to each server has its own, unique upload key,
 tied to uploading that particular storage index to this particular server.

+Rejected designs for upload secrets:
+
+* Upload secret per share number.
+  In order to make the secret unguessable by attackers, which includes other servers,
+  it must contain randomness.
+  Randomness means there is no need to have a secret per share, since adding share-specific content to randomness doesn't actually make the secret any better.
+
 ``PATCH /v1/immutable/:storage_index/:share_number``
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

From 6c0ca0b88592bffd8954cf06142cd962c1a3c654 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Fri, 22 Oct 2021 08:41:09 -0400
Subject: [PATCH 024/220] try making windows let us use longer paths

---
 src/allmydata/test/test_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py
index d61942839..8e7aa9d27 100644
--- a/src/allmydata/test/test_download.py
+++ b/src/allmydata/test/test_download.py
@@ -493,7 +493,7 @@ class DownloadTest(_Base, unittest.TestCase):
         d.addCallback(_done)
         return d

-    def test_simultaneous_onefails_onecancelled(self):
+    def test_simul_1fail_1cancel(self):
         # This exercises an mplayer behavior in ticket #1154.  I believe that
         # mplayer made two simultaneous webapi GET requests: first one for an
         # index region at the end of the (mp3/video) file, then one for the

From d8c466e9a7ba5f121cb6d9f891569db7e01e87b6 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Fri, 22 Oct 2021 12:35:11 -0400
Subject: [PATCH 025/220] try to explain `lease_count_format` more clearly

---
 src/allmydata/storage/immutable.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py
index 887ccc931..e23abb080 100644
--- a/src/allmydata/storage/immutable.py
+++ b/src/allmydata/storage/immutable.py
@@ -106,7 +106,30 @@ class ShareFile(object):
     sharetype = "immutable"

     def __init__(self, filename, max_size=None, create=False, lease_count_format="L"):
-        """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. """
+        """
+        Initialize a ``ShareFile``.
+
+        :param Optional[int] max_size: If given, the maximum number of bytes
+           that this ``ShareFile`` will accept to be stored. ``write`` will
+           accept in total.
+
+        :param bool create: If ``True``, create the file (and fail if it
+            exists already).
``max_size`` must not be ``None`` in this case. + If ``False``, open an existing file for reading. + + :param str lease_count_format: A format character to use to encode and + decode the number of leases in the share file. There are only 4 + bytes available in the file so the format must be 4 bytes or + smaller. If different formats are used at different times with + the same share file, the result will likely be nonsense. + + This parameter is intended for the test suite to use to be able to + exercise values near the maximum encodeable value without having + to create billions of leases. + + :raise ValueError: If the encoding of ``lease_count_format`` is too + large or if it is not a single format character. + """ precondition((max_size is not None) or (not create), max_size, create) self._lease_count_format = _fix_lease_count_format(lease_count_format) From bcdfb8155c28c94e75b8e7acc7344dc1f01aa798 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 12:53:17 -0400 Subject: [PATCH 026/220] give the news fragment its proper name --- newsfragments/{LFS-01-007.security => 3821.security} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename newsfragments/{LFS-01-007.security => 3821.security} (100%) diff --git a/newsfragments/LFS-01-007.security b/newsfragments/3821.security similarity index 100% rename from newsfragments/LFS-01-007.security rename to newsfragments/3821.security From 7f3d9316d2dc8d2fe99b211a006bc45749f184c3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 12:59:26 -0400 Subject: [PATCH 027/220] Give the news fragment its real name --- newsfragments/{LFS-01-008.security => 3822.security} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename newsfragments/{LFS-01-008.security => 3822.security} (100%) diff --git a/newsfragments/LFS-01-008.security b/newsfragments/3822.security similarity index 100% rename from newsfragments/LFS-01-008.security rename to newsfragments/3822.security From ce30f9dd0663ba22a985571f8029ad35026bb91e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 15:04:45 -0400 Subject: [PATCH 028/220] clean up copyediting errors --- src/allmydata/storage/immutable.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index e23abb080..55bcdda64 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -110,8 +110,7 @@ class ShareFile(object): Initialize a ``ShareFile``. :param Optional[int] max_size: If given, the maximum number of bytes - that this ``ShareFile`` will accept to be stored. ``write`` will - accept in total. + that this ``ShareFile`` will accept to be stored. :param bool create: If ``True``, create the file (and fail if it exists already). ``max_size`` must not be ``None`` in this case. From bb5b26638de4729254b6febb2549a08cd82471e7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 20 Oct 2021 14:20:53 -0400 Subject: [PATCH 029/220] news fragment --- newsfragments/LFS-01-005.security | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 newsfragments/LFS-01-005.security diff --git a/newsfragments/LFS-01-005.security b/newsfragments/LFS-01-005.security new file mode 100644 index 000000000..135b2487c --- /dev/null +++ b/newsfragments/LFS-01-005.security @@ -0,0 +1,3 @@ +The storage server implementation now respects the ``reserved_space`` configuration value when writing lease information. 
+Previously, new leases could be created and written to disk even when the storage server had less remaining space than the configured reserve space value. +Now this operation will fail with an exception and the lease will not be created. From c77425693769ecd1ce73fcf064b62ef0eaf29ec6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 20 Oct 2021 14:25:01 -0400 Subject: [PATCH 030/220] Add a test for ``remote_add_lease`` with respect to reserved space --- src/allmydata/interfaces.py | 2 ++ src/allmydata/test/common.py | 37 ++++++++++++++++++++++++++++ src/allmydata/test/common_storage.py | 33 +++++++++++++++++++++++++ src/allmydata/test/test_storage.py | 35 +++++++++++++++++++++++++- 4 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 src/allmydata/test/common_storage.py diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 5522663ee..f055a01e2 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -52,6 +52,8 @@ WriteEnablerSecret = Hash # used to protect mutable share modifications LeaseRenewSecret = Hash # used to protect lease renewal requests LeaseCancelSecret = Hash # was used to protect lease cancellation requests +class NoSpace(Exception): + """Storage space was not available for a space-allocating operation.""" class DataTooLargeError(Exception): """The write went past the expected size of the bucket.""" diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 0f2dc7c62..38282297a 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -87,6 +87,7 @@ from allmydata.interfaces import ( SDMF_VERSION, MDMF_VERSION, IAddressFamily, + NoSpace, ) from allmydata.check_results import CheckResults, CheckAndRepairResults, \ DeepCheckResults, DeepCheckAndRepairResults @@ -139,6 +140,42 @@ EMPTY_CLIENT_CONFIG = config_from_string( "" ) +@attr.s +class FakeDisk(object): + """ + Just enough of a disk to be able to report free / used information. + """ + total = attr.ib() + used = attr.ib() + + def use(self, num_bytes): + """ + Mark some amount of available bytes as used (and no longer available). + + :param int num_bytes: The number of bytes to use. + + :raise NoSpace: If there are fewer bytes available than ``num_bytes``. + + :return: ``None`` + """ + if num_bytes > self.total - self.used: + raise NoSpace() + self.used += num_bytes + + @property + def available(self): + return self.total - self.used + + def get_disk_stats(self, whichdir, reserved_space): + avail = self.available + return { + 'total': self.total, + 'free_for_root': avail, + 'free_for_nonroot': avail, + 'used': self.used, + 'avail': avail - reserved_space, + } + @attr.s class MemoryIntroducerClient(object): diff --git a/src/allmydata/test/common_storage.py b/src/allmydata/test/common_storage.py new file mode 100644 index 000000000..f020a8146 --- /dev/null +++ b/src/allmydata/test/common_storage.py @@ -0,0 +1,33 @@ + +from .common_util import ( + FakeCanary, +) + +def upload_immutable(storage_server, storage_index, renew_secret, cancel_secret, shares): + """ + Synchronously upload some immutable shares to a ``StorageServer``. + + :param allmydata.storage.server.StorageServer storage_server: The storage + server object to use to perform the upload. + + :param bytes storage_index: The storage index for the immutable shares. + + :param bytes renew_secret: The renew secret for the implicitly created lease. + :param bytes cancel_secret: The cancel secret for the implicitly created lease. 
+ + :param dict[int, bytes] shares: A mapping from share numbers to share data + to upload. The data for all shares must be of the same length. + + :return: ``None`` + """ + already, writers = storage_server.remote_allocate_buckets( + storage_index, + renew_secret, + cancel_secret, + shares.keys(), + len(next(iter(shares.values()))), + canary=FakeCanary(), + ) + for shnum, writer in writers.items(): + writer.remote_write(0, shares[shnum]) + writer.remote_close() diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index f19073f3e..67d690047 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -60,8 +60,15 @@ from allmydata.test.no_network import NoNetworkServer from allmydata.storage_client import ( _StorageServer, ) -from .common import LoggingServiceParent, ShouldFailMixin +from .common import ( + LoggingServiceParent, + ShouldFailMixin, + FakeDisk, +) from .common_util import FakeCanary +from .common_storage import ( + upload_immutable, +) from .strategies import ( offsets, lengths, @@ -651,6 +658,32 @@ class Server(unittest.TestCase): self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) + def test_reserved_space_immutable_lease(self): + """ + If there is not enough available space to store an additional lease then + ``remote_add_lease`` fails with ``NoSpace`` when an attempt is made to + use it to create a new lease. + """ + disk = FakeDisk(total=1024, used=0) + self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) + + ss = self.create("test_reserved_space_immutable_lease") + + storage_index = b"x" * 16 + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + shares = {0: b"y" * 500} + upload_immutable(ss, storage_index, renew_secret, cancel_secret, shares) + + # use up all the available space + disk.use(disk.available) + + # Different secrets to produce a different lease, not a renewal. + renew_secret = b"R" * 32 + cancel_secret = b"C" * 32 + with self.assertRaises(interfaces.NoSpace): + ss.remote_add_lease(storage_index, renew_secret, cancel_secret) + def test_reserved_space(self): reserved = 10000 allocated = 0 From b3aa1e224f226fb09fdc38312c189369a7aa8847 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 20 Oct 2021 14:27:27 -0400 Subject: [PATCH 031/220] Add a helper to LeaseInfo for computing size This lets some code LBYL and avoid writing if the lease won't fit in the immutable share in the space available. --- src/allmydata/storage/lease.py | 14 ++++++++++++-- src/allmydata/test/test_storage.py | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 187f32406..d3b3eef88 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -13,6 +13,9 @@ if PY2: import struct, time +# struct format for representation of a lease in an immutable share +IMMUTABLE_FORMAT = ">L32s32sL" + class LeaseInfo(object): def __init__(self, owner_num=None, renew_secret=None, cancel_secret=None, expiration_time=None, nodeid=None): @@ -39,12 +42,19 @@ class LeaseInfo(object): (self.owner_num, self.renew_secret, self.cancel_secret, - self.expiration_time) = struct.unpack(">L32s32sL", data) + self.expiration_time) = struct.unpack(IMMUTABLE_FORMAT, data) self.nodeid = None return self + def immutable_size(self): + """ + :return int: The size, in bytes, of the representation of this lease in an + immutable share file. 
+ """ + return struct.calcsize(IMMUTABLE_FORMAT) + def to_immutable_data(self): - return struct.pack(">L32s32sL", + return struct.pack(IMMUTABLE_FORMAT, self.owner_num, self.renew_secret, self.cancel_secret, int(self.expiration_time)) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 67d690047..329953e99 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -117,6 +117,29 @@ class FakeStatsProvider(object): def register_producer(self, producer): pass + +class LeaseInfoTests(unittest.TestCase): + """ + Tests for ``LeaseInfo``. + """ + @given( + strategies.tuples( + strategies.integers(min_value=0, max_value=2 ** 31 - 1), + strategies.binary(min_size=32, max_size=32), + strategies.binary(min_size=32, max_size=32), + strategies.integers(min_value=0, max_value=2 ** 31 - 1), + strategies.binary(min_size=20, max_size=20), + ), + ) + def test_immutable_size(self, initializer_args): + """ + ``LeaseInfo.immutable_size`` returns the length of the result of + ``LeaseInfo.to_immutable_data``. + """ + info = LeaseInfo(*initializer_args) + self.assertEqual(len(info.to_immutable_data()), info.immutable_size()) + + class Bucket(unittest.TestCase): def make_workdir(self, name): basedir = os.path.join("storage", "Bucket", name) From 1264c3be1e225e0573aa1e5b30ffa52f5af2d3be Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 20 Oct 2021 14:35:13 -0400 Subject: [PATCH 032/220] Use `_add_or_renew_leases` helper consistently in StorageServer This will make it easier to add a new argument to the underlying `add_or_renew_lease` call. --- src/allmydata/storage/server.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 041783a4e..21c612a59 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -286,7 +286,7 @@ class StorageServer(service.MultiService, Referenceable): # to a particular owner. start = self._get_current_time() self.count("allocate") - alreadygot = set() + alreadygot = {} bucketwriters = {} # k: shnum, v: BucketWriter si_dir = storage_index_to_dir(storage_index) si_s = si_b2a(storage_index) @@ -318,9 +318,8 @@ class StorageServer(service.MultiService, Referenceable): # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. 
for (shnum, fn) in self._get_bucket_shares(storage_index): - alreadygot.add(shnum) - sf = ShareFile(fn) - sf.add_or_renew_lease(lease_info) + alreadygot[shnum] = ShareFile(fn) + self._add_or_renew_leases(alreadygot.values(), lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) @@ -352,7 +351,7 @@ class StorageServer(service.MultiService, Referenceable): fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) self.add_latency("allocate", self._get_current_time() - start) - return alreadygot, bucketwriters + return set(alreadygot), bucketwriters def remote_allocate_buckets(self, storage_index, renew_secret, cancel_secret, @@ -392,8 +391,10 @@ class StorageServer(service.MultiService, Referenceable): lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, new_expire_time, self.my_nodeid) - for sf in self._iter_share_files(storage_index): - sf.add_or_renew_lease(lease_info) + self._add_or_renew_leases( + self._iter_share_files(storage_index), + lease_info, + ) self.add_latency("add-lease", self._get_current_time() - start) return None @@ -611,12 +612,12 @@ class StorageServer(service.MultiService, Referenceable): """ Put the given lease onto the given shares. - :param dict[int, MutableShareFile] shares: The shares to put the lease - onto. + :param Iterable[Union[MutableShareFile, ShareFile]] shares: The shares + to put the lease onto. :param LeaseInfo lease_info: The lease to put on the shares. """ - for share in six.viewvalues(shares): + for share in shares: share.add_or_renew_lease(lease_info) def slot_testv_and_readv_and_writev( # type: ignore # warner/foolscap#78 @@ -675,7 +676,7 @@ class StorageServer(service.MultiService, Referenceable): ) if renew_leases: lease_info = self._make_lease_info(renew_secret, cancel_secret) - self._add_or_renew_leases(remaining_shares, lease_info) + self._add_or_renew_leases(remaining_shares.values(), lease_info) # all done self.add_latency("writev", self._get_current_time() - start) From 4defc641a2da2b20898f15eb1c9234dcc1cbeb38 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 20 Oct 2021 14:36:05 -0400 Subject: [PATCH 033/220] Have ShareFile only write a new lease if there is room for it StorageServer passes available space down so it can make the decision. ShareFile has to do it because `add_or_renew_lease` only *sometimes* adds a lease and only ShareFile knows when that is. --- src/allmydata/storage/immutable.py | 20 ++++++++++++++++++-- src/allmydata/storage/server.py | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 55bcdda64..ad2d19f5f 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -21,6 +21,7 @@ from zope.interface import implementer from allmydata.interfaces import ( RIBucketWriter, RIBucketReader, ConflictingWriteError, DataTooLargeError, + NoSpace, ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition @@ -249,14 +250,29 @@ class ShareFile(object): return raise IndexError("unable to renew non-existent lease") - def add_or_renew_lease(self, lease_info): + def add_or_renew_lease(self, available_space, lease_info): + """ + Renew an existing lease if possible, otherwise allocate a new one. + + :param int available_space: The maximum number of bytes of storage to + commit in this operation. If more than this number of bytes is + required, raise ``NoSpace`` instead. 
+ + :param LeaseInfo lease_info: The details of the lease to renew or add. + + :raise NoSpace: If more than ``available_space`` bytes is required to + complete the operation. In this case, no lease is added. + + :return: ``None`` + """ try: self.renew_lease(lease_info.renew_secret, lease_info.expiration_time) except IndexError: + if lease_info.immutable_size() > available_space: + raise NoSpace() self.add_lease(lease_info) - def cancel_lease(self, cancel_secret): """Remove a lease with the given cancel_secret. If the last lease is cancelled, the file will be removed. Return the number of bytes that diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 21c612a59..66d9df998 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -618,7 +618,7 @@ class StorageServer(service.MultiService, Referenceable): :param LeaseInfo lease_info: The lease to put on the shares. """ for share in shares: - share.add_or_renew_lease(lease_info) + share.add_or_renew_lease(self.get_available_space(), lease_info) def slot_testv_and_readv_and_writev( # type: ignore # warner/foolscap#78 self, From e0ed04c1033f995aa5cf90f829b63d127cd290af Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 21 Oct 2021 14:27:20 -0400 Subject: [PATCH 034/220] use SyncTestCase to get `expectThat` --- src/allmydata/test/test_storage.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 329953e99..5b5cfa89d 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -25,6 +25,10 @@ import shutil from functools import partial from uuid import uuid4 +from testtools.matchers import ( + HasLength, +) + from twisted.trial import unittest from twisted.internet import defer @@ -64,6 +68,7 @@ from .common import ( LoggingServiceParent, ShouldFailMixin, FakeDisk, + SyncTestCase, ) from .common_util import FakeCanary from .common_storage import ( @@ -118,7 +123,7 @@ class FakeStatsProvider(object): pass -class LeaseInfoTests(unittest.TestCase): +class LeaseInfoTests(SyncTestCase): """ Tests for ``LeaseInfo``. """ @@ -137,7 +142,10 @@ class LeaseInfoTests(unittest.TestCase): ``LeaseInfo.to_immutable_data``. 
""" info = LeaseInfo(*initializer_args) - self.assertEqual(len(info.to_immutable_data()), info.immutable_size()) + self.expectThat( + info.to_immutable_data(), + HasLength(info.immutable_size()), + ) class Bucket(unittest.TestCase): From dd1ab2afe8299f8b96651112e4117ffb267ad054 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 21 Oct 2021 14:27:45 -0400 Subject: [PATCH 035/220] Add a helper to compute the size of a lease in a mutable share --- src/allmydata/storage/lease.py | 14 ++++++++++++-- src/allmydata/test/test_storage.py | 7 +++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index d3b3eef88..3453c1ecc 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -16,6 +16,9 @@ import struct, time # struct format for representation of a lease in an immutable share IMMUTABLE_FORMAT = ">L32s32sL" +# struct format for representation of a lease in a mutable share +MUTABLE_FORMAT = ">LL32s32s20s" + class LeaseInfo(object): def __init__(self, owner_num=None, renew_secret=None, cancel_secret=None, expiration_time=None, nodeid=None): @@ -53,6 +56,13 @@ class LeaseInfo(object): """ return struct.calcsize(IMMUTABLE_FORMAT) + def mutable_size(self): + """ + :return int: The size, in bytes, of the representation of this lease in a + mutable share file. + """ + return struct.calcsize(MUTABLE_FORMAT) + def to_immutable_data(self): return struct.pack(IMMUTABLE_FORMAT, self.owner_num, @@ -60,7 +70,7 @@ class LeaseInfo(object): int(self.expiration_time)) def to_mutable_data(self): - return struct.pack(">LL32s32s20s", + return struct.pack(MUTABLE_FORMAT, self.owner_num, int(self.expiration_time), self.renew_secret, self.cancel_secret, @@ -70,5 +80,5 @@ class LeaseInfo(object): (self.owner_num, self.expiration_time, self.renew_secret, self.cancel_secret, - self.nodeid) = struct.unpack(">LL32s32s20s", data) + self.nodeid) = struct.unpack(MUTABLE_FORMAT, data) return self diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 5b5cfa89d..9ce6482ea 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -140,12 +140,19 @@ class LeaseInfoTests(SyncTestCase): """ ``LeaseInfo.immutable_size`` returns the length of the result of ``LeaseInfo.to_immutable_data``. + + ``LeaseInfo.mutable_size`` returns the length of the result of + ``LeaseInfo.to_mutable_data``. """ info = LeaseInfo(*initializer_args) self.expectThat( info.to_immutable_data(), HasLength(info.immutable_size()), ) + self.expectThat( + info.to_mutable_data(), + HasLength(info.mutable_size()), + ) class Bucket(unittest.TestCase): From f789339a79c995d617e09010563e6f418e815067 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 21 Oct 2021 15:16:56 -0400 Subject: [PATCH 036/220] Have MutableShare file only write a new lease if there is room for it This is analagous to the earlier ShareFile change. 
---
 src/allmydata/storage/mutable.py     | 25 ++++++++++++---
 src/allmydata/test/common_storage.py | 32 +++++++++++++++++++
 src/allmydata/test/test_storage.py   | 47 ++++++++++++++++++++++++++--
 3 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py
index cdb4faeaf..74c0d1051 100644
--- a/src/allmydata/storage/mutable.py
+++ b/src/allmydata/storage/mutable.py
@@ -13,7 +13,10 @@ if PY2:

 import os, stat, struct

-from allmydata.interfaces import BadWriteEnablerError
+from allmydata.interfaces import (
+    BadWriteEnablerError,
+    NoSpace,
+)
 from allmydata.util import idlib, log
 from allmydata.util.assertutil import precondition
 from allmydata.util.hashutil import timing_safe_compare
@@ -289,7 +292,19 @@ class MutableShareFile(object):
         except IndexError:
             return

-    def add_lease(self, lease_info):
+    def add_lease(self, available_space, lease_info):
+        """
+        Add a new lease to this share.
+
+        :param int available_space: The maximum number of bytes of storage to
+            commit in this operation.  If more than this number of bytes is
+            required, raise ``NoSpace`` instead.
+
+        :raise NoSpace: If more than ``available_space`` bytes is required to
+            complete the operation.  In this case, no lease is added.
+
+        :return: ``None``
+        """
         precondition(lease_info.owner_num != 0) # 0 means "no lease here"
         with open(self.home, 'rb+') as f:
             num_lease_slots = self._get_num_lease_slots(f)
@@ -297,6 +312,8 @@ class MutableShareFile(object):
             if empty_slot is not None:
                 self._write_lease_record(f, empty_slot, lease_info)
             else:
+                if lease_info.mutable_size() > available_space:
+                    raise NoSpace()
                 self._write_lease_record(f, num_lease_slots, lease_info)

     def renew_lease(self, renew_secret, new_expire_time):
@@ -321,13 +338,13 @@ class MutableShareFile(object):
             msg += " ."
             raise IndexError(msg)

-    def add_or_renew_lease(self, lease_info):
+    def add_or_renew_lease(self, available_space, lease_info):
         precondition(lease_info.owner_num != 0) # 0 means "no lease here"
         try:
             self.renew_lease(lease_info.renew_secret,
                              lease_info.expiration_time)
         except IndexError:
-            self.add_lease(lease_info)
+            self.add_lease(available_space, lease_info)

     def cancel_lease(self, cancel_secret):
         """Remove any leases with the given cancel_secret. If the last lease
diff --git a/src/allmydata/test/common_storage.py b/src/allmydata/test/common_storage.py
index f020a8146..529ebe586 100644
--- a/src/allmydata/test/common_storage.py
+++ b/src/allmydata/test/common_storage.py
@@ -31,3 +31,35 @@ def upload_immutable(storage_server, storage_index, renew_secret, cancel_secret,
     for shnum, writer in writers.items():
         writer.remote_write(0, shares[shnum])
         writer.remote_close()
+
+
+def upload_mutable(storage_server, storage_index, secrets, shares):
+    """
+    Synchronously upload some mutable shares to a ``StorageServer``.
+
+    :param allmydata.storage.server.StorageServer storage_server: The storage
+        server object to use to perform the upload.
+
+    :param bytes storage_index: The storage index for the mutable shares.
+
+    :param secrets: A three-tuple of a write enabler, renew secret, and cancel
+        secret.
+
+    :param dict[int, bytes] shares: A mapping from share numbers to share data
+        to upload.
+ + :return: ``None`` + """ + test_and_write_vectors = { + sharenum: ([], [(0, data)], None) + for sharenum, data + in shares.items() + } + read_vector = [] + + storage_server.remote_slot_testv_and_readv_and_writev( + storage_index, + secrets, + test_and_write_vectors, + read_vector, + ) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 9ce6482ea..e03c07203 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -73,6 +73,7 @@ from .common import ( from .common_util import FakeCanary from .common_storage import ( upload_immutable, + upload_mutable, ) from .strategies import ( offsets, @@ -698,9 +699,9 @@ class Server(unittest.TestCase): def test_reserved_space_immutable_lease(self): """ - If there is not enough available space to store an additional lease then - ``remote_add_lease`` fails with ``NoSpace`` when an attempt is made to - use it to create a new lease. + If there is not enough available space to store an additional lease on an + immutable share then ``remote_add_lease`` fails with ``NoSpace`` when + an attempt is made to use it to create a new lease. """ disk = FakeDisk(total=1024, used=0) self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) @@ -722,6 +723,46 @@ class Server(unittest.TestCase): with self.assertRaises(interfaces.NoSpace): ss.remote_add_lease(storage_index, renew_secret, cancel_secret) + def test_reserved_space_mutable_lease(self): + """ + If there is not enough available space to store an additional lease on a + mutable share then ``remote_add_lease`` fails with ``NoSpace`` when an + attempt is made to use it to create a new lease. + """ + disk = FakeDisk(total=1024, used=0) + self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) + + ss = self.create("test_reserved_space_mutable_lease") + + renew_secrets = iter( + "{}{}".format("r" * 31, i).encode("ascii") + for i + in range(5) + ) + + storage_index = b"x" * 16 + write_enabler = b"w" * 32 + cancel_secret = b"c" * 32 + secrets = (write_enabler, next(renew_secrets), cancel_secret) + shares = {0: b"y" * 500} + upload_mutable(ss, storage_index, secrets, shares) + + # use up all the available space + disk.use(disk.available) + + # The upload created one lease. There is room for three more leases + # in the share header. Even if we're out of disk space, on a boring + # enough filesystem we can write these. + for i in range(3): + ss.remote_add_lease(storage_index, next(renew_secrets), cancel_secret) + + # Having used all of the space for leases in the header, we would have + # to allocate storage for the next lease. Since there is no space + # available, this must fail instead. 
+ with self.assertRaises(interfaces.NoSpace): + ss.remote_add_lease(storage_index, next(renew_secrets), cancel_secret) + + def test_reserved_space(self): reserved = 10000 allocated = 0 From 6449ad03de20db407dc96ba2a6651b9d80ff797a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 13:38:37 -0400 Subject: [PATCH 037/220] Do not record corruption advisories if there is no available space --- src/allmydata/storage/server.py | 86 ++++++++++++++++++++++++------ src/allmydata/test/test_storage.py | 21 ++++++++ 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 66d9df998..3ee494786 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -737,24 +737,80 @@ class StorageServer(service.MultiService, Referenceable): # protocol backwards compatibility reasons. assert isinstance(share_type, bytes) assert isinstance(reason, bytes), "%r is not bytes" % (reason,) - fileutil.make_dirs(self.corruption_advisory_dir) - now = time_format.iso_utc(sep="T") + si_s = si_b2a(storage_index) - # windows can't handle colons in the filename - fn = os.path.join( - self.corruption_advisory_dir, - ("%s--%s-%d" % (now, str(si_s, "utf-8"), shnum)).replace(":","") - ) - with open(fn, "w") as f: - f.write("report: Share Corruption\n") - f.write("type: %s\n" % bytes_to_native_str(share_type)) - f.write("storage_index: %s\n" % bytes_to_native_str(si_s)) - f.write("share_number: %d\n" % shnum) - f.write("\n") - f.write(bytes_to_native_str(reason)) - f.write("\n") + log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") + + fileutil.make_dirs(self.corruption_advisory_dir) + now = time_format.iso_utc(sep="T") + + report = render_corruption_report(share_type, si_s, shnum, reason) + if len(report) > self.get_available_space(): + return None + + report_path = get_corruption_report_path( + self.corruption_advisory_dir, + now, + si_s, + shnum, + ) + with open(report_path, "w") as f: + f.write(report) + return None + +CORRUPTION_REPORT_FORMAT = """\ +report: Share Corruption +type: {type} +storage_index: {storage_index} +share_number: {share_number} + +{reason} + +""" + +def render_corruption_report(share_type, si_s, shnum, reason): + """ + Create a string that explains a corruption report using freeform text. + + :param bytes share_type: The type of the share which the report is about. + + :param bytes si_s: The encoded representation of the storage index which + the report is about. + + :param int shnum: The share number which the report is about. + + :param bytes reason: The reason given by the client for the corruption + report. + """ + return CORRUPTION_REPORT_FORMAT.format( + type=bytes_to_native_str(share_type), + storage_index=bytes_to_native_str(si_s), + share_number=shnum, + reason=bytes_to_native_str(reason), + ) + +def get_corruption_report_path(base_dir, now, si_s, shnum): + """ + Determine the path to which a certain corruption report should be written. + + :param str base_dir: The directory beneath which to construct the path. + + :param str now: The time of the report. + + :param str si_s: The encoded representation of the storage index which the + report is about. + + :param int shnum: The share number which the report is about. + + :return str: A path to which the report can be written. 
+ """ + # windows can't handle colons in the filename + return os.path.join( + base_dir, + ("%s--%s-%d" % (now, str(si_s, "utf-8"), shnum)).replace(":","") + ) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index e03c07203..314069ce2 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1006,6 +1006,27 @@ class Server(unittest.TestCase): self.failUnlessEqual(set(b.keys()), set([0,1,2])) self.failUnlessEqual(b[0].remote_read(0, 25), b"\x00" * 25) + def test_reserved_space_advise_corruption(self): + """ + If there is no available space then ``remote_advise_corrupt_share`` does + not write a corruption report. + """ + disk = FakeDisk(total=1024, used=1024) + self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) + + workdir = self.workdir("test_reserved_space_advise_corruption") + ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) + ss.setServiceParent(self.sparent) + + si0_s = base32.b2a(b"si0") + ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, + b"This share smells funny.\n") + + self.assertEqual( + [], + os.listdir(ss.corruption_advisory_dir), + ) + def test_advise_corruption(self): workdir = self.workdir("test_advise_corruption") ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) From 5837841c090d110e1ec772f0aed137642a7d6aaa Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 14:15:47 -0400 Subject: [PATCH 038/220] mention corruption advisories in the news fragment too --- newsfragments/LFS-01-005.security | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/newsfragments/LFS-01-005.security b/newsfragments/LFS-01-005.security index 135b2487c..ba2bbd741 100644 --- a/newsfragments/LFS-01-005.security +++ b/newsfragments/LFS-01-005.security @@ -1,3 +1,4 @@ -The storage server implementation now respects the ``reserved_space`` configuration value when writing lease information. +The storage server implementation now respects the ``reserved_space`` configuration value when writing lease information and recording corruption advisories. Previously, new leases could be created and written to disk even when the storage server had less remaining space than the configured reserve space value. Now this operation will fail with an exception and the lease will not be created. +Similarly, if there is no space available, corruption advisories will be logged but not written to disk. 
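For concreteness, here is a small standalone sketch (separate from the patches in this series) of what the corruption-report helpers above produce; the inputs, including the storage-index value, are made up::

    # Sketch only: the report text below is built the same way as
    # CORRUPTION_REPORT_FORMAT in the patch above; the inputs are invented.
    report = (
        "report: Share Corruption\n"
        "type: {type}\n"
        "storage_index: {storage_index}\n"
        "share_number: {share_number}\n"
        "\n"
        "{reason}\n"
        "\n"
    ).format(
        type="immutable",
        storage_index="aaaaaaaaaaaaaaaa",  # hypothetical base32 storage index
        share_number=7,
        reason="This share smells funny.",
    )
    # report now reads:
    #   report: Share Corruption
    #   type: immutable
    #   storage_index: aaaaaaaaaaaaaaaa
    #   share_number: 7
    #
    #   This share smells funny.
    #
    # Before writing, the server compares len(report) against
    # get_available_space() and silently drops the advisory when the
    # reserve is exhausted.  The on-disk name strips colons for Windows,
    # so a report from 2021-10-22T19:04:32 lands at something like
    # corruption-advisories/2021-10-22T190432.123456--aaaaaaaaaaaaaaaa-7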
From 8d15d61ff2600b3a4f560e3b55f14a13bc3138e5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 15:58:48 -0400 Subject: [PATCH 039/220] put the news fragment in the right place --- newsfragments/{LFS-01-005.security => 3823.security} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename newsfragments/{LFS-01-005.security => 3823.security} (100%) diff --git a/newsfragments/LFS-01-005.security b/newsfragments/3823.security similarity index 100% rename from newsfragments/LFS-01-005.security rename to newsfragments/3823.security From 194499aafe42399185cbc4185fa078f09adfb608 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 16:09:54 -0400 Subject: [PATCH 040/220] remove unused import --- src/allmydata/storage/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 3ee494786..30fa5adc2 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -15,7 +15,6 @@ else: from typing import Dict import os, re, struct, time -import six from foolscap.api import Referenceable from foolscap.ipb import IRemoteReference From cb675df48d08f4f0a42061d9261a3f5d47ac1673 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 16:10:24 -0400 Subject: [PATCH 041/220] remove unused encoding of storage index --- src/allmydata/test/test_storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 314069ce2..70cad7db2 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1018,7 +1018,6 @@ class Server(unittest.TestCase): ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) ss.setServiceParent(self.sparent) - si0_s = base32.b2a(b"si0") ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, b"This share smells funny.\n") From ea202ba61b90545ab78127f3340a8bb4bac18612 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 14:51:37 -0400 Subject: [PATCH 042/220] news fragment --- newsfragments/3824.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3824.security diff --git a/newsfragments/3824.security b/newsfragments/3824.security new file mode 100644 index 000000000..b29b2acc8 --- /dev/null +++ b/newsfragments/3824.security @@ -0,0 +1 @@ +The storage server implementation no longer records corruption advisories about storage indexes for which it holds no shares. 
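Read together with the reserved-space change above, the rule the next patch introduces can be condensed into a single predicate. This is a simplified sketch of the idea with a hypothetical function name, not the actual diff (which follows); ``_get_bucket_shares`` yields ``(shnum, filename)`` pairs as shown in the earlier server changes::

    # Sketch of the combined policy for corruption advisories: the server
    # must hold a matching share, and the rendered report must fit in the
    # space left after honoring the ``reserved_space`` configuration.
    def should_record_advisory(storage_server, storage_index, shnum, report):
        held = set(
            sharenum
            for (sharenum, _path)
            in storage_server._get_bucket_shares(storage_index)
        )
        if shnum not in held:
            # Claims about shares this server does not hold are logged
            # and discarded; nothing is written to disk.
            return False
        return len(report) <= storage_server.get_available_space()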
From 470657b337ca199418aee6777866281769d8f38c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 22 Oct 2021 14:56:09 -0400 Subject: [PATCH 043/220] Drop corruption advisories if we don't have a matching share --- src/allmydata/storage/server.py | 32 ++++++++++++++++++++++++++---- src/allmydata/test/test_storage.py | 23 +++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 30fa5adc2..c81d88bfc 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -77,9 +77,9 @@ class StorageServer(service.MultiService, Referenceable): sharedir = os.path.join(storedir, "shares") fileutil.make_dirs(sharedir) self.sharedir = sharedir - # we don't actually create the corruption-advisory dir until necessary self.corruption_advisory_dir = os.path.join(storedir, "corruption-advisories") + fileutil.make_dirs(self.corruption_advisory_dir) self.reserved_space = int(reserved_space) self.no_storage = discard_storage self.readonly_storage = readonly_storage @@ -730,6 +730,21 @@ class StorageServer(service.MultiService, Referenceable): self.add_latency("readv", self._get_current_time() - start) return datavs + def _share_exists(self, storage_index, shnum): + """ + Check local share storage to see if a matching share exists. + + :param bytes storage_index: The storage index to inspect. + :param int shnum: The share number to check for. + + :return bool: ``True`` if a share with the given number exists at the + given storage index, ``False`` otherwise. + """ + for existing_sharenum, ignored in self._get_bucket_shares(storage_index): + if existing_sharenum == shnum: + return True + return False + def remote_advise_corrupt_share(self, share_type, storage_index, shnum, reason): # This is a remote API, I believe, so this has to be bytes for legacy @@ -739,18 +754,27 @@ class StorageServer(service.MultiService, Referenceable): si_s = si_b2a(storage_index) + if not self._share_exists(storage_index, shnum): + log.msg( + format=( + "discarding client corruption claim for %(si)s/%(shnum)d " + "which I do not have" + ), + si=si_s, + shnum=shnum, + ) + return + log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") - fileutil.make_dirs(self.corruption_advisory_dir) - now = time_format.iso_utc(sep="T") - report = render_corruption_report(share_type, si_s, shnum, reason) if len(report) > self.get_available_space(): return None + now = time_format.iso_utc(sep="T") report_path = get_corruption_report_path( self.corruption_advisory_dir, now, diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 70cad7db2..9889a001a 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1018,6 +1018,7 @@ class Server(unittest.TestCase): ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) ss.setServiceParent(self.sparent) + upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, b"This share smells funny.\n") @@ -1032,6 +1033,7 @@ class Server(unittest.TestCase): ss.setServiceParent(self.sparent) si0_s = base32.b2a(b"si0") + upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, b"This share smells funny.\n") reportdir = os.path.join(workdir, "corruption-advisories") @@ 
-1070,6 +1072,27 @@ class Server(unittest.TestCase): self.failUnlessIn(b"share_number: 1", report) self.failUnlessIn(b"This share tastes like dust.", report) + def test_advise_corruption_missing(self): + """ + If a corruption advisory is received for a share that is not present on + this server then it is not persisted. + """ + workdir = self.workdir("test_advise_corruption_missing") + ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) + ss.setServiceParent(self.sparent) + + # Upload one share for this storage index + upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) + + # And try to submit a corruption advisory about a different share + si0_s = base32.b2a(b"si0") + ss.remote_advise_corrupt_share(b"immutable", b"si0", 1, + b"This share smells funny.\n") + + self.assertEqual( + [], + os.listdir(ss.corruption_advisory_dir), + ) class MutableServer(unittest.TestCase): From 0ada9d93f794efed68d185b8f68bcb31f6394ee0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sat, 23 Oct 2021 07:43:22 -0400 Subject: [PATCH 044/220] remove unused local --- src/allmydata/test/test_storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 9889a001a..06b8d7957 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1085,7 +1085,6 @@ class Server(unittest.TestCase): upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) # And try to submit a corruption advisory about a different share - si0_s = base32.b2a(b"si0") ss.remote_advise_corrupt_share(b"immutable", b"si0", 1, b"This share smells funny.\n") From b51f0ac8ff60a39b34c51d5f8b4c4b7aad232c37 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sat, 23 Oct 2021 08:04:19 -0400 Subject: [PATCH 045/220] storage_index is a byte string and Python 3 cares --- src/allmydata/test/test_storage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 06b8d7957..738e218eb 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1018,7 +1018,7 @@ class Server(unittest.TestCase): ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) ss.setServiceParent(self.sparent) - upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) + upload_immutable(ss, b"si0", b"r" * 32, b"c" * 32, {0: b""}) ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, b"This share smells funny.\n") @@ -1033,7 +1033,7 @@ class Server(unittest.TestCase): ss.setServiceParent(self.sparent) si0_s = base32.b2a(b"si0") - upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) + upload_immutable(ss, b"si0", b"r" * 32, b"c" * 32, {0: b""}) ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, b"This share smells funny.\n") reportdir = os.path.join(workdir, "corruption-advisories") @@ -1082,7 +1082,7 @@ class Server(unittest.TestCase): ss.setServiceParent(self.sparent) # Upload one share for this storage index - upload_immutable(ss, "si0", b"r" * 32, b"c" * 32, {0: b""}) + upload_immutable(ss, b"si0", b"r" * 32, b"c" * 32, {0: b""}) # And try to submit a corruption advisory about a different share ss.remote_advise_corrupt_share(b"immutable", b"si0", 1, From 0b4e6754a34ee9ba8d7d71f6f16e7e29f4fd8ec8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 25 Oct 2021 20:47:35 -0400 Subject: [PATCH 046/220] news fragment --- newsfragments/3827.security | 4 ++++ 1 file changed, 4 insertions(+) create mode 
100644 newsfragments/3827.security diff --git a/newsfragments/3827.security b/newsfragments/3827.security new file mode 100644 index 000000000..4fee19c76 --- /dev/null +++ b/newsfragments/3827.security @@ -0,0 +1,4 @@ +The SFTP server no longer accepts password-based credentials for authentication. +Public/private key-based credentials are now the only supported authentication type. +This removes plaintext password storage from the SFTP credentials file. +It also removes a possible timing side-channel vulnerability which might have allowed attackers to discover an account's plaintext password. From 5878a64890ba0a395f61432a9b5dd534daa9a64a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 25 Oct 2021 20:50:19 -0400 Subject: [PATCH 047/220] Remove password-based authentication from the SFTP frontend --- docs/frontends/FTP-and-SFTP.rst | 14 +- src/allmydata/frontends/auth.py | 122 ++++++++++------ src/allmydata/test/test_auth.py | 244 +++++++++++++++++++++++--------- 3 files changed, 255 insertions(+), 125 deletions(-) diff --git a/docs/frontends/FTP-and-SFTP.rst b/docs/frontends/FTP-and-SFTP.rst index 9d4f1dcec..ede719e26 100644 --- a/docs/frontends/FTP-and-SFTP.rst +++ b/docs/frontends/FTP-and-SFTP.rst @@ -47,8 +47,8 @@ servers must be configured with a way to first authenticate a user (confirm that a prospective client has a legitimate claim to whatever authorities we might grant a particular user), and second to decide what directory cap should be used as the root directory for a log-in by the authenticated user. -A username and password can be used; as of Tahoe-LAFS v1.11, RSA or DSA -public key authentication is also supported. +As of Tahoe-LAFS v1.17, +RSA/DSA public key authentication is the only supported mechanism. Tahoe-LAFS provides two mechanisms to perform this user-to-cap mapping. The first (recommended) is a simple flat file with one account per line. @@ -59,20 +59,14 @@ Creating an Account File To use the first form, create a file (for example ``BASEDIR/private/accounts``) in which each non-comment/non-blank line is a space-separated line of -(USERNAME, PASSWORD, ROOTCAP), like so:: +(USERNAME, KEY-TYPE, PUBLIC-KEY, ROOTCAP), like so:: % cat BASEDIR/private/accounts - # This is a password line: username password cap - alice password URI:DIR2:ioej8xmzrwilg772gzj4fhdg7a:wtiizszzz2rgmczv4wl6bqvbv33ag4kvbr6prz3u6w3geixa6m6a - bob sekrit URI:DIR2:6bdmeitystckbl9yqlw7g56f4e:serp5ioqxnh34mlbmzwvkp3odehsyrr7eytt5f64we3k9hhcrcja - # This is a public key line: username keytype pubkey cap # (Tahoe-LAFS v1.11 or later) carol ssh-rsa AAAA... URI:DIR2:ovjy4yhylqlfoqg2vcze36dhde:4d4f47qko2xm5g7osgo2yyidi5m4muyo2vjjy53q4vjju2u55mfa -For public key authentication, the keytype may be either "ssh-rsa" or "ssh-dsa". -To avoid ambiguity between passwords and public key types, a password cannot -start with "ssh-". +The key type may be either "ssh-rsa" or "ssh-dsa". Now add an ``accounts.file`` directive to your ``tahoe.cfg`` file, as described in the next sections. 
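To make the new accounts-file format concrete, here is a minimal, hypothetical sketch of composing one such line from an OpenSSH ``.pub`` file; ``format_account_line`` is an illustrative name, not a Tahoe-LAFS API::

    def format_account_line(username, pubkey_path, rootcap):
        # An OpenSSH .pub file holds "KEY-TYPE BASE64-KEY [comment]"; an
        # accounts-file line wants "USERNAME KEY-TYPE PUBLIC-KEY ROOTCAP".
        with open(pubkey_path) as f:
            key_type, key_blob = f.read().strip().split()[:2]
        return " ".join([username, key_type, key_blob, rootcap])

This mirrors what the integration tests later in this series do when they write ``alice-key ssh-rsa {ssh_public_key} {rwcap}`` into the accounts file.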
diff --git a/src/allmydata/frontends/auth.py b/src/allmydata/frontends/auth.py index b61062334..312a9da1a 100644 --- a/src/allmydata/frontends/auth.py +++ b/src/allmydata/frontends/auth.py @@ -32,65 +32,93 @@ class FTPAvatarID(object): @implementer(checkers.ICredentialsChecker) class AccountFileChecker(object): - credentialInterfaces = (credentials.IUsernamePassword, - credentials.IUsernameHashedPassword, - credentials.ISSHPrivateKey) + credentialInterfaces = (credentials.ISSHPrivateKey,) + def __init__(self, client, accountfile): self.client = client - self.passwords = BytesKeyDict() - pubkeys = BytesKeyDict() - self.rootcaps = BytesKeyDict() - with open(abspath_expanduser_unicode(accountfile), "rb") as f: - for line in f: - line = line.strip() - if line.startswith(b"#") or not line: - continue - name, passwd, rest = line.split(None, 2) - if passwd.startswith(b"ssh-"): - bits = rest.split() - keystring = b" ".join([passwd] + bits[:-1]) - key = keys.Key.fromString(keystring) - rootcap = bits[-1] - pubkeys[name] = [key] - else: - self.passwords[name] = passwd - rootcap = rest - self.rootcaps[name] = rootcap + path = abspath_expanduser_unicode(accountfile) + with open_account_file(path) as f: + self.rootcaps, pubkeys = load_account_file(f) self._pubkeychecker = SSHPublicKeyChecker(InMemorySSHKeyDB(pubkeys)) def _avatarId(self, username): return FTPAvatarID(username, self.rootcaps[username]) - def _cbPasswordMatch(self, matched, username): - if matched: - return self._avatarId(username) - raise error.UnauthorizedLogin - def requestAvatarId(self, creds): if credentials.ISSHPrivateKey.providedBy(creds): d = defer.maybeDeferred(self._pubkeychecker.requestAvatarId, creds) d.addCallback(self._avatarId) return d - elif credentials.IUsernameHashedPassword.providedBy(creds): - return self._checkPassword(creds) - elif credentials.IUsernamePassword.providedBy(creds): - return self._checkPassword(creds) - else: - raise NotImplementedError() + raise NotImplementedError() - def _checkPassword(self, creds): - """ - Determine whether the password in the given credentials matches the - password in the account file. +def open_account_file(path): + """ + Open and return the accounts file at the given path. + """ + return open(path, "rt", encoding="utf-8") - Returns a Deferred that fires with the username if the password matches - or with an UnauthorizedLogin failure otherwise. - """ - try: - correct = self.passwords[creds.username] - except KeyError: - return defer.fail(error.UnauthorizedLogin()) +def load_account_file(lines): + """ + Load credentials from an account file. - d = defer.maybeDeferred(creds.checkPassword, correct) - d.addCallback(self._cbPasswordMatch, creds.username) - return d + :param lines: An iterable of account lines to load. + + :return: See ``create_account_maps``. + """ + return create_account_maps( + parse_accounts( + content_lines( + lines, + ), + ), + ) + +def content_lines(lines): + """ + Drop empty and commented-out lines (``#``-prefixed) from an iterator of + lines. + + :param lines: An iterator of lines to process. + + :return: An iterator of lines including only those from ``lines`` that + include content intended to be loaded. + """ + for line in lines: + line = line.strip() + if line and not line.startswith("#"): + yield line + +def parse_accounts(lines): + """ + Parse account lines into their components (name, key, rootcap). 
+ """ + for line in lines: + name, passwd, rest = line.split(None, 2) + if not passwd.startswith("ssh-"): + raise ValueError( + "Password-based authentication is not supported; " + "configure key-based authentication instead." + ) + + bits = rest.split() + keystring = " ".join([passwd] + bits[:-1]) + key = keys.Key.fromString(keystring) + rootcap = bits[-1] + yield (name, key, rootcap) + +def create_account_maps(accounts): + """ + Build mappings from account names to keys and rootcaps. + + :param accounts: An iterator if (name, key, rootcap) tuples. + + :return: A tuple of two dicts. The first maps account names to rootcaps. + The second maps account names to public keys. + """ + rootcaps = BytesKeyDict() + pubkeys = BytesKeyDict() + for (name, key, rootcap) in accounts: + name_bytes = name.encode("utf-8") + rootcaps[name_bytes] = rootcap.encode("utf-8") + pubkeys[name_bytes] = [key] + return rootcaps, pubkeys diff --git a/src/allmydata/test/test_auth.py b/src/allmydata/test/test_auth.py index d5198d326..19c2f7c01 100644 --- a/src/allmydata/test/test_auth.py +++ b/src/allmydata/test/test_auth.py @@ -8,7 +8,17 @@ from __future__ import unicode_literals from future.utils import PY2 if PY2: - from future.builtins import str # noqa: F401 + from future.builtins import str, open # noqa: F401 + +from hypothesis import ( + given, +) +from hypothesis.strategies import ( + text, + characters, + tuples, + lists, +) from twisted.trial import unittest from twisted.python import filepath @@ -38,25 +48,184 @@ dBSD8940XU3YW+oeq8e+p3yQ2GinHfeJ3BYQyNQLuMAJ -----END RSA PRIVATE KEY----- """) -DUMMY_ACCOUNTS = u"""\ -alice herpassword URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111 -bob sekrit URI:DIR2:bbbbbbbbbbbbbbbbbbbbbbbbbb:2222222222222222222222222222222222222222222222222222 +DUMMY_KEY_DSA = keys.Key.fromString("""\ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABsQAAAAdzc2gtZH +NzAAAAgQDKMh/ELaiP21LYRBuPbUy7dUhv/XZwV7aS1LzxSP+KaJvtDOei8X76XEAfkqX+ +aGh9eup+BLkezrV6LlpO9uPzhY8ChlKpkvw5PZKv/2agSrVxZyG7yEzHNtSBQXE6qNMwIk +N/ycXLGCqyAhQSzRhLz9ETNaslRDLo7YyVWkiuAQAAABUA5nTatFKux5EqZS4EarMWFRBU +i1UAAACAFpkkK+JsPixSTPyn0DNMoGKA0Klqy8h61Ds6pws+4+aJQptUBshpwNw1ypo7MO ++goDZy3wwdWtURTPGMgesNdEfxp8L2/kqE4vpMK0myoczCqOiWMeNB/x1AStbSkBI8WmHW +2htgsC01xbaix/FrA3edK8WEyv+oIxlbV1FkrPkAAACANb0EpCc8uoR4/32rO2JLsbcLBw +H5wc2khe7AKkIa9kUknRIRvoCZUtXF5XuXXdRmnpVEm2KcsLdtZjip43asQcqgt0Kz3nuF +kAf7bI98G1waFUimcCSPsal4kCmW2HC11sg/BWOt5qczX/0/3xVxpo6juUeBq9ncnFTvPX +5fOlEAAAHoJkFqHiZBah4AAAAHc3NoLWRzcwAAAIEAyjIfxC2oj9tS2EQbj21Mu3VIb/12 +cFe2ktS88Uj/imib7QznovF++lxAH5Kl/mhofXrqfgS5Hs61ei5aTvbj84WPAoZSqZL8OT +2Sr/9moEq1cWchu8hMxzbUgUFxOqjTMCJDf8nFyxgqsgIUEs0YS8/REzWrJUQy6O2MlVpI +rgEAAAAVAOZ02rRSrseRKmUuBGqzFhUQVItVAAAAgBaZJCvibD4sUkz8p9AzTKBigNCpas +vIetQ7OqcLPuPmiUKbVAbIacDcNcqaOzDvoKA2ct8MHVrVEUzxjIHrDXRH8afC9v5KhOL6 +TCtJsqHMwqjoljHjQf8dQErW0pASPFph1tobYLAtNcW2osfxawN3nSvFhMr/qCMZW1dRZK +z5AAAAgDW9BKQnPLqEeP99qztiS7G3CwcB+cHNpIXuwCpCGvZFJJ0SEb6AmVLVxeV7l13U +Zp6VRJtinLC3bWY4qeN2rEHKoLdCs957hZAH+2yPfBtcGhVIpnAkj7GpeJAplthwtdbIPw +VjreanM1/9P98VcaaOo7lHgavZ3JxU7z1+XzpRAAAAFQC7360pZLbv7PFt4BPFJ8zAHxAe +QwAAAA5leGFya3VuQGJhcnlvbgECAwQ= +-----END OPENSSH PRIVATE KEY----- +""") -# dennis password URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111 +ACCOUNTS = u"""\ +# dennis {key} URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111 carol {key} 
URI:DIR2:cccccccccccccccccccccccccc:3333333333333333333333333333333333333333333333333333 """.format(key=str(DUMMY_KEY.public().toString("openssh"), "ascii")).encode("ascii") +# Python str.splitlines considers NEXT LINE, LINE SEPARATOR, and PARAGRAPH +# separator to be line separators, too. However, file.readlines() does not... +LINE_SEPARATORS = ( + '\x0a', # line feed + '\x0b', # vertical tab + '\x0c', # form feed + '\x0d', # carriage return +) + +class AccountFileParserTests(unittest.TestCase): + """ + Tests for ``load_account_file`` and its helper functions. + """ + @given(lists( + text(alphabet=characters( + blacklist_categories=( + # Surrogates are an encoding trick to help out UTF-16. + # They're not necessary to represent any non-surrogate code + # point in unicode. They're also not legal individually but + # only in pairs. + 'Cs', + ), + # Exclude all our line separators too. + blacklist_characters=("\n", "\r"), + )), + )) + def test_ignore_comments(self, lines): + """ + ``auth.content_lines`` filters out lines beginning with `#` and empty + lines. + """ + expected = set() + + # It's not clear that real files and StringIO behave sufficiently + # similarly to use the latter instead of the former here. In + # particular, they seem to have distinct and incompatible + # line-splitting rules. + bufpath = self.mktemp() + with open(bufpath, "wt", encoding="utf-8") as buf: + for line in lines: + stripped = line.strip() + is_content = stripped and not stripped.startswith("#") + if is_content: + expected.add(stripped) + buf.write(line + "\n") + + with auth.open_account_file(bufpath) as buf: + actual = set(auth.content_lines(buf)) + + self.assertEqual(expected, actual) + + def test_parse_accounts(self): + """ + ``auth.parse_accounts`` accepts an iterator of account lines and returns + an iterator of structured account data. + """ + alice_key = DUMMY_KEY.public().toString("openssh").decode("utf-8") + alice_cap = "URI:DIR2:aaaa:1111" + + bob_key = DUMMY_KEY_DSA.public().toString("openssh").decode("utf-8") + bob_cap = "URI:DIR2:aaaa:2222" + self.assertEqual( + list(auth.parse_accounts([ + "alice {} {}".format(alice_key, alice_cap), + "bob {} {}".format(bob_key, bob_cap), + ])), + [ + ("alice", DUMMY_KEY.public(), alice_cap), + ("bob", DUMMY_KEY_DSA.public(), bob_cap), + ], + ) + + def test_parse_accounts_rejects_passwords(self): + """ + The iterator returned by ``auth.parse_accounts`` raises ``ValueError`` + when processing reaches a line that has what looks like a password + instead of an ssh key. + """ + with self.assertRaises(ValueError): + list(auth.parse_accounts(["alice apassword URI:DIR2:aaaa:1111"])) + + def test_create_account_maps(self): + """ + ``auth.create_account_maps`` accepts an iterator of structured account + data and returns two mappings: one from account name to rootcap, the + other from account name to public keys. + """ + alice_cap = "URI:DIR2:aaaa:1111" + alice_key = DUMMY_KEY.public() + bob_cap = "URI:DIR2:aaaa:2222" + bob_key = DUMMY_KEY_DSA.public() + accounts = [ + ("alice", alice_key, alice_cap), + ("bob", bob_key, bob_cap), + ] + self.assertEqual( + auth.create_account_maps(accounts), + ({ + b"alice": alice_cap.encode("utf-8"), + b"bob": bob_cap.encode("utf-8"), + }, + { + b"alice": [alice_key], + b"bob": [bob_key], + }), + ) + + def test_load_account_file(self): + """ + ``auth.load_account_file`` accepts an iterator of serialized account lines + and returns two mappings: one from account name to rootcap, the other + from account name to public keys. 
+ """ + alice_key = DUMMY_KEY.public().toString("openssh").decode("utf-8") + alice_cap = "URI:DIR2:aaaa:1111" + + bob_key = DUMMY_KEY_DSA.public().toString("openssh").decode("utf-8") + bob_cap = "URI:DIR2:aaaa:2222" + + accounts = [ + "alice {} {}".format(alice_key, alice_cap), + "bob {} {}".format(bob_key, bob_cap), + "# carol {} {}".format(alice_key, alice_cap), + ] + + self.assertEqual( + auth.load_account_file(accounts), + ({ + b"alice": alice_cap.encode("utf-8"), + b"bob": bob_cap.encode("utf-8"), + }, + { + b"alice": [DUMMY_KEY.public()], + b"bob": [DUMMY_KEY_DSA.public()], + }), + ) + + class AccountFileCheckerKeyTests(unittest.TestCase): """ Tests for key handling done by allmydata.frontends.auth.AccountFileChecker. """ def setUp(self): self.account_file = filepath.FilePath(self.mktemp()) - self.account_file.setContent(DUMMY_ACCOUNTS) + self.account_file.setContent(ACCOUNTS) abspath = abspath_expanduser_unicode(str(self.account_file.path)) self.checker = auth.AccountFileChecker(None, abspath) - def test_unknown_user_ssh(self): + def test_unknown_user(self): """ AccountFileChecker.requestAvatarId returns a Deferred that fires with UnauthorizedLogin if called with an SSHPrivateKey object with a @@ -67,67 +236,6 @@ class AccountFileCheckerKeyTests(unittest.TestCase): avatarId = self.checker.requestAvatarId(key_credentials) return self.assertFailure(avatarId, error.UnauthorizedLogin) - def test_unknown_user_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - UnauthorizedLogin if called with an SSHPrivateKey object with a - username not present in the account file. - - We use a commented out user, so we're also checking that comments are - skipped. - """ - key_credentials = credentials.UsernamePassword(b"dennis", b"password") - d = self.checker.requestAvatarId(key_credentials) - return self.assertFailure(d, error.UnauthorizedLogin) - - def test_password_auth_user_with_ssh_key(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - UnauthorizedLogin if called with an SSHPrivateKey object for a username - only associated with a password in the account file. - """ - key_credentials = credentials.SSHPrivateKey( - b"alice", b"md5", None, None, None) - avatarId = self.checker.requestAvatarId(key_credentials) - return self.assertFailure(avatarId, error.UnauthorizedLogin) - - def test_password_auth_user_with_correct_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - the user if the correct password is given. - """ - key_credentials = credentials.UsernamePassword(b"alice", b"herpassword") - d = self.checker.requestAvatarId(key_credentials) - def authenticated(avatarId): - self.assertEqual( - (b"alice", - b"URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111"), - (avatarId.username, avatarId.rootcap)) - return d - - def test_password_auth_user_with_correct_hashed_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - the user if the correct password is given in hashed form. 
- """ - key_credentials = credentials.UsernameHashedPassword(b"alice", b"herpassword") - d = self.checker.requestAvatarId(key_credentials) - def authenticated(avatarId): - self.assertEqual( - (b"alice", - b"URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111"), - (avatarId.username, avatarId.rootcap)) - return d - - def test_password_auth_user_with_wrong_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - UnauthorizedLogin if the wrong password is given. - """ - key_credentials = credentials.UsernamePassword(b"alice", b"WRONG") - avatarId = self.checker.requestAvatarId(key_credentials) - return self.assertFailure(avatarId, error.UnauthorizedLogin) - def test_unrecognized_key(self): """ AccountFileChecker.requestAvatarId returns a Deferred that fires with From 3de481ab6bbabde6943648c80722cdacabb1d3e1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 25 Oct 2021 20:52:35 -0400 Subject: [PATCH 048/220] remove unused imports --- src/allmydata/frontends/auth.py | 2 +- src/allmydata/test/test_auth.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/frontends/auth.py b/src/allmydata/frontends/auth.py index 312a9da1a..b6f9c2b7e 100644 --- a/src/allmydata/frontends/auth.py +++ b/src/allmydata/frontends/auth.py @@ -12,7 +12,7 @@ if PY2: from zope.interface import implementer from twisted.internet import defer -from twisted.cred import error, checkers, credentials +from twisted.cred import checkers, credentials from twisted.conch.ssh import keys from twisted.conch.checkers import SSHPublicKeyChecker, InMemorySSHKeyDB diff --git a/src/allmydata/test/test_auth.py b/src/allmydata/test/test_auth.py index 19c2f7c01..bfe717f79 100644 --- a/src/allmydata/test/test_auth.py +++ b/src/allmydata/test/test_auth.py @@ -16,7 +16,6 @@ from hypothesis import ( from hypothesis.strategies import ( text, characters, - tuples, lists, ) From 9764ac740ada46b8ee23b3060e951e0cd5dab9a9 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 26 Oct 2021 11:22:32 +0100 Subject: [PATCH 049/220] test kwargs overlap with params in start_action Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 1fbb9ec8d..bab37243c 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -330,3 +330,27 @@ class LogCallDeferredTests(TestCase): msg = logger.messages[0] assertContainsFields(self, msg, {"args": (10, 2)}) assertContainsFields(self, msg, {"kwargs": {"message": "an exponential function"}}) + + + @capture_logging( + lambda self, logger: + assertHasAction(self, logger, u"the-action", succeeded=True), + ) + def test_keyword_args_dont_overlap_with_start_action(self, logger): + """ + Check that both keyword and positional arguments are logged when using ``log_call_deferred`` + """ + @log_call_deferred(action_type=u"the-action") + def f(base, exp, kwargs, args): + return base ** exp + self.assertThat( + f(10, 2, kwargs={"kwarg_1": "value_1", "kwarg_2": 2}, args=(1, 2, 3)), + succeeded(Equals(100)), + ) + msg = logger.messages[0] + assertContainsFields(self, msg, {"args": (10, 2)}) + assertContainsFields( + self, + msg, + {"kwargs": {"args": [1, 2, 3], "kwargs": {"kwarg_1": "value_1", "kwarg_2": 2}}}, + ) From 5b9997f388ccca089081d8f5939f0c84edea3542 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 26 Oct 
2021 07:16:24 -0400 Subject: [PATCH 050/220] update the integration tests to reflect removal of sftp password auth --- integration/conftest.py | 29 ++++++++++++++++---------- integration/test_sftp.py | 45 +++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/integration/conftest.py b/integration/conftest.py index 39ff3b42b..ef5c518a8 100644 --- a/integration/conftest.py +++ b/integration/conftest.py @@ -353,10 +353,23 @@ def storage_nodes(reactor, temp_dir, introducer, introducer_furl, flog_gatherer, nodes.append(process) return nodes +@pytest.fixture(scope="session") +def alice_sftp_client_key_path(temp_dir): + # The client SSH key path is typically going to be somewhere else (~/.ssh, + # typically), but for convenience sake for testing we'll put it inside node. + return join(temp_dir, "alice", "private", "ssh_client_rsa_key") @pytest.fixture(scope='session') @log_call(action_type=u"integration:alice", include_args=[], include_result=False) -def alice(reactor, temp_dir, introducer_furl, flog_gatherer, storage_nodes, request): +def alice( + reactor, + temp_dir, + introducer_furl, + flog_gatherer, + storage_nodes, + alice_sftp_client_key_path, + request, +): process = pytest_twisted.blockon( _create_node( reactor, request, temp_dir, introducer_furl, flog_gatherer, "alice", @@ -387,19 +400,13 @@ accounts.file = {accounts_path} """.format(ssh_key_path=host_ssh_key_path, accounts_path=accounts_path)) generate_ssh_key(host_ssh_key_path) - # 3. Add a SFTP access file with username/password and SSH key auth. - - # The client SSH key path is typically going to be somewhere else (~/.ssh, - # typically), but for convenience sake for testing we'll put it inside node. - client_ssh_key_path = join(process.node_dir, "private", "ssh_client_rsa_key") - generate_ssh_key(client_ssh_key_path) + # 3. Add a SFTP access file with an SSH key for auth. + generate_ssh_key(alice_sftp_client_key_path) # Pub key format is "ssh-rsa ". We want the key. - ssh_public_key = open(client_ssh_key_path + ".pub").read().strip().split()[1] + ssh_public_key = open(alice_sftp_client_key_path + ".pub").read().strip().split()[1] with open(accounts_path, "w") as f: f.write("""\ -alice password {rwcap} - -alice2 ssh-rsa {ssh_public_key} {rwcap} +alice-key ssh-rsa {ssh_public_key} {rwcap} """.format(rwcap=rwcap, ssh_public_key=ssh_public_key)) # 4. Restart the node with new SFTP config. diff --git a/integration/test_sftp.py b/integration/test_sftp.py index 6171c7413..3fdbb56d7 100644 --- a/integration/test_sftp.py +++ b/integration/test_sftp.py @@ -19,6 +19,7 @@ from future.utils import PY2 if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import os.path from posixpath import join from stat import S_ISDIR @@ -33,7 +34,7 @@ import pytest from .util import generate_ssh_key, run_in_thread -def connect_sftp(connect_args={"username": "alice", "password": "password"}): +def connect_sftp(connect_args): """Create an SFTP client.""" client = SSHClient() client.set_missing_host_key_policy(AutoAddPolicy) @@ -60,24 +61,24 @@ def connect_sftp(connect_args={"username": "alice", "password": "password"}): @run_in_thread def test_bad_account_password_ssh_key(alice, tmpdir): """ - Can't login with unknown username, wrong password, or wrong SSH pub key. + Can't login with unknown username, any password, or wrong SSH pub key. 
""" - # Wrong password, wrong username: - for u, p in [("alice", "wrong"), ("someuser", "password")]: + # Any password, wrong username: + for u, p in [("alice-key", "wrong"), ("someuser", "password")]: with pytest.raises(AuthenticationException): connect_sftp(connect_args={ "username": u, "password": p, }) - another_key = join(str(tmpdir), "ssh_key") + another_key = os.path.join(str(tmpdir), "ssh_key") generate_ssh_key(another_key) - good_key = RSAKey(filename=join(alice.node_dir, "private", "ssh_client_rsa_key")) + good_key = RSAKey(filename=os.path.join(alice.node_dir, "private", "ssh_client_rsa_key")) bad_key = RSAKey(filename=another_key) # Wrong key: with pytest.raises(AuthenticationException): connect_sftp(connect_args={ - "username": "alice2", "pkey": bad_key, + "username": "alice-key", "pkey": bad_key, }) # Wrong username: @@ -86,13 +87,24 @@ def test_bad_account_password_ssh_key(alice, tmpdir): "username": "someoneelse", "pkey": good_key, }) +def sftp_client_key(node): + return RSAKey( + filename=os.path.join(node.node_dir, "private", "ssh_client_rsa_key"), + ) + +def test_sftp_client_key_exists(alice, alice_sftp_client_key_path): + """ + Weakly validate the sftp client key fixture by asserting that *something* + exists at the supposed key path. + """ + assert os.path.exists(alice_sftp_client_key_path) @run_in_thread def test_ssh_key_auth(alice): """It's possible to login authenticating with SSH public key.""" - key = RSAKey(filename=join(alice.node_dir, "private", "ssh_client_rsa_key")) + key = sftp_client_key(alice) sftp = connect_sftp(connect_args={ - "username": "alice2", "pkey": key + "username": "alice-key", "pkey": key }) assert sftp.listdir() == [] @@ -100,7 +112,10 @@ def test_ssh_key_auth(alice): @run_in_thread def test_read_write_files(alice): """It's possible to upload and download files.""" - sftp = connect_sftp() + sftp = connect_sftp(connect_args={ + "username": "alice-key", + "pkey": sftp_client_key(alice), + }) with sftp.file("myfile", "wb") as f: f.write(b"abc") f.write(b"def") @@ -117,7 +132,10 @@ def test_directories(alice): It's possible to create, list directories, and create and remove files in them. 
""" - sftp = connect_sftp() + sftp = connect_sftp(connect_args={ + "username": "alice-key", + "pkey": sftp_client_key(alice), + }) assert sftp.listdir() == [] sftp.mkdir("childdir") @@ -148,7 +166,10 @@ def test_directories(alice): @run_in_thread def test_rename(alice): """Directories and files can be renamed.""" - sftp = connect_sftp() + sftp = connect_sftp(connect_args={ + "username": "alice-key", + "pkey": sftp_client_key(alice), + }) sftp.mkdir("dir") filepath = join("dir", "file") From 69d335c1e1503544850e4e3014ca1a6d1d89180b Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 26 Oct 2021 13:14:26 +0100 Subject: [PATCH 051/220] update test overlap function docstring Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index bab37243c..61e0a6958 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -338,7 +338,7 @@ class LogCallDeferredTests(TestCase): ) def test_keyword_args_dont_overlap_with_start_action(self, logger): """ - Check that both keyword and positional arguments are logged when using ``log_call_deferred`` + Check that kwargs passed to decorated functions don't overlap with params in ``start_action`` """ @log_call_deferred(action_type=u"the-action") def f(base, exp, kwargs, args): From 28cc3cad66e0367da10ee97326d100c686f78d10 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 26 Oct 2021 14:10:29 -0400 Subject: [PATCH 052/220] news fragment --- newsfragments/3829.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3829.minor diff --git a/newsfragments/3829.minor b/newsfragments/3829.minor new file mode 100644 index 000000000..e69de29bb From 7ec7cd45dd41b0f828a581865ab3b7bb15a655be Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 26 Oct 2021 14:10:41 -0400 Subject: [PATCH 053/220] Use "concurrency groups" to auto-cancel redundant builds --- .github/workflows/ci.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45b2986a3..8209108bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,23 @@ on: - "master" pull_request: +# Control to what degree jobs in this workflow will run concurrently with +# other instances of themselves. +# +# https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions#concurrency +concurrency: + # We want every revision on master to run the workflow completely. + # "head_ref" is not set for the "push" event but it is set for the + # "pull_request" event. If it is set then it is the name of the branch and + # we can use it to make sure each branch has only one active workflow at a + # time. If it is not set then we can compute a unique string that gives + # every master/push workflow its own group. + group: "${{ github.head_ref || format('{0}-{1}', github.run_number, github.run_attempt) }}" + + # Then, we say that if a new workflow wants to start in the same group as a + # running workflow, the running workflow should be cancelled. + cancel-in-progress: true + env: # Tell Hypothesis which configuration we want it to use. 
TAHOE_LAFS_HYPOTHESIS_PROFILE: "ci" From 972790cdebe6056ae93c59452975c7c1b67c7c9c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 09:47:47 -0400 Subject: [PATCH 054/220] news fragment --- newsfragments/3830.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3830.minor diff --git a/newsfragments/3830.minor b/newsfragments/3830.minor new file mode 100644 index 000000000..e69de29bb From 70fb5d563abfcd809ce627b5ed35c0b09d55d684 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 09:48:26 -0400 Subject: [PATCH 055/220] Get rid of the public expiration_time attribute LeaseInfo now has a getter and a setter for this attribute. LeaseInfo is now also immutable by way of `attrs`. LeaseInfo is now also comparable by way of `attrs`. --- src/allmydata/scripts/debug.py | 8 +-- src/allmydata/storage/immutable.py | 6 +-- src/allmydata/storage/lease.py | 72 ++++++++++++++++++++------ src/allmydata/storage/mutable.py | 7 ++- src/allmydata/test/test_storage.py | 23 +++----- src/allmydata/test/test_storage_web.py | 2 +- 6 files changed, 73 insertions(+), 45 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 2d6ba4602..4d3f4cb21 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -170,7 +170,7 @@ def dump_immutable_lease_info(f, out): leases = list(f.get_leases()) if leases: for i,lease in enumerate(leases): - when = format_expiration_time(lease.expiration_time) + when = format_expiration_time(lease.get_expiration_time()) print(" Lease #%d: owner=%d, expire in %s" \ % (i, lease.owner_num, when), file=out) else: @@ -223,7 +223,7 @@ def dump_mutable_share(options): print(file=out) print(" Lease #%d:" % leasenum, file=out) print(" ownerid: %d" % lease.owner_num, file=out) - when = format_expiration_time(lease.expiration_time) + when = format_expiration_time(lease.get_expiration_time()) print(" expires in %s" % when, file=out) print(" renew_secret: %s" % str(base32.b2a(lease.renew_secret), "utf-8"), file=out) print(" cancel_secret: %s" % str(base32.b2a(lease.cancel_secret), "utf-8"), file=out) @@ -730,7 +730,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): m = MutableShareFile(abs_sharefile) WE, nodeid = m._read_write_enabler_and_nodeid(f) data_length = m._read_data_length(f) - expiration_time = min( [lease.expiration_time + expiration_time = min( [lease.get_expiration_time() for (i,lease) in m._enumerate_leases(f)] ) expiration = max(0, expiration_time - now) @@ -811,7 +811,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): sf = ShareFile(abs_sharefile) bp = ImmediateReadBucketProxy(sf) - expiration_time = min( [lease.expiration_time + expiration_time = min( [lease.get_expiration_time() for lease in sf.get_leases()] ) expiration = max(0, expiration_time - now) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index b8b18f140..81470eed8 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -156,9 +156,9 @@ class ShareFile(object): for i,lease in enumerate(self.get_leases()): if timing_safe_compare(lease.renew_secret, renew_secret): # yup. See if we need to update the owner time. 
- if new_expire_time > lease.expiration_time: + if new_expire_time > lease.get_expiration_time(): # yes - lease.expiration_time = new_expire_time + lease = lease.renew(new_expire_time) with open(self.home, 'rb+') as f: self._write_lease_record(f, i, lease) return @@ -167,7 +167,7 @@ class ShareFile(object): def add_or_renew_lease(self, lease_info): try: self.renew_lease(lease_info.renew_secret, - lease_info.expiration_time) + lease_info.get_expiration_time()) except IndexError: self.add_lease(lease_info) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 187f32406..594d61cf5 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -13,24 +13,64 @@ if PY2: import struct, time +import attr + +@attr.s(frozen=True) class LeaseInfo(object): - def __init__(self, owner_num=None, renew_secret=None, cancel_secret=None, - expiration_time=None, nodeid=None): - self.owner_num = owner_num - self.renew_secret = renew_secret - self.cancel_secret = cancel_secret - self.expiration_time = expiration_time - if nodeid is not None: - assert isinstance(nodeid, bytes) - assert len(nodeid) == 20 - self.nodeid = nodeid + """ + Represent the details of one lease, a marker which is intended to inform + the storage server how long to store a particular share. + """ + owner_num = attr.ib(default=None) + + # Don't put secrets into the default string representation. This makes it + # slightly less likely the secrets will accidentally be leaked to + # someplace they're not meant to be. + renew_secret = attr.ib(default=None, repr=False) + cancel_secret = attr.ib(default=None, repr=False) + + _expiration_time = attr.ib(default=None) + + nodeid = attr.ib(default=None) + + @nodeid.validator + def _validate_nodeid(self, attribute, value): + if value is not None: + if not isinstance(value, bytes): + raise ValueError( + "nodeid value must be bytes, not {!r}".format(value), + ) + if len(value) != 20: + raise ValueError( + "nodeid value must be 20 bytes long, not {!r}".format(value), + ) + return None def get_expiration_time(self): - return self.expiration_time + # type: () -> float + """ + Retrieve a POSIX timestamp representing the time at which this lease is + set to expire. + """ + return self._expiration_time + + def renew(self, new_expire_time): + # type: (float) -> LeaseInfo + """ + Create a new lease the same as this one but with a new expiration time. + + :param new_expire_time: The new expiration time. + + :return: The new lease info. 
+ """ + return attr.assoc( + self, + _expiration_time=new_expire_time, + ) def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period - return self.expiration_time - 31*24*60*60 + return self._expiration_time - 31*24*60*60 def get_age(self): return time.time() - self.get_grant_renew_time_time() @@ -39,7 +79,7 @@ class LeaseInfo(object): (self.owner_num, self.renew_secret, self.cancel_secret, - self.expiration_time) = struct.unpack(">L32s32sL", data) + self._expiration_time) = struct.unpack(">L32s32sL", data) self.nodeid = None return self @@ -47,18 +87,18 @@ class LeaseInfo(object): return struct.pack(">L32s32sL", self.owner_num, self.renew_secret, self.cancel_secret, - int(self.expiration_time)) + int(self._expiration_time)) def to_mutable_data(self): return struct.pack(">LL32s32s20s", self.owner_num, - int(self.expiration_time), + int(self._expiration_time), self.renew_secret, self.cancel_secret, self.nodeid) def from_mutable_data(self, data): (self.owner_num, - self.expiration_time, + self._expiration_time, self.renew_secret, self.cancel_secret, self.nodeid) = struct.unpack(">LL32s32s20s", data) return self diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 2ef0c3215..53a38fae9 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -304,9 +304,9 @@ class MutableShareFile(object): for (leasenum,lease) in self._enumerate_leases(f): if timing_safe_compare(lease.renew_secret, renew_secret): # yup. See if we need to update the owner time. - if new_expire_time > lease.expiration_time: + if new_expire_time > lease.get_expiration_time(): # yes - lease.expiration_time = new_expire_time + lease = lease.renew(new_expire_time) self._write_lease_record(f, leasenum, lease) return accepting_nodeids.add(lease.nodeid) @@ -324,7 +324,7 @@ class MutableShareFile(object): precondition(lease_info.owner_num != 0) # 0 means "no lease here" try: self.renew_lease(lease_info.renew_secret, - lease_info.expiration_time) + lease_info.get_expiration_time()) except IndexError: self.add_lease(lease_info) @@ -454,4 +454,3 @@ def create_mutable_sharefile(filename, my_nodeid, write_enabler, parent): ms.create(my_nodeid, write_enabler) del ms return MutableShareFile(filename, parent) - diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index d18960a1e..8123be2c5 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -835,7 +835,7 @@ class Server(unittest.TestCase): # Start out with single lease created with bucket: renewal_secret, cancel_secret = self.create_bucket_5_shares(ss, b"si0") [lease] = ss.get_leases(b"si0") - self.assertEqual(lease.expiration_time, 123 + DEFAULT_RENEWAL_TIME) + self.assertEqual(lease.get_expiration_time(), 123 + DEFAULT_RENEWAL_TIME) # Time passes: clock.advance(123456) @@ -843,7 +843,7 @@ class Server(unittest.TestCase): # Adding a lease with matching renewal secret just renews it: ss.remote_add_lease(b"si0", renewal_secret, cancel_secret) [lease] = ss.get_leases(b"si0") - self.assertEqual(lease.expiration_time, 123 + 123456 + DEFAULT_RENEWAL_TIME) + self.assertEqual(lease.get_expiration_time(), 123 + 123456 + DEFAULT_RENEWAL_TIME) def test_have_shares(self): """By default the StorageServer has no shares.""" @@ -1230,17 +1230,6 @@ class MutableServer(unittest.TestCase): self.failUnlessEqual(a.cancel_secret, b.cancel_secret) self.failUnlessEqual(a.nodeid, b.nodeid) - def compare_leases(self, leases_a, leases_b): - 
self.failUnlessEqual(len(leases_a), len(leases_b)) - for i in range(len(leases_a)): - a = leases_a[i] - b = leases_b[i] - self.failUnlessEqual(a.owner_num, b.owner_num) - self.failUnlessEqual(a.renew_secret, b.renew_secret) - self.failUnlessEqual(a.cancel_secret, b.cancel_secret) - self.failUnlessEqual(a.nodeid, b.nodeid) - self.failUnlessEqual(a.expiration_time, b.expiration_time) - def test_leases(self): ss = self.create("test_leases") def secrets(n): @@ -1321,11 +1310,11 @@ class MutableServer(unittest.TestCase): self.failUnlessIn("I have leases accepted by nodeids:", e_s) self.failUnlessIn("nodeids: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' .", e_s) - self.compare_leases(all_leases, list(s0.get_leases())) + self.assertEqual(all_leases, list(s0.get_leases())) # reading shares should not modify the timestamp read(b"si1", [], [(0,200)]) - self.compare_leases(all_leases, list(s0.get_leases())) + self.assertEqual(all_leases, list(s0.get_leases())) write(b"si1", secrets(0), {0: ([], [(200, b"make me bigger")], None)}, []) @@ -1359,7 +1348,7 @@ class MutableServer(unittest.TestCase): "shares", storage_index_to_dir(b"si1")) s0 = MutableShareFile(os.path.join(bucket_dir, "0")) [lease] = s0.get_leases() - self.assertEqual(lease.expiration_time, 235 + DEFAULT_RENEWAL_TIME) + self.assertEqual(lease.get_expiration_time(), 235 + DEFAULT_RENEWAL_TIME) # Time passes... clock.advance(835) @@ -1367,7 +1356,7 @@ class MutableServer(unittest.TestCase): # Adding a lease renews it: ss.remote_add_lease(b"si1", renew_secret, cancel_secret) [lease] = s0.get_leases() - self.assertEqual(lease.expiration_time, + self.assertEqual(lease.get_expiration_time(), 235 + 835 + DEFAULT_RENEWAL_TIME) def test_remove(self): diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index b3f5fac98..e905b240d 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -490,7 +490,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): # current lease has), so we have to reach inside it. 
for i,lease in enumerate(sf.get_leases()): if lease.renew_secret == renew_secret: - lease.expiration_time = new_expire_time + lease = lease.renew(new_expire_time) f = open(sf.home, 'rb+') sf._write_lease_record(f, i, lease) f.close() From 76caf4634710344f839b8b8e58bfc424a124fdcc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 10:23:58 -0400 Subject: [PATCH 056/220] make the alternate LeaseInfo constructors into class methods --- src/allmydata/storage/immutable.py | 2 +- src/allmydata/storage/lease.py | 46 +++++++++++++++++++++--------- src/allmydata/storage/mutable.py | 2 +- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 81470eed8..0042673f5 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -144,7 +144,7 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield LeaseInfo().from_immutable_data(data) + yield LeaseInfo.from_immutable_data(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 594d61cf5..191edbe1a 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -75,13 +75,22 @@ class LeaseInfo(object): def get_age(self): return time.time() - self.get_grant_renew_time_time() - def from_immutable_data(self, data): - (self.owner_num, - self.renew_secret, - self.cancel_secret, - self._expiration_time) = struct.unpack(">L32s32sL", data) - self.nodeid = None - return self + @classmethod + def from_immutable_data(cls, data): + # type: (bytes) -> cls + """ + Create a new instance from the encoded data given. + + :param data: A lease serialized using the immutable-share-file format. + """ + names = [ + "owner_num", + "renew_secret", + "cancel_secret", + "expiration_time", + ] + values = struct.unpack(">L32s32sL", data) + return cls(nodeid=None, **dict(zip(names, values))) def to_immutable_data(self): return struct.pack(">L32s32sL", @@ -96,9 +105,20 @@ class LeaseInfo(object): self.renew_secret, self.cancel_secret, self.nodeid) - def from_mutable_data(self, data): - (self.owner_num, - self._expiration_time, - self.renew_secret, self.cancel_secret, - self.nodeid) = struct.unpack(">LL32s32s20s", data) - return self + @classmethod + def from_mutable_data(cls, data): + # (bytes) -> cls + """ + Create a new instance from the encoded data given. + + :param data: A lease serialized using the mutable-share-file format. 
+ """ + names = [ + "owner_num", + "expiration_time", + "renew_secret", + "cancel_secret", + "nodeid", + ] + values = struct.unpack(">LL32s32s20s", data) + return cls(**dict(zip(names, values))) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 53a38fae9..e6f24679b 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -253,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = LeaseInfo().from_mutable_data(data) + lease_info = LeaseInfo.from_mutable_data(data) if lease_info.owner_num == 0: return None return lease_info From 3514995068b7b132d3f7c590b4ecb8347f36655e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 10:26:30 -0400 Subject: [PATCH 057/220] some versions of mypy don't like this so nevermind --- src/allmydata/storage/lease.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 191edbe1a..17683a888 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -77,7 +77,6 @@ class LeaseInfo(object): @classmethod def from_immutable_data(cls, data): - # type: (bytes) -> cls """ Create a new instance from the encoded data given. @@ -107,7 +106,6 @@ class LeaseInfo(object): @classmethod def from_mutable_data(cls, data): - # (bytes) -> cls """ Create a new instance from the encoded data given. From 125c937d466db13e27ab06cc40e5d68aa5d93d28 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 28 Oct 2021 10:49:08 -0400 Subject: [PATCH 058/220] Switch to HTTP header scheme. --- docs/proposed/http-storage-node-protocol.rst | 38 ++++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index d5b6653be..fd1db5c4c 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -450,16 +450,22 @@ A lease is also created for the shares. Details of the buckets to create are encoded in the request body. For example:: - {"renew-secret": "efgh", "cancel-secret": "ijkl", - "upload-secret": "xyzf", - "share-numbers": [1, 7, ...], "allocated-size": 12345} + {"share-numbers": [1, 7, ...], "allocated-size": 12345} + +The request must include ``WWW-Authenticate`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. +Typically this is a header sent by the server, but in Tahoe-LAFS keys are set by the client, so may as well reuse it. +For example:: + + WWW-Authenticate: x-tahoe-renew-secret + WWW-Authenticate: x-tahoe-cancel-secret + WWW-Authenticate: x-tahoe-upload-secret The response body includes encoded information about the created buckets. For example:: {"already-have": [1, ...], "allocated": [7, ...]} -The uplaod secret is an opaque _byte_ string. +The upload secret is an opaque _byte_ string. It will be generated by hashing a combination of:b 1. A tag. @@ -521,9 +527,9 @@ If any one of these requests fails then at most 128KiB of upload work needs to b The server must recognize when all of the data has been received and mark the share as complete (which it can do because it was informed of the size when the storage index was initialized). 
-The request body looks this, with data and upload secret being bytes::
-
-    { "upload-secret": "xyzf", "data": "thedata" }
+The request must include an ``Authorization`` header that includes the upload secret::
+
+    Authorization: x-tahoe-upload-secret
 
 Responses:
 
@@ -727,9 +733,11 @@ Immutable Data
 
 1. Create a bucket for storage index ``AAAAAAAAAAAAAAAA`` to hold two immutable shares, discovering that share ``1`` was already uploaded::
 
     POST /v1/immutable/AAAAAAAAAAAAAAAA
-    {"renew-secret": "efgh", "cancel-secret": "ijkl",
-     "upload-secret": "xyzf",
-     "share-numbers": [1, 7], "allocated-size": 48}
+    WWW-Authenticate: x-tahoe-renew-secret efgh
+    WWW-Authenticate: x-tahoe-cancel-secret ijkl
+    WWW-Authenticate: x-tahoe-upload-secret xyzf
+
+    {"share-numbers": [1, 7], "allocated-size": 48}
 
     200 OK
    {"already-have": [1], "allocated": [7]}
 
 2. Upload the content for immutable share ``7``::
 
     PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
     Content-Range: bytes 0-15/48
-
-    {"upload-secret": b"xyzf", "data": "first 16 bytes!!"
+    Authorization: x-tahoe-upload-secret xyzf
+
 
     200 OK
 
     PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
     Content-Range: bytes 16-31/48
-
-    {"upload-secret": "xyzf", "data": "second 16 bytes!"
+    Authorization: x-tahoe-upload-secret xyzf
+
 
     200 OK
 
     PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
     Content-Range: bytes 32-47/48
-
-    {"upload-secret": "xyzf", "data": "final 16 bytes!!"
+    Authorization: x-tahoe-upload-secret xyzf
+
 
     201 CREATED

From f635aec5bebda81ad1c073efa598e3546861ebf9 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Thu, 28 Oct 2021 10:53:29 -0400
Subject: [PATCH 059/220] news fragment

---
 newsfragments/3832.minor | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 newsfragments/3832.minor

diff --git a/newsfragments/3832.minor b/newsfragments/3832.minor
new file mode 100644
index 000000000..e69de29bb

From 65d3ab614256a21430dc77b2982137de1cccfd8b Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Thu, 28 Oct 2021 10:53:52 -0400
Subject: [PATCH 060/220] move backdating logic into mutable/immutable share files

---
 src/allmydata/storage/immutable.py     | 15 +++++++++++++--
 src/allmydata/storage/mutable.py       | 15 +++++++++++++--
 src/allmydata/test/test_storage_web.py | 12 +-----------
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py
index 0042673f5..7712e568a 100644
--- a/src/allmydata/storage/immutable.py
+++ b/src/allmydata/storage/immutable.py
@@ -152,11 +152,22 @@ class ShareFile(object):
             self._write_lease_record(f, num_leases, lease_info)
             self._write_num_leases(f, num_leases+1)
 
-    def renew_lease(self, renew_secret, new_expire_time):
+    def renew_lease(self, renew_secret, new_expire_time, allow_backdate=False):
+        # type: (bytes, int, bool) -> None
+        """
+        Update the expiration time on an existing lease.
+
+        :param allow_backdate: If ``True`` then allow the new expiration time
+            to be before the current expiration time.  Otherwise, make no
+            change when this is the case.
+
+        :raise IndexError: If there is no lease matching the given renew
+            secret.
+        """
         for i,lease in enumerate(self.get_leases()):
             if timing_safe_compare(lease.renew_secret, renew_secret):
                 # yup. See if we need to update the owner time.
- if new_expire_time > lease.get_expiration_time(): + if allow_backdate or new_expire_time > lease.get_expiration_time(): # yes lease = lease.renew(new_expire_time) with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index e6f24679b..de840b89a 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -298,13 +298,24 @@ class MutableShareFile(object): else: self._write_lease_record(f, num_lease_slots, lease_info) - def renew_lease(self, renew_secret, new_expire_time): + def renew_lease(self, renew_secret, new_expire_time, allow_backdate=False): + # type: (bytes, int, bool) -> None + """ + Update the expiration time on an existing lease. + + :param allow_backdate: If ``True`` then allow the new expiration time + to be before the current expiration time. Otherwise, make no + change when this is the case. + + :raise IndexError: If there is no lease matching the given renew + secret. + """ accepting_nodeids = set() with open(self.home, 'rb+') as f: for (leasenum,lease) in self._enumerate_leases(f): if timing_safe_compare(lease.renew_secret, renew_secret): # yup. See if we need to update the owner time. - if new_expire_time > lease.get_expiration_time(): + if allow_backdate or new_expire_time > lease.get_expiration_time(): # yes lease = lease.renew(new_expire_time) self._write_lease_record(f, leasenum, lease) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index e905b240d..38e380223 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -485,17 +485,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): return d def backdate_lease(self, sf, renew_secret, new_expire_time): - # ShareFile.renew_lease ignores attempts to back-date a lease (i.e. - # "renew" a lease with a new_expire_time that is older than what the - # current lease has), so we have to reach inside it. - for i,lease in enumerate(sf.get_leases()): - if lease.renew_secret == renew_secret: - lease = lease.renew(new_expire_time) - f = open(sf.home, 'rb+') - sf._write_lease_record(f, i, lease) - f.close() - return - raise IndexError("unable to renew non-existent lease") + sf.renew_lease(renew_secret, new_expire_time, allow_backdate=True) def test_expire_age(self): basedir = "storage/LeaseCrawler/expire_age" From 54bf271fbebdb7d63642cc4d86dcf9507ae839df Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 11:12:08 -0400 Subject: [PATCH 061/220] news fragment --- newsfragments/3833.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3833.minor diff --git a/newsfragments/3833.minor b/newsfragments/3833.minor new file mode 100644 index 000000000..e69de29bb From 34d2f74ede88107d6e30c927fdce704be06e2c3d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 28 Oct 2021 11:12:17 -0400 Subject: [PATCH 062/220] Tell RTD how to install Sphinx. 
--- .readthedocs.yaml | 5 +++++ docs/requirements.txt | 4 ++++ newsfragments/3831.minor | 0 tox.ini | 7 +------ 4 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/requirements.txt create mode 100644 newsfragments/3831.minor diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..65b390f26 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,5 @@ +version: 2 + +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..39c4c20f0 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx +docutils<0.18 # https://github.com/sphinx-doc/sphinx/issues/9788 +recommonmark +sphinx_rtd_theme diff --git a/newsfragments/3831.minor b/newsfragments/3831.minor new file mode 100644 index 000000000..e69de29bb diff --git a/tox.ini b/tox.ini index 61a811b71..38cee1f9f 100644 --- a/tox.ini +++ b/tox.ini @@ -217,13 +217,8 @@ commands = # your web browser. [testenv:docs] -# we pin docutils because of https://sourceforge.net/p/docutils/bugs/301/ -# which asserts when it reads links to .svg files (e.g. about.rst) deps = - sphinx - docutils==0.12 - recommonmark - sphinx_rtd_theme + -r docs/requirements.txt # normal install is not needed for docs, and slows things down skip_install = True commands = From 66845c9a1786778e145aab65e30fb0068e2f8245 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 11:12:20 -0400 Subject: [PATCH 063/220] Add ShareFile.is_valid_header and use it instead of manual header inspection --- src/allmydata/scripts/debug.py | 2 +- src/allmydata/storage/immutable.py | 15 +++++++++++++++ src/allmydata/storage/server.py | 2 +- src/allmydata/test/test_system.py | 8 ++++---- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 4d3f4cb21..71e1ccb41 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -795,7 +795,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): else: print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out) - elif struct.unpack(">L", prefix[:4]) == (1,): + elif ShareFile.is_valid_header(prefix): # immutable class ImmediateReadBucketProxy(ReadBucketProxy): diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 7712e568a..407116038 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -57,6 +57,21 @@ class ShareFile(object): LEASE_SIZE = struct.calcsize(">L32s32sL") sharetype = "immutable" + @classmethod + def is_valid_header(cls, header): + # (bytes) -> bool + """ + Determine if the given bytes constitute a valid header for this type of + container. + + :param header: Some bytes from the beginning of a container. + + :return: ``True`` if the bytes could belong to this container, + ``False`` otherwise. + """ + (version,) = struct.unpack(">L", header[:4]) + return version == 1 + def __init__(self, filename, max_size=None, create=False): """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. 
""" precondition((max_size is not None) or (not create), max_size, create) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 041783a4e..f339c579b 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -378,7 +378,7 @@ class StorageServer(service.MultiService, Referenceable): # note: if the share has been migrated, the renew_lease() # call will throw an exception, with information to help the # client update the lease. - elif header[:4] == struct.pack(">L", 1): + elif ShareFile.is_valid_header(header): sf = ShareFile(filename) else: continue # non-sharefile diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 087a1c634..72ce4b6ec 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -22,7 +22,7 @@ from twisted.trial import unittest from twisted.internet import defer from allmydata import uri -from allmydata.storage.mutable import MutableShareFile +from allmydata.storage.mutable import ShareFile, MutableShareFile from allmydata.storage.server import si_a2b from allmydata.immutable import offloaded, upload from allmydata.immutable.literal import LiteralFileNode @@ -1290,9 +1290,9 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): # are sharefiles here filename = os.path.join(dirpath, filenames[0]) # peek at the magic to see if it is a chk share - magic = open(filename, "rb").read(4) - if magic == b'\x00\x00\x00\x01': - break + with open(filename, "rb") as f: + if ShareFile.is_valid_header(f.read(32)): + break else: self.fail("unable to find any uri_extension files in %r" % self.basedir) From 1b46ac7a241e719cc0d7ddc4b66fa9fcdca5992d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 11:38:18 -0400 Subject: [PATCH 064/220] add MutableShareFile.is_valid_header and use it --- src/allmydata/scripts/debug.py | 287 ++++++++++++++--------------- src/allmydata/storage/immutable.py | 2 +- src/allmydata/storage/mutable.py | 18 +- src/allmydata/storage/server.py | 2 +- src/allmydata/storage/shares.py | 3 +- src/allmydata/test/common.py | 2 +- src/allmydata/test/test_system.py | 3 +- 7 files changed, 163 insertions(+), 154 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 71e1ccb41..ab48b0fd0 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -15,15 +15,22 @@ try: except ImportError: pass - -# do not import any allmydata modules at this level. Do that from inside -# individual functions instead. 
import struct, time, os, sys + from twisted.python import usage, failure from twisted.internet import defer from foolscap.logging import cli as foolscap_cli -from allmydata.scripts.common import BaseOptions +from allmydata.scripts.common import BaseOptions +from allmydata import uri +from allmydata.storage.mutable import MutableShareFile +from allmydata.storage.immutable import ShareFile +from allmydata.mutable.layout import unpack_share +from allmydata.mutable.layout import MDMFSlotReadProxy +from allmydata.mutable.common import NeedMoreDataError +from allmydata.immutable.layout import ReadBucketProxy +from allmydata.util import base32 +from allmydata.util.encodingutil import quote_output class DumpOptions(BaseOptions): def getSynopsis(self): @@ -56,13 +63,11 @@ def dump_share(options): # check the version, to see if we have a mutable or immutable share print("share filename: %s" % quote_output(options['filename']), file=out) - f = open(options['filename'], "rb") - prefix = f.read(32) - f.close() - if prefix == MutableShareFile.MAGIC: - return dump_mutable_share(options) - # otherwise assume it's immutable - return dump_immutable_share(options) + with open(options['filename'], "rb") as f: + if MutableShareFile.is_valid_header(f.read(32)): + return dump_mutable_share(options) + # otherwise assume it's immutable + return dump_immutable_share(options) def dump_immutable_share(options): from allmydata.storage.immutable import ShareFile @@ -712,125 +717,115 @@ def call(c, *args, **kwargs): return results[0] def describe_share(abs_sharefile, si_s, shnum_s, now, out): - from allmydata import uri - from allmydata.storage.mutable import MutableShareFile - from allmydata.storage.immutable import ShareFile - from allmydata.mutable.layout import unpack_share - from allmydata.mutable.common import NeedMoreDataError - from allmydata.immutable.layout import ReadBucketProxy - from allmydata.util import base32 - from allmydata.util.encodingutil import quote_output - import struct - - f = open(abs_sharefile, "rb") - prefix = f.read(32) - - if prefix == MutableShareFile.MAGIC: - # mutable share - m = MutableShareFile(abs_sharefile) - WE, nodeid = m._read_write_enabler_and_nodeid(f) - data_length = m._read_data_length(f) - expiration_time = min( [lease.get_expiration_time() - for (i,lease) in m._enumerate_leases(f)] ) - expiration = max(0, expiration_time - now) - - share_type = "unknown" - f.seek(m.DATA_OFFSET) - version = f.read(1) - if version == b"\x00": - # this slot contains an SMDF share - share_type = "SDMF" - elif version == b"\x01": - share_type = "MDMF" - - if share_type == "SDMF": - f.seek(m.DATA_OFFSET) - data = f.read(min(data_length, 2000)) - - try: - pieces = unpack_share(data) - except NeedMoreDataError as e: - # retry once with the larger size - size = e.needed_bytes - f.seek(m.DATA_OFFSET) - data = f.read(min(data_length, size)) - pieces = unpack_share(data) - (seqnum, root_hash, IV, k, N, segsize, datalen, - pubkey, signature, share_hash_chain, block_hash_tree, - share_data, enc_privkey) = pieces - - print("SDMF %s %d/%d %d #%d:%s %d %s" % \ - (si_s, k, N, datalen, - seqnum, str(base32.b2a(root_hash), "utf-8"), - expiration, quote_output(abs_sharefile)), file=out) - elif share_type == "MDMF": - from allmydata.mutable.layout import MDMFSlotReadProxy - fake_shnum = 0 - # TODO: factor this out with dump_MDMF_share() - class ShareDumper(MDMFSlotReadProxy): - def _read(self, readvs, force_remote=False, queue=False): - data = [] - for (where,length) in readvs: - f.seek(m.DATA_OFFSET+where) - 
data.append(f.read(length)) - return defer.succeed({fake_shnum: data}) - - p = ShareDumper(None, "fake-si", fake_shnum) - def extract(func): - stash = [] - # these methods return Deferreds, but we happen to know that - # they run synchronously when not actually talking to a - # remote server - d = func() - d.addCallback(stash.append) - return stash[0] - - verinfo = extract(p.get_verinfo) - (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix, - offsets) = verinfo - print("MDMF %s %d/%d %d #%d:%s %d %s" % \ - (si_s, k, N, datalen, - seqnum, str(base32.b2a(root_hash), "utf-8"), - expiration, quote_output(abs_sharefile)), file=out) + with open(abs_sharefile, "rb") as f: + prefix = f.read(32) + if MutableShareFile.is_valid_header(prefix): + _describe_mutable_share(abs_sharefile, f, now, si_s, out) + elif ShareFile.is_valid_header(prefix): + _describe_immutable_share(abs_sharefile, now, si_s, out) else: - print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out) + print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile), file=out) - elif ShareFile.is_valid_header(prefix): - # immutable +def _describe_mutable_share(abs_sharefile, f, now, si_s, out): + # mutable share + m = MutableShareFile(abs_sharefile) + WE, nodeid = m._read_write_enabler_and_nodeid(f) + data_length = m._read_data_length(f) + expiration_time = min( [lease.get_expiration_time() + for (i,lease) in m._enumerate_leases(f)] ) + expiration = max(0, expiration_time - now) - class ImmediateReadBucketProxy(ReadBucketProxy): - def __init__(self, sf): - self.sf = sf - ReadBucketProxy.__init__(self, None, None, "") - def __repr__(self): - return "" - def _read(self, offset, size): - return defer.succeed(sf.read_share_data(offset, size)) + share_type = "unknown" + f.seek(m.DATA_OFFSET) + version = f.read(1) + if version == b"\x00": + # this slot contains an SMDF share + share_type = "SDMF" + elif version == b"\x01": + share_type = "MDMF" - # use a ReadBucketProxy to parse the bucket and find the uri extension - sf = ShareFile(abs_sharefile) - bp = ImmediateReadBucketProxy(sf) + if share_type == "SDMF": + f.seek(m.DATA_OFFSET) + data = f.read(min(data_length, 2000)) - expiration_time = min( [lease.get_expiration_time() - for lease in sf.get_leases()] ) - expiration = max(0, expiration_time - now) + try: + pieces = unpack_share(data) + except NeedMoreDataError as e: + # retry once with the larger size + size = e.needed_bytes + f.seek(m.DATA_OFFSET) + data = f.read(min(data_length, size)) + pieces = unpack_share(data) + (seqnum, root_hash, IV, k, N, segsize, datalen, + pubkey, signature, share_hash_chain, block_hash_tree, + share_data, enc_privkey) = pieces - UEB_data = call(bp.get_uri_extension) - unpacked = uri.unpack_extension_readable(UEB_data) + print("SDMF %s %d/%d %d #%d:%s %d %s" % \ + (si_s, k, N, datalen, + seqnum, str(base32.b2a(root_hash), "utf-8"), + expiration, quote_output(abs_sharefile)), file=out) + elif share_type == "MDMF": + fake_shnum = 0 + # TODO: factor this out with dump_MDMF_share() + class ShareDumper(MDMFSlotReadProxy): + def _read(self, readvs, force_remote=False, queue=False): + data = [] + for (where,length) in readvs: + f.seek(m.DATA_OFFSET+where) + data.append(f.read(length)) + return defer.succeed({fake_shnum: data}) - k = unpacked["needed_shares"] - N = unpacked["total_shares"] - filesize = unpacked["size"] - ueb_hash = unpacked["UEB_hash"] - - print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize, - str(ueb_hash, "utf-8"), expiration, - quote_output(abs_sharefile)), file=out) + 
p = ShareDumper(None, "fake-si", fake_shnum) + def extract(func): + stash = [] + # these methods return Deferreds, but we happen to know that + # they run synchronously when not actually talking to a + # remote server + d = func() + d.addCallback(stash.append) + return stash[0] + verinfo = extract(p.get_verinfo) + (seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix, + offsets) = verinfo + print("MDMF %s %d/%d %d #%d:%s %d %s" % \ + (si_s, k, N, datalen, + seqnum, str(base32.b2a(root_hash), "utf-8"), + expiration, quote_output(abs_sharefile)), file=out) else: - print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile), file=out) + print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out) + + +def _describe_immutable_share(abs_sharefile, now, si_s, out): + class ImmediateReadBucketProxy(ReadBucketProxy): + def __init__(self, sf): + self.sf = sf + ReadBucketProxy.__init__(self, None, None, "") + def __repr__(self): + return "" + def _read(self, offset, size): + return defer.succeed(sf.read_share_data(offset, size)) + + # use a ReadBucketProxy to parse the bucket and find the uri extension + sf = ShareFile(abs_sharefile) + bp = ImmediateReadBucketProxy(sf) + + expiration_time = min( [lease.get_expiration_time() + for lease in sf.get_leases()] ) + expiration = max(0, expiration_time - now) + + UEB_data = call(bp.get_uri_extension) + unpacked = uri.unpack_extension_readable(UEB_data) + + k = unpacked["needed_shares"] + N = unpacked["total_shares"] + filesize = unpacked["size"] + ueb_hash = unpacked["UEB_hash"] + + print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize, + str(ueb_hash, "utf-8"), expiration, + quote_output(abs_sharefile)), file=out) - f.close() def catalog_shares(options): from allmydata.util.encodingutil import listdir_unicode, quote_output @@ -933,34 +928,34 @@ def corrupt_share(options): f.write(d) f.close() - f = open(fn, "rb") - prefix = f.read(32) - f.close() - if prefix == MutableShareFile.MAGIC: - # mutable - m = MutableShareFile(fn) - f = open(fn, "rb") - f.seek(m.DATA_OFFSET) - data = f.read(2000) - # make sure this slot contains an SMDF share - assert data[0:1] == b"\x00", "non-SDMF mutable shares not supported" - f.close() + with open(fn, "rb") as f: + prefix = f.read(32) - (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize, - ig_datalen, offsets) = unpack_header(data) + if MutableShareFile.is_valid_header(prefix): + # mutable + m = MutableShareFile(fn) + f = open(fn, "rb") + f.seek(m.DATA_OFFSET) + data = f.read(2000) + # make sure this slot contains an SMDF share + assert data[0:1] == b"\x00", "non-SDMF mutable shares not supported" + f.close() - assert version == 0, "we only handle v0 SDMF files" - start = m.DATA_OFFSET + offsets["share_data"] - end = m.DATA_OFFSET + offsets["enc_privkey"] - flip_bit(start, end) - else: - # otherwise assume it's immutable - f = ShareFile(fn) - bp = ReadBucketProxy(None, None, '') - offsets = bp._parse_offsets(f.read_share_data(0, 0x24)) - start = f._data_offset + offsets["data"] - end = f._data_offset + offsets["plaintext_hash_tree"] - flip_bit(start, end) + (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize, + ig_datalen, offsets) = unpack_header(data) + + assert version == 0, "we only handle v0 SDMF files" + start = m.DATA_OFFSET + offsets["share_data"] + end = m.DATA_OFFSET + offsets["enc_privkey"] + flip_bit(start, end) + else: + # otherwise assume it's immutable + f = ShareFile(fn) + bp = ReadBucketProxy(None, None, '') + offsets = bp._parse_offsets(f.read_share_data(0, 
0x24)) + start = f._data_offset + offsets["data"] + end = f._data_offset + offsets["plaintext_hash_tree"] + flip_bit(start, end) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 407116038..24465c1ed 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -59,7 +59,7 @@ class ShareFile(object): @classmethod def is_valid_header(cls, header): - # (bytes) -> bool + # type: (bytes) -> bool """ Determine if the given bytes constitute a valid header for this type of container. diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index de840b89a..1b29b4a65 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -67,6 +67,20 @@ class MutableShareFile(object): MAX_SIZE = MAX_MUTABLE_SHARE_SIZE # TODO: decide upon a policy for max share size + @classmethod + def is_valid_header(cls, header): + # type: (bytes) -> bool + """ + Determine if the given bytes constitute a valid header for this type of + container. + + :param header: Some bytes from the beginning of a container. + + :return: ``True`` if the bytes could belong to this container, + ``False`` otherwise. + """ + return header.startswith(cls.MAGIC) + def __init__(self, filename, parent=None): self.home = filename if os.path.exists(self.home): @@ -77,7 +91,7 @@ class MutableShareFile(object): write_enabler_nodeid, write_enabler, data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) - if magic != self.MAGIC: + if not self.is_valid_header(data): msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ (filename, magic, self.MAGIC) raise UnknownMutableContainerVersionError(msg) @@ -388,7 +402,7 @@ class MutableShareFile(object): write_enabler_nodeid, write_enabler, data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) - assert magic == self.MAGIC + assert self.is_valid_header(data) return (write_enabler, write_enabler_nodeid) def readv(self, readv): diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index f339c579b..0f30dad6a 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -373,7 +373,7 @@ class StorageServer(service.MultiService, Referenceable): for shnum, filename in self._get_bucket_shares(storage_index): with open(filename, 'rb') as f: header = f.read(32) - if header[:32] == MutableShareFile.MAGIC: + if MutableShareFile.is_valid_header(header): sf = MutableShareFile(filename, self) # note: if the share has been migrated, the renew_lease() # call will throw an exception, with information to help the diff --git a/src/allmydata/storage/shares.py b/src/allmydata/storage/shares.py index ec6c0a501..59e7b1539 100644 --- a/src/allmydata/storage/shares.py +++ b/src/allmydata/storage/shares.py @@ -17,8 +17,7 @@ from allmydata.storage.immutable import ShareFile def get_share_file(filename): with open(filename, "rb") as f: prefix = f.read(32) - if prefix == MutableShareFile.MAGIC: + if MutableShareFile.is_valid_header(prefix): return MutableShareFile(filename) # otherwise assume it's immutable return ShareFile(filename) - diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 0f2dc7c62..97368ee92 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -1068,7 +1068,7 @@ def _corrupt_offset_of_uri_extension_to_force_short_read(data, debug=False): def _corrupt_mutable_share_data(data, debug=False): prefix = data[:32] - assert prefix == MutableShareFile.MAGIC, "This 
function is designed to corrupt mutable shares of v1, and the magic number doesn't look right: %r vs %r" % (prefix, MutableShareFile.MAGIC) + assert MutableShareFile.is_valid_header(prefix), "This function is designed to corrupt mutable shares of v1, and the magic number doesn't look right: %r vs %r" % (prefix, MutableShareFile.MAGIC) data_offset = MutableShareFile.DATA_OFFSET sharetype = data[data_offset:data_offset+1] assert sharetype == b"\x00", "non-SDMF mutable shares not supported" diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 72ce4b6ec..d859a0e00 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -22,7 +22,8 @@ from twisted.trial import unittest from twisted.internet import defer from allmydata import uri -from allmydata.storage.mutable import ShareFile, MutableShareFile +from allmydata.storage.mutable import MutableShareFile +from allmydata.storage.immutable import ShareFile from allmydata.storage.server import si_a2b from allmydata.immutable import offloaded, upload from allmydata.immutable.literal import LiteralFileNode From 8d202a4018bc5121800ea5551fd925d8432b2996 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 12:37:37 -0400 Subject: [PATCH 065/220] news fragment --- newsfragments/3835.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3835.minor diff --git a/newsfragments/3835.minor b/newsfragments/3835.minor new file mode 100644 index 000000000..e69de29bb From d0ee17d99efff05738f0eb3140c3a5c947c20b5a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 12:39:01 -0400 Subject: [PATCH 066/220] some docstrings --- src/allmydata/test/no_network.py | 26 +++++++++++++++ src/allmydata/test/test_download.py | 52 ++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py index 7a84580bf..aa41ab6bc 100644 --- a/src/allmydata/test/no_network.py +++ b/src/allmydata/test/no_network.py @@ -479,6 +479,18 @@ class GridTestMixin(object): def set_up_grid(self, num_clients=1, num_servers=10, client_config_hooks={}, oneshare=False): + """ + Create a Tahoe-LAFS storage grid. + + :param num_clients: See ``NoNetworkGrid`` + :param num_servers: See ``NoNetworkGrid`` + :param client_config_hooks: See ``NoNetworkGrid`` + + :param bool oneshare: If ``True`` then the first client node is + configured with ``n == k == happy == 1``. + + :return: ``None`` + """ # self.basedir must be set port_assigner = SameProcessStreamEndpointAssigner() port_assigner.setUp() @@ -557,6 +569,15 @@ class GridTestMixin(object): return sorted(shares) def copy_shares(self, uri): + # type: (bytes) -> Dict[bytes, bytes] + """ + Read all of the share files for the given capability from the storage area + of the storage servers created by ``set_up_grid``. + + :param bytes uri: A Tahoe-LAFS data capability. + + :return: A ``dict`` mapping share file names to share file contents. + """ shares = {} for (shnum, serverid, sharefile) in self.find_uri_shares(uri): with open(sharefile, "rb") as f: @@ -601,6 +622,11 @@ class GridTestMixin(object): f.write(corruptdata) def corrupt_all_shares(self, uri, corruptor, debug=False): + # type: (bytes, Callable[[bytes, bool], bytes] -> bytes), bool) -> None + """ + Apply ``corruptor`` to the contents of all share files associated with a + given capability and replace the share file contents with its result.
+ """ for (i_shnum, i_serverid, i_sharefile) in self.find_uri_shares(uri): with open(i_sharefile, "rb") as f: sharedata = f.read() diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index d61942839..6b8dc6a31 100644 --- a/src/allmydata/test/test_download.py +++ b/src/allmydata/test/test_download.py @@ -951,12 +951,52 @@ class Corruption(_Base, unittest.TestCase): self.corrupt_shares_numbered(imm_uri, [2], _corruptor) def _corrupt_set(self, ign, imm_uri, which, newvalue): + # type: (Any, bytes, int, int) -> None + """ + Replace a single byte of share file number 2 for the given capability with a + new byte. + + :param imm_uri: Corrupt share number 2 belonging to this capability. + :param which: The byte position to replace. + :param newvalue: The new byte value to set in the share. + """ log.msg("corrupt %d" % which) def _corruptor(s, debug=False): return s[:which] + bchr(newvalue) + s[which+1:] self.corrupt_shares_numbered(imm_uri, [2], _corruptor) def test_each_byte(self): + """ + Test share selection behavior of the downloader in the face of certain + kinds of data corruption. + + 1. upload a small file to the no-network grid + 2. read all of the resulting share files out of the no-network storage servers + 3. for each of + + a. each byte of the share file version field + b. each byte of the immutable share version field + c. each byte of the immutable share data offset field + d. the most significant byte of the block_shares offset field + e. one of the bytes of one of the merkle trees + f. one of the bytes of the share hashes list + + i. flip the least significant bit in all of the share files + ii. perform the download/check/restore process + + 4. add 2 ** 24 to the share file version number + 5. perform the download/check/restore process + + 6. add 2 ** 24 to the share version number + 7. perform the download/check/restore process + + The download/check/restore process is: + + 1. attempt to download the data + 2. assert that the recovered plaintext is correct + 3. assert that only the "correct" share numbers were used to reconstruct the plaintext + 4. restore all of the share files to their pristine condition + """ # Setting catalog_detection=True performs an exhaustive test of the # Downloader's response to corruption in the lsb of each byte of the # 2070-byte share, with two goals: make sure we tolerate all forms of @@ -1145,8 +1185,18 @@ class Corruption(_Base, unittest.TestCase): return d def _corrupt_flip_all(self, ign, imm_uri, which): + # type: (Any, bytes, int) -> None + """ + Flip the least significant bit at a given byte position in all share files + for the given capability.
+ """ def _corruptor(s, debug=False): - return s[:which] + bchr(ord(s[which:which+1])^0x01) + s[which+1:] + # type: (bytes, bool) -> bytes + before_corruption = s[:which] + after_corruption = s[which+1:] + original_byte = s[which:which+1] + corrupt_byte = bchr(ord(original_byte) ^ 0x01) + return b"".join([before_corruption, corrupt_byte, after_corruption]) self.corrupt_all_shares(imm_uri, _corruptor) class DownloadV2(_Base, unittest.TestCase): From 8cb1f4f57cc6591e46573bd214ef0a7c43ad2c04 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 14:25:24 -0400 Subject: [PATCH 067/220] news fragment --- newsfragments/3527.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3527.minor diff --git a/newsfragments/3527.minor b/newsfragments/3527.minor new file mode 100644 index 000000000..e69de29bb From 54d80222c9cdf6662510f67d15de0cf7494a723e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 14:34:47 -0400 Subject: [PATCH 068/220] switch to monkey-patching from other sources This is not much of an improvement to the tests themselves, unfortunately. However, it does get us one step closer to dropping `mock` as a dependency. --- src/allmydata/test/cli/test_create.py | 144 ++++++++++++++++---------- src/allmydata/test/common.py | 25 +++++ 2 files changed, 115 insertions(+), 54 deletions(-) diff --git a/src/allmydata/test/cli/test_create.py b/src/allmydata/test/cli/test_create.py index 282f26163..609888fb3 100644 --- a/src/allmydata/test/cli/test_create.py +++ b/src/allmydata/test/cli/test_create.py @@ -11,16 +11,24 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import os -import mock + +try: + from typing import Any, List, Tuple +except ImportError: + pass + from twisted.trial import unittest from twisted.internet import defer, reactor from twisted.python import usage from allmydata.util import configutil +from allmydata.util import tor_provider, i2p_provider from ..common_util import run_cli, parse_cli +from ..common import ( + disable_modules, +) from ...scripts import create_node from ... import client - def read_config(basedir): tahoe_cfg = os.path.join(basedir, "tahoe.cfg") config = configutil.get_config(tahoe_cfg) @@ -105,11 +113,12 @@ class Config(unittest.TestCase): @defer.inlineCallbacks def test_client_hide_ip_no_i2p_txtorcon(self): - # hmm, I must be doing something weird, these don't work as - # @mock.patch decorators for some reason - txi2p = mock.patch('allmydata.util.i2p_provider._import_txi2p', return_value=None) - txtorcon = mock.patch('allmydata.util.tor_provider._import_txtorcon', return_value=None) - with txi2p, txtorcon: + """ + The ``create-client`` sub-command tells the user to install the necessary + dependencies if they have neither tor nor i2p support installed and + they request network location privacy with the ``--hide-ip`` flag. 
+ """ + with disable_modules("txi2p", "txtorcon"): basedir = self.mktemp() rc, out, err = yield run_cli("create-client", "--hide-ip", basedir) self.assertTrue(rc != 0, out) @@ -118,8 +127,7 @@ class Config(unittest.TestCase): @defer.inlineCallbacks def test_client_i2p_option_no_txi2p(self): - txi2p = mock.patch('allmydata.util.i2p_provider._import_txi2p', return_value=None) - with txi2p: + with disable_modules("txi2p"): basedir = self.mktemp() rc, out, err = yield run_cli("create-node", "--listen=i2p", "--i2p-launch", basedir) self.assertTrue(rc != 0) @@ -127,8 +135,7 @@ class Config(unittest.TestCase): @defer.inlineCallbacks def test_client_tor_option_no_txtorcon(self): - txtorcon = mock.patch('allmydata.util.tor_provider._import_txtorcon', return_value=None) - with txtorcon: + with disable_modules("txtorcon"): basedir = self.mktemp() rc, out, err = yield run_cli("create-node", "--listen=tor", "--tor-launch", basedir) self.assertTrue(rc != 0) @@ -145,9 +152,7 @@ class Config(unittest.TestCase): @defer.inlineCallbacks def test_client_hide_ip_no_txtorcon(self): - txtorcon = mock.patch('allmydata.util.tor_provider._import_txtorcon', - return_value=None) - with txtorcon: + with disable_modules("txtorcon"): basedir = self.mktemp() rc, out, err = yield run_cli("create-client", "--hide-ip", basedir) self.assertEqual(0, rc) @@ -295,11 +300,10 @@ class Config(unittest.TestCase): def test_node_slow_tor(self): basedir = self.mktemp() d = defer.Deferred() - with mock.patch("allmydata.util.tor_provider.create_config", - return_value=d): - d2 = run_cli("create-node", "--listen=tor", basedir) - d.callback(({}, "port", "location")) - rc, out, err = yield d2 + self.patch(tor_provider, "create_config", lambda *a, **kw: d) + d2 = run_cli("create-node", "--listen=tor", basedir) + d.callback(({}, "port", "location")) + rc, out, err = yield d2 self.assertEqual(rc, 0) self.assertIn("Node created", out) self.assertEqual(err, "") @@ -308,11 +312,10 @@ class Config(unittest.TestCase): def test_node_slow_i2p(self): basedir = self.mktemp() d = defer.Deferred() - with mock.patch("allmydata.util.i2p_provider.create_config", - return_value=d): - d2 = run_cli("create-node", "--listen=i2p", basedir) - d.callback(({}, "port", "location")) - rc, out, err = yield d2 + self.patch(i2p_provider, "create_config", lambda *a, **kw: d) + d2 = run_cli("create-node", "--listen=i2p", basedir) + d.callback(({}, "port", "location")) + rc, out, err = yield d2 self.assertEqual(rc, 0) self.assertIn("Node created", out) self.assertEqual(err, "") @@ -353,6 +356,27 @@ class Config(unittest.TestCase): self.assertIn("is not empty", err) self.assertIn("To avoid clobbering anything, I am going to quit now", err) +def fake_config(testcase, module, result): + # type: (unittest.TestCase, Any, Any) -> List[Tuple] + """ + Monkey-patch a fake configuration function into the given module. + + :param testcase: The test case to use to do the monkey-patching. + + :param module: The module into which to patch the fake function. + + :param result: The return value for the fake function. + + :return: A list of tuples of the arguments the fake function was called + with. 
+ """ + calls = [] + def fake_config(reactor, cli_config): + calls.append((reactor, cli_config)) + return result + testcase.patch(module, "create_config", fake_config) + return calls + class Tor(unittest.TestCase): def test_default(self): basedir = self.mktemp() @@ -360,12 +384,14 @@ class Tor(unittest.TestCase): tor_port = "ghi" tor_location = "jkl" config_d = defer.succeed( (tor_config, tor_port, tor_location) ) - with mock.patch("allmydata.util.tor_provider.create_config", - return_value=config_d) as co: - rc, out, err = self.successResultOf( - run_cli("create-node", "--listen=tor", basedir)) - self.assertEqual(len(co.mock_calls), 1) - args = co.mock_calls[0][1] + + calls = fake_config(self, tor_provider, config_d) + rc, out, err = self.successResultOf( + run_cli("create-node", "--listen=tor", basedir), + ) + + self.assertEqual(len(calls), 1) + args = calls[0] self.assertIdentical(args[0], reactor) self.assertIsInstance(args[1], create_node.CreateNodeOptions) self.assertEqual(args[1]["listen"], "tor") @@ -380,12 +406,15 @@ class Tor(unittest.TestCase): tor_port = "ghi" tor_location = "jkl" config_d = defer.succeed( (tor_config, tor_port, tor_location) ) - with mock.patch("allmydata.util.tor_provider.create_config", - return_value=config_d) as co: - rc, out, err = self.successResultOf( - run_cli("create-node", "--listen=tor", "--tor-launch", - basedir)) - args = co.mock_calls[0][1] + + calls = fake_config(self, tor_provider, config_d) + rc, out, err = self.successResultOf( + run_cli( + "create-node", "--listen=tor", "--tor-launch", + basedir, + ), + ) + args = calls[0] self.assertEqual(args[1]["listen"], "tor") self.assertEqual(args[1]["tor-launch"], True) self.assertEqual(args[1]["tor-control-port"], None) @@ -396,12 +425,15 @@ class Tor(unittest.TestCase): tor_port = "ghi" tor_location = "jkl" config_d = defer.succeed( (tor_config, tor_port, tor_location) ) - with mock.patch("allmydata.util.tor_provider.create_config", - return_value=config_d) as co: - rc, out, err = self.successResultOf( - run_cli("create-node", "--listen=tor", "--tor-control-port=mno", - basedir)) - args = co.mock_calls[0][1] + + calls = fake_config(self, tor_provider, config_d) + rc, out, err = self.successResultOf( + run_cli( + "create-node", "--listen=tor", "--tor-control-port=mno", + basedir, + ), + ) + args = calls[0] self.assertEqual(args[1]["listen"], "tor") self.assertEqual(args[1]["tor-launch"], False) self.assertEqual(args[1]["tor-control-port"], "mno") @@ -434,12 +466,13 @@ class I2P(unittest.TestCase): i2p_port = "ghi" i2p_location = "jkl" dest_d = defer.succeed( (i2p_config, i2p_port, i2p_location) ) - with mock.patch("allmydata.util.i2p_provider.create_config", - return_value=dest_d) as co: - rc, out, err = self.successResultOf( - run_cli("create-node", "--listen=i2p", basedir)) - self.assertEqual(len(co.mock_calls), 1) - args = co.mock_calls[0][1] + + calls = fake_config(self, i2p_provider, dest_d) + rc, out, err = self.successResultOf( + run_cli("create-node", "--listen=i2p", basedir), + ) + self.assertEqual(len(calls), 1) + args = calls[0] self.assertIdentical(args[0], reactor) self.assertIsInstance(args[1], create_node.CreateNodeOptions) self.assertEqual(args[1]["listen"], "i2p") @@ -461,12 +494,15 @@ class I2P(unittest.TestCase): i2p_port = "ghi" i2p_location = "jkl" dest_d = defer.succeed( (i2p_config, i2p_port, i2p_location) ) - with mock.patch("allmydata.util.i2p_provider.create_config", - return_value=dest_d) as co: - rc, out, err = self.successResultOf( - run_cli("create-node", "--listen=i2p", 
"--i2p-sam-port=mno", - basedir)) - args = co.mock_calls[0][1] + + calls = fake_config(self, i2p_provider, dest_d) + rc, out, err = self.successResultOf( + run_cli( + "create-node", "--listen=i2p", "--i2p-sam-port=mno", + basedir, + ), + ) + args = calls[0] self.assertEqual(args[1]["listen"], "i2p") self.assertEqual(args[1]["i2p-launch"], False) self.assertEqual(args[1]["i2p-sam-port"], "mno") diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 0f2dc7c62..2e6da9801 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -26,8 +26,14 @@ __all__ = [ "PIPE", ] +try: + from typing import Tuple, ContextManager +except ImportError: + pass + import sys import os, random, struct +from contextlib import contextmanager import six import tempfile from tempfile import mktemp @@ -1213,6 +1219,25 @@ class ConstantAddresses(object): raise Exception("{!r} has no client endpoint.") return self._handler +@contextmanager +def disable_modules(*names): + # type: (Tuple[str]) -> ContextManager + """ + A context manager which makes modules appear to be missing while it is + active. + + :param *names: The names of the modules to disappear. + """ + missing = object() + modules = list(sys.modules.get(n, missing) for n in names) + for n in names: + sys.modules[n] = None + yield + for n, original in zip(names, modules): + if original is missing: + del sys.modules[n] + else: + sys.modules[n] = original class _TestCaseMixin(object): """ From 8d5727977b9a1a7865954db30f9d4771518b97c0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 14:47:42 -0400 Subject: [PATCH 069/220] it doesn't typecheck, nevermind --- src/allmydata/test/common.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 2e6da9801..8e97fa598 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -26,11 +26,6 @@ __all__ = [ "PIPE", ] -try: - from typing import Tuple, ContextManager -except ImportError: - pass - import sys import os, random, struct from contextlib import contextmanager @@ -1221,7 +1216,6 @@ class ConstantAddresses(object): @contextmanager def disable_modules(*names): - # type: (Tuple[str]) -> ContextManager """ A context manager which makes modules appear to be missing while it is active. From f8655f149bb0754013adc985a6041738f18327f2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 15:04:19 -0400 Subject: [PATCH 070/220] fix the type annotations and such --- src/allmydata/test/no_network.py | 9 +++++++-- src/allmydata/test/test_download.py | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py index aa41ab6bc..b9fa99005 100644 --- a/src/allmydata/test/no_network.py +++ b/src/allmydata/test/no_network.py @@ -25,6 +25,11 @@ if PY2: from past.builtins import unicode from six import ensure_text +try: + from typing import Dict, Callable +except ImportError: + pass + import os from base64 import b32encode from functools import ( @@ -622,7 +627,7 @@ class GridTestMixin(object): f.write(corruptdata) def corrupt_all_shares(self, uri, corruptor, debug=False): - # type: (bytes, Callable[[bytes, bool], bytes] -> bytes), bool) -> None + # type: (bytes, Callable[[bytes, bool], bytes], bool) -> None """ Apply ``corruptor`` to the contents of all share files associated with a given capability and replace the share file contents with its result. 
@@ -630,7 +635,7 @@ class GridTestMixin(object): for (i_shnum, i_serverid, i_sharefile) in self.find_uri_shares(uri): with open(i_sharefile, "rb") as f: sharedata = f.read() - corruptdata = corruptor(sharedata, debug=debug) + corruptdata = corruptor(sharedata, debug) with open(i_sharefile, "wb") as f: f.write(corruptdata) diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index 6b8dc6a31..aeea9642e 100644 --- a/src/allmydata/test/test_download.py +++ b/src/allmydata/test/test_download.py @@ -14,6 +14,11 @@ if PY2: # a previous run. This asserts that the current code is capable of decoding # shares from a previous version. +try: + from typing import Any +except ImportError: + pass + import six import os from twisted.trial import unittest From 78dbe7699403dbe38f94d574367f5d5e95916f4a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 15:20:44 -0400 Subject: [PATCH 071/220] remove unused import --- src/allmydata/storage/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 0f30dad6a..3e2d3b5c6 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -14,7 +14,7 @@ if PY2: else: from typing import Dict -import os, re, struct, time +import os, re, time import six from foolscap.api import Referenceable From 8b976b441e793f45b50e5d5ebcb4314beba889ee Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 12:05:34 -0400 Subject: [PATCH 072/220] add LeaseInfo.is_renew_secret and use it --- src/allmydata/storage/immutable.py | 2 +- src/allmydata/storage/lease.py | 12 +++++++++ src/allmydata/storage/mutable.py | 2 +- src/allmydata/test/test_storage.py | 39 ++++++++++++++++++++++-------- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 24465c1ed..c9b8995b5 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -180,7 +180,7 @@ class ShareFile(object): secret. """ for i,lease in enumerate(self.get_leases()): - if timing_safe_compare(lease.renew_secret, renew_secret): + if lease.is_renew_secret(renew_secret): # yup. See if we need to update the owner time. if allow_backdate or new_expire_time > lease.get_expiration_time(): # yes diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 17683a888..2132048ce 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -15,6 +15,8 @@ import struct, time import attr +from allmydata.util.hashutil import timing_safe_compare + @attr.s(frozen=True) class LeaseInfo(object): """ @@ -68,6 +70,16 @@ class LeaseInfo(object): _expiration_time=new_expire_time, ) + def is_renew_secret(self, candidate_secret): + # type: (bytes) -> bool + """ + Check a string to see if it is the correct renew secret. + + :return: ``True`` if it is the correct renew secret, ``False`` + otherwise. 
+ """ + return timing_safe_compare(self.renew_secret, candidate_secret) + def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self._expiration_time - 31*24*60*60 diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 1b29b4a65..017f2dbb7 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -327,7 +327,7 @@ class MutableShareFile(object): accepting_nodeids = set() with open(self.home, 'rb+') as f: for (leasenum,lease) in self._enumerate_leases(f): - if timing_safe_compare(lease.renew_secret, renew_secret): + if lease.is_renew_secret(renew_secret): # yup. See if we need to update the owner time. if allow_backdate or new_expire_time > lease.get_expiration_time(): # yes diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 8123be2c5..005309f87 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -755,28 +755,28 @@ class Server(unittest.TestCase): # Create a bucket: rs0, cs0 = self.create_bucket_5_shares(ss, b"si0") - leases = list(ss.get_leases(b"si0")) - self.failUnlessEqual(len(leases), 1) - self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs0])) + (lease,) = ss.get_leases(b"si0") + self.assertTrue(lease.is_renew_secret(rs0)) rs1, cs1 = self.create_bucket_5_shares(ss, b"si1") # take out a second lease on si1 rs2, cs2 = self.create_bucket_5_shares(ss, b"si1", 5, 0) - leases = list(ss.get_leases(b"si1")) - self.failUnlessEqual(len(leases), 2) - self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2])) + (lease1, lease2) = ss.get_leases(b"si1") + self.assertTrue(lease1.is_renew_secret(rs1)) + self.assertTrue(lease2.is_renew_secret(rs2)) # and a third lease, using add-lease rs2a,cs2a = (hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)), hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret))) ss.remote_add_lease(b"si1", rs2a, cs2a) - leases = list(ss.get_leases(b"si1")) - self.failUnlessEqual(len(leases), 3) - self.failUnlessEqual(set([l.renew_secret for l in leases]), set([rs1, rs2, rs2a])) + (lease1, lease2, lease3) = ss.get_leases(b"si1") + self.assertTrue(lease1.is_renew_secret(rs1)) + self.assertTrue(lease2.is_renew_secret(rs2)) + self.assertTrue(lease3.is_renew_secret(rs2a)) # add-lease on a missing storage index is silently ignored - self.failUnlessEqual(ss.remote_add_lease(b"si18", b"", b""), None) + self.assertIsNone(ss.remote_add_lease(b"si18", b"", b"")) # check that si0 is readable readers = ss.remote_get_buckets(b"si0") @@ -3028,3 +3028,22 @@ class ShareFileTests(unittest.TestCase): sf = self.get_sharefile() with self.assertRaises(IndexError): sf.cancel_lease(b"garbage") + + def test_renew_secret(self): + """ + A lease loaded from a share file can have its renew secret verified. 
+ """ + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + expiration_time = 2 ** 31 + + sf = self.get_sharefile() + lease = LeaseInfo( + owner_num=0, + renew_secret=renew_secret, + cancel_secret=cancel_secret, + expiration_time=expiration_time, + ) + sf.add_lease(lease) + (loaded_lease,) = sf.get_leases() + self.assertTrue(loaded_lease.is_renew_secret(renew_secret)) From b5f882ffa60574f193a18e70e3c310077a2f097e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Oct 2021 12:21:22 -0400 Subject: [PATCH 073/220] introduce and use LeaseInfo.is_cancel_secret --- src/allmydata/storage/immutable.py | 2 +- src/allmydata/storage/lease.py | 10 ++++++++++ src/allmydata/storage/mutable.py | 2 +- src/allmydata/test/test_storage.py | 19 +++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index c9b8995b5..4f6a1c9c7 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -209,7 +209,7 @@ class ShareFile(object): leases = list(self.get_leases()) num_leases_removed = 0 for i,lease in enumerate(leases): - if timing_safe_compare(lease.cancel_secret, cancel_secret): + if lease.is_cancel_secret(cancel_secret): leases[i] = None num_leases_removed += 1 if not num_leases_removed: diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 2132048ce..ff96ebaf4 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -80,6 +80,16 @@ class LeaseInfo(object): """ return timing_safe_compare(self.renew_secret, candidate_secret) + def is_cancel_secret(self, candidate_secret): + # type: (bytes) -> bool + """ + Check a string to see if it is the correct cancel secret. + + :return: ``True`` if it is the correct cancel secret, ``False`` + otherwise. + """ + return timing_safe_compare(self.cancel_secret, candidate_secret) + def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self._expiration_time - 31*24*60*60 diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 017f2dbb7..9480a3c03 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -371,7 +371,7 @@ class MutableShareFile(object): with open(self.home, 'rb+') as f: for (leasenum,lease) in self._enumerate_leases(f): accepting_nodeids.add(lease.nodeid) - if timing_safe_compare(lease.cancel_secret, cancel_secret): + if lease.is_cancel_secret(cancel_secret): self._write_lease_record(f, leasenum, blank_lease) modified += 1 else: diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 005309f87..aac40362c 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -3047,3 +3047,22 @@ class ShareFileTests(unittest.TestCase): sf.add_lease(lease) (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_renew_secret(renew_secret)) + + def test_cancel_secret(self): + """ + A lease loaded from a share file can have its cancel secret verified. 
+ """ + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + expiration_time = 2 ** 31 + + sf = self.get_sharefile() + lease = LeaseInfo( + owner_num=0, + renew_secret=renew_secret, + cancel_secret=cancel_secret, + expiration_time=expiration_time, + ) + sf.add_lease(lease) + (loaded_lease,) = sf.get_leases() + self.assertTrue(loaded_lease.is_cancel_secret(cancel_secret)) From 696a260ddfc02be35a63ed1446eda0b5434cc86f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 29 Oct 2021 09:00:38 -0400 Subject: [PATCH 074/220] news fragment --- newsfragments/3836.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3836.minor diff --git a/newsfragments/3836.minor b/newsfragments/3836.minor new file mode 100644 index 000000000..e69de29bb From 892b4683654cd1281be8feeaa65a2ef946ed4f5a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 29 Oct 2021 09:03:37 -0400 Subject: [PATCH 075/220] use the port assigner to assign a port for the main tub --- src/allmydata/test/common_system.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 9d14c8642..874c7f6ba 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -672,11 +672,14 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): """ iv_dir = self.getdir("introducer") if not os.path.isdir(iv_dir): - _, port_endpoint = self.port_assigner.assign(reactor) + _, web_port_endpoint = self.port_assigner.assign(reactor) + main_location_hint, main_port_endpoint = self.port_assigner.assign(reactor) introducer_config = ( u"[node]\n" u"nickname = introducer \N{BLACK SMILING FACE}\n" + - u"web.port = {}\n".format(port_endpoint) + u"web.port = {}\n".format(web_port_endpoint) + + u"tub.port = {}\n".format(main_port_endpoint) + + u"tub.location = {}\n".format(main_location_hint) ).encode("utf-8") fileutil.make_dirs(iv_dir) From 5ba636c7b10fd146c39eff9a60c34f9eb5943a9a Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 2 Nov 2021 10:36:32 +0100 Subject: [PATCH 076/220] removed deferred logger from basic function in test_logs Signed-off-by: fenn-cs --- src/allmydata/test/web/test_logs.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/web/test_logs.py b/src/allmydata/test/web/test_logs.py index fe0a0445d..81ec357c0 100644 --- a/src/allmydata/test/web/test_logs.py +++ b/src/allmydata/test/web/test_logs.py @@ -54,9 +54,7 @@ from ...web.logs import ( TokenAuthenticatedWebSocketServerProtocol, ) -from ...util.eliotutil import ( - log_call_deferred -) +from eliot import log_call class StreamingEliotLogsTests(SyncTestCase): """ @@ -110,7 +108,7 @@ class TestStreamingLogs(AsyncTestCase): messages.append(json.loads(msg)) proto.on("message", got_message) - @log_call_deferred(action_type=u"test:cli:some-exciting-action") + @log_call(action_type=u"test:cli:some-exciting-action") def do_a_thing(arguments): pass @@ -121,7 +119,7 @@ class TestStreamingLogs(AsyncTestCase): self.assertThat(len(messages), Equals(3)) self.assertThat(messages[0]["action_type"], Equals("test:cli:some-exciting-action")) - self.assertThat(messages[0]["kwargs"]["arguments"], + self.assertThat(messages[0]["arguments"], Equals(["hello", "good-\\xff-day", 123, {"a": 35}, [None]])) self.assertThat(messages[1]["action_type"], Equals("test:cli:some-exciting-action")) self.assertThat("started", Equals(messages[0]["action_status"])) From 
fcfc89e3ae4d2a73ba110b2b23eaf24001e78dd9 Mon Sep 17 00:00:00 2001 From: fenn-cs Date: Tue, 2 Nov 2021 14:32:20 +0100 Subject: [PATCH 077/220] moved new tests/update for eliotutils Signed-off-by: fenn-cs --- src/allmydata/test/test_eliotutil.py | 73 ---------------------------- src/allmydata/util/eliotutil.py | 22 +-------- 2 files changed, 2 insertions(+), 93 deletions(-) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 61e0a6958..3f915ecd2 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -56,7 +56,6 @@ from eliot.testing import ( capture_logging, assertHasAction, swap_logger, - assertContainsFields, ) from twisted.internet.defer import ( @@ -282,75 +281,3 @@ class LogCallDeferredTests(TestCase): ), ), ) - - @capture_logging( - lambda self, logger: - assertHasAction(self, logger, u"the-action", succeeded=True), - ) - def test_gets_positional_arguments(self, logger): - """ - Check that positional arguments are logged when using ``log_call_deferred`` - """ - @log_call_deferred(action_type=u"the-action") - def f(a): - return a ** 2 - self.assertThat( - f(4), succeeded(Equals(16))) - msg = logger.messages[0] - assertContainsFields(self, msg, {"args": (4,)}) - - @capture_logging( - lambda self, logger: - assertHasAction(self, logger, u"the-action", succeeded=True), - ) - def test_gets_keyword_arguments(self, logger): - """ - Check that keyword arguments are logged when using ``log_call_deferred`` - """ - @log_call_deferred(action_type=u"the-action") - def f(base, exp): - return base ** exp - self.assertThat(f(exp=2,base=10), succeeded(Equals(100))) - msg = logger.messages[0] - assertContainsFields(self, msg, {"kwargs": {"base": 10, "exp": 2}}) - - - @capture_logging( - lambda self, logger: - assertHasAction(self, logger, u"the-action", succeeded=True), - ) - def test_gets_keyword_and_positional_arguments(self, logger): - """ - Check that both keyword and positional arguments are logged when using ``log_call_deferred`` - """ - @log_call_deferred(action_type=u"the-action") - def f(base, exp, message): - return base ** exp - self.assertThat(f(10, 2, message="an exponential function"), succeeded(Equals(100))) - msg = logger.messages[0] - assertContainsFields(self, msg, {"args": (10, 2)}) - assertContainsFields(self, msg, {"kwargs": {"message": "an exponential function"}}) - - - @capture_logging( - lambda self, logger: - assertHasAction(self, logger, u"the-action", succeeded=True), - ) - def test_keyword_args_dont_overlap_with_start_action(self, logger): - """ - Check that kwargs passed to decorated functions don't overlap with params in ``start_action`` - """ - @log_call_deferred(action_type=u"the-action") - def f(base, exp, kwargs, args): - return base ** exp - self.assertThat( - f(10, 2, kwargs={"kwarg_1": "value_1", "kwarg_2": 2}, args=(1, 2, 3)), - succeeded(Equals(100)), - ) - msg = logger.messages[0] - assertContainsFields(self, msg, {"args": (10, 2)}) - assertContainsFields( - self, - msg, - {"kwargs": {"args": [1, 2, 3], "kwargs": {"kwarg_1": "value_1", "kwarg_2": 2}}}, - ) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index fe431568f..4e48fbb9f 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -87,11 +87,7 @@ from twisted.internet.defer import ( ) from twisted.application.service import Service -from .jsonbytes import ( - AnyBytesJSONEncoder, - bytes_to_unicode -) -import json +from .jsonbytes import AnyBytesJSONEncoder def 
validateInstanceOf(t): @@ -315,14 +311,6 @@ class _DestinationParser(object): _parse_destination_description = _DestinationParser().parse -def is_json_serializable(object): - try: - json.dumps(object) - return True - except (TypeError, OverflowError): - return False - - def log_call_deferred(action_type): """ Like ``eliot.log_call`` but for functions which return ``Deferred``. @@ -332,11 +320,7 @@ def log_call_deferred(action_type): def logged_f(*a, **kw): # Use the action's context method to avoid ending the action when # the `with` block ends. - kwargs = {k: bytes_to_unicode(True, kw[k]) for k in kw} - # Remove complex (unserializable) objects from positional args to - # prevent eliot from throwing errors when it attempts serialization - args = tuple(arg if is_json_serializable(arg) else str(arg) for arg in a) - with start_action(action_type=action_type, args=args, kwargs=kwargs).context(): + with start_action(action_type=action_type).context(): # Use addActionFinish so that the action finishes when the # Deferred fires. d = maybeDeferred(f, *a, **kw) @@ -350,5 +334,3 @@ if PY2: capture_logging = eliot_capture_logging else: capture_logging = partial(eliot_capture_logging, encoder_=AnyBytesJSONEncoder) - - From 39c4a2c4eb1963b2035644d97c1760b649c21278 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 2 Nov 2021 15:10:54 -0400 Subject: [PATCH 078/220] tidy up some corners --- src/allmydata/scripts/debug.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index ab48b0fd0..260cca55b 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -746,6 +746,13 @@ def _describe_mutable_share(abs_sharefile, f, now, si_s, out): if share_type == "SDMF": f.seek(m.DATA_OFFSET) + + # Read at least the mutable header length, if possible. If there's + # less data than that in the share, don't try to read more (we won't + # be able to unpack the header in this case but we surely don't want + # to try to unpack bytes *following* the data section as if they were + # header data). Rather than 2000 we could use HEADER_LENGTH from + # allmydata/mutable/layout.py, probably. data = f.read(min(data_length, 2000)) try: @@ -810,8 +817,8 @@ def _describe_immutable_share(abs_sharefile, now, si_s, out): sf = ShareFile(abs_sharefile) bp = ImmediateReadBucketProxy(sf) - expiration_time = min( [lease.get_expiration_time() - for lease in sf.get_leases()] ) + expiration_time = min(lease.get_expiration_time() + for lease in sf.get_leases()) expiration = max(0, expiration_time - now) UEB_data = call(bp.get_uri_extension) @@ -934,9 +941,10 @@ def corrupt_share(options): if MutableShareFile.is_valid_header(prefix): # mutable m = MutableShareFile(fn) - f = open(fn, "rb") - f.seek(m.DATA_OFFSET) - data = f.read(2000) + with open(fn, "rb") as f: + f.seek(m.DATA_OFFSET) + # Read enough data to get a mutable header to unpack. 
+ data = f.read(2000) # make sure this slot contains an SMDF share assert data[0:1] == b"\x00", "non-SDMF mutable shares not supported" f.close() From b3d1acd14a1f602df5bba424214070a4643a8bab Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 09:55:16 -0400 Subject: [PATCH 079/220] try skipping Tor integration tests on Python 2 --- integration/test_tor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration/test_tor.py b/integration/test_tor.py index 15d888e36..b0419f0d2 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -35,6 +35,9 @@ from allmydata.test.common import ( if sys.platform.startswith('win'): pytest.skip('Skipping Tor tests on Windows', allow_module_level=True) +if PY2: + pytest.skip('Skipping Tor tests on Python 2 because dependencies are hard to come by', allow_module_level=True) + @pytest_twisted.inlineCallbacks def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl): yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) From 4606c3c9dde91de11d769bf9d8c6fd6f2fd1f877 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 09:59:19 -0400 Subject: [PATCH 080/220] news fragment --- newsfragments/3837.other | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3837.other diff --git a/newsfragments/3837.other b/newsfragments/3837.other new file mode 100644 index 000000000..a9e4e6986 --- /dev/null +++ b/newsfragments/3837.other @@ -0,0 +1 @@ +Tahoe-LAFS no longer runs its Tor integration test suite on Python 2 due to the increased complexity of obtaining compatible versions of necessary dependencies. From 8e150cce6a27b6616db54cfd4c2ac08fbdd13794 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 13:14:55 -0400 Subject: [PATCH 081/220] add explicit direct tests for the new methods --- src/allmydata/test/test_storage.py | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index aac40362c..460653bd0 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -3066,3 +3066,64 @@ class ShareFileTests(unittest.TestCase): sf.add_lease(lease) (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_cancel_secret(cancel_secret)) + + +class LeaseInfoTests(unittest.TestCase): + """ + Tests for ``allmydata.storage.lease.LeaseInfo``. + """ + def test_is_renew_secret(self): + """ + ``LeaseInfo.is_renew_secret`` returns ``True`` if the value given is the + renew secret. + """ + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + lease = LeaseInfo( + owner_num=1, + renew_secret=renew_secret, + cancel_secret=cancel_secret, + ) + self.assertTrue(lease.is_renew_secret(renew_secret)) + + def test_is_not_renew_secret(self): + """ + ``LeaseInfo.is_renew_secret`` returns ``False`` if the value given is not + the renew secret. + """ + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + lease = LeaseInfo( + owner_num=1, + renew_secret=renew_secret, + cancel_secret=cancel_secret, + ) + self.assertFalse(lease.is_renew_secret(cancel_secret)) + + def test_is_cancel_secret(self): + """ + ``LeaseInfo.is_cancel_secret`` returns ``True`` if the value given is the + cancel secret. 
+ """ + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + lease = LeaseInfo( + owner_num=1, + renew_secret=renew_secret, + cancel_secret=cancel_secret, + ) + self.assertTrue(lease.is_cancel_secret(cancel_secret)) + + def test_is_not_cancel_secret(self): + """ + ``LeaseInfo.is_cancel_secret`` returns ``False`` if the value given is not + the cancel secret. + """ + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + lease = LeaseInfo( + owner_num=1, + renew_secret=renew_secret, + cancel_secret=cancel_secret, + ) + self.assertFalse(lease.is_cancel_secret(renew_secret)) From 7335b2a5977752c0805a7fd9c7759cafa8ac31b1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 13:16:15 -0400 Subject: [PATCH 082/220] remove unused import --- src/allmydata/storage/immutable.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 4f6a1c9c7..8a7a5a966 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -24,7 +24,6 @@ from allmydata.interfaces import ( ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition -from allmydata.util.hashutil import timing_safe_compare from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError From 86ca463c3198746e31b61569f79e860f4a6e7d6d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 13:24:04 -0400 Subject: [PATCH 083/220] news fragment --- newsfragments/3834.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3834.minor diff --git a/newsfragments/3834.minor b/newsfragments/3834.minor new file mode 100644 index 000000000..e69de29bb From 797e0994596dd916a978b5fc8a757d15322b3100 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 16:05:28 -0400 Subject: [PATCH 084/220] make create_introducer_webish assign a main tub port --- src/allmydata/test/web/test_introducer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/web/test_introducer.py b/src/allmydata/test/web/test_introducer.py index 4b5850cbc..69309d35b 100644 --- a/src/allmydata/test/web/test_introducer.py +++ b/src/allmydata/test/web/test_introducer.py @@ -83,12 +83,18 @@ def create_introducer_webish(reactor, port_assigner, basedir): with the node and its webish service. """ node.create_node_dir(basedir, "testing") - _, port_endpoint = port_assigner.assign(reactor) + main_tub_location, main_tub_endpoint = port_assigner.assign(reactor) + _, web_port_endpoint = port_assigner.assign(reactor) with open(join(basedir, "tahoe.cfg"), "w") as f: f.write( "[node]\n" - "tub.location = 127.0.0.1:1\n" + - "web.port = {}\n".format(port_endpoint) + "tub.port = {main_tub_endpoint}\n" + "tub.location = {main_tub_location}\n" + "web.port = {web_port_endpoint}\n".format( + main_tub_endpoint=main_tub_endpoint, + main_tub_location=main_tub_location, + web_port_endpoint=web_port_endpoint, + ) ) intro_node = yield create_introducer(basedir) From 31649890ef47c2169a0aedee2d7488b8f6da6959 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 16:08:08 -0400 Subject: [PATCH 085/220] Teach UseNode to use a port assigner for tub.port Then use it to assign ports for tub.port unless the caller supplied their own value. 
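For illustration, here is a minimal sketch of how a test is expected to use the fixture after this change. It is adapted from the test_node.py changes later in this series; the test class and method names are hypothetical and the introducer furl is a placeholder:

    from twisted.internet import reactor
    from twisted.python.filepath import FilePath
    from twisted.trial import unittest

    from allmydata.test.common import UseNode

    class NodeCreationTests(unittest.TestCase):
        def test_create(self):
            basedir = FilePath(self.mktemp())
            # node_config carries no "tub.port" entry, so the fixture's
            # setUp() takes a fresh tub.port/tub.location pair from its
            # SameProcessStreamEndpointAssigner instead of relying on the
            # node's automatic port assignment.
            fixture = UseNode(None, None, basedir, "pb://introducer/furl", {}, reactor=reactor)
            fixture.setUp()
            self.addCleanup(fixture.cleanUp)
            # create_node() returns a Deferred; trial waits for it.
            return fixture.create_node()

Taking ports from a single in-process assigner avoids the collisions, and the resulting spurious test failures, that automatic port assignment can produce when many test nodes start side by side.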
--- src/allmydata/test/common.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 97368ee92..e0472edce 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -267,8 +267,12 @@ class UseNode(object): node_config = attr.ib(default=attr.Factory(dict)) config = attr.ib(default=None) + reactor = attr.ib(default=None) def setUp(self): + self.assigner = SameProcessStreamEndpointAssigner() + self.assigner.setUp() + def format_config_items(config): return "\n".join( " = ".join((key, value)) @@ -292,6 +296,23 @@ class UseNode(object): "default", self.introducer_furl, ) + + node_config = self.node_config.copy() + if "tub.port" not in node_config: + if "tub.location" in node_config: + raise ValueError( + "UseNode fixture does not support specifying tub.location " + "without tub.port" + ) + + # Don't use the normal port auto-assignment logic. It produces + # collisions and makes tests fail spuriously. + tub_location, tub_endpoint = self.assigner.assign(self.reactor) + node_config.update({ + "tub.port": tub_endpoint, + "tub.location": tub_location, + }) + self.config = config_from_string( self.basedir.asTextMode().path, "tub.port", @@ -304,7 +325,7 @@ storage.plugins = {storage_plugin} {plugin_config_section} """.format( storage_plugin=self.storage_plugin, - node_config=format_config_items(self.node_config), + node_config=format_config_items(node_config), plugin_config_section=plugin_config_section, ) ) @@ -316,7 +337,7 @@ storage.plugins = {storage_plugin} ) def cleanUp(self): - pass + self.assigner.tearDown() def getDetails(self): From 5a71774bf875a71c8ddbfb8b4fcfcb2dda7a4f9d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 16:10:32 -0400 Subject: [PATCH 086/220] use port assigner and UseNode more in test_node.py --- src/allmydata/test/test_node.py | 36 +++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/test_node.py b/src/allmydata/test/test_node.py index cf5fa27f3..c6cff1bab 100644 --- a/src/allmydata/test/test_node.py +++ b/src/allmydata/test/test_node.py @@ -69,6 +69,8 @@ import allmydata.test.common_util as testutil from .common import ( ConstantAddresses, + SameProcessStreamEndpointAssigner, + UseNode, ) def port_numbers(): @@ -80,11 +82,10 @@ class LoggingMultiService(service.MultiService): # see https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2946 -def testing_tub(config_data=''): +def testing_tub(reactor, config_data=''): """ Creates a 'main' Tub for testing purposes, from config data """ - from twisted.internet import reactor basedir = 'dummy_basedir' config = config_from_string(basedir, 'DEFAULT_PORTNUMFILE_BLANK', config_data) fileutil.make_dirs(os.path.join(basedir, 'private')) @@ -112,6 +113,9 @@ class TestCase(testutil.SignalMixin, unittest.TestCase): # try to bind the port. We'll use a low-numbered one that's likely to # conflict with another service to prove it. self._available_port = 22 + self.port_assigner = SameProcessStreamEndpointAssigner() + self.port_assigner.setUp() + self.addCleanup(self.port_assigner.tearDown) def _test_location( self, @@ -137,11 +141,23 @@ class TestCase(testutil.SignalMixin, unittest.TestCase): :param local_addresses: If not ``None`` then a list of addresses to supply to the system under test as local addresses. 
""" + from twisted.internet import reactor + basedir = self.mktemp() create_node_dir(basedir, "testing") + if tub_port is None: + # Always configure a usable tub.port address instead of relying on + # the automatic port assignment. The automatic port assignment is + # prone to collisions and spurious test failures. + _, tub_port = self.port_assigner.assign(reactor) + config_data = "[node]\n" - if tub_port: - config_data += "tub.port = {}\n".format(tub_port) + config_data += "tub.port = {}\n".format(tub_port) + + # If they wanted a certain location, go for it. This probably won't + # agree with the tub.port value we set but that only matters if + # anything tries to use this to establish a connection ... which + # nothing in this test suite will. if tub_location is not None: config_data += "tub.location = {}\n".format(tub_location) @@ -149,7 +165,7 @@ class TestCase(testutil.SignalMixin, unittest.TestCase): self.patch(iputil, 'get_local_addresses_sync', lambda: local_addresses) - tub = testing_tub(config_data) + tub = testing_tub(reactor, config_data) class Foo(object): pass @@ -431,7 +447,12 @@ class TestCase(testutil.SignalMixin, unittest.TestCase): @defer.inlineCallbacks def test_logdir_is_str(self): - basedir = "test_node/test_logdir_is_str" + from twisted.internet import reactor + + basedir = FilePath(self.mktemp()) + fixture = UseNode(None, None, basedir, "pb://introducer/furl", {}, reactor=reactor) + fixture.setUp() + self.addCleanup(fixture.cleanUp) ns = Namespace() ns.called = False @@ -440,8 +461,7 @@ class TestCase(testutil.SignalMixin, unittest.TestCase): self.failUnless(isinstance(logdir, str), logdir) self.patch(foolscap.logging.log, 'setLogDir', call_setLogDir) - create_node_dir(basedir, "nothing to see here") - yield client.create_client(basedir) + yield fixture.create_node() self.failUnless(ns.called) def test_set_config_unescaped_furl_hash(self): From 5caa80fe383630aab8afa8a9a1667fb3d4cd8f60 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 16:11:08 -0400 Subject: [PATCH 087/220] use UseNode more in test_client.py Also make write_introducer more lenient about filesystem state --- src/allmydata/scripts/common.py | 4 +++- src/allmydata/test/test_client.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py index 0a9ab8714..c9fc8e031 100644 --- a/src/allmydata/scripts/common.py +++ b/src/allmydata/scripts/common.py @@ -141,7 +141,9 @@ def write_introducer(basedir, petname, furl): """ if isinstance(furl, bytes): furl = furl.decode("utf-8") - basedir.child(b"private").child(b"introducers.yaml").setContent( + private = basedir.child(b"private") + private.makedirs(ignoreExistingDirectory=True) + private.child(b"introducers.yaml").setContent( safe_dump({ "introducers": { petname: { diff --git a/src/allmydata/test/test_client.py b/src/allmydata/test/test_client.py index fd2837f1d..a2572e735 100644 --- a/src/allmydata/test/test_client.py +++ b/src/allmydata/test/test_client.py @@ -89,6 +89,7 @@ from .common import ( UseTestPlugins, MemoryIntroducerClient, get_published_announcements, + UseNode, ) from .matchers import ( MatchesSameElements, @@ -953,13 +954,14 @@ class Run(unittest.TestCase, testutil.StallMixin): @defer.inlineCallbacks def test_reloadable(self): - basedir = FilePath("test_client.Run.test_reloadable") - private = basedir.child("private") - private.makedirs() + from twisted.internet import reactor + dummy = 
"pb://wl74cyahejagspqgy4x5ukrvfnevlknt@127.0.0.1:58889/bogus" - write_introducer(basedir, "someintroducer", dummy) - basedir.child("tahoe.cfg").setContent(BASECONFIG. encode("ascii")) - c1 = yield client.create_client(basedir.path) + fixture = UseNode(None, None, FilePath(self.mktemp()), dummy, reactor=reactor) + fixture.setUp() + self.addCleanup(fixture.cleanUp) + + c1 = yield fixture.create_node() c1.setServiceParent(self.sparent) # delay to let the service start up completely. I'm not entirely sure @@ -981,7 +983,7 @@ class Run(unittest.TestCase, testutil.StallMixin): # also change _check_exit_trigger to use it instead of a raw # reactor.stop, also instrument the shutdown event in an # attribute that we can check.) - c2 = yield client.create_client(basedir.path) + c2 = yield fixture.create_node() c2.setServiceParent(self.sparent) yield c2.disownServiceParent() From 780be2691b9ca4a7b1b2d08c6d0bb44b11d8b9a1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 16:11:28 -0400 Subject: [PATCH 088/220] assign a tub.port to all system test nodes --- src/allmydata/test/common_system.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 874c7f6ba..0c424136a 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -767,13 +767,15 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def _generate_config(self, which, basedir): config = {} - except1 = set(range(self.numclients)) - {1} + allclients = set(range(self.numclients)) + except1 = allclients - {1} feature_matrix = { ("client", "nickname"): except1, - # client 1 has to auto-assign an address. - ("node", "tub.port"): except1, - ("node", "tub.location"): except1, + # Auto-assigning addresses is extremely failure prone and not + # amenable to automated testing in _this_ manner. 
+ ("node", "tub.port"): allclients, + ("node", "tub.location"): allclients, # client 0 runs a webserver and a helper # client 3 runs a webserver but no helper @@ -855,7 +857,13 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): # connection-lost code basedir = FilePath(self.getdir("client%d" % client_num)) basedir.makedirs() - config = "[client]\n" + config = ( + "[node]\n" + "tub.location = {}\n" + "tub.port = {}\n" + "[client]\n" + ).format(*self.port_assigner.assign(reactor)) + if helper_furl: config += "helper.furl = %s\n" % helper_furl basedir.child("tahoe.cfg").setContent(config.encode("utf-8")) From b4bc95cb5a36b7507ce3745cfc3a273b5eedecb6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 3 Nov 2021 16:15:38 -0400 Subject: [PATCH 089/220] news fragment --- newsfragments/3838.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3838.minor diff --git a/newsfragments/3838.minor b/newsfragments/3838.minor new file mode 100644 index 000000000..e69de29bb From 0459b712b02b8ba686687d696325bcdb650f770c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 08:54:55 -0400 Subject: [PATCH 090/220] news fragment --- newsfragments/3839.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3839.security diff --git a/newsfragments/3839.security b/newsfragments/3839.security new file mode 100644 index 000000000..1ae054542 --- /dev/null +++ b/newsfragments/3839.security @@ -0,0 +1 @@ +The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets. From 274dc6e837dd7181fb1f6ba9116570dc4b255d66 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 08:55:37 -0400 Subject: [PATCH 091/220] Introduce `UnknownContainerVersionError` base w/ structured args --- src/allmydata/storage/common.py | 11 ++++++++--- src/allmydata/storage/immutable.py | 4 +--- src/allmydata/storage/mutable.py | 4 +--- src/allmydata/test/test_storage.py | 7 ++++--- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index e5563647f..48fc77840 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -16,11 +16,16 @@ from allmydata.util import base32 # Backwards compatibility. 
from allmydata.interfaces import DataTooLargeError # noqa: F401 -class UnknownMutableContainerVersionError(Exception): - pass -class UnknownImmutableContainerVersionError(Exception): +class UnknownContainerVersionError(Exception): + def __init__(self, filename, version): + self.filename = filename + self.version = version + +class UnknownMutableContainerVersionError(UnknownContainerVersionError): pass +class UnknownImmutableContainerVersionError(UnknownContainerVersionError): + pass def si_b2a(storageindex): return base32.b2a(storageindex) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index a43860138..fcc60509c 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -174,9 +174,7 @@ class ShareFile(object): filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) if version != 1: - msg = "sharefile %s had version %d but we wanted 1" % \ - (filename, version) - raise UnknownImmutableContainerVersionError(msg) + raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) self._data_offset = 0xc diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 4abf22064..ce9cc5ff4 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -95,9 +95,7 @@ class MutableShareFile(object): data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) if not self.is_valid_header(data): - msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ - (filename, magic, self.MAGIC) - raise UnknownMutableContainerVersionError(msg) + raise UnknownMutableContainerVersionError(filename, magic) self.parent = parent # for logging def log(self, *args, **kwargs): diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 2c8d84b9e..bf9eff37a 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -646,7 +646,8 @@ class Server(unittest.TestCase): e = self.failUnlessRaises(UnknownImmutableContainerVersionError, ss.remote_get_buckets, b"si1") - self.failUnlessIn(" had version 0 but we wanted 1", str(e)) + self.assertEqual(e.filename, fn) + self.assertEqual(e.version, 0) def test_disconnect(self): # simulate a disconnection @@ -1127,8 +1128,8 @@ class MutableServer(unittest.TestCase): read = ss.remote_slot_readv e = self.failUnlessRaises(UnknownMutableContainerVersionError, read, b"si1", [0], [(0,10)]) - self.failUnlessIn(" had magic ", str(e)) - self.failUnlessIn(" but we wanted ", str(e)) + self.assertEqual(e.filename, fn) + self.assertTrue(e.version.startswith(b"BAD MAGIC")) def test_container_size(self): ss = self.create("test_container_size") From 10724a91f9ca2fe929f4e29adb03b876b21f9fe5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:17:36 -0400 Subject: [PATCH 092/220] introduce an explicit representation of the v1 immutable container schema This is only a partial representation, sufficient to express the changes that are coming in v2. 
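
One payoff of the structured exception arguments from the previous commit
is that callers and tests can inspect failures without parsing message
strings. A hedged usage sketch (the `path` variable is illustrative):

    try:
        sf = ShareFile(path)
    except UnknownImmutableContainerVersionError as e:
        # The offending file and version now ride along as attributes
        # rather than being buried in a formatted message.
        print("cannot read %s: unsupported container version %r"
              % (e.filename, e.version))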
--- src/allmydata/storage/immutable.py | 37 ++++++----- src/allmydata/storage/immutable_schema.py | 81 +++++++++++++++++++++++ 2 files changed, 102 insertions(+), 16 deletions(-) create mode 100644 src/allmydata/storage/immutable_schema.py diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index fcc60509c..ae5a710af 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -25,9 +25,14 @@ from allmydata.interfaces import ( ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition -from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError +from .immutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_version, +) + + # each share file (in storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and # RIBucketReader.read . The lease information is not accessible through these @@ -118,9 +123,16 @@ class ShareFile(object): ``False`` otherwise. """ (version,) = struct.unpack(">L", header[:4]) - return version == 1 + return schema_from_version(version) is not None - def __init__(self, filename, max_size=None, create=False, lease_count_format="L"): + def __init__( + self, + filename, + max_size=None, + create=False, + lease_count_format="L", + schema=NEWEST_SCHEMA_VERSION, + ): """ Initialize a ``ShareFile``. @@ -156,24 +168,17 @@ class ShareFile(object): # it. Also construct the metadata. assert not os.path.exists(self.home) fileutil.make_dirs(os.path.dirname(self.home)) - # The second field -- the four-byte share data length -- is no - # longer used as of Tahoe v1.3.0, but we continue to write it in - # there in case someone downgrades a storage server from >= - # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one - # server to another, etc. We do saturation -- a share data length - # larger than 2**32-1 (what can fit into the field) is marked as - # the largest length that can fit into the field. That way, even - # if this does happen, the old < v1.3.0 server will still allow - # clients to read the first part of the share. 
+            self._schema = schema
             with open(self.home, 'wb') as f:
-                f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0))
+                f.write(self._schema.header(max_size))
             self._lease_offset = max_size + 0x0c
             self._num_leases = 0
         else:
             with open(self.home, 'rb') as f:
                 filesize = os.path.getsize(self.home)
                 (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc))
-            if version != 1:
+            self._schema = schema_from_version(version)
+            if self._schema is None:
                 raise UnknownImmutableContainerVersionError(filename, version)
             self._num_leases = num_leases
             self._lease_offset = filesize - (num_leases * self.LEASE_SIZE)
@@ -209,7 +214,7 @@ class ShareFile(object):
             offset = self._lease_offset + lease_number * self.LEASE_SIZE
             f.seek(offset)
             assert f.tell() == offset
-            f.write(lease_info.to_immutable_data())
+            f.write(self._schema.serialize_lease(lease_info))
 
     def _read_num_leases(self, f):
         f.seek(0x08)
@@ -240,7 +245,7 @@ class ShareFile(object):
             for i in range(num_leases):
                 data = f.read(self.LEASE_SIZE)
                 if data:
-                    yield LeaseInfo.from_immutable_data(data)
+                    yield self._schema.unserialize_lease(data)
 
     def add_lease(self, lease_info):
         with open(self.home, 'rb+') as f:
diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py
new file mode 100644
index 000000000..759752bed
--- /dev/null
+++ b/src/allmydata/storage/immutable_schema.py
@@ -0,0 +1,81 @@
+"""
+Ported to Python 3.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+
+import struct
+
+from .lease import (
+    LeaseInfo,
+)
+
+def _header(version, max_size):
+    # (int, int) -> bytes
+    """
+    Construct the header for an immutable container.
+
+    :param version: The container version to include in the header.
+    :param max_size: The maximum data size the container will hold.
+
+    :return: Some bytes to write at the beginning of the container.
+    """
+    # The second field -- the four-byte share data length -- is no longer
+    # used as of Tahoe v1.3.0, but we continue to write it in there in
+    # case someone downgrades a storage server from >= Tahoe-1.3.0 to <
+    # Tahoe-1.3.0, or moves a share file from one server to another,
+    # etc. We do saturation -- a share data length larger than 2**32-1
+    # (what can fit into the field) is marked as the largest length that
+    # can fit into the field. That way, even if this does happen, the old
+    # < v1.3.0 server will still allow clients to read the first part of
+    # the share.
+    return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0)
+
+class _V1(object):
+    """
+    Implement encoding and decoding for v1 of the immutable container.
+    """
+    version = 1
+
+    @classmethod
+    def header(cls, max_size):
+        return _header(cls.version, max_size)
+
+    @classmethod
+    def serialize_lease(cls, lease):
+        if isinstance(lease, LeaseInfo):
+            return lease.to_immutable_data()
+        raise ValueError(
+            "ShareFile v1 schema only supports LeaseInfo, not {!r}".format(
+                lease,
+            ),
+        )
+
+    @classmethod
+    def unserialize_lease(cls, data):
+        # In v1 of the immutable schema lease secrets are stored plaintext.
+        # So load the data into a plain LeaseInfo which works on plaintext
+        # secrets.
+ return LeaseInfo.from_immutable_data(data) + + +ALL_SCHEMAS = {_V1} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) + +def schema_from_version(version): + # (int) -> Optional[type] + """ + Find the schema object that corresponds to a certain version number. + """ + for schema in ALL_SCHEMAS: + if schema.version == version: + return schema + return None From 3b4141952387452894ed5c0ed58113e272ad3e4f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:32:59 -0400 Subject: [PATCH 093/220] apply the ShareFile tests to all schema versions using hypothesis --- src/allmydata/test/test_storage.py | 60 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index bf9eff37a..655395042 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -43,6 +43,9 @@ from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile +from allmydata.storage.immutable_schema import ( + ALL_SCHEMAS, +) from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \ si_b2a, si_a2b @@ -844,6 +847,9 @@ class Server(unittest.TestCase): # Create a bucket: rs0, cs0 = self.create_bucket_5_shares(ss, b"si0") + + # Upload of an immutable implies creation of a single lease with the + # supplied secrets. (lease,) = ss.get_leases(b"si0") self.assertTrue(lease.is_renew_secret(rs0)) @@ -3125,6 +3131,7 @@ class Stats(unittest.TestCase): self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) +immutable_schemas = strategies.sampled_from(list(ALL_SCHEMAS)) class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" @@ -3136,47 +3143,54 @@ class ShareFileTests(unittest.TestCase): # Should be b'abDEF' now. 
return sf - def test_read_write(self): + @given(immutable_schemas) + def test_read_write(self, schema): """Basic writes can be read.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 3), b"abD") self.assertEqual(sf.read_share_data(1, 4), b"bDEF") - def test_reads_beyond_file_end(self): + @given(immutable_schemas) + def test_reads_beyond_file_end(self, schema): """Reads beyond the file size are truncated.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 10), b"abDEF") self.assertEqual(sf.read_share_data(5, 10), b"") - def test_too_large_write(self): + @given(immutable_schemas) + def test_too_large_write(self, schema): """Can't do write larger than file size.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(DataTooLargeError): sf.write_share_data(0, b"x" * 3000) - def test_no_leases_cancelled(self): + @given(immutable_schemas) + def test_no_leases_cancelled(self, schema): """If no leases were cancelled, IndexError is raised.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(IndexError): sf.cancel_lease(b"garbage") - def test_long_lease_count_format(self): + @given(immutable_schemas) + def test_long_lease_count_format(self, schema): """ ``ShareFile.__init__`` raises ``ValueError`` if the lease count format given is longer than one character. """ with self.assertRaises(ValueError): - self.get_sharefile(lease_count_format="BB") + self.get_sharefile(schema=schema, lease_count_format="BB") - def test_large_lease_count_format(self): + @given(immutable_schemas) + def test_large_lease_count_format(self, schema): """ ``ShareFile.__init__`` raises ``ValueError`` if the lease count format encodes to a size larger than 8 bytes. """ with self.assertRaises(ValueError): - self.get_sharefile(lease_count_format="Q") + self.get_sharefile(schema=schema, lease_count_format="Q") - def test_avoid_lease_overflow(self): + @given(immutable_schemas) + def test_avoid_lease_overflow(self, schema): """ If the share file already has the maximum number of leases supported then ``ShareFile.add_lease`` raises ``struct.error`` and makes no changes @@ -3190,7 +3204,7 @@ class ShareFileTests(unittest.TestCase): ) # Make it a little easier to reach the condition by limiting the # number of leases to only 255. - sf = self.get_sharefile(lease_count_format="B") + sf = self.get_sharefile(schema=schema, lease_count_format="B") # Add the leases. for i in range(2 ** 8 - 1): @@ -3214,16 +3228,17 @@ class ShareFileTests(unittest.TestCase): self.assertEqual(before_data, after_data) - def test_renew_secret(self): + @given(immutable_schemas) + def test_renew_secret(self, schema): """ - A lease loaded from an immutable share file can have its renew secret - verified. + A lease loaded from an immutable share file at any schema version can have + its renew secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, @@ -3234,16 +3249,17 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_renew_secret(renew_secret)) - def test_cancel_secret(self): + @given(immutable_schemas) + def test_cancel_secret(self, schema): """ - A lease loaded from an immutable share file can have its cancel secret - verified. 
+ A lease loaded from an immutable share file at any schema version can have + its cancel secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, From 234b8dcde2febc2b3eee96e1ede4d123f634dcb1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 11:56:49 -0400 Subject: [PATCH 094/220] Formalize LeaseInfo interface in preparation for another implementation --- src/allmydata/storage/lease.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 6d21bb2b2..23071707a 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -15,6 +15,11 @@ import struct, time import attr +from zope.interface import ( + Interface, + implementer, +) + from allmydata.util.hashutil import timing_safe_compare # struct format for representation of a lease in an immutable share @@ -23,6 +28,84 @@ IMMUTABLE_FORMAT = ">L32s32sL" # struct format for representation of a lease in a mutable share MUTABLE_FORMAT = ">LL32s32s20s" + +class ILeaseInfo(Interface): + """ + Represent a marker attached to a share that indicates that share should be + retained for some amount of time. + + Typically clients will create and renew leases on their shares as a way to + inform storage servers that there is still interest in those shares. A + share may have more than one lease. If all leases on a share have + expiration times in the past then the storage server may take this as a + strong hint that no one is interested in the share anymore and therefore + the share may be deleted to reclaim the space. + """ + def renew(new_expire_time): + """ + Create a new ``ILeaseInfo`` with the given expiration time. + + :param Union[int, float] new_expire_time: The expiration time the new + ``ILeaseInfo`` will have. + + :return: The new ``ILeaseInfo`` provider with the new expiration time. 
+ """ + + def get_expiration_time(): + """ + :return Union[int, float]: this lease's expiration time + """ + + def get_grant_renew_time_time(): + """ + :return Union[int, float]: a guess about the last time this lease was + renewed + """ + + def get_age(): + """ + :return Union[int, float]: a guess about how long it has been since this + lease was renewed + """ + + def to_immutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in an immutable container + """ + + def to_mutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in a mutable container + """ + + def immutable_size(): + """ + :return int: the size of the serialized representation of this lease in an + immutable container + """ + + def mutable_size(): + """ + :return int: the size of the serialized representation of this lease in a + mutable container + """ + + def is_renew_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's renew + secret, ``False`` otherwise + """ + + def is_cancel_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's cancel + secret, ``False`` otherwise + """ + + +@implementer(ILeaseInfo) @attr.s(frozen=True) class LeaseInfo(object): """ From b69e8d013bfc32f8b7ca5948ad36a5c60b3db73a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:07:49 -0400 Subject: [PATCH 095/220] introduce immutable container schema version 2 This version used on-disk hashed secrets to reduce the chance of secrets leaking to unintended parties. --- src/allmydata/storage/immutable.py | 23 ++++- src/allmydata/storage/immutable_schema.py | 105 +++++++++++++++++++++- src/allmydata/storage/lease.py | 82 +++++++++++++++++ src/allmydata/test/test_download.py | 14 ++- 4 files changed, 214 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index ae5a710af..216262a81 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -39,14 +39,14 @@ from .immutable_schema import ( # interfaces. # The share file has the following layout: -# 0x00: share file version number, four bytes, current version is 1 +# 0x00: share file version number, four bytes, current version is 2 # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below. # 0x08: number of leases, four bytes big-endian # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy) # A+0x0c = B: first lease. Lease format is: # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner -# B+0x04: renew secret, 32 bytes (SHA256) -# B+0x24: cancel secret, 32 bytes (SHA256) +# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below. +# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b) # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch # B+0x48: next lease, or end of record @@ -58,6 +58,23 @@ from .immutable_schema import ( # then the value stored in this field will be the actual share data length # modulo 2**32. +# Footnote 2: The change between share file version number 1 and 2 is that +# storage of lease secrets is changed from plaintext to hashed. 
This change
+# protects the secrets from compromises of local storage on the server: if a
+# plaintext cancel secret is somehow exfiltrated from the storage server, an
+# attacker could use it to cancel that lease and potentially cause user data
+# to be discarded before intended by the real owner. As of this comment,
+# lease cancellation is disabled because there have been at least two bugs
+# which leak the persisted value of the cancellation secret. If lease secrets
+# were stored hashed instead of plaintext then neither of these bugs would
+# have allowed an attacker to learn a usable cancel secret.
+#
+# Clients are free to construct these secrets however they like. The
+# Tahoe-LAFS client uses a SHA256-based construction. The server then uses
+# blake2b to hash these values for storage so that it retains no persistent
+# copy of the original secret.
+#
 
 def _fix_lease_count_format(lease_count_format):
     """
     Turn a single character struct format string into a format string suitable
diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py
index 759752bed..fc823507a 100644
--- a/src/allmydata/storage/immutable_schema.py
+++ b/src/allmydata/storage/immutable_schema.py
@@ -13,8 +13,14 @@ if PY2:
 
 import struct
 
+import attr
+
+from nacl.hash import blake2b
+from nacl.encoding import RawEncoder
+
 from .lease import (
     LeaseInfo,
+    HashedLeaseInfo,
 )
 
 def _header(version, max_size):
@@ -22,10 +28,10 @@ def _header(version, max_size):
     """
     Construct the header for an immutable container.
 
-    :param version: The container version to include in the header.
-    :param max_size: The maximum data size the container will hold.
+    :param version: the container version to include in the header
+    :param max_size: the maximum data size the container will hold
 
-    :return: Some bytes to write at the beginning of the container.
+    :return: some bytes to write at the beginning of the container
     """
     # The second field -- the four-byte share data length -- is no longer
     # used as of Tahoe v1.3.0, but we continue to write it in there in
@@ -38,6 +44,97 @@ def _header(version, max_size):
     # the share.
     return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0)
 
+
+class _V2(object):
+    """
+    Implement encoding and decoding for v2 of the immutable container.
+    """
+    version = 2
+
+    @classmethod
+    def _hash_secret(cls, secret):
+        # type: (bytes) -> bytes
+        """
+        Hash a lease secret for storage.
+        """
+        return blake2b(secret, digest_size=32, encoder=RawEncoder())
+
+    @classmethod
+    def _hash_lease_info(cls, lease_info):
+        # type: (LeaseInfo) -> HashedLeaseInfo
+        """
+        Hash the cleartext lease info secrets into a ``HashedLeaseInfo``.
+        """
+        if not isinstance(lease_info, LeaseInfo):
+            # Provide a little safety against misuse, especially an attempt to
+            # re-hash an already-hashed lease info which is represented as a
+            # different type.
+            raise TypeError(
+                "Can only hash LeaseInfo, not {!r}".format(lease_info),
+            )
+
+        # Hash the cleartext secrets in the lease info and wrap the result in
+        # a new type.
+        return HashedLeaseInfo(
+            attr.assoc(
+                lease_info,
+                renew_secret=cls._hash_secret(lease_info.renew_secret),
+                cancel_secret=cls._hash_secret(lease_info.cancel_secret),
+            ),
+            cls._hash_secret,
+        )
+
+    @classmethod
+    def header(cls, max_size):
+        # type: (int) -> bytes
+        """
+        Construct a container header.
+
+        :param max_size: the maximum size the container can hold
+
+        :return: the header bytes
+        """
+        return _header(cls.version, max_size)
+
+    @classmethod
+    def serialize_lease(cls, lease):
+        # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
+        """
+        Serialize a lease to be written to a v2 container.
+
+        :param lease: the lease to serialize
+
+        :return: the serialized bytes
+        """
+        if isinstance(lease, LeaseInfo):
+            # v2 of the immutable schema stores lease secrets hashed. If
+            # we're given a LeaseInfo then it holds plaintext secrets. Hash
+            # them before trying to serialize.
+            lease = cls._hash_lease_info(lease)
+        if isinstance(lease, HashedLeaseInfo):
+            return lease.to_immutable_data()
+        raise ValueError(
+            "ShareFile v2 schema cannot represent lease {!r}".format(
+                lease,
+            ),
+        )
+
+    @classmethod
+    def unserialize_lease(cls, data):
+        # type: (bytes) -> HashedLeaseInfo
+        """
+        Unserialize some bytes from a v2 container.
+
+        :param data: the bytes from the container
+
+        :return: the ``HashedLeaseInfo`` the bytes represent
+        """
+        # In v2 of the immutable schema lease secrets are stored hashed. Wrap
+        # a LeaseInfo in a HashedLeaseInfo so it can supply the correct
+        # interpretation for those values.
+        return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret)
+
+
 class _V1(object):
     """
     Implement encoding and decoding for v1 of the immutable container.
@@ -66,7 +163,7 @@ class _V1(object):
         return LeaseInfo.from_immutable_data(data)
 
 
-ALL_SCHEMAS = {_V1}
+ALL_SCHEMAS = {_V2, _V1}
 ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
 NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)
 
diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py
index 23071707a..895a0970c 100644
--- a/src/allmydata/storage/lease.py
+++ b/src/allmydata/storage/lease.py
@@ -20,6 +20,10 @@ from zope.interface import (
     implementer,
 )
 
+from twisted.python.components import (
+    proxyForInterface,
+)
+
 from allmydata.util.hashutil import timing_safe_compare
 
 # struct format for representation of a lease in an immutable share
@@ -245,3 +249,81 @@ class LeaseInfo(object):
         ]
         values = struct.unpack(">LL32s32s20s", data)
         return cls(**dict(zip(names, values)))
+
+
+@attr.s(frozen=True)
+class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")):
+    """
+    A ``HashedLeaseInfo`` wraps lease information in which the secrets have
+    been hashed.
+    """
+    _lease_info = attr.ib()
+    _hash = attr.ib()
+
+    def is_renew_secret(self, candidate_secret):
+        """
+        Hash the candidate secret and compare the result to the stored hashed
+        secret.
+        """
+        return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret))
+
+    def is_cancel_secret(self, candidate_secret):
+        """
+        Hash the candidate secret and compare the result to the stored hashed
+        secret.
+        """
+        if isinstance(candidate_secret, _HashedCancelSecret):
+            # Someone read it off of this object in this project - probably
+            # the lease crawler - and is just trying to use it to identify
+            # which lease it wants to operate on. Avoid re-hashing the value.
+            #
+            # It is important that this codepath is only available internally
+            # for this process to talk to itself. If it were to be exposed to
+            # clients over the network, they could just provide the hashed
+            # value to avoid having to ever learn the original value.
+            hashed_candidate = candidate_secret.hashed_value
+        else:
+            # It is not yet hashed so hash it.
+                hashed_candidate = self._hash(candidate_secret)
+
+        return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate)
+
+    @property
+    def owner_num(self):
+        return self._lease_info.owner_num
+
+    @property
+    def cancel_secret(self):
+        """
+        Give back an opaque wrapper around the hashed cancel secret which can
+        later be presented for a successful equality comparison.
+        """
+        # We don't *have* the cancel secret. We hashed it and threw away the
+        # original. That's good. It does mean that some code that runs
+        # in-process with the storage service (LeaseCheckingCrawler) runs into
+        # some difficulty. That code wants to cancel leases and does so using
+        # the same interface that faces storage clients (or would face them,
+        # if lease cancellation were exposed).
+        #
+        # Since it can't use the hashed secret to cancel a lease (that's the
+        # point of the hashing) and we don't have the unhashed secret to give
+        # it, instead we give it a marker that `cancel_lease` will recognize.
+        # On recognizing it, if the hashed value given matches the hashed
+        # value stored it is considered a match and the lease can be
+        # cancelled.
+        #
+        # This isn't great. Maybe the internal and external consumers of
+        # cancellation should use different interfaces.
+        return _HashedCancelSecret(self._lease_info.cancel_secret)
+
+
+@attr.s(frozen=True)
+class _HashedCancelSecret(object):
+    """
+    ``_HashedCancelSecret`` is a marker type for an already-hashed lease
+    cancel secret that lets internal lease cancellers bypass the hash-based
+    protection that's imposed on external lease cancellers.
+
+    :ivar bytes hashed_value: The already-hashed secret.
+    """
+    hashed_value = attr.ib()
diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py
index ca5b5650b..85d89cde6 100644
--- a/src/allmydata/test/test_download.py
+++ b/src/allmydata/test/test_download.py
@@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase):
         d.addCallback(_download, imm_uri, i, expected)
         d.addCallback(lambda ign: self.restore_all_shares(self.shares))
         d.addCallback(fireEventually)
-        corrupt_values = [(3, 2, "no-sh2"),
-                          (15, 2, "need-4th"), # share looks v2
-                          ]
+        corrupt_values = [
+            # Make the container version for share number 2 look
+            # unsupported. If you add support for immutable share file
+            # version number much past 16 million then you will have to
+            # update this test. Also maybe you have other problems.
+            (1, 255, "no-sh2"),
+            # Make the immutable share number 2 (not the container, the
+            # thing inside the container) look unsupported. Ditto the
+            # above about version numbers in the ballpark of 16 million.
+ (13, 255, "need-4th"), + ] for i,newvalue,expected in corrupt_values: d.addCallback(self._corrupt_set, imm_uri, i, newvalue) d.addCallback(_download, imm_uri, i, expected) From 7a59aa83bb9e429d0b44f47fff6365dbfa24f42f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:12:54 -0400 Subject: [PATCH 096/220] add missing import --- src/allmydata/storage/immutable_schema.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index fc823507a..6ac49f6f1 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,6 +13,11 @@ if PY2: import struct +try: + from typing import Union +except ImportError: + pass + import attr from nacl.hash import blake2b From 6889ab2a76a1665dc7adb11dfa2205760641f303 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:16:55 -0400 Subject: [PATCH 097/220] fix syntax of type hint --- src/allmydata/storage/immutable_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 6ac49f6f1..7ffec418a 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -29,7 +29,7 @@ from .lease import ( ) def _header(version, max_size): - # (int, int) -> bytes + # type: (int, int) -> bytes """ Construct the header for an immutable container. From 2186bfcc372d01ab79f6899e8d0a54157ee83444 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:40:43 -0400 Subject: [PATCH 098/220] silence some mypy errors :/ I don't know the "right" way to make mypy happy with these things --- src/allmydata/storage/immutable_schema.py | 4 ++-- src/allmydata/storage/lease.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 7ffec418a..440755b01 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -169,8 +169,8 @@ class _V1(object): ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore def schema_from_version(version): # (int) -> Optional[type] diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 895a0970c..63dba15e8 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -252,7 +252,7 @@ class LeaseInfo(object): @attr.s(frozen=True) -class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ignore # unsupported dynamic base class """ A ``HashedLeaseInfo`` wraps lease information in which the secrets have been hashed. From 931ddf85a532178ab83584820eda8605a495d5ab Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 15:26:58 -0400 Subject: [PATCH 099/220] introduce an explicit representation of the v1 mutable container schema This is only a partial representation, sufficient to express the changes that are coming in v2. 
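
As with the immutable container, the v2 change being prepared for here is
hashed lease secrets. A minimal sketch of that hashing, mirroring the
PyNaCl calls already used by the immutable v2 schema (the example secret
value is made up):

    from nacl.encoding import RawEncoder
    from nacl.hash import blake2b

    def hash_secret(secret):
        # type: (bytes) -> bytes
        # Persist only a 32-byte blake2b digest of the client-supplied
        # secret.  A presented secret can still be verified by hashing
        # and comparing, but the server retains no usable original.
        return blake2b(secret, digest_size=32, encoder=RawEncoder())

    stored = hash_secret(b"r" * 32)
    assert hash_secret(b"r" * 32) == stored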
--- src/allmydata/storage/mutable.py | 46 +++------ src/allmydata/storage/mutable_schema.py | 119 ++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 31 deletions(-) create mode 100644 src/allmydata/storage/mutable_schema.py diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index ce9cc5ff4..346edd53a 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -24,7 +24,10 @@ from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownMutableContainerVersionError, \ DataTooLargeError from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE - +from .mutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_header, +) # the MutableShareFile is like the ShareFile, but used for mutable data. It # has a different layout. See docs/mutable.txt for more details. @@ -64,9 +67,6 @@ class MutableShareFile(object): # our sharefiles share with a recognizable string, plus some random # binary data to reduce the chance that a regular text file will look # like a sharefile. - MAGIC = b"Tahoe mutable container v1\n" + b"\x75\x09\x44\x03\x8e" - assert len(MAGIC) == 32 - assert isinstance(MAGIC, bytes) MAX_SIZE = MAX_MUTABLE_SHARE_SIZE # TODO: decide upon a policy for max share size @@ -82,20 +82,19 @@ class MutableShareFile(object): :return: ``True`` if the bytes could belong to this container, ``False`` otherwise. """ - return header.startswith(cls.MAGIC) + return schema_from_header(header) is not None - def __init__(self, filename, parent=None): + def __init__(self, filename, parent=None, schema=NEWEST_SCHEMA_VERSION): self.home = filename if os.path.exists(self.home): # we don't cache anything, just check the magic with open(self.home, 'rb') as f: - data = f.read(self.HEADER_SIZE) - (magic, - write_enabler_nodeid, write_enabler, - data_length, extra_least_offset) = \ - struct.unpack(">32s20s32sQQ", data) - if not self.is_valid_header(data): - raise UnknownMutableContainerVersionError(filename, magic) + header = f.read(self.HEADER_SIZE) + self._schema = schema_from_header(header) + if self._schema is None: + raise UnknownMutableContainerVersionError(filename, header) + else: + self._schema = schema self.parent = parent # for logging def log(self, *args, **kwargs): @@ -103,23 +102,8 @@ class MutableShareFile(object): def create(self, my_nodeid, write_enabler): assert not os.path.exists(self.home) - data_length = 0 - extra_lease_offset = (self.HEADER_SIZE - + 4 * self.LEASE_SIZE - + data_length) - assert extra_lease_offset == self.DATA_OFFSET # true at creation - num_extra_leases = 0 with open(self.home, 'wb') as f: - header = struct.pack( - ">32s20s32sQQ", - self.MAGIC, my_nodeid, write_enabler, - data_length, extra_lease_offset, - ) - leases = (b"\x00" * self.LEASE_SIZE) * 4 - f.write(header + leases) - # data goes here, empty after creation - f.write(struct.pack(">L", num_extra_leases)) - # extra leases go here, none at creation + f.write(self._schema.header(my_nodeid, write_enabler)) def unlink(self): os.unlink(self.home) @@ -252,7 +236,7 @@ class MutableShareFile(object): + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_mutable_data()) + f.write(self._schema.serialize_lease(lease_info)) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None @@ -269,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = LeaseInfo.from_mutable_data(data) + 
lease_info = self._schema.unserialize_lease(data)
         if lease_info.owner_num == 0:
             return None
         return lease_info
diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py
new file mode 100644
index 000000000..25f24ea1f
--- /dev/null
+++ b/src/allmydata/storage/mutable_schema.py
@@ -0,0 +1,119 @@
+"""
+Ported to Python 3.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+
+import struct
+
+from .lease import (
+    LeaseInfo,
+)
+
+class _V1(object):
+    """
+    Implement encoding and decoding for v1 of the mutable container.
+    """
+    version = 1
+
+    _MAGIC = (
+        # Make it easy for people to recognize
+        b"Tahoe mutable container v1\n"
+        # But also keep the chance of accidental collision low
+        b"\x75\x09\x44\x03\x8e"
+    )
+    assert len(_MAGIC) == 32
+
+    _HEADER_FORMAT = ">32s20s32sQQ"
+
+    # This size excludes leases
+    _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)
+
+    _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
+
+    @classmethod
+    def magic_matches(cls, candidate_magic):
+        # type: (bytes) -> bool
+        """
+        Return ``True`` if a candidate string matches the expected magic string
+        from a mutable container header, ``False`` otherwise.
+        """
+        return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC
+
+    @classmethod
+    def header(cls, nodeid, write_enabler):
+        # type: (bytes, bytes) -> bytes
+        """
+        Construct a container header.
+
+        :param nodeid: A unique identifier for the node holding this
+            container.
+
+        :param write_enabler: A secret shared with the client used to
+            authorize changes to the contents of this container.
+        """
+        fixed_header = struct.pack(
+            ">32s20s32sQQ",
+            cls._MAGIC,
+            nodeid,
+            write_enabler,
+            # data length, initially the container is empty
+            0,
+            cls._EXTRA_LEASE_OFFSET,
+        )
+        blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4
+        extra_lease_count = struct.pack(">L", 0)
+
+        return b"".join([
+            fixed_header,
+            # share data will go in between the next two items eventually but
+            # for now there is none.
+            blank_leases,
+            extra_lease_count,
+        ])
+
+    @classmethod
+    def serialize_lease(cls, lease_info):
+        # type: (LeaseInfo) -> bytes
+        """
+        Serialize a lease to be written to a v1 container.
+
+        :param lease_info: the lease to serialize
+
+        :return: the serialized bytes
+        """
+        return lease_info.to_mutable_data()
+
+    @classmethod
+    def unserialize_lease(cls, data):
+        # type: (bytes) -> LeaseInfo
+        """
+        Unserialize some bytes from a v1 container.
+
+        :param data: the bytes from the container
+
+        :return: the ``LeaseInfo`` the bytes represent
+        """
+        return LeaseInfo.from_mutable_data(data)
+
+
+ALL_SCHEMAS = {_V1}
+ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}  # type: ignore
+NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)  # type: ignore
+
+def schema_from_header(header):
+    # (bytes) -> Optional[type]
+    """
+    Find the schema object that corresponds to a certain container header.
+ """ + for schema in ALL_SCHEMAS: + if schema.magic_matches(header): + return schema + return None From 728638fe230dfdf0149c5835b0a8077230dbf021 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 15:37:29 -0400 Subject: [PATCH 100/220] apply the MutableShareFile tests to all known schemas --- src/allmydata/test/test_storage.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 655395042..fbd005050 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -42,9 +42,12 @@ from allmydata.util import fileutil, hashutil, base32 from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile +from allmydata.storage.mutable_schema import ( + ALL_SCHEMAS as ALL_MUTABLE_SCHEMAS, +) from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile from allmydata.storage.immutable_schema import ( - ALL_SCHEMAS, + ALL_SCHEMAS as ALL_IMMUTABLE_SCHEMAS, ) from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \ @@ -3131,7 +3134,7 @@ class Stats(unittest.TestCase): self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) -immutable_schemas = strategies.sampled_from(list(ALL_SCHEMAS)) +immutable_schemas = strategies.sampled_from(list(ALL_IMMUTABLE_SCHEMAS)) class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" @@ -3270,15 +3273,17 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_cancel_secret(cancel_secret)) +mutable_schemas = strategies.sampled_from(list(ALL_MUTABLE_SCHEMAS)) class MutableShareFileTests(unittest.TestCase): """ Tests for allmydata.storage.mutable.MutableShareFile. """ - def get_sharefile(self): - return MutableShareFile(self.mktemp()) + def get_sharefile(self, **kwargs): + return MutableShareFile(self.mktemp(), **kwargs) @given( + schema=mutable_schemas, nodeid=strategies.just(b"x" * 20), write_enabler=strategies.just(b"y" * 32), datav=strategies.lists( @@ -3289,12 +3294,12 @@ class MutableShareFileTests(unittest.TestCase): ), new_length=offsets(), ) - def test_readv_reads_share_data(self, nodeid, write_enabler, datav, new_length): + def test_readv_reads_share_data(self, schema, nodeid, write_enabler, datav, new_length): """ ``MutableShareFile.readv`` returns bytes from the share data portion of the share file. """ - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) sf.create(my_nodeid=nodeid, write_enabler=write_enabler) sf.writev(datav=datav, new_length=new_length) @@ -3329,12 +3334,13 @@ class MutableShareFileTests(unittest.TestCase): self.assertEqual(expected_data, read_data) @given( + schema=mutable_schemas, nodeid=strategies.just(b"x" * 20), write_enabler=strategies.just(b"y" * 32), readv=strategies.lists(strategies.tuples(offsets(), lengths()), min_size=1), random=strategies.randoms(), ) - def test_readv_rejects_negative_length(self, nodeid, write_enabler, readv, random): + def test_readv_rejects_negative_length(self, schema, nodeid, write_enabler, readv, random): """ If a negative length is given to ``MutableShareFile.readv`` in a read vector then ``AssertionError`` is raised. 
@@ -3373,7 +3379,7 @@ class MutableShareFileTests(unittest.TestCase): *broken_readv[readv_index] ) - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) sf.create(my_nodeid=nodeid, write_enabler=write_enabler) # A read with a broken read vector is an error. From 8adff050a7f179c6c4796f4d3b04fab60924cbad Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 13:51:46 -0400 Subject: [PATCH 101/220] compare without breaking out all of the fields HashedLeaseInfo doesn't have all of these attributes --- src/allmydata/test/test_storage.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index fbd005050..92176ce52 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1361,14 +1361,21 @@ class MutableServer(unittest.TestCase): 2: [b"2"*10]}) def compare_leases_without_timestamps(self, leases_a, leases_b): - self.failUnlessEqual(len(leases_a), len(leases_b)) - for i in range(len(leases_a)): - a = leases_a[i] - b = leases_b[i] - self.failUnlessEqual(a.owner_num, b.owner_num) - self.failUnlessEqual(a.renew_secret, b.renew_secret) - self.failUnlessEqual(a.cancel_secret, b.cancel_secret) - self.failUnlessEqual(a.nodeid, b.nodeid) + for a, b in zip(leases_a, leases_b): + # The leases aren't always of the same type (though of course + # corresponding elements in the two lists should be of the same + # type as each other) so it's inconvenient to just reach in and + # normalize the expiration timestamp. We don't want to call + # `renew` on both objects to normalize the expiration timestamp in + # case `renew` is broken and gives us back equal outputs from + # non-equal inputs (expiration timestamp aside). It seems + # reasonably safe to use `renew` to make _one_ of the timestamps + # equal to the other though. 
+ self.assertEqual( + a.renew(b.get_expiration_time()), + b, + ) + self.assertEqual(len(leases_a), len(leases_b)) def test_leases(self): ss = self.create("test_leases") From 0cd96ed713ba6429b76e2520752acb7e8e166e40 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:09:46 -0400 Subject: [PATCH 102/220] fix the debug tool for the hashed lease secret case --- src/allmydata/scripts/debug.py | 4 ++-- src/allmydata/storage/lease.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 260cca55b..6201ce28f 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -230,8 +230,8 @@ def dump_mutable_share(options): print(" ownerid: %d" % lease.owner_num, file=out) when = format_expiration_time(lease.get_expiration_time()) print(" expires in %s" % when, file=out) - print(" renew_secret: %s" % str(base32.b2a(lease.renew_secret), "utf-8"), file=out) - print(" cancel_secret: %s" % str(base32.b2a(lease.cancel_secret), "utf-8"), file=out) + print(" renew_secret: %s" % lease.present_renew_secret(), file=out) + print(" cancel_secret: %s" % lease.present_cancel_secret(), file=out) print(" secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid), file=out) else: print("No leases.", file=out) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 63dba15e8..3ec760dbe 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -25,6 +25,7 @@ from twisted.python.components import ( ) from allmydata.util.hashutil import timing_safe_compare +from allmydata.util import base32 # struct format for representation of a lease in an immutable share IMMUTABLE_FORMAT = ">L32s32sL" @@ -102,12 +103,24 @@ class ILeaseInfo(Interface): secret, ``False`` otherwise """ + def present_renew_secret(): + """ + :return str: Text which could reasonably be shown to a person representing + this lease's renew secret. + """ + def is_cancel_secret(candidate_secret): """ :return bool: ``True`` if the given byte string is this lease's cancel secret, ``False`` otherwise """ + def present_cancel_secret(): + """ + :return str: Text which could reasonably be shown to a person representing + this lease's cancel secret. + """ + @implementer(ILeaseInfo) @attr.s(frozen=True) @@ -173,6 +186,13 @@ class LeaseInfo(object): """ return timing_safe_compare(self.renew_secret, candidate_secret) + def present_renew_secret(self): + # type: () -> bytes + """ + Return the renew secret, base32-encoded. + """ + return str(base32.b2a(self.renew_secret), "utf-8") + def is_cancel_secret(self, candidate_secret): # type: (bytes) -> bool """ @@ -183,6 +203,13 @@ class LeaseInfo(object): """ return timing_safe_compare(self.cancel_secret, candidate_secret) + def present_cancel_secret(self): + # type: () -> bytes + """ + Return the cancel secret, base32-encoded. + """ + return str(base32.b2a(self.cancel_secret), "utf-8") + def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self._expiration_time - 31*24*60*60 @@ -267,6 +294,12 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign """ return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) + def present_renew_secret(self): + """ + Present the hash of the secret with a marker indicating it is a hash. 
+ """ + return u"hash:" + super(HashedLeaseInfo, self).present_renew_secret() + def is_cancel_secret(self, candidate_secret): """ Hash the candidate secret and compare the result to the stored hashed @@ -288,10 +321,20 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) + def present_cancel_secret(self): + """ + Present the hash of the secret with a marker indicating it is a hash. + """ + return u"hash:" + super(HashedLeaseInfo, self).present_cancel_secret() + @property def owner_num(self): return self._lease_info.owner_num + @property + def nodeid(self): + return self._lease_info.nodeid + @property def cancel_secret(self): """ From 5d703d989339587cfd5706fea1728ecb59e17808 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:10:27 -0400 Subject: [PATCH 103/220] some type annotations --- src/allmydata/storage/lease.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 3ec760dbe..9ddbc9c68 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -187,7 +187,7 @@ class LeaseInfo(object): return timing_safe_compare(self.renew_secret, candidate_secret) def present_renew_secret(self): - # type: () -> bytes + # type: () -> str """ Return the renew secret, base32-encoded. """ @@ -204,7 +204,7 @@ class LeaseInfo(object): return timing_safe_compare(self.cancel_secret, candidate_secret) def present_cancel_secret(self): - # type: () -> bytes + # type: () -> str """ Return the cancel secret, base32-encoded. """ @@ -288,6 +288,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign _hash = attr.ib() def is_renew_secret(self, candidate_secret): + # type: (bytes) -> bool """ Hash the candidate secret and compare the result to the stored hashed secret. @@ -295,12 +296,14 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) def present_renew_secret(self): + # type: () -> str """ Present the hash of the secret with a marker indicating it is a hash. """ return u"hash:" + super(HashedLeaseInfo, self).present_renew_secret() def is_cancel_secret(self, candidate_secret): + # type: (bytes) -> bool """ Hash the candidate secret and compare the result to the stored hashed secret. @@ -322,6 +325,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) def present_cancel_secret(self): + # type: () -> str """ Present the hash of the secret with a marker indicating it is a hash. """ From 3de9c73b0b066e5e15978a15c2903d10e398ed0a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:11:05 -0400 Subject: [PATCH 104/220] preserve the type when renewing HashedLeaseInfo does this mean immutable lease renewal is untested? 
maybe
---
 src/allmydata/storage/lease.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py
index 9ddbc9c68..1a5416d6a 100644
--- a/src/allmydata/storage/lease.py
+++ b/src/allmydata/storage/lease.py
@@ -287,6 +287,13 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")):  # type: ign
     _lease_info = attr.ib()
     _hash = attr.ib()
 
+    def renew(self, new_expire_time):
+        # Preserve the HashedLeaseInfo wrapper around the renewed LeaseInfo.
+        return attr.assoc(
+            self,
+            _lease_info=super(HashedLeaseInfo, self).renew(new_expire_time),
+        )
+
     def is_renew_secret(self, candidate_secret):
         # type: (bytes) -> bool
         """

From 456df65a07a0c48f3a056519282cc96b5e4e2f25 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Fri, 5 Nov 2021 14:16:43 -0400
Subject: [PATCH 105/220] Add v2 of the mutable container schema

It uses hashed lease secrets, like v2 of the immutable container schema.
---
 src/allmydata/storage/mutable_schema.py | 225 ++++++++++++++++++++----
 1 file changed, 187 insertions(+), 38 deletions(-)

diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py
index 25f24ea1f..9496fe571 100644
--- a/src/allmydata/storage/mutable_schema.py
+++ b/src/allmydata/storage/mutable_schema.py
@@ -13,23 +13,193 @@ if PY2:
 
 import struct
 
+try:
+    from typing import Union
+except ImportError:
+    pass
+
+import attr
+
+from nacl.hash import blake2b
+from nacl.encoding import RawEncoder
+
+from ..util.hashutil import (
+    tagged_hash,
+)
 from .lease import (
     LeaseInfo,
+    HashedLeaseInfo,
 )
 
+def _magic(version):
+    # type: (int) -> bytes
+    """
+    Compute a "magic" header string for a container of the given version.
+
+    :param version: The version number of the container.
+    """
+    # Make it easy for people to recognize
+    human_readable = u"Tahoe mutable container v{:d}\n".format(version).encode("ascii")
+    # But also keep the chance of accidental collision low
+    if version == 1:
+        # It's unclear where this byte sequence came from.  It may have just
+        # been random.  In any case, preserve it since it is the magic marker
+        # in all v1 share files.
+        random_bytes = b"\x75\x09\x44\x03\x8e"
+    else:
+        # For future versions, use a reproducible scheme.
+        random_bytes = tagged_hash(
+            b"allmydata_mutable_container_header",
+            human_readable,
+            truncate_to=5,
+        )
+    magic = human_readable + random_bytes
+    assert len(magic) == 32
+    if version > 1:
+        # The chance of collision is pretty low but let's just be sure about
+        # it.
+        assert magic != _magic(version - 1)
+
+    return magic
+
+def _header(magic, extra_lease_offset, nodeid, write_enabler):
+    # type: (bytes, int, bytes, bytes) -> bytes
+    """
+    Construct a container header.
+
+    :param nodeid: A unique identifier for the node holding this
+        container.
+
+    :param write_enabler: A secret shared with the client used to
+        authorize changes to the contents of this container.
+    """
+    fixed_header = struct.pack(
+        ">32s20s32sQQ",
+        magic,
+        nodeid,
+        write_enabler,
+        # data length, initially the container is empty
+        0,
+        extra_lease_offset,
+    )
+    blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4
+    extra_lease_count = struct.pack(">L", 0)
+
+    return b"".join([
+        fixed_header,
+        # share data will go in between the next two items eventually but
+        # for now there is none.
+        blank_leases,
+        extra_lease_count,
+    ])
+
+
+class _V2(object):
+    """
+    Implement encoding and decoding for v2 of the mutable container.
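+
+    Unlike v1, v2 stores BLAKE2b hashes of the lease renew and cancel
+    secrets rather than the cleartext secrets themselves (see
+    ``_hash_secret`` below).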
+ """ + version = 2 + _MAGIC = _magic(version) + + _HEADER_FORMAT = ">32s20s32sQQ" + + # This size excludes leases + _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) + + _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + @classmethod + def magic_matches(cls, candidate_magic): + # type: (bytes) -> bool + """ + Return ``True`` if a candidate string matches the expected magic string + from a mutable container header, ``False`` otherwise. + """ + return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC + + @classmethod + def header(cls, nodeid, write_enabler): + return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) + + @classmethod + def serialize_lease(cls, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + """ + Serialize a lease to be written to a v2 container. + + :param lease: the lease to serialize + + :return: the serialized bytes + """ + if isinstance(lease, LeaseInfo): + # v2 of the mutable schema stores lease secrets hashed. If we're + # given a LeaseInfo then it holds plaintext secrets. Hash them + # before trying to serialize. + lease = cls._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return lease.to_mutable_data() + raise ValueError( + "MutableShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # type: (bytes) -> HashedLeaseInfo + """ + Unserialize some bytes from a v2 container. + + :param data: the bytes from the container + + :return: the ``HashedLeaseInfo`` the bytes represent + """ + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + lease = LeaseInfo.from_mutable_data(data) + return HashedLeaseInfo(lease, cls._hash_secret) + + class _V1(object): """ Implement encoding and decoding for v1 of the mutable container. """ version = 1 - - _MAGIC = ( - # Make it easy for people to recognize - b"Tahoe mutable container v1\n" - # But also keep the chance of accidental collision low - b"\x75\x09\x44\x03\x8e" - ) - assert len(_MAGIC) == 32 + _MAGIC = _magic(version) _HEADER_FORMAT = ">32s20s32sQQ" @@ -49,35 +219,8 @@ class _V1(object): @classmethod def header(cls, nodeid, write_enabler): - # type: (bytes, bytes) -> bytes - """ - Construct a container header. + return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) - :param nodeid: A unique identifier for the node holding this - container. 
-
-        :param write_enabler: A secret shared with the client used to
-            authorize changes to the contents of this container.
-        """
-        fixed_header = struct.pack(
-            ">32s20s32sQQ",
-            cls._MAGIC,
-            nodeid,
-            write_enabler,
-            # data length, initially the container is empty
-            0,
-            cls._EXTRA_LEASE_OFFSET,
-        )
-        blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4
-        extra_lease_count = struct.pack(">L", 0)
-
-        return b"".join([
-            fixed_header,
-            # share data will go in between the next two items eventually but
-            # for now there is none.
-            blank_leases,
-            extra_lease_count,
-        ])
 
     @classmethod
     def serialize_lease(cls, lease_info):
@@ -89,7 +232,13 @@ class _V1(object):
 
         :return: the serialized bytes
         """
-        return lease_info.to_mutable_data()
+        if isinstance(lease_info, LeaseInfo):
+            return lease_info.to_mutable_data()
+        raise ValueError(
+            "MutableShareFile v1 schema only supports LeaseInfo, not {!r}".format(
+                lease_info,
+            ),
+        )
 
     @classmethod
     def unserialize_lease(cls, data):
@@ -104,7 +253,7 @@ class _V1(object):
         return LeaseInfo.from_mutable_data(data)
 
 
-ALL_SCHEMAS = {_V1}
+ALL_SCHEMAS = {_V2, _V1}
 ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}  # type: ignore
 NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)  # type: ignore

From 617a1eac9d848661b1fce2fe18976796ce02ac2a Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Fri, 5 Nov 2021 15:30:49 -0400
Subject: [PATCH 106/220] refactor lease hashing logic to avoid mutable/immutable duplication

---
 src/allmydata/storage/immutable.py        |   4 +-
 src/allmydata/storage/immutable_schema.py | 169 ++++---------------
 src/allmydata/storage/lease.py            |   4 +-
 src/allmydata/storage/lease_schema.py     | 129 ++++++++++++++
 src/allmydata/storage/mutable.py          |   4 +-
 src/allmydata/storage/mutable_schema.py   | 194 ++++------------------
 6 files changed, 199 insertions(+), 305 deletions(-)
 create mode 100644 src/allmydata/storage/lease_schema.py

diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py
index 216262a81..e9992d96e 100644
--- a/src/allmydata/storage/immutable.py
+++ b/src/allmydata/storage/immutable.py
@@ -231,7 +231,7 @@ class ShareFile(object):
             offset = self._lease_offset + lease_number * self.LEASE_SIZE
             f.seek(offset)
             assert f.tell() == offset
-            f.write(self._schema.serialize_lease(lease_info))
+            f.write(self._schema.lease_serializer.serialize(lease_info))
 
     def _read_num_leases(self, f):
         f.seek(0x08)
@@ -262,7 +262,7 @@ class ShareFile(object):
         for i in range(num_leases):
             data = f.read(self.LEASE_SIZE)
             if data:
-                yield self._schema.unserialize_lease(data)
+                yield self._schema.lease_serializer.unserialize(data)
 
     def add_lease(self, lease_info):
         with open(self.home, 'rb+') as f:
diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py
index 440755b01..40663b935 100644
--- a/src/allmydata/storage/immutable_schema.py
+++ b/src/allmydata/storage/immutable_schema.py
@@ -13,84 +13,28 @@ if PY2:
 
 import struct
 
-try:
-    from typing import Union
-except ImportError:
-    pass
-
 import attr
 
-from nacl.hash import blake2b
-from nacl.encoding import RawEncoder
-
-from .lease import (
-    LeaseInfo,
-    HashedLeaseInfo,
+from .lease_schema import (
+    v1_immutable,
+    v2_immutable,
 )
 
-def _header(version, max_size):
-    # type: (int, int) -> bytes
+@attr.s(frozen=True)
+class _Schema(object):
     """
-    Construct the header for an immutable container.
+    Implement encoding and decoding for multiple versions of the immutable
+    container schema.
- :param version: the container version to include the in header - :param max_size: the maximum data size the container will hold + :ivar int version: the version number of the schema this object supports - :return: some bytes to write at the beginning of the container + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization """ - # The second field -- the four-byte share data length -- is no longer - # used as of Tahoe v1.3.0, but we continue to write it in there in - # case someone downgrades a storage server from >= Tahoe-1.3.0 to < - # Tahoe-1.3.0, or moves a share file from one server to another, - # etc. We do saturation -- a share data length larger than 2**32-1 - # (what can fit into the field) is marked as the largest length that - # can fit into the field. That way, even if this does happen, the old - # < v1.3.0 server will still allow clients to read the first part of - # the share. - return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + version = attr.ib() + lease_serializer = attr.ib() - -class _V2(object): - """ - Implement encoding and decoding for v2 of the immutable container. - """ - version = 2 - - @classmethod - def _hash_secret(cls, secret): - # type: (bytes) -> bytes - """ - Hash a lease secret for storage. - """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) - - @classmethod - def _hash_lease_info(cls, lease_info): - # type: (LeaseInfo) -> HashedLeaseInfo - """ - Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. - """ - if not isinstance(lease_info, LeaseInfo): - # Provide a little safety against misuse, especially an attempt to - # re-hash an already-hashed lease info which is represented as a - # different type. - raise TypeError( - "Can only hash LeaseInfo, not {!r}".format(lease_info), - ) - - # Hash the cleartext secrets in the lease info and wrap the result in - # a new type. - return HashedLeaseInfo( - attr.assoc( - lease_info, - renew_secret=cls._hash_secret(lease_info.renew_secret), - cancel_secret=cls._hash_secret(lease_info.cancel_secret), - ), - cls._hash_secret, - ) - - @classmethod - def header(cls, max_size): + def header(self, max_size): # type: (int) -> bytes """ Construct a container header. @@ -99,78 +43,23 @@ class _V2(object): :return: the header bytes """ - return _header(cls.version, max_size) + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. + return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0) - @classmethod - def serialize_lease(cls, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes - """ - Serialize a lease to be written to a v2 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - # v2 of the immutable schema stores lease secrets hashed. If - # we're given a LeaseInfo then it holds plaintext secrets. Hash - # them before trying to serialize. 
- lease = cls._hash_lease_info(lease) - if isinstance(lease, HashedLeaseInfo): - return lease.to_immutable_data() - raise ValueError( - "ShareFile v2 schema cannot represent lease {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> HashedLeaseInfo - """ - Unserialize some bytes from a v2 container. - - :param data: the bytes from the container - - :return: the ``HashedLeaseInfo`` the bytes represent - """ - # In v2 of the immutable schema lease secrets are stored hashed. Wrap - # a LeaseInfo in a HashedLeaseInfo so it can supply the correct - # interpretation for those values. - return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret) - - -class _V1(object): - """ - Implement encoding and decoding for v1 of the immutable container. - """ - version = 1 - - @classmethod - def header(cls, max_size): - return _header(cls.version, max_size) - - @classmethod - def serialize_lease(cls, lease): - if isinstance(lease, LeaseInfo): - return lease.to_immutable_data() - raise ValueError( - "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # In v1 of the immutable schema lease secrets are stored plaintext. - # So load the data into a plain LeaseInfo which works on plaintext - # secrets. - return LeaseInfo.from_immutable_data(data) - - -ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore +ALL_SCHEMAS = { + _Schema(version=2, lease_serializer=v2_immutable), + _Schema(version=1, lease_serializer=v1_immutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_version(version): # (int) -> Optional[type] diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 1a5416d6a..8be44bafd 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -230,7 +230,7 @@ class LeaseInfo(object): "cancel_secret", "expiration_time", ] - values = struct.unpack(">L32s32sL", data) + values = struct.unpack(IMMUTABLE_FORMAT, data) return cls(nodeid=None, **dict(zip(names, values))) def immutable_size(self): @@ -274,7 +274,7 @@ class LeaseInfo(object): "cancel_secret", "nodeid", ] - values = struct.unpack(">LL32s32s20s", data) + values = struct.unpack(MUTABLE_FORMAT, data) return cls(**dict(zip(names, values))) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py new file mode 100644 index 000000000..697ac9e34 --- /dev/null +++ b/src/allmydata/storage/lease_schema.py @@ -0,0 +1,129 @@ +""" +Ported to Python 3. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +try: + from typing import Union +except ImportError: + pass + +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + +from .lease import ( + LeaseInfo, + HashedLeaseInfo, +) + +@attr.s(frozen=True) +class CleartextLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + def serialize(self, lease): + # type: (LeaseInfo) -> bytes + if isinstance(lease, LeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> LeaseInfo + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return self._from_data(data) + +@attr.s(frozen=True) +class HashedLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + def serialize(self, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = self._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> HashedLeaseInfo + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. 
+ return HashedLeaseInfo(self._from_data(data), self._hash_secret) + +v1_immutable = CleartextLeaseSerializer( + LeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v2_immutable = HashedLeaseSerializer( + HashedLeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v1_mutable = CleartextLeaseSerializer( + LeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) + +v2_mutable = HashedLeaseSerializer( + HashedLeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 346edd53a..bd59d96b8 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -236,7 +236,7 @@ class MutableShareFile(object): + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset - f.write(self._schema.serialize_lease(lease_info)) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None @@ -253,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = self._schema.unserialize_lease(data) + lease_info = self._schema.lease_serializer.unserialize(data) if lease_info.owner_num == 0: return None return lease_info diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py index 9496fe571..4be0d2137 100644 --- a/src/allmydata/storage/mutable_schema.py +++ b/src/allmydata/storage/mutable_schema.py @@ -13,22 +13,17 @@ if PY2: import struct -try: - from typing import Union -except ImportError: - pass - import attr -from nacl.hash import blake2b -from nacl.encoding import RawEncoder - from ..util.hashutil import ( tagged_hash, ) from .lease import ( LeaseInfo, - HashedLeaseInfo, +) +from .lease_schema import ( + v1_mutable, + v2_mutable, ) def _magic(version): @@ -94,168 +89,49 @@ def _header(magic, extra_lease_offset, nodeid, write_enabler): ]) -class _V2(object): +_HEADER_FORMAT = ">32s20s32sQQ" + +# This size excludes leases +_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) + +_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + + +@attr.s(frozen=True) +class _Schema(object): """ - Implement encoding and decoding for v2 of the mutable container. + Implement encoding and decoding for the mutable container. + + :ivar int version: the version number of the schema this object supports + + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization """ - version = 2 - _MAGIC = _magic(version) - - _HEADER_FORMAT = ">32s20s32sQQ" - - # This size excludes leases - _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) - - _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + version = attr.ib() + lease_serializer = attr.ib() + _magic = attr.ib() @classmethod - def _hash_secret(cls, secret): - # type: (bytes) -> bytes - """ - Hash a lease secret for storage. - """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) + def for_version(cls, version, lease_serializer): + return cls(version, lease_serializer, magic=_magic(version)) - @classmethod - def _hash_lease_info(cls, lease_info): - # type: (LeaseInfo) -> HashedLeaseInfo - """ - Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. - """ - if not isinstance(lease_info, LeaseInfo): - # Provide a little safety against misuse, especially an attempt to - # re-hash an already-hashed lease info which is represented as a - # different type. 
- raise TypeError( - "Can only hash LeaseInfo, not {!r}".format(lease_info), - ) - - # Hash the cleartext secrets in the lease info and wrap the result in - # a new type. - return HashedLeaseInfo( - attr.assoc( - lease_info, - renew_secret=cls._hash_secret(lease_info.renew_secret), - cancel_secret=cls._hash_secret(lease_info.cancel_secret), - ), - cls._hash_secret, - ) - - @classmethod - def magic_matches(cls, candidate_magic): + def magic_matches(self, candidate_magic): # type: (bytes) -> bool """ Return ``True`` if a candidate string matches the expected magic string from a mutable container header, ``False`` otherwise. """ - return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC + return candidate_magic[:len(self._magic)] == self._magic - @classmethod - def header(cls, nodeid, write_enabler): - return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) + def header(self, nodeid, write_enabler): + return _header(self._magic, _EXTRA_LEASE_OFFSET, nodeid, write_enabler) - @classmethod - def serialize_lease(cls, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes - """ - Serialize a lease to be written to a v2 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - # v2 of the mutable schema stores lease secrets hashed. If we're - # given a LeaseInfo then it holds plaintext secrets. Hash them - # before trying to serialize. - lease = cls._hash_lease_info(lease) - if isinstance(lease, HashedLeaseInfo): - return lease.to_mutable_data() - raise ValueError( - "MutableShareFile v2 schema cannot represent lease {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> HashedLeaseInfo - """ - Unserialize some bytes from a v2 container. - - :param data: the bytes from the container - - :return: the ``HashedLeaseInfo`` the bytes represent - """ - # In v2 of the immutable schema lease secrets are stored hashed. Wrap - # a LeaseInfo in a HashedLeaseInfo so it can supply the correct - # interpretation for those values. - lease = LeaseInfo.from_mutable_data(data) - return HashedLeaseInfo(lease, cls._hash_secret) - - -class _V1(object): - """ - Implement encoding and decoding for v1 of the mutable container. - """ - version = 1 - _MAGIC = _magic(version) - - _HEADER_FORMAT = ">32s20s32sQQ" - - # This size excludes leases - _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) - - _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() - - @classmethod - def magic_matches(cls, candidate_magic): - # type: (bytes) -> bool - """ - Return ``True`` if a candidate string matches the expected magic string - from a mutable container header, ``False`` otherwise. - """ - return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC - - @classmethod - def header(cls, nodeid, write_enabler): - return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) - - - @classmethod - def serialize_lease(cls, lease_info): - # type: (LeaseInfo) -> bytes - """ - Serialize a lease to be written to a v1 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - return lease_info.to_mutable_data() - raise ValueError( - "MutableShareFile v1 schema only supports LeaseInfo, not {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> LeaseInfo - """ - Unserialize some bytes from a v1 container. 
- - :param data: the bytes from the container - - :return: the ``LeaseInfo`` the bytes represent - """ - return LeaseInfo.from_mutable_data(data) - - -ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore +ALL_SCHEMAS = { + _Schema.for_version(version=2, lease_serializer=v2_mutable), + _Schema.for_version(version=1, lease_serializer=v1_mutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_header(header): # (int) -> Optional[type] From 66644791cbce41f31a08a7a9ba56449ccf02e33e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 15:36:26 -0400 Subject: [PATCH 107/220] news fragment --- newsfragments/3841.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3841.security diff --git a/newsfragments/3841.security b/newsfragments/3841.security new file mode 100644 index 000000000..867322e0a --- /dev/null +++ b/newsfragments/3841.security @@ -0,0 +1 @@ +The storage server now keeps hashes of lease renew and cancel secrets for mutable share files instead of keeping the original secrets. \ No newline at end of file From 8dd4aaebb6e579b4874951bc4b6c6218ed667b79 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 10 Nov 2021 14:42:22 -0500 Subject: [PATCH 108/220] More consistent header system. --- docs/proposed/http-storage-node-protocol.rst | 106 +++++++++++-------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index fd1db5c4c..2a392fb20 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -363,11 +363,11 @@ one branch contains all of the share data; another branch contains all of the lease data; etc. -Authorization is required for all endpoints. +An ``Authorization`` header in requests is required for all endpoints. The standard HTTP authorization protocol is used. The authentication *type* used is ``Tahoe-LAFS``. The swissnum from the NURL used to locate the storage service is used as the *credentials*. -If credentials are not presented or the swissnum is not associated with a storage service then no storage processing is performed and the request receives an ``UNAUTHORIZED`` response. +If credentials are not presented or the swissnum is not associated with a storage service then no storage processing is performed and the request receives an ``401 UNAUTHORIZED`` response. General ~~~~~~~ @@ -396,17 +396,26 @@ For example:: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Either renew or create a new lease on the bucket addressed by ``storage_index``. -The details of the lease are encoded in the request body. + +For a renewal, the renew secret and cancellation secret should be included as ``X-Tahoe-Authorization`` headers. For example:: - {"renew-secret": "abcd", "cancel-secret": "efgh"} + X-Tahoe-Authorization: lease-renew-secret + X-Tahoe-Authorization: lease-cancel-secret -If the ``renew-secret`` value matches an existing lease -then the expiration time of that lease will be changed to 31 days after the time of this operation. -If it does not match an existing lease -then a new lease will be created with this ``renew-secret`` which expires 31 days after the time of this operation. 
+For a new lease, ``X-Tahoe-Set-Authorization`` headers should be used instead. +For example:: -``renew-secret`` and ``cancel-secret`` values must be 32 bytes long. + X-Tahoe-Set-Authorization: lease-renew-secret + X-Tahoe-Set-Authorization: lease-cancel-secret + +For renewal, the expiration time of that lease will be changed to 31 days after the time of this operation. +If the renewal secret does not match, a new lease will be created, but clients should still not rely on this behavior if possible, and instead use the appropriate new lease headers. + +For the creation path, +then a new lease will be created with this ``lease-renew-secret`` which expires 31 days after the time of this operation. + +``lease-renew-secret`` and ``lease-cancel-secret`` values must be 32 bytes long. The server treats them as opaque values. :ref:`Share Leases` gives details about how the Tahoe-LAFS storage client constructs these values. @@ -423,7 +432,7 @@ In these cases the server takes no action and returns ``NOT FOUND``. Discussion `````````` -We considered an alternative where ``renew-secret`` and ``cancel-secret`` are placed in query arguments on the request path. +We considered an alternative where ``lease-renew-secret`` and ``lease-cancel-secret`` are placed in query arguments on the request path. We chose to put these values into the request body to make the URL simpler. Several behaviors here are blindly copied from the Foolscap-based storage server protocol. @@ -452,13 +461,13 @@ For example:: {"share-numbers": [1, 7, ...], "allocated-size": 12345} -The request must include ``WWW-Authenticate`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. +The request must include ``X-Tahoe-Set-Authorization`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. Typically this is a header sent by the server, but in Tahoe-LAFS keys are set by the client, so may as well reuse it. For example:: - WWW-Authenticate: x-tahoe-renew-secret - WWW-Authenticate: x-tahoe-cancel-secret - WWW-Authenticate: x-tahoe-upload-secret + X-Tahoe-Set-Authorization: lease-renew-secret + X-Tahoe-Set-Authorization: lease-cancel-secret + X-Tahoe-Set-Authorization: upload-secret The response body includes encoded information about the created buckets. For example:: @@ -527,9 +536,9 @@ If any one of these requests fails then at most 128KiB of upload work needs to b The server must recognize when all of the data has been received and mark the share as complete (which it can do because it was informed of the size when the storage index was initialized). -The request must include a ``Authorization`` header that includes the upload secret:: +The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret:: - Authorization: x-tahoe-upload-secret + X-Tahoe-Authorization: upload-secret Responses: @@ -557,9 +566,9 @@ Responses: This cancels an *in-progress* upload. -The request body looks this:: +The request must include a ``Authorization`` header that includes the upload secret:: - { "upload-secret": "xyzf" } + X-Tahoe-Authorization: upload-secret The response code: @@ -658,16 +667,16 @@ The first write operation on a mutable storage index creates it (that is, there is no separate "create this storage index" operation as there is for the immutable storage index type). 
-The request body includes the secrets necessary to rewrite to the shares
-along with test, read, and write vectors for the operation.
+The request must include ``X-Tahoe-Authorization`` headers with write enabler and lease secrets::
+
+    X-Tahoe-Authorization: write-enabler
+    X-Tahoe-Authorization: lease-lease-cancel-secret
+    X-Tahoe-Authorization: lease-renew-secret
+
+The request body includes test, read, and write vectors for the operation.
 For example::
 
     {
-        "secrets": {
-            "write-enabler": "abcd",
-            "lease-renew": "efgh",
-            "lease-cancel": "ijkl"
-        },
         "test-write-vectors": {
             0: {
                 "test": [{
@@ -733,9 +742,10 @@ Immutable Data
 
 1. Create a bucket for storage index ``AAAAAAAAAAAAAAAA`` to hold two immutable shares, discovering that share ``1`` was already uploaded::
 
      POST /v1/immutable/AAAAAAAAAAAAAAAA
-     WWW-Authenticate: x-tahoe-renew-secret efgh
-     WWW-Authenticate: x-tahoe-cancel-secret jjkl
-     WWW-Authenticate: x-tahoe-upload-secret xyzf
+     Authorization: Tahoe-LAFS nurl-swissnum
+     X-Tahoe-Set-Authorization: lease-renew-secret efgh
+     X-Tahoe-Set-Authorization: lease-cancel-secret jjkl
+     X-Tahoe-Set-Authorization: upload-secret xyzf
 
      {"share-numbers": [1, 7], "allocated-size": 48}
 
     200 OK
     {"already-have": [1], "allocated": [7]}
 
 #. Upload the content for immutable share ``7``::
 
      PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
+     Authorization: Tahoe-LAFS nurl-swissnum
      Content-Range: bytes 0-15/48
-     Authorization: x-tahoe-upload-secret xyzf
+     X-Tahoe-Authorization: upload-secret xyzf
 
      200 OK
 
      PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
+     Authorization: Tahoe-LAFS nurl-swissnum
      Content-Range: bytes 16-31/48
-     Authorization: x-tahoe-upload-secret xyzf
+     X-Tahoe-Authorization: upload-secret xyzf
 
      200 OK
 
      PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7
+     Authorization: Tahoe-LAFS nurl-swissnum
      Content-Range: bytes 32-47/48
-     Authorization: x-tahoe-upload-secret xyzf
+     X-Tahoe-Authorization: upload-secret xyzf
 
      201 CREATED
 
 #. Download the content of the previously uploaded immutable share ``7``::
 
      GET /v1/immutable/AAAAAAAAAAAAAAAA?share=7
+     Authorization: Tahoe-LAFS nurl-swissnum
      Range: bytes=0-47
 
      200 OK
 
 #. Renew the lease on all immutable shares in bucket ``AAAAAAAAAAAAAAAA``::
 
      PUT /v1/lease/AAAAAAAAAAAAAAAA
-     {"renew-secret": "efgh", "cancel-secret": "ijkl"}
+     Authorization: Tahoe-LAFS nurl-swissnum
+     X-Tahoe-Authorization: lease-renew-secret efgh
+     X-Tahoe-Authorization: lease-cancel-secret jjkl
 
      204 NO CONTENT
 
 
 Mutable Data
 ------------
 
 1. Create mutable share number ``3`` with ``10`` bytes of data in slot ``BBBBBBBBBBBBBBBB``.
 The special test vector of size 1 but empty bytes will only pass
 if there is no existing share,
 otherwise it will read a byte which won't match `b""`::
 
      POST /v1/mutable/BBBBBBBBBBBBBBBB/read-test-write
+     Authorization: Tahoe-LAFS nurl-swissnum
+     X-Tahoe-Authorization: write-enabler abcd
+     X-Tahoe-Authorization: lease-cancel-secret efgh
+     X-Tahoe-Authorization: lease-renew-secret ijkl
+
      {
-         "secrets": {
-             "write-enabler": "abcd",
-             "lease-renew": "efgh",
-             "lease-cancel": "ijkl"
-         },
          "test-write-vectors": {
              3: {
                  "test": [{
                      "offset": 0,
                      "size": 1,
                      "operator": "eq",
                      "specimen": ""
                  }],
                  "write": [{
                      "offset": 0,
                      "data": "xxxxxxxxxx"
                  }],
                  "new-length": 10
              }
          },
          "read-vector": []
      }
 
      200 OK
      {
          "success": true,
          "data": []
      }
 
 #.
Safely rewrite the contents of a known version of mutable share number ``3`` (or fail):: POST /v1/mutable/BBBBBBBBBBBBBBBB/read-test-write + Authorization: Tahoe-LAFS nurl-swissnum + X-Tahoe-Authorization: write-enabler abcd + X-Tahoe-Authorization: lease-cancel-secret efgh + X-Tahoe-Authorization: lease-renew-secret ijkl + { - "secrets": { - "write-enabler": "abcd", - "lease-renew": "efgh", - "lease-cancel": "ijkl" - }, "test-write-vectors": { 3: { "test": [{ @@ -853,12 +869,16 @@ otherwise it will read a byte which won't match `b""`:: #. Download the contents of share number ``3``:: GET /v1/mutable/BBBBBBBBBBBBBBBB?share=3&offset=0&size=10 + Authorization: Tahoe-LAFS nurl-swissnum + #. Renew the lease on previously uploaded mutable share in slot ``BBBBBBBBBBBBBBBB``:: PUT /v1/lease/BBBBBBBBBBBBBBBB - {"renew-secret": "efgh", "cancel-secret": "ijkl"} + Authorization: Tahoe-LAFS nurl-swissnum + X-Tahoe-Authorization: lease-cancel-secret efgh + X-Tahoe-Authorization: lease-renew-secret ijkl 204 NO CONTENT From 7faec6e5a0bb53f5d58a4253795047144a58d62d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 10 Nov 2021 15:48:58 -0500 Subject: [PATCH 109/220] news fragment --- newsfragments/3842.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3842.minor diff --git a/newsfragments/3842.minor b/newsfragments/3842.minor new file mode 100644 index 000000000..e69de29bb From 9af81d21c5af232c8e02e874b09ac33202cb5158 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 10 Nov 2021 16:08:40 -0500 Subject: [PATCH 110/220] add a way to turn off implicit bucket lease renewal too --- src/allmydata/storage/server.py | 50 ++++++++++++++++++++----- src/allmydata/test/test_storage.py | 59 +++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 11 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 3e2d3b5c6..d142646a8 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -57,9 +57,23 @@ DEFAULT_RENEWAL_TIME = 31 * 24 * 60 * 60 @implementer(RIStorageServer, IStatsProducer) class StorageServer(service.MultiService, Referenceable): + """ + A filesystem-based implementation of ``RIStorageServer``. + + :ivar bool _implicit_bucket_lease_renewal: If and only if this is ``True`` + then ``allocate_buckets`` will renew leases on existing shares + associated with the storage index it operates on. + + :ivar bool _implicit_slot_lease_renewal: If and only if this is ``True`` + then ``slot_testv_and_readv_and_writev`` will renew leases on shares + associated with the slot it operates on. + """ name = 'storage' LeaseCheckerClass = LeaseCheckingCrawler + _implicit_bucket_lease_renewal = True + _implicit_slot_lease_renewal = True + def __init__(self, storedir, nodeid, reserved_space=0, discard_storage=False, readonly_storage=False, stats_provider=None, @@ -135,6 +149,29 @@ class StorageServer(service.MultiService, Referenceable): def __repr__(self): return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) + def set_implicit_bucket_lease_renewal(self, enabled): + # type: (bool) -> None + """ + Control the behavior of implicit lease renewal by *allocate_buckets*. + + :param enabled: If and only if ``True`` then future *allocate_buckets* + calls will renew leases on shares that already exist in the bucket. 
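+
+        For example (hypothetical usage)::
+
+            ss = StorageServer(storedir, nodeid)
+            ss.set_implicit_bucket_lease_renewal(False)
+            # allocate_buckets will now leave existing leases untouched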
+ """ + self._implicit_bucket_lease_renewal = enabled + + def set_implicit_slot_lease_renewal(self, enabled): + # type: (bool) -> None + """ + Control the behavior of implicit lease renewal by + *slot_testv_and_readv_and_writev*. + + :param enabled: If and only if ``True`` then future + *slot_testv_and_readv_and_writev* calls will renew leases on + shares that still exist in the slot after the writev is applied + and which were touched by the writev. + """ + self._implicit_slot_lease_renewal = enabled + def have_shares(self): # quick test to decide if we need to commit to an implicit # permutation-seed or if we should use a new one @@ -319,8 +356,9 @@ class StorageServer(service.MultiService, Referenceable): # file, they'll want us to hold leases for this file. for (shnum, fn) in self._get_bucket_shares(storage_index): alreadygot.add(shnum) - sf = ShareFile(fn) - sf.add_or_renew_lease(lease_info) + if self._implicit_bucket_lease_renewal: + sf = ShareFile(fn) + sf.add_or_renew_lease(lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) @@ -625,15 +663,10 @@ class StorageServer(service.MultiService, Referenceable): secrets, test_and_write_vectors, read_vector, - renew_leases, ): """ Read data from shares and conditionally write some data to them. - :param bool renew_leases: If and only if this is ``True`` and the test - vectors pass then shares in this slot will also have an updated - lease applied to them. - See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. """ @@ -673,7 +706,7 @@ class StorageServer(service.MultiService, Referenceable): test_and_write_vectors, shares, ) - if renew_leases: + if self._implicit_slot_lease_renewal: lease_info = self._make_lease_info(renew_secret, cancel_secret) self._add_or_renew_leases(remaining_shares, lease_info) @@ -690,7 +723,6 @@ class StorageServer(service.MultiService, Referenceable): secrets, test_and_write_vectors, read_vector, - renew_leases=True, ) def _allocate_slot_share(self, bucketdir, secrets, sharenum, diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 460653bd0..efa889f8d 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -608,6 +608,61 @@ class Server(unittest.TestCase): for i,wb in writers.items(): wb.remote_abort() + def test_allocate_without_lease_renewal(self): + """ + ``remote_allocate_buckets`` does not renew leases on existing shares if + ``set_implicit_bucket_lease_renewal(False)`` is called first. + """ + first_lease = 456 + second_lease = 543 + storage_index = b"allocate" + + clock = Clock() + clock.advance(first_lease) + ss = self.create( + "test_allocate_without_lease_renewal", + get_current_time=clock.seconds, + ) + ss.set_implicit_bucket_lease_renewal(False) + + # Put a share on there + already, writers = self.allocate(ss, storage_index, [0], 1) + (writer,) = writers.values() + writer.remote_write(0, b"x") + writer.remote_close() + + # It should have a lease granted at the current time. + shares = dict(ss._get_bucket_shares(storage_index)) + self.assertEqual( + [first_lease], + list( + lease.get_grant_renew_time_time() + for lease + in ShareFile(shares[0]).get_leases() + ), + ) + + # Let some time pass so we can tell if the lease on share 0 is + # renewed. + clock.advance(second_lease) + + # Put another share on there. 
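+        # Since implicit bucket lease renewal was disabled above, this
+        # allocation must not update the lease on the share written earlier.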
+ already, writers = self.allocate(ss, storage_index, [1], 1) + (writer,) = writers.values() + writer.remote_write(0, b"x") + writer.remote_close() + + # The first share's lease expiration time is unchanged. + shares = dict(ss._get_bucket_shares(storage_index)) + self.assertEqual( + [first_lease], + list( + lease.get_grant_renew_time_time() + for lease + in ShareFile(shares[0]).get_leases() + ), + ) + def test_bad_container_version(self): ss = self.create("test_bad_container_version") a,w = self.allocate(ss, b"si1", [0], 10) @@ -1408,9 +1463,10 @@ class MutableServer(unittest.TestCase): def test_writev_without_renew_lease(self): """ The helper method ``slot_testv_and_readv_and_writev`` does not renew - leases if ``False`` is passed for the ``renew_leases`` parameter. + leases if ``set_implicit_bucket_lease_renewal(False)`` is called first. """ ss = self.create("test_writev_without_renew_lease") + ss.set_implicit_slot_lease_renewal(False) storage_index = b"si2" secrets = ( @@ -1429,7 +1485,6 @@ class MutableServer(unittest.TestCase): sharenum: ([], datav, None), }, read_vector=[], - renew_leases=False, ) leases = list(ss.get_slot_leases(storage_index)) self.assertEqual([], leases) From 2742de6f7c1fa6cf77e35ecc5854bcf7db3e5963 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 10 Nov 2021 16:08:53 -0500 Subject: [PATCH 111/220] drop some ancient cruft allocated_size not used anywhere, so why have it --- src/allmydata/storage/server.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index d142646a8..36cf06d0e 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -617,10 +617,8 @@ class StorageServer(service.MultiService, Referenceable): else: if sharenum not in shares: # allocate a new share - allocated_size = 2000 # arbitrary, really share = self._allocate_slot_share(bucketdir, secrets, sharenum, - allocated_size, owner_num=0) shares[sharenum] = share shares[sharenum].writev(datav, new_length) @@ -726,7 +724,7 @@ class StorageServer(service.MultiService, Referenceable): ) def _allocate_slot_share(self, bucketdir, secrets, sharenum, - allocated_size, owner_num=0): + owner_num=0): (write_enabler, renew_secret, cancel_secret) = secrets my_nodeid = self.my_nodeid fileutil.make_dirs(bucketdir) From c270a346c6c7c247db08bf107bef93c4cccc7ced Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Nov 2021 11:02:51 -0500 Subject: [PATCH 112/220] Remove typo. --- docs/proposed/http-storage-node-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 2a392fb20..19a64f5ca 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -670,7 +670,7 @@ there is no separate "create this storage index" operation as there is for the i The request must include ``X-Tahoe-Authorization`` headers with write enabler and lease secrets:: X-Tahoe-Authorization: write-enabler - X-Tahoe-Authorization: lease-lease-cancel-secret + X-Tahoe-Authorization: lease-cancel-secret X-Tahoe-Authorization: lease-renew-secret The request body includes test, read, and write vectors for the operation. From 24646c56d0aae56bd18d2d2ffa2acf1616cc2a62 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Nov 2021 11:29:05 -0500 Subject: [PATCH 113/220] Updates based on review. 
--- docs/proposed/http-storage-node-protocol.rst | 43 ++++++++------------ 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 19a64f5ca..44bda1205 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -397,22 +397,15 @@ For example:: Either renew or create a new lease on the bucket addressed by ``storage_index``. -For a renewal, the renew secret and cancellation secret should be included as ``X-Tahoe-Authorization`` headers. +The renew secret and cancellation secret should be included as ``X-Tahoe-Authorization`` headers. For example:: X-Tahoe-Authorization: lease-renew-secret X-Tahoe-Authorization: lease-cancel-secret -For a new lease, ``X-Tahoe-Set-Authorization`` headers should be used instead. -For example:: - - X-Tahoe-Set-Authorization: lease-renew-secret - X-Tahoe-Set-Authorization: lease-cancel-secret - -For renewal, the expiration time of that lease will be changed to 31 days after the time of this operation. -If the renewal secret does not match, a new lease will be created, but clients should still not rely on this behavior if possible, and instead use the appropriate new lease headers. - -For the creation path, +If the ``lease-renew-secret`` value matches an existing lease +then the expiration time of that lease will be changed to 31 days after the time of this operation. +If it does not match an existing lease then a new lease will be created with this ``lease-renew-secret`` which expires 31 days after the time of this operation. ``lease-renew-secret`` and ``lease-cancel-secret`` values must be 32 bytes long. @@ -433,7 +426,9 @@ Discussion `````````` We considered an alternative where ``lease-renew-secret`` and ``lease-cancel-secret`` are placed in query arguments on the request path. -We chose to put these values into the request body to make the URL simpler. +This increases chances of leaking secrets in logs. +Putting the secrets in the body reduces the chances of leaking secrets, +but eventually we chose headers as the least likely information to be logged. Several behaviors here are blindly copied from the Foolscap-based storage server protocol. @@ -461,13 +456,13 @@ For example:: {"share-numbers": [1, 7, ...], "allocated-size": 12345} -The request must include ``X-Tahoe-Set-Authorization`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. +The request must include ``X-Tahoe-Authorization`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. Typically this is a header sent by the server, but in Tahoe-LAFS keys are set by the client, so may as well reuse it. For example:: - X-Tahoe-Set-Authorization: lease-renew-secret - X-Tahoe-Set-Authorization: lease-cancel-secret - X-Tahoe-Set-Authorization: upload-secret + X-Tahoe-Authorization: lease-renew-secret + X-Tahoe-Authorization: lease-cancel-secret + X-Tahoe-Authorization: upload-secret The response body includes encoded information about the created buckets. For example:: @@ -475,12 +470,6 @@ For example:: {"already-have": [1, ...], "allocated": [7, ...]} The upload secret is an opaque _byte_ string. -It will be generated by hashing a combination of:b - -1. A tag. -2. The storage index, so it's unique across different source files. -3. 
The server ID, so it's unique across different servers. -4. The convergence secret, so that servers can't guess the upload secret for other servers. Discussion `````````` @@ -508,7 +497,7 @@ The response includes ``already-have`` and ``allocated`` for two reasons: Regarding upload secrets, the goal is for uploading and aborting (see next sections) to be authenticated by more than just the storage index. -In the future, we will want to generate them in a way that allows resuming/canceling when the client has issues. +In the future, we may want to generate them in a way that allows resuming/canceling when the client has issues. In the short term, they can just be a random byte string. The key security constraint is that each upload to each server has its own, unique upload key, tied to uploading that particular storage index to this particular server. @@ -566,7 +555,7 @@ Responses: This cancels an *in-progress* upload. -The request must include a ``Authorization`` header that includes the upload secret:: +The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret:: X-Tahoe-Authorization: upload-secret @@ -743,9 +732,9 @@ Immutable Data POST /v1/immutable/AAAAAAAAAAAAAAAA Authorization: Tahoe-LAFS nurl-swissnum - X-Tahoe-Set-Authorization: lease-renew-secret efgh - X-Tahoe-Set-Authorization: lease-cancel-secret jjkl - X-Tahoe-Set-Authorization: upload-secret xyzf + X-Tahoe-Authorization: lease-renew-secret efgh + X-Tahoe-Authorization: lease-cancel-secret jjkl + X-Tahoe-Authorization: upload-secret xyzf {"share-numbers": [1, 7], "allocated-size": 48} From bea4cf18a0d7d91dece9fb4a45bb39c5b41b8e9d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 11:19:29 -0500 Subject: [PATCH 114/220] News file. --- newsfragments/3843.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3843.minor diff --git a/newsfragments/3843.minor b/newsfragments/3843.minor new file mode 100644 index 000000000..e69de29bb From e7a5d14c0e8c0077880e2a9ffbd1e3db3738dd93 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 11:25:10 -0500 Subject: [PATCH 115/220] New requirements. --- nix/tahoe-lafs.nix | 2 +- setup.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index e42afc57f..f691677f6 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -4,7 +4,7 @@ , setuptools, setuptoolsTrial, pyasn1, zope_interface , service-identity, pyyaml, magic-wormhole, treq, appdirs , beautifulsoup4, eliot, autobahn, cryptography, netifaces -, html5lib, pyutil, distro, configparser +, html5lib, pyutil, distro, configparser, klein, treq }: python.pkgs.buildPythonPackage rec { # Most of the time this is not exactly the release version (eg 1.16.0). diff --git a/setup.py b/setup.py index 8c6396937..3d9f5a509 100644 --- a/setup.py +++ b/setup.py @@ -140,6 +140,10 @@ install_requires = [ # For the RangeMap datastructure. "collections-extended", + + # HTTP server and client + "klein", + "treq", ] setup_requires = [ @@ -397,7 +401,6 @@ setup(name="tahoe-lafs", # also set in __init__.py # Python 2.7. "decorator < 5", "hypothesis >= 3.6.1", - "treq", "towncrier", "testtools", "fixtures", From 777d630f481e3010c399d0cc2e872bacd572e700 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 12:00:07 -0500 Subject: [PATCH 116/220] Another dependency. 
--- nix/tahoe-lafs.nix | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index f691677f6..a6a8a69ec 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -4,7 +4,7 @@ , setuptools, setuptoolsTrial, pyasn1, zope_interface , service-identity, pyyaml, magic-wormhole, treq, appdirs , beautifulsoup4, eliot, autobahn, cryptography, netifaces -, html5lib, pyutil, distro, configparser, klein, treq +, html5lib, pyutil, distro, configparser, klein, treq, cbor2 }: python.pkgs.buildPythonPackage rec { # Most of the time this is not exactly the release version (eg 1.16.0). diff --git a/setup.py b/setup.py index 3d9f5a509..7e7a955c6 100644 --- a/setup.py +++ b/setup.py @@ -144,6 +144,7 @@ install_requires = [ # HTTP server and client "klein", "treq", + "cbor2" ] setup_requires = [ From a32c6be978f0c857ee0465cf123b56058178a21e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 12:02:58 -0500 Subject: [PATCH 117/220] A sketch of what the HTTP server will look like. --- src/allmydata/storage/http_server.py | 66 ++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 src/allmydata/storage/http_server.py diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py new file mode 100644 index 000000000..87edda999 --- /dev/null +++ b/src/allmydata/storage/http_server.py @@ -0,0 +1,66 @@ +""" +HTTP server for storage. +""" + +from functools import wraps + +from klein import Klein +from twisted.web import http + +# Make sure to use pure Python versions: +from cbor2.encoder import dumps +from cbor2.decoder import loads + +from .server import StorageServer + + +def _authorization_decorator(f): + """ + Check the ``Authorization`` header, and (TODO: in later revision of code) + extract ``X-Tahoe-Authorization`` headers and pass them in. + """ + + @wraps(f) + def route(self, request, *args, **kwargs): + if request.headers["Authorization"] != self._swissnum: + request.setResponseCode(http.NOT_ALLOWED) + return b"" + # authorization = request.headers.getRawHeaders("X-Tahoe-Authorization", []) + # For now, just a placeholder: + authorization = None + return f(self, request, authorization, *args, **kwargs) + + +def _route(app, *route_args, **route_kwargs): + """ + Like Klein's @route, but with additional support for checking the + ``Authorization`` header as well as ``X-Tahoe-Authorization`` headers. The + latter will (TODO: in later revision of code) get passed in as second + argument to wrapped functions. + """ + + def decorator(f): + @app.route(*route_args, **route_kwargs) + @_authorization_decorator + def handle_route(*args, **kwargs): + return f(*args, **kwargs) + + return handle_route + + return decorator + + +class HTTPServer(object): + """ + A HTTP interface to the storage server. + """ + + _app = Klein() + + def __init__(self, storage_server: StorageServer, swissnum): + self._storage_server = storage_server + self._swissnum = swissnum + + @_route(_app, "/v1/version", methods=["GET"]) + def version(self, request, authorization): + return dumps(self._storage_server.remote_get_version()) From ddd2780bd243436d3630fdcee8b0340480736e27 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 12:51:52 -0500 Subject: [PATCH 118/220] First sketch of HTTP client. 
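
A hedged sketch of how the client below is meant to be used (the
address, port and swissnum are invented for illustration, and the
sketch's imports and authorization handling are refined by later
patches in this series)::

    from hyperlink import DecodedURL
    from twisted.internet.defer import inlineCallbacks

    from allmydata.storage.http_client import StorageClient

    @inlineCallbacks
    def show_version():
        # A DecodedURL plus the server's swissnum is all the client
        # needs; get_version() returns a Deferred firing with the
        # CBOR-decoded response body.
        client = StorageClient(
            DecodedURL.from_text(u"http://127.0.0.1:8080"), b"abcd"
        )
        version = yield client.get_version()
        print(version)
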
--- src/allmydata/storage/http_client.py | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/allmydata/storage/http_client.py diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py new file mode 100644 index 000000000..ca80b704e --- /dev/null +++ b/src/allmydata/storage/http_client.py @@ -0,0 +1,36 @@ +""" +HTTP client that talks to the HTTP storage server. +""" + +# Make sure to import Python version: +from cbor2.encoder import loads +from cbor2.decoder import loads + +from twisted.internet.defer import inlineCallbacks, returnValue +from hyperlink import DecodedURL +import treq + + +def _decode_cbor(response): + """Given HTTP response, return decoded CBOR body.""" + return treq.content(response).addCallback(loads) + + +class StorageClient(object): + """ + HTTP client that talks to the HTTP storage server. + """ + + def __init__(self, url: DecodedURL, swissnum, treq=treq): + self._base_url = url + self._swissnum = swissnum + self._treq = treq + + @inlineCallbacks + def get_version(self): + """ + Return the version metadata for the server. + """ + url = self._base_url.child("v1", "version") + response = _decode_cbor((yield self._treq.get(url))) + returnValue(response) From 12cbf8a90109548aaba570d977863bacc2e8fdad Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 13:03:53 -0500 Subject: [PATCH 119/220] First sketch of HTTP testing infrastructure. --- src/allmydata/test/test_storage_http.py | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/allmydata/test/test_storage_http.py diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py new file mode 100644 index 000000000..589cfdddf --- /dev/null +++ b/src/allmydata/test/test_storage_http.py @@ -0,0 +1,38 @@ +""" +Tests for HTTP storage client + server. +""" + +from twisted.trial.unittest import TestCase +from twisted.internet.defer import inlineCallbacks + +from treq.testing import StubTreq +from hyperlink import DecodedURL + +from ..storage.server import StorageServer +from ..storage.http_server import HTTPServer +from ..storage.http_client import StorageClient + + +class HTTPTests(TestCase): + """ + Tests of HTTP client talking to the HTTP server. + """ + + def setUp(self): + self.storage_server = StorageServer(self.mktemp(), b"\x00" * 20) + # TODO what should the swissnum _actually_ be? + self._http_server = HTTPServer(self._storage_server, b"abcd") + self.client = StorageClient( + DecodedURL.from_text("http://example.com"), + b"abcd", + treq=StubTreq(self._http_server.get_resource()), + ) + + @inlineCallbacks + def test_version(self): + """ + The client can return the version. + """ + version = yield self.client.get_version() + expected_version = self.storage_server.remote_get_version() + self.assertEqual(version, expected_version) From c101dd4dc9e33190da63daedd1963a1fb0e9f7cf Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Nov 2021 13:13:19 -0500 Subject: [PATCH 120/220] Closer to first passing test. 
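
The tests lean on ``treq.testing.StubTreq`` from the previous patch: it
speaks HTTP to a ``twisted.web`` resource entirely in memory, with no
sockets, so client and server can be exercised together in one test
process. A self-contained sketch of that pattern (the ``Hello``
resource is invented for illustration)::

    from twisted.web.resource import Resource
    from treq.testing import StubTreq

    class Hello(Resource):
        isLeaf = True

        def render_GET(self, request):
            return b"hello"

    # StubTreq exposes the same interface as treq (get, content, ...)
    # but routes each request directly into the wrapped resource.
    stub = StubTreq(Hello())
    d = stub.get("http://example.invalid/")  # a Deferred, as with treq
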
--- src/allmydata/storage/http_client.py | 20 +++++++++++++------- src/allmydata/storage/http_server.py | 20 +++++++++++++++----- src/allmydata/test/test_storage_http.py | 2 +- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index ca80b704e..e593fd379 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -2,18 +2,23 @@ HTTP client that talks to the HTTP storage server. """ -# Make sure to import Python version: -from cbor2.encoder import loads -from cbor2.decoder import loads +# TODO Make sure to import Python version? +from cbor2 import loads, dumps -from twisted.internet.defer import inlineCallbacks, returnValue +from twisted.internet.defer import inlineCallbacks, returnValue, fail from hyperlink import DecodedURL import treq +class ClientException(Exception): + """An unexpected error.""" + + def _decode_cbor(response): """Given HTTP response, return decoded CBOR body.""" - return treq.content(response).addCallback(loads) + if response.code > 199 and response.code < 300: + return treq.content(response).addCallback(loads) + return fail(ClientException(response.code, response.phrase)) class StorageClient(object): @@ -32,5 +37,6 @@ class StorageClient(object): Return the version metadata for the server. """ url = self._base_url.child("v1", "version") - response = _decode_cbor((yield self._treq.get(url))) - returnValue(response) + response = yield self._treq.get(url) + decoded_response = yield _decode_cbor(response) + returnValue(decoded_response) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 87edda999..b862fe7b1 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -7,9 +7,8 @@ from functools import wraps from klein import Klein from twisted.web import http -# Make sure to use pure Python versions: -from cbor2.encoder import dumps -from cbor2.decoder import loads +# TODO Make sure to use pure Python versions? +from cbor2 import loads, dumps from .server import StorageServer @@ -22,14 +21,19 @@ def _authorization_decorator(f): @wraps(f) def route(self, request, *args, **kwargs): - if request.headers["Authorization"] != self._swissnum: + if ( + request.requestHeaders.getRawHeaders("Authorization", [None])[0] + != self._swissnum + ): request.setResponseCode(http.NOT_ALLOWED) return b"" - # authorization = request.headers.getRawHeaders("X-Tahoe-Authorization", []) + # authorization = request.requestHeaders.getRawHeaders("X-Tahoe-Authorization", []) # For now, just a placeholder: authorization = None return f(self, request, authorization, *args, **kwargs) + return route + def _route(app, *route_args, **route_kwargs): """ @@ -53,6 +57,8 @@ def _route(app, *route_args, **route_kwargs): class HTTPServer(object): """ A HTTP interface to the storage server. 
+ + TODO returning CBOR should set CBOR content-type """ _app = Klein() @@ -61,6 +67,10 @@ class HTTPServer(object): self._storage_server = storage_server self._swissnum = swissnum + def get_resource(self): + """Return twisted.web Resource for this object.""" + return self._app.resource() + @_route(_app, "/v1/version", methods=["GET"]) def version(self, request, authorization): return dumps(self._storage_server.remote_get_version()) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 589cfdddf..663675f40 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -21,7 +21,7 @@ class HTTPTests(TestCase): def setUp(self): self.storage_server = StorageServer(self.mktemp(), b"\x00" * 20) # TODO what should the swissnum _actually_ be? - self._http_server = HTTPServer(self._storage_server, b"abcd") + self._http_server = HTTPServer(self.storage_server, b"abcd") self.client = StorageClient( DecodedURL.from_text("http://example.com"), b"abcd", From c3cb0ebaeaa196c24272ac1fd834ed3c30baa377 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Nov 2021 16:20:27 -0500 Subject: [PATCH 121/220] Switch to per-call parameter for controlling lease renewal behavior This is closer to an implementation where you could have two frontends, say a Foolscap frontend and an HTTP frontend or even just two different HTTP frontends, which had different opinions about what the behaviour should be. --- src/allmydata/storage/server.py | 51 ++++++------------------ src/allmydata/test/test_storage.py | 63 +++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 53 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 36cf06d0e..70d71f841 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -59,21 +59,10 @@ DEFAULT_RENEWAL_TIME = 31 * 24 * 60 * 60 class StorageServer(service.MultiService, Referenceable): """ A filesystem-based implementation of ``RIStorageServer``. - - :ivar bool _implicit_bucket_lease_renewal: If and only if this is ``True`` - then ``allocate_buckets`` will renew leases on existing shares - associated with the storage index it operates on. - - :ivar bool _implicit_slot_lease_renewal: If and only if this is ``True`` - then ``slot_testv_and_readv_and_writev`` will renew leases on shares - associated with the slot it operates on. """ name = 'storage' LeaseCheckerClass = LeaseCheckingCrawler - _implicit_bucket_lease_renewal = True - _implicit_slot_lease_renewal = True - def __init__(self, storedir, nodeid, reserved_space=0, discard_storage=False, readonly_storage=False, stats_provider=None, @@ -149,29 +138,6 @@ class StorageServer(service.MultiService, Referenceable): def __repr__(self): return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) - def set_implicit_bucket_lease_renewal(self, enabled): - # type: (bool) -> None - """ - Control the behavior of implicit lease renewal by *allocate_buckets*. - - :param enabled: If and only if ``True`` then future *allocate_buckets* - calls will renew leases on shares that already exist in the bucket. - """ - self._implicit_bucket_lease_renewal = enabled - - def set_implicit_slot_lease_renewal(self, enabled): - # type: (bool) -> None - """ - Control the behavior of implicit lease renewal by - *slot_testv_and_readv_and_writev*. 
- - :param enabled: If and only if ``True`` then future - *slot_testv_and_readv_and_writev* calls will renew leases on - shares that still exist in the slot after the writev is applied - and which were touched by the writev. - """ - self._implicit_slot_lease_renewal = enabled - def have_shares(self): # quick test to decide if we need to commit to an implicit # permutation-seed or if we should use a new one @@ -314,9 +280,12 @@ class StorageServer(service.MultiService, Referenceable): def _allocate_buckets(self, storage_index, renew_secret, cancel_secret, sharenums, allocated_size, - owner_num=0): + owner_num=0, renew_leases=True): """ Generic bucket allocation API. + + :param bool renew_leases: If and only if this is ``True`` then + renew leases on existing shares in this bucket. """ # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated @@ -356,7 +325,7 @@ class StorageServer(service.MultiService, Referenceable): # file, they'll want us to hold leases for this file. for (shnum, fn) in self._get_bucket_shares(storage_index): alreadygot.add(shnum) - if self._implicit_bucket_lease_renewal: + if renew_leases: sf = ShareFile(fn) sf.add_or_renew_lease(lease_info) @@ -399,7 +368,7 @@ class StorageServer(service.MultiService, Referenceable): """Foolscap-specific ``allocate_buckets()`` API.""" alreadygot, bucketwriters = self._allocate_buckets( storage_index, renew_secret, cancel_secret, sharenums, allocated_size, - owner_num=owner_num, + owner_num=owner_num, renew_leases=True, ) # Abort BucketWriters if disconnection happens. for bw in bucketwriters.values(): @@ -661,12 +630,17 @@ class StorageServer(service.MultiService, Referenceable): secrets, test_and_write_vectors, read_vector, + renew_leases, ): """ Read data from shares and conditionally write some data to them. See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. + + :param bool renew_leases: If and only if this is ``True`` then renew + leases on all shares mentioned in ``test_and_write_vectors` that + still exist after the changes are made. """ start = self._get_current_time() self.count("writev") @@ -704,7 +678,7 @@ class StorageServer(service.MultiService, Referenceable): test_and_write_vectors, shares, ) - if self._implicit_slot_lease_renewal: + if renew_leases: lease_info = self._make_lease_info(renew_secret, cancel_secret) self._add_or_renew_leases(remaining_shares, lease_info) @@ -721,6 +695,7 @@ class StorageServer(service.MultiService, Referenceable): secrets, test_and_write_vectors, read_vector, + renew_leases=True, ) def _allocate_slot_share(self, bucketdir, secrets, sharenum, diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index efa889f8d..a6c1ac2c2 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -468,14 +468,19 @@ class Server(unittest.TestCase): sv1 = ver[b'http://allmydata.org/tahoe/protocols/storage/v1'] self.failUnlessIn(b'available-space', sv1) - def allocate(self, ss, storage_index, sharenums, size, canary=None): + def allocate(self, ss, storage_index, sharenums, size, renew_leases=True): + """ + Call directly into the storage server's allocate_buckets implementation, + skipping the Foolscap layer. 
+ """ renew_secret = hashutil.my_renewal_secret_hash(b"%d" % next(self._lease_secret)) cancel_secret = hashutil.my_cancel_secret_hash(b"%d" % next(self._lease_secret)) - if not canary: - canary = FakeCanary() - return ss.remote_allocate_buckets(storage_index, - renew_secret, cancel_secret, - sharenums, size, canary) + return ss._allocate_buckets( + storage_index, + renew_secret, cancel_secret, + sharenums, size, + renew_leases=renew_leases, + ) def test_large_share(self): syslow = platform.system().lower() @@ -611,7 +616,7 @@ class Server(unittest.TestCase): def test_allocate_without_lease_renewal(self): """ ``remote_allocate_buckets`` does not renew leases on existing shares if - ``set_implicit_bucket_lease_renewal(False)`` is called first. + ``renew_leases`` is ``False``. """ first_lease = 456 second_lease = 543 @@ -623,10 +628,11 @@ class Server(unittest.TestCase): "test_allocate_without_lease_renewal", get_current_time=clock.seconds, ) - ss.set_implicit_bucket_lease_renewal(False) # Put a share on there - already, writers = self.allocate(ss, storage_index, [0], 1) + already, writers = self.allocate( + ss, storage_index, [0], 1, renew_leases=False, + ) (writer,) = writers.values() writer.remote_write(0, b"x") writer.remote_close() @@ -647,7 +653,9 @@ class Server(unittest.TestCase): clock.advance(second_lease) # Put another share on there. - already, writers = self.allocate(ss, storage_index, [1], 1) + already, writers = self.allocate( + ss, storage_index, [1], 1, renew_leases=False, + ) (writer,) = writers.values() writer.remote_write(0, b"x") writer.remote_close() @@ -684,8 +692,17 @@ class Server(unittest.TestCase): def test_disconnect(self): # simulate a disconnection ss = self.create("test_disconnect") + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 canary = FakeCanary() - already,writers = self.allocate(ss, b"disconnect", [0,1,2], 75, canary) + already,writers = ss.remote_allocate_buckets( + b"disconnect", + renew_secret, + cancel_secret, + sharenums=[0,1,2], + allocated_size=75, + canary=canary, + ) self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) for (f,args,kwargs) in list(canary.disconnectors.values()): @@ -717,8 +734,17 @@ class Server(unittest.TestCase): # the size we request. 
OVERHEAD = 3*4 LEASE_SIZE = 4+32+32+4 + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 canary = FakeCanary() - already, writers = self.allocate(ss, b"vid1", [0,1,2], 1000, canary) + already, writers = ss.remote_allocate_buckets( + b"vid1", + renew_secret, + cancel_secret, + sharenums=[0,1,2], + allocated_size=1000, + canary=canary, + ) self.failUnlessEqual(len(writers), 3) # now the StorageServer should have 3000 bytes provisionally # allocated, allowing only 2000 more to be claimed @@ -751,7 +777,14 @@ class Server(unittest.TestCase): # now there should be ALLOCATED=1001+12+72=1085 bytes allocated, and # 5000-1085=3915 free, therefore we can fit 39 100byte shares canary3 = FakeCanary() - already3, writers3 = self.allocate(ss, b"vid3", list(range(100)), 100, canary3) + already3, writers3 = ss.remote_allocate_buckets( + b"vid3", + renew_secret, + cancel_secret, + sharenums=list(range(100)), + allocated_size=100, + canary=canary3, + ) self.failUnlessEqual(len(writers3), 39) self.failUnlessEqual(len(ss._bucket_writers), 39) @@ -1463,10 +1496,9 @@ class MutableServer(unittest.TestCase): def test_writev_without_renew_lease(self): """ The helper method ``slot_testv_and_readv_and_writev`` does not renew - leases if ``set_implicit_bucket_lease_renewal(False)`` is called first. + leases if ``renew_leases```` is ``False``. """ ss = self.create("test_writev_without_renew_lease") - ss.set_implicit_slot_lease_renewal(False) storage_index = b"si2" secrets = ( @@ -1485,6 +1517,7 @@ class MutableServer(unittest.TestCase): sharenum: ([], datav, None), }, read_vector=[], + renew_leases=False, ) leases = list(ss.get_slot_leases(storage_index)) self.assertEqual([], leases) From 85977e48a7dde8ea29e196a6d466ae8685c2f6fc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Nov 2021 16:23:15 -0500 Subject: [PATCH 122/220] put this comment back and merge info from the two versions --- src/allmydata/storage/server.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 70d71f841..9b73963ae 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -635,12 +635,13 @@ class StorageServer(service.MultiService, Referenceable): """ Read data from shares and conditionally write some data to them. + :param bool renew_leases: If and only if this is ``True`` and the test + vectors pass then shares mentioned in ``test_and_write_vectors`` + that still exist after the changes are made will also have an + updated lease applied to them. + See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. - - :param bool renew_leases: If and only if this is ``True`` then renew - leases on all shares mentioned in ``test_and_write_vectors` that - still exist after the changes are made. 
""" start = self._get_current_time() self.count("writev") From dece67ee3ac8d2bd06b42e07a01492e3c4497ae6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Nov 2021 16:24:29 -0500 Subject: [PATCH 123/220] it is not the remote interface that varies anymore --- src/allmydata/test/test_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index a6c1ac2c2..076e9f3d1 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -615,8 +615,8 @@ class Server(unittest.TestCase): def test_allocate_without_lease_renewal(self): """ - ``remote_allocate_buckets`` does not renew leases on existing shares if - ``renew_leases`` is ``False``. + ``StorageServer._allocate_buckets`` does not renew leases on existing + shares if ``renew_leases`` is ``False``. """ first_lease = 456 second_lease = 543 From 6c2e85e99145652625ff7a4d6791a410ce13c742 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Nov 2021 16:25:36 -0500 Subject: [PATCH 124/220] put the comment back --- src/allmydata/test/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 076e9f3d1..4e40a76a5 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1496,7 +1496,7 @@ class MutableServer(unittest.TestCase): def test_writev_without_renew_lease(self): """ The helper method ``slot_testv_and_readv_and_writev`` does not renew - leases if ``renew_leases```` is ``False``. + leases if ``False`` is passed for the ``renew_leases`` parameter. """ ss = self.create("test_writev_without_renew_lease") From ad6017e63df94dbac7916f4673332b33deb8d5be Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 15 Nov 2021 08:08:14 -0500 Subject: [PATCH 125/220] clarify renew_leases docs on allocate_buckets --- src/allmydata/storage/server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 9b73963ae..bfbc10b59 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -284,8 +284,10 @@ class StorageServer(service.MultiService, Referenceable): """ Generic bucket allocation API. - :param bool renew_leases: If and only if this is ``True`` then - renew leases on existing shares in this bucket. + :param bool renew_leases: If and only if this is ``True`` then renew a + secret-matching lease on (or, if none match, add a new lease to) + existing shares in this bucket. Any *new* shares are given a new + lease regardless. 
""" # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated From 84c19f5468b04279e1826ed77dc1e7d4b4ae00e8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 15 Nov 2021 08:12:07 -0500 Subject: [PATCH 126/220] clarify renew_leases docs on slot_testv_and_readv_and_writev --- src/allmydata/storage/server.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index bfbc10b59..ee2ea1c61 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -639,8 +639,9 @@ class StorageServer(service.MultiService, Referenceable): :param bool renew_leases: If and only if this is ``True`` and the test vectors pass then shares mentioned in ``test_and_write_vectors`` - that still exist after the changes are made will also have an - updated lease applied to them. + that still exist after the changes are made will also have a + secret-matching lease renewed (or, if none match, a new lease + added). See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. From fcd634fc43c42c838ac415767bd6eeb05172c82b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 15 Nov 2021 13:34:46 -0500 Subject: [PATCH 127/220] some direct tests for the new utility function --- src/allmydata/test/common.py | 7 ++- src/allmydata/test/test_common_util.py | 78 +++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 8e97fa598..76127fb57 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -1220,8 +1220,13 @@ def disable_modules(*names): A context manager which makes modules appear to be missing while it is active. - :param *names: The names of the modules to disappear. + :param *names: The names of the modules to disappear. Only top-level + modules are supported (that is, "." is not allowed in any names). + This is an implementation shortcoming which could be lifted if + desired. """ + if any("." in name for name in names): + raise ValueError("Names containing '.' are not supported.") missing = object() modules = list(sys.modules.get(n, missing) for n in names) for n in names: diff --git a/src/allmydata/test/test_common_util.py b/src/allmydata/test/test_common_util.py index 55986d123..c141adc8d 100644 --- a/src/allmydata/test/test_common_util.py +++ b/src/allmydata/test/test_common_util.py @@ -10,16 +10,30 @@ from future.utils import PY2 if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import sys import random -import unittest +from hypothesis import given +from hypothesis.strategies import lists, sampled_from +from testtools.matchers import Equals +from twisted.python.reflect import ( + ModuleNotFound, + namedAny, +) + +from .common import ( + SyncTestCase, + disable_modules, +) from allmydata.test.common_util import flip_one_bit -class TestFlipOneBit(unittest.TestCase): +class TestFlipOneBit(SyncTestCase): def setUp(self): - random.seed(42) # I tried using version=1 on PY3 to avoid the if below, to no avail. + super(TestFlipOneBit, self).setUp() + # I tried using version=1 on PY3 to avoid the if below, to no avail. 
+ random.seed(42) def test_accepts_byte_string(self): actual = flip_one_bit(b'foo') @@ -27,3 +41,61 @@ class TestFlipOneBit(unittest.TestCase): def test_rejects_unicode_string(self): self.assertRaises(AssertionError, flip_one_bit, u'foo') + + + +def some_existing_modules(): + """ + Build the names of modules (as native strings) that exist and can be + imported. + """ + candidates = sorted( + name + for name + in sys.modules + if "." not in name + and sys.modules[name] is not None + ) + return sampled_from(candidates) + +class DisableModulesTests(SyncTestCase): + """ + Tests for ``disable_modules``. + """ + def setup_example(self): + return sys.modules.copy() + + def teardown_example(self, safe_modules): + sys.modules.update(safe_modules) + + @given(lists(some_existing_modules(), unique=True)) + def test_importerror(self, module_names): + """ + While the ``disable_modules`` context manager is active any import of the + modules identified by the names passed to it result in ``ImportError`` + being raised. + """ + def get_modules(): + return list( + namedAny(name) + for name + in module_names + ) + before_modules = get_modules() + + with disable_modules(*module_names): + for name in module_names: + with self.assertRaises(ModuleNotFound): + namedAny(name) + + after_modules = get_modules() + self.assertThat(before_modules, Equals(after_modules)) + + def test_dotted_names_rejected(self): + """ + If names with "." in them are passed to ``disable_modules`` then + ``ValueError`` is raised. + """ + with self.assertRaises(ValueError): + with disable_modules("foo.bar"): + pass From 304b0269e3afe6499eaa1a92abd4856c970da60b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 10:14:04 -0500 Subject: [PATCH 128/220] Apply suggestions from code review Co-authored-by: Jean-Paul Calderone --- docs/proposed/http-storage-node-protocol.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 44bda1205..bc109ac7e 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -400,8 +400,8 @@ Either renew or create a new lease on the bucket addressed by ``storage_index``. The renew secret and cancellation secret should be included as ``X-Tahoe-Authorization`` headers. For example:: - X-Tahoe-Authorization: lease-renew-secret - X-Tahoe-Authorization: lease-cancel-secret + X-Tahoe-Authorization: lease-renew-secret + X-Tahoe-Authorization: lease-cancel-secret If the ``lease-renew-secret`` value matches an existing lease then the expiration time of that lease will be changed to 31 days after the time of this operation. @@ -457,7 +457,6 @@ For example:: {"share-numbers": [1, 7, ...], "allocated-size": 12345} The request must include ``X-Tahoe-Authorization`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. -Typically this is a header sent by the server, but in Tahoe-LAFS keys are set by the client, so may as well reuse it. For example:: X-Tahoe-Authorization: lease-renew-secret @@ -499,7 +498,7 @@ Regarding upload secrets, the goal is for uploading and aborting (see next sections) to be authenticated by more than just the storage index. In the future, we may want to generate them in a way that allows resuming/canceling when the client has issues. In the short term, they can just be a random byte string. 
-The key security constraint is that each upload to each server has its own, unique upload key, +The primary security constraint is that each upload to each server has its own unique upload key, tied to uploading that particular storage index to this particular server. Rejected designs for upload secrets: @@ -527,7 +526,7 @@ The server must recognize when all of the data has been received and mark the sh The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret:: - X-Tahoe-Authorization: upload-secret + X-Tahoe-Authorization: upload-secret Responses: @@ -557,7 +556,7 @@ This cancels an *in-progress* upload. The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret:: - X-Tahoe-Authorization: upload-secret + X-Tahoe-Authorization: upload-secret The response code: @@ -658,7 +657,7 @@ there is no separate "create this storage index" operation as there is for the i The request must include ``X-Tahoe-Authorization`` headers with write enabler and lease secrets:: - X-Tahoe-Authorization: write-enabler + X-Tahoe-Authorization: write-enabler X-Tahoe-Authorization: lease-cancel-secret X-Tahoe-Authorization: lease-renew-secret From 7caffce8d509e1293248cb83d89e81e030b88e16 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 10:14:19 -0500 Subject: [PATCH 129/220] Another review suggestion Co-authored-by: Jean-Paul Calderone --- docs/proposed/http-storage-node-protocol.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index bc109ac7e..490d3f3ca 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -780,7 +780,7 @@ Immutable Data PUT /v1/lease/AAAAAAAAAAAAAAAA Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-cancel-secret jjkl - X-Tahoe-Authorization: upload-secret xyzf + X-Tahoe-Authorization: lease-renew-secret efgh 204 NO CONTENT From 41ec63f7586124eaaf9ca65bb4d6c4884e16b48f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 10:56:21 -0500 Subject: [PATCH 130/220] Passing first tests. --- src/allmydata/storage/http_client.py | 22 ++++++++++++++++++++-- src/allmydata/storage/http_server.py | 8 ++++---- src/allmydata/test/test_storage_http.py | 19 +++++++++++++++++-- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index e593fd379..412bf9cec 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -2,9 +2,13 @@ HTTP client that talks to the HTTP storage server. """ +import base64 + # TODO Make sure to import Python version? from cbor2 import loads, dumps + +from twisted.web.http_headers import Headers from twisted.internet.defer import inlineCallbacks, returnValue, fail from hyperlink import DecodedURL import treq @@ -21,6 +25,11 @@ def _decode_cbor(response): return fail(ClientException(response.code, response.phrase)) +def swissnum_auth_header(swissnum): + """Return value for ``Authentication`` header.""" + return b"Tahoe-LAFS " + base64.encodestring(swissnum).strip() + + class StorageClient(object): """ HTTP client that talks to the HTTP storage server. 
@@ -31,12 +40,21 @@ class StorageClient(object): self._swissnum = swissnum self._treq = treq + def _get_headers(self): + """Return the basic headers to be used by default.""" + headers = Headers() + headers.addRawHeader( + "Authorization", + swissnum_auth_header(self._swissnum), + ) + return headers + @inlineCallbacks def get_version(self): """ Return the version metadata for the server. """ - url = self._base_url.child("v1", "version") - response = yield self._treq.get(url) + url = self._base_url.click("/v1/version") + response = yield self._treq.get(url, headers=self._get_headers()) decoded_response = yield _decode_cbor(response) returnValue(decoded_response) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index b862fe7b1..2d6308baf 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -11,6 +11,7 @@ from twisted.web import http from cbor2 import loads, dumps from .server import StorageServer +from .http_client import swissnum_auth_header def _authorization_decorator(f): @@ -21,11 +22,10 @@ def _authorization_decorator(f): @wraps(f) def route(self, request, *args, **kwargs): - if ( - request.requestHeaders.getRawHeaders("Authorization", [None])[0] - != self._swissnum + if request.requestHeaders.getRawHeaders("Authorization", [None])[0] != str( + swissnum_auth_header(self._swissnum), "ascii" ): - request.setResponseCode(http.NOT_ALLOWED) + request.setResponseCode(http.UNAUTHORIZED) return b"" # authorization = request.requestHeaders.getRawHeaders("X-Tahoe-Authorization", []) # For now, just a placeholder: diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 663675f40..b659a6ace 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -10,7 +10,7 @@ from hyperlink import DecodedURL from ..storage.server import StorageServer from ..storage.http_server import HTTPServer -from ..storage.http_client import StorageClient +from ..storage.http_client import StorageClient, ClientException class HTTPTests(TestCase): @@ -23,11 +23,26 @@ class HTTPTests(TestCase): # TODO what should the swissnum _actually_ be? self._http_server = HTTPServer(self.storage_server, b"abcd") self.client = StorageClient( - DecodedURL.from_text("http://example.com"), + DecodedURL.from_text("http://127.0.0.1"), b"abcd", treq=StubTreq(self._http_server.get_resource()), ) + @inlineCallbacks + def test_bad_authentication(self): + """ + If the wrong swissnum is used, an ``Unauthorized`` response code is + returned. + """ + client = StorageClient( + DecodedURL.from_text("http://127.0.0.1"), + b"something wrong", + treq=StubTreq(self._http_server.get_resource()), + ) + with self.assertRaises(ClientException) as e: + yield client.get_version() + self.assertEqual(e.exception.args[0], 401) + @inlineCallbacks def test_version(self): """ From 671b670154f62cb6c7876c707f254a6c7b3a2f4f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:09:08 -0500 Subject: [PATCH 131/220] Some type annotations. 
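
These are PEP 484 type hints in their comment form, the only spelling
that still parses under Python 2: the interpreter ignores the trailing
``# type: ...`` comment while checkers such as mypy read it. Applied to
the header helper from this module, lightly adapted to be
self-contained (and using the non-deprecated ``b64encode``, as a later
patch in this series does)::

    import base64

    def swissnum_auth_header(swissnum):  # type: (bytes) -> bytes
        """Value for the ``Authorization`` header."""
        return b"Tahoe-LAFS " + base64.b64encode(swissnum).strip()
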
--- src/allmydata/storage/http_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 412bf9cec..8e14d1137 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -25,7 +25,7 @@ def _decode_cbor(response): return fail(ClientException(response.code, response.phrase)) -def swissnum_auth_header(swissnum): +def swissnum_auth_header(swissnum): # type: (bytes) -> bytes """Return value for ``Authentication`` header.""" return b"Tahoe-LAFS " + base64.encodestring(swissnum).strip() @@ -40,7 +40,7 @@ class StorageClient(object): self._swissnum = swissnum self._treq = treq - def _get_headers(self): + def _get_headers(self): # type: () -> Headers """Return the basic headers to be used by default.""" headers = Headers() headers.addRawHeader( From 171d1053ec803f2d2de57f0970fbad049d49f2da Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:09:17 -0500 Subject: [PATCH 132/220] CBOR content-type on responses. --- src/allmydata/storage/http_server.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 2d6308baf..91387c58f 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -57,8 +57,6 @@ def _route(app, *route_args, **route_kwargs): class HTTPServer(object): """ A HTTP interface to the storage server. - - TODO returning CBOR should set CBOR content-type """ _app = Klein() @@ -71,6 +69,12 @@ class HTTPServer(object): """Return twisted.web Resource for this object.""" return self._app.resource() + def _cbor(self, request, data): + """Return CBOR-encoded data.""" + request.setHeader("Content-Type", "application/cbor") + # TODO if data is big, maybe want to use a temporary file eventually... + return dumps(data) + @_route(_app, "/v1/version", methods=["GET"]) def version(self, request, authorization): - return dumps(self._storage_server.remote_get_version()) + return self._cbor(request, self._storage_server.remote_get_version()) From c195f895db7bd3ec7a8618956a71e67152e32df7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:16:26 -0500 Subject: [PATCH 133/220] Python 2 support. --- src/allmydata/storage/http_client.py | 19 ++++++++++++++++++- src/allmydata/storage/http_server.py | 18 ++++++++++++++++-- src/allmydata/test/test_storage_http.py | 12 ++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 8e14d1137..4a143a60b 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -2,6 +2,21 @@ HTTP client that talks to the HTTP storage server. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 + +if PY2: + # fmt: off + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + # fmt: on +else: + from typing import Union + from treq.testing import StubTreq + import base64 # TODO Make sure to import Python version? @@ -35,7 +50,9 @@ class StorageClient(object): HTTP client that talks to the HTTP storage server. 
""" - def __init__(self, url: DecodedURL, swissnum, treq=treq): + def __init__( + self, url, swissnum, treq=treq + ): # type: (DecodedURL, bytes, Union[treq,StubTreq]) -> None self._base_url = url self._swissnum = swissnum self._treq = treq diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 91387c58f..373d31e2e 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -2,6 +2,18 @@ HTTP server for storage. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 + +if PY2: + # fmt: off + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + # fmt: on + from functools import wraps from klein import Klein @@ -61,12 +73,14 @@ class HTTPServer(object): _app = Klein() - def __init__(self, storage_server: StorageServer, swissnum): + def __init__( + self, storage_server, swissnum + ): # type: (StorageServer, bytes) -> None self._storage_server = storage_server self._swissnum = swissnum def get_resource(self): - """Return twisted.web Resource for this object.""" + """Return twisted.web ``Resource`` for this object.""" return self._app.resource() def _cbor(self, request, data): diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index b659a6ace..9ba8adf21 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -2,6 +2,18 @@ Tests for HTTP storage client + server. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 + +if PY2: + # fmt: off + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + # fmt: on + from twisted.trial.unittest import TestCase from twisted.internet.defer import inlineCallbacks From a64778ddb0fb774ea43fa8a3c59be67b84e957ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:28:13 -0500 Subject: [PATCH 134/220] Flakes. --- src/allmydata/storage/http_client.py | 2 +- src/allmydata/storage/http_server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 4a143a60b..d5ca6caec 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -20,7 +20,7 @@ else: import base64 # TODO Make sure to import Python version? -from cbor2 import loads, dumps +from cbor2 import loads from twisted.web.http_headers import Headers diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 373d31e2e..3baa336fa 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -20,7 +20,7 @@ from klein import Klein from twisted.web import http # TODO Make sure to use pure Python versions? -from cbor2 import loads, dumps +from cbor2 import dumps from .server import StorageServer from .http_client import swissnum_auth_header From e5b5b50602268314e035c89e56b740c745b85c84 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:28:19 -0500 Subject: [PATCH 135/220] Duplicate package. 
--- nix/tahoe-lafs.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index a6a8a69ec..8092dfaa7 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -95,7 +95,7 @@ EOF propagatedBuildInputs = with python.pkgs; [ twisted foolscap zfec appdirs setuptoolsTrial pyasn1 zope_interface - service-identity pyyaml magic-wormhole treq + service-identity pyyaml magic-wormhole eliot autobahn cryptography netifaces setuptools future pyutil distro configparser collections-extended ]; From a1424e90e18ae1dfbed245277120fdf3f0aaedc8 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:34:44 -0500 Subject: [PATCH 136/220] Another duplicate. --- nix/tahoe-lafs.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index 8092dfaa7..df12f21d4 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -4,7 +4,7 @@ , setuptools, setuptoolsTrial, pyasn1, zope_interface , service-identity, pyyaml, magic-wormhole, treq, appdirs , beautifulsoup4, eliot, autobahn, cryptography, netifaces -, html5lib, pyutil, distro, configparser, klein, treq, cbor2 +, html5lib, pyutil, distro, configparser, klein, cbor2 }: python.pkgs.buildPythonPackage rec { # Most of the time this is not exactly the release version (eg 1.16.0). From f549488bb508a8377d968d16addb07a98559d8fd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Nov 2021 11:47:09 -0500 Subject: [PATCH 137/220] Don't use a deprecated API. --- src/allmydata/storage/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index d5ca6caec..e1743343d 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -42,7 +42,7 @@ def _decode_cbor(response): def swissnum_auth_header(swissnum): # type: (bytes) -> bytes """Return value for ``Authentication`` header.""" - return b"Tahoe-LAFS " + base64.encodestring(swissnum).strip() + return b"Tahoe-LAFS " + base64.b64encode(swissnum).strip() class StorageClient(object): From 3b69df36b0604a0981c92a4d4c0da0611bc04535 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 23 Oct 2021 15:38:51 -0600 Subject: [PATCH 138/220] crawler: pickle -> json --- src/allmydata/storage/crawler.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index bd4f4f432..d7dee78dc 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -11,15 +11,12 @@ from __future__ import print_function from future.utils import PY2, PY3 if PY2: - # We don't import bytes, object, dict, and list just in case they're used, - # so as not to create brittle pickles with random magic objects. 
- from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min -import os, time, struct -try: - import cPickle as pickle -except ImportError: - import pickle # type: ignore +import os +import time +import json +import struct from twisted.internet import reactor from twisted.application import service from allmydata.storage.common import si_b2a @@ -214,7 +211,7 @@ class ShareCrawler(service.MultiService): # None if we are sleeping between cycles try: with open(self.statefile, "rb") as f: - state = pickle.load(f) + state = json.load(f) except Exception: state = {"version": 1, "last-cycle-finished": None, @@ -252,9 +249,7 @@ class ShareCrawler(service.MultiService): self.state["last-complete-prefix"] = last_complete_prefix tmpfile = self.statefile + ".tmp" with open(tmpfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). - pickle.dump(self.state, f, protocol=2) + json.dump(self.state, f) fileutil.move_into_place(tmpfile, self.statefile) def startService(self): From 758dcea2d4a73d472050d5f21bc84217a71802b8 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 23 Oct 2021 16:19:27 -0600 Subject: [PATCH 139/220] news --- newsfragments/3825.security | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 newsfragments/3825.security diff --git a/newsfragments/3825.security b/newsfragments/3825.security new file mode 100644 index 000000000..b16418d2b --- /dev/null +++ b/newsfragments/3825.security @@ -0,0 +1,5 @@ +The lease-checker now uses JSON instead of pickle to serialize its state. + +Once you have run this version the lease state files will be stored in JSON +and an older version of the software won't load them (it simply won't notice +them so it will appear to have never run). 
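
A sketch of why the switch is safe for this state: everything the
crawler persists is plain dicts, lists, strings and numbers, which JSON
round-trips, and the follow-up patches convert the few non-JSON-able
values (tuples, integer dictionary keys) to lists and strings. Assuming
only the default state dictionary from ``crawler.py``::

    import json

    state = {"version": 1, "last-cycle-finished": None}
    serialized = json.dumps(state)
    # Unlike pickle, JSON carries no executable payload, so loading a
    # crafted state file cannot run attacker-supplied code -- the point
    # of this security fix.
    assert json.loads(serialized) == state
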
From f7b385f9544f48ebfc7da69b314d3d1dda30ee2c Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 24 Oct 2021 22:27:59 -0600 Subject: [PATCH 140/220] play nice with subclasses --- src/allmydata/storage/crawler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index d7dee78dc..b931f1ab5 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -248,8 +248,12 @@ class ShareCrawler(service.MultiService): last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix tmpfile = self.statefile + ".tmp" + + # Note: we use self.get_state() here because e.g + # LeaseCheckingCrawler stores non-JSON-able state in + # self.state() but converts it in self.get_state() with open(tmpfile, "wb") as f: - json.dump(self.state, f) + json.dump(self.get_state(), f) fileutil.move_into_place(tmpfile, self.statefile) def startService(self): From bb70e00065ab42f0e9f8faabff50d587532f49f7 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 24 Oct 2021 23:47:24 -0600 Subject: [PATCH 141/220] Make internal state JSON-able for lease-crawler --- src/allmydata/storage/expirer.py | 53 +++++++++++++------------- src/allmydata/test/test_storage_web.py | 32 ++++++++-------- src/allmydata/web/storage.py | 11 +++--- 3 files changed, 49 insertions(+), 47 deletions(-) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 7c6cd8218..4513dadb2 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -5,10 +5,11 @@ from __future__ import unicode_literals from future.utils import PY2 if PY2: - # We omit anything that might end up in pickle, just in case. - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 - -import time, os, pickle, struct + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import json +import time +import os +import struct from allmydata.storage.crawler import ShareCrawler from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ @@ -95,9 +96,7 @@ class LeaseCheckingCrawler(ShareCrawler): if not os.path.exists(self.historyfile): history = {} # cyclenum -> dict with open(self.historyfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). 
- pickle.dump(history, f, protocol=2) + json.dump(history, f) def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() @@ -142,7 +141,7 @@ class LeaseCheckingCrawler(ShareCrawler): struct.error): twlog.msg("lease-checker error processing %s" % sharefile) twlog.err() - which = (storage_index_b32, shnum) + which = [storage_index_b32, shnum] self.state["cycle-to-date"]["corrupt-shares"].append(which) wks = (1, 1, 1, "unknown") would_keep_shares.append(wks) @@ -212,7 +211,7 @@ class LeaseCheckingCrawler(ShareCrawler): num_valid_leases_configured += 1 so_far = self.state["cycle-to-date"] - self.increment(so_far["leases-per-share-histogram"], num_leases, 1) + self.increment(so_far["leases-per-share-histogram"], str(num_leases), 1) self.increment_space("examined", s, sharetype) would_keep_share = [1, 1, 1, sharetype] @@ -291,12 +290,14 @@ class LeaseCheckingCrawler(ShareCrawler): start = self.state["current-cycle-start-time"] now = time.time() - h["cycle-start-finish-times"] = (start, now) + h["cycle-start-finish-times"] = [start, now] h["expiration-enabled"] = self.expiration_enabled - h["configured-expiration-mode"] = (self.mode, - self.override_lease_duration, - self.cutoff_date, - self.sharetypes_to_expire) + h["configured-expiration-mode"] = [ + self.mode, + self.override_lease_duration, + self.cutoff_date, + self.sharetypes_to_expire, + ] s = self.state["cycle-to-date"] @@ -315,15 +316,13 @@ class LeaseCheckingCrawler(ShareCrawler): h["space-recovered"] = s["space-recovered"].copy() with open(self.historyfile, "rb") as f: - history = pickle.load(f) - history[cycle] = h + history = json.load(f) + history[str(cycle)] = h while len(history) > 10: - oldcycles = sorted(history.keys()) - del history[oldcycles[0]] + oldcycles = sorted(int(k) for k in history.keys()) + del history[str(oldcycles[0])] with open(self.historyfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). 
- pickle.dump(history, f, protocol=2) + json.dump(history, f) def get_state(self): """In addition to the crawler state described in @@ -393,7 +392,7 @@ class LeaseCheckingCrawler(ShareCrawler): state = ShareCrawler.get_state(self) # does a shallow copy with open(self.historyfile, "rb") as f: - history = pickle.load(f) + history = json.load(f) state["history"] = history if not progress["cycle-in-progress"]: @@ -406,10 +405,12 @@ class LeaseCheckingCrawler(ShareCrawler): lah = so_far["lease-age-histogram"] so_far["lease-age-histogram"] = self.convert_lease_age_histogram(lah) so_far["expiration-enabled"] = self.expiration_enabled - so_far["configured-expiration-mode"] = (self.mode, - self.override_lease_duration, - self.cutoff_date, - self.sharetypes_to_expire) + so_far["configured-expiration-mode"] = [ + self.mode, + self.override_lease_duration, + self.cutoff_date, + self.sharetypes_to_expire, + ] so_far_sr = so_far["space-recovered"] remaining_sr = {} diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 38e380223..b9fa548d3 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -376,7 +376,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) self.failUnlessEqual(lah, [ (0.0, DAY, 1) ] ) - self.failUnlessEqual(so_far["leases-per-share-histogram"], {1: 1}) + self.failUnlessEqual(so_far["leases-per-share-histogram"], {"1": 1}) self.failUnlessEqual(so_far["corrupt-shares"], []) sr1 = so_far["space-recovered"] self.failUnlessEqual(sr1["examined-buckets"], 1) @@ -427,9 +427,9 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failIf("cycle-to-date" in s) self.failIf("estimated-remaining-cycle" in s) self.failIf("estimated-current-cycle" in s) - last = s["history"][0] + last = s["history"]["0"] self.failUnlessIn("cycle-start-finish-times", last) - self.failUnlessEqual(type(last["cycle-start-finish-times"]), tuple) + self.failUnlessEqual(type(last["cycle-start-finish-times"]), list) self.failUnlessEqual(last["expiration-enabled"], False) self.failUnlessIn("configured-expiration-mode", last) @@ -437,9 +437,9 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): lah = last["lease-age-histogram"] self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) - self.failUnlessEqual(lah, [ (0.0, DAY, 6) ] ) + self.failUnlessEqual(lah, [ [0.0, DAY, 6] ] ) - self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) + self.failUnlessEqual(last["leases-per-share-histogram"], {"1": 2, "2": 2}) self.failUnlessEqual(last["corrupt-shares"], []) rec = last["space-recovered"] @@ -587,12 +587,12 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], - ("age", 2000, None, ("mutable", "immutable"))) - self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) + ["age", 2000, None, ["mutable", "immutable"]]) + self.failUnlessEqual(last["leases-per-share-histogram"], {"1": 2, "2": 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) @@ -731,14 +731,14 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() - last = s["history"][0] + last = 
s["history"]["0"] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], - ("cutoff-date", None, then, - ("mutable", "immutable"))) + ["cutoff-date", None, then, + ["mutable", "immutable"]]) self.failUnlessEqual(last["leases-per-share-histogram"], - {1: 2, 2: 2}) + {"1": 2, "2": 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) @@ -924,8 +924,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): s = lc.get_state() h = s["history"] self.failUnlessEqual(len(h), 10) - self.failUnlessEqual(max(h.keys()), 15) - self.failUnlessEqual(min(h.keys()), 6) + self.failUnlessEqual(max(int(k) for k in h.keys()), 15) + self.failUnlessEqual(min(int(k) for k in h.keys()), 6) d.addCallback(_check) return d @@ -1014,7 +1014,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): def _check(ignored): s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] rec = last["space-recovered"] self.failUnlessEqual(rec["configured-buckets"], 4) self.failUnlessEqual(rec["configured-shares"], 4) @@ -1110,7 +1110,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): def _after_first_cycle(ignored): s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 5) self.failUnlessEqual(rec["examined-shares"], 3) diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py index f2f021a15..e568d5ed5 100644 --- a/src/allmydata/web/storage.py +++ b/src/allmydata/web/storage.py @@ -256,8 +256,8 @@ class StorageStatusElement(Element): if so_far["corrupt-shares"]: add("Corrupt shares:", - T.ul( (T.li( ["SI %s shnum %d" % corrupt_share - for corrupt_share in so_far["corrupt-shares"] ] + T.ul( (T.li( ["SI %s shnum %d" % (si, shnum) + for si, shnum in so_far["corrupt-shares"] ] )))) return tag("Current cycle:", p) @@ -267,7 +267,8 @@ class StorageStatusElement(Element): h = lc.get_state()["history"] if not h: return "" - last = h[max(h.keys())] + biggest = str(max(int(k) for k in h.keys())) + last = h[biggest] start, end = last["cycle-start-finish-times"] tag("Last complete cycle (which took %s and finished %s ago)" @@ -290,8 +291,8 @@ class StorageStatusElement(Element): if last["corrupt-shares"]: add("Corrupt shares:", - T.ul( (T.li( ["SI %s shnum %d" % corrupt_share - for corrupt_share in last["corrupt-shares"] ] + T.ul( (T.li( ["SI %s shnum %d" % (si, shnum) + for si, shnum in last["corrupt-shares"] ] )))) return tag(p) From fa6950f08dc054ec770af1c402e8eb5d3c581b3e Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 12:18:28 -0600 Subject: [PATCH 142/220] an old pickle-format lease-checker state file --- src/allmydata/test/data/lease_checker.state | 545 ++++++++++++++++++++ 1 file changed, 545 insertions(+) create mode 100644 src/allmydata/test/data/lease_checker.state diff --git a/src/allmydata/test/data/lease_checker.state b/src/allmydata/test/data/lease_checker.state new file mode 100644 index 000000000..b32554434 --- /dev/null +++ b/src/allmydata/test/data/lease_checker.state @@ -0,0 +1,545 @@ +(dp1 +S'last-complete-prefix' +p2 +NsS'version' +p3 +I1 +sS'current-cycle-start-time' +p4 +F1635003106.611748 +sS'last-cycle-finished' +p5 +I312 +sS'cycle-to-date' +p6 +(dp7 +Vleases-per-share-histogram +p8 +(dp9 +I1 +I36793 +sI2 +I1 +ssVspace-recovered +p10 +(dp11 +Vexamined-buckets-immutable +p12 +I17183 +sVconfigured-buckets-mutable +p13 +I0 +sVexamined-shares-mutable +p14 +I1796 
+sVoriginal-shares-mutable +p15 +I1563 +sVconfigured-buckets-immutable +p16 +I0 +sVoriginal-shares-immutable +p17 +I27926 +sVoriginal-diskbytes-immutable +p18 +I431149056 +sVexamined-shares-immutable +p19 +I34998 +sVoriginal-buckets +p20 +I14661 +sVactual-shares-immutable +p21 +I0 +sVconfigured-shares +p22 +I0 +sVoriginal-buckets-immutable +p23 +I13761 +sVactual-diskbytes +p24 +I4096 +sVactual-shares-mutable +p25 +I0 +sVconfigured-buckets +p26 +I1 +sVexamined-buckets-unknown +p27 +I14 +sVactual-sharebytes +p28 +I0 +sVoriginal-shares +p29 +I29489 +sVoriginal-sharebytes +p30 +I312664812 +sVexamined-sharebytes-immutable +p31 +I383801602 +sVactual-shares +p32 +I0 +sVactual-sharebytes-immutable +p33 +I0 +sVoriginal-diskbytes +p34 +I441643008 +sVconfigured-diskbytes-mutable +p35 +I0 +sVconfigured-sharebytes-immutable +p36 +I0 +sVconfigured-shares-mutable +p37 +I0 +sVactual-diskbytes-immutable +p38 +I0 +sVconfigured-diskbytes-immutable +p39 +I0 +sVoriginal-diskbytes-mutable +p40 +I10489856 +sVactual-sharebytes-mutable +p41 +I0 +sVconfigured-sharebytes +p42 +I0 +sVexamined-shares +p43 +I36794 +sVactual-diskbytes-mutable +p44 +I0 +sVactual-buckets +p45 +I1 +sVoriginal-buckets-mutable +p46 +I899 +sVconfigured-sharebytes-mutable +p47 +I0 +sVexamined-sharebytes +p48 +I390369660 +sVoriginal-sharebytes-immutable +p49 +I308125753 +sVoriginal-sharebytes-mutable +p50 +I4539059 +sVactual-buckets-mutable +p51 +I0 +sVexamined-diskbytes-mutable +p52 +I9154560 +sVexamined-buckets-mutable +p53 +I1043 +sVconfigured-shares-immutable +p54 +I0 +sVexamined-diskbytes +p55 +I476598272 +sVactual-buckets-immutable +p56 +I0 +sVexamined-sharebytes-mutable +p57 +I6568058 +sVexamined-buckets +p58 +I18241 +sVconfigured-diskbytes +p59 +I4096 +sVexamined-diskbytes-immutable +p60 +I467443712 +ssVcorrupt-shares +p61 +(lp62 +(V2dn6xnlnsqwtnapwxfdivpm3s4 +p63 +I4 +tp64 +a(g63 +I1 +tp65 +a(V2rrzthwsrrxolevmwdvbdy3rqi +p66 +I4 +tp67 +a(g66 +I1 +tp68 +a(V2skfngcto6h7eqmn4uo7ntk3ne +p69 +I4 +tp70 +a(g69 +I1 +tp71 +a(V32d5swqpqx2mwix7xmqzvhdwje +p72 +I4 +tp73 +a(g72 +I1 +tp74 +a(V5mmayp66yflmpon3o6unsnbaca +p75 +I4 +tp76 +a(g75 +I1 +tp77 +a(V6ixhpvbtre7fnrl6pehlrlflc4 +p78 +I4 +tp79 +a(g78 +I1 +tp80 +a(Vewzhvswjsz4vp2bqkb6mi3bz2u +p81 +I4 +tp82 +a(g81 +I1 +tp83 +a(Vfu7pazf6ogavkqj6z4q5qqex3u +p84 +I4 +tp85 +a(g84 +I1 +tp86 +a(Vhbyjtqvpcimwxiyqbcbbdn2i4a +p87 +I4 +tp88 +a(g87 +I1 +tp89 +a(Vpmcjbdkbjdl26k3e6yja77femq +p90 +I4 +tp91 +a(g90 +I1 +tp92 +a(Vr6swof4v2uttbiiqwj5pi32cm4 +p93 +I4 +tp94 +a(g93 +I1 +tp95 +a(Vt45v5akoktf53evc2fi6gwnv6y +p96 +I4 +tp97 +a(g96 +I1 +tp98 +a(Vy6zb4faar3rdvn3e6pfg4wlotm +p99 +I4 +tp100 +a(g99 +I1 +tp101 +a(Vz3yghutvqoqbchjao4lndnrh3a +p102 +I4 +tp103 +a(g102 +I1 +tp104 +asVlease-age-histogram +p105 +(dp106 +(I45619200 +I45705600 +tp107 +I4 +s(I12441600 +I12528000 +tp108 +I78 +s(I11923200 +I12009600 +tp109 +I89 +s(I33436800 +I33523200 +tp110 +I7 +s(I37411200 +I37497600 +tp111 +I4 +s(I38361600 +I38448000 +tp112 +I5 +s(I4665600 +I4752000 +tp113 +I256 +s(I11491200 +I11577600 +tp114 +I20 +s(I10713600 +I10800000 +tp115 +I183 +s(I42076800 +I42163200 +tp116 +I4 +s(I47865600 +I47952000 +tp117 +I7 +s(I3110400 +I3196800 +tp118 +I328 +s(I5788800 +I5875200 +tp119 +I954 +s(I9331200 +I9417600 +tp120 +I12 +s(I7430400 +I7516800 +tp121 +I7228 +s(I1555200 +I1641600 +tp122 +I492 +s(I37929600 +I38016000 +tp123 +I3 +s(I38880000 +I38966400 +tp124 +I3 +s(I12528000 +I12614400 +tp125 +I193 +s(I10454400 +I10540800 +tp126 +I1239 +s(I11750400 +I11836800 +tp127 +I7 +s(I950400 +I1036800 +tp128 +I4435 +s(I44409600 +I44496000 +tp129 +I13 
+s(I12787200 +I12873600 +tp130 +I218 +s(I10368000 +I10454400 +tp131 +I117 +s(I3283200 +I3369600 +tp132 +I86 +s(I7516800 +I7603200 +tp133 +I993 +s(I42336000 +I42422400 +tp134 +I33 +s(I46310400 +I46396800 +tp135 +I1 +s(I39052800 +I39139200 +tp136 +I51 +s(I7603200 +I7689600 +tp137 +I2004 +s(I10540800 +I10627200 +tp138 +I16 +s(I36374400 +I36460800 +tp139 +I3 +s(I3369600 +I3456000 +tp140 +I79 +s(I12700800 +I12787200 +tp141 +I25 +s(I4838400 +I4924800 +tp142 +I386 +s(I10972800 +I11059200 +tp143 +I122 +s(I8812800 +I8899200 +tp144 +I57 +s(I38966400 +I39052800 +tp145 +I61 +s(I3196800 +I3283200 +tp146 +I628 +s(I9244800 +I9331200 +tp147 +I73 +s(I30499200 +I30585600 +tp148 +I5 +s(I12009600 +I12096000 +tp149 +I329 +s(I12960000 +I13046400 +tp150 +I8 +s(I12614400 +I12700800 +tp151 +I210 +s(I3801600 +I3888000 +tp152 +I32 +s(I10627200 +I10713600 +tp153 +I43 +s(I44928000 +I45014400 +tp154 +I2 +s(I8208000 +I8294400 +tp155 +I38 +s(I8640000 +I8726400 +tp156 +I32 +s(I7344000 +I7430400 +tp157 +I12689 +s(I49075200 +I49161600 +tp158 +I19 +s(I2764800 +I2851200 +tp159 +I76 +s(I2592000 +I2678400 +tp160 +I40 +s(I2073600 +I2160000 +tp161 +I388 +s(I37497600 +I37584000 +tp162 +I11 +s(I1641600 +I1728000 +tp163 +I78 +s(I12873600 +I12960000 +tp164 +I5 +s(I1814400 +I1900800 +tp165 +I1860 +s(I40176000 +I40262400 +tp166 +I1 +s(I3715200 +I3801600 +tp167 +I104 +s(I2332800 +I2419200 +tp168 +I12 +s(I2678400 +I2764800 +tp169 +I278 +s(I12268800 +I12355200 +tp170 +I2 +s(I28771200 +I28857600 +tp171 +I6 +s(I41990400 +I42076800 +tp172 +I10 +sssS'last-complete-bucket' +p173 +NsS'current-cycle' +p174 +Ns. \ No newline at end of file From f81e4e2d25e4d12362f05824075915d52b3878cc Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 13:15:38 -0600 Subject: [PATCH 143/220] refactor to use serializers / pickle->json upgraders --- src/allmydata/storage/crawler.py | 143 +++++++++++++++++++++++-- src/allmydata/storage/expirer.py | 82 +++++++++++--- src/allmydata/storage/server.py | 1 + src/allmydata/test/test_storage_web.py | 10 +- 4 files changed, 212 insertions(+), 24 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index b931f1ab5..48b03ec8b 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -19,12 +19,145 @@ import json import struct from twisted.internet import reactor from twisted.application import service +from twisted.python.filepath import FilePath from allmydata.storage.common import si_b2a from allmydata.util import fileutil class TimeSliceExceeded(Exception): pass + +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list + # ["leases-per-share-histogram"] gets str keys instead of int + # ["cycle-start-finish-times"] from 2-tuple to list + # ["configured-expiration-mode"] from 4-tuple to list + # ["history"] keys are strings + if state["version"] != 1: + raise ValueError( + "Unknown version {version} in pickle state".format(**state) + ) + + def convert_lpsh(value): + return { + str(k): v + for k, v in value.items() + } + + def convert_cem(value): + # original is a 4-tuple, with the last element being a 2-tuple + # .. 
convert both to lists + return [ + value[0], + value[1], + value[2], + list(value[3]), + ] + + def convert_history(value): + print("convert history") + print(value) + return { + str(k): v + for k, v in value + } + + converters = { + "cycle-to-date": list, + "leases-per-share-histogram": convert_lpsh, + "cycle-starte-finish-times": list, + "configured-expiration-mode": convert_cem, + "history": convert_history, + } + + def convert_value(key, value): + converter = converters.get(key, None) + if converter is None: + return value + return converter(value) + + new_state = { + k: convert_value(k, v) + for k, v in state.items() + } + return new_state + + +def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): + """ + :param FilePath state_path: the filepath to ensure is json + + :param Callable[dict] convert_pickle: function to change + pickle-style state into JSON-style state + + :returns unicode: the local path where the state is stored + + If this state path is JSON, simply return it. + + If this state is pickle, convert to the JSON format and return the + JSON path. + """ + if state_path.path.endswith(".json"): + return state_path.path + + json_state_path = state_path.siblingExtension(".json") + + # if there's no file there at all, we're done because there's + # nothing to upgrade + if not state_path.exists(): + return json_state_path.path + + # upgrade the pickle data to JSON + import pickle + with state_path.open("r") as f: + state = pickle.load(f) + state = convert_pickle(state) + json_state_path = state_path.siblingExtension(".json") + with json_state_path.open("w") as f: + json.dump(state, f) + # we've written the JSON, delete the pickle + state_path.remove() + return json_state_path.path + + +class _LeaseStateSerializer(object): + """ + Read and write state for LeaseCheckingCrawler. This understands + how to read the legacy pickle format files and upgrade them to the + new JSON format (which will occur automatically). + """ + + def __init__(self, state_path): + self._path = FilePath( + _maybe_upgrade_pickle_to_json( + FilePath(state_path), + _convert_pickle_state_to_json, + ) + ) + # XXX want this to .. 
load and save the state + # - if the state is pickle-only: + # - load it and convert to json format + # - save json + # - delete pickle + # - if the state is json, load it + + def load(self): + with self._path.open("r") as f: + return json.load(f) + + def save(self, data): + tmpfile = self._path.siblingExtension(".tmp") + with tmpfile.open("wb") as f: + json.dump(data, f) + fileutil.move_into_place(tmpfile.path, self._path.path) + return None + + class ShareCrawler(service.MultiService): """A ShareCrawler subclass is attached to a StorageServer, and periodically walks all of its shares, processing each one in some @@ -87,7 +220,7 @@ class ShareCrawler(service.MultiService): self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir - self.statefile = statefile + self._state_serializer = _LeaseStateSerializer(statefile) self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] if PY3: @@ -210,8 +343,7 @@ class ShareCrawler(service.MultiService): # of the last bucket to be processed, or # None if we are sleeping between cycles try: - with open(self.statefile, "rb") as f: - state = json.load(f) + state = self._state_serializer.load() except Exception: state = {"version": 1, "last-cycle-finished": None, @@ -247,14 +379,11 @@ class ShareCrawler(service.MultiService): else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix - tmpfile = self.statefile + ".tmp" # Note: we use self.get_state() here because e.g # LeaseCheckingCrawler stores non-JSON-able state in # self.state() but converts it in self.get_state() - with open(tmpfile, "wb") as f: - json.dump(self.get_state(), f) - fileutil.move_into_place(tmpfile, self.statefile) + self._state_serializer.save(self.get_state()) def startService(self): # arrange things to look like we were just sleeping, so diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 4513dadb2..d2f48004a 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -10,11 +10,72 @@ import json import time import os import struct -from allmydata.storage.crawler import ShareCrawler +from allmydata.storage.crawler import ( + ShareCrawler, + _maybe_upgrade_pickle_to_json, +) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ UnknownImmutableContainerVersionError from twisted.python import log as twlog +from twisted.python.filepath import FilePath + + +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + print("CONVERT", state) + for k, v in state.items(): + print(k, v) + if state["version"] != 1: + raise ValueError( + "Unknown version {version} in pickle state".format(**state) + ) + + return state + + +class _HistorySerializer(object): + """ + Serialize the 'history' file of the lease-crawler state. This is + "storage/history.state" for the pickle or + "storage/history.state.json" for the new JSON format. + """ + + def __init__(self, history_path): + self._path = FilePath( + _maybe_upgrade_pickle_to_json( + FilePath(history_path), + _convert_pickle_state_to_json, + ) + ) + if not self._path.exists(): + with self._path.open("wb") as f: + json.dump({}, f) + + def read(self): + """ + Deserialize the existing data. 
+ + :return dict: the existing history state + """ + assert self._path is not None, "Not initialized" + with self._path.open("rb") as f: + history = json.load(f) + return history + + def write(self, new_history): + """ + Serialize the existing data as JSON. + """ + assert self._path is not None, "Not initialized" + with self._path.open("wb") as f: + json.dump(new_history, f) + return None + class LeaseCheckingCrawler(ShareCrawler): """I examine the leases on all shares, determining which are still valid @@ -64,7 +125,8 @@ class LeaseCheckingCrawler(ShareCrawler): override_lease_duration, # used if expiration_mode=="age" cutoff_date, # used if expiration_mode=="cutoff-date" sharetypes): - self.historyfile = historyfile + self._history_serializer = _HistorySerializer(historyfile) + ##self.historyfile = historyfile self.expiration_enabled = expiration_enabled self.mode = mode self.override_lease_duration = None @@ -92,12 +154,6 @@ class LeaseCheckingCrawler(ShareCrawler): for k in so_far: self.state["cycle-to-date"].setdefault(k, so_far[k]) - # initialize history - if not os.path.exists(self.historyfile): - history = {} # cyclenum -> dict - with open(self.historyfile, "wb") as f: - json.dump(history, f) - def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() so_far = {"corrupt-shares": [], @@ -315,14 +371,12 @@ class LeaseCheckingCrawler(ShareCrawler): # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() - with open(self.historyfile, "rb") as f: - history = json.load(f) + history = self._history_serializer.read() history[str(cycle)] = h while len(history) > 10: oldcycles = sorted(int(k) for k in history.keys()) del history[str(oldcycles[0])] - with open(self.historyfile, "wb") as f: - json.dump(history, f) + self._history_serializer.write(history) def get_state(self): """In addition to the crawler state described in @@ -391,9 +445,7 @@ class LeaseCheckingCrawler(ShareCrawler): progress = self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy - with open(self.historyfile, "rb") as f: - history = json.load(f) - state["history"] = history + state["history"] = self._history_serializer.read() if not progress["cycle-in-progress"]: del state["cycle-to-date"] diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 49cb7fa82..9211535b7 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -57,6 +57,7 @@ DEFAULT_RENEWAL_TIME = 31 * 24 * 60 * 60 @implementer(RIStorageServer, IStatsProducer) class StorageServer(service.MultiService, Referenceable): name = 'storage' + # only the tests change this to anything else LeaseCheckerClass = LeaseCheckingCrawler def __init__(self, storedir, nodeid, reserved_space=0, diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index b9fa548d3..d91242449 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -25,14 +25,20 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.application import service from twisted.web.template import flattenString +from twisted.python.filepath import FilePath from foolscap.api import fireEventually from allmydata.util import fileutil, hashutil, base32, pollmixin from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError from allmydata.storage.server import StorageServer -from allmydata.storage.crawler 
import BucketCountingCrawler -from allmydata.storage.expirer import LeaseCheckingCrawler +from allmydata.storage.crawler import ( + BucketCountingCrawler, + _LeaseStateSerializer, +) +from allmydata.storage.expirer import ( + LeaseCheckingCrawler, +) from allmydata.web.storage import ( StorageStatus, StorageStatusElement, From bf5e682d71e086f351126129c2e586a80442c2bb Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 13:17:46 -0600 Subject: [PATCH 144/220] test upgrade of main state works --- src/allmydata/test/test_storage_web.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index d91242449..0b287d667 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1145,6 +1145,22 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d + def test_deserialize_pickle(self): + """ + The crawler can read existing state from the old pickle format + """ + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state") + test_pickle = FilePath("lease_checker.state") + with test_pickle.open("w") as local, original_pickle.open("r") as remote: + local.write(remote.read()) + + serial = _LeaseStateSerializer(test_pickle.path) + + # the (existing) state file should have been upgraded to JSON + self.assertNot(test_pickle.exists()) + self.assertTrue(test_pickle.siblingExtension(".json").exists()) + + class WebStatus(unittest.TestCase, pollmixin.PollMixin): From 89c2aacadca5cc7ba13f4afda2c4d3817ea9b5c0 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 15:52:01 -0600 Subject: [PATCH 145/220] working test of 'in the wild' data, working converters --- src/allmydata/storage/crawler.py | 36 +++--- src/allmydata/test/test_storage_web.py | 165 +++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 17 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 48b03ec8b..548864e06 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -34,7 +34,7 @@ def _convert_pickle_state_to_json(state): :return dict: the state in the JSON form """ # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list - # ["leases-per-share-histogram"] gets str keys instead of int + # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int # ["cycle-start-finish-times"] from 2-tuple to list # ["configured-expiration-mode"] from 4-tuple to list # ["history"] keys are strings @@ -43,12 +43,6 @@ def _convert_pickle_state_to_json(state): "Unknown version {version} in pickle state".format(**state) ) - def convert_lpsh(value): - return { - str(k): v - for k, v in value.items() - } - def convert_cem(value): # original is a 4-tuple, with the last element being a 2-tuple # .. 
convert both to lists @@ -59,20 +53,28 @@ def _convert_pickle_state_to_json(state): list(value[3]), ] - def convert_history(value): - print("convert history") - print(value) + def convert_ctd(value): + ctd_converter = { + "lease-age-histogram": lambda value: { + "{},{}".format(k[0], k[1]): v + for k, v in value.items() + }, + "corrupt-shares": lambda value: [ + list(x) + for x in value + ], + } return { - str(k): v - for k, v in value + k: ctd_converter.get(k, lambda z: z)(v) + for k, v in value.items() } + # we don't convert "history" here because that's in a separate + # file; see expirer.py converters = { - "cycle-to-date": list, - "leases-per-share-histogram": convert_lpsh, + "cycle-to-date": convert_ctd, "cycle-starte-finish-times": list, "configured-expiration-mode": convert_cem, - "history": convert_history, } def convert_value(key, value): @@ -116,10 +118,10 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): import pickle with state_path.open("r") as f: state = pickle.load(f) - state = convert_pickle(state) + new_state = convert_pickle(state) json_state_path = state_path.siblingExtension(".json") with json_state_path.open("w") as f: - json.dump(state, f) + json.dump(new_state, f) # we've written the JSON, delete the pickle state_path.remove() return json_state_path.path diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 0b287d667..5cdf02a25 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1160,6 +1160,171 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.assertNot(test_pickle.exists()) self.assertTrue(test_pickle.siblingExtension(".json").exists()) + self.assertEqual( + serial.load(), + { + u'last-complete-prefix': None, + u'version': 1, + u'current-cycle-start-time': 1635003106.611748, + u'last-cycle-finished': 312, + u'cycle-to-date': { + u'leases-per-share-histogram': { + u'1': 36793, + u'2': 1, + }, + u'space-recovered': { + u'examined-buckets-immutable': 17183, + u'configured-buckets-mutable': 0, + u'examined-shares-mutable': 1796, + u'original-shares-mutable': 1563, + u'configured-buckets-immutable': 0, + u'original-shares-immutable': 27926, + u'original-diskbytes-immutable': 431149056, + u'examined-shares-immutable': 34998, + u'original-buckets': 14661, + u'actual-shares-immutable': 0, + u'configured-shares': 0, + u'original-buckets-mutable': 899, + u'actual-diskbytes': 4096, + u'actual-shares-mutable': 0, + u'configured-buckets': 1, + u'examined-buckets-unknown': 14, + u'actual-sharebytes': 0, + u'original-shares': 29489, + u'actual-buckets-immutable': 0, + u'original-sharebytes': 312664812, + u'examined-sharebytes-immutable': 383801602, + u'actual-shares': 0, + u'actual-sharebytes-immutable': 0, + u'original-diskbytes': 441643008, + u'configured-diskbytes-mutable': 0, + u'configured-sharebytes-immutable': 0, + u'configured-shares-mutable': 0, + u'actual-diskbytes-immutable': 0, + u'configured-diskbytes-immutable': 0, + u'original-diskbytes-mutable': 10489856, + u'actual-sharebytes-mutable': 0, + u'configured-sharebytes': 0, + u'examined-shares': 36794, + u'actual-diskbytes-mutable': 0, + u'actual-buckets': 1, + u'original-buckets-immutable': 13761, + u'configured-sharebytes-mutable': 0, + u'examined-sharebytes': 390369660, + u'original-sharebytes-immutable': 308125753, + u'original-sharebytes-mutable': 4539059, + u'actual-buckets-mutable': 0, + u'examined-buckets-mutable': 1043, + u'configured-shares-immutable': 0, + u'examined-diskbytes': 
476598272, + u'examined-diskbytes-mutable': 9154560, + u'examined-sharebytes-mutable': 6568058, + u'examined-buckets': 18241, + u'configured-diskbytes': 4096, + u'examined-diskbytes-immutable': 467443712}, + u'corrupt-shares': [ + [u'2dn6xnlnsqwtnapwxfdivpm3s4', 4], + [u'2dn6xnlnsqwtnapwxfdivpm3s4', 1], + [u'2rrzthwsrrxolevmwdvbdy3rqi', 4], + [u'2rrzthwsrrxolevmwdvbdy3rqi', 1], + [u'2skfngcto6h7eqmn4uo7ntk3ne', 4], + [u'2skfngcto6h7eqmn4uo7ntk3ne', 1], + [u'32d5swqpqx2mwix7xmqzvhdwje', 4], + [u'32d5swqpqx2mwix7xmqzvhdwje', 1], + [u'5mmayp66yflmpon3o6unsnbaca', 4], + [u'5mmayp66yflmpon3o6unsnbaca', 1], + [u'6ixhpvbtre7fnrl6pehlrlflc4', 4], + [u'6ixhpvbtre7fnrl6pehlrlflc4', 1], + [u'ewzhvswjsz4vp2bqkb6mi3bz2u', 4], + [u'ewzhvswjsz4vp2bqkb6mi3bz2u', 1], + [u'fu7pazf6ogavkqj6z4q5qqex3u', 4], + [u'fu7pazf6ogavkqj6z4q5qqex3u', 1], + [u'hbyjtqvpcimwxiyqbcbbdn2i4a', 4], + [u'hbyjtqvpcimwxiyqbcbbdn2i4a', 1], + [u'pmcjbdkbjdl26k3e6yja77femq', 4], + [u'pmcjbdkbjdl26k3e6yja77femq', 1], + [u'r6swof4v2uttbiiqwj5pi32cm4', 4], + [u'r6swof4v2uttbiiqwj5pi32cm4', 1], + [u't45v5akoktf53evc2fi6gwnv6y', 4], + [u't45v5akoktf53evc2fi6gwnv6y', 1], + [u'y6zb4faar3rdvn3e6pfg4wlotm', 4], + [u'y6zb4faar3rdvn3e6pfg4wlotm', 1], + [u'z3yghutvqoqbchjao4lndnrh3a', 4], + [u'z3yghutvqoqbchjao4lndnrh3a', 1], + ], + u'lease-age-histogram': { + "1641600,1728000": 78, + "12441600,12528000": 78, + "8640000,8726400": 32, + "1814400,1900800": 1860, + "2764800,2851200": 76, + "11491200,11577600": 20, + "10713600,10800000": 183, + "47865600,47952000": 7, + "3110400,3196800": 328, + "10627200,10713600": 43, + "45619200,45705600": 4, + "12873600,12960000": 5, + "7430400,7516800": 7228, + "1555200,1641600": 492, + "38880000,38966400": 3, + "12528000,12614400": 193, + "7344000,7430400": 12689, + "2678400,2764800": 278, + "2332800,2419200": 12, + "9244800,9331200": 73, + "12787200,12873600": 218, + "49075200,49161600": 19, + "10368000,10454400": 117, + "4665600,4752000": 256, + "7516800,7603200": 993, + "42336000,42422400": 33, + "10972800,11059200": 122, + "39052800,39139200": 51, + "12614400,12700800": 210, + "7603200,7689600": 2004, + "10540800,10627200": 16, + "950400,1036800": 4435, + "42076800,42163200": 4, + "8812800,8899200": 57, + "5788800,5875200": 954, + "36374400,36460800": 3, + "9331200,9417600": 12, + "30499200,30585600": 5, + "12700800,12787200": 25, + "2073600,2160000": 388, + "12960000,13046400": 8, + "11923200,12009600": 89, + "3369600,3456000": 79, + "3196800,3283200": 628, + "37497600,37584000": 11, + "33436800,33523200": 7, + "44928000,45014400": 2, + "37929600,38016000": 3, + "38966400,39052800": 61, + "3283200,3369600": 86, + "11750400,11836800": 7, + "3801600,3888000": 32, + "46310400,46396800": 1, + "4838400,4924800": 386, + "8208000,8294400": 38, + "37411200,37497600": 4, + "12009600,12096000": 329, + "10454400,10540800": 1239, + "40176000,40262400": 1, + "3715200,3801600": 104, + "44409600,44496000": 13, + "38361600,38448000": 5, + "12268800,12355200": 2, + "28771200,28857600": 6, + "41990400,42076800": 10, + "2592000,2678400": 40, + }, + }, + 'current-cycle': None, + 'last-complete-bucket': None, + } + ) class WebStatus(unittest.TestCase, pollmixin.PollMixin): From d4fc14f9ada372e94d68667da64643b52de9923c Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 19:42:08 -0600 Subject: [PATCH 146/220] docstring --- src/allmydata/storage/expirer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index d2f48004a..ce126b6a4 100644 --- 
a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -23,6 +23,9 @@ from twisted.python.filepath import FilePath def _convert_pickle_state_to_json(state): """ + Convert a pickle-serialized crawler-history state to the new JSON + format. + :param dict state: the pickled state :return dict: the state in the JSON form From 75410e51f04f90007efce9fa2cba6504c54fe9ac Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:10:43 -0600 Subject: [PATCH 147/220] refactor --- src/allmydata/storage/crawler.py | 86 ++++++++++++++++++-------------- src/allmydata/storage/expirer.py | 14 ++---- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 548864e06..2e9bafd13 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -27,23 +27,14 @@ class TimeSliceExceeded(Exception): pass -def _convert_pickle_state_to_json(state): +def _convert_cycle_data(state): """ - :param dict state: the pickled state + :param dict state: cycle-to-date or history-item state :return dict: the state in the JSON form """ - # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list - # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int - # ["cycle-start-finish-times"] from 2-tuple to list - # ["configured-expiration-mode"] from 4-tuple to list - # ["history"] keys are strings - if state["version"] != 1: - raise ValueError( - "Unknown version {version} in pickle state".format(**state) - ) - def convert_cem(value): + def _convert_expiration_mode(value): # original is a 4-tuple, with the last element being a 2-tuple # .. convert both to lists return [ @@ -53,41 +44,60 @@ def _convert_pickle_state_to_json(state): list(value[3]), ] - def convert_ctd(value): - ctd_converter = { - "lease-age-histogram": lambda value: { + def _convert_lease_age(value): + # if we're in cycle-to-date, this is a dict + if isinstance(value, dict): + return { "{},{}".format(k[0], k[1]): v for k, v in value.items() - }, - "corrupt-shares": lambda value: [ - list(x) - for x in value - ], - } - return { - k: ctd_converter.get(k, lambda z: z)(v) - for k, v in value.items() - } + } + # otherwise, it's a history-item and they're 3-tuples + return [ + list(v) + for v in value + ] - # we don't convert "history" here because that's in a separate - # file; see expirer.py converters = { - "cycle-to-date": convert_ctd, - "cycle-starte-finish-times": list, - "configured-expiration-mode": convert_cem, + "configured-expiration-mode": _convert_expiration_mode, + "cycle-start-finish-times": list, + "lease-age-histogram": _convert_lease_age, + "corrupt-shares": lambda value: [ + list(x) + for x in value + ], + "leases-per-share-histogram": lambda value: { + str(k): v + for k, v in value.items() + }, + } + return { + k: converters.get(k, lambda z: z)(v) + for k, v in state.items() } - def convert_value(key, value): - converter = converters.get(key, None) - if converter is None: - return value - return converter(value) - new_state = { - k: convert_value(k, v) +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list + # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int + # ["cycle-start-finish-times"] from 2-tuple to list + # ["history"] keys are strings + if state["version"] != 1: + raise ValueError( + "Unknown version {version} in pickle 
state".format(**state) + ) + + converters = { + "cycle-to-date": _convert_cycle_data, + } + return { + k: converters.get(k, lambda x: x)(v) for k, v in state.items() } - return new_state def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index ce126b6a4..946498eaf 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -13,6 +13,7 @@ import struct from allmydata.storage.crawler import ( ShareCrawler, _maybe_upgrade_pickle_to_json, + _convert_cycle_data, ) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ @@ -30,15 +31,10 @@ def _convert_pickle_state_to_json(state): :return dict: the state in the JSON form """ - print("CONVERT", state) - for k, v in state.items(): - print(k, v) - if state["version"] != 1: - raise ValueError( - "Unknown version {version} in pickle state".format(**state) - ) - - return state + return { + str(k): _convert_cycle_data(v) + for k, v in state.items() + } class _HistorySerializer(object): From a867294e00addbf4a0f4426820beb07895b81441 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:12:17 -0600 Subject: [PATCH 148/220] dead --- src/allmydata/storage/expirer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 946498eaf..254264e38 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -61,7 +61,6 @@ class _HistorySerializer(object): :return dict: the existing history state """ - assert self._path is not None, "Not initialized" with self._path.open("rb") as f: history = json.load(f) return history @@ -70,7 +69,6 @@ class _HistorySerializer(object): """ Serialize the existing data as JSON. """ - assert self._path is not None, "Not initialized" with self._path.open("wb") as f: json.dump(new_history, f) return None From 94670461f1d93bef6766652a03a2b9bd85916224 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:37:51 -0600 Subject: [PATCH 149/220] tests --- src/allmydata/storage/expirer.py | 10 +- src/allmydata/test/test_storage_web.py | 168 +++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 254264e38..9ba71539c 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -55,7 +55,7 @@ class _HistorySerializer(object): with self._path.open("wb") as f: json.dump({}, f) - def read(self): + def load(self): """ Deserialize the existing data. @@ -65,7 +65,7 @@ class _HistorySerializer(object): history = json.load(f) return history - def write(self, new_history): + def save(self, new_history): """ Serialize the existing data as JSON. 
""" @@ -368,12 +368,12 @@ class LeaseCheckingCrawler(ShareCrawler): # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() - history = self._history_serializer.read() + history = self._history_serializer.load() history[str(cycle)] = h while len(history) > 10: oldcycles = sorted(int(k) for k in history.keys()) del history[str(oldcycles[0])] - self._history_serializer.write(history) + self._history_serializer.save(history) def get_state(self): """In addition to the crawler state described in @@ -442,7 +442,7 @@ class LeaseCheckingCrawler(ShareCrawler): progress = self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy - state["history"] = self._history_serializer.read() + state["history"] = self._history_serializer.load() if not progress["cycle-in-progress"]: del state["cycle-to-date"] diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 5cdf02a25..033462d46 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -38,6 +38,7 @@ from allmydata.storage.crawler import ( ) from allmydata.storage.expirer import ( LeaseCheckingCrawler, + _HistorySerializer, ) from allmydata.web.storage import ( StorageStatus, @@ -1149,6 +1150,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): """ The crawler can read existing state from the old pickle format """ + # this file came from an "in the wild" tahoe version 1.16.0 original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state") test_pickle = FilePath("lease_checker.state") with test_pickle.open("w") as local, original_pickle.open("r") as remote: @@ -1326,6 +1328,172 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): } ) + def test_deserialize_history_pickle(self): + """ + The crawler can read existing history state from the old pickle + format + """ + # this file came from an "in the wild" tahoe version 1.16.0 + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history") + test_pickle = FilePath("lease_checker.history") + with test_pickle.open("w") as local, original_pickle.open("r") as remote: + local.write(remote.read()) + + serial = _HistorySerializer(test_pickle.path) + + self.maxDiff = None + self.assertEqual( + serial.load(), + { + "363": { + 'configured-expiration-mode': ['age', None, None, ['immutable', 'mutable']], + 'expiration-enabled': False, + 'leases-per-share-histogram': { + '1': 39774, + }, + 'lease-age-histogram': [ + [0, 86400, 3125], + [345600, 432000, 4175], + [950400, 1036800, 141], + [1036800, 1123200, 345], + [1123200, 1209600, 81], + [1296000, 1382400, 1832], + [1555200, 1641600, 390], + [1728000, 1814400, 12], + [2073600, 2160000, 84], + [2160000, 2246400, 228], + [2246400, 2332800, 75], + [2592000, 2678400, 644], + [2678400, 2764800, 273], + [2764800, 2851200, 94], + [2851200, 2937600, 97], + [3196800, 3283200, 143], + [3283200, 3369600, 48], + [4147200, 4233600, 374], + [4320000, 4406400, 534], + [5270400, 5356800, 1005], + [6739200, 6825600, 8704], + [6825600, 6912000, 3986], + [6912000, 6998400, 7592], + [6998400, 7084800, 2607], + [7689600, 7776000, 35], + [8035200, 8121600, 33], + [8294400, 8380800, 54], + [8640000, 8726400, 45], + [8726400, 8812800, 27], + [8812800, 8899200, 12], + [9763200, 9849600, 77], + [9849600, 9936000, 91], + [9936000, 10022400, 1210], + [10022400, 10108800, 45], + [10108800, 10195200, 186], + [10368000, 10454400, 113], + [10972800, 11059200, 21], + [11232000, 11318400, 5], + 
[11318400, 11404800, 19], + [11404800, 11491200, 238], + [11491200, 11577600, 159], + [11750400, 11836800, 1], + [11836800, 11923200, 32], + [11923200, 12009600, 192], + [12009600, 12096000, 222], + [12096000, 12182400, 18], + [12182400, 12268800, 224], + [12268800, 12355200, 9], + [12355200, 12441600, 9], + [12441600, 12528000, 10], + [12528000, 12614400, 6], + [12614400, 12700800, 6], + [12700800, 12787200, 18], + [12787200, 12873600, 6], + [12873600, 12960000, 62], + ], + 'cycle-start-finish-times': [1634446505.241972, 1634446666.055401], + 'space-recovered': { + 'examined-buckets-immutable': 17896, + 'configured-buckets-mutable': 0, + 'examined-shares-mutable': 2473, + 'original-shares-mutable': 1185, + 'configured-buckets-immutable': 0, + 'original-shares-immutable': 27457, + 'original-diskbytes-immutable': 2810982400, + 'examined-shares-immutable': 37301, + 'original-buckets': 14047, + 'actual-shares-immutable': 0, + 'configured-shares': 0, + 'original-buckets-mutable': 691, + 'actual-diskbytes': 4096, + 'actual-shares-mutable': 0, + 'configured-buckets': 1, + 'examined-buckets-unknown': 14, + 'actual-sharebytes': 0, + 'original-shares': 28642, + 'actual-buckets-immutable': 0, + 'original-sharebytes': 2695552941, + 'examined-sharebytes-immutable': 2754798505, + 'actual-shares': 0, + 'actual-sharebytes-immutable': 0, + 'original-diskbytes': 2818981888, + 'configured-diskbytes-mutable': 0, + 'configured-sharebytes-immutable': 0, + 'configured-shares-mutable': 0, + 'actual-diskbytes-immutable': 0, + 'configured-diskbytes-immutable': 0, + 'original-diskbytes-mutable': 7995392, + 'actual-sharebytes-mutable': 0, + 'configured-sharebytes': 0, + 'examined-shares': 39774, + 'actual-diskbytes-mutable': 0, + 'actual-buckets': 1, + 'original-buckets-immutable': 13355, + 'configured-sharebytes-mutable': 0, + 'examined-sharebytes': 2763646972, + 'original-sharebytes-immutable': 2692076909, + 'original-sharebytes-mutable': 3476032, + 'actual-buckets-mutable': 0, + 'examined-buckets-mutable': 1286, + 'configured-shares-immutable': 0, + 'examined-diskbytes': 2854801408, + 'examined-diskbytes-mutable': 12161024, + 'examined-sharebytes-mutable': 8848467, + 'examined-buckets': 19197, + 'configured-diskbytes': 4096, + 'examined-diskbytes-immutable': 2842640384 + }, + 'corrupt-shares': [ + ['2dn6xnlnsqwtnapwxfdivpm3s4', 3], + ['2dn6xnlnsqwtnapwxfdivpm3s4', 0], + ['2rrzthwsrrxolevmwdvbdy3rqi', 3], + ['2rrzthwsrrxolevmwdvbdy3rqi', 0], + ['2skfngcto6h7eqmn4uo7ntk3ne', 3], + ['2skfngcto6h7eqmn4uo7ntk3ne', 0], + ['32d5swqpqx2mwix7xmqzvhdwje', 3], + ['32d5swqpqx2mwix7xmqzvhdwje', 0], + ['5mmayp66yflmpon3o6unsnbaca', 3], + ['5mmayp66yflmpon3o6unsnbaca', 0], + ['6ixhpvbtre7fnrl6pehlrlflc4', 3], + ['6ixhpvbtre7fnrl6pehlrlflc4', 0], + ['ewzhvswjsz4vp2bqkb6mi3bz2u', 3], + ['ewzhvswjsz4vp2bqkb6mi3bz2u', 0], + ['fu7pazf6ogavkqj6z4q5qqex3u', 3], + ['fu7pazf6ogavkqj6z4q5qqex3u', 0], + ['hbyjtqvpcimwxiyqbcbbdn2i4a', 3], + ['hbyjtqvpcimwxiyqbcbbdn2i4a', 0], + ['pmcjbdkbjdl26k3e6yja77femq', 3], + ['pmcjbdkbjdl26k3e6yja77femq', 0], + ['r6swof4v2uttbiiqwj5pi32cm4', 3], + ['r6swof4v2uttbiiqwj5pi32cm4', 0], + ['t45v5akoktf53evc2fi6gwnv6y', 3], + ['t45v5akoktf53evc2fi6gwnv6y', 0], + ['y6zb4faar3rdvn3e6pfg4wlotm', 3], + ['y6zb4faar3rdvn3e6pfg4wlotm', 0], + ['z3yghutvqoqbchjao4lndnrh3a', 3], + ['z3yghutvqoqbchjao4lndnrh3a', 0], + ] + } + } + ) + class WebStatus(unittest.TestCase, pollmixin.PollMixin): From 069c332a6815c6c67b77a7af328e7bb2993d175d Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:49:25 -0600 Subject: 
[PATCH 150/220] straight assert --- src/allmydata/storage/crawler.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 2e9bafd13..d1366765e 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -86,10 +86,7 @@ def _convert_pickle_state_to_json(state): # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int # ["cycle-start-finish-times"] from 2-tuple to list # ["history"] keys are strings - if state["version"] != 1: - raise ValueError( - "Unknown version {version} in pickle state".format(**state) - ) + assert state["version"] == 1, "Only known version is 1" converters = { "cycle-to-date": _convert_cycle_data, From 9b3c55e4aa856e48b6d6fb5ee0252d69aeb64110 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:59:29 -0600 Subject: [PATCH 151/220] test a second deserialzation --- src/allmydata/test/test_storage_web.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 033462d46..70866cba9 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1327,6 +1327,11 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): 'last-complete-bucket': None, } ) + second_serial = _LeaseStateSerializer(serial._path.path) + self.assertEqual( + serial.load(), + second_serial.load(), + ) def test_deserialize_history_pickle(self): """ From 4f64bbaa0086af61745f7f55fd04fb716f018960 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 22:15:49 -0600 Subject: [PATCH 152/220] data --- src/allmydata/test/data/lease_checker.history | 501 ++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 src/allmydata/test/data/lease_checker.history diff --git a/src/allmydata/test/data/lease_checker.history b/src/allmydata/test/data/lease_checker.history new file mode 100644 index 000000000..0c27a5ad0 --- /dev/null +++ b/src/allmydata/test/data/lease_checker.history @@ -0,0 +1,501 @@ +(dp0 +I363 +(dp1 +Vconfigured-expiration-mode +p2 +(S'age' +p3 +NN(S'immutable' +p4 +S'mutable' +p5 +tp6 +tp7 +sVexpiration-enabled +p8 +I00 +sVleases-per-share-histogram +p9 +(dp10 +I1 +I39774 +ssVlease-age-histogram +p11 +(lp12 +(I0 +I86400 +I3125 +tp13 +a(I345600 +I432000 +I4175 +tp14 +a(I950400 +I1036800 +I141 +tp15 +a(I1036800 +I1123200 +I345 +tp16 +a(I1123200 +I1209600 +I81 +tp17 +a(I1296000 +I1382400 +I1832 +tp18 +a(I1555200 +I1641600 +I390 +tp19 +a(I1728000 +I1814400 +I12 +tp20 +a(I2073600 +I2160000 +I84 +tp21 +a(I2160000 +I2246400 +I228 +tp22 +a(I2246400 +I2332800 +I75 +tp23 +a(I2592000 +I2678400 +I644 +tp24 +a(I2678400 +I2764800 +I273 +tp25 +a(I2764800 +I2851200 +I94 +tp26 +a(I2851200 +I2937600 +I97 +tp27 +a(I3196800 +I3283200 +I143 +tp28 +a(I3283200 +I3369600 +I48 +tp29 +a(I4147200 +I4233600 +I374 +tp30 +a(I4320000 +I4406400 +I534 +tp31 +a(I5270400 +I5356800 +I1005 +tp32 +a(I6739200 +I6825600 +I8704 +tp33 +a(I6825600 +I6912000 +I3986 +tp34 +a(I6912000 +I6998400 +I7592 +tp35 +a(I6998400 +I7084800 +I2607 +tp36 +a(I7689600 +I7776000 +I35 +tp37 +a(I8035200 +I8121600 +I33 +tp38 +a(I8294400 +I8380800 +I54 +tp39 +a(I8640000 +I8726400 +I45 +tp40 +a(I8726400 +I8812800 +I27 +tp41 +a(I8812800 +I8899200 +I12 +tp42 +a(I9763200 +I9849600 +I77 +tp43 +a(I9849600 +I9936000 +I91 +tp44 +a(I9936000 +I10022400 +I1210 +tp45 +a(I10022400 +I10108800 +I45 +tp46 +a(I10108800 +I10195200 +I186 +tp47 +a(I10368000 +I10454400 +I113 +tp48 +a(I10972800 
+I11059200 +I21 +tp49 +a(I11232000 +I11318400 +I5 +tp50 +a(I11318400 +I11404800 +I19 +tp51 +a(I11404800 +I11491200 +I238 +tp52 +a(I11491200 +I11577600 +I159 +tp53 +a(I11750400 +I11836800 +I1 +tp54 +a(I11836800 +I11923200 +I32 +tp55 +a(I11923200 +I12009600 +I192 +tp56 +a(I12009600 +I12096000 +I222 +tp57 +a(I12096000 +I12182400 +I18 +tp58 +a(I12182400 +I12268800 +I224 +tp59 +a(I12268800 +I12355200 +I9 +tp60 +a(I12355200 +I12441600 +I9 +tp61 +a(I12441600 +I12528000 +I10 +tp62 +a(I12528000 +I12614400 +I6 +tp63 +a(I12614400 +I12700800 +I6 +tp64 +a(I12700800 +I12787200 +I18 +tp65 +a(I12787200 +I12873600 +I6 +tp66 +a(I12873600 +I12960000 +I62 +tp67 +asVcycle-start-finish-times +p68 +(F1634446505.241972 +F1634446666.055401 +tp69 +sVspace-recovered +p70 +(dp71 +Vexamined-buckets-immutable +p72 +I17896 +sVconfigured-buckets-mutable +p73 +I0 +sVexamined-shares-mutable +p74 +I2473 +sVoriginal-shares-mutable +p75 +I1185 +sVconfigured-buckets-immutable +p76 +I0 +sVoriginal-shares-immutable +p77 +I27457 +sVoriginal-diskbytes-immutable +p78 +I2810982400 +sVexamined-shares-immutable +p79 +I37301 +sVoriginal-buckets +p80 +I14047 +sVactual-shares-immutable +p81 +I0 +sVconfigured-shares +p82 +I0 +sVoriginal-buckets-mutable +p83 +I691 +sVactual-diskbytes +p84 +I4096 +sVactual-shares-mutable +p85 +I0 +sVconfigured-buckets +p86 +I1 +sVexamined-buckets-unknown +p87 +I14 +sVactual-sharebytes +p88 +I0 +sVoriginal-shares +p89 +I28642 +sVactual-buckets-immutable +p90 +I0 +sVoriginal-sharebytes +p91 +I2695552941 +sVexamined-sharebytes-immutable +p92 +I2754798505 +sVactual-shares +p93 +I0 +sVactual-sharebytes-immutable +p94 +I0 +sVoriginal-diskbytes +p95 +I2818981888 +sVconfigured-diskbytes-mutable +p96 +I0 +sVconfigured-sharebytes-immutable +p97 +I0 +sVconfigured-shares-mutable +p98 +I0 +sVactual-diskbytes-immutable +p99 +I0 +sVconfigured-diskbytes-immutable +p100 +I0 +sVoriginal-diskbytes-mutable +p101 +I7995392 +sVactual-sharebytes-mutable +p102 +I0 +sVconfigured-sharebytes +p103 +I0 +sVexamined-shares +p104 +I39774 +sVactual-diskbytes-mutable +p105 +I0 +sVactual-buckets +p106 +I1 +sVoriginal-buckets-immutable +p107 +I13355 +sVconfigured-sharebytes-mutable +p108 +I0 +sVexamined-sharebytes +p109 +I2763646972 +sVoriginal-sharebytes-immutable +p110 +I2692076909 +sVoriginal-sharebytes-mutable +p111 +I3476032 +sVactual-buckets-mutable +p112 +I0 +sVexamined-buckets-mutable +p113 +I1286 +sVconfigured-shares-immutable +p114 +I0 +sVexamined-diskbytes +p115 +I2854801408 +sVexamined-diskbytes-mutable +p116 +I12161024 +sVexamined-sharebytes-mutable +p117 +I8848467 +sVexamined-buckets +p118 +I19197 +sVconfigured-diskbytes +p119 +I4096 +sVexamined-diskbytes-immutable +p120 +I2842640384 +ssVcorrupt-shares +p121 +(lp122 +(V2dn6xnlnsqwtnapwxfdivpm3s4 +p123 +I3 +tp124 +a(g123 +I0 +tp125 +a(V2rrzthwsrrxolevmwdvbdy3rqi +p126 +I3 +tp127 +a(g126 +I0 +tp128 +a(V2skfngcto6h7eqmn4uo7ntk3ne +p129 +I3 +tp130 +a(g129 +I0 +tp131 +a(V32d5swqpqx2mwix7xmqzvhdwje +p132 +I3 +tp133 +a(g132 +I0 +tp134 +a(V5mmayp66yflmpon3o6unsnbaca +p135 +I3 +tp136 +a(g135 +I0 +tp137 +a(V6ixhpvbtre7fnrl6pehlrlflc4 +p138 +I3 +tp139 +a(g138 +I0 +tp140 +a(Vewzhvswjsz4vp2bqkb6mi3bz2u +p141 +I3 +tp142 +a(g141 +I0 +tp143 +a(Vfu7pazf6ogavkqj6z4q5qqex3u +p144 +I3 +tp145 +a(g144 +I0 +tp146 +a(Vhbyjtqvpcimwxiyqbcbbdn2i4a +p147 +I3 +tp148 +a(g147 +I0 +tp149 +a(Vpmcjbdkbjdl26k3e6yja77femq +p150 +I3 +tp151 +a(g150 +I0 +tp152 +a(Vr6swof4v2uttbiiqwj5pi32cm4 +p153 +I3 +tp154 +a(g153 +I0 +tp155 +a(Vt45v5akoktf53evc2fi6gwnv6y +p156 +I3 +tp157 +a(g156 +I0 +tp158 
+a(Vy6zb4faar3rdvn3e6pfg4wlotm +p159 +I3 +tp160 +a(g159 +I0 +tp161 +a(Vz3yghutvqoqbchjao4lndnrh3a +p162 +I3 +tp163 +a(g162 +I0 +tp164 +ass. \ No newline at end of file From 1c93175583365966f0b476ecc95720efcbf2f827 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 2 Nov 2021 22:42:33 -0600 Subject: [PATCH 153/220] cleanup --- src/allmydata/storage/crawler.py | 22 ++++------------------ src/allmydata/storage/expirer.py | 1 - 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index d1366765e..a06806d17 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -82,10 +82,6 @@ def _convert_pickle_state_to_json(state): :return dict: the state in the JSON form """ - # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list - # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int - # ["cycle-start-finish-times"] from 2-tuple to list - # ["history"] keys are strings assert state["version"] == 1, "Only known version is 1" converters = { @@ -123,12 +119,12 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): # upgrade the pickle data to JSON import pickle - with state_path.open("r") as f: + with state_path.open("rb") as f: state = pickle.load(f) new_state = convert_pickle(state) - json_state_path = state_path.siblingExtension(".json") - with json_state_path.open("w") as f: + with json_state_path.open("wb") as f: json.dump(new_state, f) + # we've written the JSON, delete the pickle state_path.remove() return json_state_path.path @@ -148,15 +144,9 @@ class _LeaseStateSerializer(object): _convert_pickle_state_to_json, ) ) - # XXX want this to .. load and save the state - # - if the state is pickle-only: - # - load it and convert to json format - # - save json - # - delete pickle - # - if the state is json, load it def load(self): - with self._path.open("r") as f: + with self._path.open("rb") as f: return json.load(f) def save(self, data): @@ -388,10 +378,6 @@ class ShareCrawler(service.MultiService): else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix - - # Note: we use self.get_state() here because e.g - # LeaseCheckingCrawler stores non-JSON-able state in - # self.state() but converts it in self.get_state() self._state_serializer.save(self.get_state()) def startService(self): diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 9ba71539c..ad1343ef5 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -123,7 +123,6 @@ class LeaseCheckingCrawler(ShareCrawler): cutoff_date, # used if expiration_mode=="cutoff-date" sharetypes): self._history_serializer = _HistorySerializer(historyfile) - ##self.historyfile = historyfile self.expiration_enabled = expiration_enabled self.mode = mode self.override_lease_duration = None From 23ff1b2430334d376abd426421039562f94bcfc8 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 2 Nov 2021 22:45:08 -0600 Subject: [PATCH 154/220] noqa --- src/allmydata/storage/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index a06806d17..129659d27 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -11,7 +11,7 @@ from __future__ import print_function from future.utils import PY2, PY3 if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, 
bytes, dict, list, object, range, str, max, min + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import os import time From 2fe686135bf9513cbdbbe700e5faa63ee1743e83 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 2 Nov 2021 23:33:54 -0600 Subject: [PATCH 155/220] rename data to appease distutils --- .../data/{lease_checker.history => lease_checker.history.txt} | 0 .../data/{lease_checker.state => lease_checker.state.txt} | 0 src/allmydata/test/test_storage_web.py | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/allmydata/test/data/{lease_checker.history => lease_checker.history.txt} (100%) rename src/allmydata/test/data/{lease_checker.state => lease_checker.state.txt} (100%) diff --git a/src/allmydata/test/data/lease_checker.history b/src/allmydata/test/data/lease_checker.history.txt similarity index 100% rename from src/allmydata/test/data/lease_checker.history rename to src/allmydata/test/data/lease_checker.history.txt diff --git a/src/allmydata/test/data/lease_checker.state b/src/allmydata/test/data/lease_checker.state.txt similarity index 100% rename from src/allmydata/test/data/lease_checker.state rename to src/allmydata/test/data/lease_checker.state.txt diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 70866cba9..269af2203 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1151,7 +1151,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): The crawler can read existing state from the old pickle format """ # this file came from an "in the wild" tahoe version 1.16.0 - original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state") + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state.txt") test_pickle = FilePath("lease_checker.state") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) @@ -1339,7 +1339,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): format """ # this file came from an "in the wild" tahoe version 1.16.0 - original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history") + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history.txt") test_pickle = FilePath("lease_checker.history") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) From a208502e18c4f4faf85d500e86e3b2093d219ecf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:29:01 -0500 Subject: [PATCH 156/220] whitespace --- src/allmydata/storage/lease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 63dba15e8..bc94ca6d5 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -272,7 +272,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign Hash the candidate secret and compare the result to the stored hashed secret. """ - if isinstance(candidate_secret, _HashedCancelSecret): + if isinstance(candidate_secret, _HashedCancelSecret): # Someone read it off of this object in this project - probably # the lease crawler - and is just trying to use it to identify # which lease it wants to operate on. Avoid re-hashing the value. 
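The `HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info"))` declaration visible in the hunk header above is what the next patch's comment refers to: `proxyForInterface` manufactures a base class whose methods all delegate to the attribute named by its second argument. A minimal sketch of the pattern, using a hypothetical one-method interface rather than the real `ILeaseInfo`:

    from zope.interface import Interface, implementer
    from twisted.python.components import proxyForInterface

    class IGreeter(Interface):
        def greet():
            """Return a greeting."""

    @implementer(IGreeter)
    class Greeter(object):
        def greet(self):
            return "hi"

    # proxyForInterface(IGreeter, "_original") builds a base class that
    # forwards every IGreeter method to self._original; a subclass
    # overrides only what must change.
    class LoudGreeter(proxyForInterface(IGreeter, "_original")):
        def greet(self):
            return self._original.greet().upper()

    assert LoudGreeter(Greeter()).greet() == "HI"

`HashedLeaseInfo` relies on the same mechanism: every `ILeaseInfo` method is forwarded to `_lease_info`, and only the secret-checking methods are overridden so a candidate secret is hashed before comparison.
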
From 3a8432713fb0885f3795d4501c77e80a21caea5a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:29:05 -0500 Subject: [PATCH 157/220] a note about what's happening with proxyForInterface --- src/allmydata/storage/lease.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index bc94ca6d5..0c3b219f6 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -260,6 +260,10 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign _lease_info = attr.ib() _hash = attr.ib() + # proxyForInterface will take care of forwarding all methods on ILeaseInfo + # to `_lease_info`. Here we override a few of those methods to adjust + # their behavior to make them suitable for use with hashed secrets. + def is_renew_secret(self, candidate_secret): """ Hash the candidate secret and compare the result to the stored hashed From e8adca40abdfa9f4c8616194bbe0bf1fe8817f1f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:32:35 -0500 Subject: [PATCH 158/220] give the ContainerVersionError exceptions a nice str --- src/allmydata/storage/common.py | 6 ++++++ src/allmydata/test/test_storage.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index 48fc77840..17a3f41b7 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -21,6 +21,12 @@ class UnknownContainerVersionError(Exception): self.filename = filename self.version = version + def __str__(self): + return "sharefile {!r} had unexpected version {!r}".format( + self.filename, + self.version, + ) + class UnknownMutableContainerVersionError(UnknownContainerVersionError): pass diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 655395042..ba3d3598f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -651,6 +651,7 @@ class Server(unittest.TestCase): ss.remote_get_buckets, b"si1") self.assertEqual(e.filename, fn) self.assertEqual(e.version, 0) + self.assertIn("had unexpected version 0", str(e)) def test_disconnect(self): # simulate a disconnection @@ -1136,6 +1137,8 @@ class MutableServer(unittest.TestCase): read, b"si1", [0], [(0,10)]) self.assertEqual(e.filename, fn) self.assertTrue(e.version.startswith(b"BAD MAGIC")) + self.assertIn("had unexpected version", str(e)) + self.assertIn("BAD MAGIC", str(e)) def test_container_size(self): ss = self.create("test_container_size") From 6a78703675e9ce9e42a8401ac38410d78357a86e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Nov 2021 10:53:51 -0500 Subject: [PATCH 159/220] News file. --- newsfragments/3807.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3807.feature diff --git a/newsfragments/3807.feature b/newsfragments/3807.feature new file mode 100644 index 000000000..f82363ffd --- /dev/null +++ b/newsfragments/3807.feature @@ -0,0 +1 @@ +If uploading an immutable hasn't had a write for 30 minutes, the storage server will abort the upload. \ No newline at end of file From 92c36a67d8c98436e2cc2616d89ff0135307858c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Nov 2021 11:01:04 -0500 Subject: [PATCH 160/220] Use IReactorTime instead of ad-hoc solutions. 
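
Both the global reactor and twisted.internet.task.Clock provide
IReactorTime, so production code can keep the reactor as its default
clock while tests substitute a Clock whose idea of "now" only moves on
demand. A sketch of the testing pattern this enables (``workdir`` and
``nodeid`` are placeholder arguments, not part of this diff):

    from twisted.internet.task import Clock

    clock = Clock()
    clock.advance(123456)            # pin the test's notion of "now"
    ss = StorageServer(workdir, nodeid, clock=clock)
    # A month of lease aging can now be simulated instantly:
    clock.advance(31 * 24 * 60 * 60)
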
--- src/allmydata/storage/server.py | 39 ++++++++++++----------- src/allmydata/test/test_istorageserver.py | 10 +++--- src/allmydata/test/test_storage.py | 16 +++++----- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index ee2ea1c61..499d47276 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -20,6 +20,7 @@ import six from foolscap.api import Referenceable from foolscap.ipb import IRemoteReference from twisted.application import service +from twisted.internet import reactor from zope.interface import implementer from allmydata.interfaces import RIStorageServer, IStatsProducer @@ -71,7 +72,7 @@ class StorageServer(service.MultiService, Referenceable): expiration_override_lease_duration=None, expiration_cutoff_date=None, expiration_sharetypes=("mutable", "immutable"), - get_current_time=time.time): + clock=reactor): service.MultiService.__init__(self) assert isinstance(nodeid, bytes) assert len(nodeid) == 20 @@ -122,7 +123,7 @@ class StorageServer(service.MultiService, Referenceable): expiration_cutoff_date, expiration_sharetypes) self.lease_checker.setServiceParent(self) - self._get_current_time = get_current_time + self._clock = clock # Currently being-written Bucketwriters. For Foolscap, lifetime is tied # to connection: when disconnection happens, the BucketWriters are @@ -292,7 +293,7 @@ class StorageServer(service.MultiService, Referenceable): # owner_num is not for clients to set, but rather it should be # curried into the PersonalStorageServer instance that is dedicated # to a particular owner. - start = self._get_current_time() + start = self._clock.seconds() self.count("allocate") alreadygot = set() bucketwriters = {} # k: shnum, v: BucketWriter @@ -305,7 +306,7 @@ class StorageServer(service.MultiService, Referenceable): # goes into the share files themselves. It could also be put into a # separate database. Note that the lease should not be added until # the BucketWriter has been closed. 
- expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME + expire_time = self._clock.seconds() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, expire_time, self.my_nodeid) @@ -360,7 +361,7 @@ class StorageServer(service.MultiService, Referenceable): if bucketwriters: fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) - self.add_latency("allocate", self._get_current_time() - start) + self.add_latency("allocate", self._clock.seconds() - start) return alreadygot, bucketwriters def remote_allocate_buckets(self, storage_index, @@ -395,26 +396,26 @@ class StorageServer(service.MultiService, Referenceable): def remote_add_lease(self, storage_index, renew_secret, cancel_secret, owner_num=1): - start = self._get_current_time() + start = self._clock.seconds() self.count("add-lease") - new_expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME + new_expire_time = self._clock.seconds() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(owner_num, renew_secret, cancel_secret, new_expire_time, self.my_nodeid) for sf in self._iter_share_files(storage_index): sf.add_or_renew_lease(lease_info) - self.add_latency("add-lease", self._get_current_time() - start) + self.add_latency("add-lease", self._clock.seconds() - start) return None def remote_renew_lease(self, storage_index, renew_secret): - start = self._get_current_time() + start = self._clock.seconds() self.count("renew") - new_expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME + new_expire_time = self._clock.seconds() + DEFAULT_RENEWAL_TIME found_buckets = False for sf in self._iter_share_files(storage_index): found_buckets = True sf.renew_lease(renew_secret, new_expire_time) - self.add_latency("renew", self._get_current_time() - start) + self.add_latency("renew", self._clock.seconds() - start) if not found_buckets: raise IndexError("no such lease to renew") @@ -441,7 +442,7 @@ class StorageServer(service.MultiService, Referenceable): pass def remote_get_buckets(self, storage_index): - start = self._get_current_time() + start = self._clock.seconds() self.count("get") si_s = si_b2a(storage_index) log.msg("storage: get_buckets %r" % si_s) @@ -449,7 +450,7 @@ class StorageServer(service.MultiService, Referenceable): for shnum, filename in self._get_bucket_shares(storage_index): bucketreaders[shnum] = BucketReader(self, filename, storage_index, shnum) - self.add_latency("get", self._get_current_time() - start) + self.add_latency("get", self._clock.seconds() - start) return bucketreaders def get_leases(self, storage_index): @@ -608,7 +609,7 @@ class StorageServer(service.MultiService, Referenceable): :return LeaseInfo: Information for a new lease for a share. """ ownerid = 1 # TODO - expire_time = self._get_current_time() + DEFAULT_RENEWAL_TIME + expire_time = self._clock.seconds() + DEFAULT_RENEWAL_TIME lease_info = LeaseInfo(ownerid, renew_secret, cancel_secret, expire_time, self.my_nodeid) @@ -646,7 +647,7 @@ class StorageServer(service.MultiService, Referenceable): See ``allmydata.interfaces.RIStorageServer`` for details about other parameters and return value. 
""" - start = self._get_current_time() + start = self._clock.seconds() self.count("writev") si_s = si_b2a(storage_index) log.msg("storage: slot_writev %r" % si_s) @@ -687,7 +688,7 @@ class StorageServer(service.MultiService, Referenceable): self._add_or_renew_leases(remaining_shares, lease_info) # all done - self.add_latency("writev", self._get_current_time() - start) + self.add_latency("writev", self._clock.seconds() - start) return (testv_is_good, read_data) def remote_slot_testv_and_readv_and_writev(self, storage_index, @@ -713,7 +714,7 @@ class StorageServer(service.MultiService, Referenceable): return share def remote_slot_readv(self, storage_index, shares, readv): - start = self._get_current_time() + start = self._clock.seconds() self.count("readv") si_s = si_b2a(storage_index) lp = log.msg("storage: slot_readv %r %r" % (si_s, shares), @@ -722,7 +723,7 @@ class StorageServer(service.MultiService, Referenceable): # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) if not os.path.isdir(bucketdir): - self.add_latency("readv", self._get_current_time() - start) + self.add_latency("readv", self._clock.seconds() - start) return {} datavs = {} for sharenum_s in os.listdir(bucketdir): @@ -736,7 +737,7 @@ class StorageServer(service.MultiService, Referenceable): datavs[sharenum] = msf.readv(readv) log.msg("returning shares %s" % (list(datavs.keys()),), facility="tahoe.storage", level=log.NOISY, parent=lp) - self.add_latency("readv", self._get_current_time() - start) + self.add_latency("readv", self._clock.seconds() - start) return datavs def remote_advise_corrupt_share(self, share_type, storage_index, shnum, diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index fe494a9d4..a17264713 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -21,6 +21,7 @@ if PY2: from random import Random from twisted.internet.defer import inlineCallbacks, returnValue +from twisted.internet.task import Clock from foolscap.api import Referenceable, RemoteException @@ -1017,16 +1018,17 @@ class _FoolscapMixin(SystemTestMixin): self.server = s break assert self.server is not None, "Couldn't find StorageServer" - self._current_time = 123456 - self.server._get_current_time = self.fake_time + self._clock = Clock() + self._clock.advance(123456) + self.server._clock = self._clock def fake_time(self): """Return the current fake, test-controlled, time.""" - return self._current_time + return self._clock.seconds() def fake_sleep(self, seconds): """Advance the fake time by the given number of seconds.""" - self._current_time += seconds + self._clock.advance(seconds) @inlineCallbacks def tearDown(self): diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 4e40a76a5..e143bec63 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -23,7 +23,7 @@ from uuid import uuid4 from twisted.trial import unittest -from twisted.internet import defer +from twisted.internet import defer, reactor from twisted.internet.task import Clock from hypothesis import given, strategies @@ -438,11 +438,11 @@ class Server(unittest.TestCase): basedir = os.path.join("storage", "Server", name) return basedir - def create(self, name, reserved_space=0, klass=StorageServer, get_current_time=time.time): + def create(self, name, reserved_space=0, klass=StorageServer, clock=reactor): workdir = self.workdir(name) ss = klass(workdir, b"\x00" * 20, 
reserved_space=reserved_space, stats_provider=FakeStatsProvider(), - get_current_time=get_current_time) + clock=clock) ss.setServiceParent(self.sparent) return ss @@ -626,7 +626,7 @@ class Server(unittest.TestCase): clock.advance(first_lease) ss = self.create( "test_allocate_without_lease_renewal", - get_current_time=clock.seconds, + clock=clock, ) # Put a share on there @@ -918,7 +918,7 @@ class Server(unittest.TestCase): """ clock = Clock() clock.advance(123) - ss = self.create("test_immutable_add_lease_renews", get_current_time=clock.seconds) + ss = self.create("test_immutable_add_lease_renews", clock=clock) # Start out with single lease created with bucket: renewal_secret, cancel_secret = self.create_bucket_5_shares(ss, b"si0") @@ -1032,10 +1032,10 @@ class MutableServer(unittest.TestCase): basedir = os.path.join("storage", "MutableServer", name) return basedir - def create(self, name, get_current_time=time.time): + def create(self, name, clock=reactor): workdir = self.workdir(name) ss = StorageServer(workdir, b"\x00" * 20, - get_current_time=get_current_time) + clock=clock) ss.setServiceParent(self.sparent) return ss @@ -1420,7 +1420,7 @@ class MutableServer(unittest.TestCase): clock = Clock() clock.advance(235) ss = self.create("test_mutable_add_lease_renews", - get_current_time=clock.seconds) + clock=clock) def secrets(n): return ( self.write_enabler(b"we1"), self.renew_secret(b"we1-%d" % n), From bf7d03310fc1bd730ba12449112144f3e6935020 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Nov 2021 11:09:45 -0500 Subject: [PATCH 161/220] Hide all _trial_temp. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 50a1352a2..7c7fa2afd 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,7 @@ zope.interface-*.egg .pc /src/allmydata/test/plugins/dropin.cache -/_trial_temp* +**/_trial_temp* /tmp* /*.patch /dist/ From 45c00e93c9709e216fe87410783b0a6125e159e7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Nov 2021 11:12:40 -0500 Subject: [PATCH 162/220] Use clock in BucketWriter. 
--- src/allmydata/storage/immutable.py | 11 ++++++----- src/allmydata/storage/server.py | 3 ++- src/allmydata/test/test_storage.py | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 8a7a5a966..7cfb7a1bf 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -233,7 +233,7 @@ class ShareFile(object): @implementer(RIBucketWriter) class BucketWriter(Referenceable): # type: ignore # warner/foolscap#78 - def __init__(self, ss, incominghome, finalhome, max_size, lease_info): + def __init__(self, ss, incominghome, finalhome, max_size, lease_info, clock): self.ss = ss self.incominghome = incominghome self.finalhome = finalhome @@ -245,12 +245,13 @@ class BucketWriter(Referenceable): # type: ignore # warner/foolscap#78 # added by simultaneous uploaders self._sharefile.add_lease(lease_info) self._already_written = RangeMap() + self._clock = clock def allocated_size(self): return self._max_size def remote_write(self, offset, data): - start = time.time() + start = self._clock.seconds() precondition(not self.closed) if self.throw_out_all_data: return @@ -268,12 +269,12 @@ class BucketWriter(Referenceable): # type: ignore # warner/foolscap#78 self._sharefile.write_share_data(offset, data) self._already_written.set(True, offset, end) - self.ss.add_latency("write", time.time() - start) + self.ss.add_latency("write", self._clock.seconds() - start) self.ss.count("write") def remote_close(self): precondition(not self.closed) - start = time.time() + start = self._clock.seconds() fileutil.make_dirs(os.path.dirname(self.finalhome)) fileutil.rename(self.incominghome, self.finalhome) @@ -306,7 +307,7 @@ class BucketWriter(Referenceable): # type: ignore # warner/foolscap#78 filelen = os.stat(self.finalhome)[stat.ST_SIZE] self.ss.bucket_writer_closed(self, filelen) - self.ss.add_latency("close", time.time() - start) + self.ss.add_latency("close", self._clock.seconds() - start) self.ss.count("close") def disconnected(self): diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 499d47276..080c1aea1 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -347,7 +347,8 @@ class StorageServer(service.MultiService, Referenceable): elif (not limited) or (remaining_space >= max_space_per_bucket): # ok! we need to create the new share file. 
bw = BucketWriter(self, incominghome, finalhome, - max_space_per_bucket, lease_info) + max_space_per_bucket, lease_info, + clock=self._clock) if self.no_storage: bw.throw_out_all_data = True bucketwriters[shnum] = bw diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index e143bec63..36c776fba 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -128,7 +128,7 @@ class Bucket(unittest.TestCase): def test_create(self): incoming, final = self.make_workdir("test_create") - bw = BucketWriter(self, incoming, final, 200, self.make_lease()) + bw = BucketWriter(self, incoming, final, 200, self.make_lease(), Clock()) bw.remote_write(0, b"a"*25) bw.remote_write(25, b"b"*25) bw.remote_write(50, b"c"*25) @@ -137,7 +137,7 @@ class Bucket(unittest.TestCase): def test_readwrite(self): incoming, final = self.make_workdir("test_readwrite") - bw = BucketWriter(self, incoming, final, 200, self.make_lease()) + bw = BucketWriter(self, incoming, final, 200, self.make_lease(), Clock()) bw.remote_write(0, b"a"*25) bw.remote_write(25, b"b"*25) bw.remote_write(50, b"c"*7) # last block may be short @@ -155,7 +155,7 @@ class Bucket(unittest.TestCase): incoming, final = self.make_workdir( "test_write_past_size_errors-{}".format(i) ) - bw = BucketWriter(self, incoming, final, 200, self.make_lease()) + bw = BucketWriter(self, incoming, final, 200, self.make_lease(), Clock()) with self.assertRaises(DataTooLargeError): bw.remote_write(offset, b"a" * length) @@ -174,7 +174,7 @@ class Bucket(unittest.TestCase): expected_data = b"".join(bchr(i) for i in range(100)) incoming, final = self.make_workdir("overlapping_writes_{}".format(uuid4())) bw = BucketWriter( - self, incoming, final, length, self.make_lease(), + self, incoming, final, length, self.make_lease(), Clock() ) # Three writes: 10-19, 30-39, 50-59. This allows for a bunch of holes. bw.remote_write(10, expected_data[10:20]) @@ -212,7 +212,7 @@ class Bucket(unittest.TestCase): length = 100 incoming, final = self.make_workdir("overlapping_writes_{}".format(uuid4())) bw = BucketWriter( - self, incoming, final, length, self.make_lease(), + self, incoming, final, length, self.make_lease(), Clock() ) # Three writes: 10-19, 30-39, 50-59. This allows for a bunch of holes. bw.remote_write(10, b"1" * 10) @@ -312,7 +312,7 @@ class BucketProxy(unittest.TestCase): final = os.path.join(basedir, "bucket") fileutil.make_dirs(basedir) fileutil.make_dirs(os.path.join(basedir, "tmp")) - bw = BucketWriter(self, incoming, final, size, self.make_lease()) + bw = BucketWriter(self, incoming, final, size, self.make_lease(), Clock()) rb = RemoteBucket(bw) return bw, rb, final From 5e341ad43a85444ffc3c12c685463171a53838ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Nov 2021 11:29:34 -0500 Subject: [PATCH 163/220] New tests to write. 
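
The placeholders added below sketch the coverage needed for the upload
timeout. The plan (implemented in the next commit) is to drive the
``BucketWriter``'s injected clock with ``Clock.advance()`` and assert on
``bw.closed``; roughly:

    clock = Clock()
    bw = BucketWriter(self, incoming, final, 200, self.make_lease(), clock)
    clock.advance(29 * 60)   # 29 minutes in: the bucket must still be open
    assert not bw.closed
    clock.advance(60)        # the 30th minute triggers the abort
    assert bw.closed
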
--- src/allmydata/test/test_storage.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py
index 36c776fba..93779bb29 100644
--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -285,6 +285,22 @@ class Bucket(unittest.TestCase):
         result_of_read = br.remote_read(0, len(share_data)+1)
         self.failUnlessEqual(result_of_read, share_data)
 
+    def test_bucket_expires_if_no_writes_for_30_minutes(self):
+        pass
+
+    def test_bucket_writes_delay_timeout(self):
+        pass
+
+    def test_bucket_finishing_writing_cancels_timeout(self):
+        pass
+
+    def test_bucket_closing_cancels_timeout(self):
+        pass
+
+    def test_bucket_aborting_cancels_timeout(self):
+        pass
+
+
 class RemoteBucket(object):
 
     def __init__(self, target):
@@ -559,7 +575,6 @@ class Server(unittest.TestCase):
         writer.remote_abort()
         self.failUnlessEqual(ss.allocated_size(), 0)
 
-
     def test_allocate(self):
         ss = self.create("test_allocate")

From 8c8e377466bcf2659029f7d59636d5039e12abf7 Mon Sep 17 00:00:00 2001
From: Itamar Turner-Trauring
Date: Thu, 18 Nov 2021 14:35:04 -0500
Subject: [PATCH 164/220] Implement timeout and corresponding tests.

---
 src/allmydata/storage/immutable.py | 28 +++++++++++++--
 src/allmydata/test/test_storage.py | 58 ++++++++++++++++++++++++++----
 2 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py
index 7cfb7a1bf..8a7519b7b 100644
--- a/src/allmydata/storage/immutable.py
+++ b/src/allmydata/storage/immutable.py
@@ -246,11 +246,17 @@ class BucketWriter(Referenceable):  # type: ignore # warner/foolscap#78
         self._sharefile.add_lease(lease_info)
         self._already_written = RangeMap()
         self._clock = clock
+        self._timeout = clock.callLater(30 * 60, self._abort_due_to_timeout)
 
     def allocated_size(self):
         return self._max_size
 
     def remote_write(self, offset, data):
+        self.write(offset, data)
+
+    def write(self, offset, data):
+        # Delay the timeout, since we received data:
+        self._timeout.reset(30 * 60)
         start = self._clock.seconds()
         precondition(not self.closed)
         if self.throw_out_all_data:
@@ -273,7 +279,11 @@ class BucketWriter(Referenceable):  # type: ignore # warner/foolscap#78
         self.ss.count("write")
 
     def remote_close(self):
+        self.close()
+
+    def close(self):
         precondition(not self.closed)
+        self._timeout.cancel()
         start = self._clock.seconds()
 
         fileutil.make_dirs(os.path.dirname(self.finalhome))
@@ -312,15 +322,23 @@ class BucketWriter(Referenceable):  # type: ignore # warner/foolscap#78
 
     def disconnected(self):
         if not self.closed:
-            self._abort()
+            self.abort()
+
+    def _abort_due_to_timeout(self):
+        """
+        Called if we run out of time.
+        """
+        log.msg("storage: aborting sharefile %s due to timeout" % self.incominghome,
+                facility="tahoe.storage", level=log.UNUSUAL)
+        self.abort()
 
     def remote_abort(self):
         log.msg("storage: aborting sharefile %s" % self.incominghome,
                 facility="tahoe.storage", level=log.UNUSUAL)
-        self._abort()
+        self.abort()
         self.ss.count("abort")
 
-    def _abort(self):
+    def abort(self):
         if self.closed:
             return
 
@@ -338,6 +356,10 @@ class BucketWriter(Referenceable):  # type: ignore # warner/foolscap#78
         self.closed = True
         self.ss.bucket_writer_closed(self, 0)
 
+        # Cancel timeout if it wasn't already cancelled.
+        if self._timeout.active():
+            self._timeout.cancel()
+
 
 @implementer(RIBucketReader)
 class BucketReader(Referenceable):  # type: ignore # warner/foolscap#78
diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py
index 93779bb29..18dca9856 100644
--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -285,20 +285,64 @@ class Bucket(unittest.TestCase):
         result_of_read = br.remote_read(0, len(share_data)+1)
         self.failUnlessEqual(result_of_read, share_data)
 
+    def _assert_timeout_only_after_30_minutes(self, clock, bw):
+        """
+        The ``BucketWriter`` times out and is closed after 30 minutes, but not
+        sooner.
+        """
+        self.assertFalse(bw.closed)
+        # 29 minutes pass. Everything is fine.
+        for i in range(29):
+            clock.advance(60)
+            self.assertFalse(bw.closed, "Bucket closed after only %d minutes" % (i + 1,))
+        # After the 30th minute, the bucket is closed due to lack of writes.
+        clock.advance(60)
+        self.assertTrue(bw.closed)
+
     def test_bucket_expires_if_no_writes_for_30_minutes(self):
-        pass
+        """
+        If a ``BucketWriter`` receives no writes for 30 minutes, it is removed.
+        """
+        incoming, final = self.make_workdir("test_bucket_expires")
+        clock = Clock()
+        bw = BucketWriter(self, incoming, final, 200, self.make_lease(), clock)
+        self._assert_timeout_only_after_30_minutes(clock, bw)
 
     def test_bucket_writes_delay_timeout(self):
-        pass
+        """
+        So long as the ``BucketWriter`` receives writes, the removal
+        timeout is put off.
+        """
+        incoming, final = self.make_workdir("test_bucket_writes_delay_timeout")
+        clock = Clock()
+        bw = BucketWriter(self, incoming, final, 200, self.make_lease(), clock)
+        # 20 minutes pass, getting close to the timeout...
+        clock.advance(29 * 60)
+        # .. but we receive a write! So that should delay the timeout.
+        bw.write(0, b"hello")
+        self._assert_timeout_only_after_30_minutes(clock, bw)
 
     def test_bucket_closing_cancels_timeout(self):
-        pass
+        """
+        Closing cancels the ``BucketWriter`` timeout.
+        """
+        incoming, final = self.make_workdir("test_bucket_close_timeout")
+        clock = Clock()
+        bw = BucketWriter(self, incoming, final, 10, self.make_lease(), clock)
+        self.assertTrue(clock.getDelayedCalls())
+        bw.close()
+        self.assertFalse(clock.getDelayedCalls())
 
     def test_bucket_aborting_cancels_timeout(self):
-        pass
+        """
+        Aborting cancels the ``BucketWriter`` timeout.
+        """
+        incoming, final = self.make_workdir("test_bucket_abort_timeout")
+        clock = Clock()
+        bw = BucketWriter(self, incoming, final, 10, self.make_lease(), clock)
+        self.assertTrue(clock.getDelayedCalls())
+        bw.abort()
+        self.assertFalse(clock.getDelayedCalls())
 
 
 class RemoteBucket(object):

From 1827faf36b60751811302d1d20ba87348b7e32c4 Mon Sep 17 00:00:00 2001
From: Itamar Turner-Trauring
Date: Thu, 18 Nov 2021 14:45:44 -0500
Subject: [PATCH 165/220] Fix issue with leaked-past-end-of-test DelayedCalls.
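
Every BucketWriter now arms a callLater() as soon as it is created, so a
test that built one against the global reactor left a pending DelayedCall
behind, and trial reported a dirty reactor. The shape of the fix in the
diff below: default the test helpers to a fresh per-test Clock so the
30-minute abort timer never touches the real reactor:

    def create(self, name, reserved_space=0, klass=StorageServer, clock=None):
        # A fresh Clock per test keeps the abort timer local instead of
        # leaving a pending DelayedCall on the global reactor.
        if clock is None:
            clock = Clock()
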
--- src/allmydata/test/test_storage.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 18dca9856..977ed768f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -498,7 +498,9 @@ class Server(unittest.TestCase): basedir = os.path.join("storage", "Server", name) return basedir - def create(self, name, reserved_space=0, klass=StorageServer, clock=reactor): + def create(self, name, reserved_space=0, klass=StorageServer, clock=None): + if clock is None: + clock = Clock() workdir = self.workdir(name) ss = klass(workdir, b"\x00" * 20, reserved_space=reserved_space, stats_provider=FakeStatsProvider(), @@ -1091,8 +1093,10 @@ class MutableServer(unittest.TestCase): basedir = os.path.join("storage", "MutableServer", name) return basedir - def create(self, name, clock=reactor): + def create(self, name, clock=None): workdir = self.workdir(name) + if clock is None: + clock = Clock() ss = StorageServer(workdir, b"\x00" * 20, clock=clock) ss.setServiceParent(self.sparent) From 5d915afe1c00d5832fec59d9a1599482d66a9e85 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 18 Nov 2021 15:42:54 -0500 Subject: [PATCH 166/220] Clean up BucketWriters on shutdown (also preventing DelayedCalls leaks in tests). --- src/allmydata/storage/server.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 080c1aea1..6e3d6f683 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -136,6 +136,12 @@ class StorageServer(service.MultiService, Referenceable): # Canaries and disconnect markers for BucketWriters created via Foolscap: self._bucket_writer_disconnect_markers = {} # type: Dict[BucketWriter,(IRemoteReference, object)] + def stopService(self): + # Cancel any in-progress uploads: + for bw in list(self._bucket_writers.values()): + bw.disconnected() + return service.MultiService.stopService(self) + def __repr__(self): return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) From bd645edd9e68efef5c21b5864957b6e2858acc12 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 18 Nov 2021 15:44:51 -0500 Subject: [PATCH 167/220] Fix flake. --- src/allmydata/test/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 977ed768f..92de63f0d 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -23,7 +23,7 @@ from uuid import uuid4 from twisted.trial import unittest -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.task import Clock from hypothesis import given, strategies From e2636466b584fff59dcd513866e254443417e771 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 18 Nov 2021 15:47:25 -0500 Subject: [PATCH 168/220] Fix a flake. 
--- src/allmydata/storage/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 6e3d6f683..7dc277e39 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -14,7 +14,7 @@ if PY2: else: from typing import Dict -import os, re, time +import os, re import six from foolscap.api import Referenceable From 4c111773876bb97192dc2604e6a32e14f7898c16 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 18 Nov 2021 15:58:55 -0500 Subject: [PATCH 169/220] Fix a problem with typechecking. Using remote_write() isn't quite right given move to HTTP, but can fight that battle another day. --- src/allmydata/storage/immutable.py | 3 --- src/allmydata/test/test_storage.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 8a7519b7b..08b83cd87 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -252,9 +252,6 @@ class BucketWriter(Referenceable): # type: ignore # warner/foolscap#78 return self._max_size def remote_write(self, offset, data): - self.write(offset, data) - - def write(self, offset, data): # Delay the timeout, since we received data: self._timeout.reset(30 * 60) start = self._clock.seconds() diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 92de63f0d..7fbe8f87b 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -319,7 +319,7 @@ class Bucket(unittest.TestCase): # 20 minutes pass, getting close to the timeout... clock.advance(29 * 60) # .. but we receive a write! So that should delay the timeout. - bw.write(0, b"hello") + bw.remote_write(0, b"hello") self._assert_timeout_only_after_30_minutes(clock, bw) def test_bucket_closing_cancels_timeout(self): From 04e45f065ab496acf8aadb9f4cca0f60f55c41a2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 22 Nov 2021 07:56:51 -0500 Subject: [PATCH 170/220] document `compare_leases_without_timestamps` --- src/allmydata/test/test_storage.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 92176ce52..91c7adb7f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1361,6 +1361,10 @@ class MutableServer(unittest.TestCase): 2: [b"2"*10]}) def compare_leases_without_timestamps(self, leases_a, leases_b): + """ + Assert that, except for expiration times, ``leases_a`` contains the same + lease information as ``leases_b``. + """ for a, b in zip(leases_a, leases_b): # The leases aren't always of the same type (though of course # corresponding elements in the two lists should be of the same From b92343c664c15605df4a4244208d614f2b3390b2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 22 Nov 2021 08:36:12 -0500 Subject: [PATCH 171/220] some more docstrings --- src/allmydata/storage/lease_schema.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py index 697ac9e34..c09a9279b 100644 --- a/src/allmydata/storage/lease_schema.py +++ b/src/allmydata/storage/lease_schema.py @@ -28,11 +28,17 @@ from .lease import ( @attr.s(frozen=True) class CleartextLeaseSerializer(object): + """ + Serialize and unserialize leases with cleartext secrets. 
+ """ _to_data = attr.ib() _from_data = attr.ib() def serialize(self, lease): # type: (LeaseInfo) -> bytes + """ + Represent the given lease as bytes with cleartext secrets. + """ if isinstance(lease, LeaseInfo): return self._to_data(lease) raise ValueError( @@ -42,6 +48,9 @@ class CleartextLeaseSerializer(object): ) def unserialize(self, data): + """ + Load a lease with cleartext secrets from the given bytes representation. + """ # type: (bytes) -> LeaseInfo # In v1 of the immutable schema lease secrets are stored plaintext. # So load the data into a plain LeaseInfo which works on plaintext From d1839187f148f0e8f265123149c5fc2bc3c9d143 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 22 Nov 2021 08:45:10 -0500 Subject: [PATCH 172/220] "misplaced type annotation" --- src/allmydata/storage/lease_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py index c09a9279b..7e604388e 100644 --- a/src/allmydata/storage/lease_schema.py +++ b/src/allmydata/storage/lease_schema.py @@ -48,10 +48,10 @@ class CleartextLeaseSerializer(object): ) def unserialize(self, data): + # type: (bytes) -> LeaseInfo """ Load a lease with cleartext secrets from the given bytes representation. """ - # type: (bytes) -> LeaseInfo # In v1 of the immutable schema lease secrets are stored plaintext. # So load the data into a plain LeaseInfo which works on plaintext # secrets. From c341a86abdd4aa2b4244d0adeff53d5893be9a03 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 23 Nov 2021 10:01:03 -0500 Subject: [PATCH 173/220] Correct the comment. --- src/allmydata/test/test_storage.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 7fbe8f87b..bc87e168d 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -316,9 +316,10 @@ class Bucket(unittest.TestCase): incoming, final = self.make_workdir("test_bucket_writes_delay_timeout") clock = Clock() bw = BucketWriter(self, incoming, final, 200, self.make_lease(), clock) - # 20 minutes pass, getting close to the timeout... + # 29 minutes pass, getting close to the timeout... clock.advance(29 * 60) - # .. but we receive a write! So that should delay the timeout. + # .. but we receive a write! So that should delay the timeout again to + # another 30 minutes. bw.remote_write(0, b"hello") self._assert_timeout_only_after_30_minutes(clock, bw) From 6c514dfda57bfc2ede45719db24acecfbfee3ed1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 23 Nov 2021 10:33:45 -0500 Subject: [PATCH 174/220] Add klein. 
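
klein needs packaging here because the new HTTP storage server (see the
_authorized_route change in the next commit) hangs its routes off a
class-level Klein app. The basic Klein pattern, sketched with a
placeholder handler body:

    from klein import Klein

    class HTTPServer(object):
        _app = Klein()

        @_app.route("/v1/version", methods=["GET"])
        def version(self, request):
            # Placeholder; the real server responds with CBOR-encoded data.
            return b"..."
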
--- nix/klein.nix | 18 ++++++++++++++++++ nix/overlays.nix | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 nix/klein.nix diff --git a/nix/klein.nix b/nix/klein.nix new file mode 100644 index 000000000..aa109e3d1 --- /dev/null +++ b/nix/klein.nix @@ -0,0 +1,18 @@ +{ lib, buildPythonPackage, fetchPypi }: +buildPythonPackage rec { + pname = "klein"; + version = "21.8.0"; + + src = fetchPypi { + sha256 = "09i1x5ppan3kqsgclbz8xdnlvzvp3amijbmdzv0kik8p5l5zswxa"; + inherit pname version; + }; + + doCheck = false; + + meta = with lib; { + homepage = https://github.com/twisted/klein; + description = "Nicer web server for Twisted"; + license = licenses.mit; + }; +} diff --git a/nix/overlays.nix b/nix/overlays.nix index fbd0ce3bb..011d8dd6b 100644 --- a/nix/overlays.nix +++ b/nix/overlays.nix @@ -28,6 +28,9 @@ self: super: { packageOverrides = python-self: python-super: { # collections-extended is not part of nixpkgs at this time. collections-extended = python-super.pythonPackages.callPackage ./collections-extended.nix { }; + + # klein is not in nixpkgs 21.05, at least: + klein = python-super.pythonPackages.callPackage ./klein.nix { }; }; }; } From c921b153f4990e98a32dde1286d3a9c11d5fd2e4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 23 Nov 2021 10:39:15 -0500 Subject: [PATCH 175/220] A better name for the API. --- src/allmydata/storage/http_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 3baa336fa..327892ecd 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -47,7 +47,7 @@ def _authorization_decorator(f): return route -def _route(app, *route_args, **route_kwargs): +def _authorized_route(app, *route_args, **route_kwargs): """ Like Klein's @route, but with additional support for checking the ``Authorization`` header as well as ``X-Tahoe-Authorization`` headers. The @@ -89,6 +89,6 @@ class HTTPServer(object): # TODO if data is big, maybe want to use a temporary file eventually... return dumps(data) - @_route(_app, "/v1/version", methods=["GET"]) + @_authorized_route(_app, "/v1/version", methods=["GET"]) def version(self, request, authorization): return self._cbor(request, self._storage_server.remote_get_version()) From a593095dc935b6719e266e0bf3a996b39047d9c0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 23 Nov 2021 10:39:53 -0500 Subject: [PATCH 176/220] Explain why it's a conditional import. --- src/allmydata/storage/http_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index e1743343d..f8a7590aa 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -14,6 +14,8 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 # fmt: on else: + # typing module not available in Python 2, and we only do type checking in + # Python 3 anyway. from typing import Union from treq.testing import StubTreq From 8abc1ad8f4e43a244d0bcead201a133f3cf8b0c1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 23 Nov 2021 10:44:45 -0500 Subject: [PATCH 177/220] cbor2 for Python 2 on Nix. 
--- nix/cbor2.nix | 18 ++++++++++++++++++ nix/overlays.nix | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 nix/cbor2.nix diff --git a/nix/cbor2.nix b/nix/cbor2.nix new file mode 100644 index 000000000..02c810e1e --- /dev/null +++ b/nix/cbor2.nix @@ -0,0 +1,18 @@ +{ lib, buildPythonPackage, fetchPypi }: +buildPythonPackage rec { + pname = "cbor2"; + version = "5.2.0"; + + src = fetchPypi { + sha256 = "1mmmncfbsx7cbdalcrsagp9hx7wqfawaz9361gjkmsk3lp6chd5w"; + inherit pname version; + }; + + doCheck = false; + + meta = with lib; { + homepage = https://github.com/agronholm/cbor2; + description = "CBOR encoder/decoder"; + license = licenses.mit; + }; +} diff --git a/nix/overlays.nix b/nix/overlays.nix index 011d8dd6b..5cfab200c 100644 --- a/nix/overlays.nix +++ b/nix/overlays.nix @@ -21,6 +21,9 @@ self: super: { # collections-extended is not part of nixpkgs at this time. collections-extended = python-super.pythonPackages.callPackage ./collections-extended.nix { }; + + # cbor2 is not part of nixpkgs at this time. + cbor2 = python-super.pythonPackages.callPackage ./cbor2.nix { }; }; }; From 30511ea8502dc04848f9ed3715b5517c51444c96 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 23 Nov 2021 11:39:51 -0500 Subject: [PATCH 178/220] Add more build inputs. --- nix/tahoe-lafs.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index df12f21d4..59864d36d 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -98,6 +98,7 @@ EOF service-identity pyyaml magic-wormhole eliot autobahn cryptography netifaces setuptools future pyutil distro configparser collections-extended + klein cbor2 treq ]; checkInputs = with python.pkgs; [ From 5855a30e34e1f18d44cbd898dee1b128be2cd976 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 23 Nov 2021 14:01:43 -0700 Subject: [PATCH 179/220] add docstrings --- src/allmydata/storage/crawler.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 129659d27..dcbea909a 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -146,10 +146,17 @@ class _LeaseStateSerializer(object): ) def load(self): + """ + :returns: deserialized JSON state + """ with self._path.open("rb") as f: return json.load(f) def save(self, data): + """ + Serialize the given data as JSON into the state-path + :returns: None + """ tmpfile = self._path.siblingExtension(".tmp") with tmpfile.open("wb") as f: json.dump(data, f) From 5fef83078d863843ef1f5f8a35990bfc3fcdb338 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 29 Nov 2021 13:08:11 -0500 Subject: [PATCH 180/220] news fragment --- newsfragments/3847.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3847.minor diff --git a/newsfragments/3847.minor b/newsfragments/3847.minor new file mode 100644 index 000000000..e69de29bb From 66a0c6f3f43ecd018cdbb571ebd6eab740b6cca7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 29 Nov 2021 13:43:06 -0500 Subject: [PATCH 181/220] add a direct test for the non-utf-8 bytestring behavior --- src/allmydata/test/test_eliotutil.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 3f915ecd2..00110530c 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -78,6 +78,9 @@ from .common import ( class EliotLoggedTestTests(AsyncTestCase): + """ + Tests for 
the automatic log-related provided by ``EliotLoggedRunTest``. + """ def test_returns_none(self): Message.log(hello="world") @@ -95,6 +98,12 @@ class EliotLoggedTestTests(AsyncTestCase): # We didn't start an action. We're not finishing an action. return d.result + def test_logs_non_utf_8_byte(self): + """ + If an Eliot message is emitted that contains a non-UTF-8 byte string then + the test nevertheless passes. + """ + Message.log(hello=b"\xFF") class ParseDestinationDescriptionTests(SyncTestCase): From f40da7dc27d089e3bdfdca48e51aec25aea282c0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 29 Nov 2021 13:23:59 -0500 Subject: [PATCH 182/220] Put the choice of JSON encoder for Eliot into its own module and use it in a few places --- src/allmydata/test/__init__.py | 4 ++-- src/allmydata/test/test_eliotutil.py | 4 ++-- src/allmydata/util/_eliot_updates.py | 28 ++++++++++++++++++++++++++++ src/allmydata/util/eliotutil.py | 7 ++++--- 4 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 src/allmydata/util/_eliot_updates.py diff --git a/src/allmydata/test/__init__.py b/src/allmydata/test/__init__.py index 893aa15ce..ad245ca77 100644 --- a/src/allmydata/test/__init__.py +++ b/src/allmydata/test/__init__.py @@ -125,5 +125,5 @@ if sys.platform == "win32": initialize() from eliot import to_file -from allmydata.util.jsonbytes import AnyBytesJSONEncoder -to_file(open("eliot.log", "wb"), encoder=AnyBytesJSONEncoder) +from allmydata.util.eliotutil import eliot_json_encoder +to_file(open("eliot.log", "wb"), encoder=eliot_json_encoder) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 00110530c..0be02b277 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -65,11 +65,11 @@ from twisted.internet.task import deferLater from twisted.internet import reactor from ..util.eliotutil import ( + eliot_json_encoder, log_call_deferred, _parse_destination_description, _EliotLogging, ) -from ..util.jsonbytes import AnyBytesJSONEncoder from .common import ( SyncTestCase, @@ -118,7 +118,7 @@ class ParseDestinationDescriptionTests(SyncTestCase): reactor = object() self.assertThat( _parse_destination_description("file:-")(reactor), - Equals(FileDestination(stdout, encoder=AnyBytesJSONEncoder)), + Equals(FileDestination(stdout, encoder=eliot_json_encoder)), ) diff --git a/src/allmydata/util/_eliot_updates.py b/src/allmydata/util/_eliot_updates.py new file mode 100644 index 000000000..4300f2be8 --- /dev/null +++ b/src/allmydata/util/_eliot_updates.py @@ -0,0 +1,28 @@ +""" +Bring in some Eliot updates from newer versions of Eliot than we can +depend on in Python 2. + +Every API in this module (except ``eliot_json_encoder``) should be obsolete as +soon as we depend on Eliot 1.14 or newer. + +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +from .jsonbytes import AnyBytesJSONEncoder + +# There are currently a number of log messages that include non-UTF-8 bytes. +# Allow these, at least for now. 
Later when the whole test suite has been +# converted to our SyncTestCase or AsyncTestCase it will be easier to turn +# this off and then attribute log failures to specific codepaths so they can +# be fixed (and then not regressed later) because those instances will result +# in test failures instead of only garbage being written to the eliot log. +eliot_json_encoder = AnyBytesJSONEncoder diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index 4e48fbb9f..ff858531d 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -87,8 +87,9 @@ from twisted.internet.defer import ( ) from twisted.application.service import Service -from .jsonbytes import AnyBytesJSONEncoder - +from ._eliot_updates import ( + eliot_json_encoder, +) def validateInstanceOf(t): """ @@ -306,7 +307,7 @@ class _DestinationParser(object): rotateLength=rotate_length, maxRotatedFiles=max_rotated_files, ) - return lambda reactor: FileDestination(get_file(), AnyBytesJSONEncoder) + return lambda reactor: FileDestination(get_file(), eliot_json_encoder) _parse_destination_description = _DestinationParser().parse From 3eb1a5e7cb6bc227feda6f254b43e35b1807446d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 29 Nov 2021 13:25:03 -0500 Subject: [PATCH 183/220] Add a MemoryLogger that prefers our encoder and use it instead of Eliot's --- src/allmydata/test/eliotutil.py | 16 ++----- src/allmydata/util/_eliot_updates.py | 62 +++++++++++++++++++++++++++- src/allmydata/util/eliotutil.py | 2 + 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py index 1685744fd..dd21f1e9d 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -42,7 +42,6 @@ from zope.interface import ( from eliot import ( ActionType, Field, - MemoryLogger, ILogger, ) from eliot.testing import ( @@ -54,8 +53,9 @@ from twisted.python.monkey import ( MonkeyPatcher, ) -from ..util.jsonbytes import AnyBytesJSONEncoder - +from ..util.eliotutil import ( + MemoryLogger, +) _NAME = Field.for_types( u"name", @@ -71,14 +71,6 @@ RUN_TEST = ActionType( ) -# On Python 3, we want to use our custom JSON encoder when validating messages -# can be encoded to JSON: -if PY2: - _memory_logger = MemoryLogger -else: - _memory_logger = lambda: MemoryLogger(encoder=AnyBytesJSONEncoder) - - @attr.s class EliotLoggedRunTest(object): """ @@ -170,7 +162,7 @@ def with_logging( """ @wraps(test_method) def run_with_logging(*args, **kwargs): - validating_logger = _memory_logger() + validating_logger = MemoryLogger() original = swap_logger(None) try: swap_logger(_TwoLoggers(original, validating_logger)) diff --git a/src/allmydata/util/_eliot_updates.py b/src/allmydata/util/_eliot_updates.py index 4300f2be8..4ff0caf4d 100644 --- a/src/allmydata/util/_eliot_updates.py +++ b/src/allmydata/util/_eliot_updates.py @@ -1,6 +1,7 @@ """ Bring in some Eliot updates from newer versions of Eliot than we can -depend on in Python 2. +depend on in Python 2. The implementations are copied from Eliot 1.14 and +only changed enough to add Python 2 compatibility. Every API in this module (except ``eliot_json_encoder``) should be obsolete as soon as we depend on Eliot 1.14 or newer. 
@@ -17,6 +18,13 @@ from future.utils import PY2 if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import json as pyjson +from functools import partial + +from eliot import ( + MemoryLogger as _MemoryLogger, +) + from .jsonbytes import AnyBytesJSONEncoder # There are currently a number of log messages that include non-UTF-8 bytes. @@ -26,3 +34,55 @@ from .jsonbytes import AnyBytesJSONEncoder # be fixed (and then not regressed later) because those instances will result # in test failures instead of only garbage being written to the eliot log. eliot_json_encoder = AnyBytesJSONEncoder + +class _CustomEncoderMemoryLogger(_MemoryLogger): + """ + Override message validation from the Eliot-supplied ``MemoryLogger`` to + use our chosen JSON encoder. + + This is only necessary on Python 2 where we use an old version of Eliot + that does not parameterize the encoder. + """ + def __init__(self, encoder=eliot_json_encoder): + """ + @param encoder: A JSONEncoder subclass to use when encoding JSON. + """ + self._encoder = encoder + super(_CustomEncoderMemoryLogger, self).__init__() + + def _validate_message(self, dictionary, serializer): + """Validate an individual message. + + As a side-effect, the message is replaced with its serialized contents. + + @param dictionary: A message C{dict} to be validated. Might be mutated + by the serializer! + + @param serializer: C{None} or a serializer. + + @raises TypeError: If a field name is not unicode, or the dictionary + fails to serialize to JSON. + + @raises eliot.ValidationError: If serializer was given and validation + failed. + """ + if serializer is not None: + serializer.validate(dictionary) + for key in dictionary: + if not isinstance(key, str): + if isinstance(key, bytes): + key.decode("utf-8") + else: + raise TypeError(dictionary, "%r is not unicode" % (key,)) + if serializer is not None: + serializer.serialize(dictionary) + + try: + pyjson.dumps(dictionary, cls=self._encoder) + except Exception as e: + raise TypeError("Message %s doesn't encode to JSON: %s" % (dictionary, e)) + +if PY2: + MemoryLogger = partial(_CustomEncoderMemoryLogger, encoder=eliot_json_encoder) +else: + MemoryLogger = partial(_MemoryLogger, encoder=eliot_json_encoder) diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index ff858531d..5067876c5 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -16,6 +16,7 @@ from __future__ import ( ) __all__ = [ + "MemoryLogger", "inline_callbacks", "eliot_logging_service", "opt_eliot_destination", @@ -88,6 +89,7 @@ from twisted.internet.defer import ( from twisted.application.service import Service from ._eliot_updates import ( + MemoryLogger, eliot_json_encoder, ) From 20e0626e424276c83cd1f2eb42fdddb7cf56072e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 29 Nov 2021 13:27:17 -0500 Subject: [PATCH 184/220] add capture_logging that parameterizes JSON encoder --- src/allmydata/util/_eliot_updates.py | 100 ++++++++++++++++++++++++++- src/allmydata/util/eliotutil.py | 13 +--- 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/src/allmydata/util/_eliot_updates.py b/src/allmydata/util/_eliot_updates.py index 4ff0caf4d..8e3beca45 100644 --- a/src/allmydata/util/_eliot_updates.py +++ b/src/allmydata/util/_eliot_updates.py @@ -19,12 +19,17 @@ if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, 
super, bytes, dict, list, object, range, str, max, min # noqa: F401 import json as pyjson -from functools import partial +from functools import wraps, partial from eliot import ( MemoryLogger as _MemoryLogger, ) +from eliot.testing import ( + check_for_errors, + swap_logger, +) + from .jsonbytes import AnyBytesJSONEncoder # There are currently a number of log messages that include non-UTF-8 bytes. @@ -86,3 +91,96 @@ if PY2: MemoryLogger = partial(_CustomEncoderMemoryLogger, encoder=eliot_json_encoder) else: MemoryLogger = partial(_MemoryLogger, encoder=eliot_json_encoder) + +def validateLogging( + assertion, *assertionArgs, **assertionKwargs +): + """ + Decorator factory for L{unittest.TestCase} methods to add logging + validation. + + 1. The decorated test method gets a C{logger} keyword argument, a + L{MemoryLogger}. + 2. All messages logged to this logger will be validated at the end of + the test. + 3. Any unflushed logged tracebacks will cause the test to fail. + + For example: + + from unittest import TestCase + from eliot.testing import assertContainsFields, validateLogging + + class MyTests(TestCase): + def assertFooLogging(self, logger): + assertContainsFields(self, logger.messages[0], {"key": 123}) + + + @param assertion: A callable that will be called with the + L{unittest.TestCase} instance, the logger and C{assertionArgs} and + C{assertionKwargs} once the actual test has run, allowing for extra + logging-related assertions on the effects of the test. Use L{None} if you + want the cleanup assertions registered but no custom assertions. + + @param assertionArgs: Additional positional arguments to pass to + C{assertion}. + + @param assertionKwargs: Additional keyword arguments to pass to + C{assertion}. + + @param encoder_: C{json.JSONEncoder} subclass to use when validating JSON. + """ + encoder_ = assertionKwargs.pop("encoder_", eliot_json_encoder) + def decorator(function): + @wraps(function) + def wrapper(self, *args, **kwargs): + skipped = False + + kwargs["logger"] = logger = MemoryLogger(encoder=encoder_) + self.addCleanup(check_for_errors, logger) + # TestCase runs cleanups in reverse order, and we want this to + # run *before* tracebacks are checked: + if assertion is not None: + self.addCleanup( + lambda: skipped + or assertion(self, logger, *assertionArgs, **assertionKwargs) + ) + try: + return function(self, *args, **kwargs) + except self.skipException: + skipped = True + raise + + return wrapper + + return decorator + +# PEP 8 variant: +validate_logging = validateLogging + +def capture_logging( + assertion, *assertionArgs, **assertionKwargs +): + """ + Capture and validate all logging that doesn't specify a L{Logger}. + + See L{validate_logging} for details on the rest of its behavior. 
+ """ + encoder_ = assertionKwargs.pop("encoder_", eliot_json_encoder) + def decorator(function): + @validate_logging( + assertion, *assertionArgs, encoder_=encoder_, **assertionKwargs + ) + @wraps(function) + def wrapper(self, *args, **kwargs): + logger = kwargs["logger"] + previous_logger = swap_logger(logger) + + def cleanup(): + swap_logger(previous_logger) + + self.addCleanup(cleanup) + return function(self, *args, **kwargs) + + return wrapper + + return decorator diff --git a/src/allmydata/util/eliotutil.py b/src/allmydata/util/eliotutil.py index 5067876c5..789ef38ff 100644 --- a/src/allmydata/util/eliotutil.py +++ b/src/allmydata/util/eliotutil.py @@ -23,6 +23,7 @@ __all__ = [ "opt_help_eliot_destinations", "validateInstanceOf", "validateSetMembership", + "capture_logging", ] from future.utils import PY2 @@ -33,7 +34,7 @@ from six import ensure_text from sys import ( stdout, ) -from functools import wraps, partial +from functools import wraps from logging import ( INFO, Handler, @@ -67,8 +68,6 @@ from eliot.twisted import ( DeferredContext, inline_callbacks, ) -from eliot.testing import capture_logging as eliot_capture_logging - from twisted.python.usage import ( UsageError, ) @@ -91,6 +90,7 @@ from twisted.application.service import Service from ._eliot_updates import ( MemoryLogger, eliot_json_encoder, + capture_logging, ) def validateInstanceOf(t): @@ -330,10 +330,3 @@ def log_call_deferred(action_type): return DeferredContext(d).addActionFinish() return logged_f return decorate_log_call_deferred - -# On Python 3, encoding bytes to JSON doesn't work, so we have a custom JSON -# encoder we want to use when validating messages. -if PY2: - capture_logging = eliot_capture_logging -else: - capture_logging = partial(eliot_capture_logging, encoder_=AnyBytesJSONEncoder) From 7626a02bdb84013e4f45bad81c8a3f5ba4586401 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 29 Nov 2021 13:27:28 -0500 Subject: [PATCH 185/220] remove redundant assertion --- src/allmydata/test/test_util.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index a03845ed6..9a0af1e06 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -553,11 +553,6 @@ class JSONBytes(unittest.TestCase): o, cls=jsonbytes.AnyBytesJSONEncoder)), expected, ) - self.assertEqual( - json.loads(jsonbytes.dumps(o, any_bytes=True)), - expected - ) - class FakeGetVersion(object): From b01478659ea9868164aa9f7b7368f295f2d47921 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 30 Nov 2021 13:18:18 -0500 Subject: [PATCH 186/220] Apparently I generated wrong hashes. 
--- nix/cbor2.nix | 2 +- nix/klein.nix | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nix/cbor2.nix b/nix/cbor2.nix index 02c810e1e..1bd9920e6 100644 --- a/nix/cbor2.nix +++ b/nix/cbor2.nix @@ -4,7 +4,7 @@ buildPythonPackage rec { version = "5.2.0"; src = fetchPypi { - sha256 = "1mmmncfbsx7cbdalcrsagp9hx7wqfawaz9361gjkmsk3lp6chd5w"; + sha256 = "1gwlgjl70vlv35cgkcw3cg7b5qsmws36hs4mmh0l9msgagjs4fm3"; inherit pname version; }; diff --git a/nix/klein.nix b/nix/klein.nix index aa109e3d1..0bb025cf8 100644 --- a/nix/klein.nix +++ b/nix/klein.nix @@ -4,7 +4,7 @@ buildPythonPackage rec { version = "21.8.0"; src = fetchPypi { - sha256 = "09i1x5ppan3kqsgclbz8xdnlvzvp3amijbmdzv0kik8p5l5zswxa"; + sha256 = "1mpydmz90d0n9dwa7mr6pgj5v0kczfs05ykssrasdq368dssw7ch"; inherit pname version; }; From 1fc77504aeec738acac315d65b68b4a7e01db095 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 30 Nov 2021 13:39:42 -0500 Subject: [PATCH 187/220] List dependencies. --- nix/klein.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nix/klein.nix b/nix/klein.nix index 0bb025cf8..196f95e88 100644 --- a/nix/klein.nix +++ b/nix/klein.nix @@ -10,6 +10,8 @@ buildPythonPackage rec { doCheck = false; + propagatedBuildInputs = [ attrs hyperlink incremental Tubes Twisted typing_extensions Werkzeug zope.interface ]; + meta = with lib; { homepage = https://github.com/twisted/klein; description = "Nicer web server for Twisted"; From c65a13e63228fada255a94b99ca4e61a1e9e58dc Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 30 Nov 2021 13:47:28 -0500 Subject: [PATCH 188/220] Rip out klein, maybe not necessary. --- nix/klein.nix | 20 -------------------- nix/overlays.nix | 3 --- 2 files changed, 23 deletions(-) delete mode 100644 nix/klein.nix diff --git a/nix/klein.nix b/nix/klein.nix deleted file mode 100644 index 196f95e88..000000000 --- a/nix/klein.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPythonPackage, fetchPypi }: -buildPythonPackage rec { - pname = "klein"; - version = "21.8.0"; - - src = fetchPypi { - sha256 = "1mpydmz90d0n9dwa7mr6pgj5v0kczfs05ykssrasdq368dssw7ch"; - inherit pname version; - }; - - doCheck = false; - - propagatedBuildInputs = [ attrs hyperlink incremental Tubes Twisted typing_extensions Werkzeug zope.interface ]; - - meta = with lib; { - homepage = https://github.com/twisted/klein; - description = "Nicer web server for Twisted"; - license = licenses.mit; - }; -} diff --git a/nix/overlays.nix b/nix/overlays.nix index 5cfab200c..92f36e93e 100644 --- a/nix/overlays.nix +++ b/nix/overlays.nix @@ -31,9 +31,6 @@ self: super: { packageOverrides = python-self: python-super: { # collections-extended is not part of nixpkgs at this time. 
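
Apparently buildInputs wasn't enough either.  Propagating the dependency
makes setuptools_scm available in the Python environment of the build (and
of anything that depends on cbor2), which the sdist's setuptools-scm-driven
version computation needs.  Note, as a later commit in this series adds,
setuptools_scm also has to appear in the argument list on the first line of
the expression for any of these spellings to evaluate.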
collections-extended = python-super.pythonPackages.callPackage ./collections-extended.nix { }; - - # klein is not in nixpkgs 21.05, at least: - klein = python-super.pythonPackages.callPackage ./klein.nix { }; }; }; } From 2f4d1079aa3b0621e4ad5991f810a5baf32c23db Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 30 Nov 2021 13:51:36 -0500 Subject: [PATCH 189/220] Needs setuptools_scm --- nix/cbor2.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nix/cbor2.nix b/nix/cbor2.nix index 1bd9920e6..4d9734a8b 100644 --- a/nix/cbor2.nix +++ b/nix/cbor2.nix @@ -1,4 +1,4 @@ -{ lib, buildPythonPackage, fetchPypi }: +{ lib, buildPythonPackage, fetchPypi , setuptools_scm }: buildPythonPackage rec { pname = "cbor2"; version = "5.2.0"; @@ -10,6 +10,8 @@ buildPythonPackage rec { doCheck = false; + nativeBuildInputs = [ setuptools_scm ]; + meta = with lib; { homepage = https://github.com/agronholm/cbor2; description = "CBOR encoder/decoder"; From 136bf95bdfcf0819285f7c4ed937f4de64a99125 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 30 Nov 2021 13:58:02 -0500 Subject: [PATCH 190/220] Simpler way. --- nix/cbor2.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nix/cbor2.nix b/nix/cbor2.nix index 4d9734a8b..ace5e13c6 100644 --- a/nix/cbor2.nix +++ b/nix/cbor2.nix @@ -1,4 +1,4 @@ -{ lib, buildPythonPackage, fetchPypi , setuptools_scm }: +{ lib, buildPythonPackage, fetchPypi }: buildPythonPackage rec { pname = "cbor2"; version = "5.2.0"; @@ -10,7 +10,7 @@ buildPythonPackage rec { doCheck = false; - nativeBuildInputs = [ setuptools_scm ]; + buildInputs = [ setuptools_scm ]; meta = with lib; { homepage = https://github.com/agronholm/cbor2; From f2b52f368d63059ebe559109b4dbe8c4720bdd2f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 30 Nov 2021 13:58:22 -0500 Subject: [PATCH 191/220] Another way. 
--- nix/cbor2.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/cbor2.nix b/nix/cbor2.nix index ace5e13c6..0544b1eb1 100644 --- a/nix/cbor2.nix +++ b/nix/cbor2.nix @@ -10,7 +10,7 @@ buildPythonPackage rec { doCheck = false; - buildInputs = [ setuptools_scm ]; + propagatedBuildInputs = [ setuptools_scm ]; meta = with lib; { homepage = https://github.com/agronholm/cbor2; From 49f24893219482a53a882c8139e3c46ffedcd48e Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 15:59:27 -0700 Subject: [PATCH 192/220] explicit 'migrate pickle files' command --- src/allmydata/scripts/admin.py | 55 +++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index a9feed0dd..c125bc9e6 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -18,7 +18,17 @@ except ImportError: pass from twisted.python import usage -from allmydata.scripts.common import BaseOptions +from twisted.python.filepath import ( + FilePath, +) +from allmydata.scripts.common import ( + BaseOptions, + BasedirOptions, +) +from allmydata.storage import ( + crawler, + expirer, +) class GenerateKeypairOptions(BaseOptions): @@ -65,12 +75,54 @@ def derive_pubkey(options): print("public:", str(ed25519.string_from_verifying_key(public_key), "ascii"), file=out) return 0 +class MigrateCrawlerOptions(BasedirOptions): + + def getSynopsis(self): + return "Usage: tahoe [global-options] admin migrate-crawler" + + def getUsage(self, width=None): + t = BasedirOptions.getUsage(self, width) + t += ( + "The crawler data is now stored as JSON to avoid" + " potential security issues with pickle files.\n\nIf" + " you are confident the state files in the 'storage/'" + " subdirectory of your node are trustworthy, run this" + " command to upgrade them to JSON.\n\nThe files are:" + " lease_checker.history, lease_checker.state, and" + " bucket_counter.state" + ) + return t + +def migrate_crawler(options): + out = options.stdout + storage = FilePath(options['basedir']).child("storage") + + conversions = [ + (storage.child("lease_checker.state"), crawler._convert_pickle_state_to_json), + (storage.child("bucket_counter.state"), crawler._convert_pickle_state_to_json), + (storage.child("lease_checker.history"), expirer._convert_pickle_state_to_json), + ] + + for fp, converter in conversions: + existed = fp.exists() + newfp = crawler._maybe_upgrade_pickle_to_json(fp, converter) + if existed: + print("Converted '{}' to '{}'".format(fp.path, newfp.path)) + else: + if newfp.exists(): + print("Already converted: '{}'".format(newfp.path)) + else: + print("Not found: '{}'".format(fp.path)) + + class AdminCommand(BaseOptions): subCommands = [ ("generate-keypair", None, GenerateKeypairOptions, "Generate a public/private keypair, write to stdout."), ("derive-pubkey", None, DerivePubkeyOptions, "Derive a public key from a private key."), + ("migrate-crawler", None, MigrateCrawlerOptions, + "Write the crawler-history data as JSON."), ] def postOptions(self): if not hasattr(self, 'subOptions'): @@ -88,6 +140,7 @@ each subcommand. 
subDispatch = { "generate-keypair": print_keypair, "derive-pubkey": derive_pubkey, + "migrate-crawler": migrate_crawler, } def do_admin(options): From ce25795e4e86b778ad6cc739fdc83d42d101101e Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 16:00:19 -0700 Subject: [PATCH 193/220] new news --- newsfragments/3825.security | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/newsfragments/3825.security b/newsfragments/3825.security index b16418d2b..df83821de 100644 --- a/newsfragments/3825.security +++ b/newsfragments/3825.security @@ -1,5 +1,6 @@ The lease-checker now uses JSON instead of pickle to serialize its state. -Once you have run this version the lease state files will be stored in JSON -and an older version of the software won't load them (it simply won't notice -them so it will appear to have never run). +tahoe will now refuse to run until you either delete all pickle files or +migrate them using the new command: + + tahoe admin migrate-crawler From 3fd1ca8acbc3d046c97e3c520fdc6fab5d67541d Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 16:00:35 -0700 Subject: [PATCH 194/220] it's an error to have pickle-format files --- src/allmydata/scripts/tahoe_run.py | 16 +++++++++++++++- src/allmydata/storage/crawler.py | 8 ++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 01f1a354c..51be32ee3 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -27,7 +27,9 @@ from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin - +from allmydata.storage.crawler import ( + MigratePickleFileError, +) from allmydata.node import ( PortAssignmentRequired, PrivacyError, @@ -164,6 +166,18 @@ class DaemonizeTheRealService(Service, HookMixin): self.stderr.write("\ntub.port cannot be 0: you must choose.\n\n") elif reason.check(PrivacyError): self.stderr.write("\n{}\n\n".format(reason.value)) + elif reason.check(MigratePickleFileError): + self.stderr.write( + "Error\nAt least one 'pickle' format file exists.\n" + "The file is {}\n" + "You must either delete the pickle-format files" + " or migrate them using the command:\n" + " tahoe admin migrate-crawler --basedir {}\n\n" + .format( + reason.value.args[0].path, + self.basedir, + ) + ) else: self.stderr.write("\nUnknown error\n") reason.printTraceback(self.stderr) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index dcbea909a..2b8cde230 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -27,6 +27,14 @@ class TimeSliceExceeded(Exception): pass +class MigratePickleFileError(Exception): + """ + A pickle-format file exists (the FilePath to the file will be the + single arg). 
+ """ + pass + + def _convert_cycle_data(state): """ :param dict state: cycle-to-date or history-item state From 1b8ae8039e79bf288916edb9f7949f76f943aef4 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 16:01:15 -0700 Subject: [PATCH 195/220] no auto-migrate; produce error if pickle-files exist --- src/allmydata/storage/crawler.py | 31 +++++++++++++++++++++---------- src/allmydata/storage/expirer.py | 14 ++++++-------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 2b8cde230..f63754e10 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -108,7 +108,7 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): :param Callable[dict] convert_pickle: function to change pickle-style state into JSON-style state - :returns unicode: the local path where the state is stored + :returns FilePath: the local path where the state is stored If this state path is JSON, simply return it. @@ -116,14 +116,14 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): JSON path. """ if state_path.path.endswith(".json"): - return state_path.path + return state_path json_state_path = state_path.siblingExtension(".json") # if there's no file there at all, we're done because there's # nothing to upgrade if not state_path.exists(): - return json_state_path.path + return json_state_path # upgrade the pickle data to JSON import pickle @@ -135,7 +135,23 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): # we've written the JSON, delete the pickle state_path.remove() - return json_state_path.path + return json_state_path + + +def _confirm_json_format(fp): + """ + :param FilePath fp: the original (pickle) name of a state file + + This confirms that we do _not_ have the pickle-version of a + state-file and _do_ either have nothing, or the JSON version. If + the pickle-version exists, an exception is raised. + + :returns FilePath: the JSON name of a state file + """ + jsonfp = fp.siblingExtension(".json") + if fp.exists(): + raise MigratePickleFileError(fp) + return jsonfp class _LeaseStateSerializer(object): @@ -146,12 +162,7 @@ class _LeaseStateSerializer(object): """ def __init__(self, state_path): - self._path = FilePath( - _maybe_upgrade_pickle_to_json( - FilePath(state_path), - _convert_pickle_state_to_json, - ) - ) + self._path = _confirm_json_format(FilePath(state_path)) def load(self): """ diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index ad1343ef5..cd0a9369a 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -12,6 +12,8 @@ import os import struct from allmydata.storage.crawler import ( ShareCrawler, + MigratePickleFileError, + _confirm_json_format, _maybe_upgrade_pickle_to_json, _convert_cycle_data, ) @@ -40,17 +42,13 @@ def _convert_pickle_state_to_json(state): class _HistorySerializer(object): """ Serialize the 'history' file of the lease-crawler state. This is - "storage/history.state" for the pickle or - "storage/history.state.json" for the new JSON format. + "storage/lease_checker.history" for the pickle or + "storage/lease_checker.history.json" for the new JSON format. 
""" def __init__(self, history_path): - self._path = FilePath( - _maybe_upgrade_pickle_to_json( - FilePath(history_path), - _convert_pickle_state_to_json, - ) - ) + self._path = _confirm_json_format(FilePath(history_path)) + if not self._path.exists(): with self._path.open("wb") as f: json.dump({}, f) From 0a4bc385c5ac7c5e9410e83703250b425608be57 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 18:00:58 -0700 Subject: [PATCH 196/220] fix tests to use migrate command --- src/allmydata/scripts/admin.py | 7 ++-- src/allmydata/storage/crawler.py | 2 ++ src/allmydata/test/test_storage_web.py | 45 +++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index c125bc9e6..a6e826174 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -93,6 +93,7 @@ class MigrateCrawlerOptions(BasedirOptions): ) return t + def migrate_crawler(options): out = options.stdout storage = FilePath(options['basedir']).child("storage") @@ -107,12 +108,12 @@ def migrate_crawler(options): existed = fp.exists() newfp = crawler._maybe_upgrade_pickle_to_json(fp, converter) if existed: - print("Converted '{}' to '{}'".format(fp.path, newfp.path)) + print("Converted '{}' to '{}'".format(fp.path, newfp.path), file=out) else: if newfp.exists(): - print("Already converted: '{}'".format(newfp.path)) + print("Already converted: '{}'".format(newfp.path), file=out) else: - print("Not found: '{}'".format(fp.path)) + print("Not found: '{}'".format(fp.path), file=out) class AdminCommand(BaseOptions): diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index f63754e10..a1f70f4e5 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -148,6 +148,8 @@ def _confirm_json_format(fp): :returns FilePath: the JSON name of a state file """ + if fp.path.endswith(".json"): + return fp jsonfp = fp.siblingExtension(".json") if fp.exists(): raise MigratePickleFileError(fp) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 269af2203..86c2382f0 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -19,6 +19,7 @@ import time import os.path import re import json +from six.moves import StringIO from twisted.trial import unittest @@ -45,6 +46,13 @@ from allmydata.web.storage import ( StorageStatusElement, remove_prefix ) +from allmydata.scripts.admin import ( + MigrateCrawlerOptions, + migrate_crawler, +) +from allmydata.scripts.runner import ( + Options, +) from .common_util import FakeCanary from .common_web import ( @@ -1152,15 +1160,29 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): """ # this file came from an "in the wild" tahoe version 1.16.0 original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state.txt") - test_pickle = FilePath("lease_checker.state") + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + test_pickle = storage.child("lease_checker.state") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) - serial = _LeaseStateSerializer(test_pickle.path) + # convert from pickle format to JSON + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + 
migrate_crawler(options)
 
         # the (existing) state file should have been upgraded to JSON
-        self.assertNot(test_pickle.exists())
+        self.assertFalse(test_pickle.exists())
         self.assertTrue(test_pickle.siblingExtension(".json").exists())
+        serial = _LeaseStateSerializer(test_pickle.path)
 
         self.assertEqual(
             serial.load(),
@@ -1340,10 +1362,25 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin):
         """
         # this file came from an "in the wild" tahoe version 1.16.0
         original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history.txt")
-        test_pickle = FilePath("lease_checker.history")
+        root = FilePath(self.mktemp())
+        storage = root.child("storage")
+        storage.makedirs()
+        test_pickle = storage.child("lease_checker.history")
         with test_pickle.open("w") as local, original_pickle.open("r") as remote:
             local.write(remote.read())
 
+        # convert from pickle format to JSON
+        top = Options()
+        top.parseOptions([
+            "admin", "migrate-crawler",
+            "--basedir", storage.parent().path,
+        ])
+        options = top.subOptions
+        while hasattr(options, "subOptions"):
+            options = options.subOptions
+        options.stdout = StringIO()
+        migrate_crawler(options)
+
         serial = _HistorySerializer(test_pickle.path)
 
         self.maxDiff = None

From fc9671a8122bd085fa1d4ea74e2d4850abdf529f Mon Sep 17 00:00:00 2001
From: meejah
Date: Tue, 30 Nov 2021 18:25:32 -0700
Subject: [PATCH 197/220] simplify, flake8

---
 src/allmydata/scripts/admin.py         | 2 +-
 src/allmydata/storage/crawler.py       | 7 +------
 src/allmydata/storage/expirer.py       | 2 --
 src/allmydata/test/test_storage_web.py | 1 -
 4 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py
index a6e826174..e0dcc8821 100644
--- a/src/allmydata/scripts/admin.py
+++ b/src/allmydata/scripts/admin.py
@@ -106,7 +106,7 @@ def migrate_crawler(options):
 
     for fp, converter in conversions:
         existed = fp.exists()
-        newfp = crawler._maybe_upgrade_pickle_to_json(fp, converter)
+        newfp = crawler._upgrade_pickle_to_json(fp, converter)
         if existed:
             print("Converted '{}' to '{}'".format(fp.path, newfp.path), file=out)
         else:
diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py
index a1f70f4e5..dbf4b1300 100644
--- a/src/allmydata/storage/crawler.py
+++ b/src/allmydata/storage/crawler.py
@@ -101,7 +101,7 @@ def _convert_pickle_state_to_json(state):
     }
 
 
-def _maybe_upgrade_pickle_to_json(state_path, convert_pickle):
+def _upgrade_pickle_to_json(state_path, convert_pickle):
     """
     :param FilePath state_path: the filepath to ensure is json
 
@@ -110,14 +110,9 @@ def _upgrade_pickle_to_json(state_path, convert_pickle):
 
     :returns FilePath: the local path where the state is stored
 
-    If this state path is JSON, simply return it.
-
     If this state is pickle, convert to the JSON format and return the
     JSON path.
     """
-    if state_path.path.endswith(".json"):
-        return state_path
-
     json_state_path = state_path.siblingExtension(".json")
 
     # if there's no file there at all, we're done because there's
diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py
index cd0a9369a..abe3c37b6 100644
--- a/src/allmydata/storage/expirer.py
+++ b/src/allmydata/storage/expirer.py
@@ -12,9 +12,7 @@ import os
 import struct
 from allmydata.storage.crawler import (
     ShareCrawler,
-    MigratePickleFileError,
     _confirm_json_format,
-    _maybe_upgrade_pickle_to_json,
     _convert_cycle_data,
 )
 from allmydata.storage.shares import get_share_file
diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py
index 86c2382f0..490a3f775 100644
--- a/src/allmydata/test/test_storage_web.py
+++ b/src/allmydata/test/test_storage_web.py
@@ -47,7 +47,6 @@ from allmydata.web.storage import (
     remove_prefix
 )
 from allmydata.scripts.admin import (
-    MigrateCrawlerOptions,
     migrate_crawler,
 )
 from allmydata.scripts.runner import (

From 679c46451764aae1213239bb90dd25b36bba324e Mon Sep 17 00:00:00 2001
From: meejah
Date: Tue, 30 Nov 2021 18:43:06 -0700
Subject: [PATCH 198/220] tests

---
 src/allmydata/test/cli/test_admin.py | 86 ++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/allmydata/test/cli/test_admin.py

diff --git a/src/allmydata/test/cli/test_admin.py b/src/allmydata/test/cli/test_admin.py
new file mode 100644
index 000000000..bdfc0a46f
--- /dev/null
+++ b/src/allmydata/test/cli/test_admin.py
@@ -0,0 +1,86 @@
+"""
+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+
+from six.moves import StringIO
+
+from testtools.matchers import (
+    Contains,
+)
+
+from twisted.trial import unittest
+from twisted.python.filepath import FilePath
+
+from allmydata.scripts.admin import (
+    migrate_crawler,
+)
+from allmydata.scripts.runner import (
+    Options,
+)
+from ..common import (
+    SyncTestCase,
+)
+
+class AdminMigrateCrawler(SyncTestCase):
+    """
+    Tests related to 'tahoe admin migrate-crawler'
+    """
+
+    def test_already(self):
+        """
+        We've already migrated; don't do it again.
+        """
+
+        root = FilePath(self.mktemp())
+        storage = root.child("storage")
+        storage.makedirs()
+        with storage.child("lease_checker.state.json").open("w") as f:
+            f.write(b"{}\n")
+
+        top = Options()
+        top.parseOptions([
+            "admin", "migrate-crawler",
+            "--basedir", storage.parent().path,
+        ])
+        options = top.subOptions
+        while hasattr(options, "subOptions"):
+            options = options.subOptions
+        options.stdout = StringIO()
+        migrate_crawler(options)
+
+        self.assertThat(
+            options.stdout.getvalue(),
+            Contains("Already converted:"),
+        )
+
+    def test_usage(self):
+        """
+        The usage text for 'migrate-crawler' mentions the pickle security issue that motivates the migration.
+ """ + + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + with storage.child("lease_checker.state.json").open("w") as f: + f.write(b"{}\n") + + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + self.assertThat( + str(options), + Contains("security issues with pickle") + ) From b47381401c589c056afe89744df5b3f01f2ae5ae Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 19:01:09 -0700 Subject: [PATCH 199/220] flake8 --- src/allmydata/test/cli/test_admin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/cli/test_admin.py b/src/allmydata/test/cli/test_admin.py index bdfc0a46f..082904652 100644 --- a/src/allmydata/test/cli/test_admin.py +++ b/src/allmydata/test/cli/test_admin.py @@ -16,8 +16,9 @@ from testtools.matchers import ( Contains, ) -from twisted.trial import unittest -from twisted.python.filepath import FilePath +from twisted.python.filepath import ( + FilePath, +) from allmydata.scripts.admin import ( migrate_crawler, From 85fa8fe32e05c68da46f15fe68b69781d05384a7 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 23:00:59 -0700 Subject: [PATCH 200/220] py2/py3 glue code for json dumping --- src/allmydata/storage/crawler.py | 18 ++++++++++++++---- src/allmydata/storage/expirer.py | 7 +++---- src/allmydata/test/test_storage_web.py | 4 ++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index dbf4b1300..7516bc4e9 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -125,8 +125,7 @@ def _upgrade_pickle_to_json(state_path, convert_pickle): with state_path.open("rb") as f: state = pickle.load(f) new_state = convert_pickle(state) - with json_state_path.open("wb") as f: - json.dump(new_state, f) + _dump_json_to_file(new_state, json_state_path) # we've written the JSON, delete the pickle state_path.remove() @@ -151,6 +150,18 @@ def _confirm_json_format(fp): return jsonfp +def _dump_json_to_file(js, afile): + """ + Dump the JSON object `js` to the FilePath `afile` + """ + with afile.open("wb") as f: + data = json.dumps(js) + if PY2: + f.write(data) + else: + f.write(data.encode("utf8")) + + class _LeaseStateSerializer(object): """ Read and write state for LeaseCheckingCrawler. 
This understands @@ -174,8 +185,7 @@ class _LeaseStateSerializer(object): :returns: None """ tmpfile = self._path.siblingExtension(".tmp") - with tmpfile.open("wb") as f: - json.dump(data, f) + _dump_json_to_file(data, tmpfile) fileutil.move_into_place(tmpfile.path, self._path.path) return None diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index abe3c37b6..55ab51843 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -14,6 +14,7 @@ from allmydata.storage.crawler import ( ShareCrawler, _confirm_json_format, _convert_cycle_data, + _dump_json_to_file, ) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ @@ -48,8 +49,7 @@ class _HistorySerializer(object): self._path = _confirm_json_format(FilePath(history_path)) if not self._path.exists(): - with self._path.open("wb") as f: - json.dump({}, f) + _dump_json_to_file({}, self._path) def load(self): """ @@ -65,8 +65,7 @@ class _HistorySerializer(object): """ Serialize the existing data as JSON. """ - with self._path.open("wb") as f: - json.dump(new_history, f) + _dump_json_to_file(new_history, self._path) return None diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 490a3f775..dff3b36f5 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1163,7 +1163,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.state") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: local.write(remote.read()) # convert from pickle format to JSON @@ -1365,7 +1365,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.history") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: local.write(remote.read()) # convert from pickle format to JSON From d985d1062295e2f816214bcbedb6746838d7a67d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 1 Dec 2021 09:24:03 -0500 Subject: [PATCH 201/220] Update nix/cbor2.nix Co-authored-by: Jean-Paul Calderone --- nix/cbor2.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/cbor2.nix b/nix/cbor2.nix index 0544b1eb1..16ca8ff63 100644 --- a/nix/cbor2.nix +++ b/nix/cbor2.nix @@ -1,4 +1,4 @@ -{ lib, buildPythonPackage, fetchPypi }: +{ lib, buildPythonPackage, fetchPypi, setuptools_scm }: buildPythonPackage rec { pname = "cbor2"; version = "5.2.0"; From 18a5966f1d27791d3129690926791a623957472c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 1 Dec 2021 09:38:56 -0500 Subject: [PATCH 202/220] Don't bother running HTTP server tests on Python 2, since it's going away any day now. 
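
Raising SkipTest from setUp() makes trial report every test method in the
class as skipped on Python 2 instead of running it.  A class-level skip
attribute would be an equivalent spelling (a sketch, not what the diff
below does):

    class HTTPTests(TestCase):
        if PY2:
            skip = "Not going to bother supporting Python 2"

Doing it in setUp() keeps the condition next to the rest of the setup
logic.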
---
 src/allmydata/test/test_storage_http.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py
index 9ba8adf21..442e154a0 100644
--- a/src/allmydata/test/test_storage_http.py
+++ b/src/allmydata/test/test_storage_http.py
@@ -14,6 +14,8 @@ if PY2:
     from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
 # fmt: on
 
+from unittest import SkipTest
+
 from twisted.trial.unittest import TestCase
 from twisted.internet.defer import inlineCallbacks
 
@@ -31,6 +33,8 @@ class HTTPTests(TestCase):
     """
 
     def setUp(self):
+        if PY2:
+            raise SkipTest("Not going to bother supporting Python 2")
         self.storage_server = StorageServer(self.mktemp(), b"\x00" * 20)
         # TODO what should the swissnum _actually_ be?
         self._http_server = HTTPServer(self.storage_server, b"abcd")

From 25ca767095019f3f0d6288b2a870ef3b98eef112 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 1 Dec 2021 11:49:52 -0700
Subject: [PATCH 203/220] an offering to the windows goddesses

---
 src/allmydata/test/test_storage_web.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py
index dff3b36f5..282fb67e1 100644
--- a/src/allmydata/test/test_storage_web.py
+++ b/src/allmydata/test/test_storage_web.py
@@ -22,11 +22,11 @@ import json
 from six.moves import StringIO
 
 from twisted.trial import unittest
-
 from twisted.internet import defer
 from twisted.application import service
 from twisted.web.template import flattenString
 from twisted.python.filepath import FilePath
+from twisted.python.runtime import platform
 
 from foolscap.api import fireEventually
 from allmydata.util import fileutil, hashutil, base32, pollmixin
@@ -1163,8 +1163,12 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin):
         storage = root.child("storage")
         storage.makedirs()
         test_pickle = storage.child("lease_checker.state")
-        with test_pickle.open("wb") as local, original_pickle.open("rb") as remote:
-            local.write(remote.read())
+        with test_pickle.open("w") as local, original_pickle.open("r") as remote:
+            for line in remote.readlines():
+                if platform.isWindows():
+                    local.write(line.replace("\n", "\r\n"))
+                else:
+                    local.write(line.replace("\n", "\r\n"))
@@ -1366,7 +1370,11 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin):
         storage.makedirs()
         test_pickle = storage.child("lease_checker.history")
         with test_pickle.open("wb") as local, original_pickle.open("rb") as remote:
-            local.write(remote.read())
+            for line in remote.readlines():
+                if platform.isWindows():
+                    local.write(line.replace("\n", "\r\n"))
+                else:
+                    local.write(line)

From 7080ee6fc7747e1a9ca10ddb47fe81fd5e96a37b Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 1 Dec 2021 12:02:06 -0700
Subject: [PATCH 204/220] oops

---
 src/allmydata/test/test_storage_web.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py
index 282fb67e1..1cf96d660 100644
--- a/src/allmydata/test/test_storage_web.py
+++ b/src/allmydata/test/test_storage_web.py
@@ -1168,7 +1168,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin):
                 if platform.isWindows():
                     local.write(line.replace("\n", "\r\n"))
                 else:
-                    local.write(line.replace("\n", "\r\n"))
+                    local.write(line)
 
         # convert from pickle format to JSON
         top = Options()

From 940c6343cf32318b9ba72f1330fdd5371346ce1f Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 1 Dec 2021 12:02:42 -0700
Subject: [PATCH 205/220] consistency

---
 src/allmydata/test/test_storage_web.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py
index 1cf96d660..961bbef98 100644
--- a/src/allmydata/test/test_storage_web.py
+++ b/src/allmydata/test/test_storage_web.py
@@ -1369,7 +1369,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin):
         storage = root.child("storage")
         storage.makedirs()
         test_pickle = storage.child("lease_checker.history")
-        with test_pickle.open("wb") as local, original_pickle.open("rb") as remote:
+        with test_pickle.open("w") as local, original_pickle.open("r") as remote:
             for line in remote.readlines():
                 if platform.isWindows():
                     local.write(line.replace("\n", "\r\n"))

From 90d1e90a14b0a3455e2e5ac86cde814a8a81b378 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Wed, 1 Dec 2021 15:05:29 -0500
Subject: [PATCH 206/220] rewrite the Eliot interaction tests to make expected
 behavior clearer and to have explicit assertions about that behavior

---
 src/allmydata/test/test_eliotutil.py | 113 ++++++++++++++++++++++-----
 1 file changed, 92 insertions(+), 21 deletions(-)

diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py
index 0be02b277..cabe599b3 100644
--- a/src/allmydata/test/test_eliotutil.py
+++ b/src/allmydata/test/test_eliotutil.py
@@ -27,13 +27,12 @@ from fixtures import (
 )
 from testtools import (
     TestCase,
-)
-from testtools import (
     TestResult,
 )
 from testtools.matchers import (
     Is,
     IsInstance,
+    Not,
     MatchesStructure,
     Equals,
     HasLength,
@@ -77,33 +76,105 @@ from .common import (
 )
 
 
-class EliotLoggedTestTests(AsyncTestCase):
+def passes():
     """
-    Tests for the automatic log-related provided by ``EliotLoggedRunTest``.
+    Create a matcher that matches a ``TestCase`` that runs without failures or
+    errors.
     """
-    def test_returns_none(self):
-        Message.log(hello="world")
+    def run(case):
+        result = TestResult()
+        case.run(result)
+        return result.wasSuccessful()
+    return AfterPreprocessing(run, Equals(True))
 
-    def test_returns_fired_deferred(self):
-        Message.log(hello="world")
-        return succeed(None)
 
-    def test_returns_unfired_deferred(self):
-        Message.log(hello="world")
-        # @eliot_logged_test automatically gives us an action context but it's
-        # still our responsibility to maintain it across stack-busting
-        # operations.
-        d = DeferredContext(deferLater(reactor, 0.0, lambda: None))
-        d.addCallback(lambda ignored: Message.log(goodbye="world"))
-        # We didn't start an action.  We're not finishing an action.
-        return d.result
+
+class EliotLoggedTestTests(TestCase):
+    """
+    Tests for the automatic log-related behavior provided by ``AsyncTestCase``.
+
+    This class uses ``testtools.TestCase`` because it is inconvenient to nest
+    ``AsyncTestCase`` inside ``AsyncTestCase`` (in particular, Eliot messages
+    emitted by the inner test case get observed by the outer test case and if
+    an inner case emits invalid messages they cause the outer test case to
+    fail).
+    """
+    def test_fails(self):
+        """
+        A test method of an ``AsyncTestCase`` subclass can fail.
+ """ + class UnderTest(AsyncTestCase): + def test_it(self): + self.fail("make sure it can fail") + + self.assertThat(UnderTest("test_it"), Not(passes())) + + def test_unserializable_fails(self): + """ + A test method of an ``AsyncTestCase`` subclass that logs an unserializable + value with Eliot fails. + """ + class world(object): + """ + an unserializable object + """ + + class UnderTest(AsyncTestCase): + def test_it(self): + Message.log(hello=world) + + self.assertThat(UnderTest("test_it"), Not(passes())) def test_logs_non_utf_8_byte(self): """ - If an Eliot message is emitted that contains a non-UTF-8 byte string then - the test nevertheless passes. + A test method of an ``AsyncTestCase`` subclass can log a message that + contains a non-UTF-8 byte string and return ``None`` and pass. """ - Message.log(hello=b"\xFF") + class UnderTest(AsyncTestCase): + def test_it(self): + Message.log(hello=b"\xFF") + + self.assertThat(UnderTest("test_it"), passes()) + + def test_returns_none(self): + """ + A test method of an ``AsyncTestCase`` subclass can log a message and + return ``None`` and pass. + """ + class UnderTest(AsyncTestCase): + def test_it(self): + Message.log(hello="world") + + self.assertThat(UnderTest("test_it"), passes()) + + def test_returns_fired_deferred(self): + """ + A test method of an ``AsyncTestCase`` subclass can log a message and + return an already-fired ``Deferred`` and pass. + """ + class UnderTest(AsyncTestCase): + def test_it(self): + Message.log(hello="world") + return succeed(None) + + self.assertThat(UnderTest("test_it"), passes()) + + def test_returns_unfired_deferred(self): + """ + A test method of an ``AsyncTestCase`` subclass can log a message and + return an unfired ``Deferred`` and pass when the ``Deferred`` fires. + """ + class UnderTest(AsyncTestCase): + def test_it(self): + Message.log(hello="world") + # @eliot_logged_test automatically gives us an action context + # but it's still our responsibility to maintain it across + # stack-busting operations. + d = DeferredContext(deferLater(reactor, 0.0, lambda: None)) + d.addCallback(lambda ignored: Message.log(goodbye="world")) + # We didn't start an action. We're not finishing an action. + return d.result + + self.assertThat(UnderTest("test_it"), passes()) class ParseDestinationDescriptionTests(SyncTestCase): From eee1f0975d5bd32acbb5d1c481623235558ae47c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 1 Dec 2021 15:16:16 -0500 Subject: [PATCH 207/220] note about how to clean this up later --- src/allmydata/util/_eliot_updates.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/allmydata/util/_eliot_updates.py b/src/allmydata/util/_eliot_updates.py index 8e3beca45..81db566a4 100644 --- a/src/allmydata/util/_eliot_updates.py +++ b/src/allmydata/util/_eliot_updates.py @@ -6,6 +6,15 @@ only changed enough to add Python 2 compatibility. Every API in this module (except ``eliot_json_encoder``) should be obsolete as soon as we depend on Eliot 1.14 or newer. +When that happens: + +* replace ``capture_logging`` + with ``partial(eliot.testing.capture_logging, encoder_=eliot_json_encoder)`` +* replace ``validateLogging`` + with ``partial(eliot.testing.validateLogging, encoder_=eliot_json_encoder)`` +* replace ``MemoryLogger`` + with ``partial(eliot.MemoryLogger, encoder=eliot_json_encoder)`` + Ported to Python 3. 
""" From e0092ededaa64a800058658de2d9ab8472acb3bf Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 20:52:22 -0700 Subject: [PATCH 208/220] fine, just skip tests on windows --- src/allmydata/test/test_storage_web.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 961bbef98..a49b71325 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -19,6 +19,7 @@ import time import os.path import re import json +from unittest import skipIf from six.moves import StringIO from twisted.trial import unittest @@ -1153,6 +1154,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d + @skipIf(platform.isWindows()) def test_deserialize_pickle(self): """ The crawler can read existing state from the old pickle format @@ -1163,12 +1165,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.state") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: - for line in remote.readlines(): - if platform.isWindows(): - local.write(line.replace("\n", "\r\n")) - else: - local.write(line) + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + test_pickle.write(original_pickle.read()) # convert from pickle format to JSON top = Options() @@ -1358,6 +1356,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): second_serial.load(), ) + @skipIf(platform.isWindows()) def test_deserialize_history_pickle(self): """ The crawler can read existing history state from the old pickle @@ -1369,12 +1368,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.history") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: - for line in remote.readlines(): - if platform.isWindows(): - local.write(line.replace("\n", "\r\n")) - else: - local.write(line) + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + test_pickle.write(original_pickle.read()) # convert from pickle format to JSON top = Options() From 40e7be6d8d7581f4c9fa71c0817207e11ac1a7e6 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 23:46:10 -0700 Subject: [PATCH 209/220] needs reason --- src/allmydata/test/test_storage_web.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index a49b71325..9292c0b20 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1154,7 +1154,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d - @skipIf(platform.isWindows()) + @skipIf(platform.isWindows(), "pickle test-data can't be loaded on windows") def test_deserialize_pickle(self): """ The crawler can read existing state from the old pickle format @@ -1356,7 +1356,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): second_serial.load(), ) - @skipIf(platform.isWindows()) + @skipIf(platform.isWindows(), "pickle test-data can't be loaded on windows") def test_deserialize_history_pickle(self): """ The crawler can read existing history state from the old pickle From 4bc0df7cc14f53901470a3e0d0f78a6d975c4781 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 2 Dec 2021 00:05:21 
-0700 Subject: [PATCH 210/220] file, not path --- src/allmydata/test/test_storage_web.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 9292c0b20..18ea0220c 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1166,7 +1166,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage.makedirs() test_pickle = storage.child("lease_checker.state") with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: - test_pickle.write(original_pickle.read()) + local.write(remote.read()) # convert from pickle format to JSON top = Options() @@ -1369,7 +1369,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage.makedirs() test_pickle = storage.child("lease_checker.history") with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: - test_pickle.write(original_pickle.read()) + local.write(remote.read()) # convert from pickle format to JSON top = Options() From 6b8a42b0439bd81bbb8359c256538daf53622733 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 2 Dec 2021 09:34:29 -0500 Subject: [PATCH 211/220] Make the test more robust. --- newsfragments/3850.minor | 0 src/allmydata/test/test_storage_http.py | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 newsfragments/3850.minor diff --git a/newsfragments/3850.minor b/newsfragments/3850.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 442e154a0..e30eb24c7 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -63,7 +63,15 @@ class HTTPTests(TestCase): def test_version(self): """ The client can return the version. + + We ignore available disk space since that might change across calls. """ version = yield self.client.get_version() + version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( + b"available-space" + ) expected_version = self.storage_server.remote_get_version() + expected_version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( + b"available-space" + ) self.assertEqual(version, expected_version) From 53ff16f1a43717dd86e9582c379beb4d92ea17e9 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 2 Dec 2021 12:56:52 -0700 Subject: [PATCH 212/220] rst for news --- newsfragments/3825.security | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/newsfragments/3825.security b/newsfragments/3825.security index df83821de..3d112dd49 100644 --- a/newsfragments/3825.security +++ b/newsfragments/3825.security @@ -1,6 +1,8 @@ The lease-checker now uses JSON instead of pickle to serialize its state. tahoe will now refuse to run until you either delete all pickle files or -migrate them using the new command: +migrate them using the new command:: tahoe admin migrate-crawler + +This will migrate all crawler-related pickle files. From 314b20291442bd485b9919081611efb9c145c277 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 3 Dec 2021 12:58:12 -0500 Subject: [PATCH 213/220] Ignore another field which can change. 
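
Like available-space, the maximum-immutable-share-size that get_version()
reports is derived from the server's remaining disk space at call time, so
two calls can legitimately disagree.  If more such fields appear, a helper
along these lines (hypothetical, not part of this change) could filter them
in one place:

    def _without_volatile_fields(version):
        # drop fields derived from free disk space; they change between calls
        v1 = version[b"http://allmydata.org/tahoe/protocols/storage/v1"]
        v1.pop(b"available-space")
        v1.pop(b"maximum-immutable-share-size")
        return version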
--- src/allmydata/test/test_storage_http.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index e30eb24c7..23a3e3ea6 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -64,14 +64,21 @@ class HTTPTests(TestCase): """ The client can return the version. - We ignore available disk space since that might change across calls. + We ignore available disk space and max immutable share size, since that + might change across calls. """ version = yield self.client.get_version() version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( b"available-space" ) + version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( + b"maximum-immutable-share-size" + ) expected_version = self.storage_server.remote_get_version() expected_version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( b"available-space" ) + expected_version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( + b"maximum-immutable-share-size" + ) self.assertEqual(version, expected_version) From 5bb6fbc51f4d1d1d871410aba2cc91a09a2bb3ab Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 4 Dec 2021 10:14:31 -0700 Subject: [PATCH 214/220] merge errors --- src/allmydata/storage/server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index acca83d6a..ac8c41c07 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -337,7 +337,7 @@ class StorageServer(service.MultiService, Referenceable): alreadygot[shnum] = ShareFile(fn) if renew_leases: sf = ShareFile(fn) - sf.add_or_renew_lease(lease_info) + sf.add_or_renew_lease(remaining_space, lease_info) for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) @@ -411,7 +411,7 @@ class StorageServer(service.MultiService, Referenceable): renew_secret, cancel_secret, new_expire_time, self.my_nodeid) for sf in self._iter_share_files(storage_index): - sf.add_or_renew_lease(lease_info) + sf.add_or_renew_lease(self.get_available_space(), lease_info) self.add_latency("add-lease", self._clock.seconds() - start) return None From 50cdd9bd9659ad9886de0ca021b34ef3028f411d Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 4 Dec 2021 17:20:10 -0700 Subject: [PATCH 215/220] unused --- src/allmydata/storage/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index ac8c41c07..9a9b3e624 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -15,7 +15,6 @@ else: from typing import Dict import os, re -import six from foolscap.api import Referenceable from foolscap.ipb import IRemoteReference From 402d11ecd61cd821b0d6afe8f492253106747759 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 5 Dec 2021 00:39:31 -0700 Subject: [PATCH 216/220] update NEWS.txt for release --- NEWS.rst | 75 ++++++++++++++++++++++++++++++++ newsfragments/3525.minor | 0 newsfragments/3527.minor | 0 newsfragments/3735.feature | 1 - newsfragments/3754.minor | 0 newsfragments/3758.minor | 0 newsfragments/3784.minor | 0 newsfragments/3786.feature | 1 - newsfragments/3792.minor | 0 newsfragments/3793.minor | 0 newsfragments/3795.minor | 0 newsfragments/3797.minor | 0 newsfragments/3798.minor | 0 newsfragments/3799.minor | 0 newsfragments/3800.minor | 0 newsfragments/3801.bugfix | 1 - newsfragments/3805.minor | 0 
 newsfragments/3806.minor         | 0
 newsfragments/3807.feature       | 1 -
 newsfragments/3808.installation  | 1 -
 newsfragments/3810.minor         | 0
 newsfragments/3812.minor         | 0
 newsfragments/3814.removed       | 1 -
 newsfragments/3815.documentation | 1 -
 newsfragments/3819.security      | 1 -
 newsfragments/3820.minor         | 0
 newsfragments/3821.security      | 2 -
 newsfragments/3822.security      | 2 -
 newsfragments/3823.security      | 4 --
 newsfragments/3824.security      | 1 -
 newsfragments/3825.security      | 8 ----
 newsfragments/3827.security      | 4 --
 newsfragments/3829.minor         | 0
 newsfragments/3830.minor         | 0
 newsfragments/3831.minor         | 0
 newsfragments/3832.minor         | 0
 newsfragments/3833.minor         | 0
 newsfragments/3834.minor         | 0
 newsfragments/3835.minor         | 0
 newsfragments/3836.minor         | 0
 newsfragments/3837.other         | 1 -
 newsfragments/3838.minor         | 0
 newsfragments/3839.security      | 1 -
 newsfragments/3841.security      | 1 -
 newsfragments/3842.minor         | 0
 newsfragments/3843.minor         | 0
 newsfragments/3847.minor         | 0
 47 files changed, 75 insertions(+), 32 deletions(-)
 delete mode 100644 newsfragments/3525.minor
 delete mode 100644 newsfragments/3527.minor
 delete mode 100644 newsfragments/3735.feature
 delete mode 100644 newsfragments/3754.minor
 delete mode 100644 newsfragments/3758.minor
 delete mode 100644 newsfragments/3784.minor
 delete mode 100644 newsfragments/3786.feature
 delete mode 100644 newsfragments/3792.minor
 delete mode 100644 newsfragments/3793.minor
 delete mode 100644 newsfragments/3795.minor
 delete mode 100644 newsfragments/3797.minor
 delete mode 100644 newsfragments/3798.minor
 delete mode 100644 newsfragments/3799.minor
 delete mode 100644 newsfragments/3800.minor
 delete mode 100644 newsfragments/3801.bugfix
 delete mode 100644 newsfragments/3805.minor
 delete mode 100644 newsfragments/3806.minor
 delete mode 100644 newsfragments/3807.feature
 delete mode 100644 newsfragments/3808.installation
 delete mode 100644 newsfragments/3810.minor
 delete mode 100644 newsfragments/3812.minor
 delete mode 100644 newsfragments/3814.removed
 delete mode 100644 newsfragments/3815.documentation
 delete mode 100644 newsfragments/3819.security
 delete mode 100644 newsfragments/3820.minor
 delete mode 100644 newsfragments/3821.security
 delete mode 100644 newsfragments/3822.security
 delete mode 100644 newsfragments/3823.security
 delete mode 100644 newsfragments/3824.security
 delete mode 100644 newsfragments/3825.security
 delete mode 100644 newsfragments/3827.security
 delete mode 100644 newsfragments/3829.minor
 delete mode 100644 newsfragments/3830.minor
 delete mode 100644 newsfragments/3831.minor
 delete mode 100644 newsfragments/3832.minor
 delete mode 100644 newsfragments/3833.minor
 delete mode 100644 newsfragments/3834.minor
 delete mode 100644 newsfragments/3835.minor
 delete mode 100644 newsfragments/3836.minor
 delete mode 100644 newsfragments/3837.other
 delete mode 100644 newsfragments/3838.minor
 delete mode 100644 newsfragments/3839.security
 delete mode 100644 newsfragments/3841.security
 delete mode 100644 newsfragments/3842.minor
 delete mode 100644 newsfragments/3843.minor
 delete mode 100644 newsfragments/3847.minor

diff --git a/NEWS.rst b/NEWS.rst
index e4fef833a..697c44c30 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -5,6 +5,81 @@ User-Visible Changes in Tahoe-LAFS
 ==================================
 
 .. towncrier start line
+Release 1.16.0.post463 (2021-12-05)
+'''''''''''''''''''''''''''''''''''
+
+Security-related Changes
+------------------------
+
+- The introducer server no longer writes the sensitive introducer fURL value to its log at startup time. Instead it writes the well-known path of the file from which this value can be read. (`#3819 `_)
+- The storage protocol operation ``add_lease`` now safely rejects an attempt to add a 4,294,967,296th lease to an immutable share.
+  Previously this failed with an error after recording the new lease in the share file, resulting in the share file losing track of one previous lease. (`#3821 `_)
+- The storage protocol operation ``readv`` now safely rejects attempts to read negative lengths.
+  Previously these read requests were satisfied with the complete contents of the share file (including trailing metadata) starting from the specified offset. (`#3822 `_)
+- The storage server implementation now respects the ``reserved_space`` configuration value when writing lease information and recording corruption advisories.
+  Previously, new leases could be created and written to disk even when the storage server had less remaining space than the configured reserve space value.
+  Now this operation will fail with an exception and the lease will not be created.
+  Similarly, if there is no space available, corruption advisories will be logged but not written to disk. (`#3823 `_)
+- The storage server implementation no longer records corruption advisories about storage indexes for which it holds no shares. (`#3824 `_)
+- The lease-checker now uses JSON instead of pickle to serialize its state.
+
+  tahoe will now refuse to run until you either delete all pickle files or
+  migrate them using the new command::
+
+    tahoe admin migrate-crawler
+
+  This will migrate all crawler-related pickle files. (`#3825 `_)
+- The SFTP server no longer accepts password-based credentials for authentication.
+  Public/private key-based credentials are now the only supported authentication type.
+  This removes plaintext password storage from the SFTP credentials file.
+  It also removes a possible timing side-channel vulnerability which might have allowed attackers to discover an account's plaintext password. (`#3827 `_)
+- The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets. (`#3839 `_)
+- The storage server now keeps hashes of lease renew and cancel secrets for mutable share files instead of keeping the original secrets. (`#3841 `_)
+
+
+Features
+--------
+
+- Tahoe-LAFS releases now have just a .tar.gz source release and a (universal) wheel (`#3735 `_)
+- tahoe-lafs now provides its statistics also in OpenMetrics format (for Prometheus et al.) at `/statistics?t=openmetrics`. (`#3786 `_)
+- If uploading an immutable hasn't had a write for 30 minutes, the storage server will abort the upload. (`#3807 `_)
+
+
+Bug Fixes
+---------
+
+- When uploading an immutable, overlapping writes that include conflicting data are rejected. In practice, this likely didn't happen in real-world usage. (`#3801 `_)
+
+
+Dependency/Installation Changes
+-------------------------------
+
+- Tahoe-LAFS now supports running on NixOS 21.05 with Python 3. (`#3808 `_)
+
+
+Documentation Changes
+---------------------
+
+- The news file for future releases will include a section for changes with a security impact. (`#3815 `_)
+
+
+Removed Features
+----------------
+
+- The little-used "control port" has been removed from all node types. (`#3814 `_)
+
+
+Other Changes
+-------------
+
+- Tahoe-LAFS no longer runs its Tor integration test suite on Python 2 due to the increased complexity of obtaining compatible versions of necessary dependencies.
(`#3837 `_) + + +Misc/Other +---------- + +- `#3525 `_, `#3527 `_, `#3754 `_, `#3758 `_, `#3784 `_, `#3792 `_, `#3793 `_, `#3795 `_, `#3797 `_, `#3798 `_, `#3799 `_, `#3800 `_, `#3805 `_, `#3806 `_, `#3810 `_, `#3812 `_, `#3820 `_, `#3829 `_, `#3830 `_, `#3831 `_, `#3832 `_, `#3833 `_, `#3834 `_, `#3835 `_, `#3836 `_, `#3838 `_, `#3842 `_, `#3843 `_, `#3847 `_ + Release 1.16.0 (2021-09-17) ''''''''''''''''''''''''''' diff --git a/newsfragments/3525.minor b/newsfragments/3525.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3527.minor b/newsfragments/3527.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3735.feature b/newsfragments/3735.feature deleted file mode 100644 index 5a86d5547..000000000 --- a/newsfragments/3735.feature +++ /dev/null @@ -1 +0,0 @@ -Tahoe-LAFS releases now have just a .tar.gz source release and a (universal) wheel diff --git a/newsfragments/3754.minor b/newsfragments/3754.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3758.minor b/newsfragments/3758.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3784.minor b/newsfragments/3784.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3786.feature b/newsfragments/3786.feature deleted file mode 100644 index ecbfc0372..000000000 --- a/newsfragments/3786.feature +++ /dev/null @@ -1 +0,0 @@ -tahoe-lafs now provides its statistics also in OpenMetrics format (for Prometheus et. al.) at `/statistics?t=openmetrics`. diff --git a/newsfragments/3792.minor b/newsfragments/3792.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3793.minor b/newsfragments/3793.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3795.minor b/newsfragments/3795.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3797.minor b/newsfragments/3797.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3798.minor b/newsfragments/3798.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3799.minor b/newsfragments/3799.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3800.minor b/newsfragments/3800.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3801.bugfix b/newsfragments/3801.bugfix deleted file mode 100644 index 504b3999d..000000000 --- a/newsfragments/3801.bugfix +++ /dev/null @@ -1 +0,0 @@ -When uploading an immutable, overlapping writes that include conflicting data are rejected. In practice, this likely didn't happen in real-world usage. \ No newline at end of file diff --git a/newsfragments/3805.minor b/newsfragments/3805.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3806.minor b/newsfragments/3806.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3807.feature b/newsfragments/3807.feature deleted file mode 100644 index f82363ffd..000000000 --- a/newsfragments/3807.feature +++ /dev/null @@ -1 +0,0 @@ -If uploading an immutable hasn't had a write for 30 minutes, the storage server will abort the upload. 
diff --git a/newsfragments/3808.installation b/newsfragments/3808.installation
deleted file mode 100644
index 157f08a0c..000000000
--- a/newsfragments/3808.installation
+++ /dev/null
@@ -1 +0,0 @@
-Tahoe-LAFS now supports running on NixOS 21.05 with Python 3.
diff --git a/newsfragments/3810.minor b/newsfragments/3810.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3812.minor b/newsfragments/3812.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3814.removed b/newsfragments/3814.removed
deleted file mode 100644
index 939d20ffc..000000000
--- a/newsfragments/3814.removed
+++ /dev/null
@@ -1 +0,0 @@
-The little-used "control port" has been removed from all node types.
diff --git a/newsfragments/3815.documentation b/newsfragments/3815.documentation
deleted file mode 100644
index 7abc70bd1..000000000
--- a/newsfragments/3815.documentation
+++ /dev/null
@@ -1 +0,0 @@
-The news file for future releases will include a section for changes with a security impact.
\ No newline at end of file
diff --git a/newsfragments/3819.security b/newsfragments/3819.security
deleted file mode 100644
index 975fd0035..000000000
--- a/newsfragments/3819.security
+++ /dev/null
@@ -1 +0,0 @@
-The introducer server no longer writes the sensitive introducer fURL value to its log at startup time. Instead it writes the well-known path of the file from which this value can be read.
diff --git a/newsfragments/3820.minor b/newsfragments/3820.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3821.security b/newsfragments/3821.security
deleted file mode 100644
index 75d9904a2..000000000
--- a/newsfragments/3821.security
+++ /dev/null
@@ -1,2 +0,0 @@
-The storage protocol operation ``add_lease`` now safely rejects an attempt to add a 4,294,967,296th lease to an immutable share.
-Previously this failed with an error after recording the new lease in the share file, resulting in the share file losing track of a one previous lease.
diff --git a/newsfragments/3822.security b/newsfragments/3822.security
deleted file mode 100644
index 5d6c07ab5..000000000
--- a/newsfragments/3822.security
+++ /dev/null
@@ -1,2 +0,0 @@
-The storage protocol operation ``readv`` now safely rejects attempts to read negative lengths.
-Previously these read requests were satisfied with the complete contents of the share file (including trailing metadata) starting from the specified offset.
diff --git a/newsfragments/3823.security b/newsfragments/3823.security
deleted file mode 100644
index ba2bbd741..000000000
--- a/newsfragments/3823.security
+++ /dev/null
@@ -1,4 +0,0 @@
-The storage server implementation now respects the ``reserved_space`` configuration value when writing lease information and recording corruption advisories.
-Previously, new leases could be created and written to disk even when the storage server had less remaining space than the configured reserve space value.
-Now this operation will fail with an exception and the lease will not be created.
-Similarly, if there is no space available, corruption advisories will be logged but not written to disk.
diff --git a/newsfragments/3824.security b/newsfragments/3824.security
deleted file mode 100644
index b29b2acc8..000000000
--- a/newsfragments/3824.security
+++ /dev/null
@@ -1 +0,0 @@
-The storage server implementation no longer records corruption advisories about storage indexes for which it holds no shares.
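The security fragments above (``3821`` through ``3824``) share one pattern: validate a request completely before any bytes are written. A compressed sketch of the two bounds checks from ``3821`` and ``3822`` follows; ``MAX_LEASES``, ``num_leases``, ``add_or_renew_lease``, and ``read_share_data`` are assumed names for illustration, not the real storage API::

    MAX_LEASES = 2**32 - 1  # the lease count lives in a 4-byte field, so
                            # lease number 4,294,967,296 cannot be recorded

    def add_lease_checked(share, lease_info):
        # Refuse before writing anything: the old behavior appended the
        # lease first, then overflowed the counter, losing track of one
        # existing lease (#3821).
        if share.num_leases() >= MAX_LEASES:
            raise ValueError("share file already holds the maximum number of leases")
        share.add_or_renew_lease(lease_info)

    def readv_checked(share, offset, length):
        # Reject negative lengths up front: file-style reads treat a
        # negative size as "read everything", which previously leaked the
        # rest of the share file, trailing metadata included (#3822).
        if length < 0:
            raise ValueError("negative read length rejected")
        return share.read_share_data(offset, length)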
diff --git a/newsfragments/3825.security b/newsfragments/3825.security
deleted file mode 100644
index 3d112dd49..000000000
--- a/newsfragments/3825.security
+++ /dev/null
@@ -1,8 +0,0 @@
-The lease-checker now uses JSON instead of pickle to serialize its state.
-
-tahoe will now refuse to run until you either delete all pickle files or
-migrate them using the new command::
-
-    tahoe admin migrate-crawler
-
-This will migrate all crawler-related pickle files.
diff --git a/newsfragments/3827.security b/newsfragments/3827.security
deleted file mode 100644
index 4fee19c76..000000000
--- a/newsfragments/3827.security
+++ /dev/null
@@ -1,4 +0,0 @@
-The SFTP server no longer accepts password-based credentials for authentication.
-Public/private key-based credentials are now the only supported authentication type.
-This removes plaintext password storage from the SFTP credentials file.
-It also removes a possible timing side-channel vulnerability which might have allowed attackers to discover an account's plaintext password.
diff --git a/newsfragments/3829.minor b/newsfragments/3829.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3830.minor b/newsfragments/3830.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3831.minor b/newsfragments/3831.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3832.minor b/newsfragments/3832.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3833.minor b/newsfragments/3833.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3834.minor b/newsfragments/3834.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3835.minor b/newsfragments/3835.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3836.minor b/newsfragments/3836.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3837.other b/newsfragments/3837.other
deleted file mode 100644
index a9e4e6986..000000000
--- a/newsfragments/3837.other
+++ /dev/null
@@ -1 +0,0 @@
-Tahoe-LAFS no longer runs its Tor integration test suite on Python 2 due to the increased complexity of obtaining compatible versions of necessary dependencies.
diff --git a/newsfragments/3838.minor b/newsfragments/3838.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3839.security b/newsfragments/3839.security
deleted file mode 100644
index 1ae054542..000000000
--- a/newsfragments/3839.security
+++ /dev/null
@@ -1 +0,0 @@
-The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets.
diff --git a/newsfragments/3841.security b/newsfragments/3841.security
deleted file mode 100644
index 867322e0a..000000000
--- a/newsfragments/3841.security
+++ /dev/null
@@ -1 +0,0 @@
-The storage server now keeps hashes of lease renew and cancel secrets for mutable share files instead of keeping the original secrets.
\ No newline at end of file
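Fragments ``3839`` and ``3841`` above describe the same defense for both share types: store a digest of each lease secret rather than the secret itself, and compare digests when a client renews or cancels. In outline, using only standard-library primitives (a sketch; the real storage code may hash and tag these secrets differently)::

    from hashlib import sha256
    from hmac import compare_digest

    def hash_lease_secret(secret):
        # Only this digest ever reaches the share file on disk, so a
        # stolen share file no longer reveals a usable renew/cancel secret.
        return sha256(secret).digest()

    def lease_secret_matches(stored_digest, presented_secret):
        # compare_digest() runs in constant time, avoiding the timing
        # side channel a naive == comparison could reintroduce.
        return compare_digest(stored_digest, hash_lease_secret(presented_secret))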
diff --git a/newsfragments/3842.minor b/newsfragments/3842.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3843.minor b/newsfragments/3843.minor
deleted file mode 100644
index e69de29bb..000000000
diff --git a/newsfragments/3847.minor b/newsfragments/3847.minor
deleted file mode 100644
index e69de29bb..000000000

From b8d00ab04a1ae6309d3dd5cf93b937d759f3c9d6 Mon Sep 17 00:00:00 2001
From: meejah
Date: Sun, 5 Dec 2021 00:50:22 -0700
Subject: [PATCH 217/220] update release notes

---
 NEWS.rst     |  6 ++++--
 relnotes.txt | 39 ++++++++++++++++++-----------------
 2 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/NEWS.rst b/NEWS.rst
index 697c44c30..15cb9459d 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -5,8 +5,10 @@ User-Visible Changes in Tahoe-LAFS
 ==================================

 .. towncrier start line
-Release 1.16.0.post463 (2021-12-05)Release 1.16.0.post463 (2021-12-05)
-'''''''''''''''''''''''''''''''''''
+
+
+Release 1.17.0 (2021-12-06)
+'''''''''''''''''''''''''''

 Security-related Changes
 ------------------------
diff --git a/relnotes.txt b/relnotes.txt
index 2748bc4fa..dff4f192e 100644
--- a/relnotes.txt
+++ b/relnotes.txt
@@ -1,6 +1,6 @@
-ANNOUNCING Tahoe, the Least-Authority File Store, v1.16.0
+ANNOUNCING Tahoe, the Least-Authority File Store, v1.17.0

-The Tahoe-LAFS team is pleased to announce version 1.16.0 of
+The Tahoe-LAFS team is pleased to announce version 1.17.0 of
 Tahoe-LAFS, an extremely reliable decentralized storage system.
 Get it with "pip install tahoe-lafs", or download a tarball here:

@@ -15,24 +15,17 @@ unique security and fault-tolerance properties:

    https://tahoe-lafs.readthedocs.org/en/latest/about.html

-The previous stable release of Tahoe-LAFS was v1.15.1, released on
-March 23rd, 2021.
+The previous stable release of Tahoe-LAFS was v1.16.0, released on
+October 19, 2021.

-The major change in this release is the completion of the Python 3
-port -- while maintaining support for Python 2. A future release will
-remove Python 2 support.
+This release fixes several security issues raised as part of an audit
+by Cure53. We developed fixes for these issues in a private
+repository. Shortly after this release, public tickets will be updated
+with further information (along with, of course, all the code).

-The previously deprecated subcommands "start", "stop", "restart" and
-"daemonize" have been removed. You must now use "tahoe run" (possibly
-along with your favourite daemonization software).
+There is also OpenMetrics support now and several bug fixes.

-Several features are now removed: the Account Server, stats-gatherer
-and FTP support.
-
-There are several dependency changes that will be interesting for
-distribution maintainers.
-
-In all, 240 issues have been fixed since the last release.
+In all, 46 issues have been fixed since the last release.

 Please see ``NEWS.rst`` for a more complete list of changes.

@@ -151,19 +144,19 @@ solely as a labor of love by volunteers. Thank you very much to the
 team of "hackers in the public interest" who make Tahoe-LAFS possible.
-fenn-cs + meejah
+meejah
 on behalf of the Tahoe-LAFS team

-October 19, 2021
+December 6, 2021
 Planet Earth

-[1] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.16.0/NEWS.rst
+[1] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.17.0/NEWS.rst
 [2] https://github.com/tahoe-lafs/tahoe-lafs/blob/master/docs/known_issues.rst
 [3] https://tahoe-lafs.org/trac/tahoe-lafs/wiki/RelatedProjects
-[4] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.16.0/COPYING.GPL
-[5] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.16.0/COPYING.TGPPL.rst
-[6] https://tahoe-lafs.readthedocs.org/en/tahoe-lafs-1.16.0/INSTALL.html
+[4] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.17.0/COPYING.GPL
+[5] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.17.0/COPYING.TGPPL.rst
+[6] https://tahoe-lafs.readthedocs.org/en/tahoe-lafs-1.17.0/INSTALL.html
 [7] https://lists.tahoe-lafs.org/mailman/listinfo/tahoe-dev
 [8] https://tahoe-lafs.org/trac/tahoe-lafs/roadmap
 [9] https://github.com/tahoe-lafs/tahoe-lafs/blob/master/CREDITS

From 95fdaf286e2f195672d4f2cd3371ed41ee49aae1 Mon Sep 17 00:00:00 2001
From: meejah
Date: Sun, 5 Dec 2021 00:51:13 -0700
Subject: [PATCH 218/220] update nix version

---
 nix/tahoe-lafs.nix | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix
index 59864d36d..04d6c4163 100644
--- a/nix/tahoe-lafs.nix
+++ b/nix/tahoe-lafs.nix
@@ -7,7 +7,7 @@
 , html5lib, pyutil, distro, configparser, klein, cbor2
 }:
 python.pkgs.buildPythonPackage rec {
-  # Most of the time this is not exactly the release version (eg 1.16.0).
+  # Most of the time this is not exactly the release version (eg 1.17.0).
   # Give it a `post` component to make it look newer than the release version
   # and we'll bump this up at the time of each release.
   #
@@ -20,7 +20,7 @@ python.pkgs.buildPythonPackage rec {
   # is not a reproducable artifact (in the sense of "reproducable builds") so
   # it is excluded from the source tree by default. When it is included, the
   # package tends to be frequently spuriously rebuilt.
-  version = "1.16.0.post1";
+  version = "1.17.0.post1";
   name = "tahoe-lafs-${version}";
   src = lib.cleanSourceWith {
     src = ../.;

From a8bdb8dcbb66bfa75389816817e25b6f9ec5d74b Mon Sep 17 00:00:00 2001
From: meejah
Date: Sun, 5 Dec 2021 00:53:50 -0700
Subject: [PATCH 219/220] add Florian

---
 CREDITS | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CREDITS b/CREDITS
index 8a6e876ec..89e1468aa 100644
--- a/CREDITS
+++ b/CREDITS
@@ -260,3 +260,7 @@ D: Community-manager and documentation improvements
 N: Yash Nayani
 E: yashaswi.nram@gmail.com
 D: Installation Guide improvements
+
+N: Florian Sesser
+E: florian@private.storage
+D: OpenMetrics support
\ No newline at end of file

From 5f6579d44622b0774aec8ba06b67f2d4f2ee7af7 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Mon, 6 Dec 2021 12:47:37 -0500
Subject: [PATCH 220/220] hew closer to security/master version of these lines

---
 src/allmydata/storage/server.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py
index 9a9b3e624..80b337d36 100644
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -334,9 +334,8 @@ class StorageServer(service.MultiService, Referenceable):
         # file, they'll want us to hold leases for this file.
         for (shnum, fn) in self._get_bucket_shares(storage_index):
             alreadygot[shnum] = ShareFile(fn)
-            if renew_leases:
-                sf = ShareFile(fn)
-                sf.add_or_renew_lease(remaining_space, lease_info)
+        if renew_leases:
+            self._add_or_renew_leases(alreadygot.values(), lease_info)

         for shnum in sharenums:
             incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum)
@@ -409,8 +408,10 @@ class StorageServer(service.MultiService, Referenceable):
         lease_info = LeaseInfo(owner_num,
                                renew_secret, cancel_secret,
                                new_expire_time, self.my_nodeid)
-        for sf in self._iter_share_files(storage_index):
-            sf.add_or_renew_lease(self.get_available_space(), lease_info)
+        self._add_or_renew_leases(
+            self._iter_share_files(storage_index),
+            lease_info,
+        )
         self.add_latency("add-lease", self._clock.seconds() - start)
         return None
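Both hunks in this final patch replace near-duplicate lease-writing loops with a single ``_add_or_renew_leases`` helper; note that the first hunk also moves the renewal out of the per-share loop so it runs once over everything in ``alreadygot``. From the call sites and the deleted lines, the helper plausibly reduces to something like the following sketch (an inference from this diff, not a verbatim copy of ``storage/server.py``)::

    def _add_or_renew_leases(self, shares, lease_info):
        """
        Put ``lease_info`` onto every share in ``shares``, passing along the
        currently available space so the reserved-space check (#3823) can
        refuse the write when the reserve would be breached.
        """
        available_space = self.get_available_space()
        for share in shares:
            share.add_or_renew_lease(available_space, lease_info)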