From 512897eca0310ce6ddfe262520f3c21bcf345ccf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 13:46:32 -0500 Subject: [PATCH 01/50] news fragment --- newsfragments/3592.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3592.minor diff --git a/newsfragments/3592.minor b/newsfragments/3592.minor new file mode 100644 index 000000000..e69de29bb From 61d5f920bb9db703edb9f3aed73f2d0488ee0d2c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 14:28:16 -0500 Subject: [PATCH 02/50] Add tests for the tag construction code and make it a bit safer Check for sane inputs, reject insane ones --- src/allmydata/test/test_hashutil.py | 36 +++++++++++++++++++++++++++++ src/allmydata/util/hashutil.py | 29 ++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_hashutil.py b/src/allmydata/test/test_hashutil.py index 6ec861c9f..6ac73ae60 100644 --- a/src/allmydata/test/test_hashutil.py +++ b/src/allmydata/test/test_hashutil.py @@ -126,6 +126,42 @@ class HashUtilTests(unittest.TestCase): base32.a2b(b"2ckv3dfzh6rgjis6ogfqhyxnzy"), ) + def test_convergence_hasher_tag(self): + """ + ``_convergence_hasher_tag`` constructs the convergence hasher tag from a + unique prefix, the required, total, and segment size parameters, and a + convergence secret. + """ + self.assertEqual( + "allmydata_immutable_content_to_key_with_added_secret_v1+" + "16:\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42," + "9:3,10,1024,", + hashutil._convergence_hasher_tag( + k=3, + n=10, + segsize=1024, + convergence=b"\x42" * 16, + ), + ) + + def test_convergence_hasher_out_of_bounds(self): + """ + ``_convergence_hasher_tag`` raises ``ValueError`` if k or n is not between + 1 and 256 inclusive or if k is greater than n. + """ + segsize = 1024 + secret = b"\x42" * 16 + for bad_k in (0, 2, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=bad_k, n=1, segsize=segsize, convergence=secret, + ) + for bad_n in (0, 1, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=2, n=bad_n, segsize=segsize, convergence=secret, + ) + def test_known_answers(self): """ Verify backwards compatibility by comparing hash outputs for some diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index ebb2f12af..f82c04efd 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -176,10 +176,37 @@ def convergence_hash(k, n, segsize, data, convergence): return h.digest() -def convergence_hasher(k, n, segsize, convergence): +def _convergence_hasher_tag(k, n, segsize, convergence): + """ + Create the convergence hashing tag. + + :param int k: Required shares. + :param int n: Total shares. + :param int segsize: Maximum segment size. + :param bytes convergence: The convergence secret. + + :return bytes: The bytestring to use as a tag in the convergence hash. + """ assert isinstance(convergence, bytes) + if k > n: + raise ValueError( + "k > n not allowed; k = {}, n = {}".format(k, n), + ) + if k < 1 or n < 1: + raise ValueError( + "k, n < 1 not allowed; k = {}, n = {}".format(k, n), + ) + if k > 256 or n > 256: + raise ValueError( + "k, n > 256 not allowed; k = {}, n = {}".format(k, n), + ) param_tag = netstring(b"%d,%d,%d" % (k, n, segsize)) tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag + return tag + + +def convergence_hasher(k, n, segsize, convergence): + tag = _convergence_hasher_tag(k, n, segsize, convergence) return tagged_hasher(tag, KEYLEN) From 755de5edafe7589ecf2202a05d17a403913b31fd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jan 2021 09:58:51 -0500 Subject: [PATCH 03/50] Start of passing tests on Python 3. --- src/allmydata/test/web/test_web.py | 32 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index 2f000b7a1..244478a55 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -275,13 +275,13 @@ class FakeClient(_Client): # type: ignore # tahoe-lafs/ticket/3573 node_config=EMPTY_CLIENT_CONFIG, ) # fake knowledge of another server - self.storage_broker.test_add_server("other_nodeid", + self.storage_broker.test_add_server(b"other_nodeid", FakeDisplayableServer( serverid=b"other_nodeid", nickname=u"other_nickname \u263B", connected = True, last_connect_time = 10, last_loss_time = 20, last_rx_time = 30)) - self.storage_broker.test_add_server("disconnected_nodeid", + self.storage_broker.test_add_server(b"disconnected_nodeid", FakeDisplayableServer( - serverid="disconnected_nodeid", nickname=u"disconnected_nickname \u263B", connected = False, + serverid=b"disconnected_nodeid", nickname=u"disconnected_nickname \u263B", connected = False, last_connect_time = None, last_loss_time = 25, last_rx_time = 35)) self.introducer_client = None self.history = FakeHistory() @@ -665,6 +665,8 @@ class MultiFormatResourceTests(TrialTestCase): Tests for ``MultiFormatResource``. """ def render(self, resource, **queryargs): + # Query arguments in real twisted.web requests have byte keys. + queryargs = {k.encode("utf-8"): v for (k, v) in queryargs.items()} return self.successResultOf(render(resource, queryargs)) def resource(self): @@ -675,13 +677,13 @@ class MultiFormatResourceTests(TrialTestCase): class Content(MultiFormatResource): def render_HTML(self, req): - return "html" + return b"html" def render_A(self, req): - return "a" + return b"a" def render_B(self, req): - return "b" + return b"b" return Content() @@ -693,7 +695,7 @@ class MultiFormatResourceTests(TrialTestCase): """ resource = self.resource() resource.formatArgument = "foo" - self.assertEqual("a", self.render(resource, foo=["a"])) + self.assertEqual(b"a", self.render(resource, foo=[b"a"])) def test_default_format_argument(self): @@ -702,7 +704,7 @@ class MultiFormatResourceTests(TrialTestCase): then the ``t`` argument is used. """ resource = self.resource() - self.assertEqual("a", self.render(resource, t=["a"])) + self.assertEqual(b"a", self.render(resource, t=[b"a"])) def test_no_format(self): @@ -711,7 +713,7 @@ class MultiFormatResourceTests(TrialTestCase): been defined, the base rendering behavior is used (``render_HTML``). """ resource = self.resource() - self.assertEqual("html", self.render(resource)) + self.assertEqual(b"html", self.render(resource)) def test_default_format(self): @@ -722,7 +724,7 @@ class MultiFormatResourceTests(TrialTestCase): """ resource = self.resource() resource.formatDefault = "b" - self.assertEqual("b", self.render(resource)) + self.assertEqual(b"b", self.render(resource)) def test_explicit_none_format_renderer(self): @@ -732,7 +734,7 @@ class MultiFormatResourceTests(TrialTestCase): """ resource = self.resource() resource.render_FOO = None - self.assertEqual("html", self.render(resource, t=["foo"])) + self.assertEqual(b"html", self.render(resource, t=[b"foo"])) def test_unknown_format(self): @@ -741,15 +743,15 @@ class MultiFormatResourceTests(TrialTestCase): returned. """ resource = self.resource() - response_body = self.render(resource, t=["foo"]) + response_body = self.render(resource, t=[b"foo"]) self.assertIn( - "400 - Bad Format", response_body, + b"400 - Bad Format", response_body, ) self.assertIn( - "Unknown t value:", response_body, + b"Unknown t value:", response_body, ) self.assertIn( - "'foo'", response_body, + b"'foo'", response_body, ) From 011b027c392ea1b3c182e419b1db91922a4f9465 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jan 2021 10:14:28 -0500 Subject: [PATCH 04/50] More passing tests on Python 3. --- src/allmydata/test/common.py | 14 ++--- src/allmydata/test/web/test_web.py | 95 +++++++++++++++--------------- 2 files changed, 56 insertions(+), 53 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index fde92fb59..f7f339b98 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -566,12 +566,12 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation self.file_types[self.storage_index] = version initial_contents = self._get_initial_contents(contents) data = initial_contents.read(initial_contents.get_size()) - data = "".join(data) + data = b"".join(data) self.all_contents[self.storage_index] = data return defer.succeed(self) def _get_initial_contents(self, contents): if contents is None: - return MutableData("") + return MutableData(b"") if IMutableUploadable.providedBy(contents): return contents @@ -625,7 +625,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation def raise_error(self): pass def get_writekey(self): - return "\x00"*16 + return b"\x00"*16 def get_size(self): return len(self.all_contents[self.storage_index]) def get_current_size(self): @@ -644,7 +644,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation return self.file_types[self.storage_index] def check(self, monitor, verify=False, add_lease=False): - s = StubServer("\x00"*20) + s = StubServer(b"\x00"*20) r = CheckResults(self.my_uri, self.storage_index, healthy=True, recoverable=True, count_happiness=10, @@ -655,7 +655,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation count_recoverable_versions=1, count_unrecoverable_versions=0, servers_responding=[s], - sharemap={"seq1-abcd-sh0": [s]}, + sharemap={b"seq1-abcd-sh0": [s]}, count_wrong_shares=0, list_corrupt_shares=[], count_corrupt_shares=0, @@ -709,7 +709,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation def overwrite(self, new_contents): assert not self.is_readonly() new_data = new_contents.read(new_contents.get_size()) - new_data = "".join(new_data) + new_data = b"".join(new_data) self.all_contents[self.storage_index] = new_data return defer.succeed(None) def modify(self, modifier): @@ -740,7 +740,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation def update(self, data, offset): assert not self.is_readonly() def modifier(old, servermap, first_time): - new = old[:offset] + "".join(data.read(data.get_size())) + new = old[:offset] + b"".join(data.read(data.get_size())) new += old[len(new):] return new return self.modify(modifier) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index 244478a55..bcf4db1bf 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -1,8 +1,11 @@ from __future__ import print_function -import os.path, re, urllib, time +from past.builtins import unicode + +import os.path, re, time import json import treq +from urllib.parse import quote as urlquote, unquote as urlunquote from bs4 import BeautifulSoup @@ -115,8 +118,8 @@ class FakeUploader(service.Service): servermap={}, timings={}, uri_extension_data={}, - uri_extension_hash="fake", - verifycapstr="fakevcap") + uri_extension_hash=b"fake", + verifycapstr=b"fakevcap") ur.set_uri(n.get_uri()) return ur d.addCallback(_got_data) @@ -297,12 +300,12 @@ class FakeClient(_Client): # type: ignore # tahoe-lafs/ticket/3573 self.addService(FakeStorageServer(self.nodeid, self.nickname)) def get_long_nodeid(self): - return "v0-nodeid" + return b"v0-nodeid" def get_long_tubid(self): - return "tubid" + return u"tubid" def get_auth_token(self): - return 'a fake debug auth token' + return b'a fake debug auth token' def startService(self): return service.MultiService.startService(self) @@ -340,7 +343,7 @@ class WebMixin(TimezoneMixin): def _then(res): self.public_root = res[0][1] assert interfaces.IDirectoryNode.providedBy(self.public_root), res - self.public_url = "/uri/" + self.public_root.get_uri() + self.public_url = "/uri/" + unicode(self.public_root.get_uri(), "ascii") self.private_root = res[1][1] foo = res[2][1] @@ -365,7 +368,7 @@ class WebMixin(TimezoneMixin): # mdmf self.QUUX_CONTENTS, n, self._quux_txt_uri, self._quux_txt_readonly_uri = self.makefile_mutable(0, mdmf=True) - assert self._quux_txt_uri.startswith("URI:MDMF") + assert self._quux_txt_uri.startswith(b"URI:MDMF") foo.set_uri(u"quux.txt", self._quux_txt_uri, self._quux_txt_readonly_uri) foo.set_uri(u"empty", res[3][1].get_uri(), @@ -382,7 +385,7 @@ class WebMixin(TimezoneMixin): # filenode to test for html encoding issues self._htmlname_unicode = u"<&weirdly'named\"file>>>_