From d52f6747f64385289a79b410839a3346dcef91fc Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 30 Oct 2020 14:21:16 -0400 Subject: [PATCH 01/20] Some progress towards passing tests. --- src/allmydata/storage_client.py | 4 ++-- src/allmydata/test/test_storage_client.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 1e4497d68..63859a368 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -350,8 +350,8 @@ class StorageFarmBroker(service.MultiService): so if we use more than one introducer it is possible for them to deliver us stale announcements in some cases. """ - precondition(isinstance(key_s, str), key_s) - precondition(key_s.startswith("v0-"), key_s) + precondition(isinstance(key_s, bytes), key_s) + precondition(key_s.startswith(b"v0-"), key_s) precondition(ann["service-name"] == "storage", ann["service-name"]) server_id = key_s diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index 00343ea20..344c03e29 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -107,7 +107,7 @@ class TestNativeStorageServer(unittest.TestCase): ann = {"anonymous-storage-FURL": "pb://w2hqnbaa25yw4qgcvghl5psa3srpfgw3@tcp:127.0.0.1:51309/vucto2z4fxment3vfxbqecblbf6zyp6x", "permutation-seed-base32": "w2hqnbaa25yw4qgcvghl5psa3srpfgw3", } - nss = NativeStorageServer("server_id", ann, None, {}, EMPTY_CLIENT_CONFIG) + nss = NativeStorageServer(b"server_id", ann, None, {}, EMPTY_CLIENT_CONFIG) self.assertEqual(nss.get_nickname(), "") @@ -123,7 +123,7 @@ class GetConnectionStatus(unittest.TestCase): """ # Pretty hard to recognize anything from an empty announcement. ann = {} - nss = NativeStorageServer("server_id", ann, Tub, {}, EMPTY_CLIENT_CONFIG) + nss = NativeStorageServer(b"server_id", ann, Tub, {}, EMPTY_CLIENT_CONFIG) nss.start_connecting(lambda: None) connection_status = nss.get_connection_status() self.assertTrue(IConnectionStatus.providedBy(connection_status)) @@ -505,7 +505,7 @@ class TestStorageFarmBroker(unittest.TestCase): broker = make_broker() key_s = 'v0-1234-1' - servers_yaml = b"""\ + servers_yaml = """\ storage: v0-1234-1: ann: @@ -513,7 +513,7 @@ storage: permutation-seed-base32: aaaaaaaaaaaaaaaaaaaaaaaa """.format(furl=SOME_FURL) servers = yamlutil.safe_load(servers_yaml) - permseed = base32.a2b("aaaaaaaaaaaaaaaaaaaaaaaa") + permseed = base32.a2b(b"aaaaaaaaaaaaaaaaaaaaaaaa") broker.set_static_servers(servers["storage"]) self.failUnlessEqual(len(broker._static_server_ids), 1) s = broker.servers[key_s] @@ -537,7 +537,7 @@ storage: def test_static_permutation_seed_pubkey(self): broker = make_broker() - server_id = "v0-4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" + server_id = b"v0-4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" k = "4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" ann = { "anonymous-storage-FURL": SOME_FURL, @@ -548,7 +548,7 @@ storage: def test_static_permutation_seed_explicit(self): broker = make_broker() - server_id = "v0-4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" + server_id = b"v0-4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" k = "w5gl5igiexhwmftwzhai5jy2jixn7yx7" ann = { "anonymous-storage-FURL": SOME_FURL, @@ -560,7 +560,7 @@ storage: def test_static_permutation_seed_hashed(self): broker = make_broker() - server_id = "unparseable" + server_id = b"unparseable" ann = { "anonymous-storage-FURL": SOME_FURL, } @@ -591,10 +591,10 @@ storage: } def add_one_server(x): - data["anonymous-storage-FURL"] = "pb://{}@nowhere/fake".format(base32.b2a(str(x))) + data["anonymous-storage-FURL"] = b"pb://%s@nowhere/fake" % (base32.b2a(b"%d" % x),) tub = Mock() new_tubs.append(tub) - got_announcement('v0-1234-{}'.format(x), data) + got_announcement(b'v0-1234-%d' % x, data) self.assertEqual(tub.mock_calls[-1][0], 'connectTo') got_connection = tub.mock_calls[-1][1][1] rref = Mock() From 45a2fcc3f22f72392386b37eb60f834cdba4d03e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 30 Oct 2020 14:34:23 -0400 Subject: [PATCH 02/20] Fix for Python 2. --- src/allmydata/storage_client.py | 26 ++++++++++-------- src/allmydata/test/test_storage_client.py | 10 +++---- src/allmydata/util/dictutil.py | 32 +++++++++++++++++++++++ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 63859a368..58aa749de 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -65,6 +65,8 @@ from allmydata.util.assertutil import precondition from allmydata.util.observer import ObserverList from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.util.hashutil import permute_server_hash +from allmydata.util.dictutil import BytesKeyDict + # who is responsible for de-duplication? # both? @@ -92,7 +94,7 @@ class StorageClientConfig(object): decreasing preference. See the *[client]peers.preferred* documentation for details. - :ivar dict[unicode, dict[bytes, bytes]] storage_plugins: A mapping from + :ivar dict[unicode, dict[unicode, unicode]] storage_plugins: A mapping from names of ``IFoolscapStoragePlugin`` configured in *tahoe.cfg* to the respective configuration. """ @@ -107,24 +109,24 @@ class StorageClientConfig(object): :param _Config config: The loaded Tahoe-LAFS node configuration. """ - ps = config.get_config("client", "peers.preferred", b"").split(b",") - preferred_peers = tuple([p.strip() for p in ps if p != b""]) + ps = config.get_config("client", "peers.preferred", "").split(",") + preferred_peers = tuple([p.strip() for p in ps if p != ""]) enabled_storage_plugins = ( name.strip() for name in config.get_config( - b"client", - b"storage.plugins", - b"", - ).decode("utf-8").split(u",") + "client", + "storage.plugins", + "", + ).split(u",") if name.strip() ) storage_plugins = {} for plugin_name in enabled_storage_plugins: try: - plugin_config = config.items(b"storageclient.plugins." + plugin_name) + plugin_config = config.items("storageclient.plugins." + plugin_name) except NoSectionError: plugin_config = [] storage_plugins[plugin_name] = dict(plugin_config) @@ -173,7 +175,7 @@ class StorageFarmBroker(service.MultiService): # storage servers that we've heard about. Each descriptor manages its # own Reconnector, and will give us a RemoteReference when we ask # them for it. - self.servers = {} + self.servers = BytesKeyDict() self._static_server_ids = set() # ignore announcements for these self.introducer_client = None self._threshold_listeners = [] # tuples of (threshold, Deferred) @@ -198,8 +200,10 @@ class StorageFarmBroker(service.MultiService): # written tests will still fail if a surprising exception # arrives here but they might be harder to debug without this # information. - pass + raise else: + if isinstance(server_id, unicode): + server_id = server_id.encode("utf-8") self._static_server_ids.add(server_id) self.servers[server_id] = storage_server storage_server.setServiceParent(self) @@ -555,7 +559,7 @@ class _FoolscapStorage(object): if isinstance(seed, unicode): seed = seed.encode("utf-8") ps = base32.a2b(seed) - elif re.search(r'^v0-[0-9a-zA-Z]{52}$', server_id): + elif re.search(br'^v0-[0-9a-zA-Z]{52}$', server_id): ps = base32.a2b(server_id[3:]) else: log.msg("unable to parse serverid '%(server_id)s as pubkey, " diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index 344c03e29..11784adfe 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -504,14 +504,14 @@ class TestStorageFarmBroker(unittest.TestCase): def test_static_servers(self): broker = make_broker() - key_s = 'v0-1234-1' + key_s = b'v0-1234-1' servers_yaml = """\ storage: v0-1234-1: ann: anonymous-storage-FURL: {furl} permutation-seed-base32: aaaaaaaaaaaaaaaaaaaaaaaa -""".format(furl=SOME_FURL) +""".format(furl=SOME_FURL.decode("utf-8")) servers = yamlutil.safe_load(servers_yaml) permseed = base32.a2b(b"aaaaaaaaaaaaaaaaaaaaaaaa") broker.set_static_servers(servers["storage"]) @@ -527,7 +527,7 @@ storage: ann2 = { "service-name": "storage", - "anonymous-storage-FURL": "pb://{}@nowhere/fake2".format(base32.b2a(str(1))), + "anonymous-storage-FURL": "pb://{}@nowhere/fake2".format(base32.b2a(b"1")), "permutation-seed-base32": "bbbbbbbbbbbbbbbbbbbbbbbb", } broker._got_announcement(key_s, ann2) @@ -538,7 +538,7 @@ storage: def test_static_permutation_seed_pubkey(self): broker = make_broker() server_id = b"v0-4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" - k = "4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" + k = b"4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" ann = { "anonymous-storage-FURL": SOME_FURL, } @@ -549,7 +549,7 @@ storage: def test_static_permutation_seed_explicit(self): broker = make_broker() server_id = b"v0-4uazse3xb6uu5qpkb7tel2bm6bpea4jhuigdhqcuvvse7hugtsia" - k = "w5gl5igiexhwmftwzhai5jy2jixn7yx7" + k = b"w5gl5igiexhwmftwzhai5jy2jixn7yx7" ann = { "anonymous-storage-FURL": SOME_FURL, "permutation-seed-base32": k, diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index 3ace8fca4..5fc85fbd1 100644 --- a/src/allmydata/util/dictutil.py +++ b/src/allmydata/util/dictutil.py @@ -14,6 +14,7 @@ if PY2: # subclassing dict, so we'd end up exposing Python 3 dict APIs to lots of # code that doesn't support it. from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, list, object, range, str, max, min # noqa: F401 +from six import ensure_str class DictOfSets(dict): @@ -76,3 +77,34 @@ class AuxValueDict(dict): have an auxvalue.""" super(AuxValueDict, self).__setitem__(key, value) self.auxilliary[key] = auxilliary + + +class _TypedKeyDict(dict): + """Dictionary that enforces key type. + + Doesn't override everything, but probably good enough to catch most + problems. + + Subclass and override KEY_TYPE. + """ + + KEY_TYPE = object + + +def _make_enforcing_override(K, method_name): + def f(self, key, *args, **kwargs): + assert isinstance(key, self.KEY_TYPE) + return getattr(dict, method_name)(self, key, *args, **kwargs) + f.__name__ = ensure_str(method_name) + setattr(K, method_name, f) + +for _method_name in ["__setitem__", "__getitem__", "setdefault", "get", + "__delitem__"]: + _make_enforcing_override(_TypedKeyDict, _method_name) +del _method_name + + +class BytesKeyDict(_TypedKeyDict): + """Keys should be bytes.""" + + KEY_TYPE = bytes From 92a4a5afccfebb9c197c3af8700172f8091cd5dd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 30 Oct 2020 15:04:00 -0400 Subject: [PATCH 03/20] Closer to more passing tests, maybe. --- src/allmydata/test/common.py | 8 +++---- src/allmydata/test/test_storage_client.py | 28 ++++++++++++----------- src/allmydata/webish.py | 22 +++++++++--------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 15d677f89..1cf1d6428 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -230,16 +230,16 @@ class UseNode(object): def setUp(self): def format_config_items(config): - return b"\n".join( - b" = ".join((key, value)) + return "\n".join( + " = ".join((key, value)) for (key, value) in config.items() ) if self.plugin_config is None: - plugin_config_section = b"" + plugin_config_section = "" else: - plugin_config_section = b""" + plugin_config_section = """ [storageclient.plugins.{storage_plugin}] {config} """.format( diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index 11784adfe..ca01d1623 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -1,3 +1,5 @@ +from six import ensure_text + import hashlib from mock import Mock from json import ( @@ -271,7 +273,7 @@ class PluginMatchedAnnouncement(SyncTestCase): """ yield self.make_node( introducer_furl=SOME_FURL, - storage_plugin=b"tahoe-lafs-dummy-v1", + storage_plugin="tahoe-lafs-dummy-v1", plugin_config=None, ) server_id = b"v0-abcdef" @@ -295,9 +297,9 @@ class PluginMatchedAnnouncement(SyncTestCase): configuration is matched and the plugin's storage client is used. """ plugin_config = { - b"abc": b"xyz", + "abc": "xyz", } - plugin_name = b"tahoe-lafs-dummy-v1" + plugin_name = "tahoe-lafs-dummy-v1" yield self.make_node( introducer_furl=SOME_FURL, storage_plugin=plugin_name, @@ -348,7 +350,7 @@ class PluginMatchedAnnouncement(SyncTestCase): An announcement that could be matched by a plugin that is enabled with no configuration is matched and the plugin's storage client is used. """ - plugin_name = b"tahoe-lafs-dummy-v1" + plugin_name = "tahoe-lafs-dummy-v1" yield self.make_node( introducer_furl=SOME_FURL, storage_plugin=plugin_name, @@ -425,7 +427,7 @@ class StoragePluginWebPresence(AsyncTestCase): self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) - self.storage_plugin = b"tahoe-lafs-dummy-v1" + self.storage_plugin = "tahoe-lafs-dummy-v1" from twisted.internet import reactor _, port_endpoint = self.port_assigner.assign(reactor) @@ -436,15 +438,15 @@ class StoragePluginWebPresence(AsyncTestCase): self.basedir.child(u"private").makedirs() self.node_fixture = self.useFixture(UseNode( plugin_config={ - b"web": b"1", + "web": "1", }, node_config={ - b"tub.location": b"127.0.0.1:1", - b"web.port": port_endpoint, + "tub.location": "127.0.0.1:1", + "web.port": ensure_text(port_endpoint), }, storage_plugin=self.storage_plugin, basedir=self.basedir, - introducer_furl=SOME_FURL, + introducer_furl=ensure_text(SOME_FURL), )) self.node = yield self.node_fixture.create_node() self.webish = self.node.getServiceNamed(WebishServer.name) @@ -461,7 +463,7 @@ class StoragePluginWebPresence(AsyncTestCase): port=self.port, plugin_name=self.storage_plugin, ).encode("utf-8") - result = yield do_http(b"get", url) + result = yield do_http("get", url) self.assertThat(result, Equals(dumps({b"web": b"1"}))) @inlineCallbacks @@ -476,13 +478,13 @@ class StoragePluginWebPresence(AsyncTestCase): port=self.port, path=( u"storage-plugins", - self.storage_plugin.decode("utf-8"), + self.storage_plugin, u"counter", ), ).to_text().encode("utf-8") values = { - loads((yield do_http(b"get", url)))[u"value"], - loads((yield do_http(b"get", url)))[u"value"], + loads((yield do_http("get", url)))[u"value"], + loads((yield do_http("get", url)))[u"value"], } self.assertThat( values, diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 12a5c197b..f94d6f7da 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -57,7 +57,7 @@ class TahoeLAFSRequest(Request, object): self.method, self.uri = command, path self.clientproto = version - x = self.uri.split('?', 1) + x = self.uri.split(b'?', 1) if len(x) == 1: self.path = self.uri @@ -116,25 +116,25 @@ def _logFormatter(logDateTime, request): # match apache formatting. TODO: when we move to DSA dirnodes and # shorter caps, consider exposing a few characters of the cap, or # maybe a few characters of its hash. - x = request.uri.split("?", 1) + x = request.uri.split(b"?", 1) if len(x) == 1: # no query args path = request.uri - queryargs = "" + queryargs = b"" else: path, queryargs = x # there is a form handler which redirects POST /uri?uri=FOO into # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make # sure we censor these too. - if queryargs.startswith("uri="): - queryargs = "uri=[CENSORED]" + if queryargs.startswith(b"uri="): + queryargs = b"uri=[CENSORED]" queryargs = "?" + queryargs - if path.startswith("/uri/"): - path = "/uri/[CENSORED]" - elif path.startswith("/file/"): - path = "/file/[CENSORED]" - elif path.startswith("/named/"): - path = "/named/[CENSORED]" + if path.startswith(b"/uri/"): + path = b"/uri/[CENSORED]" + elif path.startswith(b"/file/"): + path = b"/file/[CENSORED]" + elif path.startswith(b"/named/"): + path = b"/named/[CENSORED]" uri = path + queryargs From 35c304cd82f3e6f2d1ac6624645778c04106a853 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 10:04:16 -0500 Subject: [PATCH 04/20] Workaround for Eliot flaw. --- src/allmydata/test/eliotutil.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py index 9c12629f9..0c72502a7 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -6,6 +6,7 @@ Tools aimed at the interaction between tests and Eliot. # Can't use `builtins.str` because it's not JSON encodable: # `exceptions.TypeError: is not JSON-encodeable` from past.builtins import unicode as str +from future.utils import PY3 __all__ = [ "RUN_TEST", @@ -164,6 +165,9 @@ class EliotLoggedRunTest(object): @eliot_logged_test def run(self, result=None): + # Workaround for https://github.com/itamarst/eliot/issues/456 + if PY3: + self.case.eliot_logger._validate_message = lambda *args, **kwargs: None return self._run_tests_with_factory( self.case, self.handlers, From e3a0f61dcac2906679fb09d34bc72e74c6d2a0de Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 10:04:24 -0500 Subject: [PATCH 05/20] More passing tests. --- src/allmydata/test/test_storage_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index ca01d1623..c78a56f5e 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -405,7 +405,7 @@ class FoolscapStorageServers(unittest.TestCase): verifyObject( IFoolscapStorageServer, _FoolscapStorage.from_announcement( - u"server-id", + b"server-id", SOME_FURL, {u"permutation-seed-base32": base32.b2a(b"permutationseed")}, NotStorageServer(), @@ -427,7 +427,7 @@ class StoragePluginWebPresence(AsyncTestCase): self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) - self.storage_plugin = "tahoe-lafs-dummy-v1" + self.storage_plugin = u"tahoe-lafs-dummy-v1" from twisted.internet import reactor _, port_endpoint = self.port_assigner.assign(reactor) From 672c44009127987b9653db5a2bb7bac4182d3c2f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 10:40:41 -0500 Subject: [PATCH 06/20] Better error reporting. --- src/allmydata/test/common_web.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/common_web.py b/src/allmydata/test/common_web.py index 033b77c98..a6278810b 100644 --- a/src/allmydata/test/common_web.py +++ b/src/allmydata/test/common_web.py @@ -1,3 +1,4 @@ +from six import ensure_str __all__ = [ "do_http", @@ -36,6 +37,14 @@ from ..webish import ( TahoeLAFSRequest, ) + +class VerboseError(Error): + """Include the HTTP body response too.""" + + def __str__(self): + return Error.__str__(self) + " " + ensure_str(self.response) + + @inlineCallbacks def do_http(method, url, **kwargs): response = yield treq.request(method, url, persistent=False, **kwargs) @@ -43,7 +52,7 @@ def do_http(method, url, **kwargs): # TODO: replace this with response.fail_for_status when # https://github.com/twisted/treq/pull/159 has landed if 400 <= response.code < 600: - raise Error(response.code, response=body) + raise VerboseError(response.code, response=body) returnValue(body) From 60992174ff228e03e3d7cd230cfe144df15b14ec Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 10:40:53 -0500 Subject: [PATCH 07/20] twisted.web expects bytes. --- src/allmydata/test/storage_plugin.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/storage_plugin.py b/src/allmydata/test/storage_plugin.py index 52e909b13..4a1f84531 100644 --- a/src/allmydata/test/storage_plugin.py +++ b/src/allmydata/test/storage_plugin.py @@ -3,11 +3,8 @@ A storage server plugin the test suite can use to validate the functionality. """ -from future.utils import native_str - -from json import ( - dumps, -) +from future.utils import native_str, native_str_to_bytes +from six import ensure_str import attr @@ -35,6 +32,9 @@ from allmydata.interfaces import ( from allmydata.client import ( AnnounceableStorageServer, ) +from allmydata.util.jsonbytes import ( + dumps, +) class RIDummy(RemoteInterface): @@ -84,8 +84,8 @@ class DummyStorage(object): """ items = configuration.items(self._client_section_name, []) resource = Data( - dumps(dict(items)), - b"text/json", + native_str_to_bytes(dumps(dict(items))), + ensure_str("text/json"), ) # Give it some dynamic stuff too. resource.putChild(b"counter", GetCounter()) @@ -102,7 +102,7 @@ class GetCounter(Resource, object): value = 0 def render_GET(self, request): self.value += 1 - return dumps({"value": self.value}) + return native_str_to_bytes(dumps({"value": self.value})) @implementer(RIDummy) From 3edc1cb29ec0ded19c877c56d554f018daea66df Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 10:41:02 -0500 Subject: [PATCH 08/20] The dictionary is unicode, not bytes. --- src/allmydata/web/storage_plugins.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/web/storage_plugins.py b/src/allmydata/web/storage_plugins.py index 57f636f50..4994b23f3 100644 --- a/src/allmydata/web/storage_plugins.py +++ b/src/allmydata/web/storage_plugins.py @@ -27,6 +27,8 @@ class StoragePlugins(Resource, object): :see: ``twisted.web.iweb.IResource.getChild`` """ + # Technically client could be using some other encoding? + segment = segment.decode("utf-8") resources = self._client.get_client_storage_plugin_web_resources() try: result = resources[segment] From f6eb4aef576ec944956e5ad3bd42b5f3f4c6d4cc Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 10:41:13 -0500 Subject: [PATCH 09/20] Work consistently across Python 2 and 3. --- src/allmydata/test/test_storage_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index c78a56f5e..8345e330d 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -1,11 +1,11 @@ from six import ensure_text +from json import ( + loads, +) + import hashlib from mock import Mock -from json import ( - dumps, - loads, -) from fixtures import ( TempDir, ) @@ -464,7 +464,7 @@ class StoragePluginWebPresence(AsyncTestCase): plugin_name=self.storage_plugin, ).encode("utf-8") result = yield do_http("get", url) - self.assertThat(result, Equals(dumps({b"web": b"1"}))) + self.assertThat(loads(result), Equals({"web": "1"})) @inlineCallbacks def test_plugin_resource_persistent_across_requests(self): From d30014f8f59f60cec4b9e8a5ebdd4d64df5f91a6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 3 Nov 2020 11:14:25 -0500 Subject: [PATCH 10/20] The Resource dictionary is keyed by bytes, so storing unicode means you don't get the cached resource! --- src/allmydata/web/storage_plugins.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/allmydata/web/storage_plugins.py b/src/allmydata/web/storage_plugins.py index 4994b23f3..939047c6e 100644 --- a/src/allmydata/web/storage_plugins.py +++ b/src/allmydata/web/storage_plugins.py @@ -27,11 +27,10 @@ class StoragePlugins(Resource, object): :see: ``twisted.web.iweb.IResource.getChild`` """ - # Technically client could be using some other encoding? - segment = segment.decode("utf-8") resources = self._client.get_client_storage_plugin_web_resources() try: - result = resources[segment] + # Technically client could be using some other encoding? + result = resources[segment.decode("utf-8")] except KeyError: result = NoResource() self.putChild(segment, result) From f34597ac61726d167aa22187e073464127e6195c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:09:55 -0500 Subject: [PATCH 11/20] All tests pass on Python 3. --- src/allmydata/storage_client.py | 28 ++++++++++++++-------------- src/allmydata/util/dictutil.py | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 58aa749de..5834ae66f 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -65,7 +65,7 @@ from allmydata.util.assertutil import precondition from allmydata.util.observer import ObserverList from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.util.hashutil import permute_server_hash -from allmydata.util.dictutil import BytesKeyDict +from allmydata.util.dictutil import BytesKeyDict, UnicodeKeyDict # who is responsible for de-duplication? @@ -684,16 +684,16 @@ class NativeStorageServer(service.MultiService): @ivar remote_host: the IAddress, if connected, otherwise None """ - VERSION_DEFAULTS = { - b"http://allmydata.org/tahoe/protocols/storage/v1" : - { b"maximum-immutable-share-size": 2**32 - 1, - b"maximum-mutable-share-size": 2*1000*1000*1000, # maximum prior to v1.9.2 - b"tolerates-immutable-read-overrun": False, - b"delete-mutable-shares-with-zero-length-writev": False, - b"available-space": None, - }, - b"application-version": "unknown: no get_version()", - } + VERSION_DEFAULTS = UnicodeKeyDict({ + "http://allmydata.org/tahoe/protocols/storage/v1" : + UnicodeKeyDict({ "maximum-immutable-share-size": 2**32 - 1, + "maximum-mutable-share-size": 2*1000*1000*1000, # maximum prior to v1.9.2 + "tolerates-immutable-read-overrun": False, + "delete-mutable-shares-with-zero-length-writev": False, + "available-space": None, + }), + "application-version": "unknown: no get_version()", + }) def __init__(self, server_id, ann, tub_maker, handler_overrides, node_config, config=StorageClientConfig()): service.MultiService.__init__(self) @@ -833,10 +833,10 @@ class NativeStorageServer(service.MultiService): version = self.get_version() if version is None: return None - protocol_v1_version = version.get(b'http://allmydata.org/tahoe/protocols/storage/v1', {}) - available_space = protocol_v1_version.get('available-space') + protocol_v1_version = version.get('http://allmydata.org/tahoe/protocols/storage/v1', UnicodeKeyDict()) + available_space = protocol_v1_version.get(u'available-space') if available_space is None: - available_space = protocol_v1_version.get('maximum-immutable-share-size', None) + available_space = protocol_v1_version.get(u'maximum-immutable-share-size', None) return available_space def start_connecting(self, trigger_cb): diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index 5fc85fbd1..ed66a0e52 100644 --- a/src/allmydata/util/dictutil.py +++ b/src/allmydata/util/dictutil.py @@ -104,7 +104,18 @@ for _method_name in ["__setitem__", "__getitem__", "setdefault", "get", del _method_name -class BytesKeyDict(_TypedKeyDict): - """Keys should be bytes.""" +if PY2: + # No need for enforcement, can use either bytes or unicode as keys and it's + # fine. + BytesKeyDict = UnicodeKeyDict = dict +else: + class BytesKeyDict(_TypedKeyDict): + """Keys should be bytes.""" - KEY_TYPE = bytes + KEY_TYPE = bytes + + + class UnicodeKeyDict(_TypedKeyDict): + """Keys should be unicode strings.""" + + KEY_TYPE = str From d407cb50053cbf0ebfe0fb3dd531ccac5404a70f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:19:16 -0500 Subject: [PATCH 12/20] Port to Python 3. --- src/allmydata/test/test_storage_client.py | 12 ++++++++++++ src/allmydata/util/_python3.py | 1 + 2 files changed, 13 insertions(+) diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index 8345e330d..fa3a34b15 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -1,3 +1,15 @@ +""" +Ported from Python 3. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + from six import ensure_text from json import ( diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py index df4e5c68c..9218f7407 100644 --- a/src/allmydata/util/_python3.py +++ b/src/allmydata/util/_python3.py @@ -129,6 +129,7 @@ PORTED_TEST_MODULES = [ "allmydata.test.test_spans", "allmydata.test.test_statistics", "allmydata.test.test_storage", + "allmydata.test.test_storage_client", "allmydata.test.test_storage_web", "allmydata.test.test_time_format", "allmydata.test.test_upload", From 2b557287c828123292ef23b10a1f49d1c3b780c2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:21:13 -0500 Subject: [PATCH 13/20] Lint fix. --- src/allmydata/test/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_client.py b/src/allmydata/test/test_client.py index 9f0f73a52..0f0648a4c 100644 --- a/src/allmydata/test/test_client.py +++ b/src/allmydata/test/test_client.py @@ -39,7 +39,7 @@ from testtools.twistedsupport import ( import allmydata import allmydata.util.log -from allmydata.node import OldConfigError, UnescapedHashError, _Config, create_node_dir +from allmydata.node import OldConfigError, UnescapedHashError, create_node_dir from allmydata.frontends.auth import NeedRootcapLookupScheme from allmydata.version_checks import ( get_package_versions_string, From dc818757b65ca1711320b4f415e7dc74fd9eb923 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:22:34 -0500 Subject: [PATCH 14/20] Port to Python 3. --- src/allmydata/storage_client.py | 23 ++++++++++++++++------- src/allmydata/util/_python3.py | 1 + 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 5834ae66f..9b7422443 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -2,7 +2,13 @@ """ I contain the client-side code which speaks to storage servers, in particular the foolscap-based server implemented in src/allmydata/storage/*.py . + +Ported to Python 3. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals # roadmap: # @@ -28,7 +34,10 @@ the foolscap-based server implemented in src/allmydata/storage/*.py . # # 6: implement other sorts of IStorageClient classes: S3, etc -from past.builtins import unicode +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + import re, time, hashlib @@ -202,7 +211,7 @@ class StorageFarmBroker(service.MultiService): # information. raise else: - if isinstance(server_id, unicode): + if isinstance(server_id, str): server_id = server_id.encode("utf-8") self._static_server_ids.add(server_id) self.servers[server_id] = storage_server @@ -403,7 +412,7 @@ class StorageFarmBroker(service.MultiService): # connections to only a subset of the servers, which would increase # the chances that we'll put shares in weird places (and not update # existing shares of mutable files). See #374 for more details. - for dsc in self.servers.values(): + for dsc in list(self.servers.values()): dsc.try_to_connect() def get_servers_for_psi(self, peer_selection_index): @@ -443,7 +452,7 @@ class StorageFarmBroker(service.MultiService): # Upload Results web page). If the Helper is running 1.12 or newer, # it will send pubkeys, but if it's still running 1.11, it will send # tubids. This clause maps the old tubids to our existing servers. - for s in self.servers.values(): + for s in list(self.servers.values()): if isinstance(s, NativeStorageServer): if serverid == s.get_tubid(): return s @@ -556,7 +565,7 @@ class _FoolscapStorage(object): tubid = base32.a2b(tubid_s) if "permutation-seed-base32" in ann: seed = ann["permutation-seed-base32"] - if isinstance(seed, unicode): + if isinstance(seed, str): seed = seed.encode("utf-8") ps = base32.a2b(seed) elif re.search(br'^v0-[0-9a-zA-Z]{52}$', server_id): @@ -651,7 +660,7 @@ def _storage_from_foolscap_plugin(node_config, config, announcement, get_rref): in getPlugins(IFoolscapStoragePlugin) } storage_options = announcement.get(u"storage-options", []) - for plugin_name, plugin_config in config.storage_plugins.items(): + for plugin_name, plugin_config in list(config.storage_plugins.items()): try: plugin = plugins[plugin_name] except KeyError: @@ -758,7 +767,7 @@ class NativeStorageServer(service.MultiService): # Nope pass else: - if isinstance(furl, unicode): + if isinstance(furl, str): furl = furl.encode("utf-8") # See comment above for the _storage_from_foolscap_plugin case # about passing in get_rref. diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py index 9218f7407..1d0517213 100644 --- a/src/allmydata/util/_python3.py +++ b/src/allmydata/util/_python3.py @@ -52,6 +52,7 @@ PORTED_MODULES = [ "allmydata.introducer.interfaces", "allmydata.monitor", "allmydata.node", + "allmydata.storage_client", "allmydata.storage.common", "allmydata.storage.crawler", "allmydata.storage.expirer", From 0a6321cc9a91f6038df483ae27e72e14348a237b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:36:08 -0500 Subject: [PATCH 15/20] Tests and additional check for typed key dicts. --- src/allmydata/test/test_dictutil.py | 58 +++++++++++++++++++++++++++++ src/allmydata/util/dictutil.py | 11 +++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_dictutil.py b/src/allmydata/test/test_dictutil.py index 9b7124114..111613cf1 100644 --- a/src/allmydata/test/test_dictutil.py +++ b/src/allmydata/test/test_dictutil.py @@ -12,6 +12,8 @@ from future.utils import PY2 if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from unittest import skipIf + from twisted.trial import unittest from allmydata.util import dictutil @@ -88,3 +90,59 @@ class DictUtil(unittest.TestCase): self.failUnlessEqual(sorted(d.keys()), ["one","two"]) self.failUnlessEqual(d["one"], 1) self.failUnlessEqual(d.get_aux("one"), None) + + +class TypedKeyDict(unittest.TestCase): + """Tests for dictionaries that limit keys.""" + + @skipIf(PY2, "Python 2 doesn't have issues mixing bytes and unicode.") + def setUp(self): + pass + + def test_bytes(self): + """BytesKeyDict is limited to just byte keys.""" + self.assertRaises(TypeError, dictutil.BytesKeyDict, {u"hello": 123}) + d = dictutil.BytesKeyDict({b"123": 200}) + with self.assertRaises(TypeError): + d[u"hello"] = "blah" + with self.assertRaises(TypeError): + d[u"hello"] + with self.assertRaises(TypeError): + del d[u"hello"] + with self.assertRaises(TypeError): + d.setdefault(u"hello", "123") + with self.assertRaises(TypeError): + d.get(u"xcd") + + # Byte keys are fine: + self.assertEqual(d, {b"123": 200}) + d[b"456"] = 400 + self.assertEqual(d[b"456"], 400) + del d[b"456"] + self.assertEqual(d.get(b"456", 50), 50) + self.assertEqual(d.setdefault(b"456", 300), 300) + self.assertEqual(d[b"456"], 300) + + def test_unicode(self): + """UnicodeKeyDict is limited to just byte keys.""" + self.assertRaises(TypeError, dictutil.UnicodeKeyDict, {b"hello": 123}) + d = dictutil.UnicodeKeyDict({u"123": 200}) + with self.assertRaises(TypeError): + d[b"hello"] = "blah" + with self.assertRaises(TypeError): + d[b"hello"] + with self.assertRaises(TypeError): + del d[b"hello"] + with self.assertRaises(TypeError): + d.setdefault(b"hello", "123") + with self.assertRaises(TypeError): + d.get(b"xcd") + + # Byte keys are fine: + self.assertEqual(d, {u"123": 200}) + d[u"456"] = 400 + self.assertEqual(d[u"456"], 400) + del d[u"456"] + self.assertEqual(d.get(u"456", 50), 50) + self.assertEqual(d.setdefault(u"456", 300), 300) + self.assertEqual(d[u"456"], 300) diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index ed66a0e52..5971d26f6 100644 --- a/src/allmydata/util/dictutil.py +++ b/src/allmydata/util/dictutil.py @@ -90,10 +90,19 @@ class _TypedKeyDict(dict): KEY_TYPE = object + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) + for key in self: + if not isinstance(key, self.KEY_TYPE): + raise TypeError("{} must be of type {}".format( + repr(key), self.KEY_TYPE)) + def _make_enforcing_override(K, method_name): def f(self, key, *args, **kwargs): - assert isinstance(key, self.KEY_TYPE) + if not isinstance(key, self.KEY_TYPE): + raise TypeError("{} must be of type {}".format( + repr(key), self.KEY_TYPE)) return getattr(dict, method_name)(self, key, *args, **kwargs) f.__name__ = ensure_str(method_name) setattr(K, method_name, f) From 4c4f7f8fa198c58bef389119b11db21aba7cd303 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:41:00 -0500 Subject: [PATCH 16/20] News file. --- newsfragments/3465.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3465.minor diff --git a/newsfragments/3465.minor b/newsfragments/3465.minor new file mode 100644 index 000000000..e69de29bb From 813594bbac6ac6951ccfe20dff12e2483ee79fda Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 13:45:41 -0500 Subject: [PATCH 17/20] Go back to the way it was. --- src/allmydata/storage_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 9b7422443..010d63233 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -209,7 +209,7 @@ class StorageFarmBroker(service.MultiService): # written tests will still fail if a surprising exception # arrives here but they might be harder to debug without this # information. - raise + pass else: if isinstance(server_id, str): server_id = server_id.encode("utf-8") From f57ba3c927f53d9df8a72e266bc1d8171e47e05c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 Nov 2020 14:13:08 -0500 Subject: [PATCH 18/20] Can't send unicode over foolscap. --- src/allmydata/introducer/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/introducer/client.py b/src/allmydata/introducer/client.py index 243dd48bd..36adae474 100644 --- a/src/allmydata/introducer/client.py +++ b/src/allmydata/introducer/client.py @@ -179,7 +179,7 @@ class IntroducerClient(service.Service, Referenceable): self._subscriptions.add(service_name) self._debug_outstanding += 1 d = self._publisher.callRemote("subscribe_v2", - self, service_name, + self, service_name.encode("utf-8"), self._my_subscriber_info) d.addBoth(self._debug_retired) d.addErrback(log.err, facility="tahoe.introducer.client", From a49ef086b6528510ce6d56787f65dc718c094843 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 5 Nov 2020 10:09:52 -0500 Subject: [PATCH 19/20] No need for explicit unicode. --- src/allmydata/storage_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 010d63233..2b0ec5af1 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -843,9 +843,9 @@ class NativeStorageServer(service.MultiService): if version is None: return None protocol_v1_version = version.get('http://allmydata.org/tahoe/protocols/storage/v1', UnicodeKeyDict()) - available_space = protocol_v1_version.get(u'available-space') + available_space = protocol_v1_version.get('available-space') if available_space is None: - available_space = protocol_v1_version.get(u'maximum-immutable-share-size', None) + available_space = protocol_v1_version.get('maximum-immutable-share-size', None) return available_space def start_connecting(self, trigger_cb): From cd01d4dafa1ea458d1e5b47f480039fa770e8784 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 5 Nov 2020 10:15:38 -0500 Subject: [PATCH 20/20] Test for Python 2 BytesKeyDict and UnicodeKeyDict behavior. --- src/allmydata/test/test_dictutil.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_dictutil.py b/src/allmydata/test/test_dictutil.py index 111613cf1..7e26a6ed9 100644 --- a/src/allmydata/test/test_dictutil.py +++ b/src/allmydata/test/test_dictutil.py @@ -8,9 +8,10 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals -from future.utils import PY2 +from future.utils import PY2, PY3 if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + # dict omitted to match dictutil.py. + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, list, object, range, str, max, min # noqa: F401 from unittest import skipIf @@ -124,7 +125,7 @@ class TypedKeyDict(unittest.TestCase): self.assertEqual(d[b"456"], 300) def test_unicode(self): - """UnicodeKeyDict is limited to just byte keys.""" + """UnicodeKeyDict is limited to just unicode keys.""" self.assertRaises(TypeError, dictutil.UnicodeKeyDict, {b"hello": 123}) d = dictutil.UnicodeKeyDict({u"123": 200}) with self.assertRaises(TypeError): @@ -146,3 +147,24 @@ class TypedKeyDict(unittest.TestCase): self.assertEqual(d.get(u"456", 50), 50) self.assertEqual(d.setdefault(u"456", 300), 300) self.assertEqual(d[u"456"], 300) + + +class TypedKeyDictPython2(unittest.TestCase): + """Tests for dictionaries that limit keys on Python 2.""" + + @skipIf(PY3, "Testing Python 2 behavior.") + def test_python2(self): + """ + On Python2, BytesKeyDict and UnicodeKeyDict are unnecessary, because + dicts can mix both without problem so you don't get confusing behavior + if you get the type wrong. + + Eventually in a Python 3-only world mixing bytes and unicode will be + bad, thus the existence of these classes, but as we port there will be + situations where it's mixed on Python 2, which again is fine. + """ + self.assertIs(dictutil.UnicodeKeyDict, dict) + self.assertIs(dictutil.BytesKeyDict, dict) + # Demonstration of how bytes and unicode can be mixed: + d = {u"abc": 1} + self.assertEqual(d[b"abc"], 1)