From 416ab643359c10e80f9ef0b18e8e4d173757bde5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 10:50:44 -0400 Subject: [PATCH 1/8] Fix an import. --- src/allmydata/test/common_py3.py | 23 +++++++++++++++++++++++ src/allmydata/test/test_crawler.py | 2 +- src/allmydata/test/test_storage.py | 18 +----------------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/allmydata/test/common_py3.py b/src/allmydata/test/common_py3.py index 88f52ce8d..0daf66e62 100644 --- a/src/allmydata/test/common_py3.py +++ b/src/allmydata/test/common_py3.py @@ -112,3 +112,26 @@ def skip_if_cannot_represent_filename(u): except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.") + +class Marker(object): + pass + +class FakeCanary(object): + """For use in storage tests. + + Can be moved back to test_storage.py once enough Python 3 porting has been + done. + """ + def __init__(self, ignore_disconnectors=False): + self.ignore = ignore_disconnectors + self.disconnectors = {} + def notifyOnDisconnect(self, f, *args, **kwargs): + if self.ignore: + return + m = Marker() + self.disconnectors[m] = (f, args, kwargs) + return m + def dontNotifyOnDisconnect(self, marker): + if self.ignore: + return + del self.disconnectors[marker] diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 48d1ba26e..95cb2ce75 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -11,7 +11,7 @@ from allmydata.util import fileutil, hashutil, pollmixin from allmydata.storage.server import StorageServer, si_b2a from allmydata.storage.crawler import ShareCrawler, TimeSliceExceeded -from allmydata.test.test_storage import FakeCanary +from allmydata.test.common_py3 import FakeCanary from allmydata.test.common_util import StallMixin class BucketEnumeratingCrawler(ShareCrawler): diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index ca7f2b0d0..14c342c41 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -35,24 +35,8 @@ from allmydata.test.no_network import NoNetworkServer from allmydata.storage_client import ( _StorageServer, ) +from .common_py3 import FakeCanary -class Marker(object): - pass - -class FakeCanary(object): - def __init__(self, ignore_disconnectors=False): - self.ignore = ignore_disconnectors - self.disconnectors = {} - def notifyOnDisconnect(self, f, *args, **kwargs): - if self.ignore: - return - m = Marker() - self.disconnectors[m] = (f, args, kwargs) - return m - def dontNotifyOnDisconnect(self, marker): - if self.ignore: - return - del self.disconnectors[marker] class FakeStatsProvider(object): def count(self, name, delta=1): From 14f349e846492215c59d397c423a1aa303d1accd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 11:02:26 -0400 Subject: [PATCH 2/8] Manual porting to Python 3. --- src/allmydata/test/test_crawler.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 95cb2ce75..12f456f7c 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -92,27 +92,27 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): return self.s.stopService() def si(self, i): - return hashutil.storage_index_hash(str(i)) + return hashutil.storage_index_hash(bytes(i)) def rs(self, i, serverid): - return hashutil.bucket_renewal_secret_hash(str(i), serverid) + return hashutil.bucket_renewal_secret_hash(bytes(i), serverid) def cs(self, i, serverid): - return hashutil.bucket_cancel_secret_hash(str(i), serverid) + return hashutil.bucket_cancel_secret_hash(bytes(i), serverid) def write(self, i, ss, serverid, tail=0): si = self.si(i) - si = si[:-1] + chr(tail) + si = si[:-1] + bytearray((tail,)) had,made = ss.remote_allocate_buckets(si, self.rs(i, serverid), self.cs(i, serverid), set([0]), 99, FakeCanary()) - made[0].remote_write(0, "data") + made[0].remote_write(0, b"data") made[0].remote_close() return si_b2a(si) def test_immediate(self): self.basedir = "crawler/Basic/immediate" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -141,7 +141,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_service(self): self.basedir = "crawler/Basic/service" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -169,7 +169,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_paced(self): self.basedir = "crawler/Basic/paced" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -271,7 +271,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_paced_service(self): self.basedir = "crawler/Basic/paced_service" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -338,7 +338,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): self.basedir = "crawler/Basic/cpu_usage" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -383,7 +383,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_empty_subclass(self): self.basedir = "crawler/Basic/empty_subclass" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -411,7 +411,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_oneshot(self): self.basedir = "crawler/Basic/oneshot" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) From e044309bd37418432f1b1a7cbaa79003b02b9646 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 11:03:54 -0400 Subject: [PATCH 3/8] Finish port to Python 3. --- src/allmydata/test/test_crawler.py | 7 +++++++ src/allmydata/util/_python3.py | 1 + 2 files changed, 8 insertions(+) diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 12f456f7c..8be9a3d92 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -1,4 +1,11 @@ from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import time import os.path diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py index 6f85ce214..ad71909f9 100644 --- a/src/allmydata/util/_python3.py +++ b/src/allmydata/util/_python3.py @@ -63,6 +63,7 @@ PORTED_TEST_MODULES = [ "allmydata.test.test_abbreviate", "allmydata.test.test_base32", "allmydata.test.test_base62", + "allmydata.test.test_crawler", "allmydata.test.test_crypto", "allmydata.test.test_deferredutil", "allmydata.test.test_dictutil", From e971ccf58e5de3835c4e47499f177358c8cf1e96 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 11:12:29 -0400 Subject: [PATCH 4/8] Unbreak so tests pass on Python 2 again. --- src/allmydata/test/test_crawler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 8be9a3d92..1b9d85458 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -5,7 +5,8 @@ from __future__ import unicode_literals from future.utils import PY2 if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + # Don't use future bytes, since it breaks tests. + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401 import time import os.path @@ -107,7 +108,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def write(self, i, ss, serverid, tail=0): si = self.si(i) - si = si[:-1] + bytearray((tail,)) + si = si[:-1] + bytes(bytearray((tail,))) had,made = ss.remote_allocate_buckets(si, self.rs(i, serverid), self.cs(i, serverid), From ff582c5129dd9f575057f3713a06ce5f8bc0ec52 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 11:38:59 -0400 Subject: [PATCH 5/8] Some progress towards running crawler on Python 3. --- src/allmydata/storage/common.py | 8 ++++++++ src/allmydata/storage/crawler.py | 7 ++++++- src/allmydata/storage/lease.py | 2 +- src/allmydata/storage/server.py | 2 +- src/allmydata/test/test_crawler.py | 6 +++--- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index 865275bc1..55036eea7 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -1,3 +1,4 @@ +from future.utils import PY3 import os.path from allmydata.util import base32 @@ -17,5 +18,12 @@ def si_a2b(ascii_storageindex): return base32.a2b(ascii_storageindex) def storage_index_to_dir(storageindex): + """Convert storage index to directory path. + + Returns native string. + """ sia = si_b2a(storageindex) + if PY3: + # On Python 3 we expect paths to be unicode. + sia = sia.decode("ascii") return os.path.join(sia[:2], sia) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 14139d81e..3f48a127d 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -1,3 +1,4 @@ +from future.utils import native_str, PY3 import os, time, struct try: @@ -77,6 +78,9 @@ class ShareCrawler(service.MultiService): self.statefile = statefile self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] + if PY3: + # On Python 3 we expect the paths to be unicode, not bytes. + self.prefixes = [p.decode("ascii") for p in self.prefixes] self.prefixes.sort() self.timer = None self.bucket_cache = (None, []) @@ -314,7 +318,8 @@ class ShareCrawler(service.MultiService): try: buckets = os.listdir(prefixdir) buckets.sort() - except EnvironmentError: + except EnvironmentError as e: + print(e) buckets = [] self.bucket_cache = (i, buckets) self.process_prefixdir(cycle, prefix, prefixdir, diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 5e3e01716..4d2dce8c4 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -8,7 +8,7 @@ class LeaseInfo(object): self.cancel_secret = cancel_secret self.expiration_time = expiration_time if nodeid is not None: - assert isinstance(nodeid, str) + assert isinstance(nodeid, bytes) assert len(nodeid) == 20 self.nodeid = nodeid diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 26823957e..3ffb58b68 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -49,7 +49,7 @@ class StorageServer(service.MultiService, Referenceable): expiration_cutoff_date=None, expiration_sharetypes=("mutable", "immutable")): service.MultiService.__init__(self) - assert isinstance(nodeid, str) + assert isinstance(nodeid, bytes) assert len(nodeid) == 20 self.my_nodeid = nodeid self.storedir = storedir diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 1b9d85458..1e1e79adb 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -100,11 +100,11 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): return self.s.stopService() def si(self, i): - return hashutil.storage_index_hash(bytes(i)) + return hashutil.storage_index_hash(b"%d" % (i,)) def rs(self, i, serverid): - return hashutil.bucket_renewal_secret_hash(bytes(i), serverid) + return hashutil.bucket_renewal_secret_hash(b"%d" % (i,), serverid) def cs(self, i, serverid): - return hashutil.bucket_cancel_secret_hash(bytes(i), serverid) + return hashutil.bucket_cancel_secret_hash(b"%d" % (i,), serverid) def write(self, i, ss, serverid, tail=0): si = self.si(i) From 35ac5a62e721a0ae913d8f817f16387dad37136b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 12:15:39 -0400 Subject: [PATCH 6/8] Tests now pass on Python 3 too. --- src/allmydata/storage/crawler.py | 6 +++--- src/allmydata/test/test_crawler.py | 10 +++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 3f48a127d..75dd723ce 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -318,8 +318,7 @@ class ShareCrawler(service.MultiService): try: buckets = os.listdir(prefixdir) buckets.sort() - except EnvironmentError as e: - print(e) + except EnvironmentError: buckets = [] self.bucket_cache = (i, buckets) self.process_prefixdir(cycle, prefix, prefixdir, @@ -361,7 +360,8 @@ class ShareCrawler(service.MultiService): """ for bucket in buckets: - if bucket <= self.state["last-complete-bucket"]: + last_complete = self.state["last-complete-bucket"] + if last_complete is not None and bucket <= last_complete: continue self.process_bucket(cycle, prefix, prefixdir, bucket) self.state["last-complete-bucket"] = bucket diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 1e1e79adb..cfab04c31 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -3,7 +3,7 @@ from __future__ import division from __future__ import absolute_import from __future__ import unicode_literals -from future.utils import PY2 +from future.utils import PY2, PY3, native_str if PY2: # Don't use future bytes, since it breaks tests. from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401 @@ -30,6 +30,10 @@ class BucketEnumeratingCrawler(ShareCrawler): self.all_buckets = [] self.finished_d = defer.Deferred() def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): + if PY3: + # Bucket _inputs_ are bytes, and that's what we will compare this + # to: + storage_index_b32 = storage_index_b32.encode("ascii") self.all_buckets.append(storage_index_b32) def finished_cycle(self, cycle): eventually(self.finished_d.callback, None) @@ -44,6 +48,10 @@ class PacedCrawler(ShareCrawler): self.finished_d = defer.Deferred() self.yield_cb = None def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): + if PY3: + # Bucket _inputs_ are bytes, and that's what we will compare this + # to: + storage_index_b32 = storage_index_b32.encode("ascii") self.all_buckets.append(storage_index_b32) self.countdown -= 1 if self.countdown == 0: From 8279be38c110ee52538d814cf66f0ab56644eb40 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 13:25:11 -0400 Subject: [PATCH 7/8] Finish porting to Python 3. --- newsfragments/3386.minor | 0 src/allmydata/storage/crawler.py | 17 ++++++++++++++++- src/allmydata/test/test_crawler.py | 6 ++++++ src/allmydata/util/_python3.py | 1 + 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 newsfragments/3386.minor diff --git a/newsfragments/3386.minor b/newsfragments/3386.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 75dd723ce..24042c38b 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -1,4 +1,19 @@ -from future.utils import native_str, PY3 +""" +Crawl the storage server shares. + +Ported to Python 3. +""" + +from __future__ import unicode_literals +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from future.utils import PY2, PY3 +if PY2: + # We don't import bytes, object, dict, and list just in case they're used, + # so as not to create brittle pickles with random magic objects. + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 import os, time, struct try: diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index cfab04c31..04128769e 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -1,3 +1,9 @@ +""" +Tests for allmydata.storage.crawler. + +Ported to Python 3. +""" + from __future__ import print_function from __future__ import division from __future__ import absolute_import diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py index ad71909f9..794edef40 100644 --- a/src/allmydata/util/_python3.py +++ b/src/allmydata/util/_python3.py @@ -32,6 +32,7 @@ PORTED_MODULES = [ "allmydata.crypto.util", "allmydata.hashtree", "allmydata.immutable.happiness_upload", + "allmydata.storage.crawler", "allmydata.test.common_py3", "allmydata.util._python3", "allmydata.util.abbreviate", From 733b2cab361879aaf5ce0cffa73b5ef275645a29 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Aug 2020 13:26:57 -0400 Subject: [PATCH 8/8] Remove unnecessary import. --- src/allmydata/test/test_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 04128769e..38bc8dcc6 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -9,7 +9,7 @@ from __future__ import division from __future__ import absolute_import from __future__ import unicode_literals -from future.utils import PY2, PY3, native_str +from future.utils import PY2, PY3 if PY2: # Don't use future bytes, since it breaks tests. from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401