Merge pull request #785 from tahoe-lafs/3386.storage-crawler-python-3

Port allmydata.storage.crawler to Python 3

Fixes ticket:3386
This commit is contained in:
Itamar Turner-Trauring 2020-08-20 14:52:30 -04:00 committed by GitHub
commit 9089a1226a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 94 additions and 33 deletions

0
newsfragments/3386.minor Normal file
View File

View File

@ -1,3 +1,4 @@
from future.utils import PY3
import os.path
from allmydata.util import base32
@ -17,5 +18,12 @@ def si_a2b(ascii_storageindex):
return base32.a2b(ascii_storageindex)
def storage_index_to_dir(storageindex):
    """Return the share directory path for a storage index.

    The result is a native string: the two-character base32 prefix joined
    with the full base32 encoding of the index.
    """
    encoded = si_b2a(storageindex)
    if PY3:
        # Filesystem paths are expected to be unicode on Python 3.
        encoded = encoded.decode("ascii")
    return os.path.join(encoded[:2], encoded)

View File

@ -1,3 +1,19 @@
"""
Crawl the storage server shares.
Ported to Python 3.
"""
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2, PY3
if PY2:
# We don't import bytes, object, dict, and list just in case they're used,
# so as not to create brittle pickles with random magic objects.
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401
import os, time, struct
try:
@ -77,6 +93,9 @@ class ShareCrawler(service.MultiService):
self.statefile = statefile
self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2]
for i in range(2**10)]
if PY3:
# On Python 3 we expect the paths to be unicode, not bytes.
self.prefixes = [p.decode("ascii") for p in self.prefixes]
self.prefixes.sort()
self.timer = None
self.bucket_cache = (None, [])
@ -356,7 +375,8 @@ class ShareCrawler(service.MultiService):
"""
for bucket in buckets:
if bucket <= self.state["last-complete-bucket"]:
last_complete = self.state["last-complete-bucket"]
if last_complete is not None and bucket <= last_complete:
continue
self.process_bucket(cycle, prefix, prefixdir, bucket)
self.state["last-complete-bucket"] = bucket

View File

@ -8,7 +8,7 @@ class LeaseInfo(object):
self.cancel_secret = cancel_secret
self.expiration_time = expiration_time
if nodeid is not None:
assert isinstance(nodeid, str)
assert isinstance(nodeid, bytes)
assert len(nodeid) == 20
self.nodeid = nodeid

View File

@ -49,7 +49,7 @@ class StorageServer(service.MultiService, Referenceable):
expiration_cutoff_date=None,
expiration_sharetypes=("mutable", "immutable")):
service.MultiService.__init__(self)
assert isinstance(nodeid, str)
assert isinstance(nodeid, bytes)
assert len(nodeid) == 20
self.my_nodeid = nodeid
self.storedir = storedir

View File

@ -112,3 +112,26 @@ def skip_if_cannot_represent_filename(u):
except UnicodeEncodeError:
raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
class Marker(object):
    """Opaque token returned by ``FakeCanary.notifyOnDisconnect``.

    Serves only as a unique dictionary key so a registered disconnect
    callback can later be removed via ``dontNotifyOnDisconnect``.
    """
    pass
class FakeCanary(object):
    """For use in storage tests.

    Can be moved back to test_storage.py once enough Python 3 porting has been
    done.
    """
    def __init__(self, ignore_disconnectors=False):
        # When True, both register and unregister calls become no-ops.
        self.ignore = ignore_disconnectors
        self.disconnectors = {}

    def notifyOnDisconnect(self, f, *args, **kwargs):
        if not self.ignore:
            # Record the callback under a fresh opaque marker; the marker is
            # the caller's handle for cancelling the registration.
            marker = Marker()
            self.disconnectors[marker] = (f, args, kwargs)
            return marker

    def dontNotifyOnDisconnect(self, marker):
        if not self.ignore:
            del self.disconnectors[marker]

View File

@ -1,4 +1,20 @@
"""
Tests for allmydata.storage.crawler.
Ported to Python 3.
"""
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from future.utils import PY2, PY3
if PY2:
# Don't use future bytes, since it breaks tests. No further work is
# needed; once we're only on Python 3 we'll be deleting these future
# imports anyway, and tests pass just fine on Python 3.
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401
import time
import os.path
@ -11,7 +27,7 @@ from allmydata.util import fileutil, hashutil, pollmixin
from allmydata.storage.server import StorageServer, si_b2a
from allmydata.storage.crawler import ShareCrawler, TimeSliceExceeded
from allmydata.test.test_storage import FakeCanary
from allmydata.test.common_py3 import FakeCanary
from allmydata.test.common_util import StallMixin
class BucketEnumeratingCrawler(ShareCrawler):
@ -22,6 +38,10 @@ class BucketEnumeratingCrawler(ShareCrawler):
self.all_buckets = []
self.finished_d = defer.Deferred()
def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32):
if PY3:
# Bucket _inputs_ are bytes, and that's what we will compare this
# to:
storage_index_b32 = storage_index_b32.encode("ascii")
self.all_buckets.append(storage_index_b32)
def finished_cycle(self, cycle):
eventually(self.finished_d.callback, None)
@ -36,6 +56,10 @@ class PacedCrawler(ShareCrawler):
self.finished_d = defer.Deferred()
self.yield_cb = None
def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32):
if PY3:
# Bucket _inputs_ are bytes, and that's what we will compare this
# to:
storage_index_b32 = storage_index_b32.encode("ascii")
self.all_buckets.append(storage_index_b32)
self.countdown -= 1
if self.countdown == 0:
@ -92,27 +116,27 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
return self.s.stopService()
def si(self, i):
return hashutil.storage_index_hash(str(i))
return hashutil.storage_index_hash(b"%d" % (i,))
def rs(self, i, serverid):
return hashutil.bucket_renewal_secret_hash(str(i), serverid)
return hashutil.bucket_renewal_secret_hash(b"%d" % (i,), serverid)
def cs(self, i, serverid):
return hashutil.bucket_cancel_secret_hash(str(i), serverid)
return hashutil.bucket_cancel_secret_hash(b"%d" % (i,), serverid)
def write(self, i, ss, serverid, tail=0):
si = self.si(i)
si = si[:-1] + chr(tail)
si = si[:-1] + bytes(bytearray((tail,)))
had,made = ss.remote_allocate_buckets(si,
self.rs(i, serverid),
self.cs(i, serverid),
set([0]), 99, FakeCanary())
made[0].remote_write(0, "data")
made[0].remote_write(0, b"data")
made[0].remote_close()
return si_b2a(si)
def test_immediate(self):
self.basedir = "crawler/Basic/immediate"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -141,7 +165,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_service(self):
self.basedir = "crawler/Basic/service"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -169,7 +193,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_paced(self):
self.basedir = "crawler/Basic/paced"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -271,7 +295,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_paced_service(self):
self.basedir = "crawler/Basic/paced_service"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -338,7 +362,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
self.basedir = "crawler/Basic/cpu_usage"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -383,7 +407,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_empty_subclass(self):
self.basedir = "crawler/Basic/empty_subclass"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -411,7 +435,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_oneshot(self):
self.basedir = "crawler/Basic/oneshot"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)

View File

@ -35,24 +35,8 @@ from allmydata.test.no_network import NoNetworkServer
from allmydata.storage_client import (
_StorageServer,
)
from .common_py3 import FakeCanary
class Marker(object):
pass
class FakeCanary(object):
def __init__(self, ignore_disconnectors=False):
self.ignore = ignore_disconnectors
self.disconnectors = {}
def notifyOnDisconnect(self, f, *args, **kwargs):
if self.ignore:
return
m = Marker()
self.disconnectors[m] = (f, args, kwargs)
return m
def dontNotifyOnDisconnect(self, marker):
if self.ignore:
return
del self.disconnectors[marker]
class FakeStatsProvider(object):
def count(self, name, delta=1):

View File

@ -32,6 +32,7 @@ PORTED_MODULES = [
"allmydata.crypto.util",
"allmydata.hashtree",
"allmydata.immutable.happiness_upload",
"allmydata.storage.crawler",
"allmydata.test.common_py3",
"allmydata.util._python3",
"allmydata.util.abbreviate",
@ -63,6 +64,7 @@ PORTED_TEST_MODULES = [
"allmydata.test.test_abbreviate",
"allmydata.test.test_base32",
"allmydata.test.test_base62",
"allmydata.test.test_crawler",
"allmydata.test.test_crypto",
"allmydata.test.test_deferredutil",
"allmydata.test.test_dictutil",