Merge branch '3386.storage-crawler-python-3' into 3387.expirer-python-3

This commit is contained in:
Itamar Turner-Trauring 2020-08-20 13:05:03 -04:00
commit 356a5a322e
9 changed files with 92 additions and 33 deletions

0
newsfragments/3386.minor Normal file
View File

View File

@ -1,3 +1,4 @@
from future.utils import PY3
import os.path
from allmydata.util import base32
@ -17,5 +18,12 @@ def si_a2b(ascii_storageindex):
return base32.a2b(ascii_storageindex)
def storage_index_to_dir(storageindex):
"""Convert storage index to directory path.
Returns native string.
"""
sia = si_b2a(storageindex)
if PY3:
# On Python 3 we expect paths to be unicode.
sia = sia.decode("ascii")
return os.path.join(sia[:2], sia)

View File

@ -1,3 +1,19 @@
"""
Crawl the storage server shares.
Ported to Python 3.
"""
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2, PY3
if PY2:
# We don't import bytes, object, dict, and list just in case they're used,
# so as not to create brittle pickles with random magic objects.
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401
import os, time, struct
try:
@ -77,6 +93,9 @@ class ShareCrawler(service.MultiService):
self.statefile = statefile
self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2]
for i in range(2**10)]
if PY3:
# On Python 3 we expect the paths to be unicode, not bytes.
self.prefixes = [p.decode("ascii") for p in self.prefixes]
self.prefixes.sort()
self.timer = None
self.bucket_cache = (None, [])
@ -356,7 +375,8 @@ class ShareCrawler(service.MultiService):
"""
for bucket in buckets:
if bucket <= self.state["last-complete-bucket"]:
last_complete = self.state["last-complete-bucket"]
if last_complete is not None and bucket <= last_complete:
continue
self.process_bucket(cycle, prefix, prefixdir, bucket)
self.state["last-complete-bucket"] = bucket

View File

@ -8,7 +8,7 @@ class LeaseInfo(object):
self.cancel_secret = cancel_secret
self.expiration_time = expiration_time
if nodeid is not None:
assert isinstance(nodeid, str)
assert isinstance(nodeid, bytes)
assert len(nodeid) == 20
self.nodeid = nodeid

View File

@ -49,7 +49,7 @@ class StorageServer(service.MultiService, Referenceable):
expiration_cutoff_date=None,
expiration_sharetypes=("mutable", "immutable")):
service.MultiService.__init__(self)
assert isinstance(nodeid, str)
assert isinstance(nodeid, bytes)
assert len(nodeid) == 20
self.my_nodeid = nodeid
self.storedir = storedir

View File

@ -112,3 +112,26 @@ def skip_if_cannot_represent_filename(u):
except UnicodeEncodeError:
raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
class Marker(object):
pass
class FakeCanary(object):
"""For use in storage tests.
Can be moved back to test_storage.py once enough Python 3 porting has been
done.
"""
def __init__(self, ignore_disconnectors=False):
self.ignore = ignore_disconnectors
self.disconnectors = {}
def notifyOnDisconnect(self, f, *args, **kwargs):
if self.ignore:
return
m = Marker()
self.disconnectors[m] = (f, args, kwargs)
return m
def dontNotifyOnDisconnect(self, marker):
if self.ignore:
return
del self.disconnectors[marker]

View File

@ -1,4 +1,18 @@
"""
Tests for allmydata.storage.crawler.
Ported to Python 3.
"""
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from __future__ import unicode_literals
from future.utils import PY2, PY3
if PY2:
# Don't use future bytes, since it breaks tests.
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401
import time
import os.path
@ -11,7 +25,7 @@ from allmydata.util import fileutil, hashutil, pollmixin
from allmydata.storage.server import StorageServer, si_b2a
from allmydata.storage.crawler import ShareCrawler, TimeSliceExceeded
from allmydata.test.test_storage import FakeCanary
from allmydata.test.common_py3 import FakeCanary
from allmydata.test.common_util import StallMixin
class BucketEnumeratingCrawler(ShareCrawler):
@ -22,6 +36,10 @@ class BucketEnumeratingCrawler(ShareCrawler):
self.all_buckets = []
self.finished_d = defer.Deferred()
def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32):
if PY3:
# Bucket _inputs_ are bytes, and that's what we will compare this
# to:
storage_index_b32 = storage_index_b32.encode("ascii")
self.all_buckets.append(storage_index_b32)
def finished_cycle(self, cycle):
eventually(self.finished_d.callback, None)
@ -36,6 +54,10 @@ class PacedCrawler(ShareCrawler):
self.finished_d = defer.Deferred()
self.yield_cb = None
def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32):
if PY3:
# Bucket _inputs_ are bytes, and that's what we will compare this
# to:
storage_index_b32 = storage_index_b32.encode("ascii")
self.all_buckets.append(storage_index_b32)
self.countdown -= 1
if self.countdown == 0:
@ -92,27 +114,27 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
return self.s.stopService()
def si(self, i):
return hashutil.storage_index_hash(str(i))
return hashutil.storage_index_hash(b"%d" % (i,))
def rs(self, i, serverid):
return hashutil.bucket_renewal_secret_hash(str(i), serverid)
return hashutil.bucket_renewal_secret_hash(b"%d" % (i,), serverid)
def cs(self, i, serverid):
return hashutil.bucket_cancel_secret_hash(str(i), serverid)
return hashutil.bucket_cancel_secret_hash(b"%d" % (i,), serverid)
def write(self, i, ss, serverid, tail=0):
si = self.si(i)
si = si[:-1] + chr(tail)
si = si[:-1] + bytes(bytearray((tail,)))
had,made = ss.remote_allocate_buckets(si,
self.rs(i, serverid),
self.cs(i, serverid),
set([0]), 99, FakeCanary())
made[0].remote_write(0, "data")
made[0].remote_write(0, b"data")
made[0].remote_close()
return si_b2a(si)
def test_immediate(self):
self.basedir = "crawler/Basic/immediate"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -141,7 +163,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_service(self):
self.basedir = "crawler/Basic/service"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -169,7 +191,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_paced(self):
self.basedir = "crawler/Basic/paced"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -271,7 +293,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_paced_service(self):
self.basedir = "crawler/Basic/paced_service"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -338,7 +360,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
self.basedir = "crawler/Basic/cpu_usage"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -383,7 +405,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_empty_subclass(self):
self.basedir = "crawler/Basic/empty_subclass"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)
@ -411,7 +433,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
def test_oneshot(self):
self.basedir = "crawler/Basic/oneshot"
fileutil.make_dirs(self.basedir)
serverid = "\x00" * 20
serverid = b"\x00" * 20
ss = StorageServer(self.basedir, serverid)
ss.setServiceParent(self.s)

View File

@ -35,24 +35,8 @@ from allmydata.test.no_network import NoNetworkServer
from allmydata.storage_client import (
_StorageServer,
)
from .common_py3 import FakeCanary
class Marker(object):
pass
class FakeCanary(object):
def __init__(self, ignore_disconnectors=False):
self.ignore = ignore_disconnectors
self.disconnectors = {}
def notifyOnDisconnect(self, f, *args, **kwargs):
if self.ignore:
return
m = Marker()
self.disconnectors[m] = (f, args, kwargs)
return m
def dontNotifyOnDisconnect(self, marker):
if self.ignore:
return
del self.disconnectors[marker]
class FakeStatsProvider(object):
def count(self, name, delta=1):

View File

@ -32,6 +32,7 @@ PORTED_MODULES = [
"allmydata.crypto.util",
"allmydata.hashtree",
"allmydata.immutable.happiness_upload",
"allmydata.storage.crawler",
"allmydata.test.common_py3",
"allmydata.util._python3",
"allmydata.util.abbreviate",
@ -63,6 +64,7 @@ PORTED_TEST_MODULES = [
"allmydata.test.test_abbreviate",
"allmydata.test.test_base32",
"allmydata.test.test_base62",
"allmydata.test.test_crawler",
"allmydata.test.test_crypto",
"allmydata.test.test_deferredutil",
"allmydata.test.test_dictutil",