diff --git a/docs/frontends/FTP-and-SFTP.rst b/docs/frontends/FTP-and-SFTP.rst
index 9d4f1dcec..ede719e26 100644
--- a/docs/frontends/FTP-and-SFTP.rst
+++ b/docs/frontends/FTP-and-SFTP.rst
@@ -47,8 +47,8 @@ servers must be configured with a way to first authenticate a user (confirm
 that a prospective client has a legitimate claim to whatever authorities we
 might grant a particular user), and second to decide what directory cap
 should be used as the root directory for a log-in by the authenticated user.
-A username and password can be used; as of Tahoe-LAFS v1.11, RSA or DSA
-public key authentication is also supported.
+As of Tahoe-LAFS v1.17,
+RSA/DSA public key authentication is the only supported mechanism.
 Tahoe-LAFS provides two mechanisms to perform this user-to-cap mapping.
 The first (recommended) is a simple flat file with one account per line.
@@ -59,20 +59,14 @@ Creating an Account File

 To use the first form, create a file (for example ``BASEDIR/private/accounts``)
 in which each non-comment/non-blank line is a space-separated line of
-(USERNAME, PASSWORD, ROOTCAP), like so::
+(USERNAME, KEY-TYPE, PUBLIC-KEY, ROOTCAP), like so::

   % cat BASEDIR/private/accounts
-  # This is a password line: username password cap
-  alice password URI:DIR2:ioej8xmzrwilg772gzj4fhdg7a:wtiizszzz2rgmczv4wl6bqvbv33ag4kvbr6prz3u6w3geixa6m6a
-  bob sekrit URI:DIR2:6bdmeitystckbl9yqlw7g56f4e:serp5ioqxnh34mlbmzwvkp3odehsyrr7eytt5f64we3k9hhcrcja
-
-  # This is a public key line: username keytype pubkey cap
-  # (Tahoe-LAFS v1.11 or later)
   carol ssh-rsa AAAA... URI:DIR2:ovjy4yhylqlfoqg2vcze36dhde:4d4f47qko2xm5g7osgo2yyidi5m4muyo2vjjy53q4vjju2u55mfa

-For public key authentication, the keytype may be either "ssh-rsa" or "ssh-dsa".
-To avoid ambiguity between passwords and public key types, a password cannot
-start with "ssh-".
+The key type may be either "ssh-rsa" or "ssh-dsa".

 Now add an ``accounts.file`` directive to your ``tahoe.cfg`` file, as
 described in the next sections.
diff --git a/integration/conftest.py b/integration/conftest.py
index 39ff3b42b..ef5c518a8 100644
--- a/integration/conftest.py
+++ b/integration/conftest.py
@@ -353,10 +353,23 @@ def storage_nodes(reactor, temp_dir, introducer, introducer_furl, flog_gatherer,
         nodes.append(process)
     return nodes

+@pytest.fixture(scope="session")
+def alice_sftp_client_key_path(temp_dir):
+    # The client SSH key path would typically be somewhere else (e.g. ~/.ssh),
+    # but for convenience in testing we put it inside the node.
+    return join(temp_dir, "alice", "private", "ssh_client_rsa_key")

 @pytest.fixture(scope='session')
 @log_call(action_type=u"integration:alice", include_args=[], include_result=False)
-def alice(reactor, temp_dir, introducer_furl, flog_gatherer, storage_nodes, request):
+def alice(
+    reactor,
+    temp_dir,
+    introducer_furl,
+    flog_gatherer,
+    storage_nodes,
+    alice_sftp_client_key_path,
+    request,
+):
     process = pytest_twisted.blockon(
         _create_node(
             reactor, request, temp_dir, introducer_furl, flog_gatherer, "alice",
@@ -387,19 +400,13 @@ accounts.file = {accounts_path}
 """.format(ssh_key_path=host_ssh_key_path, accounts_path=accounts_path))
     generate_ssh_key(host_ssh_key_path)

-    # 3. Add a SFTP access file with username/password and SSH key auth.
-
-    # The client SSH key path is typically going to be somewhere else (~/.ssh,
-    # typically), but for convenience sake for testing we'll put it inside node.
-    client_ssh_key_path = join(process.node_dir, "private", "ssh_client_rsa_key")
-    generate_ssh_key(client_ssh_key_path)
+    # 3. Add an SFTP access file with an SSH key for auth.
+    generate_ssh_key(alice_sftp_client_key_path)
     # Pub key format is "ssh-rsa <key> <comment>". We want the key.
-    ssh_public_key = open(client_ssh_key_path + ".pub").read().strip().split()[1]
+    ssh_public_key = open(alice_sftp_client_key_path + ".pub").read().strip().split()[1]
     with open(accounts_path, "w") as f:
         f.write("""\
-alice password {rwcap}
-
-alice2 ssh-rsa {ssh_public_key} {rwcap}
+alice-key ssh-rsa {ssh_public_key} {rwcap}
 """.format(rwcap=rwcap, ssh_public_key=ssh_public_key))

     # 4. Restart the node with new SFTP config.
diff --git a/integration/test_sftp.py b/integration/test_sftp.py
index 6171c7413..3fdbb56d7 100644
--- a/integration/test_sftp.py
+++ b/integration/test_sftp.py
@@ -19,6 +19,7 @@ from future.utils import PY2
 if PY2:
     from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401

+import os.path
 from posixpath import join
 from stat import S_ISDIR

@@ -33,7 +34,7 @@ import pytest
 from .util import generate_ssh_key, run_in_thread

-def connect_sftp(connect_args={"username": "alice", "password": "password"}):
+def connect_sftp(connect_args):
     """Create an SFTP client."""
     client = SSHClient()
     client.set_missing_host_key_policy(AutoAddPolicy)
@@ -60,24 +61,24 @@
 @run_in_thread
 def test_bad_account_password_ssh_key(alice, tmpdir):
     """
-    Can't login with unknown username, wrong password, or wrong SSH pub key.
+    Can't login with unknown username, any password, or wrong SSH pub key.
     """
-    # Wrong password, wrong username:
-    for u, p in [("alice", "wrong"), ("someuser", "password")]:
+    # Any password, wrong username:
+    for u, p in [("alice-key", "wrong"), ("someuser", "password")]:
         with pytest.raises(AuthenticationException):
             connect_sftp(connect_args={
                 "username": u, "password": p,
             })

-    another_key = join(str(tmpdir), "ssh_key")
+    another_key = os.path.join(str(tmpdir), "ssh_key")
     generate_ssh_key(another_key)
-    good_key = RSAKey(filename=join(alice.node_dir, "private", "ssh_client_rsa_key"))
+    good_key = RSAKey(filename=os.path.join(alice.node_dir, "private", "ssh_client_rsa_key"))
     bad_key = RSAKey(filename=another_key)

     # Wrong key:
     with pytest.raises(AuthenticationException):
         connect_sftp(connect_args={
-            "username": "alice2", "pkey": bad_key,
+            "username": "alice-key", "pkey": bad_key,
         })

     # Wrong username:
@@ -86,13 +87,24 @@ def test_bad_account_password_ssh_key(alice, tmpdir):
             "username": "someoneelse", "pkey": good_key,
         })

+def sftp_client_key(node):
+    return RSAKey(
+        filename=os.path.join(node.node_dir, "private", "ssh_client_rsa_key"),
+    )
+
+def test_sftp_client_key_exists(alice, alice_sftp_client_key_path):
+    """
+    Weakly validate the sftp client key fixture by asserting that *something*
+    exists at the supposed key path.
+    """
+    assert os.path.exists(alice_sftp_client_key_path)

 @run_in_thread
 def test_ssh_key_auth(alice):
     """It's possible to login authenticating with SSH public key."""
-    key = RSAKey(filename=join(alice.node_dir, "private", "ssh_client_rsa_key"))
+    key = sftp_client_key(alice)
     sftp = connect_sftp(connect_args={
-        "username": "alice2", "pkey": key
+        "username": "alice-key", "pkey": key
     })
     assert sftp.listdir() == []

@@ -100,7 +112,10 @@ def test_ssh_key_auth(alice):
 @run_in_thread
 def test_read_write_files(alice):
     """It's possible to upload and download files."""
-    sftp = connect_sftp()
+    sftp = connect_sftp(connect_args={
+        "username": "alice-key",
+        "pkey": sftp_client_key(alice),
+    })
     with sftp.file("myfile", "wb") as f:
         f.write(b"abc")
         f.write(b"def")
@@ -117,7 +132,10 @@ def test_directories(alice):
     It's possible to create, list directories, and create and remove files
     in them.
     """
-    sftp = connect_sftp()
+    sftp = connect_sftp(connect_args={
+        "username": "alice-key",
+        "pkey": sftp_client_key(alice),
+    })
     assert sftp.listdir() == []

     sftp.mkdir("childdir")
@@ -148,7 +166,10 @@ def test_directories(alice):
 @run_in_thread
 def test_rename(alice):
     """Directories and files can be renamed."""
-    sftp = connect_sftp()
+    sftp = connect_sftp(connect_args={
+        "username": "alice-key",
+        "pkey": sftp_client_key(alice),
+    })
     sftp.mkdir("dir")
     filepath = join("dir", "file")
diff --git a/newsfragments/3819.security b/newsfragments/3819.security
new file mode 100644
index 000000000..975fd0035
--- /dev/null
+++ b/newsfragments/3819.security
@@ -0,0 +1 @@
+The introducer server no longer writes the sensitive introducer fURL value to its log at startup time. Instead it writes the well-known path of the file from which this value can be read.
diff --git a/newsfragments/3821.security b/newsfragments/3821.security
new file mode 100644
index 000000000..75d9904a2
--- /dev/null
+++ b/newsfragments/3821.security
@@ -0,0 +1,2 @@
+The storage protocol operation ``add_lease`` now safely rejects an attempt to add a 4,294,967,296th lease to an immutable share.
+Previously this failed with an error after recording the new lease in the share file, resulting in the share file losing track of one previous lease.
diff --git a/newsfragments/3822.security b/newsfragments/3822.security
new file mode 100644
index 000000000..5d6c07ab5
--- /dev/null
+++ b/newsfragments/3822.security
@@ -0,0 +1,2 @@
+The storage protocol operation ``readv`` now safely rejects attempts to read negative lengths.
+Previously these read requests were satisfied with the complete contents of the share file (including trailing metadata) starting from the specified offset.
diff --git a/newsfragments/3823.security b/newsfragments/3823.security
new file mode 100644
index 000000000..ba2bbd741
--- /dev/null
+++ b/newsfragments/3823.security
@@ -0,0 +1,4 @@
+The storage server implementation now respects the ``reserved_space`` configuration value when writing lease information and recording corruption advisories.
+Previously, new leases could be created and written to disk even when the storage server had less remaining space than the configured reserve space value.
+Now this operation will fail with an exception and the lease will not be created.
+Similarly, if there is no space available, corruption advisories will be logged but not written to disk.
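For context on the reserve honored by the fragment above: a storage node's reserve is set in its ``tahoe.cfg``. A minimal sketch, with an illustrative value::

    [storage]
    enabled = true
    reserved_space = 10G

With this change, writing lease information and recording corruption advisories count against that same reserve instead of bypassing it.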
diff --git a/newsfragments/3824.security b/newsfragments/3824.security
new file mode 100644
index 000000000..b29b2acc8
--- /dev/null
+++ b/newsfragments/3824.security
@@ -0,0 +1 @@
+The storage server implementation no longer records corruption advisories about storage indexes for which it holds no shares.
diff --git a/newsfragments/3825.security b/newsfragments/3825.security
new file mode 100644
index 000000000..3d112dd49
--- /dev/null
+++ b/newsfragments/3825.security
@@ -0,0 +1,8 @@
+The lease-checker now uses JSON instead of pickle to serialize its state.
+
+Tahoe will now refuse to run until you either delete all pickle files or
+migrate them using the new command::
+
+    tahoe admin migrate-crawler
+
+This will migrate all crawler-related pickle files.
diff --git a/newsfragments/3827.security b/newsfragments/3827.security
new file mode 100644
index 000000000..4fee19c76
--- /dev/null
+++ b/newsfragments/3827.security
@@ -0,0 +1,4 @@
+The SFTP server no longer accepts password-based credentials for authentication.
+Public/private key-based credentials are now the only supported authentication type.
+This removes plaintext password storage from the SFTP credentials file.
+It also removes a possible timing side-channel vulnerability which might have allowed attackers to discover an account's plaintext password.
diff --git a/newsfragments/3839.security b/newsfragments/3839.security
new file mode 100644
index 000000000..1ae054542
--- /dev/null
+++ b/newsfragments/3839.security
@@ -0,0 +1 @@
+The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets.
diff --git a/newsfragments/3841.security b/newsfragments/3841.security
new file mode 100644
index 000000000..867322e0a
--- /dev/null
+++ b/newsfragments/3841.security
@@ -0,0 +1 @@
+The storage server now keeps hashes of lease renew and cancel secrets for mutable share files instead of keeping the original secrets.
\ No newline at end of file
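The hashing referenced by the two fragments above is implemented in ``lease_schema.py`` later in this diff. A minimal sketch of the construction, assuming PyNaCl is available::

    from nacl.hash import blake2b
    from nacl.encoding import RawEncoder

    def hash_lease_secret(secret):
        # type: (bytes) -> bytes
        # Only this 32-byte digest is persisted; the secret the client
        # presented is never written to the share file.
        return blake2b(secret, digest_size=32, encoder=RawEncoder())

Renewal and cancellation checks hash the candidate secret and compare digests (see ``HashedLeaseInfo`` below).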
diff --git a/src/allmydata/frontends/auth.py b/src/allmydata/frontends/auth.py
index b61062334..b6f9c2b7e 100644
--- a/src/allmydata/frontends/auth.py
+++ b/src/allmydata/frontends/auth.py
@@ -12,7 +12,7 @@ if PY2:

 from zope.interface import implementer
 from twisted.internet import defer
-from twisted.cred import error, checkers, credentials
+from twisted.cred import checkers, credentials
 from twisted.conch.ssh import keys
 from twisted.conch.checkers import SSHPublicKeyChecker, InMemorySSHKeyDB
@@ -32,65 +32,93 @@ class FTPAvatarID(object):
 @implementer(checkers.ICredentialsChecker)
 class AccountFileChecker(object):
-    credentialInterfaces = (credentials.IUsernamePassword,
-                            credentials.IUsernameHashedPassword,
-                            credentials.ISSHPrivateKey)
+    credentialInterfaces = (credentials.ISSHPrivateKey,)
+
     def __init__(self, client, accountfile):
         self.client = client
-        self.passwords = BytesKeyDict()
-        pubkeys = BytesKeyDict()
-        self.rootcaps = BytesKeyDict()
-        with open(abspath_expanduser_unicode(accountfile), "rb") as f:
-            for line in f:
-                line = line.strip()
-                if line.startswith(b"#") or not line:
-                    continue
-                name, passwd, rest = line.split(None, 2)
-                if passwd.startswith(b"ssh-"):
-                    bits = rest.split()
-                    keystring = b" ".join([passwd] + bits[:-1])
-                    key = keys.Key.fromString(keystring)
-                    rootcap = bits[-1]
-                    pubkeys[name] = [key]
-                else:
-                    self.passwords[name] = passwd
-                    rootcap = rest
-                self.rootcaps[name] = rootcap
+        path = abspath_expanduser_unicode(accountfile)
+        with open_account_file(path) as f:
+            self.rootcaps, pubkeys = load_account_file(f)
         self._pubkeychecker = SSHPublicKeyChecker(InMemorySSHKeyDB(pubkeys))

     def _avatarId(self, username):
         return FTPAvatarID(username, self.rootcaps[username])

-    def _cbPasswordMatch(self, matched, username):
-        if matched:
-            return self._avatarId(username)
-        raise error.UnauthorizedLogin
-
     def requestAvatarId(self, creds):
         if credentials.ISSHPrivateKey.providedBy(creds):
             d = defer.maybeDeferred(self._pubkeychecker.requestAvatarId, creds)
             d.addCallback(self._avatarId)
             return d
-        elif credentials.IUsernameHashedPassword.providedBy(creds):
-            return self._checkPassword(creds)
-        elif credentials.IUsernamePassword.providedBy(creds):
-            return self._checkPassword(creds)
-        else:
-            raise NotImplementedError()
+        raise NotImplementedError()

-    def _checkPassword(self, creds):
-        """
-        Determine whether the password in the given credentials matches the
-        password in the account file.
+def open_account_file(path):
+    """
+    Open and return the accounts file at the given path.
+    """
+    return open(path, "rt", encoding="utf-8")

-        Returns a Deferred that fires with the username if the password matches
-        or with an UnauthorizedLogin failure otherwise.
-        """
-        try:
-            correct = self.passwords[creds.username]
-        except KeyError:
-            return defer.fail(error.UnauthorizedLogin())
+def load_account_file(lines):
+    """
+    Load credentials from an account file.

-        d = defer.maybeDeferred(creds.checkPassword, correct)
-        d.addCallback(self._cbPasswordMatch, creds.username)
-        return d
+    :param lines: An iterable of account lines to load.
+
+    :return: See ``create_account_maps``.
+    """
+    return create_account_maps(
+        parse_accounts(
+            content_lines(
+                lines,
+            ),
+        ),
+    )
+
+def content_lines(lines):
+    """
+    Drop empty and commented-out lines (``#``-prefixed) from an iterator of
+    lines.
+
+    :param lines: An iterator of lines to process.
+
+    :return: An iterator of the lines from ``lines`` that contain content
+        intended to be loaded.
+    """
+    for line in lines:
+        line = line.strip()
+        if line and not line.startswith("#"):
+            yield line
+
+def parse_accounts(lines):
+    """
+    Parse account lines into their components (name, key, rootcap).
+    """
+    for line in lines:
+        name, passwd, rest = line.split(None, 2)
+        if not passwd.startswith("ssh-"):
+            raise ValueError(
+                "Password-based authentication is not supported; "
+                "configure key-based authentication instead."
+            )
+
+        bits = rest.split()
+        keystring = " ".join([passwd] + bits[:-1])
+        key = keys.Key.fromString(keystring)
+        rootcap = bits[-1]
+        yield (name, key, rootcap)
+
+def create_account_maps(accounts):
+    """
+    Build mappings from account names to keys and rootcaps.
+
+    :param accounts: An iterator of (name, key, rootcap) tuples.
+
+    :return: A tuple of two dicts.  The first maps account names to rootcaps.
+        The second maps account names to public keys.
+    """
+    rootcaps = BytesKeyDict()
+    pubkeys = BytesKeyDict()
+    for (name, key, rootcap) in accounts:
+        name_bytes = name.encode("utf-8")
+        rootcaps[name_bytes] = rootcap.encode("utf-8")
+        pubkeys[name_bytes] = [key]
+    return rootcaps, pubkeys
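A quick illustration of the parsing pipeline added above; the account line is made up::

    from allmydata.frontends.auth import content_lines

    lines = [
        "# comments and blank lines are dropped",
        "",
        "carol ssh-rsa AAAA... URI:DIR2:aaaa:bbbb",
    ]
    assert list(content_lines(lines)) == [
        "carol ssh-rsa AAAA... URI:DIR2:aaaa:bbbb",
    ]

``parse_accounts`` then splits each surviving line into ``(name, key, rootcap)`` and raises ``ValueError`` for any entry whose second field does not start with ``ssh-``, i.e. an old password-style line.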
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
index 5522663ee..f055a01e2 100644
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -52,6 +52,8 @@ WriteEnablerSecret = Hash # used to protect mutable share modifications
 LeaseRenewSecret = Hash # used to protect lease renewal requests
 LeaseCancelSecret = Hash # was used to protect lease cancellation requests

+class NoSpace(Exception):
+    """Storage space was not available for a space-allocating operation."""

 class DataTooLargeError(Exception):
     """The write went past the expected size of the bucket."""
diff --git a/src/allmydata/introducer/server.py b/src/allmydata/introducer/server.py
index 8678ad5bf..f0638439a 100644
--- a/src/allmydata/introducer/server.py
+++ b/src/allmydata/introducer/server.py
@@ -133,7 +133,7 @@ class _IntroducerNode(node.Node):
             os.rename(old_public_fn, private_fn)
         furl = self.tub.registerReference(introducerservice,
                                           furlFile=private_fn)
-        self.log(" introducer is at %s" % furl, umid="qF2L9A")
+        self.log(" introducer can be found in {!r}".format(private_fn), umid="qF2L9A")
         self.introducer_url = furl # for tests

     def init_web(self, webport):
diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py
index a9feed0dd..e0dcc8821 100644
--- a/src/allmydata/scripts/admin.py
+++ b/src/allmydata/scripts/admin.py
@@ -18,7 +18,17 @@ except ImportError:
     pass

 from twisted.python import usage
-from allmydata.scripts.common import BaseOptions
+from twisted.python.filepath import (
+    FilePath,
+)
+from allmydata.scripts.common import (
+    BaseOptions,
+    BasedirOptions,
+)
+from allmydata.storage import (
+    crawler,
+    expirer,
+)

 class GenerateKeypairOptions(BaseOptions):
@@ -65,12 +75,55 @@ def derive_pubkey(options):
     print("public:", str(ed25519.string_from_verifying_key(public_key), "ascii"), file=out)
     return 0

+class MigrateCrawlerOptions(BasedirOptions):
+
+    def getSynopsis(self):
+        return "Usage: tahoe [global-options] admin migrate-crawler"
+
+    def getUsage(self, width=None):
+        t = BasedirOptions.getUsage(self, width)
+        t += (
+            "The crawler data is now stored as JSON to avoid"
+            " potential security issues with pickle files.\n\nIf"
+            " you are confident the state files in the 'storage/'"
+            " subdirectory of your node are trustworthy, run this"
+            " command to upgrade them to JSON.\n\nThe files are:"
+            " lease_checker.history, lease_checker.state, and"
+            " bucket_counter.state"
+        )
+        return t
+
+
+def migrate_crawler(options):
+    out = options.stdout
+    storage = FilePath(options['basedir']).child("storage")
+
+    conversions = [
+        (storage.child("lease_checker.state"), crawler._convert_pickle_state_to_json),
+        (storage.child("bucket_counter.state"), crawler._convert_pickle_state_to_json),
+        (storage.child("lease_checker.history"), expirer._convert_pickle_state_to_json),
+    ]
+
+    for fp, converter in conversions:
+        existed = fp.exists()
+        newfp = crawler._upgrade_pickle_to_json(fp, converter)
+        if existed:
+            print("Converted '{}' to '{}'".format(fp.path, newfp.path), file=out)
+        else:
+            if newfp.exists():
+                print("Already converted: '{}'".format(newfp.path), file=out)
+            else:
+                print("Not found: '{}'".format(fp.path), file=out)
+
+
 class AdminCommand(BaseOptions):
     subCommands = [
         ("generate-keypair", None, GenerateKeypairOptions,
          "Generate a public/private keypair, write to stdout."),
         ("derive-pubkey", None, DerivePubkeyOptions,
          "Derive a public key from a private key."),
+        ("migrate-crawler", None, MigrateCrawlerOptions,
+         "Write the crawler-history data as JSON."),
     ]
     def postOptions(self):
         if not hasattr(self, 'subOptions'):
@@ -88,6 +141,7 @@ each subcommand.
 subDispatch = {
     "generate-keypair": print_keypair,
     "derive-pubkey": derive_pubkey,
+    "migrate-crawler": migrate_crawler,
 }

 def do_admin(options):
diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py
index 260cca55b..6201ce28f 100644
--- a/src/allmydata/scripts/debug.py
+++ b/src/allmydata/scripts/debug.py
@@ -230,8 +230,8 @@ def dump_mutable_share(options):
             print("    ownerid: %d" % lease.owner_num, file=out)
             when = format_expiration_time(lease.get_expiration_time())
             print("    expires in %s" % when, file=out)
-            print("    renew_secret: %s" % str(base32.b2a(lease.renew_secret), "utf-8"), file=out)
-            print("    cancel_secret: %s" % str(base32.b2a(lease.cancel_secret), "utf-8"), file=out)
+            print("    renew_secret: %s" % lease.present_renew_secret(), file=out)
+            print("    cancel_secret: %s" % lease.present_cancel_secret(), file=out)
             print("    secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid), file=out)
     else:
         print("No leases.", file=out)
diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py
index 01f1a354c..51be32ee3 100644
--- a/src/allmydata/scripts/tahoe_run.py
+++ b/src/allmydata/scripts/tahoe_run.py
@@ -27,7 +27,9 @@ from allmydata.scripts.default_nodedir import _default_nodedir
 from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path
 from allmydata.util.configutil import UnknownConfigError
 from allmydata.util.deferredutil import HookMixin
-
+from allmydata.storage.crawler import (
+    MigratePickleFileError,
+)
 from allmydata.node import (
     PortAssignmentRequired,
     PrivacyError,
@@ -164,6 +166,18 @@ class DaemonizeTheRealService(Service, HookMixin):
                     self.stderr.write("\ntub.port cannot be 0: you must choose.\n\n")
                 elif reason.check(PrivacyError):
                     self.stderr.write("\n{}\n\n".format(reason.value))
+                elif reason.check(MigratePickleFileError):
+                    self.stderr.write(
+                        "Error\nAt least one 'pickle' format file exists.\n"
+                        "The file is {}\n"
+                        "You must either delete the pickle-format files"
+                        " or migrate them using the command:\n"
+                        "    tahoe admin migrate-crawler --basedir {}\n\n"
+                        .format(
+                            reason.value.args[0].path,
+                            self.basedir,
+                        )
+                    )
                 else:
                     self.stderr.write("\nUnknown error\n")
                     reason.printTraceback(self.stderr)
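A hypothetical run of the new subcommand against a storage node (paths illustrative; output wording taken from ``migrate_crawler`` above)::

    $ tahoe admin migrate-crawler --basedir /var/lib/tahoe/storage0
    Converted '/var/lib/tahoe/storage0/storage/lease_checker.state' to '/var/lib/tahoe/storage0/storage/lease_checker.state.json'
    Not found: '/var/lib/tahoe/storage0/storage/bucket_counter.state'
    Converted '/var/lib/tahoe/storage0/storage/lease_checker.history' to '/var/lib/tahoe/storage0/storage/lease_checker.history.json'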
diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py
index e5563647f..17a3f41b7 100644
--- a/src/allmydata/storage/common.py
+++ b/src/allmydata/storage/common.py
@@ -16,11 +16,22 @@ from allmydata.util import base32
 # Backwards compatibility.
 from allmydata.interfaces import DataTooLargeError  # noqa: F401

-class UnknownMutableContainerVersionError(Exception):
-    pass
-class UnknownImmutableContainerVersionError(Exception):
+class UnknownContainerVersionError(Exception):
+    def __init__(self, filename, version):
+        self.filename = filename
+        self.version = version
+
+    def __str__(self):
+        return "sharefile {!r} had unexpected version {!r}".format(
+            self.filename,
+            self.version,
+        )
+
+class UnknownMutableContainerVersionError(UnknownContainerVersionError):
     pass

+class UnknownImmutableContainerVersionError(UnknownContainerVersionError):
+    pass

 def si_b2a(storageindex):
     return base32.b2a(storageindex)
diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py
index bd4f4f432..7516bc4e9 100644
--- a/src/allmydata/storage/crawler.py
+++ b/src/allmydata/storage/crawler.py
@@ -11,23 +11,185 @@ from __future__ import print_function

 from future.utils import PY2, PY3
 if PY2:
-    # We don't import bytes, object, dict, and list just in case they're used,
-    # so as not to create brittle pickles with random magic objects.
-    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min  # noqa: F401
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401

-import os, time, struct
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle  # type: ignore
+import os
+import time
+import json
+import struct
 from twisted.internet import reactor
 from twisted.application import service
+from twisted.python.filepath import FilePath
 from allmydata.storage.common import si_b2a
 from allmydata.util import fileutil

 class TimeSliceExceeded(Exception):
     pass

+
+class MigratePickleFileError(Exception):
+    """
+    A pickle-format file exists (the FilePath to the file will be the
+    single arg).
+    """
+    pass
+
+
+def _convert_cycle_data(state):
+    """
+    :param dict state: cycle-to-date or history-item state
+
+    :return dict: the state in the JSON form
+    """
+
+    def _convert_expiration_mode(value):
+        # original is a 4-tuple, with the last element being a 2-tuple
+        # .. 
convert both to lists + return [ + value[0], + value[1], + value[2], + list(value[3]), + ] + + def _convert_lease_age(value): + # if we're in cycle-to-date, this is a dict + if isinstance(value, dict): + return { + "{},{}".format(k[0], k[1]): v + for k, v in value.items() + } + # otherwise, it's a history-item and they're 3-tuples + return [ + list(v) + for v in value + ] + + converters = { + "configured-expiration-mode": _convert_expiration_mode, + "cycle-start-finish-times": list, + "lease-age-histogram": _convert_lease_age, + "corrupt-shares": lambda value: [ + list(x) + for x in value + ], + "leases-per-share-histogram": lambda value: { + str(k): v + for k, v in value.items() + }, + } + return { + k: converters.get(k, lambda z: z)(v) + for k, v in state.items() + } + + +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + assert state["version"] == 1, "Only known version is 1" + + converters = { + "cycle-to-date": _convert_cycle_data, + } + return { + k: converters.get(k, lambda x: x)(v) + for k, v in state.items() + } + + +def _upgrade_pickle_to_json(state_path, convert_pickle): + """ + :param FilePath state_path: the filepath to ensure is json + + :param Callable[dict] convert_pickle: function to change + pickle-style state into JSON-style state + + :returns FilePath: the local path where the state is stored + + If this state is pickle, convert to the JSON format and return the + JSON path. + """ + json_state_path = state_path.siblingExtension(".json") + + # if there's no file there at all, we're done because there's + # nothing to upgrade + if not state_path.exists(): + return json_state_path + + # upgrade the pickle data to JSON + import pickle + with state_path.open("rb") as f: + state = pickle.load(f) + new_state = convert_pickle(state) + _dump_json_to_file(new_state, json_state_path) + + # we've written the JSON, delete the pickle + state_path.remove() + return json_state_path + + +def _confirm_json_format(fp): + """ + :param FilePath fp: the original (pickle) name of a state file + + This confirms that we do _not_ have the pickle-version of a + state-file and _do_ either have nothing, or the JSON version. If + the pickle-version exists, an exception is raised. + + :returns FilePath: the JSON name of a state file + """ + if fp.path.endswith(".json"): + return fp + jsonfp = fp.siblingExtension(".json") + if fp.exists(): + raise MigratePickleFileError(fp) + return jsonfp + + +def _dump_json_to_file(js, afile): + """ + Dump the JSON object `js` to the FilePath `afile` + """ + with afile.open("wb") as f: + data = json.dumps(js) + if PY2: + f.write(data) + else: + f.write(data.encode("utf8")) + + +class _LeaseStateSerializer(object): + """ + Read and write state for LeaseCheckingCrawler. This understands + how to read the legacy pickle format files and upgrade them to the + new JSON format (which will occur automatically). 
+ """ + + def __init__(self, state_path): + self._path = _confirm_json_format(FilePath(state_path)) + + def load(self): + """ + :returns: deserialized JSON state + """ + with self._path.open("rb") as f: + return json.load(f) + + def save(self, data): + """ + Serialize the given data as JSON into the state-path + :returns: None + """ + tmpfile = self._path.siblingExtension(".tmp") + _dump_json_to_file(data, tmpfile) + fileutil.move_into_place(tmpfile.path, self._path.path) + return None + + class ShareCrawler(service.MultiService): """A ShareCrawler subclass is attached to a StorageServer, and periodically walks all of its shares, processing each one in some @@ -90,7 +252,7 @@ class ShareCrawler(service.MultiService): self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir - self.statefile = statefile + self._state_serializer = _LeaseStateSerializer(statefile) self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] if PY3: @@ -213,8 +375,7 @@ class ShareCrawler(service.MultiService): # of the last bucket to be processed, or # None if we are sleeping between cycles try: - with open(self.statefile, "rb") as f: - state = pickle.load(f) + state = self._state_serializer.load() except Exception: state = {"version": 1, "last-cycle-finished": None, @@ -250,12 +411,7 @@ class ShareCrawler(service.MultiService): else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix - tmpfile = self.statefile + ".tmp" - with open(tmpfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). - pickle.dump(self.state, f, protocol=2) - fileutil.move_into_place(tmpfile, self.statefile) + self._state_serializer.save(self.get_state()) def startService(self): # arrange things to look like we were just sleeping, so diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 7c6cd8218..55ab51843 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -5,15 +5,69 @@ from __future__ import unicode_literals from future.utils import PY2 if PY2: - # We omit anything that might end up in pickle, just in case. - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 - -import time, os, pickle, struct -from allmydata.storage.crawler import ShareCrawler + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import json +import time +import os +import struct +from allmydata.storage.crawler import ( + ShareCrawler, + _confirm_json_format, + _convert_cycle_data, + _dump_json_to_file, +) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ UnknownImmutableContainerVersionError from twisted.python import log as twlog +from twisted.python.filepath import FilePath + + +def _convert_pickle_state_to_json(state): + """ + Convert a pickle-serialized crawler-history state to the new JSON + format. + + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + return { + str(k): _convert_cycle_data(v) + for k, v in state.items() + } + + +class _HistorySerializer(object): + """ + Serialize the 'history' file of the lease-crawler state. 
This is + "storage/lease_checker.history" for the pickle or + "storage/lease_checker.history.json" for the new JSON format. + """ + + def __init__(self, history_path): + self._path = _confirm_json_format(FilePath(history_path)) + + if not self._path.exists(): + _dump_json_to_file({}, self._path) + + def load(self): + """ + Deserialize the existing data. + + :return dict: the existing history state + """ + with self._path.open("rb") as f: + history = json.load(f) + return history + + def save(self, new_history): + """ + Serialize the existing data as JSON. + """ + _dump_json_to_file(new_history, self._path) + return None + class LeaseCheckingCrawler(ShareCrawler): """I examine the leases on all shares, determining which are still valid @@ -63,7 +117,7 @@ class LeaseCheckingCrawler(ShareCrawler): override_lease_duration, # used if expiration_mode=="age" cutoff_date, # used if expiration_mode=="cutoff-date" sharetypes): - self.historyfile = historyfile + self._history_serializer = _HistorySerializer(historyfile) self.expiration_enabled = expiration_enabled self.mode = mode self.override_lease_duration = None @@ -91,14 +145,6 @@ class LeaseCheckingCrawler(ShareCrawler): for k in so_far: self.state["cycle-to-date"].setdefault(k, so_far[k]) - # initialize history - if not os.path.exists(self.historyfile): - history = {} # cyclenum -> dict - with open(self.historyfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). - pickle.dump(history, f, protocol=2) - def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() so_far = {"corrupt-shares": [], @@ -142,7 +188,7 @@ class LeaseCheckingCrawler(ShareCrawler): struct.error): twlog.msg("lease-checker error processing %s" % sharefile) twlog.err() - which = (storage_index_b32, shnum) + which = [storage_index_b32, shnum] self.state["cycle-to-date"]["corrupt-shares"].append(which) wks = (1, 1, 1, "unknown") would_keep_shares.append(wks) @@ -212,7 +258,7 @@ class LeaseCheckingCrawler(ShareCrawler): num_valid_leases_configured += 1 so_far = self.state["cycle-to-date"] - self.increment(so_far["leases-per-share-histogram"], num_leases, 1) + self.increment(so_far["leases-per-share-histogram"], str(num_leases), 1) self.increment_space("examined", s, sharetype) would_keep_share = [1, 1, 1, sharetype] @@ -291,12 +337,14 @@ class LeaseCheckingCrawler(ShareCrawler): start = self.state["current-cycle-start-time"] now = time.time() - h["cycle-start-finish-times"] = (start, now) + h["cycle-start-finish-times"] = [start, now] h["expiration-enabled"] = self.expiration_enabled - h["configured-expiration-mode"] = (self.mode, - self.override_lease_duration, - self.cutoff_date, - self.sharetypes_to_expire) + h["configured-expiration-mode"] = [ + self.mode, + self.override_lease_duration, + self.cutoff_date, + self.sharetypes_to_expire, + ] s = self.state["cycle-to-date"] @@ -314,16 +362,12 @@ class LeaseCheckingCrawler(ShareCrawler): # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() - with open(self.historyfile, "rb") as f: - history = pickle.load(f) - history[cycle] = h + history = self._history_serializer.load() + history[str(cycle)] = h while len(history) > 10: - oldcycles = sorted(history.keys()) - del history[oldcycles[0]] - with open(self.historyfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). 
- pickle.dump(history, f, protocol=2) + oldcycles = sorted(int(k) for k in history.keys()) + del history[str(oldcycles[0])] + self._history_serializer.save(history) def get_state(self): """In addition to the crawler state described in @@ -392,9 +436,7 @@ class LeaseCheckingCrawler(ShareCrawler): progress = self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy - with open(self.historyfile, "rb") as f: - history = pickle.load(f) - state["history"] = history + state["history"] = self._history_serializer.load() if not progress["cycle-in-progress"]: del state["cycle-to-date"] @@ -406,10 +448,12 @@ class LeaseCheckingCrawler(ShareCrawler): lah = so_far["lease-age-histogram"] so_far["lease-age-histogram"] = self.convert_lease_age_histogram(lah) so_far["expiration-enabled"] = self.expiration_enabled - so_far["configured-expiration-mode"] = (self.mode, - self.override_lease_duration, - self.cutoff_date, - self.sharetypes_to_expire) + so_far["configured-expiration-mode"] = [ + self.mode, + self.override_lease_duration, + self.cutoff_date, + self.sharetypes_to_expire, + ] so_far_sr = so_far["space-recovered"] remaining_sr = {} diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 08b83cd87..b80d4648d 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -21,26 +21,32 @@ from zope.interface import implementer from allmydata.interfaces import ( RIBucketWriter, RIBucketReader, ConflictingWriteError, DataTooLargeError, + NoSpace, ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition -from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError +from .immutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_version, +) + + # each share file (in storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and # RIBucketReader.read . The lease information is not accessible through these # interfaces. # The share file has the following layout: -# 0x00: share file version number, four bytes, current version is 1 +# 0x00: share file version number, four bytes, current version is 2 # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below. # 0x08: number of leases, four bytes big-endian # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy) # A+0x0c = B: first lease. Lease format is: # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner -# B+0x04: renew secret, 32 bytes (SHA256) -# B+0x24: cancel secret, 32 bytes (SHA256) +# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below. +# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b) # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch # B+0x48: next lease, or end of record @@ -52,7 +58,72 @@ from allmydata.storage.common import UnknownImmutableContainerVersionError # then the value stored in this field will be the actual share data length # modulo 2**32. +# Footnote 2: The change between share file version number 1 and 2 is that +# storage of lease secrets is changed from plaintext to hashed. This change +# protects the secrets from compromises of local storage on the server: if a +# plaintext cancel secret is somehow exfiltrated from the storage server, an +# attacker could use it to cancel that lease and potentially cause user data +# to be discarded before intended by the real owner. 
As of this comment,
+# lease cancellation is disabled because there have been at least two bugs
+# which leak the persisted value of the cancellation secret.  If lease secrets
+# were stored hashed instead of plaintext then neither of these bugs would
+# have allowed an attacker to learn a usable cancel secret.
+#
+# Clients are free to construct these secrets however they like.  The
+# Tahoe-LAFS client uses a SHA256-based construction.  The server then uses
+# blake2b to hash these values for storage so that it retains no persistent
+# copy of the original secret.
+#
+
+def _fix_lease_count_format(lease_count_format):
+    """
+    Turn a single character struct format string into a format string suitable
+    for use in encoding and decoding the lease count value inside a share
+    file, if possible.
+
+    :param str lease_count_format: A single character format string like
+        ``"B"`` or ``"L"``.
+
+    :raise ValueError: If the given format string is not suitable for use in
+        encoding and decoding a lease count.
+
+    :return str: A complete format string which can safely be used to encode
+        and decode lease counts in a share file.
+    """
+    if len(lease_count_format) != 1:
+        raise ValueError(
+            "Cannot construct ShareFile with lease_count_format={!r}; "
+            "format must accept a single value".format(
+                lease_count_format,
+            ),
+        )
+    # Make it big-endian with standard size so all platforms agree on the
+    # result.
+    fixed = ">" + lease_count_format
+    if struct.calcsize(fixed) > 4:
+        # There is only room for at most 4 bytes in the share file format so
+        # we can't allow any larger formats.
+        raise ValueError(
+            "Cannot construct ShareFile with lease_count_format={!r}; "
+            "size must not be larger than the size of '>L'".format(
+                lease_count_format,
+            ),
+        )
+    return fixed
+
+
 class ShareFile(object):
+    """
+    Support interaction with persistent storage of a share.
+
+    :ivar str _lease_count_format: The format string which is used to encode
+        and decode the lease count inside the share file.  As stated in the
+        comment in this module there is room for at most 4 bytes in this part
+        of the file.  A format string that works on fewer bytes is allowed to
+        restrict the number of leases allowed in the share file to a smaller
+        number than could be supported by using the full 4 bytes.  This is
+        mostly of interest for testing.
+    """
     LEASE_SIZE = struct.calcsize(">L32s32sL")
     sharetype = "immutable"
@@ -69,11 +140,44 @@ class ShareFile(object):
             ``False`` otherwise.
         """
         (version,) = struct.unpack(">L", header[:4])
-        return version == 1
+        return schema_from_version(version) is not None
+
+    def __init__(
+        self,
+        filename,
+        max_size=None,
+        create=False,
+        lease_count_format="L",
+        schema=NEWEST_SCHEMA_VERSION,
+    ):
+        """
+        Initialize a ``ShareFile``.
+
+        :param Optional[int] max_size: If given, the maximum number of bytes
+            that this ``ShareFile`` will accept to be stored.
+
+        :param bool create: If ``True``, create the file (and fail if it
+            exists already).  ``max_size`` must not be ``None`` in this case.
+            If ``False``, open an existing file for reading.
+
+        :param str lease_count_format: A format character to use to encode and
+            decode the number of leases in the share file.  There are only 4
+            bytes available in the file so the format must be 4 bytes or
+            smaller.  If different formats are used at different times with
+            the same share file, the result will likely be nonsense.
+ + This parameter is intended for the test suite to use to be able to + exercise values near the maximum encodeable value without having + to create billions of leases. + + :raise ValueError: If the encoding of ``lease_count_format`` is too + large or if it is not a single format character. + """ - def __init__(self, filename, max_size=None, create=False): - """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. """ precondition((max_size is not None) or (not create), max_size, create) + + self._lease_count_format = _fix_lease_count_format(lease_count_format) + self._lease_count_size = struct.calcsize(self._lease_count_format) self.home = filename self._max_size = max_size if create: @@ -81,27 +185,18 @@ class ShareFile(object): # it. Also construct the metadata. assert not os.path.exists(self.home) fileutil.make_dirs(os.path.dirname(self.home)) - # The second field -- the four-byte share data length -- is no - # longer used as of Tahoe v1.3.0, but we continue to write it in - # there in case someone downgrades a storage server from >= - # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one - # server to another, etc. We do saturation -- a share data length - # larger than 2**32-1 (what can fit into the field) is marked as - # the largest length that can fit into the field. That way, even - # if this does happen, the old < v1.3.0 server will still allow - # clients to read the first part of the share. + self._schema = schema with open(self.home, 'wb') as f: - f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0)) + f.write(self._schema.header(max_size)) self._lease_offset = max_size + 0x0c self._num_leases = 0 else: with open(self.home, 'rb') as f: filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) - if version != 1: - msg = "sharefile %s had version %d but we wanted 1" % \ - (filename, version) - raise UnknownImmutableContainerVersionError(msg) + self._schema = schema_from_version(version) + if self._schema is None: + raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) self._data_offset = 0xc @@ -136,16 +231,25 @@ class ShareFile(object): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_immutable_data()) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_num_leases(self, f): f.seek(0x08) - (num_leases,) = struct.unpack(">L", f.read(4)) + (num_leases,) = struct.unpack( + self._lease_count_format, + f.read(self._lease_count_size), + ) return num_leases def _write_num_leases(self, f, num_leases): + self._write_encoded_num_leases( + f, + struct.pack(self._lease_count_format, num_leases), + ) + + def _write_encoded_num_leases(self, f, encoded_num_leases): f.seek(0x08) - f.write(struct.pack(">L", num_leases)) + f.write(encoded_num_leases) def _truncate_leases(self, f, num_leases): f.truncate(self._lease_offset + num_leases * self.LEASE_SIZE) @@ -158,13 +262,16 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield LeaseInfo.from_immutable_data(data) + yield self._schema.lease_serializer.unserialize(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: num_leases = self._read_num_leases(f) + # Before we write the new lease record, make sure we can encode + # the new lease count. 
+ new_lease_count = struct.pack(self._lease_count_format, num_leases + 1) self._write_lease_record(f, num_leases, lease_info) - self._write_num_leases(f, num_leases+1) + self._write_encoded_num_leases(f, new_lease_count) def renew_lease(self, renew_secret, new_expire_time, allow_backdate=False): # type: (bytes, int, bool) -> None @@ -189,14 +296,29 @@ class ShareFile(object): return raise IndexError("unable to renew non-existent lease") - def add_or_renew_lease(self, lease_info): + def add_or_renew_lease(self, available_space, lease_info): + """ + Renew an existing lease if possible, otherwise allocate a new one. + + :param int available_space: The maximum number of bytes of storage to + commit in this operation. If more than this number of bytes is + required, raise ``NoSpace`` instead. + + :param LeaseInfo lease_info: The details of the lease to renew or add. + + :raise NoSpace: If more than ``available_space`` bytes is required to + complete the operation. In this case, no lease is added. + + :return: ``None`` + """ try: self.renew_lease(lease_info.renew_secret, lease_info.get_expiration_time()) except IndexError: + if lease_info.immutable_size() > available_space: + raise NoSpace() self.add_lease(lease_info) - def cancel_lease(self, cancel_secret): """Remove a lease with the given cancel_secret. If the last lease is cancelled, the file will be removed. Return the number of bytes that diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py new file mode 100644 index 000000000..40663b935 --- /dev/null +++ b/src/allmydata/storage/immutable_schema.py @@ -0,0 +1,72 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +import struct + +import attr + +from .lease_schema import ( + v1_immutable, + v2_immutable, +) + +@attr.s(frozen=True) +class _Schema(object): + """ + Implement encoding and decoding for multiple versions of the immutable + container schema. + + :ivar int version: the version number of the schema this object supports + + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization + """ + version = attr.ib() + lease_serializer = attr.ib() + + def header(self, max_size): + # type: (int) -> bytes + """ + Construct a container header. + + :param max_size: the maximum size the container can hold + + :return: the header bytes + """ + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. 
+ return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0) + +ALL_SCHEMAS = { + _Schema(version=2, lease_serializer=v2_immutable), + _Schema(version=1, lease_serializer=v1_immutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) + +def schema_from_version(version): + # (int) -> Optional[type] + """ + Find the schema object that corresponds to a certain version number. + """ + for schema in ALL_SCHEMAS: + if schema.version == version: + return schema + return None diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index ff96ebaf4..c056a7d28 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -15,8 +15,114 @@ import struct, time import attr -from allmydata.util.hashutil import timing_safe_compare +from zope.interface import ( + Interface, + implementer, +) +from twisted.python.components import ( + proxyForInterface, +) + +from allmydata.util.hashutil import timing_safe_compare +from allmydata.util import base32 + +# struct format for representation of a lease in an immutable share +IMMUTABLE_FORMAT = ">L32s32sL" + +# struct format for representation of a lease in a mutable share +MUTABLE_FORMAT = ">LL32s32s20s" + + +class ILeaseInfo(Interface): + """ + Represent a marker attached to a share that indicates that share should be + retained for some amount of time. + + Typically clients will create and renew leases on their shares as a way to + inform storage servers that there is still interest in those shares. A + share may have more than one lease. If all leases on a share have + expiration times in the past then the storage server may take this as a + strong hint that no one is interested in the share anymore and therefore + the share may be deleted to reclaim the space. + """ + def renew(new_expire_time): + """ + Create a new ``ILeaseInfo`` with the given expiration time. + + :param Union[int, float] new_expire_time: The expiration time the new + ``ILeaseInfo`` will have. + + :return: The new ``ILeaseInfo`` provider with the new expiration time. + """ + + def get_expiration_time(): + """ + :return Union[int, float]: this lease's expiration time + """ + + def get_grant_renew_time_time(): + """ + :return Union[int, float]: a guess about the last time this lease was + renewed + """ + + def get_age(): + """ + :return Union[int, float]: a guess about how long it has been since this + lease was renewed + """ + + def to_immutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in an immutable container + """ + + def to_mutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in a mutable container + """ + + def immutable_size(): + """ + :return int: the size of the serialized representation of this lease in an + immutable container + """ + + def mutable_size(): + """ + :return int: the size of the serialized representation of this lease in a + mutable container + """ + + def is_renew_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's renew + secret, ``False`` otherwise + """ + + def present_renew_secret(): + """ + :return str: Text which could reasonably be shown to a person representing + this lease's renew secret. 
+ """ + + def is_cancel_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's cancel + secret, ``False`` otherwise + """ + + def present_cancel_secret(): + """ + :return str: Text which could reasonably be shown to a person representing + this lease's cancel secret. + """ + + +@implementer(ILeaseInfo) @attr.s(frozen=True) class LeaseInfo(object): """ @@ -80,6 +186,13 @@ class LeaseInfo(object): """ return timing_safe_compare(self.renew_secret, candidate_secret) + def present_renew_secret(self): + # type: () -> str + """ + Return the renew secret, base32-encoded. + """ + return str(base32.b2a(self.renew_secret), "utf-8") + def is_cancel_secret(self, candidate_secret): # type: (bytes) -> bool """ @@ -90,6 +203,13 @@ class LeaseInfo(object): """ return timing_safe_compare(self.cancel_secret, candidate_secret) + def present_cancel_secret(self): + # type: () -> str + """ + Return the cancel secret, base32-encoded. + """ + return str(base32.b2a(self.cancel_secret), "utf-8") + def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self._expiration_time - 31*24*60*60 @@ -110,17 +230,31 @@ class LeaseInfo(object): "cancel_secret", "expiration_time", ] - values = struct.unpack(">L32s32sL", data) + values = struct.unpack(IMMUTABLE_FORMAT, data) return cls(nodeid=None, **dict(zip(names, values))) + def immutable_size(self): + """ + :return int: The size, in bytes, of the representation of this lease in an + immutable share file. + """ + return struct.calcsize(IMMUTABLE_FORMAT) + + def mutable_size(self): + """ + :return int: The size, in bytes, of the representation of this lease in a + mutable share file. + """ + return struct.calcsize(MUTABLE_FORMAT) + def to_immutable_data(self): - return struct.pack(">L32s32sL", + return struct.pack(IMMUTABLE_FORMAT, self.owner_num, self.renew_secret, self.cancel_secret, int(self._expiration_time)) def to_mutable_data(self): - return struct.pack(">LL32s32s20s", + return struct.pack(MUTABLE_FORMAT, self.owner_num, int(self._expiration_time), self.renew_secret, self.cancel_secret, @@ -140,5 +274,114 @@ class LeaseInfo(object): "cancel_secret", "nodeid", ] - values = struct.unpack(">LL32s32s20s", data) + values = struct.unpack(MUTABLE_FORMAT, data) return cls(**dict(zip(names, values))) + + +@attr.s(frozen=True) +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ignore # unsupported dynamic base class + """ + A ``HashedLeaseInfo`` wraps lease information in which the secrets have + been hashed. + """ + _lease_info = attr.ib() + _hash = attr.ib() + + # proxyForInterface will take care of forwarding all methods on ILeaseInfo + # to `_lease_info`. Here we override a few of those methods to adjust + # their behavior to make them suitable for use with hashed secrets. + + def renew(self, new_expire_time): + # Preserve the HashedLeaseInfo wrapper around the renewed LeaseInfo. + return attr.assoc( + self, + _lease_info=super(HashedLeaseInfo, self).renew(new_expire_time), + ) + + def is_renew_secret(self, candidate_secret): + # type: (bytes) -> bool + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) + + def present_renew_secret(self): + # type: () -> str + """ + Present the hash of the secret with a marker indicating it is a hash. 
+        """
+        return u"hash:" + super(HashedLeaseInfo, self).present_renew_secret()
+
+    def is_cancel_secret(self, candidate_secret):
+        # type: (bytes) -> bool
+        """
+        Hash the candidate secret and compare the result to the stored hashed
+        secret.
+        """
+        if isinstance(candidate_secret, _HashedCancelSecret):
+            # Someone read it off of this object in this project - probably
+            # the lease crawler - and is just trying to use it to identify
+            # which lease it wants to operate on.  Avoid re-hashing the value.
+            #
+            # It is important that this codepath is only available internally
+            # for this process to talk to itself.  If it were to be exposed to
+            # clients over the network, they could just provide the hashed
+            # value to avoid having to ever learn the original value.
+            hashed_candidate = candidate_secret.hashed_value
+        else:
+            # It is not yet hashed so hash it.
+            hashed_candidate = self._hash(candidate_secret)
+
+        return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate)
+
+    def present_cancel_secret(self):
+        # type: () -> str
+        """
+        Present the hash of the secret with a marker indicating it is a hash.
+        """
+        return u"hash:" + super(HashedLeaseInfo, self).present_cancel_secret()
+
+    @property
+    def owner_num(self):
+        return self._lease_info.owner_num
+
+    @property
+    def nodeid(self):
+        return self._lease_info.nodeid
+
+    @property
+    def cancel_secret(self):
+        """
+        Give back an opaque wrapper around the hashed cancel secret which can
+        later be presented for a successful equality comparison.
+        """
+        # We don't *have* the cancel secret.  We hashed it and threw away the
+        # original.  That's good.  It does mean that some code that runs
+        # in-process with the storage service (LeaseCheckingCrawler) runs into
+        # some difficulty.  That code wants to cancel leases and does so using
+        # the same interface that faces storage clients (or would face them,
+        # if lease cancellation were exposed).
+        #
+        # Since it can't use the hashed secret to cancel a lease (that's the
+        # point of the hashing) and we don't have the unhashed secret to give
+        # it, instead we give it a marker that `cancel_lease` will recognize.
+        # On recognizing it, if the hashed value given matches the hashed
+        # value stored it is considered a match and the lease can be
+        # cancelled.
+        #
+        # This isn't great.  Maybe the internal and external consumers of
+        # cancellation should use different interfaces.
+        return _HashedCancelSecret(self._lease_info.cancel_secret)
+
+
+@attr.s(frozen=True)
+class _HashedCancelSecret(object):
+    """
+    ``_HashedCancelSecret`` is a marker type for an already-hashed lease
+    cancel secret that lets internal lease cancellers bypass the hash-based
+    protection that's imposed on external lease cancellers.
+
+    :ivar bytes hashed_value: The already-hashed secret.
+    """
+    hashed_value = attr.ib()
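A sketch of how the hashing stays transparent to callers, using the serializers defined in the next file; the ``LeaseInfo`` constructor arguments are assumptions based on the attrs fields used above::

    from allmydata.storage.lease import LeaseInfo
    from allmydata.storage.lease_schema import v2_immutable

    lease = LeaseInfo(
        owner_num=0,
        renew_secret=b"\x01" * 32,
        cancel_secret=b"\x02" * 32,
        expiration_time=1893456000,
    )
    blob = v2_immutable.serialize(lease)     # secrets are hashed on the way out
    stored = v2_immutable.unserialize(blob)  # comes back as a HashedLeaseInfo
    assert stored.is_renew_secret(b"\x01" * 32)  # hashes the candidate, then compares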
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +try: + from typing import Union +except ImportError: + pass + +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + +from .lease import ( + LeaseInfo, + HashedLeaseInfo, +) + +@attr.s(frozen=True) +class CleartextLeaseSerializer(object): + """ + Serialize and unserialize leases with cleartext secrets. + """ + _to_data = attr.ib() + _from_data = attr.ib() + + def serialize(self, lease): + # type: (LeaseInfo) -> bytes + """ + Represent the given lease as bytes with cleartext secrets. + """ + if isinstance(lease, LeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> LeaseInfo + """ + Load a lease with cleartext secrets from the given bytes representation. + """ + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return self._from_data(data) + +@attr.s(frozen=True) +class HashedLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + def serialize(self, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = self._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> HashedLeaseInfo + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. 
+ return HashedLeaseInfo(self._from_data(data), self._hash_secret) + +v1_immutable = CleartextLeaseSerializer( + LeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v2_immutable = HashedLeaseSerializer( + HashedLeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v1_mutable = CleartextLeaseSerializer( + LeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) + +v2_mutable = HashedLeaseSerializer( + HashedLeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 9480a3c03..bd59d96b8 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -13,7 +13,10 @@ if PY2: import os, stat, struct -from allmydata.interfaces import BadWriteEnablerError +from allmydata.interfaces import ( + BadWriteEnablerError, + NoSpace, +) from allmydata.util import idlib, log from allmydata.util.assertutil import precondition from allmydata.util.hashutil import timing_safe_compare @@ -21,7 +24,10 @@ from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownMutableContainerVersionError, \ DataTooLargeError from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE - +from .mutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_header, +) # the MutableShareFile is like the ShareFile, but used for mutable data. It # has a different layout. See docs/mutable.txt for more details. @@ -61,9 +67,6 @@ class MutableShareFile(object): # our sharefiles share with a recognizable string, plus some random # binary data to reduce the chance that a regular text file will look # like a sharefile. - MAGIC = b"Tahoe mutable container v1\n" + b"\x75\x09\x44\x03\x8e" - assert len(MAGIC) == 32 - assert isinstance(MAGIC, bytes) MAX_SIZE = MAX_MUTABLE_SHARE_SIZE # TODO: decide upon a policy for max share size @@ -79,22 +82,19 @@ class MutableShareFile(object): :return: ``True`` if the bytes could belong to this container, ``False`` otherwise. 
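 
         A v1 container, for example, begins with the 32-byte magic marker
         ``Tahoe mutable container v1`` (plus a newline and five fixed
         bytes); see ``_magic`` in ``mutable_schema.py``.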
""" - return header.startswith(cls.MAGIC) + return schema_from_header(header) is not None - def __init__(self, filename, parent=None): + def __init__(self, filename, parent=None, schema=NEWEST_SCHEMA_VERSION): self.home = filename if os.path.exists(self.home): # we don't cache anything, just check the magic with open(self.home, 'rb') as f: - data = f.read(self.HEADER_SIZE) - (magic, - write_enabler_nodeid, write_enabler, - data_length, extra_least_offset) = \ - struct.unpack(">32s20s32sQQ", data) - if not self.is_valid_header(data): - msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ - (filename, magic, self.MAGIC) - raise UnknownMutableContainerVersionError(msg) + header = f.read(self.HEADER_SIZE) + self._schema = schema_from_header(header) + if self._schema is None: + raise UnknownMutableContainerVersionError(filename, header) + else: + self._schema = schema self.parent = parent # for logging def log(self, *args, **kwargs): @@ -102,23 +102,8 @@ class MutableShareFile(object): def create(self, my_nodeid, write_enabler): assert not os.path.exists(self.home) - data_length = 0 - extra_lease_offset = (self.HEADER_SIZE - + 4 * self.LEASE_SIZE - + data_length) - assert extra_lease_offset == self.DATA_OFFSET # true at creation - num_extra_leases = 0 with open(self.home, 'wb') as f: - header = struct.pack( - ">32s20s32sQQ", - self.MAGIC, my_nodeid, write_enabler, - data_length, extra_lease_offset, - ) - leases = (b"\x00" * self.LEASE_SIZE) * 4 - f.write(header + leases) - # data goes here, empty after creation - f.write(struct.pack(">L", num_extra_leases)) - # extra leases go here, none at creation + f.write(self._schema.header(my_nodeid, write_enabler)) def unlink(self): os.unlink(self.home) @@ -134,6 +119,7 @@ class MutableShareFile(object): def _read_share_data(self, f, offset, length): precondition(offset >= 0) + precondition(length >= 0) data_length = self._read_data_length(f) if offset+length > data_length: # reads beyond the end of the data are truncated. Reads that @@ -250,7 +236,7 @@ class MutableShareFile(object): + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_mutable_data()) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None @@ -267,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = LeaseInfo.from_mutable_data(data) + lease_info = self._schema.lease_serializer.unserialize(data) if lease_info.owner_num == 0: return None return lease_info @@ -302,7 +288,19 @@ class MutableShareFile(object): except IndexError: return - def add_lease(self, lease_info): + def add_lease(self, available_space, lease_info): + """ + Add a new lease to this share. + + :param int available_space: The maximum number of bytes of storage to + commit in this operation. If more than this number of bytes is + required, raise ``NoSpace`` instead. + + :raise NoSpace: If more than ``available_space`` bytes is required to + complete the operation. In this case, no lease is added. 
+
+        :return: ``None``
+        """
         precondition(lease_info.owner_num != 0) # 0 means "no lease here"
         with open(self.home, 'rb+') as f:
             num_lease_slots = self._get_num_lease_slots(f)
             empty_slot = self._get_first_empty_lease_slot(f)
             if empty_slot is not None:
                 self._write_lease_record(f, empty_slot, lease_info)
             else:
+                if lease_info.mutable_size() > available_space:
+                    raise NoSpace()
                 self._write_lease_record(f, num_lease_slots, lease_info)
 
     def renew_lease(self, renew_secret, new_expire_time, allow_backdate=False):
@@ -345,13 +345,13 @@ class MutableShareFile(object):
             msg += " ."
             raise IndexError(msg)
 
-    def add_or_renew_lease(self, lease_info):
+    def add_or_renew_lease(self, available_space, lease_info):
         precondition(lease_info.owner_num != 0) # 0 means "no lease here"
         try:
             self.renew_lease(lease_info.renew_secret,
                              lease_info.get_expiration_time())
         except IndexError:
-            self.add_lease(lease_info)
+            self.add_lease(available_space, lease_info)
 
     def cancel_lease(self, cancel_secret):
         """Remove any leases with the given cancel_secret. If the last lease
diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py
new file mode 100644
index 000000000..4be0d2137
--- /dev/null
+++ b/src/allmydata/storage/mutable_schema.py
@@ -0,0 +1,144 @@
+"""
+Ported to Python 3.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+
+import struct
+
+import attr
+
+from ..util.hashutil import (
+    tagged_hash,
+)
+from .lease import (
+    LeaseInfo,
+)
+from .lease_schema import (
+    v1_mutable,
+    v2_mutable,
+)
+
+def _magic(version):
+    # type: (int) -> bytes
+    """
+    Compute a "magic" header string for a container of the given version.
+
+    :param version: The version number of the container.
+    """
+    # Make it easy for people to recognize
+    human_readable = u"Tahoe mutable container v{:d}\n".format(version).encode("ascii")
+    # But also keep the chance of accidental collision low
+    if version == 1:
+        # It's unclear where this byte sequence came from. It may have just
+        # been random. In any case, preserve it since it is the magic marker
+        # in all v1 share files.
+        random_bytes = b"\x75\x09\x44\x03\x8e"
+    else:
+        # For future versions, use a reproducible scheme.
+        random_bytes = tagged_hash(
+            b"allmydata_mutable_container_header",
+            human_readable,
+            truncate_to=5,
+        )
+    magic = human_readable + random_bytes
+    assert len(magic) == 32
+    if version > 1:
+        # The chance of collision is pretty low but let's just be sure about
+        # it.
+        assert magic != _magic(version - 1)
+
+    return magic
+
+def _header(magic, extra_lease_offset, nodeid, write_enabler):
+    # type: (bytes, int, bytes, bytes) -> bytes
+    """
+    Construct a container header.
+
+    :param nodeid: A unique identifier for the node holding this
+        container.
+
+    :param write_enabler: A secret shared with the client used to
+        authorize changes to the contents of this container.
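+
+    :param magic: The version-specific magic marker, as computed by
+        ``_magic``.
+
+    :param extra_lease_offset: The offset, in bytes, at which the extra
+        lease count is stored; at creation this falls immediately after
+        the four baked-in lease slots.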
+    """
+    fixed_header = struct.pack(
+        ">32s20s32sQQ",
+        magic,
+        nodeid,
+        write_enabler,
+        # data length, initially the container is empty
+        0,
+        extra_lease_offset,
+    )
+    blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4
+    extra_lease_count = struct.pack(">L", 0)
+
+    return b"".join([
+        fixed_header,
+        # share data will go in between the next two items eventually but
+        # for now there is none.
+        blank_leases,
+        extra_lease_count,
+    ])
+
+
+_HEADER_FORMAT = ">32s20s32sQQ"
+
+# This size excludes leases
+_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)
+
+_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
+
+
+@attr.s(frozen=True)
+class _Schema(object):
+    """
+    Implement encoding and decoding for the mutable container.
+
+    :ivar int version: the version number of the schema this object supports
+
+    :ivar lease_serializer: an object that is responsible for lease
+        serialization and unserialization
+    """
+    version = attr.ib()
+    lease_serializer = attr.ib()
+    _magic = attr.ib()
+
+    @classmethod
+    def for_version(cls, version, lease_serializer):
+        return cls(version, lease_serializer, magic=_magic(version))
+
+    def magic_matches(self, candidate_magic):
+        # type: (bytes) -> bool
+        """
+        Return ``True`` if a candidate string matches the expected magic string
+        from a mutable container header, ``False`` otherwise.
+        """
+        return candidate_magic[:len(self._magic)] == self._magic
+
+    def header(self, nodeid, write_enabler):
+        return _header(self._magic, _EXTRA_LEASE_OFFSET, nodeid, write_enabler)
+
+ALL_SCHEMAS = {
+    _Schema.for_version(version=2, lease_serializer=v2_mutable),
+    _Schema.for_version(version=1, lease_serializer=v1_mutable),
+}
+ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
+NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)
+
+def schema_from_header(header):
+    # (bytes) -> Optional[_Schema]
+    """
+    Find the schema object whose magic marker matches the given container
+    header, or ``None`` if no schema matches.
+    """
+    for schema in ALL_SCHEMAS:
+        if schema.magic_matches(header):
+            return schema
+    return None
diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py
index 7dc277e39..acca83d6a 100644
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -62,6 +62,7 @@ class StorageServer(service.MultiService, Referenceable):
     A filesystem-based implementation of ``RIStorageServer``.
     """
     name = 'storage'
+    # only the tests change this to anything else
    LeaseCheckerClass = LeaseCheckingCrawler
 
     def __init__(self, storedir, nodeid, reserved_space=0,
@@ -82,9 +83,9 @@ class StorageServer(service.MultiService, Referenceable):
         sharedir = os.path.join(storedir, "shares")
         fileutil.make_dirs(sharedir)
         self.sharedir = sharedir
-        # we don't actually create the corruption-advisory dir until necessary
         self.corruption_advisory_dir = os.path.join(storedir,
                                                     "corruption-advisories")
+        fileutil.make_dirs(self.corruption_advisory_dir)
         self.reserved_space = int(reserved_space)
         self.no_storage = discard_storage
         self.readonly_storage = readonly_storage
@@ -301,7 +302,7 @@ class StorageServer(service.MultiService, Referenceable):
         # to a particular owner.
start = self._clock.seconds() self.count("allocate") - alreadygot = set() + alreadygot = {} bucketwriters = {} # k: shnum, v: BucketWriter si_dir = storage_index_to_dir(storage_index) si_s = si_b2a(storage_index) @@ -333,7 +334,7 @@ class StorageServer(service.MultiService, Referenceable): # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. for (shnum, fn) in self._get_bucket_shares(storage_index): - alreadygot.add(shnum) + alreadygot[shnum] = ShareFile(fn) if renew_leases: sf = ShareFile(fn) sf.add_or_renew_lease(lease_info) @@ -369,7 +370,7 @@ class StorageServer(service.MultiService, Referenceable): fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) self.add_latency("allocate", self._clock.seconds() - start) - return alreadygot, bucketwriters + return set(alreadygot), bucketwriters def remote_allocate_buckets(self, storage_index, renew_secret, cancel_secret, @@ -626,13 +627,13 @@ class StorageServer(service.MultiService, Referenceable): """ Put the given lease onto the given shares. - :param dict[int, MutableShareFile] shares: The shares to put the lease - onto. + :param Iterable[Union[MutableShareFile, ShareFile]] shares: The shares + to put the lease onto. :param LeaseInfo lease_info: The lease to put on the shares. """ - for share in six.viewvalues(shares): - share.add_or_renew_lease(lease_info) + for share in shares: + share.add_or_renew_lease(self.get_available_space(), lease_info) def slot_testv_and_readv_and_writev( # type: ignore # warner/foolscap#78 self, @@ -692,7 +693,7 @@ class StorageServer(service.MultiService, Referenceable): ) if renew_leases: lease_info = self._make_lease_info(renew_secret, cancel_secret) - self._add_or_renew_leases(remaining_shares, lease_info) + self._add_or_renew_leases(remaining_shares.values(), lease_info) # all done self.add_latency("writev", self._clock.seconds() - start) @@ -747,30 +748,110 @@ class StorageServer(service.MultiService, Referenceable): self.add_latency("readv", self._clock.seconds() - start) return datavs + def _share_exists(self, storage_index, shnum): + """ + Check local share storage to see if a matching share exists. + + :param bytes storage_index: The storage index to inspect. + :param int shnum: The share number to check for. + + :return bool: ``True`` if a share with the given number exists at the + given storage index, ``False`` otherwise. + """ + for existing_sharenum, ignored in self._get_bucket_shares(storage_index): + if existing_sharenum == shnum: + return True + return False + def remote_advise_corrupt_share(self, share_type, storage_index, shnum, reason): # This is a remote API, I believe, so this has to be bytes for legacy # protocol backwards compatibility reasons. 
assert isinstance(share_type, bytes) assert isinstance(reason, bytes), "%r is not bytes" % (reason,) - fileutil.make_dirs(self.corruption_advisory_dir) - now = time_format.iso_utc(sep="T") + si_s = si_b2a(storage_index) - # windows can't handle colons in the filename - fn = os.path.join( - self.corruption_advisory_dir, - ("%s--%s-%d" % (now, str(si_s, "utf-8"), shnum)).replace(":","") - ) - with open(fn, "w") as f: - f.write("report: Share Corruption\n") - f.write("type: %s\n" % bytes_to_native_str(share_type)) - f.write("storage_index: %s\n" % bytes_to_native_str(si_s)) - f.write("share_number: %d\n" % shnum) - f.write("\n") - f.write(bytes_to_native_str(reason)) - f.write("\n") + + if not self._share_exists(storage_index, shnum): + log.msg( + format=( + "discarding client corruption claim for %(si)s/%(shnum)d " + "which I do not have" + ), + si=si_s, + shnum=shnum, + ) + return + log.msg(format=("client claims corruption in (%(share_type)s) " + "%(si)s-%(shnum)d: %(reason)s"), share_type=share_type, si=si_s, shnum=shnum, reason=reason, level=log.SCARY, umid="SGx2fA") + + report = render_corruption_report(share_type, si_s, shnum, reason) + if len(report) > self.get_available_space(): + return None + + now = time_format.iso_utc(sep="T") + report_path = get_corruption_report_path( + self.corruption_advisory_dir, + now, + si_s, + shnum, + ) + with open(report_path, "w") as f: + f.write(report) + return None + +CORRUPTION_REPORT_FORMAT = """\ +report: Share Corruption +type: {type} +storage_index: {storage_index} +share_number: {share_number} + +{reason} + +""" + +def render_corruption_report(share_type, si_s, shnum, reason): + """ + Create a string that explains a corruption report using freeform text. + + :param bytes share_type: The type of the share which the report is about. + + :param bytes si_s: The encoded representation of the storage index which + the report is about. + + :param int shnum: The share number which the report is about. + + :param bytes reason: The reason given by the client for the corruption + report. + """ + return CORRUPTION_REPORT_FORMAT.format( + type=bytes_to_native_str(share_type), + storage_index=bytes_to_native_str(si_s), + share_number=shnum, + reason=bytes_to_native_str(reason), + ) + +def get_corruption_report_path(base_dir, now, si_s, shnum): + """ + Determine the path to which a certain corruption report should be written. + + :param str base_dir: The directory beneath which to construct the path. + + :param str now: The time of the report. + + :param str si_s: The encoded representation of the storage index which the + report is about. + + :param int shnum: The share number which the report is about. + + :return str: A path to which the report can be written. + """ + # windows can't handle colons in the filename + return os.path.join( + base_dir, + ("%s--%s-%d" % (now, str(si_s, "utf-8"), shnum)).replace(":","") + ) diff --git a/src/allmydata/test/cli/test_admin.py b/src/allmydata/test/cli/test_admin.py new file mode 100644 index 000000000..082904652 --- /dev/null +++ b/src/allmydata/test/cli/test_admin.py @@ -0,0 +1,87 @@ +""" +Ported to Python 3. 
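+
+Tests for the ``tahoe admin migrate-crawler`` command.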
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +from six.moves import StringIO + +from testtools.matchers import ( + Contains, +) + +from twisted.python.filepath import ( + FilePath, +) + +from allmydata.scripts.admin import ( + migrate_crawler, +) +from allmydata.scripts.runner import ( + Options, +) +from ..common import ( + SyncTestCase, +) + +class AdminMigrateCrawler(SyncTestCase): + """ + Tests related to 'tahoe admin migrate-crawler' + """ + + def test_already(self): + """ + We've already migrated; don't do it again. + """ + + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + with storage.child("lease_checker.state.json").open("w") as f: + f.write(b"{}\n") + + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + migrate_crawler(options) + + self.assertThat( + options.stdout.getvalue(), + Contains("Already converted:"), + ) + + def test_usage(self): + """ + We've already migrated; don't do it again. + """ + + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + with storage.child("lease_checker.state.json").open("w") as f: + f.write(b"{}\n") + + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + self.assertThat( + str(options), + Contains("security issues with pickle") + ) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index fee13cca9..27134fdd8 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -88,6 +88,7 @@ from allmydata.interfaces import ( SDMF_VERSION, MDMF_VERSION, IAddressFamily, + NoSpace, ) from allmydata.check_results import CheckResults, CheckAndRepairResults, \ DeepCheckResults, DeepCheckAndRepairResults @@ -140,6 +141,42 @@ EMPTY_CLIENT_CONFIG = config_from_string( "" ) +@attr.s +class FakeDisk(object): + """ + Just enough of a disk to be able to report free / used information. + """ + total = attr.ib() + used = attr.ib() + + def use(self, num_bytes): + """ + Mark some amount of available bytes as used (and no longer available). + + :param int num_bytes: The number of bytes to use. + + :raise NoSpace: If there are fewer bytes available than ``num_bytes``. 
+
+        :return: ``None``
+        """
+        if num_bytes > self.total - self.used:
+            raise NoSpace()
+        self.used += num_bytes
+
+    @property
+    def available(self):
+        return self.total - self.used
+
+    def get_disk_stats(self, whichdir, reserved_space):
+        avail = self.available
+        return {
+            'total': self.total,
+            'free_for_root': avail,
+            'free_for_nonroot': avail,
+            'used': self.used,
+            'avail': avail - reserved_space,
+        }
+
 
 @attr.s
 class MemoryIntroducerClient(object):
diff --git a/src/allmydata/test/common_storage.py b/src/allmydata/test/common_storage.py
new file mode 100644
index 000000000..529ebe586
--- /dev/null
+++ b/src/allmydata/test/common_storage.py
@@ -0,0 +1,65 @@
+
+from .common_util import (
+    FakeCanary,
+)
+
+def upload_immutable(storage_server, storage_index, renew_secret, cancel_secret, shares):
+    """
+    Synchronously upload some immutable shares to a ``StorageServer``.
+
+    :param allmydata.storage.server.StorageServer storage_server: The storage
+        server object to use to perform the upload.
+
+    :param bytes storage_index: The storage index for the immutable shares.
+
+    :param bytes renew_secret: The renew secret for the implicitly created lease.
+    :param bytes cancel_secret: The cancel secret for the implicitly created lease.
+
+    :param dict[int, bytes] shares: A mapping from share numbers to share data
+        to upload. The data for all shares must be of the same length.
+
+    :return: ``None``
+    """
+    already, writers = storage_server.remote_allocate_buckets(
+        storage_index,
+        renew_secret,
+        cancel_secret,
+        shares.keys(),
+        len(next(iter(shares.values()))),
+        canary=FakeCanary(),
+    )
+    for shnum, writer in writers.items():
+        writer.remote_write(0, shares[shnum])
+        writer.remote_close()
+
+
+def upload_mutable(storage_server, storage_index, secrets, shares):
+    """
+    Synchronously upload some mutable shares to a ``StorageServer``.
+
+    :param allmydata.storage.server.StorageServer storage_server: The storage
+        server object to use to perform the upload.
+
+    :param bytes storage_index: The storage index for the mutable shares.
+
+    :param secrets: A three-tuple of a write enabler, renew secret, and cancel
+        secret.
+
+    :param dict[int, bytes] shares: A mapping from share numbers to share data
+        to upload.
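+
+        For example (values illustrative)::
+
+            secrets = (b"w" * 32, b"r" * 32, b"c" * 32)
+            upload_mutable(storage_server, b"si0", secrets, {0: b"y" * 500})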
+ + :return: ``None`` + """ + test_and_write_vectors = { + sharenum: ([], [(0, data)], None) + for sharenum, data + in shares.items() + } + read_vector = [] + + storage_server.remote_slot_testv_and_readv_and_writev( + storage_index, + secrets, + test_and_write_vectors, + read_vector, + ) diff --git a/src/allmydata/test/data/lease_checker.history.txt b/src/allmydata/test/data/lease_checker.history.txt new file mode 100644 index 000000000..0c27a5ad0 --- /dev/null +++ b/src/allmydata/test/data/lease_checker.history.txt @@ -0,0 +1,501 @@ +(dp0 +I363 +(dp1 +Vconfigured-expiration-mode +p2 +(S'age' +p3 +NN(S'immutable' +p4 +S'mutable' +p5 +tp6 +tp7 +sVexpiration-enabled +p8 +I00 +sVleases-per-share-histogram +p9 +(dp10 +I1 +I39774 +ssVlease-age-histogram +p11 +(lp12 +(I0 +I86400 +I3125 +tp13 +a(I345600 +I432000 +I4175 +tp14 +a(I950400 +I1036800 +I141 +tp15 +a(I1036800 +I1123200 +I345 +tp16 +a(I1123200 +I1209600 +I81 +tp17 +a(I1296000 +I1382400 +I1832 +tp18 +a(I1555200 +I1641600 +I390 +tp19 +a(I1728000 +I1814400 +I12 +tp20 +a(I2073600 +I2160000 +I84 +tp21 +a(I2160000 +I2246400 +I228 +tp22 +a(I2246400 +I2332800 +I75 +tp23 +a(I2592000 +I2678400 +I644 +tp24 +a(I2678400 +I2764800 +I273 +tp25 +a(I2764800 +I2851200 +I94 +tp26 +a(I2851200 +I2937600 +I97 +tp27 +a(I3196800 +I3283200 +I143 +tp28 +a(I3283200 +I3369600 +I48 +tp29 +a(I4147200 +I4233600 +I374 +tp30 +a(I4320000 +I4406400 +I534 +tp31 +a(I5270400 +I5356800 +I1005 +tp32 +a(I6739200 +I6825600 +I8704 +tp33 +a(I6825600 +I6912000 +I3986 +tp34 +a(I6912000 +I6998400 +I7592 +tp35 +a(I6998400 +I7084800 +I2607 +tp36 +a(I7689600 +I7776000 +I35 +tp37 +a(I8035200 +I8121600 +I33 +tp38 +a(I8294400 +I8380800 +I54 +tp39 +a(I8640000 +I8726400 +I45 +tp40 +a(I8726400 +I8812800 +I27 +tp41 +a(I8812800 +I8899200 +I12 +tp42 +a(I9763200 +I9849600 +I77 +tp43 +a(I9849600 +I9936000 +I91 +tp44 +a(I9936000 +I10022400 +I1210 +tp45 +a(I10022400 +I10108800 +I45 +tp46 +a(I10108800 +I10195200 +I186 +tp47 +a(I10368000 +I10454400 +I113 +tp48 +a(I10972800 +I11059200 +I21 +tp49 +a(I11232000 +I11318400 +I5 +tp50 +a(I11318400 +I11404800 +I19 +tp51 +a(I11404800 +I11491200 +I238 +tp52 +a(I11491200 +I11577600 +I159 +tp53 +a(I11750400 +I11836800 +I1 +tp54 +a(I11836800 +I11923200 +I32 +tp55 +a(I11923200 +I12009600 +I192 +tp56 +a(I12009600 +I12096000 +I222 +tp57 +a(I12096000 +I12182400 +I18 +tp58 +a(I12182400 +I12268800 +I224 +tp59 +a(I12268800 +I12355200 +I9 +tp60 +a(I12355200 +I12441600 +I9 +tp61 +a(I12441600 +I12528000 +I10 +tp62 +a(I12528000 +I12614400 +I6 +tp63 +a(I12614400 +I12700800 +I6 +tp64 +a(I12700800 +I12787200 +I18 +tp65 +a(I12787200 +I12873600 +I6 +tp66 +a(I12873600 +I12960000 +I62 +tp67 +asVcycle-start-finish-times +p68 +(F1634446505.241972 +F1634446666.055401 +tp69 +sVspace-recovered +p70 +(dp71 +Vexamined-buckets-immutable +p72 +I17896 +sVconfigured-buckets-mutable +p73 +I0 +sVexamined-shares-mutable +p74 +I2473 +sVoriginal-shares-mutable +p75 +I1185 +sVconfigured-buckets-immutable +p76 +I0 +sVoriginal-shares-immutable +p77 +I27457 +sVoriginal-diskbytes-immutable +p78 +I2810982400 +sVexamined-shares-immutable +p79 +I37301 +sVoriginal-buckets +p80 +I14047 +sVactual-shares-immutable +p81 +I0 +sVconfigured-shares +p82 +I0 +sVoriginal-buckets-mutable +p83 +I691 +sVactual-diskbytes +p84 +I4096 +sVactual-shares-mutable +p85 +I0 +sVconfigured-buckets +p86 +I1 +sVexamined-buckets-unknown +p87 +I14 +sVactual-sharebytes +p88 +I0 +sVoriginal-shares +p89 +I28642 +sVactual-buckets-immutable +p90 +I0 +sVoriginal-sharebytes +p91 +I2695552941 +sVexamined-sharebytes-immutable +p92 
+I2754798505 +sVactual-shares +p93 +I0 +sVactual-sharebytes-immutable +p94 +I0 +sVoriginal-diskbytes +p95 +I2818981888 +sVconfigured-diskbytes-mutable +p96 +I0 +sVconfigured-sharebytes-immutable +p97 +I0 +sVconfigured-shares-mutable +p98 +I0 +sVactual-diskbytes-immutable +p99 +I0 +sVconfigured-diskbytes-immutable +p100 +I0 +sVoriginal-diskbytes-mutable +p101 +I7995392 +sVactual-sharebytes-mutable +p102 +I0 +sVconfigured-sharebytes +p103 +I0 +sVexamined-shares +p104 +I39774 +sVactual-diskbytes-mutable +p105 +I0 +sVactual-buckets +p106 +I1 +sVoriginal-buckets-immutable +p107 +I13355 +sVconfigured-sharebytes-mutable +p108 +I0 +sVexamined-sharebytes +p109 +I2763646972 +sVoriginal-sharebytes-immutable +p110 +I2692076909 +sVoriginal-sharebytes-mutable +p111 +I3476032 +sVactual-buckets-mutable +p112 +I0 +sVexamined-buckets-mutable +p113 +I1286 +sVconfigured-shares-immutable +p114 +I0 +sVexamined-diskbytes +p115 +I2854801408 +sVexamined-diskbytes-mutable +p116 +I12161024 +sVexamined-sharebytes-mutable +p117 +I8848467 +sVexamined-buckets +p118 +I19197 +sVconfigured-diskbytes +p119 +I4096 +sVexamined-diskbytes-immutable +p120 +I2842640384 +ssVcorrupt-shares +p121 +(lp122 +(V2dn6xnlnsqwtnapwxfdivpm3s4 +p123 +I3 +tp124 +a(g123 +I0 +tp125 +a(V2rrzthwsrrxolevmwdvbdy3rqi +p126 +I3 +tp127 +a(g126 +I0 +tp128 +a(V2skfngcto6h7eqmn4uo7ntk3ne +p129 +I3 +tp130 +a(g129 +I0 +tp131 +a(V32d5swqpqx2mwix7xmqzvhdwje +p132 +I3 +tp133 +a(g132 +I0 +tp134 +a(V5mmayp66yflmpon3o6unsnbaca +p135 +I3 +tp136 +a(g135 +I0 +tp137 +a(V6ixhpvbtre7fnrl6pehlrlflc4 +p138 +I3 +tp139 +a(g138 +I0 +tp140 +a(Vewzhvswjsz4vp2bqkb6mi3bz2u +p141 +I3 +tp142 +a(g141 +I0 +tp143 +a(Vfu7pazf6ogavkqj6z4q5qqex3u +p144 +I3 +tp145 +a(g144 +I0 +tp146 +a(Vhbyjtqvpcimwxiyqbcbbdn2i4a +p147 +I3 +tp148 +a(g147 +I0 +tp149 +a(Vpmcjbdkbjdl26k3e6yja77femq +p150 +I3 +tp151 +a(g150 +I0 +tp152 +a(Vr6swof4v2uttbiiqwj5pi32cm4 +p153 +I3 +tp154 +a(g153 +I0 +tp155 +a(Vt45v5akoktf53evc2fi6gwnv6y +p156 +I3 +tp157 +a(g156 +I0 +tp158 +a(Vy6zb4faar3rdvn3e6pfg4wlotm +p159 +I3 +tp160 +a(g159 +I0 +tp161 +a(Vz3yghutvqoqbchjao4lndnrh3a +p162 +I3 +tp163 +a(g162 +I0 +tp164 +ass. 
\ No newline at end of file diff --git a/src/allmydata/test/data/lease_checker.state.txt b/src/allmydata/test/data/lease_checker.state.txt new file mode 100644 index 000000000..b32554434 --- /dev/null +++ b/src/allmydata/test/data/lease_checker.state.txt @@ -0,0 +1,545 @@ +(dp1 +S'last-complete-prefix' +p2 +NsS'version' +p3 +I1 +sS'current-cycle-start-time' +p4 +F1635003106.611748 +sS'last-cycle-finished' +p5 +I312 +sS'cycle-to-date' +p6 +(dp7 +Vleases-per-share-histogram +p8 +(dp9 +I1 +I36793 +sI2 +I1 +ssVspace-recovered +p10 +(dp11 +Vexamined-buckets-immutable +p12 +I17183 +sVconfigured-buckets-mutable +p13 +I0 +sVexamined-shares-mutable +p14 +I1796 +sVoriginal-shares-mutable +p15 +I1563 +sVconfigured-buckets-immutable +p16 +I0 +sVoriginal-shares-immutable +p17 +I27926 +sVoriginal-diskbytes-immutable +p18 +I431149056 +sVexamined-shares-immutable +p19 +I34998 +sVoriginal-buckets +p20 +I14661 +sVactual-shares-immutable +p21 +I0 +sVconfigured-shares +p22 +I0 +sVoriginal-buckets-immutable +p23 +I13761 +sVactual-diskbytes +p24 +I4096 +sVactual-shares-mutable +p25 +I0 +sVconfigured-buckets +p26 +I1 +sVexamined-buckets-unknown +p27 +I14 +sVactual-sharebytes +p28 +I0 +sVoriginal-shares +p29 +I29489 +sVoriginal-sharebytes +p30 +I312664812 +sVexamined-sharebytes-immutable +p31 +I383801602 +sVactual-shares +p32 +I0 +sVactual-sharebytes-immutable +p33 +I0 +sVoriginal-diskbytes +p34 +I441643008 +sVconfigured-diskbytes-mutable +p35 +I0 +sVconfigured-sharebytes-immutable +p36 +I0 +sVconfigured-shares-mutable +p37 +I0 +sVactual-diskbytes-immutable +p38 +I0 +sVconfigured-diskbytes-immutable +p39 +I0 +sVoriginal-diskbytes-mutable +p40 +I10489856 +sVactual-sharebytes-mutable +p41 +I0 +sVconfigured-sharebytes +p42 +I0 +sVexamined-shares +p43 +I36794 +sVactual-diskbytes-mutable +p44 +I0 +sVactual-buckets +p45 +I1 +sVoriginal-buckets-mutable +p46 +I899 +sVconfigured-sharebytes-mutable +p47 +I0 +sVexamined-sharebytes +p48 +I390369660 +sVoriginal-sharebytes-immutable +p49 +I308125753 +sVoriginal-sharebytes-mutable +p50 +I4539059 +sVactual-buckets-mutable +p51 +I0 +sVexamined-diskbytes-mutable +p52 +I9154560 +sVexamined-buckets-mutable +p53 +I1043 +sVconfigured-shares-immutable +p54 +I0 +sVexamined-diskbytes +p55 +I476598272 +sVactual-buckets-immutable +p56 +I0 +sVexamined-sharebytes-mutable +p57 +I6568058 +sVexamined-buckets +p58 +I18241 +sVconfigured-diskbytes +p59 +I4096 +sVexamined-diskbytes-immutable +p60 +I467443712 +ssVcorrupt-shares +p61 +(lp62 +(V2dn6xnlnsqwtnapwxfdivpm3s4 +p63 +I4 +tp64 +a(g63 +I1 +tp65 +a(V2rrzthwsrrxolevmwdvbdy3rqi +p66 +I4 +tp67 +a(g66 +I1 +tp68 +a(V2skfngcto6h7eqmn4uo7ntk3ne +p69 +I4 +tp70 +a(g69 +I1 +tp71 +a(V32d5swqpqx2mwix7xmqzvhdwje +p72 +I4 +tp73 +a(g72 +I1 +tp74 +a(V5mmayp66yflmpon3o6unsnbaca +p75 +I4 +tp76 +a(g75 +I1 +tp77 +a(V6ixhpvbtre7fnrl6pehlrlflc4 +p78 +I4 +tp79 +a(g78 +I1 +tp80 +a(Vewzhvswjsz4vp2bqkb6mi3bz2u +p81 +I4 +tp82 +a(g81 +I1 +tp83 +a(Vfu7pazf6ogavkqj6z4q5qqex3u +p84 +I4 +tp85 +a(g84 +I1 +tp86 +a(Vhbyjtqvpcimwxiyqbcbbdn2i4a +p87 +I4 +tp88 +a(g87 +I1 +tp89 +a(Vpmcjbdkbjdl26k3e6yja77femq +p90 +I4 +tp91 +a(g90 +I1 +tp92 +a(Vr6swof4v2uttbiiqwj5pi32cm4 +p93 +I4 +tp94 +a(g93 +I1 +tp95 +a(Vt45v5akoktf53evc2fi6gwnv6y +p96 +I4 +tp97 +a(g96 +I1 +tp98 +a(Vy6zb4faar3rdvn3e6pfg4wlotm +p99 +I4 +tp100 +a(g99 +I1 +tp101 +a(Vz3yghutvqoqbchjao4lndnrh3a +p102 +I4 +tp103 +a(g102 +I1 +tp104 +asVlease-age-histogram +p105 +(dp106 +(I45619200 +I45705600 +tp107 +I4 +s(I12441600 +I12528000 +tp108 +I78 +s(I11923200 +I12009600 +tp109 +I89 +s(I33436800 +I33523200 +tp110 +I7 +s(I37411200 
+I37497600 +tp111 +I4 +s(I38361600 +I38448000 +tp112 +I5 +s(I4665600 +I4752000 +tp113 +I256 +s(I11491200 +I11577600 +tp114 +I20 +s(I10713600 +I10800000 +tp115 +I183 +s(I42076800 +I42163200 +tp116 +I4 +s(I47865600 +I47952000 +tp117 +I7 +s(I3110400 +I3196800 +tp118 +I328 +s(I5788800 +I5875200 +tp119 +I954 +s(I9331200 +I9417600 +tp120 +I12 +s(I7430400 +I7516800 +tp121 +I7228 +s(I1555200 +I1641600 +tp122 +I492 +s(I37929600 +I38016000 +tp123 +I3 +s(I38880000 +I38966400 +tp124 +I3 +s(I12528000 +I12614400 +tp125 +I193 +s(I10454400 +I10540800 +tp126 +I1239 +s(I11750400 +I11836800 +tp127 +I7 +s(I950400 +I1036800 +tp128 +I4435 +s(I44409600 +I44496000 +tp129 +I13 +s(I12787200 +I12873600 +tp130 +I218 +s(I10368000 +I10454400 +tp131 +I117 +s(I3283200 +I3369600 +tp132 +I86 +s(I7516800 +I7603200 +tp133 +I993 +s(I42336000 +I42422400 +tp134 +I33 +s(I46310400 +I46396800 +tp135 +I1 +s(I39052800 +I39139200 +tp136 +I51 +s(I7603200 +I7689600 +tp137 +I2004 +s(I10540800 +I10627200 +tp138 +I16 +s(I36374400 +I36460800 +tp139 +I3 +s(I3369600 +I3456000 +tp140 +I79 +s(I12700800 +I12787200 +tp141 +I25 +s(I4838400 +I4924800 +tp142 +I386 +s(I10972800 +I11059200 +tp143 +I122 +s(I8812800 +I8899200 +tp144 +I57 +s(I38966400 +I39052800 +tp145 +I61 +s(I3196800 +I3283200 +tp146 +I628 +s(I9244800 +I9331200 +tp147 +I73 +s(I30499200 +I30585600 +tp148 +I5 +s(I12009600 +I12096000 +tp149 +I329 +s(I12960000 +I13046400 +tp150 +I8 +s(I12614400 +I12700800 +tp151 +I210 +s(I3801600 +I3888000 +tp152 +I32 +s(I10627200 +I10713600 +tp153 +I43 +s(I44928000 +I45014400 +tp154 +I2 +s(I8208000 +I8294400 +tp155 +I38 +s(I8640000 +I8726400 +tp156 +I32 +s(I7344000 +I7430400 +tp157 +I12689 +s(I49075200 +I49161600 +tp158 +I19 +s(I2764800 +I2851200 +tp159 +I76 +s(I2592000 +I2678400 +tp160 +I40 +s(I2073600 +I2160000 +tp161 +I388 +s(I37497600 +I37584000 +tp162 +I11 +s(I1641600 +I1728000 +tp163 +I78 +s(I12873600 +I12960000 +tp164 +I5 +s(I1814400 +I1900800 +tp165 +I1860 +s(I40176000 +I40262400 +tp166 +I1 +s(I3715200 +I3801600 +tp167 +I104 +s(I2332800 +I2419200 +tp168 +I12 +s(I2678400 +I2764800 +tp169 +I278 +s(I12268800 +I12355200 +tp170 +I2 +s(I28771200 +I28857600 +tp171 +I6 +s(I41990400 +I42076800 +tp172 +I10 +sssS'last-complete-bucket' +p173 +NsS'current-cycle' +p174 +Ns. \ No newline at end of file diff --git a/src/allmydata/test/strategies.py b/src/allmydata/test/strategies.py index c0f558ef6..2bb23a373 100644 --- a/src/allmydata/test/strategies.py +++ b/src/allmydata/test/strategies.py @@ -16,6 +16,7 @@ from hypothesis.strategies import ( one_of, builds, binary, + integers, ) from ..uri import ( @@ -119,3 +120,17 @@ def dir2_mdmf_capabilities(): MDMFDirectoryURI, mdmf_capabilities(), ) + +def offsets(min_value=0, max_value=2 ** 16): + """ + Build ``int`` values that could be used as valid offsets into a sequence + (such as share data in a share file). + """ + return integers(min_value, max_value) + +def lengths(min_value=1, max_value=2 ** 16): + """ + Build ``int`` values that could be used as valid lengths of data (such as + share data in a share file). 
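+
+    Bounds are inclusive, so the defaults produce values in ``[1, 2 ** 16]``.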
+ """ + return integers(min_value, max_value) diff --git a/src/allmydata/test/test_auth.py b/src/allmydata/test/test_auth.py index d5198d326..bfe717f79 100644 --- a/src/allmydata/test/test_auth.py +++ b/src/allmydata/test/test_auth.py @@ -8,7 +8,16 @@ from __future__ import unicode_literals from future.utils import PY2 if PY2: - from future.builtins import str # noqa: F401 + from future.builtins import str, open # noqa: F401 + +from hypothesis import ( + given, +) +from hypothesis.strategies import ( + text, + characters, + lists, +) from twisted.trial import unittest from twisted.python import filepath @@ -38,25 +47,184 @@ dBSD8940XU3YW+oeq8e+p3yQ2GinHfeJ3BYQyNQLuMAJ -----END RSA PRIVATE KEY----- """) -DUMMY_ACCOUNTS = u"""\ -alice herpassword URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111 -bob sekrit URI:DIR2:bbbbbbbbbbbbbbbbbbbbbbbbbb:2222222222222222222222222222222222222222222222222222 +DUMMY_KEY_DSA = keys.Key.fromString("""\ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABsQAAAAdzc2gtZH +NzAAAAgQDKMh/ELaiP21LYRBuPbUy7dUhv/XZwV7aS1LzxSP+KaJvtDOei8X76XEAfkqX+ +aGh9eup+BLkezrV6LlpO9uPzhY8ChlKpkvw5PZKv/2agSrVxZyG7yEzHNtSBQXE6qNMwIk +N/ycXLGCqyAhQSzRhLz9ETNaslRDLo7YyVWkiuAQAAABUA5nTatFKux5EqZS4EarMWFRBU +i1UAAACAFpkkK+JsPixSTPyn0DNMoGKA0Klqy8h61Ds6pws+4+aJQptUBshpwNw1ypo7MO ++goDZy3wwdWtURTPGMgesNdEfxp8L2/kqE4vpMK0myoczCqOiWMeNB/x1AStbSkBI8WmHW +2htgsC01xbaix/FrA3edK8WEyv+oIxlbV1FkrPkAAACANb0EpCc8uoR4/32rO2JLsbcLBw +H5wc2khe7AKkIa9kUknRIRvoCZUtXF5XuXXdRmnpVEm2KcsLdtZjip43asQcqgt0Kz3nuF +kAf7bI98G1waFUimcCSPsal4kCmW2HC11sg/BWOt5qczX/0/3xVxpo6juUeBq9ncnFTvPX +5fOlEAAAHoJkFqHiZBah4AAAAHc3NoLWRzcwAAAIEAyjIfxC2oj9tS2EQbj21Mu3VIb/12 +cFe2ktS88Uj/imib7QznovF++lxAH5Kl/mhofXrqfgS5Hs61ei5aTvbj84WPAoZSqZL8OT +2Sr/9moEq1cWchu8hMxzbUgUFxOqjTMCJDf8nFyxgqsgIUEs0YS8/REzWrJUQy6O2MlVpI +rgEAAAAVAOZ02rRSrseRKmUuBGqzFhUQVItVAAAAgBaZJCvibD4sUkz8p9AzTKBigNCpas +vIetQ7OqcLPuPmiUKbVAbIacDcNcqaOzDvoKA2ct8MHVrVEUzxjIHrDXRH8afC9v5KhOL6 +TCtJsqHMwqjoljHjQf8dQErW0pASPFph1tobYLAtNcW2osfxawN3nSvFhMr/qCMZW1dRZK +z5AAAAgDW9BKQnPLqEeP99qztiS7G3CwcB+cHNpIXuwCpCGvZFJJ0SEb6AmVLVxeV7l13U +Zp6VRJtinLC3bWY4qeN2rEHKoLdCs957hZAH+2yPfBtcGhVIpnAkj7GpeJAplthwtdbIPw +VjreanM1/9P98VcaaOo7lHgavZ3JxU7z1+XzpRAAAAFQC7360pZLbv7PFt4BPFJ8zAHxAe +QwAAAA5leGFya3VuQGJhcnlvbgECAwQ= +-----END OPENSSH PRIVATE KEY----- +""") -# dennis password URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111 +ACCOUNTS = u"""\ +# dennis {key} URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111 carol {key} URI:DIR2:cccccccccccccccccccccccccc:3333333333333333333333333333333333333333333333333333 """.format(key=str(DUMMY_KEY.public().toString("openssh"), "ascii")).encode("ascii") +# Python str.splitlines considers NEXT LINE, LINE SEPARATOR, and PARAGRAPH +# separator to be line separators, too. However, file.readlines() does not... +LINE_SEPARATORS = ( + '\x0a', # line feed + '\x0b', # vertical tab + '\x0c', # form feed + '\x0d', # carriage return +) + +class AccountFileParserTests(unittest.TestCase): + """ + Tests for ``load_account_file`` and its helper functions. + """ + @given(lists( + text(alphabet=characters( + blacklist_categories=( + # Surrogates are an encoding trick to help out UTF-16. + # They're not necessary to represent any non-surrogate code + # point in unicode. They're also not legal individually but + # only in pairs. + 'Cs', + ), + # Exclude all our line separators too. 
+ blacklist_characters=("\n", "\r"), + )), + )) + def test_ignore_comments(self, lines): + """ + ``auth.content_lines`` filters out lines beginning with `#` and empty + lines. + """ + expected = set() + + # It's not clear that real files and StringIO behave sufficiently + # similarly to use the latter instead of the former here. In + # particular, they seem to have distinct and incompatible + # line-splitting rules. + bufpath = self.mktemp() + with open(bufpath, "wt", encoding="utf-8") as buf: + for line in lines: + stripped = line.strip() + is_content = stripped and not stripped.startswith("#") + if is_content: + expected.add(stripped) + buf.write(line + "\n") + + with auth.open_account_file(bufpath) as buf: + actual = set(auth.content_lines(buf)) + + self.assertEqual(expected, actual) + + def test_parse_accounts(self): + """ + ``auth.parse_accounts`` accepts an iterator of account lines and returns + an iterator of structured account data. + """ + alice_key = DUMMY_KEY.public().toString("openssh").decode("utf-8") + alice_cap = "URI:DIR2:aaaa:1111" + + bob_key = DUMMY_KEY_DSA.public().toString("openssh").decode("utf-8") + bob_cap = "URI:DIR2:aaaa:2222" + self.assertEqual( + list(auth.parse_accounts([ + "alice {} {}".format(alice_key, alice_cap), + "bob {} {}".format(bob_key, bob_cap), + ])), + [ + ("alice", DUMMY_KEY.public(), alice_cap), + ("bob", DUMMY_KEY_DSA.public(), bob_cap), + ], + ) + + def test_parse_accounts_rejects_passwords(self): + """ + The iterator returned by ``auth.parse_accounts`` raises ``ValueError`` + when processing reaches a line that has what looks like a password + instead of an ssh key. + """ + with self.assertRaises(ValueError): + list(auth.parse_accounts(["alice apassword URI:DIR2:aaaa:1111"])) + + def test_create_account_maps(self): + """ + ``auth.create_account_maps`` accepts an iterator of structured account + data and returns two mappings: one from account name to rootcap, the + other from account name to public keys. + """ + alice_cap = "URI:DIR2:aaaa:1111" + alice_key = DUMMY_KEY.public() + bob_cap = "URI:DIR2:aaaa:2222" + bob_key = DUMMY_KEY_DSA.public() + accounts = [ + ("alice", alice_key, alice_cap), + ("bob", bob_key, bob_cap), + ] + self.assertEqual( + auth.create_account_maps(accounts), + ({ + b"alice": alice_cap.encode("utf-8"), + b"bob": bob_cap.encode("utf-8"), + }, + { + b"alice": [alice_key], + b"bob": [bob_key], + }), + ) + + def test_load_account_file(self): + """ + ``auth.load_account_file`` accepts an iterator of serialized account lines + and returns two mappings: one from account name to rootcap, the other + from account name to public keys. + """ + alice_key = DUMMY_KEY.public().toString("openssh").decode("utf-8") + alice_cap = "URI:DIR2:aaaa:1111" + + bob_key = DUMMY_KEY_DSA.public().toString("openssh").decode("utf-8") + bob_cap = "URI:DIR2:aaaa:2222" + + accounts = [ + "alice {} {}".format(alice_key, alice_cap), + "bob {} {}".format(bob_key, bob_cap), + "# carol {} {}".format(alice_key, alice_cap), + ] + + self.assertEqual( + auth.load_account_file(accounts), + ({ + b"alice": alice_cap.encode("utf-8"), + b"bob": bob_cap.encode("utf-8"), + }, + { + b"alice": [DUMMY_KEY.public()], + b"bob": [DUMMY_KEY_DSA.public()], + }), + ) + + class AccountFileCheckerKeyTests(unittest.TestCase): """ Tests for key handling done by allmydata.frontends.auth.AccountFileChecker. 
""" def setUp(self): self.account_file = filepath.FilePath(self.mktemp()) - self.account_file.setContent(DUMMY_ACCOUNTS) + self.account_file.setContent(ACCOUNTS) abspath = abspath_expanduser_unicode(str(self.account_file.path)) self.checker = auth.AccountFileChecker(None, abspath) - def test_unknown_user_ssh(self): + def test_unknown_user(self): """ AccountFileChecker.requestAvatarId returns a Deferred that fires with UnauthorizedLogin if called with an SSHPrivateKey object with a @@ -67,67 +235,6 @@ class AccountFileCheckerKeyTests(unittest.TestCase): avatarId = self.checker.requestAvatarId(key_credentials) return self.assertFailure(avatarId, error.UnauthorizedLogin) - def test_unknown_user_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - UnauthorizedLogin if called with an SSHPrivateKey object with a - username not present in the account file. - - We use a commented out user, so we're also checking that comments are - skipped. - """ - key_credentials = credentials.UsernamePassword(b"dennis", b"password") - d = self.checker.requestAvatarId(key_credentials) - return self.assertFailure(d, error.UnauthorizedLogin) - - def test_password_auth_user_with_ssh_key(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - UnauthorizedLogin if called with an SSHPrivateKey object for a username - only associated with a password in the account file. - """ - key_credentials = credentials.SSHPrivateKey( - b"alice", b"md5", None, None, None) - avatarId = self.checker.requestAvatarId(key_credentials) - return self.assertFailure(avatarId, error.UnauthorizedLogin) - - def test_password_auth_user_with_correct_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - the user if the correct password is given. - """ - key_credentials = credentials.UsernamePassword(b"alice", b"herpassword") - d = self.checker.requestAvatarId(key_credentials) - def authenticated(avatarId): - self.assertEqual( - (b"alice", - b"URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111"), - (avatarId.username, avatarId.rootcap)) - return d - - def test_password_auth_user_with_correct_hashed_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - the user if the correct password is given in hashed form. - """ - key_credentials = credentials.UsernameHashedPassword(b"alice", b"herpassword") - d = self.checker.requestAvatarId(key_credentials) - def authenticated(avatarId): - self.assertEqual( - (b"alice", - b"URI:DIR2:aaaaaaaaaaaaaaaaaaaaaaaaaa:1111111111111111111111111111111111111111111111111111"), - (avatarId.username, avatarId.rootcap)) - return d - - def test_password_auth_user_with_wrong_password(self): - """ - AccountFileChecker.requestAvatarId returns a Deferred that fires with - UnauthorizedLogin if the wrong password is given. 
- """ - key_credentials = credentials.UsernamePassword(b"alice", b"WRONG") - avatarId = self.checker.requestAvatarId(key_credentials) - return self.assertFailure(avatarId, error.UnauthorizedLogin) - def test_unrecognized_key(self): """ AccountFileChecker.requestAvatarId returns a Deferred that fires with diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index aeea9642e..85d89cde6 100644 --- a/src/allmydata/test/test_download.py +++ b/src/allmydata/test/test_download.py @@ -498,7 +498,7 @@ class DownloadTest(_Base, unittest.TestCase): d.addCallback(_done) return d - def test_simultaneous_onefails_onecancelled(self): + def test_simul_1fail_1cancel(self): # This exercises an mplayer behavior in ticket #1154. I believe that # mplayer made two simultaneous webapi GET requests: first one for an # index region at the end of the (mp3/video) file, then one for the @@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase): d.addCallback(_download, imm_uri, i, expected) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) - corrupt_values = [(3, 2, "no-sh2"), - (15, 2, "need-4th"), # share looks v2 - ] + corrupt_values = [ + # Make the container version for share number 2 look + # unsupported. If you add support for immutable share file + # version number much past 16 million then you will have to + # update this test. Also maybe you have other problems. + (1, 255, "no-sh2"), + # Make the immutable share number 2 (not the container, the + # thing inside the container) look unsupported. Ditto the + # above about version numbers in the ballpark of 16 million. + (13, 255, "need-4th"), + ] for i,newvalue,expected in corrupt_values: d.addCallback(self._corrupt_set, imm_uri, i, newvalue) d.addCallback(_download, imm_uri, i, expected) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index bc87e168d..db62ad06f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -13,14 +13,22 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from six import ensure_str +from io import ( + BytesIO, +) import time import os.path import platform import stat import struct import shutil +from functools import partial from uuid import uuid4 +from testtools.matchers import ( + HasLength, +) + from twisted.trial import unittest from twisted.internet import defer @@ -34,7 +42,13 @@ from allmydata.util import fileutil, hashutil, base32 from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile +from allmydata.storage.mutable_schema import ( + ALL_SCHEMAS as ALL_MUTABLE_SCHEMAS, +) from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile +from allmydata.storage.immutable_schema import ( + ALL_SCHEMAS as ALL_IMMUTABLE_SCHEMAS, +) from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \ si_b2a, si_a2b @@ -56,8 +70,21 @@ from allmydata.test.no_network import NoNetworkServer from allmydata.storage_client import ( _StorageServer, ) -from .common import LoggingServiceParent, ShouldFailMixin +from .common import ( + LoggingServiceParent, + ShouldFailMixin, + FakeDisk, + SyncTestCase, +) from .common_util import FakeCanary +from 
.common_storage import ( + upload_immutable, + upload_mutable, +) +from .strategies import ( + offsets, + lengths, +) class UtilTests(unittest.TestCase): @@ -102,6 +129,7 @@ class FakeStatsProvider(object): def register_producer(self, producer): pass + class Bucket(unittest.TestCase): def make_workdir(self, name): basedir = os.path.join("storage", "Bucket", name) @@ -749,7 +777,9 @@ class Server(unittest.TestCase): e = self.failUnlessRaises(UnknownImmutableContainerVersionError, ss.remote_get_buckets, b"si1") - self.failUnlessIn(" had version 0 but we wanted 1", str(e)) + self.assertEqual(e.filename, fn) + self.assertEqual(e.version, 0) + self.assertIn("had unexpected version 0", str(e)) def test_disconnect(self): # simulate a disconnection @@ -777,6 +807,72 @@ class Server(unittest.TestCase): self.failUnlessEqual(already, set()) self.failUnlessEqual(set(writers.keys()), set([0,1,2])) + def test_reserved_space_immutable_lease(self): + """ + If there is not enough available space to store an additional lease on an + immutable share then ``remote_add_lease`` fails with ``NoSpace`` when + an attempt is made to use it to create a new lease. + """ + disk = FakeDisk(total=1024, used=0) + self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) + + ss = self.create("test_reserved_space_immutable_lease") + + storage_index = b"x" * 16 + renew_secret = b"r" * 32 + cancel_secret = b"c" * 32 + shares = {0: b"y" * 500} + upload_immutable(ss, storage_index, renew_secret, cancel_secret, shares) + + # use up all the available space + disk.use(disk.available) + + # Different secrets to produce a different lease, not a renewal. + renew_secret = b"R" * 32 + cancel_secret = b"C" * 32 + with self.assertRaises(interfaces.NoSpace): + ss.remote_add_lease(storage_index, renew_secret, cancel_secret) + + def test_reserved_space_mutable_lease(self): + """ + If there is not enough available space to store an additional lease on a + mutable share then ``remote_add_lease`` fails with ``NoSpace`` when an + attempt is made to use it to create a new lease. + """ + disk = FakeDisk(total=1024, used=0) + self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) + + ss = self.create("test_reserved_space_mutable_lease") + + renew_secrets = iter( + "{}{}".format("r" * 31, i).encode("ascii") + for i + in range(5) + ) + + storage_index = b"x" * 16 + write_enabler = b"w" * 32 + cancel_secret = b"c" * 32 + secrets = (write_enabler, next(renew_secrets), cancel_secret) + shares = {0: b"y" * 500} + upload_mutable(ss, storage_index, secrets, shares) + + # use up all the available space + disk.use(disk.available) + + # The upload created one lease. There is room for three more leases + # in the share header. Even if we're out of disk space, on a boring + # enough filesystem we can write these. + for i in range(3): + ss.remote_add_lease(storage_index, next(renew_secrets), cancel_secret) + + # Having used all of the space for leases in the header, we would have + # to allocate storage for the next lease. Since there is no space + # available, this must fail instead. + with self.assertRaises(interfaces.NoSpace): + ss.remote_add_lease(storage_index, next(renew_secrets), cancel_secret) + + def test_reserved_space(self): reserved = 10000 allocated = 0 @@ -905,6 +1001,9 @@ class Server(unittest.TestCase): # Create a bucket: rs0, cs0 = self.create_bucket_5_shares(ss, b"si0") + + # Upload of an immutable implies creation of a single lease with the + # supplied secrets. 
(lease,) = ss.get_leases(b"si0") self.assertTrue(lease.is_renew_secret(rs0)) @@ -1036,12 +1135,34 @@ class Server(unittest.TestCase): self.failUnlessEqual(set(b.keys()), set([0,1,2])) self.failUnlessEqual(b[0].remote_read(0, 25), b"\x00" * 25) + def test_reserved_space_advise_corruption(self): + """ + If there is no available space then ``remote_advise_corrupt_share`` does + not write a corruption report. + """ + disk = FakeDisk(total=1024, used=1024) + self.patch(fileutil, "get_disk_stats", disk.get_disk_stats) + + workdir = self.workdir("test_reserved_space_advise_corruption") + ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) + ss.setServiceParent(self.sparent) + + upload_immutable(ss, b"si0", b"r" * 32, b"c" * 32, {0: b""}) + ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, + b"This share smells funny.\n") + + self.assertEqual( + [], + os.listdir(ss.corruption_advisory_dir), + ) + def test_advise_corruption(self): workdir = self.workdir("test_advise_corruption") ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) ss.setServiceParent(self.sparent) si0_s = base32.b2a(b"si0") + upload_immutable(ss, b"si0", b"r" * 32, b"c" * 32, {0: b""}) ss.remote_advise_corrupt_share(b"immutable", b"si0", 0, b"This share smells funny.\n") reportdir = os.path.join(workdir, "corruption-advisories") @@ -1080,6 +1201,26 @@ class Server(unittest.TestCase): self.failUnlessIn(b"share_number: 1", report) self.failUnlessIn(b"This share tastes like dust.", report) + def test_advise_corruption_missing(self): + """ + If a corruption advisory is received for a share that is not present on + this server then it is not persisted. + """ + workdir = self.workdir("test_advise_corruption_missing") + ss = StorageServer(workdir, b"\x00" * 20, discard_storage=True) + ss.setServiceParent(self.sparent) + + # Upload one share for this storage index + upload_immutable(ss, b"si0", b"r" * 32, b"c" * 32, {0: b""}) + + # And try to submit a corruption advisory about a different share + ss.remote_advise_corrupt_share(b"immutable", b"si0", 1, + b"This share smells funny.\n") + + self.assertEqual( + [], + os.listdir(ss.corruption_advisory_dir), + ) class MutableServer(unittest.TestCase): @@ -1149,8 +1290,10 @@ class MutableServer(unittest.TestCase): read = ss.remote_slot_readv e = self.failUnlessRaises(UnknownMutableContainerVersionError, read, b"si1", [0], [(0,10)]) - self.failUnlessIn(" had magic ", str(e)) - self.failUnlessIn(" but we wanted ", str(e)) + self.assertEqual(e.filename, fn) + self.assertTrue(e.version.startswith(b"BAD MAGIC")) + self.assertIn("had unexpected version", str(e)) + self.assertIn("BAD MAGIC", str(e)) def test_container_size(self): ss = self.create("test_container_size") @@ -1373,14 +1516,25 @@ class MutableServer(unittest.TestCase): 2: [b"2"*10]}) def compare_leases_without_timestamps(self, leases_a, leases_b): - self.failUnlessEqual(len(leases_a), len(leases_b)) - for i in range(len(leases_a)): - a = leases_a[i] - b = leases_b[i] - self.failUnlessEqual(a.owner_num, b.owner_num) - self.failUnlessEqual(a.renew_secret, b.renew_secret) - self.failUnlessEqual(a.cancel_secret, b.cancel_secret) - self.failUnlessEqual(a.nodeid, b.nodeid) + """ + Assert that, except for expiration times, ``leases_a`` contains the same + lease information as ``leases_b``. 
+ """ + for a, b in zip(leases_a, leases_b): + # The leases aren't always of the same type (though of course + # corresponding elements in the two lists should be of the same + # type as each other) so it's inconvenient to just reach in and + # normalize the expiration timestamp. We don't want to call + # `renew` on both objects to normalize the expiration timestamp in + # case `renew` is broken and gives us back equal outputs from + # non-equal inputs (expiration timestamp aside). It seems + # reasonably safe to use `renew` to make _one_ of the timestamps + # equal to the other though. + self.assertEqual( + a.renew(b.get_expiration_time()), + b, + ) + self.assertEqual(len(leases_a), len(leases_b)) def test_leases(self): ss = self.create("test_leases") @@ -3146,50 +3300,114 @@ class Stats(unittest.TestCase): self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) +immutable_schemas = strategies.sampled_from(list(ALL_IMMUTABLE_SCHEMAS)) class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" - def get_sharefile(self): - sf = ShareFile(self.mktemp(), max_size=1000, create=True) + def get_sharefile(self, **kwargs): + sf = ShareFile(self.mktemp(), max_size=1000, create=True, **kwargs) sf.write_share_data(0, b"abc") sf.write_share_data(2, b"DEF") # Should be b'abDEF' now. return sf - def test_read_write(self): + @given(immutable_schemas) + def test_read_write(self, schema): """Basic writes can be read.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 3), b"abD") self.assertEqual(sf.read_share_data(1, 4), b"bDEF") - def test_reads_beyond_file_end(self): + @given(immutable_schemas) + def test_reads_beyond_file_end(self, schema): """Reads beyond the file size are truncated.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 10), b"abDEF") self.assertEqual(sf.read_share_data(5, 10), b"") - def test_too_large_write(self): + @given(immutable_schemas) + def test_too_large_write(self, schema): """Can't do write larger than file size.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(DataTooLargeError): sf.write_share_data(0, b"x" * 3000) - def test_no_leases_cancelled(self): + @given(immutable_schemas) + def test_no_leases_cancelled(self, schema): """If no leases were cancelled, IndexError is raised.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(IndexError): sf.cancel_lease(b"garbage") - def test_renew_secret(self): + @given(immutable_schemas) + def test_long_lease_count_format(self, schema): """ - A lease loaded from a share file can have its renew secret verified. + ``ShareFile.__init__`` raises ``ValueError`` if the lease count format + given is longer than one character. + """ + with self.assertRaises(ValueError): + self.get_sharefile(schema=schema, lease_count_format="BB") + + @given(immutable_schemas) + def test_large_lease_count_format(self, schema): + """ + ``ShareFile.__init__`` raises ``ValueError`` if the lease count format + encodes to a size larger than 8 bytes. 
+ """ + with self.assertRaises(ValueError): + self.get_sharefile(schema=schema, lease_count_format="Q") + + @given(immutable_schemas) + def test_avoid_lease_overflow(self, schema): + """ + If the share file already has the maximum number of leases supported then + ``ShareFile.add_lease`` raises ``struct.error`` and makes no changes + to the share file contents. + """ + make_lease = partial( + LeaseInfo, + renew_secret=b"r" * 32, + cancel_secret=b"c" * 32, + expiration_time=2 ** 31, + ) + # Make it a little easier to reach the condition by limiting the + # number of leases to only 255. + sf = self.get_sharefile(schema=schema, lease_count_format="B") + + # Add the leases. + for i in range(2 ** 8 - 1): + lease = make_lease(owner_num=i) + sf.add_lease(lease) + + # Capture the state of the share file at this point so we can + # determine whether the next operation modifies it or not. + with open(sf.home, "rb") as f: + before_data = f.read() + + # It is not possible to add a 256th lease. + lease = make_lease(owner_num=256) + with self.assertRaises(struct.error): + sf.add_lease(lease) + + # Compare the share file state to what we captured earlier. Any + # change is a bug. + with open(sf.home, "rb") as f: + after_data = f.read() + + self.assertEqual(before_data, after_data) + + @given(immutable_schemas) + def test_renew_secret(self, schema): + """ + A lease loaded from an immutable share file at any schema version can have + its renew secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, @@ -3200,15 +3418,17 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_renew_secret(renew_secret)) - def test_cancel_secret(self): + @given(immutable_schemas) + def test_cancel_secret(self, schema): """ - A lease loaded from a share file can have its cancel secret verified. + A lease loaded from an immutable share file at any schema version can have + its cancel secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, @@ -3219,8 +3439,121 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_cancel_secret(cancel_secret)) +mutable_schemas = strategies.sampled_from(list(ALL_MUTABLE_SCHEMAS)) -class LeaseInfoTests(unittest.TestCase): +class MutableShareFileTests(unittest.TestCase): + """ + Tests for allmydata.storage.mutable.MutableShareFile. + """ + def get_sharefile(self, **kwargs): + return MutableShareFile(self.mktemp(), **kwargs) + + @given( + schema=mutable_schemas, + nodeid=strategies.just(b"x" * 20), + write_enabler=strategies.just(b"y" * 32), + datav=strategies.lists( + # Limit the max size of these so we don't write *crazy* amounts of + # data to disk. + strategies.tuples(offsets(), strategies.binary(max_size=2 ** 8)), + max_size=2 ** 8, + ), + new_length=offsets(), + ) + def test_readv_reads_share_data(self, schema, nodeid, write_enabler, datav, new_length): + """ + ``MutableShareFile.readv`` returns bytes from the share data portion + of the share file. 
+ """ + sf = self.get_sharefile(schema=schema) + sf.create(my_nodeid=nodeid, write_enabler=write_enabler) + sf.writev(datav=datav, new_length=new_length) + + # Apply all of the writes to a simple in-memory buffer so we can + # resolve the final state of the share data. In particular, this + # helps deal with overlapping writes which otherwise make it tricky to + # figure out what data to expect to be able to read back. + buf = BytesIO() + for (offset, data) in datav: + buf.seek(offset) + buf.write(data) + buf.truncate(new_length) + + # Using that buffer, determine the expected result of a readv for all + # of the data just written. + def read_from_buf(offset, length): + buf.seek(offset) + return buf.read(length) + expected_data = list( + read_from_buf(offset, len(data)) + for (offset, data) + in datav + ) + + # Perform a read that gives back all of the data written to the share + # file. + read_vectors = list((offset, len(data)) for (offset, data) in datav) + read_data = sf.readv(read_vectors) + + # Make sure the read reproduces the value we computed using our local + # buffer. + self.assertEqual(expected_data, read_data) + + @given( + schema=mutable_schemas, + nodeid=strategies.just(b"x" * 20), + write_enabler=strategies.just(b"y" * 32), + readv=strategies.lists(strategies.tuples(offsets(), lengths()), min_size=1), + random=strategies.randoms(), + ) + def test_readv_rejects_negative_length(self, schema, nodeid, write_enabler, readv, random): + """ + If a negative length is given to ``MutableShareFile.readv`` in a read + vector then ``AssertionError`` is raised. + """ + # Pick a read vector to break with a negative value + readv_index = random.randrange(len(readv)) + # Decide on whether we're breaking offset or length + offset_or_length = random.randrange(2) + + # A helper function that will take a valid offset and length and break + # one of them. + def corrupt(break_length, offset, length): + if break_length: + # length must not be 0 or flipping the sign does nothing + # length must not be negative or flipping the sign *fixes* it + assert length > 0 + return (offset, -length) + else: + if offset > 0: + # We can break offset just by flipping the sign. + return (-offset, length) + else: + # Otherwise it has to be zero. If it was negative, what's + # going on? + assert offset == 0 + # Since we can't just flip the sign on 0 to break things, + # replace a 0 offset with a simple negative value. All + # other negative values will be tested by the `offset > 0` + # case above. + return (-1, length) + + # Break the read vector very slightly! + broken_readv = readv[:] + broken_readv[readv_index] = corrupt( + offset_or_length, + *broken_readv[readv_index] + ) + + sf = self.get_sharefile(schema=schema) + sf.create(my_nodeid=nodeid, write_enabler=write_enabler) + + # A read with a broken read vector is an error. + with self.assertRaises(AssertionError): + sf.readv(broken_readv) + + +class LeaseInfoTests(SyncTestCase): """ Tests for ``allmydata.storage.lease.LeaseInfo``. 
""" @@ -3279,3 +3612,30 @@ class LeaseInfoTests(unittest.TestCase): cancel_secret=cancel_secret, ) self.assertFalse(lease.is_cancel_secret(renew_secret)) + + @given( + strategies.tuples( + strategies.integers(min_value=0, max_value=2 ** 31 - 1), + strategies.binary(min_size=32, max_size=32), + strategies.binary(min_size=32, max_size=32), + strategies.integers(min_value=0, max_value=2 ** 31 - 1), + strategies.binary(min_size=20, max_size=20), + ), + ) + def test_immutable_size(self, initializer_args): + """ + ``LeaseInfo.immutable_size`` returns the length of the result of + ``LeaseInfo.to_immutable_data``. + + ``LeaseInfo.mutable_size`` returns the length of the result of + ``LeaseInfo.to_mutable_data``. + """ + info = LeaseInfo(*initializer_args) + self.expectThat( + info.to_immutable_data(), + HasLength(info.immutable_size()), + ) + self.expectThat( + info.to_mutable_data(), + HasLength(info.mutable_size()), + ) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 38e380223..18ea0220c 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -19,25 +19,40 @@ import time import os.path import re import json +from unittest import skipIf +from six.moves import StringIO from twisted.trial import unittest - from twisted.internet import defer from twisted.application import service from twisted.web.template import flattenString +from twisted.python.filepath import FilePath +from twisted.python.runtime import platform from foolscap.api import fireEventually from allmydata.util import fileutil, hashutil, base32, pollmixin from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError from allmydata.storage.server import StorageServer -from allmydata.storage.crawler import BucketCountingCrawler -from allmydata.storage.expirer import LeaseCheckingCrawler +from allmydata.storage.crawler import ( + BucketCountingCrawler, + _LeaseStateSerializer, +) +from allmydata.storage.expirer import ( + LeaseCheckingCrawler, + _HistorySerializer, +) from allmydata.web.storage import ( StorageStatus, StorageStatusElement, remove_prefix ) +from allmydata.scripts.admin import ( + migrate_crawler, +) +from allmydata.scripts.runner import ( + Options, +) from .common_util import FakeCanary from .common_web import ( @@ -376,7 +391,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) self.failUnlessEqual(lah, [ (0.0, DAY, 1) ] ) - self.failUnlessEqual(so_far["leases-per-share-histogram"], {1: 1}) + self.failUnlessEqual(so_far["leases-per-share-histogram"], {"1": 1}) self.failUnlessEqual(so_far["corrupt-shares"], []) sr1 = so_far["space-recovered"] self.failUnlessEqual(sr1["examined-buckets"], 1) @@ -427,9 +442,9 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failIf("cycle-to-date" in s) self.failIf("estimated-remaining-cycle" in s) self.failIf("estimated-current-cycle" in s) - last = s["history"][0] + last = s["history"]["0"] self.failUnlessIn("cycle-start-finish-times", last) - self.failUnlessEqual(type(last["cycle-start-finish-times"]), tuple) + self.failUnlessEqual(type(last["cycle-start-finish-times"]), list) self.failUnlessEqual(last["expiration-enabled"], False) self.failUnlessIn("configured-expiration-mode", last) @@ -437,9 +452,9 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): lah = last["lease-age-histogram"] 
self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) - self.failUnlessEqual(lah, [ (0.0, DAY, 6) ] ) + self.failUnlessEqual(lah, [ [0.0, DAY, 6] ] ) - self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) + self.failUnlessEqual(last["leases-per-share-histogram"], {"1": 2, "2": 2}) self.failUnlessEqual(last["corrupt-shares"], []) rec = last["space-recovered"] @@ -587,12 +602,12 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], - ("age", 2000, None, ("mutable", "immutable"))) - self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) + ["age", 2000, None, ["mutable", "immutable"]]) + self.failUnlessEqual(last["leases-per-share-histogram"], {"1": 2, "2": 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) @@ -731,14 +746,14 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], - ("cutoff-date", None, then, - ("mutable", "immutable"))) + ["cutoff-date", None, then, + ["mutable", "immutable"]]) self.failUnlessEqual(last["leases-per-share-histogram"], - {1: 2, 2: 2}) + {"1": 2, "2": 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) @@ -924,8 +939,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): s = lc.get_state() h = s["history"] self.failUnlessEqual(len(h), 10) - self.failUnlessEqual(max(h.keys()), 15) - self.failUnlessEqual(min(h.keys()), 6) + self.failUnlessEqual(max(int(k) for k in h.keys()), 15) + self.failUnlessEqual(min(int(k) for k in h.keys()), 6) d.addCallback(_check) return d @@ -1014,7 +1029,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): def _check(ignored): s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] rec = last["space-recovered"] self.failUnlessEqual(rec["configured-buckets"], 4) self.failUnlessEqual(rec["configured-shares"], 4) @@ -1110,7 +1125,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): def _after_first_cycle(ignored): s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 5) self.failUnlessEqual(rec["examined-shares"], 3) @@ -1139,6 +1154,390 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d + @skipIf(platform.isWindows(), "pickle test-data can't be loaded on windows") + def test_deserialize_pickle(self): + """ + The crawler can read existing state from the old pickle format + """ + # this file came from an "in the wild" tahoe version 1.16.0 + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state.txt") + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + test_pickle = storage.child("lease_checker.state") + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + local.write(remote.read()) + + # convert from pickle format to JSON + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while 
hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + migrate_crawler(options) + + # the (existing) state file should have been upgraded to JSON + self.assertFalse(test_pickle.exists()) + self.assertTrue(test_pickle.siblingExtension(".json").exists()) + serial = _LeaseStateSerializer(test_pickle.path) + + self.assertEqual( + serial.load(), + { + u'last-complete-prefix': None, + u'version': 1, + u'current-cycle-start-time': 1635003106.611748, + u'last-cycle-finished': 312, + u'cycle-to-date': { + u'leases-per-share-histogram': { + u'1': 36793, + u'2': 1, + }, + u'space-recovered': { + u'examined-buckets-immutable': 17183, + u'configured-buckets-mutable': 0, + u'examined-shares-mutable': 1796, + u'original-shares-mutable': 1563, + u'configured-buckets-immutable': 0, + u'original-shares-immutable': 27926, + u'original-diskbytes-immutable': 431149056, + u'examined-shares-immutable': 34998, + u'original-buckets': 14661, + u'actual-shares-immutable': 0, + u'configured-shares': 0, + u'original-buckets-mutable': 899, + u'actual-diskbytes': 4096, + u'actual-shares-mutable': 0, + u'configured-buckets': 1, + u'examined-buckets-unknown': 14, + u'actual-sharebytes': 0, + u'original-shares': 29489, + u'actual-buckets-immutable': 0, + u'original-sharebytes': 312664812, + u'examined-sharebytes-immutable': 383801602, + u'actual-shares': 0, + u'actual-sharebytes-immutable': 0, + u'original-diskbytes': 441643008, + u'configured-diskbytes-mutable': 0, + u'configured-sharebytes-immutable': 0, + u'configured-shares-mutable': 0, + u'actual-diskbytes-immutable': 0, + u'configured-diskbytes-immutable': 0, + u'original-diskbytes-mutable': 10489856, + u'actual-sharebytes-mutable': 0, + u'configured-sharebytes': 0, + u'examined-shares': 36794, + u'actual-diskbytes-mutable': 0, + u'actual-buckets': 1, + u'original-buckets-immutable': 13761, + u'configured-sharebytes-mutable': 0, + u'examined-sharebytes': 390369660, + u'original-sharebytes-immutable': 308125753, + u'original-sharebytes-mutable': 4539059, + u'actual-buckets-mutable': 0, + u'examined-buckets-mutable': 1043, + u'configured-shares-immutable': 0, + u'examined-diskbytes': 476598272, + u'examined-diskbytes-mutable': 9154560, + u'examined-sharebytes-mutable': 6568058, + u'examined-buckets': 18241, + u'configured-diskbytes': 4096, + u'examined-diskbytes-immutable': 467443712}, + u'corrupt-shares': [ + [u'2dn6xnlnsqwtnapwxfdivpm3s4', 4], + [u'2dn6xnlnsqwtnapwxfdivpm3s4', 1], + [u'2rrzthwsrrxolevmwdvbdy3rqi', 4], + [u'2rrzthwsrrxolevmwdvbdy3rqi', 1], + [u'2skfngcto6h7eqmn4uo7ntk3ne', 4], + [u'2skfngcto6h7eqmn4uo7ntk3ne', 1], + [u'32d5swqpqx2mwix7xmqzvhdwje', 4], + [u'32d5swqpqx2mwix7xmqzvhdwje', 1], + [u'5mmayp66yflmpon3o6unsnbaca', 4], + [u'5mmayp66yflmpon3o6unsnbaca', 1], + [u'6ixhpvbtre7fnrl6pehlrlflc4', 4], + [u'6ixhpvbtre7fnrl6pehlrlflc4', 1], + [u'ewzhvswjsz4vp2bqkb6mi3bz2u', 4], + [u'ewzhvswjsz4vp2bqkb6mi3bz2u', 1], + [u'fu7pazf6ogavkqj6z4q5qqex3u', 4], + [u'fu7pazf6ogavkqj6z4q5qqex3u', 1], + [u'hbyjtqvpcimwxiyqbcbbdn2i4a', 4], + [u'hbyjtqvpcimwxiyqbcbbdn2i4a', 1], + [u'pmcjbdkbjdl26k3e6yja77femq', 4], + [u'pmcjbdkbjdl26k3e6yja77femq', 1], + [u'r6swof4v2uttbiiqwj5pi32cm4', 4], + [u'r6swof4v2uttbiiqwj5pi32cm4', 1], + [u't45v5akoktf53evc2fi6gwnv6y', 4], + [u't45v5akoktf53evc2fi6gwnv6y', 1], + [u'y6zb4faar3rdvn3e6pfg4wlotm', 4], + [u'y6zb4faar3rdvn3e6pfg4wlotm', 1], + [u'z3yghutvqoqbchjao4lndnrh3a', 4], + [u'z3yghutvqoqbchjao4lndnrh3a', 1], + ], + u'lease-age-histogram': { + "1641600,1728000": 78, + 
"12441600,12528000": 78, + "8640000,8726400": 32, + "1814400,1900800": 1860, + "2764800,2851200": 76, + "11491200,11577600": 20, + "10713600,10800000": 183, + "47865600,47952000": 7, + "3110400,3196800": 328, + "10627200,10713600": 43, + "45619200,45705600": 4, + "12873600,12960000": 5, + "7430400,7516800": 7228, + "1555200,1641600": 492, + "38880000,38966400": 3, + "12528000,12614400": 193, + "7344000,7430400": 12689, + "2678400,2764800": 278, + "2332800,2419200": 12, + "9244800,9331200": 73, + "12787200,12873600": 218, + "49075200,49161600": 19, + "10368000,10454400": 117, + "4665600,4752000": 256, + "7516800,7603200": 993, + "42336000,42422400": 33, + "10972800,11059200": 122, + "39052800,39139200": 51, + "12614400,12700800": 210, + "7603200,7689600": 2004, + "10540800,10627200": 16, + "950400,1036800": 4435, + "42076800,42163200": 4, + "8812800,8899200": 57, + "5788800,5875200": 954, + "36374400,36460800": 3, + "9331200,9417600": 12, + "30499200,30585600": 5, + "12700800,12787200": 25, + "2073600,2160000": 388, + "12960000,13046400": 8, + "11923200,12009600": 89, + "3369600,3456000": 79, + "3196800,3283200": 628, + "37497600,37584000": 11, + "33436800,33523200": 7, + "44928000,45014400": 2, + "37929600,38016000": 3, + "38966400,39052800": 61, + "3283200,3369600": 86, + "11750400,11836800": 7, + "3801600,3888000": 32, + "46310400,46396800": 1, + "4838400,4924800": 386, + "8208000,8294400": 38, + "37411200,37497600": 4, + "12009600,12096000": 329, + "10454400,10540800": 1239, + "40176000,40262400": 1, + "3715200,3801600": 104, + "44409600,44496000": 13, + "38361600,38448000": 5, + "12268800,12355200": 2, + "28771200,28857600": 6, + "41990400,42076800": 10, + "2592000,2678400": 40, + }, + }, + 'current-cycle': None, + 'last-complete-bucket': None, + } + ) + second_serial = _LeaseStateSerializer(serial._path.path) + self.assertEqual( + serial.load(), + second_serial.load(), + ) + + @skipIf(platform.isWindows(), "pickle test-data can't be loaded on windows") + def test_deserialize_history_pickle(self): + """ + The crawler can read existing history state from the old pickle + format + """ + # this file came from an "in the wild" tahoe version 1.16.0 + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history.txt") + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + test_pickle = storage.child("lease_checker.history") + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + local.write(remote.read()) + + # convert from pickle format to JSON + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + migrate_crawler(options) + + serial = _HistorySerializer(test_pickle.path) + + self.maxDiff = None + self.assertEqual( + serial.load(), + { + "363": { + 'configured-expiration-mode': ['age', None, None, ['immutable', 'mutable']], + 'expiration-enabled': False, + 'leases-per-share-histogram': { + '1': 39774, + }, + 'lease-age-histogram': [ + [0, 86400, 3125], + [345600, 432000, 4175], + [950400, 1036800, 141], + [1036800, 1123200, 345], + [1123200, 1209600, 81], + [1296000, 1382400, 1832], + [1555200, 1641600, 390], + [1728000, 1814400, 12], + [2073600, 2160000, 84], + [2160000, 2246400, 228], + [2246400, 2332800, 75], + [2592000, 2678400, 644], + [2678400, 2764800, 273], + [2764800, 2851200, 94], + [2851200, 2937600, 97], + 
[3196800, 3283200, 143], + [3283200, 3369600, 48], + [4147200, 4233600, 374], + [4320000, 4406400, 534], + [5270400, 5356800, 1005], + [6739200, 6825600, 8704], + [6825600, 6912000, 3986], + [6912000, 6998400, 7592], + [6998400, 7084800, 2607], + [7689600, 7776000, 35], + [8035200, 8121600, 33], + [8294400, 8380800, 54], + [8640000, 8726400, 45], + [8726400, 8812800, 27], + [8812800, 8899200, 12], + [9763200, 9849600, 77], + [9849600, 9936000, 91], + [9936000, 10022400, 1210], + [10022400, 10108800, 45], + [10108800, 10195200, 186], + [10368000, 10454400, 113], + [10972800, 11059200, 21], + [11232000, 11318400, 5], + [11318400, 11404800, 19], + [11404800, 11491200, 238], + [11491200, 11577600, 159], + [11750400, 11836800, 1], + [11836800, 11923200, 32], + [11923200, 12009600, 192], + [12009600, 12096000, 222], + [12096000, 12182400, 18], + [12182400, 12268800, 224], + [12268800, 12355200, 9], + [12355200, 12441600, 9], + [12441600, 12528000, 10], + [12528000, 12614400, 6], + [12614400, 12700800, 6], + [12700800, 12787200, 18], + [12787200, 12873600, 6], + [12873600, 12960000, 62], + ], + 'cycle-start-finish-times': [1634446505.241972, 1634446666.055401], + 'space-recovered': { + 'examined-buckets-immutable': 17896, + 'configured-buckets-mutable': 0, + 'examined-shares-mutable': 2473, + 'original-shares-mutable': 1185, + 'configured-buckets-immutable': 0, + 'original-shares-immutable': 27457, + 'original-diskbytes-immutable': 2810982400, + 'examined-shares-immutable': 37301, + 'original-buckets': 14047, + 'actual-shares-immutable': 0, + 'configured-shares': 0, + 'original-buckets-mutable': 691, + 'actual-diskbytes': 4096, + 'actual-shares-mutable': 0, + 'configured-buckets': 1, + 'examined-buckets-unknown': 14, + 'actual-sharebytes': 0, + 'original-shares': 28642, + 'actual-buckets-immutable': 0, + 'original-sharebytes': 2695552941, + 'examined-sharebytes-immutable': 2754798505, + 'actual-shares': 0, + 'actual-sharebytes-immutable': 0, + 'original-diskbytes': 2818981888, + 'configured-diskbytes-mutable': 0, + 'configured-sharebytes-immutable': 0, + 'configured-shares-mutable': 0, + 'actual-diskbytes-immutable': 0, + 'configured-diskbytes-immutable': 0, + 'original-diskbytes-mutable': 7995392, + 'actual-sharebytes-mutable': 0, + 'configured-sharebytes': 0, + 'examined-shares': 39774, + 'actual-diskbytes-mutable': 0, + 'actual-buckets': 1, + 'original-buckets-immutable': 13355, + 'configured-sharebytes-mutable': 0, + 'examined-sharebytes': 2763646972, + 'original-sharebytes-immutable': 2692076909, + 'original-sharebytes-mutable': 3476032, + 'actual-buckets-mutable': 0, + 'examined-buckets-mutable': 1286, + 'configured-shares-immutable': 0, + 'examined-diskbytes': 2854801408, + 'examined-diskbytes-mutable': 12161024, + 'examined-sharebytes-mutable': 8848467, + 'examined-buckets': 19197, + 'configured-diskbytes': 4096, + 'examined-diskbytes-immutable': 2842640384 + }, + 'corrupt-shares': [ + ['2dn6xnlnsqwtnapwxfdivpm3s4', 3], + ['2dn6xnlnsqwtnapwxfdivpm3s4', 0], + ['2rrzthwsrrxolevmwdvbdy3rqi', 3], + ['2rrzthwsrrxolevmwdvbdy3rqi', 0], + ['2skfngcto6h7eqmn4uo7ntk3ne', 3], + ['2skfngcto6h7eqmn4uo7ntk3ne', 0], + ['32d5swqpqx2mwix7xmqzvhdwje', 3], + ['32d5swqpqx2mwix7xmqzvhdwje', 0], + ['5mmayp66yflmpon3o6unsnbaca', 3], + ['5mmayp66yflmpon3o6unsnbaca', 0], + ['6ixhpvbtre7fnrl6pehlrlflc4', 3], + ['6ixhpvbtre7fnrl6pehlrlflc4', 0], + ['ewzhvswjsz4vp2bqkb6mi3bz2u', 3], + ['ewzhvswjsz4vp2bqkb6mi3bz2u', 0], + ['fu7pazf6ogavkqj6z4q5qqex3u', 3], + ['fu7pazf6ogavkqj6z4q5qqex3u', 0], + 
['hbyjtqvpcimwxiyqbcbbdn2i4a', 3], + ['hbyjtqvpcimwxiyqbcbbdn2i4a', 0], + ['pmcjbdkbjdl26k3e6yja77femq', 3], + ['pmcjbdkbjdl26k3e6yja77femq', 0], + ['r6swof4v2uttbiiqwj5pi32cm4', 3], + ['r6swof4v2uttbiiqwj5pi32cm4', 0], + ['t45v5akoktf53evc2fi6gwnv6y', 3], + ['t45v5akoktf53evc2fi6gwnv6y', 0], + ['y6zb4faar3rdvn3e6pfg4wlotm', 3], + ['y6zb4faar3rdvn3e6pfg4wlotm', 0], + ['z3yghutvqoqbchjao4lndnrh3a', 3], + ['z3yghutvqoqbchjao4lndnrh3a', 0], + ] + } + } + ) + class WebStatus(unittest.TestCase, pollmixin.PollMixin): diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py index f2f021a15..e568d5ed5 100644 --- a/src/allmydata/web/storage.py +++ b/src/allmydata/web/storage.py @@ -256,8 +256,8 @@ class StorageStatusElement(Element): if so_far["corrupt-shares"]: add("Corrupt shares:", - T.ul( (T.li( ["SI %s shnum %d" % corrupt_share - for corrupt_share in so_far["corrupt-shares"] ] + T.ul( (T.li( ["SI %s shnum %d" % (si, shnum) + for si, shnum in so_far["corrupt-shares"] ] )))) return tag("Current cycle:", p) @@ -267,7 +267,8 @@ class StorageStatusElement(Element): h = lc.get_state()["history"] if not h: return "" - last = h[max(h.keys())] + biggest = str(max(int(k) for k in h.keys())) + last = h[biggest] start, end = last["cycle-start-finish-times"] tag("Last complete cycle (which took %s and finished %s ago)" @@ -290,8 +291,8 @@ class StorageStatusElement(Element): if last["corrupt-shares"]: add("Corrupt shares:", - T.ul( (T.li( ["SI %s shnum %d" % corrupt_share - for corrupt_share in last["corrupt-shares"] ] + T.ul( (T.li( ["SI %s shnum %d" % (si, shnum) + for si, shnum in last["corrupt-shares"] ] )))) return tag(p)
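
A note on the serialization changes asserted above: many expectations changed
shape (tuples became lists, integer histogram keys became strings) because the
crawler state moved from pickle to JSON, and JSON has no tuple type and only
string object keys. A minimal illustration of the round-trip, using only the
standard library (the ``state`` dict here is illustrative, not part of the
patch)::

    import json

    # Object keys become strings and tuples come back as lists after one
    # dump/load round-trip.
    state = {
        "leases-per-share-histogram": {1: 2, 2: 2},
        "cycle-start-finish-times": (1634446505.0, 1634446666.0),
    }
    roundtripped = json.loads(json.dumps(state))
    assert roundtripped["leases-per-share-histogram"] == {"1": 2, "2": 2}
    assert roundtripped["cycle-start-finish-times"] == [1634446505.0, 1634446666.0]

    # This is also why web/storage.py now unpacks (si, shnum) explicitly:
    # %-formatting requires a tuple of arguments, so the old
    # `"SI %s shnum %d" % corrupt_share` idiom breaks once corrupt_share is a
    # list loaded from JSON rather than a tuple.
    si, shnum = ["2dn6xnlnsqwtnapwxfdivpm3s4", 4]
    assert "SI %s shnum %d" % (si, shnum) == "SI 2dn6xnlnsqwtnapwxfdivpm3s4 shnum 4"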
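
The lease count format tests (``test_long_lease_count_format``,
``test_large_lease_count_format``, ``test_avoid_lease_overflow``) all hinge on
``struct`` format sizing. The following standalone sketch shows the kind of
validation they exercise; the helper name and the four-byte limit are
assumptions made for illustration (the v1 share schema stores the lease count
in a single four-byte field), and the real check lives in
``allmydata.storage.immutable``::

    import struct

    def validate_lease_count_format(fmt):
        # Only a single struct format character is accepted, e.g. "B" or "L".
        if len(fmt) != 1:
            raise ValueError("lease count format must be one character: %r" % (fmt,))
        # Use big-endian standard sizes so every platform agrees, and reject
        # formats wider than the assumed four-byte slot.
        if struct.calcsize(">" + fmt) > 4:
            raise ValueError("lease count format too large: %r" % (fmt,))
        return ">" + fmt

    validate_lease_count_format("B")    # 1 byte: at most 255 leases
    validate_lease_count_format("L")    # 4 bytes: fine
    for bad in ("BB", "Q"):             # two characters; 8 bytes wide
        try:
            validate_lease_count_format(bad)
        except ValueError:
            pass
        else:
            raise AssertionError("expected ValueError for %r" % (bad,))

    # Once 255 leases exist with a one-byte count, writing the next count
    # overflows the format; that is the struct.error which
    # test_avoid_lease_overflow expects to leave the share file unmodified.
    try:
        struct.pack(">B", 256)
    except struct.error:
        pass
    else:
        raise AssertionError("expected struct.error")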