From 0459b712b02b8ba686687d696325bcdb650f770c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 08:54:55 -0400 Subject: [PATCH 01/24] news fragment --- newsfragments/3839.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3839.security diff --git a/newsfragments/3839.security b/newsfragments/3839.security new file mode 100644 index 000000000..1ae054542 --- /dev/null +++ b/newsfragments/3839.security @@ -0,0 +1 @@ +The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets. From 274dc6e837dd7181fb1f6ba9116570dc4b255d66 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 08:55:37 -0400 Subject: [PATCH 02/24] Introduce `UnknownContainerVersionError` base w/ structured args --- src/allmydata/storage/common.py | 11 ++++++++--- src/allmydata/storage/immutable.py | 4 +--- src/allmydata/storage/mutable.py | 4 +--- src/allmydata/test/test_storage.py | 7 ++++--- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index e5563647f..48fc77840 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -16,11 +16,16 @@ from allmydata.util import base32 # Backwards compatibility. from allmydata.interfaces import DataTooLargeError # noqa: F401 -class UnknownMutableContainerVersionError(Exception): - pass -class UnknownImmutableContainerVersionError(Exception): +class UnknownContainerVersionError(Exception): + def __init__(self, filename, version): + self.filename = filename + self.version = version + +class UnknownMutableContainerVersionError(UnknownContainerVersionError): pass +class UnknownImmutableContainerVersionError(UnknownContainerVersionError): + pass def si_b2a(storageindex): return base32.b2a(storageindex) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index a43860138..fcc60509c 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -174,9 +174,7 @@ class ShareFile(object): filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) if version != 1: - msg = "sharefile %s had version %d but we wanted 1" % \ - (filename, version) - raise UnknownImmutableContainerVersionError(msg) + raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) self._data_offset = 0xc diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 4abf22064..ce9cc5ff4 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -95,9 +95,7 @@ class MutableShareFile(object): data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) if not self.is_valid_header(data): - msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ - (filename, magic, self.MAGIC) - raise UnknownMutableContainerVersionError(msg) + raise UnknownMutableContainerVersionError(filename, magic) self.parent = parent # for logging def log(self, *args, **kwargs): diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 2c8d84b9e..bf9eff37a 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -646,7 +646,8 @@ class Server(unittest.TestCase): e = self.failUnlessRaises(UnknownImmutableContainerVersionError, ss.remote_get_buckets, b"si1") - self.failUnlessIn(" had version 0 but we wanted 1", str(e)) + self.assertEqual(e.filename, fn) + self.assertEqual(e.version, 0) def test_disconnect(self): # simulate a disconnection @@ -1127,8 +1128,8 @@ class MutableServer(unittest.TestCase): read = ss.remote_slot_readv e = self.failUnlessRaises(UnknownMutableContainerVersionError, read, b"si1", [0], [(0,10)]) - self.failUnlessIn(" had magic ", str(e)) - self.failUnlessIn(" but we wanted ", str(e)) + self.assertEqual(e.filename, fn) + self.assertTrue(e.version.startswith(b"BAD MAGIC")) def test_container_size(self): ss = self.create("test_container_size") From 10724a91f9ca2fe929f4e29adb03b876b21f9fe5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:17:36 -0400 Subject: [PATCH 03/24] introduce an explicit representation of the v1 immutable container schema This is only a partial representation, sufficient to express the changes that are coming in v2. --- src/allmydata/storage/immutable.py | 37 ++++++----- src/allmydata/storage/immutable_schema.py | 81 +++++++++++++++++++++++ 2 files changed, 102 insertions(+), 16 deletions(-) create mode 100644 src/allmydata/storage/immutable_schema.py diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index fcc60509c..ae5a710af 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -25,9 +25,14 @@ from allmydata.interfaces import ( ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition -from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError +from .immutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_version, +) + + # each share file (in storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and # RIBucketReader.read . The lease information is not accessible through these @@ -118,9 +123,16 @@ class ShareFile(object): ``False`` otherwise. """ (version,) = struct.unpack(">L", header[:4]) - return version == 1 + return schema_from_version(version) is not None - def __init__(self, filename, max_size=None, create=False, lease_count_format="L"): + def __init__( + self, + filename, + max_size=None, + create=False, + lease_count_format="L", + schema=NEWEST_SCHEMA_VERSION, + ): """ Initialize a ``ShareFile``. @@ -156,24 +168,17 @@ class ShareFile(object): # it. Also construct the metadata. assert not os.path.exists(self.home) fileutil.make_dirs(os.path.dirname(self.home)) - # The second field -- the four-byte share data length -- is no - # longer used as of Tahoe v1.3.0, but we continue to write it in - # there in case someone downgrades a storage server from >= - # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one - # server to another, etc. We do saturation -- a share data length - # larger than 2**32-1 (what can fit into the field) is marked as - # the largest length that can fit into the field. That way, even - # if this does happen, the old < v1.3.0 server will still allow - # clients to read the first part of the share. + self._schema = schema with open(self.home, 'wb') as f: - f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0)) + f.write(self._schema.header(max_size)) self._lease_offset = max_size + 0x0c self._num_leases = 0 else: with open(self.home, 'rb') as f: filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) - if version != 1: + self._schema = schema_from_version(version) + if self._schema is None: raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) @@ -209,7 +214,7 @@ class ShareFile(object): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_immutable_data()) + f.write(self._schema.serialize_lease(lease_info)) def _read_num_leases(self, f): f.seek(0x08) @@ -240,7 +245,7 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield LeaseInfo.from_immutable_data(data) + yield self._schema.unserialize_lease(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py new file mode 100644 index 000000000..759752bed --- /dev/null +++ b/src/allmydata/storage/immutable_schema.py @@ -0,0 +1,81 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +import struct + +from .lease import ( + LeaseInfo, +) + +def _header(version, max_size): + # (int, int) -> bytes + """ + Construct the header for an immutable container. + + :param version: The container version to include the in header. + :param max_size: The maximum data size the container will hold. + + :return: Some bytes to write at the beginning of the container. + """ + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. + return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + +class _V1(object): + """ + Implement encoding and decoding for v1 of the immutable container. + """ + version = 1 + + @classmethod + def header(cls, max_size): + return _header(cls.version, max_size) + + @classmethod + def serialize_lease(cls, lease): + if isinstance(lease, LeaseInfo): + return lease.to_immutable_data() + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return LeaseInfo.from_immutable_data(data) + + +ALL_SCHEMAS = {_V1} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) + +def schema_from_version(version): + # (int) -> Optional[type] + """ + Find the schema object that corresponds to a certain version number. + """ + for schema in ALL_SCHEMAS: + if schema.version == version: + return schema + return None From 3b4141952387452894ed5c0ed58113e272ad3e4f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:32:59 -0400 Subject: [PATCH 04/24] apply the ShareFile tests to all schema versions using hypothesis --- src/allmydata/test/test_storage.py | 60 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index bf9eff37a..655395042 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -43,6 +43,9 @@ from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile +from allmydata.storage.immutable_schema import ( + ALL_SCHEMAS, +) from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \ si_b2a, si_a2b @@ -844,6 +847,9 @@ class Server(unittest.TestCase): # Create a bucket: rs0, cs0 = self.create_bucket_5_shares(ss, b"si0") + + # Upload of an immutable implies creation of a single lease with the + # supplied secrets. (lease,) = ss.get_leases(b"si0") self.assertTrue(lease.is_renew_secret(rs0)) @@ -3125,6 +3131,7 @@ class Stats(unittest.TestCase): self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) +immutable_schemas = strategies.sampled_from(list(ALL_SCHEMAS)) class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" @@ -3136,47 +3143,54 @@ class ShareFileTests(unittest.TestCase): # Should be b'abDEF' now. return sf - def test_read_write(self): + @given(immutable_schemas) + def test_read_write(self, schema): """Basic writes can be read.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 3), b"abD") self.assertEqual(sf.read_share_data(1, 4), b"bDEF") - def test_reads_beyond_file_end(self): + @given(immutable_schemas) + def test_reads_beyond_file_end(self, schema): """Reads beyond the file size are truncated.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 10), b"abDEF") self.assertEqual(sf.read_share_data(5, 10), b"") - def test_too_large_write(self): + @given(immutable_schemas) + def test_too_large_write(self, schema): """Can't do write larger than file size.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(DataTooLargeError): sf.write_share_data(0, b"x" * 3000) - def test_no_leases_cancelled(self): + @given(immutable_schemas) + def test_no_leases_cancelled(self, schema): """If no leases were cancelled, IndexError is raised.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(IndexError): sf.cancel_lease(b"garbage") - def test_long_lease_count_format(self): + @given(immutable_schemas) + def test_long_lease_count_format(self, schema): """ ``ShareFile.__init__`` raises ``ValueError`` if the lease count format given is longer than one character. """ with self.assertRaises(ValueError): - self.get_sharefile(lease_count_format="BB") + self.get_sharefile(schema=schema, lease_count_format="BB") - def test_large_lease_count_format(self): + @given(immutable_schemas) + def test_large_lease_count_format(self, schema): """ ``ShareFile.__init__`` raises ``ValueError`` if the lease count format encodes to a size larger than 8 bytes. """ with self.assertRaises(ValueError): - self.get_sharefile(lease_count_format="Q") + self.get_sharefile(schema=schema, lease_count_format="Q") - def test_avoid_lease_overflow(self): + @given(immutable_schemas) + def test_avoid_lease_overflow(self, schema): """ If the share file already has the maximum number of leases supported then ``ShareFile.add_lease`` raises ``struct.error`` and makes no changes @@ -3190,7 +3204,7 @@ class ShareFileTests(unittest.TestCase): ) # Make it a little easier to reach the condition by limiting the # number of leases to only 255. - sf = self.get_sharefile(lease_count_format="B") + sf = self.get_sharefile(schema=schema, lease_count_format="B") # Add the leases. for i in range(2 ** 8 - 1): @@ -3214,16 +3228,17 @@ class ShareFileTests(unittest.TestCase): self.assertEqual(before_data, after_data) - def test_renew_secret(self): + @given(immutable_schemas) + def test_renew_secret(self, schema): """ - A lease loaded from an immutable share file can have its renew secret - verified. + A lease loaded from an immutable share file at any schema version can have + its renew secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, @@ -3234,16 +3249,17 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_renew_secret(renew_secret)) - def test_cancel_secret(self): + @given(immutable_schemas) + def test_cancel_secret(self, schema): """ - A lease loaded from an immutable share file can have its cancel secret - verified. + A lease loaded from an immutable share file at any schema version can have + its cancel secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, From 234b8dcde2febc2b3eee96e1ede4d123f634dcb1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 11:56:49 -0400 Subject: [PATCH 05/24] Formalize LeaseInfo interface in preparation for another implementation --- src/allmydata/storage/lease.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 6d21bb2b2..23071707a 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -15,6 +15,11 @@ import struct, time import attr +from zope.interface import ( + Interface, + implementer, +) + from allmydata.util.hashutil import timing_safe_compare # struct format for representation of a lease in an immutable share @@ -23,6 +28,84 @@ IMMUTABLE_FORMAT = ">L32s32sL" # struct format for representation of a lease in a mutable share MUTABLE_FORMAT = ">LL32s32s20s" + +class ILeaseInfo(Interface): + """ + Represent a marker attached to a share that indicates that share should be + retained for some amount of time. + + Typically clients will create and renew leases on their shares as a way to + inform storage servers that there is still interest in those shares. A + share may have more than one lease. If all leases on a share have + expiration times in the past then the storage server may take this as a + strong hint that no one is interested in the share anymore and therefore + the share may be deleted to reclaim the space. + """ + def renew(new_expire_time): + """ + Create a new ``ILeaseInfo`` with the given expiration time. + + :param Union[int, float] new_expire_time: The expiration time the new + ``ILeaseInfo`` will have. + + :return: The new ``ILeaseInfo`` provider with the new expiration time. + """ + + def get_expiration_time(): + """ + :return Union[int, float]: this lease's expiration time + """ + + def get_grant_renew_time_time(): + """ + :return Union[int, float]: a guess about the last time this lease was + renewed + """ + + def get_age(): + """ + :return Union[int, float]: a guess about how long it has been since this + lease was renewed + """ + + def to_immutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in an immutable container + """ + + def to_mutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in a mutable container + """ + + def immutable_size(): + """ + :return int: the size of the serialized representation of this lease in an + immutable container + """ + + def mutable_size(): + """ + :return int: the size of the serialized representation of this lease in a + mutable container + """ + + def is_renew_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's renew + secret, ``False`` otherwise + """ + + def is_cancel_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's cancel + secret, ``False`` otherwise + """ + + +@implementer(ILeaseInfo) @attr.s(frozen=True) class LeaseInfo(object): """ From b69e8d013bfc32f8b7ca5948ad36a5c60b3db73a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:07:49 -0400 Subject: [PATCH 06/24] introduce immutable container schema version 2 This version used on-disk hashed secrets to reduce the chance of secrets leaking to unintended parties. --- src/allmydata/storage/immutable.py | 23 ++++- src/allmydata/storage/immutable_schema.py | 105 +++++++++++++++++++++- src/allmydata/storage/lease.py | 82 +++++++++++++++++ src/allmydata/test/test_download.py | 14 ++- 4 files changed, 214 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index ae5a710af..216262a81 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -39,14 +39,14 @@ from .immutable_schema import ( # interfaces. # The share file has the following layout: -# 0x00: share file version number, four bytes, current version is 1 +# 0x00: share file version number, four bytes, current version is 2 # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below. # 0x08: number of leases, four bytes big-endian # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy) # A+0x0c = B: first lease. Lease format is: # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner -# B+0x04: renew secret, 32 bytes (SHA256) -# B+0x24: cancel secret, 32 bytes (SHA256) +# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below. +# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b) # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch # B+0x48: next lease, or end of record @@ -58,6 +58,23 @@ from .immutable_schema import ( # then the value stored in this field will be the actual share data length # modulo 2**32. +# Footnote 2: The change between share file version number 1 and 2 is that +# storage of lease secrets is changed from plaintext to hashed. This change +# protects the secrets from compromises of local storage on the server: if a +# plaintext cancel secret is somehow exfiltrated from the storage server, an +# attacker could use it to cancel that lease and potentially cause user data +# to be discarded before intended by the real owner. As of this comment, +# lease cancellation is disabled because there have been at least two bugs +# which leak the persisted value of the cancellation secret. If lease secrets +# were stored hashed instead of plaintext then neither of these bugs would +# have allowed an attacker to learn a usable cancel secret. +# +# Clients are free to construct these secrets however they like. The +# Tahoe-LAFS client uses a SHA256-based construction. The server then uses +# blake2b to hash these values for storage so that it retains no persistent +# copy of the original secret. +# + def _fix_lease_count_format(lease_count_format): """ Turn a single character struct format string into a format string suitable diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 759752bed..fc823507a 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,8 +13,14 @@ if PY2: import struct +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + from .lease import ( LeaseInfo, + HashedLeaseInfo, ) def _header(version, max_size): @@ -22,10 +28,10 @@ def _header(version, max_size): """ Construct the header for an immutable container. - :param version: The container version to include the in header. - :param max_size: The maximum data size the container will hold. + :param version: the container version to include the in header + :param max_size: the maximum data size the container will hold - :return: Some bytes to write at the beginning of the container. + :return: some bytes to write at the beginning of the container """ # The second field -- the four-byte share data length -- is no longer # used as of Tahoe v1.3.0, but we continue to write it in there in @@ -38,6 +44,97 @@ def _header(version, max_size): # the share. return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + +class _V2(object): + """ + Implement encoding and decoding for v2 of the immutable container. + """ + version = 2 + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + @classmethod + def header(cls, max_size): + # type: (int) -> bytes + """ + Construct a container header. + + :param max_size: the maximum size the container can hold + + :return: the header bytes + """ + return _header(cls.version, max_size) + + @classmethod + def serialize_lease(cls, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + """ + Serialize a lease to be written to a v2 container. + + :param lease: the lease to serialize + + :return: the serialized bytes + """ + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = cls._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return lease.to_immutable_data() + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # type: (bytes) -> HashedLeaseInfo + """ + Unserialize some bytes from a v2 container. + + :param data: the bytes from the container + + :return: the ``HashedLeaseInfo`` the bytes represent + """ + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret) + + class _V1(object): """ Implement encoding and decoding for v1 of the immutable container. @@ -66,7 +163,7 @@ class _V1(object): return LeaseInfo.from_immutable_data(data) -ALL_SCHEMAS = {_V1} +ALL_SCHEMAS = {_V2, _V1} ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 23071707a..895a0970c 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -20,6 +20,10 @@ from zope.interface import ( implementer, ) +from twisted.python.components import ( + proxyForInterface, +) + from allmydata.util.hashutil import timing_safe_compare # struct format for representation of a lease in an immutable share @@ -245,3 +249,81 @@ class LeaseInfo(object): ] values = struct.unpack(">LL32s32s20s", data) return cls(**dict(zip(names, values))) + + +@attr.s(frozen=True) +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): + """ + A ``HashedLeaseInfo`` wraps lease information in which the secrets have + been hashed. + """ + _lease_info = attr.ib() + _hash = attr.ib() + + def is_renew_secret(self, candidate_secret): + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) + + def is_cancel_secret(self, candidate_secret): + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + if isinstance(candidate_secret, _HashedCancelSecret): + # Someone read it off of this object in this project - probably + # the lease crawler - and is just trying to use it to identify + # which lease it wants to operate on. Avoid re-hashing the value. + # + # It is important that this codepath is only availably internally + # for this process to talk to itself. If it were to be exposed to + # clients over the network, they could just provide the hashed + # value to avoid having to ever learn the original value. + hashed_candidate = candidate_secret.hashed_value + else: + # It is not yet hashed so hash it. + hashed_candidate = self._hash(candidate_secret) + + return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) + + @property + def owner_num(self): + return self._lease_info.owner_num + + @property + def cancel_secret(self): + """ + Give back an opaque wrapper around the hashed cancel secret which can + later be presented for a succesful equality comparison. + """ + # We don't *have* the cancel secret. We hashed it and threw away the + # original. That's good. It does mean that some code that runs + # in-process with the storage service (LeaseCheckingCrawler) runs into + # some difficulty. That code wants to cancel leases and does so using + # the same interface that faces storage clients (or would face them, + # if lease cancellation were exposed). + # + # Since it can't use the hashed secret to cancel a lease (that's the + # point of the hashing) and we don't have the unhashed secret to give + # it, instead we give it a marker that `cancel_lease` will recognize. + # On recognizing it, if the hashed value given matches the hashed + # value stored it is considered a match and the lease can be + # cancelled. + # + # This isn't great. Maybe the internal and external consumers of + # cancellation should use different interfaces. + return _HashedCancelSecret(self._lease_info.cancel_secret) + + +@attr.s(frozen=True) +class _HashedCancelSecret(object): + """ + ``_HashedCancelSecret`` is a marker type for an already-hashed lease + cancel secret that lets internal lease cancellers bypass the hash-based + protection that's imposed on external lease cancellers. + + :ivar bytes hashed_value: The already-hashed secret. + """ + hashed_value = attr.ib() diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index ca5b5650b..85d89cde6 100644 --- a/src/allmydata/test/test_download.py +++ b/src/allmydata/test/test_download.py @@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase): d.addCallback(_download, imm_uri, i, expected) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) - corrupt_values = [(3, 2, "no-sh2"), - (15, 2, "need-4th"), # share looks v2 - ] + corrupt_values = [ + # Make the container version for share number 2 look + # unsupported. If you add support for immutable share file + # version number much past 16 million then you will have to + # update this test. Also maybe you have other problems. + (1, 255, "no-sh2"), + # Make the immutable share number 2 (not the container, the + # thing inside the container) look unsupported. Ditto the + # above about version numbers in the ballpark of 16 million. + (13, 255, "need-4th"), + ] for i,newvalue,expected in corrupt_values: d.addCallback(self._corrupt_set, imm_uri, i, newvalue) d.addCallback(_download, imm_uri, i, expected) From 7a59aa83bb9e429d0b44f47fff6365dbfa24f42f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:12:54 -0400 Subject: [PATCH 07/24] add missing import --- src/allmydata/storage/immutable_schema.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index fc823507a..6ac49f6f1 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,6 +13,11 @@ if PY2: import struct +try: + from typing import Union +except ImportError: + pass + import attr from nacl.hash import blake2b From 6889ab2a76a1665dc7adb11dfa2205760641f303 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:16:55 -0400 Subject: [PATCH 08/24] fix syntax of type hint --- src/allmydata/storage/immutable_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 6ac49f6f1..7ffec418a 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -29,7 +29,7 @@ from .lease import ( ) def _header(version, max_size): - # (int, int) -> bytes + # type: (int, int) -> bytes """ Construct the header for an immutable container. From 2186bfcc372d01ab79f6899e8d0a54157ee83444 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:40:43 -0400 Subject: [PATCH 09/24] silence some mypy errors :/ I don't know the "right" way to make mypy happy with these things --- src/allmydata/storage/immutable_schema.py | 4 ++-- src/allmydata/storage/lease.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 7ffec418a..440755b01 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -169,8 +169,8 @@ class _V1(object): ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore def schema_from_version(version): # (int) -> Optional[type] diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 895a0970c..63dba15e8 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -252,7 +252,7 @@ class LeaseInfo(object): @attr.s(frozen=True) -class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ignore # unsupported dynamic base class """ A ``HashedLeaseInfo`` wraps lease information in which the secrets have been hashed. From 931ddf85a532178ab83584820eda8605a495d5ab Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 15:26:58 -0400 Subject: [PATCH 10/24] introduce an explicit representation of the v1 mutable container schema This is only a partial representation, sufficient to express the changes that are coming in v2. --- src/allmydata/storage/mutable.py | 46 +++------ src/allmydata/storage/mutable_schema.py | 119 ++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 31 deletions(-) create mode 100644 src/allmydata/storage/mutable_schema.py diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index ce9cc5ff4..346edd53a 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -24,7 +24,10 @@ from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownMutableContainerVersionError, \ DataTooLargeError from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE - +from .mutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_header, +) # the MutableShareFile is like the ShareFile, but used for mutable data. It # has a different layout. See docs/mutable.txt for more details. @@ -64,9 +67,6 @@ class MutableShareFile(object): # our sharefiles share with a recognizable string, plus some random # binary data to reduce the chance that a regular text file will look # like a sharefile. - MAGIC = b"Tahoe mutable container v1\n" + b"\x75\x09\x44\x03\x8e" - assert len(MAGIC) == 32 - assert isinstance(MAGIC, bytes) MAX_SIZE = MAX_MUTABLE_SHARE_SIZE # TODO: decide upon a policy for max share size @@ -82,20 +82,19 @@ class MutableShareFile(object): :return: ``True`` if the bytes could belong to this container, ``False`` otherwise. """ - return header.startswith(cls.MAGIC) + return schema_from_header(header) is not None - def __init__(self, filename, parent=None): + def __init__(self, filename, parent=None, schema=NEWEST_SCHEMA_VERSION): self.home = filename if os.path.exists(self.home): # we don't cache anything, just check the magic with open(self.home, 'rb') as f: - data = f.read(self.HEADER_SIZE) - (magic, - write_enabler_nodeid, write_enabler, - data_length, extra_least_offset) = \ - struct.unpack(">32s20s32sQQ", data) - if not self.is_valid_header(data): - raise UnknownMutableContainerVersionError(filename, magic) + header = f.read(self.HEADER_SIZE) + self._schema = schema_from_header(header) + if self._schema is None: + raise UnknownMutableContainerVersionError(filename, header) + else: + self._schema = schema self.parent = parent # for logging def log(self, *args, **kwargs): @@ -103,23 +102,8 @@ class MutableShareFile(object): def create(self, my_nodeid, write_enabler): assert not os.path.exists(self.home) - data_length = 0 - extra_lease_offset = (self.HEADER_SIZE - + 4 * self.LEASE_SIZE - + data_length) - assert extra_lease_offset == self.DATA_OFFSET # true at creation - num_extra_leases = 0 with open(self.home, 'wb') as f: - header = struct.pack( - ">32s20s32sQQ", - self.MAGIC, my_nodeid, write_enabler, - data_length, extra_lease_offset, - ) - leases = (b"\x00" * self.LEASE_SIZE) * 4 - f.write(header + leases) - # data goes here, empty after creation - f.write(struct.pack(">L", num_extra_leases)) - # extra leases go here, none at creation + f.write(self._schema.header(my_nodeid, write_enabler)) def unlink(self): os.unlink(self.home) @@ -252,7 +236,7 @@ class MutableShareFile(object): + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_mutable_data()) + f.write(self._schema.serialize_lease(lease_info)) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None @@ -269,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = LeaseInfo.from_mutable_data(data) + lease_info = self._schema.unserialize_lease(data) if lease_info.owner_num == 0: return None return lease_info diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py new file mode 100644 index 000000000..25f24ea1f --- /dev/null +++ b/src/allmydata/storage/mutable_schema.py @@ -0,0 +1,119 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +import struct + +from .lease import ( + LeaseInfo, +) + +class _V1(object): + """ + Implement encoding and decoding for v1 of the mutable container. + """ + version = 1 + + _MAGIC = ( + # Make it easy for people to recognize + b"Tahoe mutable container v1\n" + # But also keep the chance of accidental collision low + b"\x75\x09\x44\x03\x8e" + ) + assert len(_MAGIC) == 32 + + _HEADER_FORMAT = ">32s20s32sQQ" + + # This size excludes leases + _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) + + _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + + @classmethod + def magic_matches(cls, candidate_magic): + # type: (bytes) -> bool + """ + Return ``True`` if a candidate string matches the expected magic string + from a mutable container header, ``False`` otherwise. + """ + return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC + + @classmethod + def header(cls, nodeid, write_enabler): + # type: (bytes, bytes) -> bytes + """ + Construct a container header. + + :param nodeid: A unique identifier for the node holding this + container. + + :param write_enabler: A secret shared with the client used to + authorize changes to the contents of this container. + """ + fixed_header = struct.pack( + ">32s20s32sQQ", + cls._MAGIC, + nodeid, + write_enabler, + # data length, initially the container is empty + 0, + cls._EXTRA_LEASE_OFFSET, + ) + blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4 + extra_lease_count = struct.pack(">L", 0) + + return b"".join([ + fixed_header, + # share data will go in between the next two items eventually but + # for now there is none. + blank_leases, + extra_lease_count, + ]) + + @classmethod + def serialize_lease(cls, lease_info): + # type: (LeaseInfo) -> bytes + """ + Serialize a lease to be written to a v1 container. + + :param lease: the lease to serialize + + :return: the serialized bytes + """ + return lease_info.to_mutable_data() + + @classmethod + def unserialize_lease(cls, data): + # type: (bytes) -> LeaseInfo + """ + Unserialize some bytes from a v1 container. + + :param data: the bytes from the container + + :return: the ``LeaseInfo`` the bytes represent + """ + return LeaseInfo.from_mutable_data(data) + + +ALL_SCHEMAS = {_V1} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore + +def schema_from_header(header): + # (int) -> Optional[type] + """ + Find the schema object that corresponds to a certain version number. + """ + for schema in ALL_SCHEMAS: + if schema.magic_matches(header): + return schema + return None From 728638fe230dfdf0149c5835b0a8077230dbf021 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 15:37:29 -0400 Subject: [PATCH 11/24] apply the MutableShareFile tests to all known schemas --- src/allmydata/test/test_storage.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 655395042..fbd005050 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -42,9 +42,12 @@ from allmydata.util import fileutil, hashutil, base32 from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile +from allmydata.storage.mutable_schema import ( + ALL_SCHEMAS as ALL_MUTABLE_SCHEMAS, +) from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile from allmydata.storage.immutable_schema import ( - ALL_SCHEMAS, + ALL_SCHEMAS as ALL_IMMUTABLE_SCHEMAS, ) from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \ @@ -3131,7 +3134,7 @@ class Stats(unittest.TestCase): self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) -immutable_schemas = strategies.sampled_from(list(ALL_SCHEMAS)) +immutable_schemas = strategies.sampled_from(list(ALL_IMMUTABLE_SCHEMAS)) class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" @@ -3270,15 +3273,17 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_cancel_secret(cancel_secret)) +mutable_schemas = strategies.sampled_from(list(ALL_MUTABLE_SCHEMAS)) class MutableShareFileTests(unittest.TestCase): """ Tests for allmydata.storage.mutable.MutableShareFile. """ - def get_sharefile(self): - return MutableShareFile(self.mktemp()) + def get_sharefile(self, **kwargs): + return MutableShareFile(self.mktemp(), **kwargs) @given( + schema=mutable_schemas, nodeid=strategies.just(b"x" * 20), write_enabler=strategies.just(b"y" * 32), datav=strategies.lists( @@ -3289,12 +3294,12 @@ class MutableShareFileTests(unittest.TestCase): ), new_length=offsets(), ) - def test_readv_reads_share_data(self, nodeid, write_enabler, datav, new_length): + def test_readv_reads_share_data(self, schema, nodeid, write_enabler, datav, new_length): """ ``MutableShareFile.readv`` returns bytes from the share data portion of the share file. """ - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) sf.create(my_nodeid=nodeid, write_enabler=write_enabler) sf.writev(datav=datav, new_length=new_length) @@ -3329,12 +3334,13 @@ class MutableShareFileTests(unittest.TestCase): self.assertEqual(expected_data, read_data) @given( + schema=mutable_schemas, nodeid=strategies.just(b"x" * 20), write_enabler=strategies.just(b"y" * 32), readv=strategies.lists(strategies.tuples(offsets(), lengths()), min_size=1), random=strategies.randoms(), ) - def test_readv_rejects_negative_length(self, nodeid, write_enabler, readv, random): + def test_readv_rejects_negative_length(self, schema, nodeid, write_enabler, readv, random): """ If a negative length is given to ``MutableShareFile.readv`` in a read vector then ``AssertionError`` is raised. @@ -3373,7 +3379,7 @@ class MutableShareFileTests(unittest.TestCase): *broken_readv[readv_index] ) - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) sf.create(my_nodeid=nodeid, write_enabler=write_enabler) # A read with a broken read vector is an error. From 8adff050a7f179c6c4796f4d3b04fab60924cbad Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 13:51:46 -0400 Subject: [PATCH 12/24] compare without breaking out all of the fields HashedLeaseInfo doesn't have all of these attributes --- src/allmydata/test/test_storage.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index fbd005050..92176ce52 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1361,14 +1361,21 @@ class MutableServer(unittest.TestCase): 2: [b"2"*10]}) def compare_leases_without_timestamps(self, leases_a, leases_b): - self.failUnlessEqual(len(leases_a), len(leases_b)) - for i in range(len(leases_a)): - a = leases_a[i] - b = leases_b[i] - self.failUnlessEqual(a.owner_num, b.owner_num) - self.failUnlessEqual(a.renew_secret, b.renew_secret) - self.failUnlessEqual(a.cancel_secret, b.cancel_secret) - self.failUnlessEqual(a.nodeid, b.nodeid) + for a, b in zip(leases_a, leases_b): + # The leases aren't always of the same type (though of course + # corresponding elements in the two lists should be of the same + # type as each other) so it's inconvenient to just reach in and + # normalize the expiration timestamp. We don't want to call + # `renew` on both objects to normalize the expiration timestamp in + # case `renew` is broken and gives us back equal outputs from + # non-equal inputs (expiration timestamp aside). It seems + # reasonably safe to use `renew` to make _one_ of the timestamps + # equal to the other though. + self.assertEqual( + a.renew(b.get_expiration_time()), + b, + ) + self.assertEqual(len(leases_a), len(leases_b)) def test_leases(self): ss = self.create("test_leases") From 0cd96ed713ba6429b76e2520752acb7e8e166e40 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:09:46 -0400 Subject: [PATCH 13/24] fix the debug tool for the hashed lease secret case --- src/allmydata/scripts/debug.py | 4 ++-- src/allmydata/storage/lease.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 260cca55b..6201ce28f 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -230,8 +230,8 @@ def dump_mutable_share(options): print(" ownerid: %d" % lease.owner_num, file=out) when = format_expiration_time(lease.get_expiration_time()) print(" expires in %s" % when, file=out) - print(" renew_secret: %s" % str(base32.b2a(lease.renew_secret), "utf-8"), file=out) - print(" cancel_secret: %s" % str(base32.b2a(lease.cancel_secret), "utf-8"), file=out) + print(" renew_secret: %s" % lease.present_renew_secret(), file=out) + print(" cancel_secret: %s" % lease.present_cancel_secret(), file=out) print(" secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid), file=out) else: print("No leases.", file=out) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 63dba15e8..3ec760dbe 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -25,6 +25,7 @@ from twisted.python.components import ( ) from allmydata.util.hashutil import timing_safe_compare +from allmydata.util import base32 # struct format for representation of a lease in an immutable share IMMUTABLE_FORMAT = ">L32s32sL" @@ -102,12 +103,24 @@ class ILeaseInfo(Interface): secret, ``False`` otherwise """ + def present_renew_secret(): + """ + :return str: Text which could reasonably be shown to a person representing + this lease's renew secret. + """ + def is_cancel_secret(candidate_secret): """ :return bool: ``True`` if the given byte string is this lease's cancel secret, ``False`` otherwise """ + def present_cancel_secret(): + """ + :return str: Text which could reasonably be shown to a person representing + this lease's cancel secret. + """ + @implementer(ILeaseInfo) @attr.s(frozen=True) @@ -173,6 +186,13 @@ class LeaseInfo(object): """ return timing_safe_compare(self.renew_secret, candidate_secret) + def present_renew_secret(self): + # type: () -> bytes + """ + Return the renew secret, base32-encoded. + """ + return str(base32.b2a(self.renew_secret), "utf-8") + def is_cancel_secret(self, candidate_secret): # type: (bytes) -> bool """ @@ -183,6 +203,13 @@ class LeaseInfo(object): """ return timing_safe_compare(self.cancel_secret, candidate_secret) + def present_cancel_secret(self): + # type: () -> bytes + """ + Return the cancel secret, base32-encoded. + """ + return str(base32.b2a(self.cancel_secret), "utf-8") + def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self._expiration_time - 31*24*60*60 @@ -267,6 +294,12 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign """ return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) + def present_renew_secret(self): + """ + Present the hash of the secret with a marker indicating it is a hash. + """ + return u"hash:" + super(HashedLeaseInfo, self).present_renew_secret() + def is_cancel_secret(self, candidate_secret): """ Hash the candidate secret and compare the result to the stored hashed @@ -288,10 +321,20 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) + def present_cancel_secret(self): + """ + Present the hash of the secret with a marker indicating it is a hash. + """ + return u"hash:" + super(HashedLeaseInfo, self).present_cancel_secret() + @property def owner_num(self): return self._lease_info.owner_num + @property + def nodeid(self): + return self._lease_info.nodeid + @property def cancel_secret(self): """ From 5d703d989339587cfd5706fea1728ecb59e17808 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:10:27 -0400 Subject: [PATCH 14/24] some type annotations --- src/allmydata/storage/lease.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 3ec760dbe..9ddbc9c68 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -187,7 +187,7 @@ class LeaseInfo(object): return timing_safe_compare(self.renew_secret, candidate_secret) def present_renew_secret(self): - # type: () -> bytes + # type: () -> str """ Return the renew secret, base32-encoded. """ @@ -204,7 +204,7 @@ class LeaseInfo(object): return timing_safe_compare(self.cancel_secret, candidate_secret) def present_cancel_secret(self): - # type: () -> bytes + # type: () -> str """ Return the cancel secret, base32-encoded. """ @@ -288,6 +288,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign _hash = attr.ib() def is_renew_secret(self, candidate_secret): + # type: (bytes) -> bool """ Hash the candidate secret and compare the result to the stored hashed secret. @@ -295,12 +296,14 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) def present_renew_secret(self): + # type: () -> str """ Present the hash of the secret with a marker indicating it is a hash. """ return u"hash:" + super(HashedLeaseInfo, self).present_renew_secret() def is_cancel_secret(self, candidate_secret): + # type: (bytes) -> bool """ Hash the candidate secret and compare the result to the stored hashed secret. @@ -322,6 +325,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) def present_cancel_secret(self): + # type: () -> str """ Present the hash of the secret with a marker indicating it is a hash. """ From 3de9c73b0b066e5e15978a15c2903d10e398ed0a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:11:05 -0400 Subject: [PATCH 15/24] preserve the type when renewing HashedLeaseInfo does this mean immutable lease renewal is untested? maybe --- src/allmydata/storage/lease.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 9ddbc9c68..1a5416d6a 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -287,6 +287,13 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign _lease_info = attr.ib() _hash = attr.ib() + def renew(self, new_expire_time): + # Preserve the HashedLeaseInfo wrapper around the renewed LeaseInfo. + return attr.assoc( + self, + _lease_info=super(HashedLeaseInfo, self).renew(new_expire_time), + ) + def is_renew_secret(self, candidate_secret): # type: (bytes) -> bool """ From 456df65a07a0c48f3a056519282cc96b5e4e2f25 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 14:16:43 -0400 Subject: [PATCH 16/24] Add v2 of the mutable container schema It uses hashed lease secrets, like v2 of the immutable container schema. --- src/allmydata/storage/mutable_schema.py | 225 ++++++++++++++++++++---- 1 file changed, 187 insertions(+), 38 deletions(-) diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py index 25f24ea1f..9496fe571 100644 --- a/src/allmydata/storage/mutable_schema.py +++ b/src/allmydata/storage/mutable_schema.py @@ -13,23 +13,193 @@ if PY2: import struct +try: + from typing import Union +except ImportError: + pass + +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + +from ..util.hashutil import ( + tagged_hash, +) from .lease import ( LeaseInfo, + HashedLeaseInfo, ) +def _magic(version): + # type: (int) -> bytes + """ + Compute a "magic" header string for a container of the given version. + + :param version: The version number of the container. + """ + # Make it easy for people to recognize + human_readable = u"Tahoe mutable container v{:d}\n".format(version).encode("ascii") + # But also keep the chance of accidental collision low + if version == 1: + # It's unclear where this byte sequence came from. It may have just + # been random. In any case, preserve it since it is the magic marker + # in all v1 share files. + random_bytes = b"\x75\x09\x44\x03\x8e" + else: + # For future versions, use a reproducable scheme. + random_bytes = tagged_hash( + b"allmydata_mutable_container_header", + human_readable, + truncate_to=5, + ) + magic = human_readable + random_bytes + assert len(magic) == 32 + if version > 1: + # The chance of collision is pretty low but let's just be sure about + # it. + assert magic != _magic(version - 1) + + return magic + +def _header(magic, extra_lease_offset, nodeid, write_enabler): + # type: (bytes, int, bytes, bytes) -> bytes + """ + Construct a container header. + + :param nodeid: A unique identifier for the node holding this + container. + + :param write_enabler: A secret shared with the client used to + authorize changes to the contents of this container. + """ + fixed_header = struct.pack( + ">32s20s32sQQ", + magic, + nodeid, + write_enabler, + # data length, initially the container is empty + 0, + extra_lease_offset, + ) + blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4 + extra_lease_count = struct.pack(">L", 0) + + return b"".join([ + fixed_header, + # share data will go in between the next two items eventually but + # for now there is none. + blank_leases, + extra_lease_count, + ]) + + +class _V2(object): + """ + Implement encoding and decoding for v2 of the mutable container. + """ + version = 2 + _MAGIC = _magic(version) + + _HEADER_FORMAT = ">32s20s32sQQ" + + # This size excludes leases + _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) + + _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + @classmethod + def magic_matches(cls, candidate_magic): + # type: (bytes) -> bool + """ + Return ``True`` if a candidate string matches the expected magic string + from a mutable container header, ``False`` otherwise. + """ + return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC + + @classmethod + def header(cls, nodeid, write_enabler): + return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) + + @classmethod + def serialize_lease(cls, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + """ + Serialize a lease to be written to a v2 container. + + :param lease: the lease to serialize + + :return: the serialized bytes + """ + if isinstance(lease, LeaseInfo): + # v2 of the mutable schema stores lease secrets hashed. If we're + # given a LeaseInfo then it holds plaintext secrets. Hash them + # before trying to serialize. + lease = cls._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return lease.to_mutable_data() + raise ValueError( + "MutableShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # type: (bytes) -> HashedLeaseInfo + """ + Unserialize some bytes from a v2 container. + + :param data: the bytes from the container + + :return: the ``HashedLeaseInfo`` the bytes represent + """ + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + lease = LeaseInfo.from_mutable_data(data) + return HashedLeaseInfo(lease, cls._hash_secret) + + class _V1(object): """ Implement encoding and decoding for v1 of the mutable container. """ version = 1 - - _MAGIC = ( - # Make it easy for people to recognize - b"Tahoe mutable container v1\n" - # But also keep the chance of accidental collision low - b"\x75\x09\x44\x03\x8e" - ) - assert len(_MAGIC) == 32 + _MAGIC = _magic(version) _HEADER_FORMAT = ">32s20s32sQQ" @@ -49,35 +219,8 @@ class _V1(object): @classmethod def header(cls, nodeid, write_enabler): - # type: (bytes, bytes) -> bytes - """ - Construct a container header. + return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) - :param nodeid: A unique identifier for the node holding this - container. - - :param write_enabler: A secret shared with the client used to - authorize changes to the contents of this container. - """ - fixed_header = struct.pack( - ">32s20s32sQQ", - cls._MAGIC, - nodeid, - write_enabler, - # data length, initially the container is empty - 0, - cls._EXTRA_LEASE_OFFSET, - ) - blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4 - extra_lease_count = struct.pack(">L", 0) - - return b"".join([ - fixed_header, - # share data will go in between the next two items eventually but - # for now there is none. - blank_leases, - extra_lease_count, - ]) @classmethod def serialize_lease(cls, lease_info): @@ -89,7 +232,13 @@ class _V1(object): :return: the serialized bytes """ - return lease_info.to_mutable_data() + if isinstance(lease, LeaseInfo): + return lease_info.to_mutable_data() + raise ValueError( + "MutableShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) @classmethod def unserialize_lease(cls, data): @@ -104,7 +253,7 @@ class _V1(object): return LeaseInfo.from_mutable_data(data) -ALL_SCHEMAS = {_V1} +ALL_SCHEMAS = {_V2, _V1} ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore From 617a1eac9d848661b1fce2fe18976796ce02ac2a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 15:30:49 -0400 Subject: [PATCH 17/24] refactor lease hashing logic to avoid mutable/immutable duplication --- src/allmydata/storage/immutable.py | 4 +- src/allmydata/storage/immutable_schema.py | 169 ++++--------------- src/allmydata/storage/lease.py | 4 +- src/allmydata/storage/lease_schema.py | 129 ++++++++++++++ src/allmydata/storage/mutable.py | 4 +- src/allmydata/storage/mutable_schema.py | 194 ++++------------------ 6 files changed, 199 insertions(+), 305 deletions(-) create mode 100644 src/allmydata/storage/lease_schema.py diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 216262a81..e9992d96e 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -231,7 +231,7 @@ class ShareFile(object): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset - f.write(self._schema.serialize_lease(lease_info)) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_num_leases(self, f): f.seek(0x08) @@ -262,7 +262,7 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield self._schema.unserialize_lease(data) + yield self._schema.lease_serializer.unserialize(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 440755b01..40663b935 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,84 +13,28 @@ if PY2: import struct -try: - from typing import Union -except ImportError: - pass - import attr -from nacl.hash import blake2b -from nacl.encoding import RawEncoder - -from .lease import ( - LeaseInfo, - HashedLeaseInfo, +from .lease_schema import ( + v1_immutable, + v2_immutable, ) -def _header(version, max_size): - # type: (int, int) -> bytes +@attr.s(frozen=True) +class _Schema(object): """ - Construct the header for an immutable container. + Implement encoding and decoding for multiple versions of the immutable + container schema. - :param version: the container version to include the in header - :param max_size: the maximum data size the container will hold + :ivar int version: the version number of the schema this object supports - :return: some bytes to write at the beginning of the container + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization """ - # The second field -- the four-byte share data length -- is no longer - # used as of Tahoe v1.3.0, but we continue to write it in there in - # case someone downgrades a storage server from >= Tahoe-1.3.0 to < - # Tahoe-1.3.0, or moves a share file from one server to another, - # etc. We do saturation -- a share data length larger than 2**32-1 - # (what can fit into the field) is marked as the largest length that - # can fit into the field. That way, even if this does happen, the old - # < v1.3.0 server will still allow clients to read the first part of - # the share. - return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + version = attr.ib() + lease_serializer = attr.ib() - -class _V2(object): - """ - Implement encoding and decoding for v2 of the immutable container. - """ - version = 2 - - @classmethod - def _hash_secret(cls, secret): - # type: (bytes) -> bytes - """ - Hash a lease secret for storage. - """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) - - @classmethod - def _hash_lease_info(cls, lease_info): - # type: (LeaseInfo) -> HashedLeaseInfo - """ - Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. - """ - if not isinstance(lease_info, LeaseInfo): - # Provide a little safety against misuse, especially an attempt to - # re-hash an already-hashed lease info which is represented as a - # different type. - raise TypeError( - "Can only hash LeaseInfo, not {!r}".format(lease_info), - ) - - # Hash the cleartext secrets in the lease info and wrap the result in - # a new type. - return HashedLeaseInfo( - attr.assoc( - lease_info, - renew_secret=cls._hash_secret(lease_info.renew_secret), - cancel_secret=cls._hash_secret(lease_info.cancel_secret), - ), - cls._hash_secret, - ) - - @classmethod - def header(cls, max_size): + def header(self, max_size): # type: (int) -> bytes """ Construct a container header. @@ -99,78 +43,23 @@ class _V2(object): :return: the header bytes """ - return _header(cls.version, max_size) + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. + return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0) - @classmethod - def serialize_lease(cls, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes - """ - Serialize a lease to be written to a v2 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - # v2 of the immutable schema stores lease secrets hashed. If - # we're given a LeaseInfo then it holds plaintext secrets. Hash - # them before trying to serialize. - lease = cls._hash_lease_info(lease) - if isinstance(lease, HashedLeaseInfo): - return lease.to_immutable_data() - raise ValueError( - "ShareFile v2 schema cannot represent lease {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> HashedLeaseInfo - """ - Unserialize some bytes from a v2 container. - - :param data: the bytes from the container - - :return: the ``HashedLeaseInfo`` the bytes represent - """ - # In v2 of the immutable schema lease secrets are stored hashed. Wrap - # a LeaseInfo in a HashedLeaseInfo so it can supply the correct - # interpretation for those values. - return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret) - - -class _V1(object): - """ - Implement encoding and decoding for v1 of the immutable container. - """ - version = 1 - - @classmethod - def header(cls, max_size): - return _header(cls.version, max_size) - - @classmethod - def serialize_lease(cls, lease): - if isinstance(lease, LeaseInfo): - return lease.to_immutable_data() - raise ValueError( - "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # In v1 of the immutable schema lease secrets are stored plaintext. - # So load the data into a plain LeaseInfo which works on plaintext - # secrets. - return LeaseInfo.from_immutable_data(data) - - -ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore +ALL_SCHEMAS = { + _Schema(version=2, lease_serializer=v2_immutable), + _Schema(version=1, lease_serializer=v1_immutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_version(version): # (int) -> Optional[type] diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 1a5416d6a..8be44bafd 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -230,7 +230,7 @@ class LeaseInfo(object): "cancel_secret", "expiration_time", ] - values = struct.unpack(">L32s32sL", data) + values = struct.unpack(IMMUTABLE_FORMAT, data) return cls(nodeid=None, **dict(zip(names, values))) def immutable_size(self): @@ -274,7 +274,7 @@ class LeaseInfo(object): "cancel_secret", "nodeid", ] - values = struct.unpack(">LL32s32s20s", data) + values = struct.unpack(MUTABLE_FORMAT, data) return cls(**dict(zip(names, values))) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py new file mode 100644 index 000000000..697ac9e34 --- /dev/null +++ b/src/allmydata/storage/lease_schema.py @@ -0,0 +1,129 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +try: + from typing import Union +except ImportError: + pass + +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + +from .lease import ( + LeaseInfo, + HashedLeaseInfo, +) + +@attr.s(frozen=True) +class CleartextLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + def serialize(self, lease): + # type: (LeaseInfo) -> bytes + if isinstance(lease, LeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> LeaseInfo + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return self._from_data(data) + +@attr.s(frozen=True) +class HashedLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + def serialize(self, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = self._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> HashedLeaseInfo + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + return HashedLeaseInfo(self._from_data(data), self._hash_secret) + +v1_immutable = CleartextLeaseSerializer( + LeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v2_immutable = HashedLeaseSerializer( + HashedLeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v1_mutable = CleartextLeaseSerializer( + LeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) + +v2_mutable = HashedLeaseSerializer( + HashedLeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 346edd53a..bd59d96b8 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -236,7 +236,7 @@ class MutableShareFile(object): + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset - f.write(self._schema.serialize_lease(lease_info)) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None @@ -253,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = self._schema.unserialize_lease(data) + lease_info = self._schema.lease_serializer.unserialize(data) if lease_info.owner_num == 0: return None return lease_info diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py index 9496fe571..4be0d2137 100644 --- a/src/allmydata/storage/mutable_schema.py +++ b/src/allmydata/storage/mutable_schema.py @@ -13,22 +13,17 @@ if PY2: import struct -try: - from typing import Union -except ImportError: - pass - import attr -from nacl.hash import blake2b -from nacl.encoding import RawEncoder - from ..util.hashutil import ( tagged_hash, ) from .lease import ( LeaseInfo, - HashedLeaseInfo, +) +from .lease_schema import ( + v1_mutable, + v2_mutable, ) def _magic(version): @@ -94,168 +89,49 @@ def _header(magic, extra_lease_offset, nodeid, write_enabler): ]) -class _V2(object): +_HEADER_FORMAT = ">32s20s32sQQ" + +# This size excludes leases +_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) + +_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + + +@attr.s(frozen=True) +class _Schema(object): """ - Implement encoding and decoding for v2 of the mutable container. + Implement encoding and decoding for the mutable container. + + :ivar int version: the version number of the schema this object supports + + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization """ - version = 2 - _MAGIC = _magic(version) - - _HEADER_FORMAT = ">32s20s32sQQ" - - # This size excludes leases - _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) - - _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + version = attr.ib() + lease_serializer = attr.ib() + _magic = attr.ib() @classmethod - def _hash_secret(cls, secret): - # type: (bytes) -> bytes - """ - Hash a lease secret for storage. - """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) + def for_version(cls, version, lease_serializer): + return cls(version, lease_serializer, magic=_magic(version)) - @classmethod - def _hash_lease_info(cls, lease_info): - # type: (LeaseInfo) -> HashedLeaseInfo - """ - Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. - """ - if not isinstance(lease_info, LeaseInfo): - # Provide a little safety against misuse, especially an attempt to - # re-hash an already-hashed lease info which is represented as a - # different type. - raise TypeError( - "Can only hash LeaseInfo, not {!r}".format(lease_info), - ) - - # Hash the cleartext secrets in the lease info and wrap the result in - # a new type. - return HashedLeaseInfo( - attr.assoc( - lease_info, - renew_secret=cls._hash_secret(lease_info.renew_secret), - cancel_secret=cls._hash_secret(lease_info.cancel_secret), - ), - cls._hash_secret, - ) - - @classmethod - def magic_matches(cls, candidate_magic): + def magic_matches(self, candidate_magic): # type: (bytes) -> bool """ Return ``True`` if a candidate string matches the expected magic string from a mutable container header, ``False`` otherwise. """ - return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC + return candidate_magic[:len(self._magic)] == self._magic - @classmethod - def header(cls, nodeid, write_enabler): - return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) + def header(self, nodeid, write_enabler): + return _header(self._magic, _EXTRA_LEASE_OFFSET, nodeid, write_enabler) - @classmethod - def serialize_lease(cls, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes - """ - Serialize a lease to be written to a v2 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - # v2 of the mutable schema stores lease secrets hashed. If we're - # given a LeaseInfo then it holds plaintext secrets. Hash them - # before trying to serialize. - lease = cls._hash_lease_info(lease) - if isinstance(lease, HashedLeaseInfo): - return lease.to_mutable_data() - raise ValueError( - "MutableShareFile v2 schema cannot represent lease {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> HashedLeaseInfo - """ - Unserialize some bytes from a v2 container. - - :param data: the bytes from the container - - :return: the ``HashedLeaseInfo`` the bytes represent - """ - # In v2 of the immutable schema lease secrets are stored hashed. Wrap - # a LeaseInfo in a HashedLeaseInfo so it can supply the correct - # interpretation for those values. - lease = LeaseInfo.from_mutable_data(data) - return HashedLeaseInfo(lease, cls._hash_secret) - - -class _V1(object): - """ - Implement encoding and decoding for v1 of the mutable container. - """ - version = 1 - _MAGIC = _magic(version) - - _HEADER_FORMAT = ">32s20s32sQQ" - - # This size excludes leases - _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) - - _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() - - @classmethod - def magic_matches(cls, candidate_magic): - # type: (bytes) -> bool - """ - Return ``True`` if a candidate string matches the expected magic string - from a mutable container header, ``False`` otherwise. - """ - return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC - - @classmethod - def header(cls, nodeid, write_enabler): - return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) - - - @classmethod - def serialize_lease(cls, lease_info): - # type: (LeaseInfo) -> bytes - """ - Serialize a lease to be written to a v1 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - return lease_info.to_mutable_data() - raise ValueError( - "MutableShareFile v1 schema only supports LeaseInfo, not {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> LeaseInfo - """ - Unserialize some bytes from a v1 container. - - :param data: the bytes from the container - - :return: the ``LeaseInfo`` the bytes represent - """ - return LeaseInfo.from_mutable_data(data) - - -ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore +ALL_SCHEMAS = { + _Schema.for_version(version=2, lease_serializer=v2_mutable), + _Schema.for_version(version=1, lease_serializer=v1_mutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_header(header): # (int) -> Optional[type] From 66644791cbce41f31a08a7a9ba56449ccf02e33e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 5 Nov 2021 15:36:26 -0400 Subject: [PATCH 18/24] news fragment --- newsfragments/3841.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3841.security diff --git a/newsfragments/3841.security b/newsfragments/3841.security new file mode 100644 index 000000000..867322e0a --- /dev/null +++ b/newsfragments/3841.security @@ -0,0 +1 @@ +The storage server now keeps hashes of lease renew and cancel secrets for mutable share files instead of keeping the original secrets. \ No newline at end of file From a208502e18c4f4faf85d500e86e3b2093d219ecf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:29:01 -0500 Subject: [PATCH 19/24] whitespace --- src/allmydata/storage/lease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 63dba15e8..bc94ca6d5 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -272,7 +272,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign Hash the candidate secret and compare the result to the stored hashed secret. """ - if isinstance(candidate_secret, _HashedCancelSecret): + if isinstance(candidate_secret, _HashedCancelSecret): # Someone read it off of this object in this project - probably # the lease crawler - and is just trying to use it to identify # which lease it wants to operate on. Avoid re-hashing the value. From 3a8432713fb0885f3795d4501c77e80a21caea5a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:29:05 -0500 Subject: [PATCH 20/24] a note about what's happening with proxyForInterface --- src/allmydata/storage/lease.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index bc94ca6d5..0c3b219f6 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -260,6 +260,10 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign _lease_info = attr.ib() _hash = attr.ib() + # proxyForInterface will take care of forwarding all methods on ILeaseInfo + # to `_lease_info`. Here we override a few of those methods to adjust + # their behavior to make them suitable for use with hashed secrets. + def is_renew_secret(self, candidate_secret): """ Hash the candidate secret and compare the result to the stored hashed From e8adca40abdfa9f4c8616194bbe0bf1fe8817f1f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:32:35 -0500 Subject: [PATCH 21/24] give the ContainerVersionError exceptions a nice str --- src/allmydata/storage/common.py | 6 ++++++ src/allmydata/test/test_storage.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index 48fc77840..17a3f41b7 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -21,6 +21,12 @@ class UnknownContainerVersionError(Exception): self.filename = filename self.version = version + def __str__(self): + return "sharefile {!r} had unexpected version {!r}".format( + self.filename, + self.version, + ) + class UnknownMutableContainerVersionError(UnknownContainerVersionError): pass diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 655395042..ba3d3598f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -651,6 +651,7 @@ class Server(unittest.TestCase): ss.remote_get_buckets, b"si1") self.assertEqual(e.filename, fn) self.assertEqual(e.version, 0) + self.assertIn("had unexpected version 0", str(e)) def test_disconnect(self): # simulate a disconnection @@ -1136,6 +1137,8 @@ class MutableServer(unittest.TestCase): read, b"si1", [0], [(0,10)]) self.assertEqual(e.filename, fn) self.assertTrue(e.version.startswith(b"BAD MAGIC")) + self.assertIn("had unexpected version", str(e)) + self.assertIn("BAD MAGIC", str(e)) def test_container_size(self): ss = self.create("test_container_size") From 04e45f065ab496acf8aadb9f4cca0f60f55c41a2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 22 Nov 2021 07:56:51 -0500 Subject: [PATCH 22/24] document `compare_leases_without_timestamps` --- src/allmydata/test/test_storage.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 92176ce52..91c7adb7f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1361,6 +1361,10 @@ class MutableServer(unittest.TestCase): 2: [b"2"*10]}) def compare_leases_without_timestamps(self, leases_a, leases_b): + """ + Assert that, except for expiration times, ``leases_a`` contains the same + lease information as ``leases_b``. + """ for a, b in zip(leases_a, leases_b): # The leases aren't always of the same type (though of course # corresponding elements in the two lists should be of the same From b92343c664c15605df4a4244208d614f2b3390b2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 22 Nov 2021 08:36:12 -0500 Subject: [PATCH 23/24] some more docstrings --- src/allmydata/storage/lease_schema.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py index 697ac9e34..c09a9279b 100644 --- a/src/allmydata/storage/lease_schema.py +++ b/src/allmydata/storage/lease_schema.py @@ -28,11 +28,17 @@ from .lease import ( @attr.s(frozen=True) class CleartextLeaseSerializer(object): + """ + Serialize and unserialize leases with cleartext secrets. + """ _to_data = attr.ib() _from_data = attr.ib() def serialize(self, lease): # type: (LeaseInfo) -> bytes + """ + Represent the given lease as bytes with cleartext secrets. + """ if isinstance(lease, LeaseInfo): return self._to_data(lease) raise ValueError( @@ -42,6 +48,9 @@ class CleartextLeaseSerializer(object): ) def unserialize(self, data): + """ + Load a lease with cleartext secrets from the given bytes representation. + """ # type: (bytes) -> LeaseInfo # In v1 of the immutable schema lease secrets are stored plaintext. # So load the data into a plain LeaseInfo which works on plaintext From d1839187f148f0e8f265123149c5fc2bc3c9d143 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 22 Nov 2021 08:45:10 -0500 Subject: [PATCH 24/24] "misplaced type annotation" --- src/allmydata/storage/lease_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py index c09a9279b..7e604388e 100644 --- a/src/allmydata/storage/lease_schema.py +++ b/src/allmydata/storage/lease_schema.py @@ -48,10 +48,10 @@ class CleartextLeaseSerializer(object): ) def unserialize(self, data): + # type: (bytes) -> LeaseInfo """ Load a lease with cleartext secrets from the given bytes representation. """ - # type: (bytes) -> LeaseInfo # In v1 of the immutable schema lease secrets are stored plaintext. # So load the data into a plain LeaseInfo which works on plaintext # secrets.