From 0459b712b02b8ba686687d696325bcdb650f770c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 08:54:55 -0400 Subject: [PATCH 01/12] news fragment --- newsfragments/3839.security | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3839.security diff --git a/newsfragments/3839.security b/newsfragments/3839.security new file mode 100644 index 000000000..1ae054542 --- /dev/null +++ b/newsfragments/3839.security @@ -0,0 +1 @@ +The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets. From 274dc6e837dd7181fb1f6ba9116570dc4b255d66 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 08:55:37 -0400 Subject: [PATCH 02/12] Introduce `UnknownContainerVersionError` base w/ structured args --- src/allmydata/storage/common.py | 11 ++++++++--- src/allmydata/storage/immutable.py | 4 +--- src/allmydata/storage/mutable.py | 4 +--- src/allmydata/test/test_storage.py | 7 ++++--- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index e5563647f..48fc77840 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -16,11 +16,16 @@ from allmydata.util import base32 # Backwards compatibility. from allmydata.interfaces import DataTooLargeError # noqa: F401 -class UnknownMutableContainerVersionError(Exception): - pass -class UnknownImmutableContainerVersionError(Exception): +class UnknownContainerVersionError(Exception): + def __init__(self, filename, version): + self.filename = filename + self.version = version + +class UnknownMutableContainerVersionError(UnknownContainerVersionError): pass +class UnknownImmutableContainerVersionError(UnknownContainerVersionError): + pass def si_b2a(storageindex): return base32.b2a(storageindex) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index a43860138..fcc60509c 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -174,9 +174,7 @@ class ShareFile(object): filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) if version != 1: - msg = "sharefile %s had version %d but we wanted 1" % \ - (filename, version) - raise UnknownImmutableContainerVersionError(msg) + raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) self._data_offset = 0xc diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 4abf22064..ce9cc5ff4 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -95,9 +95,7 @@ class MutableShareFile(object): data_length, extra_least_offset) = \ struct.unpack(">32s20s32sQQ", data) if not self.is_valid_header(data): - msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ - (filename, magic, self.MAGIC) - raise UnknownMutableContainerVersionError(msg) + raise UnknownMutableContainerVersionError(filename, magic) self.parent = parent # for logging def log(self, *args, **kwargs): diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 2c8d84b9e..bf9eff37a 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -646,7 +646,8 @@ class Server(unittest.TestCase): e = self.failUnlessRaises(UnknownImmutableContainerVersionError, ss.remote_get_buckets, b"si1") - self.failUnlessIn(" had version 0 but we wanted 1", str(e)) + self.assertEqual(e.filename, fn) + self.assertEqual(e.version, 0) def test_disconnect(self): # simulate a disconnection @@ -1127,8 +1128,8 @@ class MutableServer(unittest.TestCase): read = ss.remote_slot_readv e = self.failUnlessRaises(UnknownMutableContainerVersionError, read, b"si1", [0], [(0,10)]) - self.failUnlessIn(" had magic ", str(e)) - self.failUnlessIn(" but we wanted ", str(e)) + self.assertEqual(e.filename, fn) + self.assertTrue(e.version.startswith(b"BAD MAGIC")) def test_container_size(self): ss = self.create("test_container_size") From 10724a91f9ca2fe929f4e29adb03b876b21f9fe5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:17:36 -0400 Subject: [PATCH 03/12] introduce an explicit representation of the v1 immutable container schema This is only a partial representation, sufficient to express the changes that are coming in v2. --- src/allmydata/storage/immutable.py | 37 ++++++----- src/allmydata/storage/immutable_schema.py | 81 +++++++++++++++++++++++ 2 files changed, 102 insertions(+), 16 deletions(-) create mode 100644 src/allmydata/storage/immutable_schema.py diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index fcc60509c..ae5a710af 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -25,9 +25,14 @@ from allmydata.interfaces import ( ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition -from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError +from .immutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_version, +) + + # each share file (in storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and # RIBucketReader.read . The lease information is not accessible through these @@ -118,9 +123,16 @@ class ShareFile(object): ``False`` otherwise. """ (version,) = struct.unpack(">L", header[:4]) - return version == 1 + return schema_from_version(version) is not None - def __init__(self, filename, max_size=None, create=False, lease_count_format="L"): + def __init__( + self, + filename, + max_size=None, + create=False, + lease_count_format="L", + schema=NEWEST_SCHEMA_VERSION, + ): """ Initialize a ``ShareFile``. @@ -156,24 +168,17 @@ class ShareFile(object): # it. Also construct the metadata. assert not os.path.exists(self.home) fileutil.make_dirs(os.path.dirname(self.home)) - # The second field -- the four-byte share data length -- is no - # longer used as of Tahoe v1.3.0, but we continue to write it in - # there in case someone downgrades a storage server from >= - # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one - # server to another, etc. We do saturation -- a share data length - # larger than 2**32-1 (what can fit into the field) is marked as - # the largest length that can fit into the field. That way, even - # if this does happen, the old < v1.3.0 server will still allow - # clients to read the first part of the share. + self._schema = schema with open(self.home, 'wb') as f: - f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0)) + f.write(self._schema.header(max_size)) self._lease_offset = max_size + 0x0c self._num_leases = 0 else: with open(self.home, 'rb') as f: filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) - if version != 1: + self._schema = schema_from_version(version) + if self._schema is None: raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) @@ -209,7 +214,7 @@ class ShareFile(object): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_immutable_data()) + f.write(self._schema.serialize_lease(lease_info)) def _read_num_leases(self, f): f.seek(0x08) @@ -240,7 +245,7 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield LeaseInfo.from_immutable_data(data) + yield self._schema.unserialize_lease(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py new file mode 100644 index 000000000..759752bed --- /dev/null +++ b/src/allmydata/storage/immutable_schema.py @@ -0,0 +1,81 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +import struct + +from .lease import ( + LeaseInfo, +) + +def _header(version, max_size): + # (int, int) -> bytes + """ + Construct the header for an immutable container. + + :param version: The container version to include the in header. + :param max_size: The maximum data size the container will hold. + + :return: Some bytes to write at the beginning of the container. + """ + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. + return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + +class _V1(object): + """ + Implement encoding and decoding for v1 of the immutable container. + """ + version = 1 + + @classmethod + def header(cls, max_size): + return _header(cls.version, max_size) + + @classmethod + def serialize_lease(cls, lease): + if isinstance(lease, LeaseInfo): + return lease.to_immutable_data() + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return LeaseInfo.from_immutable_data(data) + + +ALL_SCHEMAS = {_V1} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) + +def schema_from_version(version): + # (int) -> Optional[type] + """ + Find the schema object that corresponds to a certain version number. + """ + for schema in ALL_SCHEMAS: + if schema.version == version: + return schema + return None From 3b4141952387452894ed5c0ed58113e272ad3e4f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:32:59 -0400 Subject: [PATCH 04/12] apply the ShareFile tests to all schema versions using hypothesis --- src/allmydata/test/test_storage.py | 60 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index bf9eff37a..655395042 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -43,6 +43,9 @@ from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME from allmydata.storage.shares import get_share_file from allmydata.storage.mutable import MutableShareFile from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile +from allmydata.storage.immutable_schema import ( + ALL_SCHEMAS, +) from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \ si_b2a, si_a2b @@ -844,6 +847,9 @@ class Server(unittest.TestCase): # Create a bucket: rs0, cs0 = self.create_bucket_5_shares(ss, b"si0") + + # Upload of an immutable implies creation of a single lease with the + # supplied secrets. (lease,) = ss.get_leases(b"si0") self.assertTrue(lease.is_renew_secret(rs0)) @@ -3125,6 +3131,7 @@ class Stats(unittest.TestCase): self.failUnless(output["get"]["99_0_percentile"] is None, output) self.failUnless(output["get"]["99_9_percentile"] is None, output) +immutable_schemas = strategies.sampled_from(list(ALL_SCHEMAS)) class ShareFileTests(unittest.TestCase): """Tests for allmydata.storage.immutable.ShareFile.""" @@ -3136,47 +3143,54 @@ class ShareFileTests(unittest.TestCase): # Should be b'abDEF' now. return sf - def test_read_write(self): + @given(immutable_schemas) + def test_read_write(self, schema): """Basic writes can be read.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 3), b"abD") self.assertEqual(sf.read_share_data(1, 4), b"bDEF") - def test_reads_beyond_file_end(self): + @given(immutable_schemas) + def test_reads_beyond_file_end(self, schema): """Reads beyond the file size are truncated.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) self.assertEqual(sf.read_share_data(0, 10), b"abDEF") self.assertEqual(sf.read_share_data(5, 10), b"") - def test_too_large_write(self): + @given(immutable_schemas) + def test_too_large_write(self, schema): """Can't do write larger than file size.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(DataTooLargeError): sf.write_share_data(0, b"x" * 3000) - def test_no_leases_cancelled(self): + @given(immutable_schemas) + def test_no_leases_cancelled(self, schema): """If no leases were cancelled, IndexError is raised.""" - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) with self.assertRaises(IndexError): sf.cancel_lease(b"garbage") - def test_long_lease_count_format(self): + @given(immutable_schemas) + def test_long_lease_count_format(self, schema): """ ``ShareFile.__init__`` raises ``ValueError`` if the lease count format given is longer than one character. """ with self.assertRaises(ValueError): - self.get_sharefile(lease_count_format="BB") + self.get_sharefile(schema=schema, lease_count_format="BB") - def test_large_lease_count_format(self): + @given(immutable_schemas) + def test_large_lease_count_format(self, schema): """ ``ShareFile.__init__`` raises ``ValueError`` if the lease count format encodes to a size larger than 8 bytes. """ with self.assertRaises(ValueError): - self.get_sharefile(lease_count_format="Q") + self.get_sharefile(schema=schema, lease_count_format="Q") - def test_avoid_lease_overflow(self): + @given(immutable_schemas) + def test_avoid_lease_overflow(self, schema): """ If the share file already has the maximum number of leases supported then ``ShareFile.add_lease`` raises ``struct.error`` and makes no changes @@ -3190,7 +3204,7 @@ class ShareFileTests(unittest.TestCase): ) # Make it a little easier to reach the condition by limiting the # number of leases to only 255. - sf = self.get_sharefile(lease_count_format="B") + sf = self.get_sharefile(schema=schema, lease_count_format="B") # Add the leases. for i in range(2 ** 8 - 1): @@ -3214,16 +3228,17 @@ class ShareFileTests(unittest.TestCase): self.assertEqual(before_data, after_data) - def test_renew_secret(self): + @given(immutable_schemas) + def test_renew_secret(self, schema): """ - A lease loaded from an immutable share file can have its renew secret - verified. + A lease loaded from an immutable share file at any schema version can have + its renew secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, @@ -3234,16 +3249,17 @@ class ShareFileTests(unittest.TestCase): (loaded_lease,) = sf.get_leases() self.assertTrue(loaded_lease.is_renew_secret(renew_secret)) - def test_cancel_secret(self): + @given(immutable_schemas) + def test_cancel_secret(self, schema): """ - A lease loaded from an immutable share file can have its cancel secret - verified. + A lease loaded from an immutable share file at any schema version can have + its cancel secret verified. """ renew_secret = b"r" * 32 cancel_secret = b"c" * 32 expiration_time = 2 ** 31 - sf = self.get_sharefile() + sf = self.get_sharefile(schema=schema) lease = LeaseInfo( owner_num=0, renew_secret=renew_secret, From 234b8dcde2febc2b3eee96e1ede4d123f634dcb1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 11:56:49 -0400 Subject: [PATCH 05/12] Formalize LeaseInfo interface in preparation for another implementation --- src/allmydata/storage/lease.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 6d21bb2b2..23071707a 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -15,6 +15,11 @@ import struct, time import attr +from zope.interface import ( + Interface, + implementer, +) + from allmydata.util.hashutil import timing_safe_compare # struct format for representation of a lease in an immutable share @@ -23,6 +28,84 @@ IMMUTABLE_FORMAT = ">L32s32sL" # struct format for representation of a lease in a mutable share MUTABLE_FORMAT = ">LL32s32s20s" + +class ILeaseInfo(Interface): + """ + Represent a marker attached to a share that indicates that share should be + retained for some amount of time. + + Typically clients will create and renew leases on their shares as a way to + inform storage servers that there is still interest in those shares. A + share may have more than one lease. If all leases on a share have + expiration times in the past then the storage server may take this as a + strong hint that no one is interested in the share anymore and therefore + the share may be deleted to reclaim the space. + """ + def renew(new_expire_time): + """ + Create a new ``ILeaseInfo`` with the given expiration time. + + :param Union[int, float] new_expire_time: The expiration time the new + ``ILeaseInfo`` will have. + + :return: The new ``ILeaseInfo`` provider with the new expiration time. + """ + + def get_expiration_time(): + """ + :return Union[int, float]: this lease's expiration time + """ + + def get_grant_renew_time_time(): + """ + :return Union[int, float]: a guess about the last time this lease was + renewed + """ + + def get_age(): + """ + :return Union[int, float]: a guess about how long it has been since this + lease was renewed + """ + + def to_immutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in an immutable container + """ + + def to_mutable_data(): + """ + :return bytes: a serialized representation of this lease suitable for + inclusion in a mutable container + """ + + def immutable_size(): + """ + :return int: the size of the serialized representation of this lease in an + immutable container + """ + + def mutable_size(): + """ + :return int: the size of the serialized representation of this lease in a + mutable container + """ + + def is_renew_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's renew + secret, ``False`` otherwise + """ + + def is_cancel_secret(candidate_secret): + """ + :return bool: ``True`` if the given byte string is this lease's cancel + secret, ``False`` otherwise + """ + + +@implementer(ILeaseInfo) @attr.s(frozen=True) class LeaseInfo(object): """ From b69e8d013bfc32f8b7ca5948ad36a5c60b3db73a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:07:49 -0400 Subject: [PATCH 06/12] introduce immutable container schema version 2 This version used on-disk hashed secrets to reduce the chance of secrets leaking to unintended parties. --- src/allmydata/storage/immutable.py | 23 ++++- src/allmydata/storage/immutable_schema.py | 105 +++++++++++++++++++++- src/allmydata/storage/lease.py | 82 +++++++++++++++++ src/allmydata/test/test_download.py | 14 ++- 4 files changed, 214 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index ae5a710af..216262a81 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -39,14 +39,14 @@ from .immutable_schema import ( # interfaces. # The share file has the following layout: -# 0x00: share file version number, four bytes, current version is 1 +# 0x00: share file version number, four bytes, current version is 2 # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below. # 0x08: number of leases, four bytes big-endian # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy) # A+0x0c = B: first lease. Lease format is: # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner -# B+0x04: renew secret, 32 bytes (SHA256) -# B+0x24: cancel secret, 32 bytes (SHA256) +# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below. +# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b) # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch # B+0x48: next lease, or end of record @@ -58,6 +58,23 @@ from .immutable_schema import ( # then the value stored in this field will be the actual share data length # modulo 2**32. +# Footnote 2: The change between share file version number 1 and 2 is that +# storage of lease secrets is changed from plaintext to hashed. This change +# protects the secrets from compromises of local storage on the server: if a +# plaintext cancel secret is somehow exfiltrated from the storage server, an +# attacker could use it to cancel that lease and potentially cause user data +# to be discarded before intended by the real owner. As of this comment, +# lease cancellation is disabled because there have been at least two bugs +# which leak the persisted value of the cancellation secret. If lease secrets +# were stored hashed instead of plaintext then neither of these bugs would +# have allowed an attacker to learn a usable cancel secret. +# +# Clients are free to construct these secrets however they like. The +# Tahoe-LAFS client uses a SHA256-based construction. The server then uses +# blake2b to hash these values for storage so that it retains no persistent +# copy of the original secret. +# + def _fix_lease_count_format(lease_count_format): """ Turn a single character struct format string into a format string suitable diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 759752bed..fc823507a 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,8 +13,14 @@ if PY2: import struct +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + from .lease import ( LeaseInfo, + HashedLeaseInfo, ) def _header(version, max_size): @@ -22,10 +28,10 @@ def _header(version, max_size): """ Construct the header for an immutable container. - :param version: The container version to include the in header. - :param max_size: The maximum data size the container will hold. + :param version: the container version to include the in header + :param max_size: the maximum data size the container will hold - :return: Some bytes to write at the beginning of the container. + :return: some bytes to write at the beginning of the container """ # The second field -- the four-byte share data length -- is no longer # used as of Tahoe v1.3.0, but we continue to write it in there in @@ -38,6 +44,97 @@ def _header(version, max_size): # the share. return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + +class _V2(object): + """ + Implement encoding and decoding for v2 of the immutable container. + """ + version = 2 + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + @classmethod + def header(cls, max_size): + # type: (int) -> bytes + """ + Construct a container header. + + :param max_size: the maximum size the container can hold + + :return: the header bytes + """ + return _header(cls.version, max_size) + + @classmethod + def serialize_lease(cls, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + """ + Serialize a lease to be written to a v2 container. + + :param lease: the lease to serialize + + :return: the serialized bytes + """ + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = cls._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return lease.to_immutable_data() + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # type: (bytes) -> HashedLeaseInfo + """ + Unserialize some bytes from a v2 container. + + :param data: the bytes from the container + + :return: the ``HashedLeaseInfo`` the bytes represent + """ + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret) + + class _V1(object): """ Implement encoding and decoding for v1 of the immutable container. @@ -66,7 +163,7 @@ class _V1(object): return LeaseInfo.from_immutable_data(data) -ALL_SCHEMAS = {_V1} +ALL_SCHEMAS = {_V2, _V1} ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 23071707a..895a0970c 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -20,6 +20,10 @@ from zope.interface import ( implementer, ) +from twisted.python.components import ( + proxyForInterface, +) + from allmydata.util.hashutil import timing_safe_compare # struct format for representation of a lease in an immutable share @@ -245,3 +249,81 @@ class LeaseInfo(object): ] values = struct.unpack(">LL32s32s20s", data) return cls(**dict(zip(names, values))) + + +@attr.s(frozen=True) +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): + """ + A ``HashedLeaseInfo`` wraps lease information in which the secrets have + been hashed. + """ + _lease_info = attr.ib() + _hash = attr.ib() + + def is_renew_secret(self, candidate_secret): + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) + + def is_cancel_secret(self, candidate_secret): + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + if isinstance(candidate_secret, _HashedCancelSecret): + # Someone read it off of this object in this project - probably + # the lease crawler - and is just trying to use it to identify + # which lease it wants to operate on. Avoid re-hashing the value. + # + # It is important that this codepath is only availably internally + # for this process to talk to itself. If it were to be exposed to + # clients over the network, they could just provide the hashed + # value to avoid having to ever learn the original value. + hashed_candidate = candidate_secret.hashed_value + else: + # It is not yet hashed so hash it. + hashed_candidate = self._hash(candidate_secret) + + return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) + + @property + def owner_num(self): + return self._lease_info.owner_num + + @property + def cancel_secret(self): + """ + Give back an opaque wrapper around the hashed cancel secret which can + later be presented for a succesful equality comparison. + """ + # We don't *have* the cancel secret. We hashed it and threw away the + # original. That's good. It does mean that some code that runs + # in-process with the storage service (LeaseCheckingCrawler) runs into + # some difficulty. That code wants to cancel leases and does so using + # the same interface that faces storage clients (or would face them, + # if lease cancellation were exposed). + # + # Since it can't use the hashed secret to cancel a lease (that's the + # point of the hashing) and we don't have the unhashed secret to give + # it, instead we give it a marker that `cancel_lease` will recognize. + # On recognizing it, if the hashed value given matches the hashed + # value stored it is considered a match and the lease can be + # cancelled. + # + # This isn't great. Maybe the internal and external consumers of + # cancellation should use different interfaces. + return _HashedCancelSecret(self._lease_info.cancel_secret) + + +@attr.s(frozen=True) +class _HashedCancelSecret(object): + """ + ``_HashedCancelSecret`` is a marker type for an already-hashed lease + cancel secret that lets internal lease cancellers bypass the hash-based + protection that's imposed on external lease cancellers. + + :ivar bytes hashed_value: The already-hashed secret. + """ + hashed_value = attr.ib() diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index ca5b5650b..85d89cde6 100644 --- a/src/allmydata/test/test_download.py +++ b/src/allmydata/test/test_download.py @@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase): d.addCallback(_download, imm_uri, i, expected) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) - corrupt_values = [(3, 2, "no-sh2"), - (15, 2, "need-4th"), # share looks v2 - ] + corrupt_values = [ + # Make the container version for share number 2 look + # unsupported. If you add support for immutable share file + # version number much past 16 million then you will have to + # update this test. Also maybe you have other problems. + (1, 255, "no-sh2"), + # Make the immutable share number 2 (not the container, the + # thing inside the container) look unsupported. Ditto the + # above about version numbers in the ballpark of 16 million. + (13, 255, "need-4th"), + ] for i,newvalue,expected in corrupt_values: d.addCallback(self._corrupt_set, imm_uri, i, newvalue) d.addCallback(_download, imm_uri, i, expected) From 7a59aa83bb9e429d0b44f47fff6365dbfa24f42f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:12:54 -0400 Subject: [PATCH 07/12] add missing import --- src/allmydata/storage/immutable_schema.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index fc823507a..6ac49f6f1 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,6 +13,11 @@ if PY2: import struct +try: + from typing import Union +except ImportError: + pass + import attr from nacl.hash import blake2b From 6889ab2a76a1665dc7adb11dfa2205760641f303 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:16:55 -0400 Subject: [PATCH 08/12] fix syntax of type hint --- src/allmydata/storage/immutable_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 6ac49f6f1..7ffec418a 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -29,7 +29,7 @@ from .lease import ( ) def _header(version, max_size): - # (int, int) -> bytes + # type: (int, int) -> bytes """ Construct the header for an immutable container. From 2186bfcc372d01ab79f6899e8d0a54157ee83444 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 14:40:43 -0400 Subject: [PATCH 09/12] silence some mypy errors :/ I don't know the "right" way to make mypy happy with these things --- src/allmydata/storage/immutable_schema.py | 4 ++-- src/allmydata/storage/lease.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 7ffec418a..440755b01 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -169,8 +169,8 @@ class _V1(object): ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore def schema_from_version(version): # (int) -> Optional[type] diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 895a0970c..63dba15e8 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -252,7 +252,7 @@ class LeaseInfo(object): @attr.s(frozen=True) -class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ignore # unsupported dynamic base class """ A ``HashedLeaseInfo`` wraps lease information in which the secrets have been hashed. From a208502e18c4f4faf85d500e86e3b2093d219ecf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:29:01 -0500 Subject: [PATCH 10/12] whitespace --- src/allmydata/storage/lease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 63dba15e8..bc94ca6d5 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -272,7 +272,7 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign Hash the candidate secret and compare the result to the stored hashed secret. """ - if isinstance(candidate_secret, _HashedCancelSecret): + if isinstance(candidate_secret, _HashedCancelSecret): # Someone read it off of this object in this project - probably # the lease crawler - and is just trying to use it to identify # which lease it wants to operate on. Avoid re-hashing the value. From 3a8432713fb0885f3795d4501c77e80a21caea5a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:29:05 -0500 Subject: [PATCH 11/12] a note about what's happening with proxyForInterface --- src/allmydata/storage/lease.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index bc94ca6d5..0c3b219f6 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -260,6 +260,10 @@ class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ign _lease_info = attr.ib() _hash = attr.ib() + # proxyForInterface will take care of forwarding all methods on ILeaseInfo + # to `_lease_info`. Here we override a few of those methods to adjust + # their behavior to make them suitable for use with hashed secrets. + def is_renew_secret(self, candidate_secret): """ Hash the candidate secret and compare the result to the stored hashed From e8adca40abdfa9f4c8616194bbe0bf1fe8817f1f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 16 Nov 2021 18:32:35 -0500 Subject: [PATCH 12/12] give the ContainerVersionError exceptions a nice str --- src/allmydata/storage/common.py | 6 ++++++ src/allmydata/test/test_storage.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index 48fc77840..17a3f41b7 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -21,6 +21,12 @@ class UnknownContainerVersionError(Exception): self.filename = filename self.version = version + def __str__(self): + return "sharefile {!r} had unexpected version {!r}".format( + self.filename, + self.version, + ) + class UnknownMutableContainerVersionError(UnknownContainerVersionError): pass diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 655395042..ba3d3598f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -651,6 +651,7 @@ class Server(unittest.TestCase): ss.remote_get_buckets, b"si1") self.assertEqual(e.filename, fn) self.assertEqual(e.version, 0) + self.assertIn("had unexpected version 0", str(e)) def test_disconnect(self): # simulate a disconnection @@ -1136,6 +1137,8 @@ class MutableServer(unittest.TestCase): read, b"si1", [0], [(0,10)]) self.assertEqual(e.filename, fn) self.assertTrue(e.version.startswith(b"BAD MAGIC")) + self.assertIn("had unexpected version", str(e)) + self.assertIn("BAD MAGIC", str(e)) def test_container_size(self): ss = self.create("test_container_size")