diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index ae5a710af..216262a81 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -39,14 +39,14 @@ from .immutable_schema import ( # interfaces. # The share file has the following layout: -# 0x00: share file version number, four bytes, current version is 1 +# 0x00: share file version number, four bytes, current version is 2 # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below. # 0x08: number of leases, four bytes big-endian # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy) # A+0x0c = B: first lease. Lease format is: # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner -# B+0x04: renew secret, 32 bytes (SHA256) -# B+0x24: cancel secret, 32 bytes (SHA256) +# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below. +# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b) # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch # B+0x48: next lease, or end of record @@ -58,6 +58,23 @@ from .immutable_schema import ( # then the value stored in this field will be the actual share data length # modulo 2**32. +# Footnote 2: The change between share file version number 1 and 2 is that +# storage of lease secrets is changed from plaintext to hashed. This change +# protects the secrets from compromises of local storage on the server: if a +# plaintext cancel secret is somehow exfiltrated from the storage server, an +# attacker could use it to cancel that lease and potentially cause user data +# to be discarded before intended by the real owner. As of this comment, +# lease cancellation is disabled because there have been at least two bugs +# which leak the persisted value of the cancellation secret. If lease secrets +# were stored hashed instead of plaintext then neither of these bugs would +# have allowed an attacker to learn a usable cancel secret. +# +# Clients are free to construct these secrets however they like. The +# Tahoe-LAFS client uses a SHA256-based construction. The server then uses +# blake2b to hash these values for storage so that it retains no persistent +# copy of the original secret. +# + def _fix_lease_count_format(lease_count_format): """ Turn a single character struct format string into a format string suitable diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 759752bed..fc823507a 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,8 +13,14 @@ if PY2: import struct +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + from .lease import ( LeaseInfo, + HashedLeaseInfo, ) def _header(version, max_size): @@ -22,10 +28,10 @@ def _header(version, max_size): """ Construct the header for an immutable container. - :param version: The container version to include the in header. - :param max_size: The maximum data size the container will hold. + :param version: the container version to include the in header + :param max_size: the maximum data size the container will hold - :return: Some bytes to write at the beginning of the container. + :return: some bytes to write at the beginning of the container """ # The second field -- the four-byte share data length -- is no longer # used as of Tahoe v1.3.0, but we continue to write it in there in @@ -38,6 +44,97 @@ def _header(version, max_size): # the share. return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + +class _V2(object): + """ + Implement encoding and decoding for v2 of the immutable container. + """ + version = 2 + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + @classmethod + def header(cls, max_size): + # type: (int) -> bytes + """ + Construct a container header. + + :param max_size: the maximum size the container can hold + + :return: the header bytes + """ + return _header(cls.version, max_size) + + @classmethod + def serialize_lease(cls, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + """ + Serialize a lease to be written to a v2 container. + + :param lease: the lease to serialize + + :return: the serialized bytes + """ + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = cls._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return lease.to_immutable_data() + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # type: (bytes) -> HashedLeaseInfo + """ + Unserialize some bytes from a v2 container. + + :param data: the bytes from the container + + :return: the ``HashedLeaseInfo`` the bytes represent + """ + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret) + + class _V1(object): """ Implement encoding and decoding for v1 of the immutable container. @@ -66,7 +163,7 @@ class _V1(object): return LeaseInfo.from_immutable_data(data) -ALL_SCHEMAS = {_V1} +ALL_SCHEMAS = {_V2, _V1} ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 23071707a..895a0970c 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -20,6 +20,10 @@ from zope.interface import ( implementer, ) +from twisted.python.components import ( + proxyForInterface, +) + from allmydata.util.hashutil import timing_safe_compare # struct format for representation of a lease in an immutable share @@ -245,3 +249,81 @@ class LeaseInfo(object): ] values = struct.unpack(">LL32s32s20s", data) return cls(**dict(zip(names, values))) + + +@attr.s(frozen=True) +class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): + """ + A ``HashedLeaseInfo`` wraps lease information in which the secrets have + been hashed. + """ + _lease_info = attr.ib() + _hash = attr.ib() + + def is_renew_secret(self, candidate_secret): + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret)) + + def is_cancel_secret(self, candidate_secret): + """ + Hash the candidate secret and compare the result to the stored hashed + secret. + """ + if isinstance(candidate_secret, _HashedCancelSecret): + # Someone read it off of this object in this project - probably + # the lease crawler - and is just trying to use it to identify + # which lease it wants to operate on. Avoid re-hashing the value. + # + # It is important that this codepath is only availably internally + # for this process to talk to itself. If it were to be exposed to + # clients over the network, they could just provide the hashed + # value to avoid having to ever learn the original value. + hashed_candidate = candidate_secret.hashed_value + else: + # It is not yet hashed so hash it. + hashed_candidate = self._hash(candidate_secret) + + return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate) + + @property + def owner_num(self): + return self._lease_info.owner_num + + @property + def cancel_secret(self): + """ + Give back an opaque wrapper around the hashed cancel secret which can + later be presented for a succesful equality comparison. + """ + # We don't *have* the cancel secret. We hashed it and threw away the + # original. That's good. It does mean that some code that runs + # in-process with the storage service (LeaseCheckingCrawler) runs into + # some difficulty. That code wants to cancel leases and does so using + # the same interface that faces storage clients (or would face them, + # if lease cancellation were exposed). + # + # Since it can't use the hashed secret to cancel a lease (that's the + # point of the hashing) and we don't have the unhashed secret to give + # it, instead we give it a marker that `cancel_lease` will recognize. + # On recognizing it, if the hashed value given matches the hashed + # value stored it is considered a match and the lease can be + # cancelled. + # + # This isn't great. Maybe the internal and external consumers of + # cancellation should use different interfaces. + return _HashedCancelSecret(self._lease_info.cancel_secret) + + +@attr.s(frozen=True) +class _HashedCancelSecret(object): + """ + ``_HashedCancelSecret`` is a marker type for an already-hashed lease + cancel secret that lets internal lease cancellers bypass the hash-based + protection that's imposed on external lease cancellers. + + :ivar bytes hashed_value: The already-hashed secret. + """ + hashed_value = attr.ib() diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index ca5b5650b..85d89cde6 100644 --- a/src/allmydata/test/test_download.py +++ b/src/allmydata/test/test_download.py @@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase): d.addCallback(_download, imm_uri, i, expected) d.addCallback(lambda ign: self.restore_all_shares(self.shares)) d.addCallback(fireEventually) - corrupt_values = [(3, 2, "no-sh2"), - (15, 2, "need-4th"), # share looks v2 - ] + corrupt_values = [ + # Make the container version for share number 2 look + # unsupported. If you add support for immutable share file + # version number much past 16 million then you will have to + # update this test. Also maybe you have other problems. + (1, 255, "no-sh2"), + # Make the immutable share number 2 (not the container, the + # thing inside the container) look unsupported. Ditto the + # above about version numbers in the ballpark of 16 million. + (13, 255, "need-4th"), + ] for i,newvalue,expected in corrupt_values: d.addCallback(self._corrupt_set, imm_uri, i, newvalue) d.addCallback(_download, imm_uri, i, expected)