introduce immutable container schema version 2

This version used on-disk hashed secrets to reduce the chance of secrets
leaking to unintended parties.
This commit is contained in:
Jean-Paul Calderone 2021-11-04 14:07:49 -04:00
parent 234b8dcde2
commit b69e8d013b
4 changed files with 214 additions and 10 deletions

View File

@ -39,14 +39,14 @@ from .immutable_schema import (
# interfaces.
# The share file has the following layout:
# 0x00: share file version number, four bytes, current version is 1
# 0x00: share file version number, four bytes, current version is 2
# 0x04: share data length, four bytes big-endian = A # See Footnote 1 below.
# 0x08: number of leases, four bytes big-endian
# 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy)
# A+0x0c = B: first lease. Lease format is:
# B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner
# B+0x04: renew secret, 32 bytes (SHA256)
# B+0x24: cancel secret, 32 bytes (SHA256)
# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below.
# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b)
# B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch
# B+0x48: next lease, or end of record
@ -58,6 +58,23 @@ from .immutable_schema import (
# then the value stored in this field will be the actual share data length
# modulo 2**32.
# Footnote 2: The change between share file version number 1 and 2 is that
# storage of lease secrets is changed from plaintext to hashed. This change
# protects the secrets from compromises of local storage on the server: if a
# plaintext cancel secret is somehow exfiltrated from the storage server, an
# attacker could use it to cancel that lease and potentially cause user data
# to be discarded before intended by the real owner. As of this comment,
# lease cancellation is disabled because there have been at least two bugs
# which leak the persisted value of the cancellation secret. If lease secrets
# were stored hashed instead of plaintext then neither of these bugs would
# have allowed an attacker to learn a usable cancel secret.
#
# Clients are free to construct these secrets however they like. The
# Tahoe-LAFS client uses a SHA256-based construction. The server then uses
# blake2b to hash these values for storage so that it retains no persistent
# copy of the original secret.
#
def _fix_lease_count_format(lease_count_format):
"""
Turn a single character struct format string into a format string suitable

View File

@ -13,8 +13,14 @@ if PY2:
import struct
import attr
from nacl.hash import blake2b
from nacl.encoding import RawEncoder
from .lease import (
LeaseInfo,
HashedLeaseInfo,
)
def _header(version, max_size):
@ -22,10 +28,10 @@ def _header(version, max_size):
"""
Construct the header for an immutable container.
:param version: The container version to include the in header.
:param max_size: The maximum data size the container will hold.
:param version: the container version to include the in header
:param max_size: the maximum data size the container will hold
:return: Some bytes to write at the beginning of the container.
:return: some bytes to write at the beginning of the container
"""
# The second field -- the four-byte share data length -- is no longer
# used as of Tahoe v1.3.0, but we continue to write it in there in
@ -38,6 +44,97 @@ def _header(version, max_size):
# the share.
return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0)
class _V2(object):
"""
Implement encoding and decoding for v2 of the immutable container.
"""
version = 2
@classmethod
def _hash_secret(cls, secret):
# type: (bytes) -> bytes
"""
Hash a lease secret for storage.
"""
return blake2b(secret, digest_size=32, encoder=RawEncoder())
@classmethod
def _hash_lease_info(cls, lease_info):
# type: (LeaseInfo) -> HashedLeaseInfo
"""
Hash the cleartext lease info secrets into a ``HashedLeaseInfo``.
"""
if not isinstance(lease_info, LeaseInfo):
# Provide a little safety against misuse, especially an attempt to
# re-hash an already-hashed lease info which is represented as a
# different type.
raise TypeError(
"Can only hash LeaseInfo, not {!r}".format(lease_info),
)
# Hash the cleartext secrets in the lease info and wrap the result in
# a new type.
return HashedLeaseInfo(
attr.assoc(
lease_info,
renew_secret=cls._hash_secret(lease_info.renew_secret),
cancel_secret=cls._hash_secret(lease_info.cancel_secret),
),
cls._hash_secret,
)
@classmethod
def header(cls, max_size):
# type: (int) -> bytes
"""
Construct a container header.
:param max_size: the maximum size the container can hold
:return: the header bytes
"""
return _header(cls.version, max_size)
@classmethod
def serialize_lease(cls, lease):
# type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
"""
Serialize a lease to be written to a v2 container.
:param lease: the lease to serialize
:return: the serialized bytes
"""
if isinstance(lease, LeaseInfo):
# v2 of the immutable schema stores lease secrets hashed. If
# we're given a LeaseInfo then it holds plaintext secrets. Hash
# them before trying to serialize.
lease = cls._hash_lease_info(lease)
if isinstance(lease, HashedLeaseInfo):
return lease.to_immutable_data()
raise ValueError(
"ShareFile v2 schema cannot represent lease {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# type: (bytes) -> HashedLeaseInfo
"""
Unserialize some bytes from a v2 container.
:param data: the bytes from the container
:return: the ``HashedLeaseInfo`` the bytes represent
"""
# In v2 of the immutable schema lease secrets are stored hashed. Wrap
# a LeaseInfo in a HashedLeaseInfo so it can supply the correct
# interpretation for those values.
return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret)
class _V1(object):
"""
Implement encoding and decoding for v1 of the immutable container.
@ -66,7 +163,7 @@ class _V1(object):
return LeaseInfo.from_immutable_data(data)
ALL_SCHEMAS = {_V1}
ALL_SCHEMAS = {_V2, _V1}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)

View File

@ -20,6 +20,10 @@ from zope.interface import (
implementer,
)
from twisted.python.components import (
proxyForInterface,
)
from allmydata.util.hashutil import timing_safe_compare
# struct format for representation of a lease in an immutable share
@ -245,3 +249,81 @@ class LeaseInfo(object):
]
values = struct.unpack(">LL32s32s20s", data)
return cls(**dict(zip(names, values)))
@attr.s(frozen=True)
class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")):
"""
A ``HashedLeaseInfo`` wraps lease information in which the secrets have
been hashed.
"""
_lease_info = attr.ib()
_hash = attr.ib()
def is_renew_secret(self, candidate_secret):
"""
Hash the candidate secret and compare the result to the stored hashed
secret.
"""
return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret))
def is_cancel_secret(self, candidate_secret):
"""
Hash the candidate secret and compare the result to the stored hashed
secret.
"""
if isinstance(candidate_secret, _HashedCancelSecret):
# Someone read it off of this object in this project - probably
# the lease crawler - and is just trying to use it to identify
# which lease it wants to operate on. Avoid re-hashing the value.
#
# It is important that this codepath is only availably internally
# for this process to talk to itself. If it were to be exposed to
# clients over the network, they could just provide the hashed
# value to avoid having to ever learn the original value.
hashed_candidate = candidate_secret.hashed_value
else:
# It is not yet hashed so hash it.
hashed_candidate = self._hash(candidate_secret)
return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate)
@property
def owner_num(self):
return self._lease_info.owner_num
@property
def cancel_secret(self):
"""
Give back an opaque wrapper around the hashed cancel secret which can
later be presented for a succesful equality comparison.
"""
# We don't *have* the cancel secret. We hashed it and threw away the
# original. That's good. It does mean that some code that runs
# in-process with the storage service (LeaseCheckingCrawler) runs into
# some difficulty. That code wants to cancel leases and does so using
# the same interface that faces storage clients (or would face them,
# if lease cancellation were exposed).
#
# Since it can't use the hashed secret to cancel a lease (that's the
# point of the hashing) and we don't have the unhashed secret to give
# it, instead we give it a marker that `cancel_lease` will recognize.
# On recognizing it, if the hashed value given matches the hashed
# value stored it is considered a match and the lease can be
# cancelled.
#
# This isn't great. Maybe the internal and external consumers of
# cancellation should use different interfaces.
return _HashedCancelSecret(self._lease_info.cancel_secret)
@attr.s(frozen=True)
class _HashedCancelSecret(object):
"""
``_HashedCancelSecret`` is a marker type for an already-hashed lease
cancel secret that lets internal lease cancellers bypass the hash-based
protection that's imposed on external lease cancellers.
:ivar bytes hashed_value: The already-hashed secret.
"""
hashed_value = attr.ib()

View File

@ -1113,8 +1113,16 @@ class Corruption(_Base, unittest.TestCase):
d.addCallback(_download, imm_uri, i, expected)
d.addCallback(lambda ign: self.restore_all_shares(self.shares))
d.addCallback(fireEventually)
corrupt_values = [(3, 2, "no-sh2"),
(15, 2, "need-4th"), # share looks v2
corrupt_values = [
# Make the container version for share number 2 look
# unsupported. If you add support for immutable share file
# version number much past 16 million then you will have to
# update this test. Also maybe you have other problems.
(1, 255, "no-sh2"),
# Make the immutable share number 2 (not the container, the
# thing inside the container) look unsupported. Ditto the
# above about version numbers in the ballpark of 16 million.
(13, 255, "need-4th"),
]
for i,newvalue,expected in corrupt_values:
d.addCallback(self._corrupt_set, imm_uri, i, newvalue)