Merge pull request #38 from tahoe-lafs/LFS-01-010.immutable

LFS-01-010 (immutables): Weak authentication for client leases

Fixes: ticket:3839
This commit is contained in:
Jean-Paul Calderone 2021-11-22 07:38:51 -05:00 committed by GitHub
commit 4c5cf7d3f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 466 additions and 56 deletions

View File

@ -0,0 +1 @@
The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets.

View File

@ -16,11 +16,22 @@ from allmydata.util import base32
# Backwards compatibility.
from allmydata.interfaces import DataTooLargeError # noqa: F401
class UnknownMutableContainerVersionError(Exception):
pass
class UnknownImmutableContainerVersionError(Exception):
class UnknownContainerVersionError(Exception):
def __init__(self, filename, version):
self.filename = filename
self.version = version
def __str__(self):
return "sharefile {!r} had unexpected version {!r}".format(
self.filename,
self.version,
)
class UnknownMutableContainerVersionError(UnknownContainerVersionError):
pass
class UnknownImmutableContainerVersionError(UnknownContainerVersionError):
pass
def si_b2a(storageindex):
return base32.b2a(storageindex)

View File

@ -25,23 +25,28 @@ from allmydata.interfaces import (
)
from allmydata.util import base32, fileutil, log
from allmydata.util.assertutil import precondition
from allmydata.storage.lease import LeaseInfo
from allmydata.storage.common import UnknownImmutableContainerVersionError
from .immutable_schema import (
NEWEST_SCHEMA_VERSION,
schema_from_version,
)
# each share file (in storage/shares/$SI/$SHNUM) contains lease information
# and share data. The share data is accessed by RIBucketWriter.write and
# RIBucketReader.read . The lease information is not accessible through these
# interfaces.
# The share file has the following layout:
# 0x00: share file version number, four bytes, current version is 1
# 0x00: share file version number, four bytes, current version is 2
# 0x04: share data length, four bytes big-endian = A # See Footnote 1 below.
# 0x08: number of leases, four bytes big-endian
# 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy)
# A+0x0c = B: first lease. Lease format is:
# B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner
# B+0x04: renew secret, 32 bytes (SHA256)
# B+0x24: cancel secret, 32 bytes (SHA256)
# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below.
# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b)
# B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch
# B+0x48: next lease, or end of record
@ -53,6 +58,23 @@ from allmydata.storage.common import UnknownImmutableContainerVersionError
# then the value stored in this field will be the actual share data length
# modulo 2**32.
# Footnote 2: The change between share file version number 1 and 2 is that
# storage of lease secrets is changed from plaintext to hashed. This change
# protects the secrets from compromises of local storage on the server: if a
# plaintext cancel secret is somehow exfiltrated from the storage server, an
# attacker could use it to cancel that lease and potentially cause user data
# to be discarded before intended by the real owner. As of this comment,
# lease cancellation is disabled because there have been at least two bugs
# which leak the persisted value of the cancellation secret. If lease secrets
# were stored hashed instead of plaintext then neither of these bugs would
# have allowed an attacker to learn a usable cancel secret.
#
# Clients are free to construct these secrets however they like. The
# Tahoe-LAFS client uses a SHA256-based construction. The server then uses
# blake2b to hash these values for storage so that it retains no persistent
# copy of the original secret.
#
def _fix_lease_count_format(lease_count_format):
"""
Turn a single character struct format string into a format string suitable
@ -118,9 +140,16 @@ class ShareFile(object):
``False`` otherwise.
"""
(version,) = struct.unpack(">L", header[:4])
return version == 1
return schema_from_version(version) is not None
def __init__(self, filename, max_size=None, create=False, lease_count_format="L"):
def __init__(
self,
filename,
max_size=None,
create=False,
lease_count_format="L",
schema=NEWEST_SCHEMA_VERSION,
):
"""
Initialize a ``ShareFile``.
@ -156,27 +185,18 @@ class ShareFile(object):
# it. Also construct the metadata.
assert not os.path.exists(self.home)
fileutil.make_dirs(os.path.dirname(self.home))
# The second field -- the four-byte share data length -- is no
# longer used as of Tahoe v1.3.0, but we continue to write it in
# there in case someone downgrades a storage server from >=
# Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one
# server to another, etc. We do saturation -- a share data length
# larger than 2**32-1 (what can fit into the field) is marked as
# the largest length that can fit into the field. That way, even
# if this does happen, the old < v1.3.0 server will still allow
# clients to read the first part of the share.
self._schema = schema
with open(self.home, 'wb') as f:
f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0))
f.write(self._schema.header(max_size))
self._lease_offset = max_size + 0x0c
self._num_leases = 0
else:
with open(self.home, 'rb') as f:
filesize = os.path.getsize(self.home)
(version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc))
if version != 1:
msg = "sharefile %s had version %d but we wanted 1" % \
(filename, version)
raise UnknownImmutableContainerVersionError(msg)
self._schema = schema_from_version(version)
if self._schema is None:
raise UnknownImmutableContainerVersionError(filename, version)
self._num_leases = num_leases
self._lease_offset = filesize - (num_leases * self.LEASE_SIZE)
self._data_offset = 0xc
@ -211,7 +231,7 @@ class ShareFile(object):
offset = self._lease_offset + lease_number * self.LEASE_SIZE
f.seek(offset)
assert f.tell() == offset
f.write(lease_info.to_immutable_data())
f.write(self._schema.serialize_lease(lease_info))
def _read_num_leases(self, f):
f.seek(0x08)
@ -242,7 +262,7 @@ class ShareFile(object):
for i in range(num_leases):
data = f.read(self.LEASE_SIZE)
if data:
yield LeaseInfo.from_immutable_data(data)
yield self._schema.unserialize_lease(data)
def add_lease(self, lease_info):
with open(self.home, 'rb+') as f:

View File

@ -0,0 +1,183 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import struct
try:
from typing import Union
except ImportError:
pass
import attr
from nacl.hash import blake2b
from nacl.encoding import RawEncoder
from .lease import (
LeaseInfo,
HashedLeaseInfo,
)
def _header(version, max_size):
# type: (int, int) -> bytes
"""
Construct the header for an immutable container.
:param version: the container version to include the in header
:param max_size: the maximum data size the container will hold
:return: some bytes to write at the beginning of the container
"""
# The second field -- the four-byte share data length -- is no longer
# used as of Tahoe v1.3.0, but we continue to write it in there in
# case someone downgrades a storage server from >= Tahoe-1.3.0 to <
# Tahoe-1.3.0, or moves a share file from one server to another,
# etc. We do saturation -- a share data length larger than 2**32-1
# (what can fit into the field) is marked as the largest length that
# can fit into the field. That way, even if this does happen, the old
# < v1.3.0 server will still allow clients to read the first part of
# the share.
return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0)
class _V2(object):
"""
Implement encoding and decoding for v2 of the immutable container.
"""
version = 2
@classmethod
def _hash_secret(cls, secret):
# type: (bytes) -> bytes
"""
Hash a lease secret for storage.
"""
return blake2b(secret, digest_size=32, encoder=RawEncoder())
@classmethod
def _hash_lease_info(cls, lease_info):
# type: (LeaseInfo) -> HashedLeaseInfo
"""
Hash the cleartext lease info secrets into a ``HashedLeaseInfo``.
"""
if not isinstance(lease_info, LeaseInfo):
# Provide a little safety against misuse, especially an attempt to
# re-hash an already-hashed lease info which is represented as a
# different type.
raise TypeError(
"Can only hash LeaseInfo, not {!r}".format(lease_info),
)
# Hash the cleartext secrets in the lease info and wrap the result in
# a new type.
return HashedLeaseInfo(
attr.assoc(
lease_info,
renew_secret=cls._hash_secret(lease_info.renew_secret),
cancel_secret=cls._hash_secret(lease_info.cancel_secret),
),
cls._hash_secret,
)
@classmethod
def header(cls, max_size):
# type: (int) -> bytes
"""
Construct a container header.
:param max_size: the maximum size the container can hold
:return: the header bytes
"""
return _header(cls.version, max_size)
@classmethod
def serialize_lease(cls, lease):
# type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
"""
Serialize a lease to be written to a v2 container.
:param lease: the lease to serialize
:return: the serialized bytes
"""
if isinstance(lease, LeaseInfo):
# v2 of the immutable schema stores lease secrets hashed. If
# we're given a LeaseInfo then it holds plaintext secrets. Hash
# them before trying to serialize.
lease = cls._hash_lease_info(lease)
if isinstance(lease, HashedLeaseInfo):
return lease.to_immutable_data()
raise ValueError(
"ShareFile v2 schema cannot represent lease {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# type: (bytes) -> HashedLeaseInfo
"""
Unserialize some bytes from a v2 container.
:param data: the bytes from the container
:return: the ``HashedLeaseInfo`` the bytes represent
"""
# In v2 of the immutable schema lease secrets are stored hashed. Wrap
# a LeaseInfo in a HashedLeaseInfo so it can supply the correct
# interpretation for those values.
return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret)
class _V1(object):
"""
Implement encoding and decoding for v1 of the immutable container.
"""
version = 1
@classmethod
def header(cls, max_size):
return _header(cls.version, max_size)
@classmethod
def serialize_lease(cls, lease):
if isinstance(lease, LeaseInfo):
return lease.to_immutable_data()
raise ValueError(
"ShareFile v1 schema only supports LeaseInfo, not {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# In v1 of the immutable schema lease secrets are stored plaintext.
# So load the data into a plain LeaseInfo which works on plaintext
# secrets.
return LeaseInfo.from_immutable_data(data)
ALL_SCHEMAS = {_V2, _V1}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore
def schema_from_version(version):
# (int) -> Optional[type]
"""
Find the schema object that corresponds to a certain version number.
"""
for schema in ALL_SCHEMAS:
if schema.version == version:
return schema
return None

View File

@ -15,6 +15,15 @@ import struct, time
import attr
from zope.interface import (
Interface,
implementer,
)
from twisted.python.components import (
proxyForInterface,
)
from allmydata.util.hashutil import timing_safe_compare
# struct format for representation of a lease in an immutable share
@ -23,6 +32,84 @@ IMMUTABLE_FORMAT = ">L32s32sL"
# struct format for representation of a lease in a mutable share
MUTABLE_FORMAT = ">LL32s32s20s"
class ILeaseInfo(Interface):
"""
Represent a marker attached to a share that indicates that share should be
retained for some amount of time.
Typically clients will create and renew leases on their shares as a way to
inform storage servers that there is still interest in those shares. A
share may have more than one lease. If all leases on a share have
expiration times in the past then the storage server may take this as a
strong hint that no one is interested in the share anymore and therefore
the share may be deleted to reclaim the space.
"""
def renew(new_expire_time):
"""
Create a new ``ILeaseInfo`` with the given expiration time.
:param Union[int, float] new_expire_time: The expiration time the new
``ILeaseInfo`` will have.
:return: The new ``ILeaseInfo`` provider with the new expiration time.
"""
def get_expiration_time():
"""
:return Union[int, float]: this lease's expiration time
"""
def get_grant_renew_time_time():
"""
:return Union[int, float]: a guess about the last time this lease was
renewed
"""
def get_age():
"""
:return Union[int, float]: a guess about how long it has been since this
lease was renewed
"""
def to_immutable_data():
"""
:return bytes: a serialized representation of this lease suitable for
inclusion in an immutable container
"""
def to_mutable_data():
"""
:return bytes: a serialized representation of this lease suitable for
inclusion in a mutable container
"""
def immutable_size():
"""
:return int: the size of the serialized representation of this lease in an
immutable container
"""
def mutable_size():
"""
:return int: the size of the serialized representation of this lease in a
mutable container
"""
def is_renew_secret(candidate_secret):
"""
:return bool: ``True`` if the given byte string is this lease's renew
secret, ``False`` otherwise
"""
def is_cancel_secret(candidate_secret):
"""
:return bool: ``True`` if the given byte string is this lease's cancel
secret, ``False`` otherwise
"""
@implementer(ILeaseInfo)
@attr.s(frozen=True)
class LeaseInfo(object):
"""
@ -162,3 +249,85 @@ class LeaseInfo(object):
]
values = struct.unpack(">LL32s32s20s", data)
return cls(**dict(zip(names, values)))
@attr.s(frozen=True)
class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ignore # unsupported dynamic base class
"""
A ``HashedLeaseInfo`` wraps lease information in which the secrets have
been hashed.
"""
_lease_info = attr.ib()
_hash = attr.ib()
# proxyForInterface will take care of forwarding all methods on ILeaseInfo
# to `_lease_info`. Here we override a few of those methods to adjust
# their behavior to make them suitable for use with hashed secrets.
def is_renew_secret(self, candidate_secret):
"""
Hash the candidate secret and compare the result to the stored hashed
secret.
"""
return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret))
def is_cancel_secret(self, candidate_secret):
"""
Hash the candidate secret and compare the result to the stored hashed
secret.
"""
if isinstance(candidate_secret, _HashedCancelSecret):
# Someone read it off of this object in this project - probably
# the lease crawler - and is just trying to use it to identify
# which lease it wants to operate on. Avoid re-hashing the value.
#
# It is important that this codepath is only availably internally
# for this process to talk to itself. If it were to be exposed to
# clients over the network, they could just provide the hashed
# value to avoid having to ever learn the original value.
hashed_candidate = candidate_secret.hashed_value
else:
# It is not yet hashed so hash it.
hashed_candidate = self._hash(candidate_secret)
return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate)
@property
def owner_num(self):
return self._lease_info.owner_num
@property
def cancel_secret(self):
"""
Give back an opaque wrapper around the hashed cancel secret which can
later be presented for a succesful equality comparison.
"""
# We don't *have* the cancel secret. We hashed it and threw away the
# original. That's good. It does mean that some code that runs
# in-process with the storage service (LeaseCheckingCrawler) runs into
# some difficulty. That code wants to cancel leases and does so using
# the same interface that faces storage clients (or would face them,
# if lease cancellation were exposed).
#
# Since it can't use the hashed secret to cancel a lease (that's the
# point of the hashing) and we don't have the unhashed secret to give
# it, instead we give it a marker that `cancel_lease` will recognize.
# On recognizing it, if the hashed value given matches the hashed
# value stored it is considered a match and the lease can be
# cancelled.
#
# This isn't great. Maybe the internal and external consumers of
# cancellation should use different interfaces.
return _HashedCancelSecret(self._lease_info.cancel_secret)
@attr.s(frozen=True)
class _HashedCancelSecret(object):
"""
``_HashedCancelSecret`` is a marker type for an already-hashed lease
cancel secret that lets internal lease cancellers bypass the hash-based
protection that's imposed on external lease cancellers.
:ivar bytes hashed_value: The already-hashed secret.
"""
hashed_value = attr.ib()

View File

@ -95,9 +95,7 @@ class MutableShareFile(object):
data_length, extra_least_offset) = \
struct.unpack(">32s20s32sQQ", data)
if not self.is_valid_header(data):
msg = "sharefile %s had magic '%r' but we wanted '%r'" % \
(filename, magic, self.MAGIC)
raise UnknownMutableContainerVersionError(msg)
raise UnknownMutableContainerVersionError(filename, magic)
self.parent = parent # for logging
def log(self, *args, **kwargs):

View File

@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase):
d.addCallback(_download, imm_uri, i, expected)
d.addCallback(lambda ign: self.restore_all_shares(self.shares))
d.addCallback(fireEventually)
corrupt_values = [(3, 2, "no-sh2"),
(15, 2, "need-4th"), # share looks v2
]
corrupt_values = [
# Make the container version for share number 2 look
# unsupported. If you add support for immutable share file
# version number much past 16 million then you will have to
# update this test. Also maybe you have other problems.
(1, 255, "no-sh2"),
# Make the immutable share number 2 (not the container, the
# thing inside the container) look unsupported. Ditto the
# above about version numbers in the ballpark of 16 million.
(13, 255, "need-4th"),
]
for i,newvalue,expected in corrupt_values:
d.addCallback(self._corrupt_set, imm_uri, i, newvalue)
d.addCallback(_download, imm_uri, i, expected)

View File

@ -43,6 +43,9 @@ from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME
from allmydata.storage.shares import get_share_file
from allmydata.storage.mutable import MutableShareFile
from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile
from allmydata.storage.immutable_schema import (
ALL_SCHEMAS,
)
from allmydata.storage.common import storage_index_to_dir, \
UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \
si_b2a, si_a2b
@ -646,7 +649,9 @@ class Server(unittest.TestCase):
e = self.failUnlessRaises(UnknownImmutableContainerVersionError,
ss.remote_get_buckets, b"si1")
self.failUnlessIn(" had version 0 but we wanted 1", str(e))
self.assertEqual(e.filename, fn)
self.assertEqual(e.version, 0)
self.assertIn("had unexpected version 0", str(e))
def test_disconnect(self):
# simulate a disconnection
@ -843,6 +848,9 @@ class Server(unittest.TestCase):
# Create a bucket:
rs0, cs0 = self.create_bucket_5_shares(ss, b"si0")
# Upload of an immutable implies creation of a single lease with the
# supplied secrets.
(lease,) = ss.get_leases(b"si0")
self.assertTrue(lease.is_renew_secret(rs0))
@ -1127,8 +1135,10 @@ class MutableServer(unittest.TestCase):
read = ss.remote_slot_readv
e = self.failUnlessRaises(UnknownMutableContainerVersionError,
read, b"si1", [0], [(0,10)])
self.failUnlessIn(" had magic ", str(e))
self.failUnlessIn(" but we wanted ", str(e))
self.assertEqual(e.filename, fn)
self.assertTrue(e.version.startswith(b"BAD MAGIC"))
self.assertIn("had unexpected version", str(e))
self.assertIn("BAD MAGIC", str(e))
def test_container_size(self):
ss = self.create("test_container_size")
@ -3124,6 +3134,7 @@ class Stats(unittest.TestCase):
self.failUnless(output["get"]["99_0_percentile"] is None, output)
self.failUnless(output["get"]["99_9_percentile"] is None, output)
immutable_schemas = strategies.sampled_from(list(ALL_SCHEMAS))
class ShareFileTests(unittest.TestCase):
"""Tests for allmydata.storage.immutable.ShareFile."""
@ -3135,47 +3146,54 @@ class ShareFileTests(unittest.TestCase):
# Should be b'abDEF' now.
return sf
def test_read_write(self):
@given(immutable_schemas)
def test_read_write(self, schema):
"""Basic writes can be read."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
self.assertEqual(sf.read_share_data(0, 3), b"abD")
self.assertEqual(sf.read_share_data(1, 4), b"bDEF")
def test_reads_beyond_file_end(self):
@given(immutable_schemas)
def test_reads_beyond_file_end(self, schema):
"""Reads beyond the file size are truncated."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
self.assertEqual(sf.read_share_data(0, 10), b"abDEF")
self.assertEqual(sf.read_share_data(5, 10), b"")
def test_too_large_write(self):
@given(immutable_schemas)
def test_too_large_write(self, schema):
"""Can't do write larger than file size."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
with self.assertRaises(DataTooLargeError):
sf.write_share_data(0, b"x" * 3000)
def test_no_leases_cancelled(self):
@given(immutable_schemas)
def test_no_leases_cancelled(self, schema):
"""If no leases were cancelled, IndexError is raised."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
with self.assertRaises(IndexError):
sf.cancel_lease(b"garbage")
def test_long_lease_count_format(self):
@given(immutable_schemas)
def test_long_lease_count_format(self, schema):
"""
``ShareFile.__init__`` raises ``ValueError`` if the lease count format
given is longer than one character.
"""
with self.assertRaises(ValueError):
self.get_sharefile(lease_count_format="BB")
self.get_sharefile(schema=schema, lease_count_format="BB")
def test_large_lease_count_format(self):
@given(immutable_schemas)
def test_large_lease_count_format(self, schema):
"""
``ShareFile.__init__`` raises ``ValueError`` if the lease count format
encodes to a size larger than 8 bytes.
"""
with self.assertRaises(ValueError):
self.get_sharefile(lease_count_format="Q")
self.get_sharefile(schema=schema, lease_count_format="Q")
def test_avoid_lease_overflow(self):
@given(immutable_schemas)
def test_avoid_lease_overflow(self, schema):
"""
If the share file already has the maximum number of leases supported then
``ShareFile.add_lease`` raises ``struct.error`` and makes no changes
@ -3189,7 +3207,7 @@ class ShareFileTests(unittest.TestCase):
)
# Make it a little easier to reach the condition by limiting the
# number of leases to only 255.
sf = self.get_sharefile(lease_count_format="B")
sf = self.get_sharefile(schema=schema, lease_count_format="B")
# Add the leases.
for i in range(2 ** 8 - 1):
@ -3213,16 +3231,17 @@ class ShareFileTests(unittest.TestCase):
self.assertEqual(before_data, after_data)
def test_renew_secret(self):
@given(immutable_schemas)
def test_renew_secret(self, schema):
"""
A lease loaded from an immutable share file can have its renew secret
verified.
A lease loaded from an immutable share file at any schema version can have
its renew secret verified.
"""
renew_secret = b"r" * 32
cancel_secret = b"c" * 32
expiration_time = 2 ** 31
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
lease = LeaseInfo(
owner_num=0,
renew_secret=renew_secret,
@ -3233,16 +3252,17 @@ class ShareFileTests(unittest.TestCase):
(loaded_lease,) = sf.get_leases()
self.assertTrue(loaded_lease.is_renew_secret(renew_secret))
def test_cancel_secret(self):
@given(immutable_schemas)
def test_cancel_secret(self, schema):
"""
A lease loaded from an immutable share file can have its cancel secret
verified.
A lease loaded from an immutable share file at any schema version can have
its cancel secret verified.
"""
renew_secret = b"r" * 32
cancel_secret = b"c" * 32
expiration_time = 2 ** 31
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
lease = LeaseInfo(
owner_num=0,
renew_secret=renew_secret,