Merge branch 'master' into 3825.json-pickle

This commit is contained in:
meejah 2021-11-23 13:53:41 -07:00
commit b0309331a5
12 changed files with 741 additions and 104 deletions

View File

@ -0,0 +1 @@
The storage server now keeps hashes of lease renew and cancel secrets for immutable share files instead of keeping the original secrets.

View File

@ -0,0 +1 @@
The storage server now keeps hashes of lease renew and cancel secrets for mutable share files instead of keeping the original secrets.

View File

@ -230,8 +230,8 @@ def dump_mutable_share(options):
print(" ownerid: %d" % lease.owner_num, file=out)
when = format_expiration_time(lease.get_expiration_time())
print(" expires in %s" % when, file=out)
print(" renew_secret: %s" % str(base32.b2a(lease.renew_secret), "utf-8"), file=out)
print(" cancel_secret: %s" % str(base32.b2a(lease.cancel_secret), "utf-8"), file=out)
print(" renew_secret: %s" % lease.present_renew_secret(), file=out)
print(" cancel_secret: %s" % lease.present_cancel_secret(), file=out)
print(" secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid), file=out)
else:
print("No leases.", file=out)

View File

@ -16,11 +16,22 @@ from allmydata.util import base32
# Backwards compatibility.
from allmydata.interfaces import DataTooLargeError # noqa: F401
class UnknownMutableContainerVersionError(Exception):
pass
class UnknownImmutableContainerVersionError(Exception):
class UnknownContainerVersionError(Exception):
    """
    A share container file carries a version marker that this code does not
    know how to interpret.

    :ivar filename: the path of the offending container file
    :ivar version: the unrecognized version value read from it
    """
    def __init__(self, filename, version):
        self.filename = filename
        self.version = version

    def __str__(self):
        return "sharefile %r had unexpected version %r" % (
            self.filename,
            self.version,
        )


class UnknownMutableContainerVersionError(UnknownContainerVersionError):
    """A mutable share container had an unrecognized header."""


class UnknownImmutableContainerVersionError(UnknownContainerVersionError):
    """An immutable share container had an unrecognized version number."""
def si_b2a(storageindex):
    """
    Render a binary storage index in its base32 representation.
    """
    encoded = base32.b2a(storageindex)
    return encoded

View File

@ -25,23 +25,28 @@ from allmydata.interfaces import (
)
from allmydata.util import base32, fileutil, log
from allmydata.util.assertutil import precondition
from allmydata.storage.lease import LeaseInfo
from allmydata.storage.common import UnknownImmutableContainerVersionError
from .immutable_schema import (
NEWEST_SCHEMA_VERSION,
schema_from_version,
)
# each share file (in storage/shares/$SI/$SHNUM) contains lease information
# and share data. The share data is accessed by RIBucketWriter.write and
# RIBucketReader.read . The lease information is not accessible through these
# interfaces.
# The share file has the following layout:
# 0x00: share file version number, four bytes, current version is 1
# 0x00: share file version number, four bytes, current version is 2
# 0x04: share data length, four bytes big-endian = A # See Footnote 1 below.
# 0x08: number of leases, four bytes big-endian
# 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy)
# A+0x0c = B: first lease. Lease format is:
# B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner
# B+0x04: renew secret, 32 bytes (SHA256)
# B+0x24: cancel secret, 32 bytes (SHA256)
# B+0x04: renew secret, 32 bytes (SHA256 + blake2b) # See Footnote 2 below.
# B+0x24: cancel secret, 32 bytes (SHA256 + blake2b)
# B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch
# B+0x48: next lease, or end of record
@ -53,6 +58,23 @@ from allmydata.storage.common import UnknownImmutableContainerVersionError
# then the value stored in this field will be the actual share data length
# modulo 2**32.
# Footnote 2: The change between share file version number 1 and 2 is that
# storage of lease secrets is changed from plaintext to hashed. This change
# protects the secrets from compromises of local storage on the server: if a
# plaintext cancel secret is somehow exfiltrated from the storage server, an
# attacker could use it to cancel that lease and potentially cause user data
# to be discarded before intended by the real owner. As of this comment,
# lease cancellation is disabled because there have been at least two bugs
# which leak the persisted value of the cancellation secret. If lease secrets
# were stored hashed instead of plaintext then neither of these bugs would
# have allowed an attacker to learn a usable cancel secret.
#
# Clients are free to construct these secrets however they like. The
# Tahoe-LAFS client uses a SHA256-based construction. The server then uses
# blake2b to hash these values for storage so that it retains no persistent
# copy of the original secret.
#
def _fix_lease_count_format(lease_count_format):
"""
Turn a single character struct format string into a format string suitable
@ -118,9 +140,16 @@ class ShareFile(object):
``False`` otherwise.
"""
(version,) = struct.unpack(">L", header[:4])
return version == 1
return schema_from_version(version) is not None
def __init__(self, filename, max_size=None, create=False, lease_count_format="L"):
def __init__(
self,
filename,
max_size=None,
create=False,
lease_count_format="L",
schema=NEWEST_SCHEMA_VERSION,
):
"""
Initialize a ``ShareFile``.
@ -156,27 +185,18 @@ class ShareFile(object):
# it. Also construct the metadata.
assert not os.path.exists(self.home)
fileutil.make_dirs(os.path.dirname(self.home))
# The second field -- the four-byte share data length -- is no
# longer used as of Tahoe v1.3.0, but we continue to write it in
# there in case someone downgrades a storage server from >=
# Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one
# server to another, etc. We do saturation -- a share data length
# larger than 2**32-1 (what can fit into the field) is marked as
# the largest length that can fit into the field. That way, even
# if this does happen, the old < v1.3.0 server will still allow
# clients to read the first part of the share.
self._schema = schema
with open(self.home, 'wb') as f:
f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0))
f.write(self._schema.header(max_size))
self._lease_offset = max_size + 0x0c
self._num_leases = 0
else:
with open(self.home, 'rb') as f:
filesize = os.path.getsize(self.home)
(version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc))
if version != 1:
msg = "sharefile %s had version %d but we wanted 1" % \
(filename, version)
raise UnknownImmutableContainerVersionError(msg)
self._schema = schema_from_version(version)
if self._schema is None:
raise UnknownImmutableContainerVersionError(filename, version)
self._num_leases = num_leases
self._lease_offset = filesize - (num_leases * self.LEASE_SIZE)
self._data_offset = 0xc
@ -211,7 +231,7 @@ class ShareFile(object):
offset = self._lease_offset + lease_number * self.LEASE_SIZE
f.seek(offset)
assert f.tell() == offset
f.write(lease_info.to_immutable_data())
f.write(self._schema.lease_serializer.serialize(lease_info))
def _read_num_leases(self, f):
f.seek(0x08)
@ -242,7 +262,7 @@ class ShareFile(object):
for i in range(num_leases):
data = f.read(self.LEASE_SIZE)
if data:
yield LeaseInfo.from_immutable_data(data)
yield self._schema.lease_serializer.unserialize(data)
def add_lease(self, lease_info):
with open(self.home, 'rb+') as f:

View File

@ -0,0 +1,72 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import struct
import attr
from .lease_schema import (
v1_immutable,
v2_immutable,
)
@attr.s(frozen=True)
class _Schema(object):
    """
    Implement encoding and decoding for multiple versions of the immutable
    container schema.

    :ivar int version: the version number of the schema this object supports

    :ivar lease_serializer: an object that is responsible for lease
        serialization and unserialization
    """
    # The version number doubles as the first field of the on-disk header
    # (see ``header`` below).
    version = attr.ib()
    lease_serializer = attr.ib()

    def header(self, max_size):
        # type: (int) -> bytes
        """
        Construct a container header.

        :param max_size: the maximum size the container can hold

        :return: the header bytes (version, saturated data length, and a
            zero lease count)
        """
        # The second field -- the four-byte share data length -- is no longer
        # used as of Tahoe v1.3.0, but we continue to write it in there in
        # case someone downgrades a storage server from >= Tahoe-1.3.0 to <
        # Tahoe-1.3.0, or moves a share file from one server to another,
        # etc.  We do saturation -- a share data length larger than 2**32-1
        # (what can fit into the field) is marked as the largest length that
        # can fit into the field.  That way, even if this does happen, the old
        # < v1.3.0 server will still allow clients to read the first part of
        # the share.
        return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0)
# Every immutable container schema this code knows how to read and write.
ALL_SCHEMAS = {
    _Schema(version=2, lease_serializer=v2_immutable),
    _Schema(version=1, lease_serializer=v1_immutable),
}
# The set of version numbers corresponding to the supported schemas.
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
# NOTE: despite the name, this is the newest ``_Schema`` *object* (suitable
# for use as a default ``schema=`` argument), not its integer version number.
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)
def schema_from_version(version):
    # type: (int) -> Optional[_Schema]
    """
    Find the schema object that supports the given container version number.

    :return: the matching schema, or ``None`` if the version is unknown
    """
    matches = (schema for schema in ALL_SCHEMAS if schema.version == version)
    return next(matches, None)

View File

@ -15,7 +15,17 @@ import struct, time
import attr
from zope.interface import (
Interface,
implementer,
)
from twisted.python.components import (
proxyForInterface,
)
from allmydata.util.hashutil import timing_safe_compare
from allmydata.util import base32
# struct format for representation of a lease in an immutable share
IMMUTABLE_FORMAT = ">L32s32sL"
@ -23,6 +33,96 @@ IMMUTABLE_FORMAT = ">L32s32sL"
# struct format for representation of a lease in a mutable share
MUTABLE_FORMAT = ">LL32s32s20s"
class ILeaseInfo(Interface):
    """
    Represent a marker attached to a share that indicates that share should be
    retained for some amount of time.

    Typically clients will create and renew leases on their shares as a way to
    inform storage servers that there is still interest in those shares.  A
    share may have more than one lease.  If all leases on a share have
    expiration times in the past then the storage server may take this as a
    strong hint that no one is interested in the share anymore and therefore
    the share may be deleted to reclaim the space.
    """
    # NOTE: per zope.interface convention, interface methods are declared
    # without a ``self`` parameter.

    def renew(new_expire_time):
        """
        Create a new ``ILeaseInfo`` with the given expiration time.

        :param Union[int, float] new_expire_time: The expiration time the new
            ``ILeaseInfo`` will have.

        :return: The new ``ILeaseInfo`` provider with the new expiration time.
        """

    def get_expiration_time():
        """
        :return Union[int, float]: this lease's expiration time
        """

    def get_grant_renew_time_time():
        """
        :return Union[int, float]: a guess about the last time this lease was
            renewed
        """

    def get_age():
        """
        :return Union[int, float]: a guess about how long it has been since this
            lease was renewed
        """

    def to_immutable_data():
        """
        :return bytes: a serialized representation of this lease suitable for
            inclusion in an immutable container
        """

    def to_mutable_data():
        """
        :return bytes: a serialized representation of this lease suitable for
            inclusion in a mutable container
        """

    def immutable_size():
        """
        :return int: the size of the serialized representation of this lease in an
            immutable container
        """

    def mutable_size():
        """
        :return int: the size of the serialized representation of this lease in a
            mutable container
        """

    def is_renew_secret(candidate_secret):
        """
        :return bool: ``True`` if the given byte string is this lease's renew
            secret, ``False`` otherwise
        """

    def present_renew_secret():
        """
        :return str: Text which could reasonably be shown to a person representing
            this lease's renew secret.
        """

    def is_cancel_secret(candidate_secret):
        """
        :return bool: ``True`` if the given byte string is this lease's cancel
            secret, ``False`` otherwise
        """

    def present_cancel_secret():
        """
        :return str: Text which could reasonably be shown to a person representing
            this lease's cancel secret.
        """
@implementer(ILeaseInfo)
@attr.s(frozen=True)
class LeaseInfo(object):
"""
@ -86,6 +186,13 @@ class LeaseInfo(object):
"""
return timing_safe_compare(self.renew_secret, candidate_secret)
def present_renew_secret(self):
# type: () -> str
"""
Return the renew secret, base32-encoded.
"""
return str(base32.b2a(self.renew_secret), "utf-8")
def is_cancel_secret(self, candidate_secret):
# type: (bytes) -> bool
"""
@ -96,6 +203,13 @@ class LeaseInfo(object):
"""
return timing_safe_compare(self.cancel_secret, candidate_secret)
def present_cancel_secret(self):
# type: () -> str
"""
Return the cancel secret, base32-encoded.
"""
return str(base32.b2a(self.cancel_secret), "utf-8")
def get_grant_renew_time_time(self):
# hack, based upon fixed 31day expiration period
return self._expiration_time - 31*24*60*60
@ -116,7 +230,7 @@ class LeaseInfo(object):
"cancel_secret",
"expiration_time",
]
values = struct.unpack(">L32s32sL", data)
values = struct.unpack(IMMUTABLE_FORMAT, data)
return cls(nodeid=None, **dict(zip(names, values)))
def immutable_size(self):
@ -160,5 +274,114 @@ class LeaseInfo(object):
"cancel_secret",
"nodeid",
]
values = struct.unpack(">LL32s32s20s", data)
values = struct.unpack(MUTABLE_FORMAT, data)
return cls(**dict(zip(names, values)))
@attr.s(frozen=True)
class HashedLeaseInfo(proxyForInterface(ILeaseInfo, "_lease_info")): # type: ignore # unsupported dynamic base class
    """
    A ``HashedLeaseInfo`` wraps lease information in which the secrets have
    been hashed.
    """
    # The wrapped ILeaseInfo provider; its renew/cancel secrets are already
    # hashed.
    _lease_info = attr.ib()
    # A one-argument callable used to hash candidate secrets before they are
    # compared against the stored (hashed) secrets.
    _hash = attr.ib()

    # proxyForInterface will take care of forwarding all methods on ILeaseInfo
    # to `_lease_info`.  Here we override a few of those methods to adjust
    # their behavior to make them suitable for use with hashed secrets.

    def renew(self, new_expire_time):
        # Preserve the HashedLeaseInfo wrapper around the renewed LeaseInfo.
        # (attr.assoc keys on attribute names, hence the leading underscore.)
        return attr.assoc(
            self,
            _lease_info=super(HashedLeaseInfo, self).renew(new_expire_time),
        )

    def is_renew_secret(self, candidate_secret):
        # type: (bytes) -> bool
        """
        Hash the candidate secret and compare the result to the stored hashed
        secret.
        """
        return super(HashedLeaseInfo, self).is_renew_secret(self._hash(candidate_secret))

    def present_renew_secret(self):
        # type: () -> str
        """
        Present the hash of the secret with a marker indicating it is a hash.
        """
        return u"hash:" + super(HashedLeaseInfo, self).present_renew_secret()

    def is_cancel_secret(self, candidate_secret):
        # type: (bytes) -> bool
        """
        Hash the candidate secret and compare the result to the stored hashed
        secret.
        """
        if isinstance(candidate_secret, _HashedCancelSecret):
            # Someone read it off of this object in this project - probably
            # the lease crawler - and is just trying to use it to identify
            # which lease it wants to operate on.  Avoid re-hashing the value.
            #
            # It is important that this codepath is only available internally
            # for this process to talk to itself.  If it were to be exposed to
            # clients over the network, they could just provide the hashed
            # value to avoid having to ever learn the original value.
            hashed_candidate = candidate_secret.hashed_value
        else:
            # It is not yet hashed so hash it.
            hashed_candidate = self._hash(candidate_secret)

        return super(HashedLeaseInfo, self).is_cancel_secret(hashed_candidate)

    def present_cancel_secret(self):
        # type: () -> str
        """
        Present the hash of the secret with a marker indicating it is a hash.
        """
        return u"hash:" + super(HashedLeaseInfo, self).present_cancel_secret()

    # These attributes are not declared on ILeaseInfo, so proxyForInterface
    # does not forward them; expose them explicitly.
    @property
    def owner_num(self):
        return self._lease_info.owner_num

    @property
    def nodeid(self):
        return self._lease_info.nodeid

    @property
    def cancel_secret(self):
        """
        Give back an opaque wrapper around the hashed cancel secret which can
        later be presented for a successful equality comparison.
        """
        # We don't *have* the cancel secret.  We hashed it and threw away the
        # original.  That's good.  It does mean that some code that runs
        # in-process with the storage service (LeaseCheckingCrawler) runs into
        # some difficulty.  That code wants to cancel leases and does so using
        # the same interface that faces storage clients (or would face them,
        # if lease cancellation were exposed).
        #
        # Since it can't use the hashed secret to cancel a lease (that's the
        # point of the hashing) and we don't have the unhashed secret to give
        # it, instead we give it a marker that `cancel_lease` will recognize.
        # On recognizing it, if the hashed value given matches the hashed
        # value stored it is considered a match and the lease can be
        # cancelled.
        #
        # This isn't great.  Maybe the internal and external consumers of
        # cancellation should use different interfaces.
        return _HashedCancelSecret(self._lease_info.cancel_secret)
@attr.s(frozen=True)
class _HashedCancelSecret(object):
    """
    ``_HashedCancelSecret`` is a marker type for an already-hashed lease
    cancel secret that lets internal lease cancellers bypass the hash-based
    protection that's imposed on external lease cancellers.

    :ivar bytes hashed_value: The already-hashed secret.
    """
    # See HashedLeaseInfo.is_cancel_secret for the special handling this
    # marker receives.
    hashed_value = attr.ib()

View File

@ -0,0 +1,138 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
try:
from typing import Union
except ImportError:
pass
import attr
from nacl.hash import blake2b
from nacl.encoding import RawEncoder
from .lease import (
LeaseInfo,
HashedLeaseInfo,
)
@attr.s(frozen=True)
class CleartextLeaseSerializer(object):
    """
    Serialize and unserialize leases with cleartext secrets.
    """
    # Callable turning a LeaseInfo into its on-disk byte representation.
    _to_data = attr.ib()
    # Callable turning an on-disk byte representation back into a LeaseInfo.
    _from_data = attr.ib()

    def serialize(self, lease):
        # type: (LeaseInfo) -> bytes
        """
        Represent the given lease as bytes with cleartext secrets.
        """
        if not isinstance(lease, LeaseInfo):
            # Reject anything else (e.g. a HashedLeaseInfo) so hashed secrets
            # are never accidentally written into a v1 container.
            raise ValueError(
                "ShareFile v1 schema only supports LeaseInfo, not {!r}".format(
                    lease,
                ),
            )
        return self._to_data(lease)

    def unserialize(self, data):
        # type: (bytes) -> LeaseInfo
        """
        Load a lease with cleartext secrets from the given bytes representation.
        """
        # In v1 of the immutable schema lease secrets are stored plaintext,
        # so a plain LeaseInfo (which operates on plaintext secrets) is the
        # right in-memory representation.
        return self._from_data(data)
@attr.s(frozen=True)
class HashedLeaseSerializer(object):
    """
    Serialize and unserialize leases, hashing the renew and cancel secrets
    before they are written so that no cleartext secret is persisted.
    """
    # Callable turning a HashedLeaseInfo into its on-disk byte representation.
    _to_data = attr.ib()
    # Callable turning an on-disk byte representation into a LeaseInfo
    # (re-wrapped as HashedLeaseInfo in ``unserialize``).
    _from_data = attr.ib()

    @classmethod
    def _hash_secret(cls, secret):
        # type: (bytes) -> bytes
        """
        Hash a lease secret for storage.
        """
        # A 32-byte digest keeps the serialized field the same size as the
        # original (SHA256-derived) secret it replaces.
        return blake2b(secret, digest_size=32, encoder=RawEncoder())

    @classmethod
    def _hash_lease_info(cls, lease_info):
        # type: (LeaseInfo) -> HashedLeaseInfo
        """
        Hash the cleartext lease info secrets into a ``HashedLeaseInfo``.
        """
        if not isinstance(lease_info, LeaseInfo):
            # Provide a little safety against misuse, especially an attempt to
            # re-hash an already-hashed lease info which is represented as a
            # different type.
            raise TypeError(
                "Can only hash LeaseInfo, not {!r}".format(lease_info),
            )

        # Hash the cleartext secrets in the lease info and wrap the result in
        # a new type.
        return HashedLeaseInfo(
            attr.assoc(
                lease_info,
                renew_secret=cls._hash_secret(lease_info.renew_secret),
                cancel_secret=cls._hash_secret(lease_info.cancel_secret),
            ),
            cls._hash_secret,
        )

    def serialize(self, lease):
        # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
        """
        Represent the given lease as bytes, with its secrets hashed.
        """
        if isinstance(lease, LeaseInfo):
            # v2 of the immutable schema stores lease secrets hashed.  If
            # we're given a LeaseInfo then it holds plaintext secrets.  Hash
            # them before trying to serialize.
            lease = self._hash_lease_info(lease)
        if isinstance(lease, HashedLeaseInfo):
            return self._to_data(lease)
        raise ValueError(
            "ShareFile v2 schema cannot represent lease {!r}".format(
                lease,
            ),
        )

    def unserialize(self, data):
        # type: (bytes) -> HashedLeaseInfo
        """
        Load a lease with hashed secrets from the given bytes representation.
        """
        # In v2 of the immutable schema lease secrets are stored hashed.  Wrap
        # a LeaseInfo in a HashedLeaseInfo so it can supply the correct
        # interpretation for those values.
        return HashedLeaseInfo(self._from_data(data), self._hash_secret)
# v1 immutable containers: leases carry cleartext secrets, immutable layout.
v1_immutable = CleartextLeaseSerializer(
    LeaseInfo.to_immutable_data,
    LeaseInfo.from_immutable_data,
)

# v2 immutable containers: leases carry hashed secrets, immutable layout.
# (The unbound HashedLeaseInfo.to_immutable_data is called with the
# HashedLeaseInfo instance passed to ``serialize``.)
v2_immutable = HashedLeaseSerializer(
    HashedLeaseInfo.to_immutable_data,
    LeaseInfo.from_immutable_data,
)

# v1 mutable containers: leases carry cleartext secrets, mutable layout.
v1_mutable = CleartextLeaseSerializer(
    LeaseInfo.to_mutable_data,
    LeaseInfo.from_mutable_data,
)

# v2 mutable containers: leases carry hashed secrets, mutable layout.
v2_mutable = HashedLeaseSerializer(
    HashedLeaseInfo.to_mutable_data,
    LeaseInfo.from_mutable_data,
)

View File

@ -24,7 +24,10 @@ from allmydata.storage.lease import LeaseInfo
from allmydata.storage.common import UnknownMutableContainerVersionError, \
DataTooLargeError
from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE
from .mutable_schema import (
NEWEST_SCHEMA_VERSION,
schema_from_header,
)
# the MutableShareFile is like the ShareFile, but used for mutable data. It
# has a different layout. See docs/mutable.txt for more details.
@ -64,9 +67,6 @@ class MutableShareFile(object):
# our sharefiles share with a recognizable string, plus some random
# binary data to reduce the chance that a regular text file will look
# like a sharefile.
MAGIC = b"Tahoe mutable container v1\n" + b"\x75\x09\x44\x03\x8e"
assert len(MAGIC) == 32
assert isinstance(MAGIC, bytes)
MAX_SIZE = MAX_MUTABLE_SHARE_SIZE
# TODO: decide upon a policy for max share size
@ -82,22 +82,19 @@ class MutableShareFile(object):
:return: ``True`` if the bytes could belong to this container,
``False`` otherwise.
"""
return header.startswith(cls.MAGIC)
return schema_from_header(header) is not None
def __init__(self, filename, parent=None):
def __init__(self, filename, parent=None, schema=NEWEST_SCHEMA_VERSION):
self.home = filename
if os.path.exists(self.home):
# we don't cache anything, just check the magic
with open(self.home, 'rb') as f:
data = f.read(self.HEADER_SIZE)
(magic,
write_enabler_nodeid, write_enabler,
data_length, extra_least_offset) = \
struct.unpack(">32s20s32sQQ", data)
if not self.is_valid_header(data):
msg = "sharefile %s had magic '%r' but we wanted '%r'" % \
(filename, magic, self.MAGIC)
raise UnknownMutableContainerVersionError(msg)
header = f.read(self.HEADER_SIZE)
self._schema = schema_from_header(header)
if self._schema is None:
raise UnknownMutableContainerVersionError(filename, header)
else:
self._schema = schema
self.parent = parent # for logging
def log(self, *args, **kwargs):
@ -105,23 +102,8 @@ class MutableShareFile(object):
def create(self, my_nodeid, write_enabler):
assert not os.path.exists(self.home)
data_length = 0
extra_lease_offset = (self.HEADER_SIZE
+ 4 * self.LEASE_SIZE
+ data_length)
assert extra_lease_offset == self.DATA_OFFSET # true at creation
num_extra_leases = 0
with open(self.home, 'wb') as f:
header = struct.pack(
">32s20s32sQQ",
self.MAGIC, my_nodeid, write_enabler,
data_length, extra_lease_offset,
)
leases = (b"\x00" * self.LEASE_SIZE) * 4
f.write(header + leases)
# data goes here, empty after creation
f.write(struct.pack(">L", num_extra_leases))
# extra leases go here, none at creation
f.write(self._schema.header(my_nodeid, write_enabler))
def unlink(self):
os.unlink(self.home)
@ -254,7 +236,7 @@ class MutableShareFile(object):
+ (lease_number-4)*self.LEASE_SIZE)
f.seek(offset)
assert f.tell() == offset
f.write(lease_info.to_mutable_data())
f.write(self._schema.lease_serializer.serialize(lease_info))
def _read_lease_record(self, f, lease_number):
# returns a LeaseInfo instance, or None
@ -271,7 +253,7 @@ class MutableShareFile(object):
f.seek(offset)
assert f.tell() == offset
data = f.read(self.LEASE_SIZE)
lease_info = LeaseInfo.from_mutable_data(data)
lease_info = self._schema.lease_serializer.unserialize(data)
if lease_info.owner_num == 0:
return None
return lease_info

View File

@ -0,0 +1,144 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import struct
import attr
from ..util.hashutil import (
tagged_hash,
)
from .lease import (
LeaseInfo,
)
from .lease_schema import (
v1_mutable,
v2_mutable,
)
def _magic(version):
    # type: (int) -> bytes
    """
    Compute the 32-byte "magic" marker for a mutable container of the given
    version.

    :param version: The version number of the container.
    """
    # Make it easy for people to recognize
    human_readable = u"Tahoe mutable container v{:d}\n".format(version).encode("ascii")
    # But also keep the chance of accidental collision low
    if version == 1:
        # It's unclear where this byte sequence came from.  It may have just
        # been random.  In any case, preserve it since it is the magic marker
        # in all v1 share files.
        suffix = b"\x75\x09\x44\x03\x8e"
    else:
        # For newer versions, derive the suffix reproducibly from the
        # human-readable part.
        suffix = tagged_hash(
            b"allmydata_mutable_container_header",
            human_readable,
            truncate_to=5,
        )
    magic = human_readable + suffix
    assert len(magic) == 32
    if version > 1:
        # The chance of collision is pretty low but let's just be sure about
        # it.
        assert magic != _magic(version - 1)
    return magic
def _header(magic, extra_lease_offset, nodeid, write_enabler):
    # type: (bytes, int, bytes, bytes) -> bytes
    """
    Construct a mutable container header.

    :param magic: The 32-byte magic marker identifying the container version.

    :param extra_lease_offset: The file offset at which the extra-lease count
        is recorded.

    :param nodeid: A unique identifier for the node holding this
        container.

    :param write_enabler: A secret shared with the client used to
        authorize changes to the contents of this container.
    """
    # The fixed-size portion: magic, node id, write enabler, data length
    # (zero, since a freshly created container is empty), extra lease offset.
    fixed_header = struct.pack(
        ">32s20s32sQQ",
        magic,
        nodeid,
        write_enabler,
        0,
        extra_lease_offset,
    )
    # Space for four leases is always reserved right after the fixed header;
    # they start out zeroed.
    blank_leases = b"\x00" * (LeaseInfo().mutable_size() * 4)
    # The extra-lease count follows.  Share data will eventually be inserted
    # between the blank leases and this count, but at creation there is none.
    extra_lease_count = struct.pack(">L", 0)

    return b"".join([
        fixed_header,
        blank_leases,
        extra_lease_count,
    ])
# struct format of the fixed-size portion of the mutable container header:
# magic, node id, write enabler, data length, extra lease offset.
_HEADER_FORMAT = ">32s20s32sQQ"

# This size excludes leases
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)

# Offset of the extra-lease count: the fixed header plus the four reserved
# lease slots (true for an empty container; see _header above).
_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
@attr.s(frozen=True)
class _Schema(object):
    """
    Implement encoding and decoding for the mutable container.

    :ivar int version: the version number of the schema this object supports

    :ivar lease_serializer: an object that is responsible for lease
        serialization and unserialization
    """
    version = attr.ib()
    lease_serializer = attr.ib()
    # The 32-byte magic marker identifying containers of this version.
    _magic = attr.ib()

    @classmethod
    def for_version(cls, version, lease_serializer):
        """
        Create a schema for the given version, deriving its magic marker.
        """
        return cls(version, lease_serializer, magic=_magic(version))

    def magic_matches(self, candidate_magic):
        # type: (bytes) -> bool
        """
        Return ``True`` if a candidate string matches the expected magic string
        from a mutable container header, ``False`` otherwise.
        """
        return candidate_magic[:len(self._magic)] == self._magic

    def header(self, nodeid, write_enabler):
        """
        Construct the on-disk header for a new, empty container.
        """
        return _header(self._magic, _EXTRA_LEASE_OFFSET, nodeid, write_enabler)
# Every mutable container schema this code knows how to read and write.
ALL_SCHEMAS = {
    _Schema.for_version(version=2, lease_serializer=v2_mutable),
    _Schema.for_version(version=1, lease_serializer=v1_mutable),
}
# The set of version numbers corresponding to the supported schemas.
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
# NOTE: despite the name, this is the newest ``_Schema`` *object* (suitable
# for use as a default ``schema=`` argument), not its integer version number.
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)
def schema_from_header(header):
    # type: (bytes) -> Optional[_Schema]
    """
    Find the schema object whose magic marker matches the given container
    header bytes.

    :return: the matching schema, or ``None`` if no schema recognizes the
        header
    """
    for schema in ALL_SCHEMAS:
        if schema.magic_matches(header):
            return schema
    return None

View File

@ -1113,9 +1113,17 @@ class Corruption(_Base, unittest.TestCase):
d.addCallback(_download, imm_uri, i, expected)
d.addCallback(lambda ign: self.restore_all_shares(self.shares))
d.addCallback(fireEventually)
corrupt_values = [(3, 2, "no-sh2"),
(15, 2, "need-4th"), # share looks v2
]
corrupt_values = [
# Make the container version for share number 2 look
# unsupported. If you add support for immutable share file
# version number much past 16 million then you will have to
# update this test. Also maybe you have other problems.
(1, 255, "no-sh2"),
# Make the immutable share number 2 (not the container, the
# thing inside the container) look unsupported. Ditto the
# above about version numbers in the ballpark of 16 million.
(13, 255, "need-4th"),
]
for i,newvalue,expected in corrupt_values:
d.addCallback(self._corrupt_set, imm_uri, i, newvalue)
d.addCallback(_download, imm_uri, i, expected)

View File

@ -42,7 +42,13 @@ from allmydata.util import fileutil, hashutil, base32
from allmydata.storage.server import StorageServer, DEFAULT_RENEWAL_TIME
from allmydata.storage.shares import get_share_file
from allmydata.storage.mutable import MutableShareFile
from allmydata.storage.mutable_schema import (
ALL_SCHEMAS as ALL_MUTABLE_SCHEMAS,
)
from allmydata.storage.immutable import BucketWriter, BucketReader, ShareFile
from allmydata.storage.immutable_schema import (
ALL_SCHEMAS as ALL_IMMUTABLE_SCHEMAS,
)
from allmydata.storage.common import storage_index_to_dir, \
UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, \
si_b2a, si_a2b
@ -646,7 +652,9 @@ class Server(unittest.TestCase):
e = self.failUnlessRaises(UnknownImmutableContainerVersionError,
ss.remote_get_buckets, b"si1")
self.failUnlessIn(" had version 0 but we wanted 1", str(e))
self.assertEqual(e.filename, fn)
self.assertEqual(e.version, 0)
self.assertIn("had unexpected version 0", str(e))
def test_disconnect(self):
# simulate a disconnection
@ -843,6 +851,9 @@ class Server(unittest.TestCase):
# Create a bucket:
rs0, cs0 = self.create_bucket_5_shares(ss, b"si0")
# Upload of an immutable implies creation of a single lease with the
# supplied secrets.
(lease,) = ss.get_leases(b"si0")
self.assertTrue(lease.is_renew_secret(rs0))
@ -1127,8 +1138,10 @@ class MutableServer(unittest.TestCase):
read = ss.remote_slot_readv
e = self.failUnlessRaises(UnknownMutableContainerVersionError,
read, b"si1", [0], [(0,10)])
self.failUnlessIn(" had magic ", str(e))
self.failUnlessIn(" but we wanted ", str(e))
self.assertEqual(e.filename, fn)
self.assertTrue(e.version.startswith(b"BAD MAGIC"))
self.assertIn("had unexpected version", str(e))
self.assertIn("BAD MAGIC", str(e))
def test_container_size(self):
ss = self.create("test_container_size")
@ -1351,14 +1364,25 @@ class MutableServer(unittest.TestCase):
2: [b"2"*10]})
def compare_leases_without_timestamps(self, leases_a, leases_b):
self.failUnlessEqual(len(leases_a), len(leases_b))
for i in range(len(leases_a)):
a = leases_a[i]
b = leases_b[i]
self.failUnlessEqual(a.owner_num, b.owner_num)
self.failUnlessEqual(a.renew_secret, b.renew_secret)
self.failUnlessEqual(a.cancel_secret, b.cancel_secret)
self.failUnlessEqual(a.nodeid, b.nodeid)
"""
Assert that, except for expiration times, ``leases_a`` contains the same
lease information as ``leases_b``.
"""
for a, b in zip(leases_a, leases_b):
# The leases aren't always of the same type (though of course
# corresponding elements in the two lists should be of the same
# type as each other) so it's inconvenient to just reach in and
# normalize the expiration timestamp. We don't want to call
# `renew` on both objects to normalize the expiration timestamp in
# case `renew` is broken and gives us back equal outputs from
# non-equal inputs (expiration timestamp aside). It seems
# reasonably safe to use `renew` to make _one_ of the timestamps
# equal to the other though.
self.assertEqual(
a.renew(b.get_expiration_time()),
b,
)
self.assertEqual(len(leases_a), len(leases_b))
def test_leases(self):
ss = self.create("test_leases")
@ -3124,6 +3148,7 @@ class Stats(unittest.TestCase):
self.failUnless(output["get"]["99_0_percentile"] is None, output)
self.failUnless(output["get"]["99_9_percentile"] is None, output)
immutable_schemas = strategies.sampled_from(list(ALL_IMMUTABLE_SCHEMAS))
class ShareFileTests(unittest.TestCase):
"""Tests for allmydata.storage.immutable.ShareFile."""
@ -3135,47 +3160,54 @@ class ShareFileTests(unittest.TestCase):
# Should be b'abDEF' now.
return sf
def test_read_write(self):
@given(immutable_schemas)
def test_read_write(self, schema):
"""Basic writes can be read."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
self.assertEqual(sf.read_share_data(0, 3), b"abD")
self.assertEqual(sf.read_share_data(1, 4), b"bDEF")
def test_reads_beyond_file_end(self):
@given(immutable_schemas)
def test_reads_beyond_file_end(self, schema):
"""Reads beyond the file size are truncated."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
self.assertEqual(sf.read_share_data(0, 10), b"abDEF")
self.assertEqual(sf.read_share_data(5, 10), b"")
def test_too_large_write(self):
@given(immutable_schemas)
def test_too_large_write(self, schema):
"""Can't do write larger than file size."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
with self.assertRaises(DataTooLargeError):
sf.write_share_data(0, b"x" * 3000)
def test_no_leases_cancelled(self):
@given(immutable_schemas)
def test_no_leases_cancelled(self, schema):
"""If no leases were cancelled, IndexError is raised."""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
with self.assertRaises(IndexError):
sf.cancel_lease(b"garbage")
def test_long_lease_count_format(self):
@given(immutable_schemas)
def test_long_lease_count_format(self, schema):
"""
``ShareFile.__init__`` raises ``ValueError`` if the lease count format
given is longer than one character.
"""
with self.assertRaises(ValueError):
self.get_sharefile(lease_count_format="BB")
self.get_sharefile(schema=schema, lease_count_format="BB")
def test_large_lease_count_format(self):
@given(immutable_schemas)
def test_large_lease_count_format(self, schema):
"""
``ShareFile.__init__`` raises ``ValueError`` if the lease count format
encodes to a size larger than 8 bytes.
"""
with self.assertRaises(ValueError):
self.get_sharefile(lease_count_format="Q")
self.get_sharefile(schema=schema, lease_count_format="Q")
def test_avoid_lease_overflow(self):
@given(immutable_schemas)
def test_avoid_lease_overflow(self, schema):
"""
If the share file already has the maximum number of leases supported then
``ShareFile.add_lease`` raises ``struct.error`` and makes no changes
@ -3189,7 +3221,7 @@ class ShareFileTests(unittest.TestCase):
)
# Make it a little easier to reach the condition by limiting the
# number of leases to only 255.
sf = self.get_sharefile(lease_count_format="B")
sf = self.get_sharefile(schema=schema, lease_count_format="B")
# Add the leases.
for i in range(2 ** 8 - 1):
@ -3213,16 +3245,17 @@ class ShareFileTests(unittest.TestCase):
self.assertEqual(before_data, after_data)
def test_renew_secret(self):
@given(immutable_schemas)
def test_renew_secret(self, schema):
"""
A lease loaded from an immutable share file can have its renew secret
verified.
A lease loaded from an immutable share file at any schema version can have
its renew secret verified.
"""
renew_secret = b"r" * 32
cancel_secret = b"c" * 32
expiration_time = 2 ** 31
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
lease = LeaseInfo(
owner_num=0,
renew_secret=renew_secret,
@ -3233,16 +3266,17 @@ class ShareFileTests(unittest.TestCase):
(loaded_lease,) = sf.get_leases()
self.assertTrue(loaded_lease.is_renew_secret(renew_secret))
def test_cancel_secret(self):
@given(immutable_schemas)
def test_cancel_secret(self, schema):
"""
A lease loaded from an immutable share file can have its cancel secret
verified.
A lease loaded from an immutable share file at any schema version can have
its cancel secret verified.
"""
renew_secret = b"r" * 32
cancel_secret = b"c" * 32
expiration_time = 2 ** 31
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
lease = LeaseInfo(
owner_num=0,
renew_secret=renew_secret,
@ -3253,15 +3287,17 @@ class ShareFileTests(unittest.TestCase):
(loaded_lease,) = sf.get_leases()
self.assertTrue(loaded_lease.is_cancel_secret(cancel_secret))
mutable_schemas = strategies.sampled_from(list(ALL_MUTABLE_SCHEMAS))
class MutableShareFileTests(unittest.TestCase):
"""
Tests for allmydata.storage.mutable.MutableShareFile.
"""
def get_sharefile(self):
return MutableShareFile(self.mktemp())
def get_sharefile(self, **kwargs):
return MutableShareFile(self.mktemp(), **kwargs)
@given(
schema=mutable_schemas,
nodeid=strategies.just(b"x" * 20),
write_enabler=strategies.just(b"y" * 32),
datav=strategies.lists(
@ -3272,12 +3308,12 @@ class MutableShareFileTests(unittest.TestCase):
),
new_length=offsets(),
)
def test_readv_reads_share_data(self, nodeid, write_enabler, datav, new_length):
def test_readv_reads_share_data(self, schema, nodeid, write_enabler, datav, new_length):
"""
``MutableShareFile.readv`` returns bytes from the share data portion
of the share file.
"""
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
sf.create(my_nodeid=nodeid, write_enabler=write_enabler)
sf.writev(datav=datav, new_length=new_length)
@ -3312,12 +3348,13 @@ class MutableShareFileTests(unittest.TestCase):
self.assertEqual(expected_data, read_data)
@given(
schema=mutable_schemas,
nodeid=strategies.just(b"x" * 20),
write_enabler=strategies.just(b"y" * 32),
readv=strategies.lists(strategies.tuples(offsets(), lengths()), min_size=1),
random=strategies.randoms(),
)
def test_readv_rejects_negative_length(self, nodeid, write_enabler, readv, random):
def test_readv_rejects_negative_length(self, schema, nodeid, write_enabler, readv, random):
"""
If a negative length is given to ``MutableShareFile.readv`` in a read
vector then ``AssertionError`` is raised.
@ -3356,7 +3393,7 @@ class MutableShareFileTests(unittest.TestCase):
*broken_readv[readv_index]
)
sf = self.get_sharefile()
sf = self.get_sharefile(schema=schema)
sf.create(my_nodeid=nodeid, write_enabler=write_enabler)
# A read with a broken read vector is an error.