diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 216262a81..e9992d96e 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -231,7 +231,7 @@ class ShareFile(object): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset - f.write(self._schema.serialize_lease(lease_info)) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_num_leases(self, f): f.seek(0x08) @@ -262,7 +262,7 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield self._schema.unserialize_lease(data) + yield self._schema.lease_serializer.unserialize(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py index 440755b01..40663b935 100644 --- a/src/allmydata/storage/immutable_schema.py +++ b/src/allmydata/storage/immutable_schema.py @@ -13,84 +13,28 @@ if PY2: import struct -try: - from typing import Union -except ImportError: - pass - import attr -from nacl.hash import blake2b -from nacl.encoding import RawEncoder - -from .lease import ( - LeaseInfo, - HashedLeaseInfo, +from .lease_schema import ( + v1_immutable, + v2_immutable, ) -def _header(version, max_size): - # type: (int, int) -> bytes +@attr.s(frozen=True) +class _Schema(object): """ - Construct the header for an immutable container. + Implement encoding and decoding for multiple versions of the immutable + container schema. - :param version: the container version to include the in header - :param max_size: the maximum data size the container will hold + :ivar int version: the version number of the schema this object supports - :return: some bytes to write at the beginning of the container + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization """ - # The second field -- the four-byte share data length -- is no longer - # used as of Tahoe v1.3.0, but we continue to write it in there in - # case someone downgrades a storage server from >= Tahoe-1.3.0 to < - # Tahoe-1.3.0, or moves a share file from one server to another, - # etc. We do saturation -- a share data length larger than 2**32-1 - # (what can fit into the field) is marked as the largest length that - # can fit into the field. That way, even if this does happen, the old - # < v1.3.0 server will still allow clients to read the first part of - # the share. - return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + version = attr.ib() + lease_serializer = attr.ib() - -class _V2(object): - """ - Implement encoding and decoding for v2 of the immutable container. - """ - version = 2 - - @classmethod - def _hash_secret(cls, secret): - # type: (bytes) -> bytes - """ - Hash a lease secret for storage. - """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) - - @classmethod - def _hash_lease_info(cls, lease_info): - # type: (LeaseInfo) -> HashedLeaseInfo - """ - Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. - """ - if not isinstance(lease_info, LeaseInfo): - # Provide a little safety against misuse, especially an attempt to - # re-hash an already-hashed lease info which is represented as a - # different type. - raise TypeError( - "Can only hash LeaseInfo, not {!r}".format(lease_info), - ) - - # Hash the cleartext secrets in the lease info and wrap the result in - # a new type. - return HashedLeaseInfo( - attr.assoc( - lease_info, - renew_secret=cls._hash_secret(lease_info.renew_secret), - cancel_secret=cls._hash_secret(lease_info.cancel_secret), - ), - cls._hash_secret, - ) - - @classmethod - def header(cls, max_size): + def header(self, max_size): # type: (int) -> bytes """ Construct a container header. @@ -99,78 +43,23 @@ class _V2(object): :return: the header bytes """ - return _header(cls.version, max_size) + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. + return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0) - @classmethod - def serialize_lease(cls, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes - """ - Serialize a lease to be written to a v2 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - # v2 of the immutable schema stores lease secrets hashed. If - # we're given a LeaseInfo then it holds plaintext secrets. Hash - # them before trying to serialize. - lease = cls._hash_lease_info(lease) - if isinstance(lease, HashedLeaseInfo): - return lease.to_immutable_data() - raise ValueError( - "ShareFile v2 schema cannot represent lease {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> HashedLeaseInfo - """ - Unserialize some bytes from a v2 container. - - :param data: the bytes from the container - - :return: the ``HashedLeaseInfo`` the bytes represent - """ - # In v2 of the immutable schema lease secrets are stored hashed. Wrap - # a LeaseInfo in a HashedLeaseInfo so it can supply the correct - # interpretation for those values. - return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret) - - -class _V1(object): - """ - Implement encoding and decoding for v1 of the immutable container. - """ - version = 1 - - @classmethod - def header(cls, max_size): - return _header(cls.version, max_size) - - @classmethod - def serialize_lease(cls, lease): - if isinstance(lease, LeaseInfo): - return lease.to_immutable_data() - raise ValueError( - "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # In v1 of the immutable schema lease secrets are stored plaintext. - # So load the data into a plain LeaseInfo which works on plaintext - # secrets. - return LeaseInfo.from_immutable_data(data) - - -ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore +ALL_SCHEMAS = { + _Schema(version=2, lease_serializer=v2_immutable), + _Schema(version=1, lease_serializer=v1_immutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_version(version): # (int) -> Optional[type] diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 1a5416d6a..8be44bafd 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -230,7 +230,7 @@ class LeaseInfo(object): "cancel_secret", "expiration_time", ] - values = struct.unpack(">L32s32sL", data) + values = struct.unpack(IMMUTABLE_FORMAT, data) return cls(nodeid=None, **dict(zip(names, values))) def immutable_size(self): @@ -274,7 +274,7 @@ class LeaseInfo(object): "cancel_secret", "nodeid", ] - values = struct.unpack(">LL32s32s20s", data) + values = struct.unpack(MUTABLE_FORMAT, data) return cls(**dict(zip(names, values))) diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py new file mode 100644 index 000000000..697ac9e34 --- /dev/null +++ b/src/allmydata/storage/lease_schema.py @@ -0,0 +1,129 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +try: + from typing import Union +except ImportError: + pass + +import attr + +from nacl.hash import blake2b +from nacl.encoding import RawEncoder + +from .lease import ( + LeaseInfo, + HashedLeaseInfo, +) + +@attr.s(frozen=True) +class CleartextLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + def serialize(self, lease): + # type: (LeaseInfo) -> bytes + if isinstance(lease, LeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> LeaseInfo + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return self._from_data(data) + +@attr.s(frozen=True) +class HashedLeaseSerializer(object): + _to_data = attr.ib() + _from_data = attr.ib() + + @classmethod + def _hash_secret(cls, secret): + # type: (bytes) -> bytes + """ + Hash a lease secret for storage. + """ + return blake2b(secret, digest_size=32, encoder=RawEncoder()) + + @classmethod + def _hash_lease_info(cls, lease_info): + # type: (LeaseInfo) -> HashedLeaseInfo + """ + Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. + """ + if not isinstance(lease_info, LeaseInfo): + # Provide a little safety against misuse, especially an attempt to + # re-hash an already-hashed lease info which is represented as a + # different type. + raise TypeError( + "Can only hash LeaseInfo, not {!r}".format(lease_info), + ) + + # Hash the cleartext secrets in the lease info and wrap the result in + # a new type. + return HashedLeaseInfo( + attr.assoc( + lease_info, + renew_secret=cls._hash_secret(lease_info.renew_secret), + cancel_secret=cls._hash_secret(lease_info.cancel_secret), + ), + cls._hash_secret, + ) + + def serialize(self, lease): + # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + if isinstance(lease, LeaseInfo): + # v2 of the immutable schema stores lease secrets hashed. If + # we're given a LeaseInfo then it holds plaintext secrets. Hash + # them before trying to serialize. + lease = self._hash_lease_info(lease) + if isinstance(lease, HashedLeaseInfo): + return self._to_data(lease) + raise ValueError( + "ShareFile v2 schema cannot represent lease {!r}".format( + lease, + ), + ) + + def unserialize(self, data): + # type: (bytes) -> HashedLeaseInfo + # In v2 of the immutable schema lease secrets are stored hashed. Wrap + # a LeaseInfo in a HashedLeaseInfo so it can supply the correct + # interpretation for those values. + return HashedLeaseInfo(self._from_data(data), self._hash_secret) + +v1_immutable = CleartextLeaseSerializer( + LeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v2_immutable = HashedLeaseSerializer( + HashedLeaseInfo.to_immutable_data, + LeaseInfo.from_immutable_data, +) + +v1_mutable = CleartextLeaseSerializer( + LeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) + +v2_mutable = HashedLeaseSerializer( + HashedLeaseInfo.to_mutable_data, + LeaseInfo.from_mutable_data, +) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 346edd53a..bd59d96b8 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -236,7 +236,7 @@ class MutableShareFile(object): + (lease_number-4)*self.LEASE_SIZE) f.seek(offset) assert f.tell() == offset - f.write(self._schema.serialize_lease(lease_info)) + f.write(self._schema.lease_serializer.serialize(lease_info)) def _read_lease_record(self, f, lease_number): # returns a LeaseInfo instance, or None @@ -253,7 +253,7 @@ class MutableShareFile(object): f.seek(offset) assert f.tell() == offset data = f.read(self.LEASE_SIZE) - lease_info = self._schema.unserialize_lease(data) + lease_info = self._schema.lease_serializer.unserialize(data) if lease_info.owner_num == 0: return None return lease_info diff --git a/src/allmydata/storage/mutable_schema.py b/src/allmydata/storage/mutable_schema.py index 9496fe571..4be0d2137 100644 --- a/src/allmydata/storage/mutable_schema.py +++ b/src/allmydata/storage/mutable_schema.py @@ -13,22 +13,17 @@ if PY2: import struct -try: - from typing import Union -except ImportError: - pass - import attr -from nacl.hash import blake2b -from nacl.encoding import RawEncoder - from ..util.hashutil import ( tagged_hash, ) from .lease import ( LeaseInfo, - HashedLeaseInfo, +) +from .lease_schema import ( + v1_mutable, + v2_mutable, ) def _magic(version): @@ -94,168 +89,49 @@ def _header(magic, extra_lease_offset, nodeid, write_enabler): ]) -class _V2(object): +_HEADER_FORMAT = ">32s20s32sQQ" + +# This size excludes leases +_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) + +_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + + +@attr.s(frozen=True) +class _Schema(object): """ - Implement encoding and decoding for v2 of the mutable container. + Implement encoding and decoding for the mutable container. + + :ivar int version: the version number of the schema this object supports + + :ivar lease_serializer: an object that is responsible for lease + serialization and unserialization """ - version = 2 - _MAGIC = _magic(version) - - _HEADER_FORMAT = ">32s20s32sQQ" - - # This size excludes leases - _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) - - _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() + version = attr.ib() + lease_serializer = attr.ib() + _magic = attr.ib() @classmethod - def _hash_secret(cls, secret): - # type: (bytes) -> bytes - """ - Hash a lease secret for storage. - """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) + def for_version(cls, version, lease_serializer): + return cls(version, lease_serializer, magic=_magic(version)) - @classmethod - def _hash_lease_info(cls, lease_info): - # type: (LeaseInfo) -> HashedLeaseInfo - """ - Hash the cleartext lease info secrets into a ``HashedLeaseInfo``. - """ - if not isinstance(lease_info, LeaseInfo): - # Provide a little safety against misuse, especially an attempt to - # re-hash an already-hashed lease info which is represented as a - # different type. - raise TypeError( - "Can only hash LeaseInfo, not {!r}".format(lease_info), - ) - - # Hash the cleartext secrets in the lease info and wrap the result in - # a new type. - return HashedLeaseInfo( - attr.assoc( - lease_info, - renew_secret=cls._hash_secret(lease_info.renew_secret), - cancel_secret=cls._hash_secret(lease_info.cancel_secret), - ), - cls._hash_secret, - ) - - @classmethod - def magic_matches(cls, candidate_magic): + def magic_matches(self, candidate_magic): # type: (bytes) -> bool """ Return ``True`` if a candidate string matches the expected magic string from a mutable container header, ``False`` otherwise. """ - return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC + return candidate_magic[:len(self._magic)] == self._magic - @classmethod - def header(cls, nodeid, write_enabler): - return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) + def header(self, nodeid, write_enabler): + return _header(self._magic, _EXTRA_LEASE_OFFSET, nodeid, write_enabler) - @classmethod - def serialize_lease(cls, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes - """ - Serialize a lease to be written to a v2 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - # v2 of the mutable schema stores lease secrets hashed. If we're - # given a LeaseInfo then it holds plaintext secrets. Hash them - # before trying to serialize. - lease = cls._hash_lease_info(lease) - if isinstance(lease, HashedLeaseInfo): - return lease.to_mutable_data() - raise ValueError( - "MutableShareFile v2 schema cannot represent lease {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> HashedLeaseInfo - """ - Unserialize some bytes from a v2 container. - - :param data: the bytes from the container - - :return: the ``HashedLeaseInfo`` the bytes represent - """ - # In v2 of the immutable schema lease secrets are stored hashed. Wrap - # a LeaseInfo in a HashedLeaseInfo so it can supply the correct - # interpretation for those values. - lease = LeaseInfo.from_mutable_data(data) - return HashedLeaseInfo(lease, cls._hash_secret) - - -class _V1(object): - """ - Implement encoding and decoding for v1 of the mutable container. - """ - version = 1 - _MAGIC = _magic(version) - - _HEADER_FORMAT = ">32s20s32sQQ" - - # This size excludes leases - _HEADER_SIZE = struct.calcsize(_HEADER_FORMAT) - - _EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size() - - @classmethod - def magic_matches(cls, candidate_magic): - # type: (bytes) -> bool - """ - Return ``True`` if a candidate string matches the expected magic string - from a mutable container header, ``False`` otherwise. - """ - return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC - - @classmethod - def header(cls, nodeid, write_enabler): - return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler) - - - @classmethod - def serialize_lease(cls, lease_info): - # type: (LeaseInfo) -> bytes - """ - Serialize a lease to be written to a v1 container. - - :param lease: the lease to serialize - - :return: the serialized bytes - """ - if isinstance(lease, LeaseInfo): - return lease_info.to_mutable_data() - raise ValueError( - "MutableShareFile v1 schema only supports LeaseInfo, not {!r}".format( - lease, - ), - ) - - @classmethod - def unserialize_lease(cls, data): - # type: (bytes) -> LeaseInfo - """ - Unserialize some bytes from a v1 container. - - :param data: the bytes from the container - - :return: the ``LeaseInfo`` the bytes represent - """ - return LeaseInfo.from_mutable_data(data) - - -ALL_SCHEMAS = {_V2, _V1} -ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore -NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore +ALL_SCHEMAS = { + _Schema.for_version(version=2, lease_serializer=v2_mutable), + _Schema.for_version(version=1, lease_serializer=v1_mutable), +} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) def schema_from_header(header): # (int) -> Optional[type]