refactor lease hashing logic to avoid mutable/immutable duplication

This commit is contained in:
Jean-Paul Calderone 2021-11-05 15:30:49 -04:00
parent 456df65a07
commit 617a1eac9d
6 changed files with 199 additions and 305 deletions

View File

@ -231,7 +231,7 @@ class ShareFile(object):
offset = self._lease_offset + lease_number * self.LEASE_SIZE
f.seek(offset)
assert f.tell() == offset
f.write(self._schema.serialize_lease(lease_info))
f.write(self._schema.lease_serializer.serialize(lease_info))
def _read_num_leases(self, f):
f.seek(0x08)
@ -262,7 +262,7 @@ class ShareFile(object):
for i in range(num_leases):
data = f.read(self.LEASE_SIZE)
if data:
yield self._schema.unserialize_lease(data)
yield self._schema.lease_serializer.unserialize(data)
def add_lease(self, lease_info):
with open(self.home, 'rb+') as f:

View File

@ -13,84 +13,28 @@ if PY2:
import struct
try:
from typing import Union
except ImportError:
pass
import attr
from nacl.hash import blake2b
from nacl.encoding import RawEncoder
from .lease import (
LeaseInfo,
HashedLeaseInfo,
from .lease_schema import (
v1_immutable,
v2_immutable,
)
def _header(version, max_size):
# type: (int, int) -> bytes
@attr.s(frozen=True)
class _Schema(object):
"""
Construct the header for an immutable container.
Implement encoding and decoding for multiple versions of the immutable
container schema.
:param version: the container version to include in the header
:param max_size: the maximum data size the container will hold
:ivar int version: the version number of the schema this object supports
:return: some bytes to write at the beginning of the container
:ivar lease_serializer: an object that is responsible for lease
serialization and unserialization
"""
# The second field -- the four-byte share data length -- is no longer
# used as of Tahoe v1.3.0, but we continue to write it in there in
# case someone downgrades a storage server from >= Tahoe-1.3.0 to <
# Tahoe-1.3.0, or moves a share file from one server to another,
# etc. We do saturation -- a share data length larger than 2**32-1
# (what can fit into the field) is marked as the largest length that
# can fit into the field. That way, even if this does happen, the old
# < v1.3.0 server will still allow clients to read the first part of
# the share.
return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0)
version = attr.ib()
lease_serializer = attr.ib()
class _V2(object):
"""
Implement encoding and decoding for v2 of the immutable container.
"""
version = 2
@classmethod
def _hash_secret(cls, secret):
# type: (bytes) -> bytes
"""
Hash a lease secret for storage.
"""
return blake2b(secret, digest_size=32, encoder=RawEncoder())
@classmethod
def _hash_lease_info(cls, lease_info):
# type: (LeaseInfo) -> HashedLeaseInfo
"""
Hash the cleartext lease info secrets into a ``HashedLeaseInfo``.
"""
if not isinstance(lease_info, LeaseInfo):
# Provide a little safety against misuse, especially an attempt to
# re-hash an already-hashed lease info which is represented as a
# different type.
raise TypeError(
"Can only hash LeaseInfo, not {!r}".format(lease_info),
)
# Hash the cleartext secrets in the lease info and wrap the result in
# a new type.
return HashedLeaseInfo(
attr.assoc(
lease_info,
renew_secret=cls._hash_secret(lease_info.renew_secret),
cancel_secret=cls._hash_secret(lease_info.cancel_secret),
),
cls._hash_secret,
)
@classmethod
def header(cls, max_size):
def header(self, max_size):
# type: (int) -> bytes
"""
Construct a container header.
@ -99,78 +43,23 @@ class _V2(object):
:return: the header bytes
"""
return _header(cls.version, max_size)
# The second field -- the four-byte share data length -- is no longer
# used as of Tahoe v1.3.0, but we continue to write it in there in
# case someone downgrades a storage server from >= Tahoe-1.3.0 to <
# Tahoe-1.3.0, or moves a share file from one server to another,
# etc. We do saturation -- a share data length larger than 2**32-1
# (what can fit into the field) is marked as the largest length that
# can fit into the field. That way, even if this does happen, the old
# < v1.3.0 server will still allow clients to read the first part of
# the share.
return struct.pack(">LLL", self.version, min(2**32 - 1, max_size), 0)
@classmethod
def serialize_lease(cls, lease):
# type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
"""
Serialize a lease to be written to a v2 container.
:param lease: the lease to serialize
:return: the serialized bytes
"""
if isinstance(lease, LeaseInfo):
# v2 of the immutable schema stores lease secrets hashed. If
# we're given a LeaseInfo then it holds plaintext secrets. Hash
# them before trying to serialize.
lease = cls._hash_lease_info(lease)
if isinstance(lease, HashedLeaseInfo):
return lease.to_immutable_data()
raise ValueError(
"ShareFile v2 schema cannot represent lease {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# type: (bytes) -> HashedLeaseInfo
"""
Unserialize some bytes from a v2 container.
:param data: the bytes from the container
:return: the ``HashedLeaseInfo`` the bytes represent
"""
# In v2 of the immutable schema lease secrets are stored hashed. Wrap
# a LeaseInfo in a HashedLeaseInfo so it can supply the correct
# interpretation for those values.
return HashedLeaseInfo(LeaseInfo.from_immutable_data(data), cls._hash_secret)
class _V1(object):
"""
Implement encoding and decoding for v1 of the immutable container.
"""
version = 1
@classmethod
def header(cls, max_size):
return _header(cls.version, max_size)
@classmethod
def serialize_lease(cls, lease):
if isinstance(lease, LeaseInfo):
return lease.to_immutable_data()
raise ValueError(
"ShareFile v1 schema only supports LeaseInfo, not {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# In v1 of the immutable schema lease secrets are stored plaintext.
# So load the data into a plain LeaseInfo which works on plaintext
# secrets.
return LeaseInfo.from_immutable_data(data)
ALL_SCHEMAS = {_V2, _V1}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore
ALL_SCHEMAS = {
_Schema(version=2, lease_serializer=v2_immutable),
_Schema(version=1, lease_serializer=v1_immutable),
}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)
def schema_from_version(version):
# (int) -> Optional[type]

View File

@ -230,7 +230,7 @@ class LeaseInfo(object):
"cancel_secret",
"expiration_time",
]
values = struct.unpack(">L32s32sL", data)
values = struct.unpack(IMMUTABLE_FORMAT, data)
return cls(nodeid=None, **dict(zip(names, values)))
def immutable_size(self):
@ -274,7 +274,7 @@ class LeaseInfo(object):
"cancel_secret",
"nodeid",
]
values = struct.unpack(">LL32s32s20s", data)
values = struct.unpack(MUTABLE_FORMAT, data)
return cls(**dict(zip(names, values)))

View File

@ -0,0 +1,129 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
try:
from typing import Union
except ImportError:
pass
import attr
from nacl.hash import blake2b
from nacl.encoding import RawEncoder
from .lease import (
LeaseInfo,
HashedLeaseInfo,
)
@attr.s(frozen=True)
class CleartextLeaseSerializer(object):
    """
    Serialize and unserialize leases whose secrets are stored in cleartext.

    The byte-level encoding is delegated to the two callables supplied at
    construction time.

    :ivar _to_data: a callable which accepts a ``LeaseInfo`` and returns its
        serialized bytes
    :ivar _from_data: a callable which accepts serialized bytes and returns
        the lease they represent
    """
    _to_data = attr.ib()
    _from_data = attr.ib()

    def serialize(self, lease):
        # type: (LeaseInfo) -> bytes
        """
        Represent the given lease as bytes with cleartext secrets.

        :param lease: the lease to serialize

        :raise ValueError: if ``lease`` is not a ``LeaseInfo``

        :return: the serialized bytes
        """
        # Only a plain LeaseInfo is accepted here; anything else is refused
        # rather than handed on to the encoder.
        if not isinstance(lease, LeaseInfo):
            raise ValueError(
                "ShareFile v1 schema only supports LeaseInfo, not {!r}".format(
                    lease,
                ),
            )
        return self._to_data(lease)

    def unserialize(self, data):
        # type: (bytes) -> LeaseInfo
        """
        Load a lease with cleartext secrets from its serialized form.

        :param data: the serialized bytes

        :return: whatever the configured ``_from_data`` callable produces for
            ``data``
        """
        # Secrets are stored as-is by this serializer so the decoded lease is
        # returned without any wrapping.
        return self._from_data(data)
@attr.s(frozen=True)
class HashedLeaseSerializer(object):
    """
    Serialize and unserialize leases whose secrets are stored hashed.

    The byte-level encoding is delegated to the two callables supplied at
    construction time; this class is responsible for hashing cleartext
    secrets before they are written and for wrapping what is read back in a
    ``HashedLeaseInfo``.

    :ivar _to_data: a callable which accepts a ``HashedLeaseInfo`` and
        returns its serialized bytes
    :ivar _from_data: a callable which accepts serialized bytes and returns
        the lease they represent
    """
    _to_data = attr.ib()
    _from_data = attr.ib()

    @classmethod
    def _hash_secret(cls, secret):
        # type: (bytes) -> bytes
        """
        Compute the stored form of a lease secret.

        :param secret: the cleartext secret

        :return: the 32-byte BLAKE2b digest of ``secret``, produced with the
            raw encoder
        """
        return blake2b(secret, digest_size=32, encoder=RawEncoder())

    @classmethod
    def _hash_lease_info(cls, lease_info):
        # type: (LeaseInfo) -> HashedLeaseInfo
        """
        Convert a cleartext ``LeaseInfo`` into a ``HashedLeaseInfo``.

        :param lease_info: the lease holding cleartext secrets

        :raise TypeError: if ``lease_info`` is not a ``LeaseInfo``

        :return: a ``HashedLeaseInfo`` wrapping a copy of ``lease_info`` in
            which both secrets have been replaced by their hashes
        """
        if isinstance(lease_info, LeaseInfo):
            # Build a copy of the lease carrying the hashed secrets, then
            # wrap it in the type that marks its secrets as hashed.
            with_hashed_secrets = attr.assoc(
                lease_info,
                renew_secret=cls._hash_secret(lease_info.renew_secret),
                cancel_secret=cls._hash_secret(lease_info.cancel_secret),
            )
            return HashedLeaseInfo(with_hashed_secrets, cls._hash_secret)

        # Refuse anything else.  In particular this guards against hashing
        # an already-hashed lease a second time, since that is represented
        # by a different type.
        raise TypeError(
            "Can only hash LeaseInfo, not {!r}".format(lease_info),
        )

    def serialize(self, lease):
        # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
        """
        Represent the given lease as bytes with hashed secrets.

        :param lease: the lease to serialize; a ``LeaseInfo`` has its secrets
            hashed first, a ``HashedLeaseInfo`` is serialized as it is

        :raise ValueError: if ``lease`` is neither a ``LeaseInfo`` nor a
            ``HashedLeaseInfo``

        :return: the serialized bytes
        """
        if isinstance(lease, LeaseInfo):
            # A LeaseInfo holds cleartext secrets but this serializer stores
            # them hashed, so hash before encoding.
            candidate = self._hash_lease_info(lease)
        else:
            candidate = lease

        if not isinstance(candidate, HashedLeaseInfo):
            raise ValueError(
                "ShareFile v2 schema cannot represent lease {!r}".format(
                    candidate,
                ),
            )
        return self._to_data(candidate)

    def unserialize(self, data):
        # type: (bytes) -> HashedLeaseInfo
        """
        Load a lease with hashed secrets from its serialized form.

        :param data: the serialized bytes

        :return: a ``HashedLeaseInfo`` wrapping the decoded lease
        """
        # The stored secrets are hashes.  Wrapping the decoded lease in a
        # HashedLeaseInfo, together with the hash function, lets it supply
        # the correct interpretation for those values.
        decoded = self._from_data(data)
        return HashedLeaseInfo(decoded, self._hash_secret)
# Ready-made lease serializers, one per (schema version, container kind)
# pair.  The v1 serializers keep secrets in cleartext and the v2 serializers
# keep them hashed; the immutable/mutable variants differ only in which
# LeaseInfo / HashedLeaseInfo encoding functions they delegate to.

v1_immutable = CleartextLeaseSerializer(
    to_data=LeaseInfo.to_immutable_data,
    from_data=LeaseInfo.from_immutable_data,
)

v2_immutable = HashedLeaseSerializer(
    to_data=HashedLeaseInfo.to_immutable_data,
    from_data=LeaseInfo.from_immutable_data,
)

v1_mutable = CleartextLeaseSerializer(
    to_data=LeaseInfo.to_mutable_data,
    from_data=LeaseInfo.from_mutable_data,
)

v2_mutable = HashedLeaseSerializer(
    to_data=HashedLeaseInfo.to_mutable_data,
    from_data=LeaseInfo.from_mutable_data,
)

View File

@ -236,7 +236,7 @@ class MutableShareFile(object):
+ (lease_number-4)*self.LEASE_SIZE)
f.seek(offset)
assert f.tell() == offset
f.write(self._schema.serialize_lease(lease_info))
f.write(self._schema.lease_serializer.serialize(lease_info))
def _read_lease_record(self, f, lease_number):
# returns a LeaseInfo instance, or None
@ -253,7 +253,7 @@ class MutableShareFile(object):
f.seek(offset)
assert f.tell() == offset
data = f.read(self.LEASE_SIZE)
lease_info = self._schema.unserialize_lease(data)
lease_info = self._schema.lease_serializer.unserialize(data)
if lease_info.owner_num == 0:
return None
return lease_info

View File

@ -13,22 +13,17 @@ if PY2:
import struct
try:
from typing import Union
except ImportError:
pass
import attr
from nacl.hash import blake2b
from nacl.encoding import RawEncoder
from ..util.hashutil import (
tagged_hash,
)
from .lease import (
LeaseInfo,
HashedLeaseInfo,
)
from .lease_schema import (
v1_mutable,
v2_mutable,
)
def _magic(version):
@ -94,168 +89,49 @@ def _header(magic, extra_lease_offset, nodeid, write_enabler):
])
class _V2(object):
_HEADER_FORMAT = ">32s20s32sQQ"
# This size excludes leases
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)
_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
@attr.s(frozen=True)
class _Schema(object):
"""
Implement encoding and decoding for v2 of the mutable container.
Implement encoding and decoding for the mutable container.
:ivar int version: the version number of the schema this object supports
:ivar lease_serializer: an object that is responsible for lease
serialization and unserialization
"""
version = 2
_MAGIC = _magic(version)
_HEADER_FORMAT = ">32s20s32sQQ"
# This size excludes leases
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)
_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
version = attr.ib()
lease_serializer = attr.ib()
_magic = attr.ib()
@classmethod
def _hash_secret(cls, secret):
# type: (bytes) -> bytes
"""
Hash a lease secret for storage.
"""
return blake2b(secret, digest_size=32, encoder=RawEncoder())
def for_version(cls, version, lease_serializer):
return cls(version, lease_serializer, magic=_magic(version))
@classmethod
def _hash_lease_info(cls, lease_info):
# type: (LeaseInfo) -> HashedLeaseInfo
"""
Hash the cleartext lease info secrets into a ``HashedLeaseInfo``.
"""
if not isinstance(lease_info, LeaseInfo):
# Provide a little safety against misuse, especially an attempt to
# re-hash an already-hashed lease info which is represented as a
# different type.
raise TypeError(
"Can only hash LeaseInfo, not {!r}".format(lease_info),
)
# Hash the cleartext secrets in the lease info and wrap the result in
# a new type.
return HashedLeaseInfo(
attr.assoc(
lease_info,
renew_secret=cls._hash_secret(lease_info.renew_secret),
cancel_secret=cls._hash_secret(lease_info.cancel_secret),
),
cls._hash_secret,
)
@classmethod
def magic_matches(cls, candidate_magic):
def magic_matches(self, candidate_magic):
# type: (bytes) -> bool
"""
Return ``True`` if a candidate string matches the expected magic string
from a mutable container header, ``False`` otherwise.
"""
return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC
return candidate_magic[:len(self._magic)] == self._magic
@classmethod
def header(cls, nodeid, write_enabler):
return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler)
def header(self, nodeid, write_enabler):
return _header(self._magic, _EXTRA_LEASE_OFFSET, nodeid, write_enabler)
@classmethod
def serialize_lease(cls, lease):
# type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes
"""
Serialize a lease to be written to a v2 container.
:param lease: the lease to serialize
:return: the serialized bytes
"""
if isinstance(lease, LeaseInfo):
# v2 of the mutable schema stores lease secrets hashed. If we're
# given a LeaseInfo then it holds plaintext secrets. Hash them
# before trying to serialize.
lease = cls._hash_lease_info(lease)
if isinstance(lease, HashedLeaseInfo):
return lease.to_mutable_data()
raise ValueError(
"MutableShareFile v2 schema cannot represent lease {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# type: (bytes) -> HashedLeaseInfo
"""
Unserialize some bytes from a v2 container.
:param data: the bytes from the container
:return: the ``HashedLeaseInfo`` the bytes represent
"""
# In v2 of the immutable schema lease secrets are stored hashed. Wrap
# a LeaseInfo in a HashedLeaseInfo so it can supply the correct
# interpretation for those values.
lease = LeaseInfo.from_mutable_data(data)
return HashedLeaseInfo(lease, cls._hash_secret)
class _V1(object):
"""
Implement encoding and decoding for v1 of the mutable container.
"""
version = 1
_MAGIC = _magic(version)
_HEADER_FORMAT = ">32s20s32sQQ"
# This size excludes leases
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)
_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
@classmethod
def magic_matches(cls, candidate_magic):
# type: (bytes) -> bool
"""
Return ``True`` if a candidate string matches the expected magic string
from a mutable container header, ``False`` otherwise.
"""
return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC
@classmethod
def header(cls, nodeid, write_enabler):
return _header(cls._MAGIC, cls._EXTRA_LEASE_OFFSET, nodeid, write_enabler)
@classmethod
def serialize_lease(cls, lease_info):
# type: (LeaseInfo) -> bytes
"""
Serialize a lease to be written to a v1 container.
:param lease: the lease to serialize
:return: the serialized bytes
"""
if isinstance(lease, LeaseInfo):
return lease_info.to_mutable_data()
raise ValueError(
"MutableShareFile v1 schema only supports LeaseInfo, not {!r}".format(
lease,
),
)
@classmethod
def unserialize_lease(cls, data):
# type: (bytes) -> LeaseInfo
"""
Unserialize some bytes from a v1 container.
:param data: the bytes from the container
:return: the ``LeaseInfo`` the bytes represent
"""
return LeaseInfo.from_mutable_data(data)
ALL_SCHEMAS = {_V2, _V1}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore
ALL_SCHEMAS = {
_Schema.for_version(version=2, lease_serializer=v2_mutable),
_Schema.for_version(version=1, lease_serializer=v1_mutable),
}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS}
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version)
def schema_from_header(header):
# (int) -> Optional[type]