From 10724a91f9ca2fe929f4e29adb03b876b21f9fe5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 4 Nov 2021 10:17:36 -0400 Subject: [PATCH] introduce an explicit representation of the v1 immutable container schema This is only a partial representation, sufficient to express the changes that are coming in v2. --- src/allmydata/storage/immutable.py | 37 ++++++----- src/allmydata/storage/immutable_schema.py | 81 +++++++++++++++++++++++ 2 files changed, 102 insertions(+), 16 deletions(-) create mode 100644 src/allmydata/storage/immutable_schema.py diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index fcc60509c..ae5a710af 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -25,9 +25,14 @@ from allmydata.interfaces import ( ) from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition -from allmydata.storage.lease import LeaseInfo from allmydata.storage.common import UnknownImmutableContainerVersionError +from .immutable_schema import ( + NEWEST_SCHEMA_VERSION, + schema_from_version, +) + + # each share file (in storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and # RIBucketReader.read . The lease information is not accessible through these @@ -118,9 +123,16 @@ class ShareFile(object): ``False`` otherwise. """ (version,) = struct.unpack(">L", header[:4]) - return version == 1 + return schema_from_version(version) is not None - def __init__(self, filename, max_size=None, create=False, lease_count_format="L"): + def __init__( + self, + filename, + max_size=None, + create=False, + lease_count_format="L", + schema=NEWEST_SCHEMA_VERSION, + ): """ Initialize a ``ShareFile``. @@ -156,24 +168,17 @@ class ShareFile(object): # it. Also construct the metadata. assert not os.path.exists(self.home) fileutil.make_dirs(os.path.dirname(self.home)) - # The second field -- the four-byte share data length -- is no - # longer used as of Tahoe v1.3.0, but we continue to write it in - # there in case someone downgrades a storage server from >= - # Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one - # server to another, etc. We do saturation -- a share data length - # larger than 2**32-1 (what can fit into the field) is marked as - # the largest length that can fit into the field. That way, even - # if this does happen, the old < v1.3.0 server will still allow - # clients to read the first part of the share. + self._schema = schema with open(self.home, 'wb') as f: - f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0)) + f.write(self._schema.header(max_size)) self._lease_offset = max_size + 0x0c self._num_leases = 0 else: with open(self.home, 'rb') as f: filesize = os.path.getsize(self.home) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) - if version != 1: + self._schema = schema_from_version(version) + if self._schema is None: raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) @@ -209,7 +214,7 @@ class ShareFile(object): offset = self._lease_offset + lease_number * self.LEASE_SIZE f.seek(offset) assert f.tell() == offset - f.write(lease_info.to_immutable_data()) + f.write(self._schema.serialize_lease(lease_info)) def _read_num_leases(self, f): f.seek(0x08) @@ -240,7 +245,7 @@ class ShareFile(object): for i in range(num_leases): data = f.read(self.LEASE_SIZE) if data: - yield LeaseInfo.from_immutable_data(data) + yield self._schema.unserialize_lease(data) def add_lease(self, lease_info): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/immutable_schema.py b/src/allmydata/storage/immutable_schema.py new file mode 100644 index 000000000..759752bed --- /dev/null +++ b/src/allmydata/storage/immutable_schema.py @@ -0,0 +1,81 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +import struct + +from .lease import ( + LeaseInfo, +) + +def _header(version, max_size): + # (int, int) -> bytes + """ + Construct the header for an immutable container. + + :param version: The container version to include the in header. + :param max_size: The maximum data size the container will hold. + + :return: Some bytes to write at the beginning of the container. + """ + # The second field -- the four-byte share data length -- is no longer + # used as of Tahoe v1.3.0, but we continue to write it in there in + # case someone downgrades a storage server from >= Tahoe-1.3.0 to < + # Tahoe-1.3.0, or moves a share file from one server to another, + # etc. We do saturation -- a share data length larger than 2**32-1 + # (what can fit into the field) is marked as the largest length that + # can fit into the field. That way, even if this does happen, the old + # < v1.3.0 server will still allow clients to read the first part of + # the share. + return struct.pack(">LLL", version, min(2**32 - 1, max_size), 0) + +class _V1(object): + """ + Implement encoding and decoding for v1 of the immutable container. + """ + version = 1 + + @classmethod + def header(cls, max_size): + return _header(cls.version, max_size) + + @classmethod + def serialize_lease(cls, lease): + if isinstance(lease, LeaseInfo): + return lease.to_immutable_data() + raise ValueError( + "ShareFile v1 schema only supports LeaseInfo, not {!r}".format( + lease, + ), + ) + + @classmethod + def unserialize_lease(cls, data): + # In v1 of the immutable schema lease secrets are stored plaintext. + # So load the data into a plain LeaseInfo which works on plaintext + # secrets. + return LeaseInfo.from_immutable_data(data) + + +ALL_SCHEMAS = {_V1} +ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} +NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) + +def schema_from_version(version): + # (int) -> Optional[type] + """ + Find the schema object that corresponds to a certain version number. + """ + for schema in ALL_SCHEMAS: + if schema.version == version: + return schema + return None