introduce an explicit representation of the v1 immutable container schema

This is only a partial representation, sufficient to express the changes that
are coming in v2.
This commit is contained in:
Jean-Paul Calderone 2021-11-04 10:17:36 -04:00
parent 274dc6e837
commit 10724a91f9
2 changed files with 102 additions and 16 deletions

View File

@ -25,9 +25,14 @@ from allmydata.interfaces import (
) )
from allmydata.util import base32, fileutil, log from allmydata.util import base32, fileutil, log
from allmydata.util.assertutil import precondition from allmydata.util.assertutil import precondition
from allmydata.storage.lease import LeaseInfo
from allmydata.storage.common import UnknownImmutableContainerVersionError from allmydata.storage.common import UnknownImmutableContainerVersionError
from .immutable_schema import (
NEWEST_SCHEMA_VERSION,
schema_from_version,
)
# each share file (in storage/shares/$SI/$SHNUM) contains lease information # each share file (in storage/shares/$SI/$SHNUM) contains lease information
# and share data. The share data is accessed by RIBucketWriter.write and # and share data. The share data is accessed by RIBucketWriter.write and
# RIBucketReader.read . The lease information is not accessible through these # RIBucketReader.read . The lease information is not accessible through these
@ -118,9 +123,16 @@ class ShareFile(object):
``False`` otherwise. ``False`` otherwise.
""" """
(version,) = struct.unpack(">L", header[:4]) (version,) = struct.unpack(">L", header[:4])
return version == 1 return schema_from_version(version) is not None
def __init__(self, filename, max_size=None, create=False, lease_count_format="L"): def __init__(
self,
filename,
max_size=None,
create=False,
lease_count_format="L",
schema=NEWEST_SCHEMA_VERSION,
):
""" """
Initialize a ``ShareFile``. Initialize a ``ShareFile``.
@ -156,24 +168,17 @@ class ShareFile(object):
# it. Also construct the metadata. # it. Also construct the metadata.
assert not os.path.exists(self.home) assert not os.path.exists(self.home)
fileutil.make_dirs(os.path.dirname(self.home)) fileutil.make_dirs(os.path.dirname(self.home))
# The second field -- the four-byte share data length -- is no self._schema = schema
# longer used as of Tahoe v1.3.0, but we continue to write it in
# there in case someone downgrades a storage server from >=
# Tahoe-1.3.0 to < Tahoe-1.3.0, or moves a share file from one
# server to another, etc. We do saturation -- a share data length
# larger than 2**32-1 (what can fit into the field) is marked as
# the largest length that can fit into the field. That way, even
# if this does happen, the old < v1.3.0 server will still allow
# clients to read the first part of the share.
with open(self.home, 'wb') as f: with open(self.home, 'wb') as f:
f.write(struct.pack(">LLL", 1, min(2**32-1, max_size), 0)) f.write(self._schema.header(max_size))
self._lease_offset = max_size + 0x0c self._lease_offset = max_size + 0x0c
self._num_leases = 0 self._num_leases = 0
else: else:
with open(self.home, 'rb') as f: with open(self.home, 'rb') as f:
filesize = os.path.getsize(self.home) filesize = os.path.getsize(self.home)
(version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc)) (version, unused, num_leases) = struct.unpack(">LLL", f.read(0xc))
if version != 1: self._schema = schema_from_version(version)
if self._schema is None:
raise UnknownImmutableContainerVersionError(filename, version) raise UnknownImmutableContainerVersionError(filename, version)
self._num_leases = num_leases self._num_leases = num_leases
self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) self._lease_offset = filesize - (num_leases * self.LEASE_SIZE)
@ -209,7 +214,7 @@ class ShareFile(object):
offset = self._lease_offset + lease_number * self.LEASE_SIZE offset = self._lease_offset + lease_number * self.LEASE_SIZE
f.seek(offset) f.seek(offset)
assert f.tell() == offset assert f.tell() == offset
f.write(lease_info.to_immutable_data()) f.write(self._schema.serialize_lease(lease_info))
def _read_num_leases(self, f): def _read_num_leases(self, f):
f.seek(0x08) f.seek(0x08)
@ -240,7 +245,7 @@ class ShareFile(object):
for i in range(num_leases): for i in range(num_leases):
data = f.read(self.LEASE_SIZE) data = f.read(self.LEASE_SIZE)
if data: if data:
yield LeaseInfo.from_immutable_data(data) yield self._schema.unserialize_lease(data)
def add_lease(self, lease_info): def add_lease(self, lease_info):
with open(self.home, 'rb+') as f: with open(self.home, 'rb+') as f:

View File

@ -0,0 +1,81 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import struct
from .lease import (
LeaseInfo,
)
def _header(version, max_size):
    # (int, int) -> bytes
    """
    Build the fixed-size header that begins an immutable container.

    :param version: The container version number to record in the header.

    :param max_size: The maximum number of share data bytes the container
        will hold.

    :return: The packed header bytes to write at the very start of the
        container.
    """
    # The four-byte share data length has been unused since Tahoe v1.3.0.
    # It is still written so that a share file which ends up on an older
    # (< v1.3.0) server -- after a downgrade, or after being moved between
    # servers -- remains at least partly readable.  The field saturates:
    # any length too large for 32 bits is recorded as the largest value the
    # field can hold, which lets such an old server still serve the first
    # part of the share.
    largest_recordable = 2**32 - 1
    recorded_size = min(largest_recordable, max_size)

    # Three big-endian unsigned 32-bit fields: the version, the saturated
    # data length, and a final field that is always written as zero here.
    return struct.pack(">LLL", version, recorded_size, 0)
class _V1(object):
    """
    Encoder/decoder for version 1 of the immutable container format.
    """
    version = 1

    @classmethod
    def header(cls, max_size):
        """
        Return the container header bytes for this schema version.

        :param max_size: The maximum data size the container will hold.
        """
        return _header(cls.version, max_size)

    @classmethod
    def serialize_lease(cls, lease):
        """
        Convert a lease into the bytes stored in a v1 container.

        :param lease: The ``LeaseInfo`` to serialize.

        :raise ValueError: If ``lease`` is not a ``LeaseInfo`` instance.
        """
        # Reject anything that is not a plain LeaseInfo before touching it.
        if not isinstance(lease, LeaseInfo):
            raise ValueError(
                "ShareFile v1 schema only supports LeaseInfo, not {!r}".format(
                    lease,
                ),
            )
        return lease.to_immutable_data()

    @classmethod
    def unserialize_lease(cls, data):
        """
        Rebuild a ``LeaseInfo`` from bytes read out of a v1 container.

        :param data: The serialized lease bytes.
        """
        # Version 1 keeps lease secrets in plaintext, so a plain LeaseInfo
        # (which operates on plaintext secrets) is the right thing to load
        # the data into.
        return LeaseInfo.from_immutable_data(data)
# Every schema class this module knows how to read and write.
ALL_SCHEMAS = {_V1}

# The version numbers of all of the known schemas.
ALL_SCHEMA_VERSIONS = {known.version for known in ALL_SCHEMAS}

# The schema with the highest version number.  Despite the name, this is the
# schema class itself rather than a bare version number.
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda known: known.version)
def schema_from_version(version):
    # (int) -> Optional[type]
    """
    Look up the schema whose ``version`` attribute equals a version number.

    :param version: The container version number to look for.

    :return: The matching schema from ``ALL_SCHEMAS``, or ``None`` if no
        known schema has that version.
    """
    matches = (
        candidate
        for candidate in ALL_SCHEMAS
        if candidate.version == version
    )
    # Take the first hit, falling back to None when nothing matches.
    return next(matches, None)