introduce an explicit representation of the v1 mutable container schema

This is only a partial representation, sufficient to express the changes that
are coming in v2.
This commit is contained in:
Jean-Paul Calderone 2021-11-04 15:26:58 -04:00
parent 2186bfcc37
commit 931ddf85a5
2 changed files with 134 additions and 31 deletions

View File

@ -24,7 +24,10 @@ from allmydata.storage.lease import LeaseInfo
from allmydata.storage.common import UnknownMutableContainerVersionError, \
DataTooLargeError
from allmydata.mutable.layout import MAX_MUTABLE_SHARE_SIZE
from .mutable_schema import (
NEWEST_SCHEMA_VERSION,
schema_from_header,
)
# the MutableShareFile is like the ShareFile, but used for mutable data. It
# has a different layout. See docs/mutable.txt for more details.
@ -64,9 +67,6 @@ class MutableShareFile(object):
# our sharefiles share with a recognizable string, plus some random
# binary data to reduce the chance that a regular text file will look
# like a sharefile.
MAGIC = b"Tahoe mutable container v1\n" + b"\x75\x09\x44\x03\x8e"
assert len(MAGIC) == 32
assert isinstance(MAGIC, bytes)
MAX_SIZE = MAX_MUTABLE_SHARE_SIZE
# TODO: decide upon a policy for max share size
@ -82,20 +82,19 @@ class MutableShareFile(object):
:return: ``True`` if the bytes could belong to this container,
``False`` otherwise.
"""
return header.startswith(cls.MAGIC)
return schema_from_header(header) is not None
def __init__(self, filename, parent=None):
def __init__(self, filename, parent=None, schema=NEWEST_SCHEMA_VERSION):
self.home = filename
if os.path.exists(self.home):
# we don't cache anything, just check the magic
with open(self.home, 'rb') as f:
data = f.read(self.HEADER_SIZE)
(magic,
write_enabler_nodeid, write_enabler,
data_length, extra_least_offset) = \
struct.unpack(">32s20s32sQQ", data)
if not self.is_valid_header(data):
raise UnknownMutableContainerVersionError(filename, magic)
header = f.read(self.HEADER_SIZE)
self._schema = schema_from_header(header)
if self._schema is None:
raise UnknownMutableContainerVersionError(filename, header)
else:
self._schema = schema
self.parent = parent # for logging
def log(self, *args, **kwargs):
@ -103,23 +102,8 @@ class MutableShareFile(object):
def create(self, my_nodeid, write_enabler):
assert not os.path.exists(self.home)
data_length = 0
extra_lease_offset = (self.HEADER_SIZE
+ 4 * self.LEASE_SIZE
+ data_length)
assert extra_lease_offset == self.DATA_OFFSET # true at creation
num_extra_leases = 0
with open(self.home, 'wb') as f:
header = struct.pack(
">32s20s32sQQ",
self.MAGIC, my_nodeid, write_enabler,
data_length, extra_lease_offset,
)
leases = (b"\x00" * self.LEASE_SIZE) * 4
f.write(header + leases)
# data goes here, empty after creation
f.write(struct.pack(">L", num_extra_leases))
# extra leases go here, none at creation
f.write(self._schema.header(my_nodeid, write_enabler))
def unlink(self):
os.unlink(self.home)
@ -252,7 +236,7 @@ class MutableShareFile(object):
+ (lease_number-4)*self.LEASE_SIZE)
f.seek(offset)
assert f.tell() == offset
f.write(lease_info.to_mutable_data())
f.write(self._schema.serialize_lease(lease_info))
def _read_lease_record(self, f, lease_number):
# returns a LeaseInfo instance, or None
@ -269,7 +253,7 @@ class MutableShareFile(object):
f.seek(offset)
assert f.tell() == offset
data = f.read(self.LEASE_SIZE)
lease_info = LeaseInfo.from_mutable_data(data)
lease_info = self._schema.unserialize_lease(data)
if lease_info.owner_num == 0:
return None
return lease_info

View File

@ -0,0 +1,119 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import struct
from .lease import (
LeaseInfo,
)
class _V1(object):
"""
Implement encoding and decoding for v1 of the mutable container.
"""
version = 1
_MAGIC = (
# Make it easy for people to recognize
b"Tahoe mutable container v1\n"
# But also keep the chance of accidental collision low
b"\x75\x09\x44\x03\x8e"
)
assert len(_MAGIC) == 32
_HEADER_FORMAT = ">32s20s32sQQ"
# This size excludes leases
_HEADER_SIZE = struct.calcsize(_HEADER_FORMAT)
_EXTRA_LEASE_OFFSET = _HEADER_SIZE + 4 * LeaseInfo().mutable_size()
@classmethod
def magic_matches(cls, candidate_magic):
# type: (bytes) -> bool
"""
Return ``True`` if a candidate string matches the expected magic string
from a mutable container header, ``False`` otherwise.
"""
return candidate_magic[:len(cls._MAGIC)] == cls._MAGIC
@classmethod
def header(cls, nodeid, write_enabler):
# type: (bytes, bytes) -> bytes
"""
Construct a container header.
:param nodeid: A unique identifier for the node holding this
container.
:param write_enabler: A secret shared with the client used to
authorize changes to the contents of this container.
"""
fixed_header = struct.pack(
">32s20s32sQQ",
cls._MAGIC,
nodeid,
write_enabler,
# data length, initially the container is empty
0,
cls._EXTRA_LEASE_OFFSET,
)
blank_leases = b"\x00" * LeaseInfo().mutable_size() * 4
extra_lease_count = struct.pack(">L", 0)
return b"".join([
fixed_header,
# share data will go in between the next two items eventually but
# for now there is none.
blank_leases,
extra_lease_count,
])
@classmethod
def serialize_lease(cls, lease_info):
# type: (LeaseInfo) -> bytes
"""
Serialize a lease to be written to a v1 container.
:param lease: the lease to serialize
:return: the serialized bytes
"""
return lease_info.to_mutable_data()
@classmethod
def unserialize_lease(cls, data):
# type: (bytes) -> LeaseInfo
"""
Unserialize some bytes from a v1 container.
:param data: the bytes from the container
:return: the ``LeaseInfo`` the bytes represent
"""
return LeaseInfo.from_mutable_data(data)
ALL_SCHEMAS = {_V1}
ALL_SCHEMA_VERSIONS = {schema.version for schema in ALL_SCHEMAS} # type: ignore
NEWEST_SCHEMA_VERSION = max(ALL_SCHEMAS, key=lambda schema: schema.version) # type: ignore
def schema_from_header(header):
# (int) -> Optional[type]
"""
Find the schema object that corresponds to a certain version number.
"""
for schema in ALL_SCHEMAS:
if schema.magic_matches(header):
return schema
return None