mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-02-01 08:48:01 +00:00
Merge pull request #1152 from tahoe-lafs/3833.container-format-abstraction
Container header format abstractions Fixes: ticket:3833
This commit is contained in:
commit
8fbbc913ad
0
newsfragments/3833.minor
Normal file
0
newsfragments/3833.minor
Normal file
@ -15,15 +15,22 @@ try:
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# do not import any allmydata modules at this level. Do that from inside
|
||||
# individual functions instead.
|
||||
import struct, time, os, sys
|
||||
|
||||
from twisted.python import usage, failure
|
||||
from twisted.internet import defer
|
||||
from foolscap.logging import cli as foolscap_cli
|
||||
from allmydata.scripts.common import BaseOptions
|
||||
|
||||
from allmydata.scripts.common import BaseOptions
|
||||
from allmydata import uri
|
||||
from allmydata.storage.mutable import MutableShareFile
|
||||
from allmydata.storage.immutable import ShareFile
|
||||
from allmydata.mutable.layout import unpack_share
|
||||
from allmydata.mutable.layout import MDMFSlotReadProxy
|
||||
from allmydata.mutable.common import NeedMoreDataError
|
||||
from allmydata.immutable.layout import ReadBucketProxy
|
||||
from allmydata.util import base32
|
||||
from allmydata.util.encodingutil import quote_output
|
||||
|
||||
class DumpOptions(BaseOptions):
|
||||
def getSynopsis(self):
|
||||
@ -56,13 +63,11 @@ def dump_share(options):
|
||||
# check the version, to see if we have a mutable or immutable share
|
||||
print("share filename: %s" % quote_output(options['filename']), file=out)
|
||||
|
||||
f = open(options['filename'], "rb")
|
||||
prefix = f.read(32)
|
||||
f.close()
|
||||
if prefix == MutableShareFile.MAGIC:
|
||||
return dump_mutable_share(options)
|
||||
# otherwise assume it's immutable
|
||||
return dump_immutable_share(options)
|
||||
with open(options['filename'], "rb") as f:
|
||||
if MutableShareFile.is_valid_header(f.read(32)):
|
||||
return dump_mutable_share(options)
|
||||
# otherwise assume it's immutable
|
||||
return dump_immutable_share(options)
|
||||
|
||||
def dump_immutable_share(options):
|
||||
from allmydata.storage.immutable import ShareFile
|
||||
@ -712,125 +717,122 @@ def call(c, *args, **kwargs):
|
||||
return results[0]
|
||||
|
||||
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
|
||||
from allmydata import uri
|
||||
from allmydata.storage.mutable import MutableShareFile
|
||||
from allmydata.storage.immutable import ShareFile
|
||||
from allmydata.mutable.layout import unpack_share
|
||||
from allmydata.mutable.common import NeedMoreDataError
|
||||
from allmydata.immutable.layout import ReadBucketProxy
|
||||
from allmydata.util import base32
|
||||
from allmydata.util.encodingutil import quote_output
|
||||
import struct
|
||||
|
||||
f = open(abs_sharefile, "rb")
|
||||
prefix = f.read(32)
|
||||
|
||||
if prefix == MutableShareFile.MAGIC:
|
||||
# mutable share
|
||||
m = MutableShareFile(abs_sharefile)
|
||||
WE, nodeid = m._read_write_enabler_and_nodeid(f)
|
||||
data_length = m._read_data_length(f)
|
||||
expiration_time = min( [lease.get_expiration_time()
|
||||
for (i,lease) in m._enumerate_leases(f)] )
|
||||
expiration = max(0, expiration_time - now)
|
||||
|
||||
share_type = "unknown"
|
||||
f.seek(m.DATA_OFFSET)
|
||||
version = f.read(1)
|
||||
if version == b"\x00":
|
||||
# this slot contains an SMDF share
|
||||
share_type = "SDMF"
|
||||
elif version == b"\x01":
|
||||
share_type = "MDMF"
|
||||
|
||||
if share_type == "SDMF":
|
||||
f.seek(m.DATA_OFFSET)
|
||||
data = f.read(min(data_length, 2000))
|
||||
|
||||
try:
|
||||
pieces = unpack_share(data)
|
||||
except NeedMoreDataError as e:
|
||||
# retry once with the larger size
|
||||
size = e.needed_bytes
|
||||
f.seek(m.DATA_OFFSET)
|
||||
data = f.read(min(data_length, size))
|
||||
pieces = unpack_share(data)
|
||||
(seqnum, root_hash, IV, k, N, segsize, datalen,
|
||||
pubkey, signature, share_hash_chain, block_hash_tree,
|
||||
share_data, enc_privkey) = pieces
|
||||
|
||||
print("SDMF %s %d/%d %d #%d:%s %d %s" % \
|
||||
(si_s, k, N, datalen,
|
||||
seqnum, str(base32.b2a(root_hash), "utf-8"),
|
||||
expiration, quote_output(abs_sharefile)), file=out)
|
||||
elif share_type == "MDMF":
|
||||
from allmydata.mutable.layout import MDMFSlotReadProxy
|
||||
fake_shnum = 0
|
||||
# TODO: factor this out with dump_MDMF_share()
|
||||
class ShareDumper(MDMFSlotReadProxy):
|
||||
def _read(self, readvs, force_remote=False, queue=False):
|
||||
data = []
|
||||
for (where,length) in readvs:
|
||||
f.seek(m.DATA_OFFSET+where)
|
||||
data.append(f.read(length))
|
||||
return defer.succeed({fake_shnum: data})
|
||||
|
||||
p = ShareDumper(None, "fake-si", fake_shnum)
|
||||
def extract(func):
|
||||
stash = []
|
||||
# these methods return Deferreds, but we happen to know that
|
||||
# they run synchronously when not actually talking to a
|
||||
# remote server
|
||||
d = func()
|
||||
d.addCallback(stash.append)
|
||||
return stash[0]
|
||||
|
||||
verinfo = extract(p.get_verinfo)
|
||||
(seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
|
||||
offsets) = verinfo
|
||||
print("MDMF %s %d/%d %d #%d:%s %d %s" % \
|
||||
(si_s, k, N, datalen,
|
||||
seqnum, str(base32.b2a(root_hash), "utf-8"),
|
||||
expiration, quote_output(abs_sharefile)), file=out)
|
||||
with open(abs_sharefile, "rb") as f:
|
||||
prefix = f.read(32)
|
||||
if MutableShareFile.is_valid_header(prefix):
|
||||
_describe_mutable_share(abs_sharefile, f, now, si_s, out)
|
||||
elif ShareFile.is_valid_header(prefix):
|
||||
_describe_immutable_share(abs_sharefile, now, si_s, out)
|
||||
else:
|
||||
print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)
|
||||
print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile), file=out)
|
||||
|
||||
elif struct.unpack(">L", prefix[:4]) == (1,):
|
||||
# immutable
|
||||
def _describe_mutable_share(abs_sharefile, f, now, si_s, out):
|
||||
# mutable share
|
||||
m = MutableShareFile(abs_sharefile)
|
||||
WE, nodeid = m._read_write_enabler_and_nodeid(f)
|
||||
data_length = m._read_data_length(f)
|
||||
expiration_time = min( [lease.get_expiration_time()
|
||||
for (i,lease) in m._enumerate_leases(f)] )
|
||||
expiration = max(0, expiration_time - now)
|
||||
|
||||
class ImmediateReadBucketProxy(ReadBucketProxy):
|
||||
def __init__(self, sf):
|
||||
self.sf = sf
|
||||
ReadBucketProxy.__init__(self, None, None, "")
|
||||
def __repr__(self):
|
||||
return "<ImmediateReadBucketProxy>"
|
||||
def _read(self, offset, size):
|
||||
return defer.succeed(sf.read_share_data(offset, size))
|
||||
share_type = "unknown"
|
||||
f.seek(m.DATA_OFFSET)
|
||||
version = f.read(1)
|
||||
if version == b"\x00":
|
||||
# this slot contains an SMDF share
|
||||
share_type = "SDMF"
|
||||
elif version == b"\x01":
|
||||
share_type = "MDMF"
|
||||
|
||||
# use a ReadBucketProxy to parse the bucket and find the uri extension
|
||||
sf = ShareFile(abs_sharefile)
|
||||
bp = ImmediateReadBucketProxy(sf)
|
||||
if share_type == "SDMF":
|
||||
f.seek(m.DATA_OFFSET)
|
||||
|
||||
expiration_time = min( [lease.get_expiration_time()
|
||||
for lease in sf.get_leases()] )
|
||||
expiration = max(0, expiration_time - now)
|
||||
# Read at least the mutable header length, if possible. If there's
|
||||
# less data than that in the share, don't try to read more (we won't
|
||||
# be able to unpack the header in this case but we surely don't want
|
||||
# to try to unpack bytes *following* the data section as if they were
|
||||
# header data). Rather than 2000 we could use HEADER_LENGTH from
|
||||
# allmydata/mutable/layout.py, probably.
|
||||
data = f.read(min(data_length, 2000))
|
||||
|
||||
UEB_data = call(bp.get_uri_extension)
|
||||
unpacked = uri.unpack_extension_readable(UEB_data)
|
||||
try:
|
||||
pieces = unpack_share(data)
|
||||
except NeedMoreDataError as e:
|
||||
# retry once with the larger size
|
||||
size = e.needed_bytes
|
||||
f.seek(m.DATA_OFFSET)
|
||||
data = f.read(min(data_length, size))
|
||||
pieces = unpack_share(data)
|
||||
(seqnum, root_hash, IV, k, N, segsize, datalen,
|
||||
pubkey, signature, share_hash_chain, block_hash_tree,
|
||||
share_data, enc_privkey) = pieces
|
||||
|
||||
k = unpacked["needed_shares"]
|
||||
N = unpacked["total_shares"]
|
||||
filesize = unpacked["size"]
|
||||
ueb_hash = unpacked["UEB_hash"]
|
||||
print("SDMF %s %d/%d %d #%d:%s %d %s" % \
|
||||
(si_s, k, N, datalen,
|
||||
seqnum, str(base32.b2a(root_hash), "utf-8"),
|
||||
expiration, quote_output(abs_sharefile)), file=out)
|
||||
elif share_type == "MDMF":
|
||||
fake_shnum = 0
|
||||
# TODO: factor this out with dump_MDMF_share()
|
||||
class ShareDumper(MDMFSlotReadProxy):
|
||||
def _read(self, readvs, force_remote=False, queue=False):
|
||||
data = []
|
||||
for (where,length) in readvs:
|
||||
f.seek(m.DATA_OFFSET+where)
|
||||
data.append(f.read(length))
|
||||
return defer.succeed({fake_shnum: data})
|
||||
|
||||
print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
|
||||
str(ueb_hash, "utf-8"), expiration,
|
||||
quote_output(abs_sharefile)), file=out)
|
||||
p = ShareDumper(None, "fake-si", fake_shnum)
|
||||
def extract(func):
|
||||
stash = []
|
||||
# these methods return Deferreds, but we happen to know that
|
||||
# they run synchronously when not actually talking to a
|
||||
# remote server
|
||||
d = func()
|
||||
d.addCallback(stash.append)
|
||||
return stash[0]
|
||||
|
||||
verinfo = extract(p.get_verinfo)
|
||||
(seqnum, root_hash, salt_to_use, segsize, datalen, k, N, prefix,
|
||||
offsets) = verinfo
|
||||
print("MDMF %s %d/%d %d #%d:%s %d %s" % \
|
||||
(si_s, k, N, datalen,
|
||||
seqnum, str(base32.b2a(root_hash), "utf-8"),
|
||||
expiration, quote_output(abs_sharefile)), file=out)
|
||||
else:
|
||||
print("UNKNOWN really-unknown %s" % quote_output(abs_sharefile), file=out)
|
||||
print("UNKNOWN mutable %s" % quote_output(abs_sharefile), file=out)
|
||||
|
||||
|
||||
def _describe_immutable_share(abs_sharefile, now, si_s, out):
|
||||
class ImmediateReadBucketProxy(ReadBucketProxy):
|
||||
def __init__(self, sf):
|
||||
self.sf = sf
|
||||
ReadBucketProxy.__init__(self, None, None, "")
|
||||
def __repr__(self):
|
||||
return "<ImmediateReadBucketProxy>"
|
||||
def _read(self, offset, size):
|
||||
return defer.succeed(sf.read_share_data(offset, size))
|
||||
|
||||
# use a ReadBucketProxy to parse the bucket and find the uri extension
|
||||
sf = ShareFile(abs_sharefile)
|
||||
bp = ImmediateReadBucketProxy(sf)
|
||||
|
||||
expiration_time = min(lease.get_expiration_time()
|
||||
for lease in sf.get_leases())
|
||||
expiration = max(0, expiration_time - now)
|
||||
|
||||
UEB_data = call(bp.get_uri_extension)
|
||||
unpacked = uri.unpack_extension_readable(UEB_data)
|
||||
|
||||
k = unpacked["needed_shares"]
|
||||
N = unpacked["total_shares"]
|
||||
filesize = unpacked["size"]
|
||||
ueb_hash = unpacked["UEB_hash"]
|
||||
|
||||
print("CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
|
||||
str(ueb_hash, "utf-8"), expiration,
|
||||
quote_output(abs_sharefile)), file=out)
|
||||
|
||||
f.close()
|
||||
|
||||
def catalog_shares(options):
|
||||
from allmydata.util.encodingutil import listdir_unicode, quote_output
|
||||
@ -933,34 +935,35 @@ def corrupt_share(options):
|
||||
f.write(d)
|
||||
f.close()
|
||||
|
||||
f = open(fn, "rb")
|
||||
prefix = f.read(32)
|
||||
f.close()
|
||||
if prefix == MutableShareFile.MAGIC:
|
||||
# mutable
|
||||
m = MutableShareFile(fn)
|
||||
f = open(fn, "rb")
|
||||
f.seek(m.DATA_OFFSET)
|
||||
data = f.read(2000)
|
||||
# make sure this slot contains an SMDF share
|
||||
assert data[0:1] == b"\x00", "non-SDMF mutable shares not supported"
|
||||
f.close()
|
||||
with open(fn, "rb") as f:
|
||||
prefix = f.read(32)
|
||||
|
||||
(version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
|
||||
ig_datalen, offsets) = unpack_header(data)
|
||||
if MutableShareFile.is_valid_header(prefix):
|
||||
# mutable
|
||||
m = MutableShareFile(fn)
|
||||
with open(fn, "rb") as f:
|
||||
f.seek(m.DATA_OFFSET)
|
||||
# Read enough data to get a mutable header to unpack.
|
||||
data = f.read(2000)
|
||||
# make sure this slot contains an SMDF share
|
||||
assert data[0:1] == b"\x00", "non-SDMF mutable shares not supported"
|
||||
f.close()
|
||||
|
||||
assert version == 0, "we only handle v0 SDMF files"
|
||||
start = m.DATA_OFFSET + offsets["share_data"]
|
||||
end = m.DATA_OFFSET + offsets["enc_privkey"]
|
||||
flip_bit(start, end)
|
||||
else:
|
||||
# otherwise assume it's immutable
|
||||
f = ShareFile(fn)
|
||||
bp = ReadBucketProxy(None, None, '')
|
||||
offsets = bp._parse_offsets(f.read_share_data(0, 0x24))
|
||||
start = f._data_offset + offsets["data"]
|
||||
end = f._data_offset + offsets["plaintext_hash_tree"]
|
||||
flip_bit(start, end)
|
||||
(version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
|
||||
ig_datalen, offsets) = unpack_header(data)
|
||||
|
||||
assert version == 0, "we only handle v0 SDMF files"
|
||||
start = m.DATA_OFFSET + offsets["share_data"]
|
||||
end = m.DATA_OFFSET + offsets["enc_privkey"]
|
||||
flip_bit(start, end)
|
||||
else:
|
||||
# otherwise assume it's immutable
|
||||
f = ShareFile(fn)
|
||||
bp = ReadBucketProxy(None, None, '')
|
||||
offsets = bp._parse_offsets(f.read_share_data(0, 0x24))
|
||||
start = f._data_offset + offsets["data"]
|
||||
end = f._data_offset + offsets["plaintext_hash_tree"]
|
||||
flip_bit(start, end)
|
||||
|
||||
|
||||
|
||||
|
@ -57,6 +57,21 @@ class ShareFile(object):
|
||||
LEASE_SIZE = struct.calcsize(">L32s32sL")
|
||||
sharetype = "immutable"
|
||||
|
||||
@classmethod
def is_valid_header(cls, header):
    # type: (bytes) -> bool
    """
    Determine if the given bytes constitute a valid header for this type of
    container.

    Only the first four bytes are examined: they are decoded as a
    big-endian unsigned 32-bit version number, and the header is accepted
    when that version is 1.

    :param header: Some bytes from the beginning of a container.  May be
        shorter than four bytes (for example when read from an empty or
        truncated file).

    :return: ``True`` if the bytes could belong to this container,
        ``False`` otherwise.
    """
    if len(header) < 4:
        # struct.unpack(">L", ...) requires exactly four bytes and raises
        # struct.error otherwise.  A header too short to carry a version
        # number cannot belong to this container, so answer False instead
        # of letting the exception escape to callers that expect a bool.
        return False
    (version,) = struct.unpack(">L", header[:4])
    return version == 1
|
||||
|
||||
def __init__(self, filename, max_size=None, create=False):
|
||||
""" If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. """
|
||||
precondition((max_size is not None) or (not create), max_size, create)
|
||||
|
@ -67,6 +67,20 @@ class MutableShareFile(object):
|
||||
MAX_SIZE = MAX_MUTABLE_SHARE_SIZE
|
||||
# TODO: decide upon a policy for max share size
|
||||
|
||||
@classmethod
def is_valid_header(cls, header):
    # type: (bytes) -> bool
    """
    Report whether ``header`` looks like the start of this kind of
    container.

    The check is a pure prefix match against ``cls.MAGIC``; bytes that
    follow the magic value are ignored.

    :param header: Some bytes from the beginning of a container.

    :return: ``True`` if ``header`` begins with ``cls.MAGIC``, ``False``
        otherwise (including when ``header`` is shorter than the magic).
    """
    magic = cls.MAGIC
    # Comparing the leading slice is the same test as a "starts with"
    # check: a header shorter than the magic yields a shorter slice, which
    # can never compare equal.
    return header[:len(magic)] == magic
|
||||
|
||||
def __init__(self, filename, parent=None):
|
||||
self.home = filename
|
||||
if os.path.exists(self.home):
|
||||
@ -77,7 +91,7 @@ class MutableShareFile(object):
|
||||
write_enabler_nodeid, write_enabler,
|
||||
data_length, extra_least_offset) = \
|
||||
struct.unpack(">32s20s32sQQ", data)
|
||||
if magic != self.MAGIC:
|
||||
if not self.is_valid_header(data):
|
||||
msg = "sharefile %s had magic '%r' but we wanted '%r'" % \
|
||||
(filename, magic, self.MAGIC)
|
||||
raise UnknownMutableContainerVersionError(msg)
|
||||
@ -388,7 +402,7 @@ class MutableShareFile(object):
|
||||
write_enabler_nodeid, write_enabler,
|
||||
data_length, extra_least_offset) = \
|
||||
struct.unpack(">32s20s32sQQ", data)
|
||||
assert magic == self.MAGIC
|
||||
assert self.is_valid_header(data)
|
||||
return (write_enabler, write_enabler_nodeid)
|
||||
|
||||
def readv(self, readv):
|
||||
|
@ -14,7 +14,7 @@ if PY2:
|
||||
else:
|
||||
from typing import Dict
|
||||
|
||||
import os, re, struct, time
|
||||
import os, re, time
|
||||
import six
|
||||
|
||||
from foolscap.api import Referenceable
|
||||
@ -373,12 +373,12 @@ class StorageServer(service.MultiService, Referenceable):
|
||||
for shnum, filename in self._get_bucket_shares(storage_index):
|
||||
with open(filename, 'rb') as f:
|
||||
header = f.read(32)
|
||||
if header[:32] == MutableShareFile.MAGIC:
|
||||
if MutableShareFile.is_valid_header(header):
|
||||
sf = MutableShareFile(filename, self)
|
||||
# note: if the share has been migrated, the renew_lease()
|
||||
# call will throw an exception, with information to help the
|
||||
# client update the lease.
|
||||
elif header[:4] == struct.pack(">L", 1):
|
||||
elif ShareFile.is_valid_header(header):
|
||||
sf = ShareFile(filename)
|
||||
else:
|
||||
continue # non-sharefile
|
||||
|
@ -17,8 +17,7 @@ from allmydata.storage.immutable import ShareFile
|
||||
def get_share_file(filename):
|
||||
with open(filename, "rb") as f:
|
||||
prefix = f.read(32)
|
||||
if prefix == MutableShareFile.MAGIC:
|
||||
if MutableShareFile.is_valid_header(prefix):
|
||||
return MutableShareFile(filename)
|
||||
# otherwise assume it's immutable
|
||||
return ShareFile(filename)
|
||||
|
||||
|
@ -1068,7 +1068,7 @@ def _corrupt_offset_of_uri_extension_to_force_short_read(data, debug=False):
|
||||
|
||||
def _corrupt_mutable_share_data(data, debug=False):
|
||||
prefix = data[:32]
|
||||
assert prefix == MutableShareFile.MAGIC, "This function is designed to corrupt mutable shares of v1, and the magic number doesn't look right: %r vs %r" % (prefix, MutableShareFile.MAGIC)
|
||||
assert MutableShareFile.is_valid_header(prefix), "This function is designed to corrupt mutable shares of v1, and the magic number doesn't look right: %r vs %r" % (prefix, MutableShareFile.MAGIC)
|
||||
data_offset = MutableShareFile.DATA_OFFSET
|
||||
sharetype = data[data_offset:data_offset+1]
|
||||
assert sharetype == b"\x00", "non-SDMF mutable shares not supported"
|
||||
|
@ -23,6 +23,7 @@ from twisted.internet import defer
|
||||
|
||||
from allmydata import uri
|
||||
from allmydata.storage.mutable import MutableShareFile
|
||||
from allmydata.storage.immutable import ShareFile
|
||||
from allmydata.storage.server import si_a2b
|
||||
from allmydata.immutable import offloaded, upload
|
||||
from allmydata.immutable.literal import LiteralFileNode
|
||||
@ -1290,9 +1291,9 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
|
||||
# are sharefiles here
|
||||
filename = os.path.join(dirpath, filenames[0])
|
||||
# peek at the magic to see if it is a chk share
|
||||
magic = open(filename, "rb").read(4)
|
||||
if magic == b'\x00\x00\x00\x01':
|
||||
break
|
||||
with open(filename, "rb") as f:
|
||||
if ShareFile.is_valid_header(f.read(32)):
|
||||
break
|
||||
else:
|
||||
self.fail("unable to find any uri_extension files in %r"
|
||||
% self.basedir)
|
||||
|
Loading…
x
Reference in New Issue
Block a user