"""
Ported to Python 3.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from future.utils import PY2
if PY2:
    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401

import struct

from zope.interface import implementer

from twisted.internet import defer

from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \
     FileTooLargeError, HASH_SIZE
from allmydata.util import mathutil, observer, pipeline
from allmydata.util.assertutil import precondition
from allmydata.storage.server import si_b2a


class LayoutInvalid(Exception):
    """ There is something wrong with these bytes so they can't be
    interpreted as the kind of immutable file that I know how to download."""
    pass

class RidiculouslyLargeURIExtensionBlock(LayoutInvalid):
    """ When downloading a file, the length of the URI Extension Block was
    given as >= 2**32. This means the share data must have been corrupted, or
    else the original uploader of the file wrote a ridiculous value into the
    URI Extension Block length."""
    pass

class ShareVersionIncompatible(LayoutInvalid):
    """ When downloading a share, its format was not one of the formats we
    know how to parse."""
    pass
"""
|
2009-02-23 18:58:37 -07:00
|
|
|
Share data is written in a file. At the start of the file, there is a series
|
|
|
|
of four-byte big-endian offset values, which indicate where each section
|
|
|
|
starts. Each offset is measured from the beginning of the share data.
|
2008-10-09 17:08:00 -07:00
|
|
|
|
|
|
|
0x00: version number (=00 00 00 01)
|
immutable: refactor downloader to be more reusable for checker/verifier/repairer (and better)
The code for validating the share hash tree and the block hash tree has been rewritten to make sure it handles all cases, to share metadata about the file (such as the share hash tree, block hash trees, and UEB) among different share downloads, and not to require hashes to be stored on the server unnecessarily, such as the roots of the block hash trees (not needed since they are also the leaves of the share hash tree), and the root of the share hash tree (not needed since it is also included in the UEB). It also passes the latest tests including handling corrupted shares well.
ValidatedReadBucketProxy takes a share_hash_tree argument to its constructor, which is a reference to a share hash tree shared by all ValidatedReadBucketProxies for that immutable file download.
ValidatedReadBucketProxy requires the block_size and share_size to be provided in its constructor, and it then uses those to compute the offsets and lengths of blocks when it needs them, instead of reading those values out of the share. The user of ValidatedReadBucketProxy therefore has to have first used a ValidatedExtendedURIProxy to compute those two values from the validated contents of the URI. This is pleasingly simplifies safety analysis: the client knows which span of bytes corresponds to a given block from the validated URI data, rather than from the unvalidated data stored on the storage server. It also simplifies unit testing of verifier/repairer, because now it doesn't care about the contents of the "share size" and "block size" fields in the share. It does not relieve the need for share data v2 layout, because we still need to store and retrieve the offsets of the fields which come after the share data, therefore we still need to use share data v2 with its 8-byte fields if we want to store share data larger than about 2^32.
Specify which subset of the block hashes and share hashes you need while downloading a particular share. In the future this will hopefully be used to fetch only a subset, for network efficiency, but currently all of them are fetched, regardless of which subset you specify.
ReadBucketProxy hides the question of whether it has "started" or not (sent a request to the server to get metadata) from its user.
Download is optimized to do as few roundtrips and as few requests as possible, hopefully speeding up download a bit.
2009-01-05 09:51:45 -07:00
|
|
|
0x04: block size # See Footnote 1 below.
|
|
|
|
0x08: share data size # See Footnote 1 below.
|
2008-10-09 17:08:00 -07:00
|
|
|
0x0c: offset of data (=00 00 00 24)
|
2008-12-19 08:18:07 -07:00
|
|
|
0x10: offset of plaintext_hash_tree UNUSED
|
2008-10-09 17:08:00 -07:00
|
|
|
0x14: offset of crypttext_hash_tree
|
|
|
|
0x18: offset of block_hashes
|
|
|
|
0x1c: offset of share_hashes
|
|
|
|
0x20: offset of uri_extension_length + uri_extension
|
|
|
|
0x24: start of data
|
2008-12-19 08:18:07 -07:00
|
|
|
? : start of plaintext_hash_tree UNUSED
|
2008-10-09 17:08:00 -07:00
|
|
|
? : start of crypttext_hash_tree
|
|
|
|
? : start of block_hashes
|
|
|
|
? : start of share_hashes
|
|
|
|
each share_hash is written as a two-byte (big-endian) hashnum
|
immutable: refactor downloader to be more reusable for checker/verifier/repairer (and better)
The code for validating the share hash tree and the block hash tree has been rewritten to make sure it handles all cases, to share metadata about the file (such as the share hash tree, block hash trees, and UEB) among different share downloads, and not to require hashes to be stored on the server unnecessarily, such as the roots of the block hash trees (not needed since they are also the leaves of the share hash tree), and the root of the share hash tree (not needed since it is also included in the UEB). It also passes the latest tests including handling corrupted shares well.
ValidatedReadBucketProxy takes a share_hash_tree argument to its constructor, which is a reference to a share hash tree shared by all ValidatedReadBucketProxies for that immutable file download.
ValidatedReadBucketProxy requires the block_size and share_size to be provided in its constructor, and it then uses those to compute the offsets and lengths of blocks when it needs them, instead of reading those values out of the share. The user of ValidatedReadBucketProxy therefore has to have first used a ValidatedExtendedURIProxy to compute those two values from the validated contents of the URI. This is pleasingly simplifies safety analysis: the client knows which span of bytes corresponds to a given block from the validated URI data, rather than from the unvalidated data stored on the storage server. It also simplifies unit testing of verifier/repairer, because now it doesn't care about the contents of the "share size" and "block size" fields in the share. It does not relieve the need for share data v2 layout, because we still need to store and retrieve the offsets of the fields which come after the share data, therefore we still need to use share data v2 with its 8-byte fields if we want to store share data larger than about 2^32.
Specify which subset of the block hashes and share hashes you need while downloading a particular share. In the future this will hopefully be used to fetch only a subset, for network efficiency, but currently all of them are fetched, regardless of which subset you specify.
ReadBucketProxy hides the question of whether it has "started" or not (sent a request to the server to get metadata) from its user.
Download is optimized to do as few roundtrips and as few requests as possible, hopefully speeding up download a bit.
2009-01-05 09:51:45 -07:00
|
|
|
followed by the 32-byte SHA-256 hash. We store only the hashes
|
2008-10-09 17:08:00 -07:00
|
|
|
necessary to validate the share hash root
|
|
|
|
? : start of uri_extension_length (four-byte big-endian value)
|
|
|
|
? : start of uri_extension
|
|
|
|
"""

"""
v2 shares: these use 8-byte offsets to remove two of the three ~12GiB size
limitations described in #346.

0x00: version number (=00 00 00 02)
0x04: block size # See Footnote 1 below.
0x0c: share data size # See Footnote 1 below.
0x14: offset of data (=00 00 00 00 00 00 00 44)
0x1c: offset of plaintext_hash_tree UNUSED
0x24: offset of crypttext_hash_tree
0x2c: offset of block_hashes
0x34: offset of share_hashes
0x3c: offset of uri_extension_length + uri_extension
0x44: start of data
    : rest of share is the same as v1, above
...   ...
?   : start of uri_extension_length (eight-byte big-endian value)
?   : start of uri_extension
"""

# Footnote 1: as of Tahoe v1.3.0 these fields are not used when reading, but
# they are still provided when writing so that older versions of Tahoe can
# read them.

FORCE_V2 = False  # set briefly by unit tests to make small-sized V2 shares

def make_write_bucket_proxy(rref, server,
                            data_size, block_size, num_segments,
                            num_share_hashes, uri_extension_size_max):
    # Use layout v1 for small files, so they'll be readable by older versions
    # (<tahoe-1.3.0). Use layout v2 for large files; they'll only be readable
    # by tahoe-1.3.0 or later.
    try:
        if FORCE_V2:
            raise FileTooLargeError
        wbp = WriteBucketProxy(rref, server,
                               data_size, block_size, num_segments,
                               num_share_hashes, uri_extension_size_max)
    except FileTooLargeError:
        wbp = WriteBucketProxy_v2(rref, server,
                                  data_size, block_size, num_segments,
                                  num_share_hashes, uri_extension_size_max)
    return wbp
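
# A hypothetical illustration of the fallback (all values made up, and
# `rref`/`server` are placeholders for real remote/server references): with
# these sizes the v1 offsets fit comfortably in 32 bits, so a plain
# WriteBucketProxy is returned; push data_size to 2**32 or beyond and the
# FileTooLargeError raised by the v1 constructor silently selects
# WriteBucketProxy_v2 instead.
#
#   wbp = make_write_bucket_proxy(rref, server,
#                                 data_size=10**6, block_size=125000,
#                                 num_segments=8, num_share_hashes=4,
#                                 uri_extension_size_max=1000)
#   assert isinstance(wbp, WriteBucketProxy)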


@implementer(IStorageBucketWriter)
class WriteBucketProxy(object):
    fieldsize = 4
    fieldstruct = ">L"

    def __init__(self, rref, server, data_size, block_size, num_segments,
                 num_share_hashes, uri_extension_size_max, pipeline_size=50000):
        self._rref = rref
        self._server = server
        self._data_size = data_size
        self._block_size = block_size
        self._num_segments = num_segments

        effective_segments = mathutil.next_power_of_k(num_segments, 2)
        self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE
        # how many share hashes are included in each share? This will be
        # about ln2(num_shares).
        self._share_hashtree_size = num_share_hashes * (2+HASH_SIZE)
        # we commit to not sending a uri extension larger than this
        self._uri_extension_size_max = uri_extension_size_max

        self._create_offsets(block_size, data_size)

        # k=3, max_segment_size=128KiB gives us a typical segment of 43691
        # bytes. Setting the default pipeline_size to 50KB lets us get two
        # segments onto the wire but not a third, which would keep the pipe
        # filled.
        self._pipeline = pipeline.Pipeline(pipeline_size)

    def get_allocated_size(self):
        return (self._offsets['uri_extension'] + self.fieldsize +
                self._uri_extension_size_max)

    def _create_offsets(self, block_size, data_size):
        if block_size >= 2**32 or data_size >= 2**32:
            raise FileTooLargeError("This file is too large to be uploaded (data_size).")

        offsets = self._offsets = {}
        x = 0x24
        offsets['data'] = x
        x += data_size
        offsets['plaintext_hash_tree'] = x # UNUSED
        x += self._segment_hash_size
        offsets['crypttext_hash_tree'] = x
        x += self._segment_hash_size
        offsets['block_hashes'] = x
        x += self._segment_hash_size
        offsets['share_hashes'] = x
        x += self._share_hashtree_size
        offsets['uri_extension'] = x

        if x >= 2**32:
            raise FileTooLargeError("This file is too large to be uploaded (offsets).")

        offset_data = struct.pack(">LLLLLLLLL",
                                  1, # version number
                                  block_size,
                                  data_size,
                                  offsets['data'],
                                  offsets['plaintext_hash_tree'], # UNUSED
                                  offsets['crypttext_hash_tree'],
                                  offsets['block_hashes'],
                                  offsets['share_hashes'],
                                  offsets['uri_extension'],
                                  )
        assert len(offset_data) == 0x24
        self._offset_data = offset_data
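
    # A worked example with made-up numbers, assuming HASH_SIZE is 32 (the
    # "32-byte SHA-256 hash" from the layout docstring above): for
    # num_segments=2, next_power_of_k(2,2) is 2, so _segment_hash_size is
    # (2*2-1)*32 = 96 bytes, and num_share_hashes=3 gives a
    # _share_hashtree_size of 3*34 = 102 bytes. With data_size=1000 the
    # offsets come out as:
    #
    #   data                = 0x24        =   36
    #   plaintext_hash_tree =   36 + 1000 = 1036
    #   crypttext_hash_tree = 1036 +   96 = 1132
    #   block_hashes        = 1132 +   96 = 1228
    #   share_hashes        = 1228 +   96 = 1324
    #   uri_extension       = 1324 +  102 = 1426
    #
    # and get_allocated_size() would then return
    # 1426 + 4 + uri_extension_size_max.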

    def __repr__(self):
        return "<WriteBucketProxy for node %r>" % self._server.get_name()

    def put_header(self):
        return self._write(0, self._offset_data)

    def put_block(self, segmentnum, data):
        offset = self._offsets['data'] + segmentnum * self._block_size
        assert offset + len(data) <= self._offsets['uri_extension']
        assert isinstance(data, bytes)
        if segmentnum < self._num_segments-1:
            precondition(len(data) == self._block_size,
                         len(data), self._block_size)
        else:
            precondition(len(data) == (self._data_size -
                                       (self._block_size *
                                        (self._num_segments - 1))),
                         len(data), self._block_size)
        return self._write(offset, data)
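
    # With the made-up numbers data_size=1000, block_size=300,
    # num_segments=4: blocks 0 through 2 must each be exactly 300 bytes,
    # while the final block carries only the remainder,
    # 1000 - 300*3 = 100 bytes.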

    def put_crypttext_hashes(self, hashes):
        offset = self._offsets['crypttext_hash_tree']
        assert isinstance(hashes, list)
        data = b"".join(hashes)
        precondition(len(data) == self._segment_hash_size,
                     len(data), self._segment_hash_size)
        precondition(offset + len(data) <= self._offsets['block_hashes'],
                     offset, len(data), offset+len(data),
                     self._offsets['block_hashes'])
        return self._write(offset, data)

    def put_block_hashes(self, blockhashes):
        offset = self._offsets['block_hashes']
        assert isinstance(blockhashes, list)
        data = b"".join(blockhashes)
        precondition(len(data) == self._segment_hash_size,
                     len(data), self._segment_hash_size)
        precondition(offset + len(data) <= self._offsets['share_hashes'],
                     offset, len(data), offset+len(data),
                     self._offsets['share_hashes'])
        return self._write(offset, data)

    def put_share_hashes(self, sharehashes):
        # sharehashes is a list of (index, hash) tuples, so they get stored
        # as 2+32=34 bytes each
        offset = self._offsets['share_hashes']
        assert isinstance(sharehashes, list)
        data = b"".join([struct.pack(">H", hashnum) + hashvalue
                         for hashnum,hashvalue in sharehashes])
        precondition(len(data) == self._share_hashtree_size,
                     len(data), self._share_hashtree_size)
        precondition(offset + len(data) <= self._offsets['uri_extension'],
                     offset, len(data), offset+len(data),
                     self._offsets['uri_extension'])
        return self._write(offset, data)
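
    # For example (hypothetical values): entry number 5 paired with a
    # 32-byte hash serializes to a 34-byte record,
    #
    #   entry = struct.pack(">H", 5) + b"\x00" * HASH_SIZE
    #   assert len(entry) == 34
    #
    # and num_share_hashes such records, concatenated, make up the
    # share-hash section.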

    def put_uri_extension(self, data):
        offset = self._offsets['uri_extension']
        assert isinstance(data, bytes)
        precondition(len(data) <= self._uri_extension_size_max,
                     len(data), self._uri_extension_size_max)
        length = struct.pack(self.fieldstruct, len(data))
        return self._write(offset, length+data)
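
    # The URI extension is stored length-prefixed. As a sketch (values
    # hypothetical), a v1 proxy writing the 5-byte extension b"hello" sends
    #
    #   struct.pack(">L", 5) + b"hello"
    #
    # at offsets['uri_extension']; a v2 proxy would use its ">Q" fieldstruct
    # for an eight-byte length prefix instead.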

    def _write(self, offset, data):
        # use a Pipeline to pipeline several writes together. TODO: another
        # speedup would be to coalesce small writes into a single call: this
        # would reduce the foolscap CPU overhead per share, but wouldn't
        # reduce the number of round trips, so it might not be worth the
        # effort.

        return self._pipeline.add(len(data),
                                  self._rref.callRemote, "write", offset, data)

    def close(self):
        d = self._pipeline.add(0, self._rref.callRemote, "close")
        d.addCallback(lambda ign: self._pipeline.flush())
        return d

    def abort(self):
        return self._rref.callRemoteOnly("abort")

    def get_servername(self):
        return self._server.get_name()

    def get_peerid(self):
        return self._server.get_serverid()


class WriteBucketProxy_v2(WriteBucketProxy):
    fieldsize = 8
    fieldstruct = ">Q"

    def _create_offsets(self, block_size, data_size):
        if block_size >= 2**64 or data_size >= 2**64:
            raise FileTooLargeError("This file is too large to be uploaded (data_size).")

        offsets = self._offsets = {}
        x = 0x44
        offsets['data'] = x
        x += data_size
        offsets['plaintext_hash_tree'] = x # UNUSED
        x += self._segment_hash_size
        offsets['crypttext_hash_tree'] = x
        x += self._segment_hash_size
        offsets['block_hashes'] = x
        x += self._segment_hash_size
        offsets['share_hashes'] = x
|
2009-01-05 09:51:45 -07:00
|
|
|
x += self._share_hashtree_size
|
2008-10-09 18:13:27 -07:00
|
|
|
offsets['uri_extension'] = x
|
|
|
|
|
|
|
|
if x >= 2**64:
|
|
|
|
raise FileTooLargeError("This file is too large to be uploaded (offsets).")
|
|
|
|
|
|
|
|
offset_data = struct.pack(">LQQQQQQQQ",
|
|
|
|
2, # version number
|
2009-01-05 09:51:45 -07:00
|
|
|
block_size,
|
2008-10-09 18:13:27 -07:00
|
|
|
data_size,
|
|
|
|
offsets['data'],
|
2008-12-19 08:18:07 -07:00
|
|
|
offsets['plaintext_hash_tree'], # UNUSED
|
2008-10-09 18:13:27 -07:00
|
|
|
offsets['crypttext_hash_tree'],
|
|
|
|
offsets['block_hashes'],
|
|
|
|
offsets['share_hashes'],
|
|
|
|
offsets['uri_extension'],
|
|
|
|
)
|
|
|
|
assert len(offset_data) == 0x44, len(offset_data)
|
|
|
|
self._offset_data = offset_data
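        # resulting v2 header layout (0x44 bytes total):
        #   0x00  version number (">L", = 2)
        #   0x04  block_size     (">Q")
        #   0x0c  data_size      (">Q")
        #   0x14  six ">Q" offsets: data, plaintext_hash_tree (unused),
        #         crypttext_hash_tree, block_hashes, share_hashes,
        #         uri_extension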
|
|
|
|
|
2017-02-27 10:56:49 -07:00
|
|
|
@implementer(IStorageBucketReader)
|
|
|
|
class ReadBucketProxy(object):
|
2009-01-05 09:51:45 -07:00
|
|
|
|
|
|
|
MAX_UEB_SIZE = 2000 # actual size is closer to 419, but varies by a few bytes
|
|
|
|
|
2011-08-01 15:43:07 -07:00
|
|
|
def __init__(self, rref, server, storage_index):
|
2008-10-09 17:08:00 -07:00
|
|
|
self._rref = rref
|
2011-08-01 15:43:07 -07:00
|
|
|
self._server = server
|
|
|
|
self._storage_index = storage_index
|
2009-01-05 09:51:45 -07:00
|
|
|
self._started = False # sent request to server
|
|
|
|
self._ready = observer.OneShotObserverList() # got response from server
|
2008-10-09 17:08:00 -07:00
|
|
|
|
|
|
|
def get_peerid(self):
|
2011-08-01 15:43:07 -07:00
|
|
|
return self._server.get_serverid()
|
2008-10-09 17:08:00 -07:00
|
|
|
|
|
|
|
def __repr__(self):
|
2021-02-23 12:02:08 -05:00
|
|
|
return "<ReadBucketProxy %r to peer [%r] SI %r>" % \
|
2011-08-01 15:43:07 -07:00
|
|
|
(id(self), self._server.get_name(), si_b2a(self._storage_index))
|
2008-10-09 17:08:00 -07:00
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
def _start_if_needed(self):
|
2009-02-23 18:58:37 -07:00
|
|
|
""" Returns a deferred that will be fired when I'm ready to return
|
|
|
|
        data, or that will errback if the starting (header reading and
        parsing)
|
|
|
|
process fails."""
|
2009-01-05 09:51:45 -07:00
|
|
|
if not self._started:
|
|
|
|
self._start()
|
|
|
|
return self._ready.when_fired()
|
|
|
|
|
|
|
|
def _start(self):
|
|
|
|
self._started = True
|
|
|
|
# TODO: for small shares, read the whole bucket in _start()
|
|
|
|
d = self._fetch_header()
|
2008-10-09 17:08:00 -07:00
|
|
|
d.addCallback(self._parse_offsets)
|
2009-02-23 18:58:37 -07:00
|
|
|
# XXX The following two callbacks implement a slightly faster/nicer
|
|
|
|
# way to get the ueb and sharehashtree, but it requires that the
|
|
|
|
# storage server be >= v1.3.0.
|
2009-01-05 13:40:57 -07:00
|
|
|
# d.addCallback(self._fetch_sharehashtree_and_ueb)
|
|
|
|
# d.addCallback(self._parse_sharehashtree_and_ueb)
|
2009-01-05 09:51:45 -07:00
|
|
|
def _fail_waiters(f):
|
|
|
|
self._ready.fire(f)
|
2009-01-05 13:35:22 -07:00
|
|
|
def _notify_waiters(result):
|
|
|
|
self._ready.fire(result)
|
|
|
|
d.addCallbacks(_notify_waiters, _fail_waiters)
|
2008-10-09 17:08:00 -07:00
|
|
|
return d
|
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
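    # 0x44 bytes covers the full v2 header; a v1 header is only 0x24 bytes,
    # and _parse_offsets tolerates the shorter result when a server returns
    # fewer bytes than requested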
def _fetch_header(self):
|
|
|
|
return self._read(0, 0x44)
|
|
|
|
|
2008-10-09 17:08:00 -07:00
|
|
|
def _parse_offsets(self, data):
|
2008-10-09 18:13:27 -07:00
|
|
|
precondition(len(data) >= 0x4)
|
2008-10-09 17:08:00 -07:00
|
|
|
self._offsets = {}
|
2008-10-09 18:13:27 -07:00
|
|
|
(version,) = struct.unpack(">L", data[0:4])
|
2009-01-02 12:15:54 -07:00
|
|
|
if version != 1 and version != 2:
|
|
|
|
raise ShareVersionIncompatible(version)
|
2008-10-09 18:13:27 -07:00
|
|
|
|
|
|
|
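        # header sizes: v1 is 4 + 2*4 + 6*4 = 0x24 bytes, v2 is
        # 4 + 2*8 + 6*8 = 0x44 bytes; the six offset fields start at
        # 0x0c (v1) or 0x14 (v2)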
if version == 1:
|
|
|
|
precondition(len(data) >= 0x24)
|
|
|
|
x = 0x0c
|
|
|
|
fieldsize = 0x4
|
|
|
|
fieldstruct = ">L"
|
|
|
|
else:
|
|
|
|
precondition(len(data) >= 0x44)
|
|
|
|
x = 0x14
|
|
|
|
fieldsize = 0x8
|
|
|
|
fieldstruct = ">Q"
|
|
|
|
|
|
|
|
self._version = version
|
|
|
|
self._fieldsize = fieldsize
|
|
|
|
self._fieldstruct = fieldstruct
|
|
|
|
|
2008-10-09 17:08:00 -07:00
|
|
|
for field in ( 'data',
|
2008-12-19 08:18:07 -07:00
|
|
|
'plaintext_hash_tree', # UNUSED
|
2008-10-09 17:08:00 -07:00
|
|
|
'crypttext_hash_tree',
|
|
|
|
'block_hashes',
|
|
|
|
'share_hashes',
|
|
|
|
'uri_extension',
|
|
|
|
):
|
2008-10-09 18:13:27 -07:00
|
|
|
offset = struct.unpack(fieldstruct, data[x:x+fieldsize])[0]
|
|
|
|
x += fieldsize
|
2008-10-09 17:08:00 -07:00
|
|
|
self._offsets[field] = offset
|
|
|
|
return self._offsets
|
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
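    # speculative over-read: a single request fetches the entire share hash
    # tree plus up to MAX_UEB_SIZE bytes of the length-prefixed UEB that
    # follows it, saving a round trip on servers >= v1.3.0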
def _fetch_sharehashtree_and_ueb(self, offsets):
|
|
|
|
sharehashtree_size = offsets['uri_extension'] - offsets['share_hashes']
|
2009-02-23 18:58:37 -07:00
|
|
|
return self._read(offsets['share_hashes'],
|
|
|
|
self.MAX_UEB_SIZE+sharehashtree_size)
|
2009-01-05 09:51:45 -07:00
|
|
|
|
|
|
|
def _parse_sharehashtree_and_ueb(self, data):
|
|
|
|
sharehashtree_size = self._offsets['uri_extension'] - self._offsets['share_hashes']
|
|
|
|
if len(data) < sharehashtree_size:
|
|
|
|
raise LayoutInvalid("share hash tree truncated -- should have at least %d bytes -- not %d" % (sharehashtree_size, len(data)))
|
|
|
|
if sharehashtree_size % (2+HASH_SIZE) != 0:
|
|
|
|
raise LayoutInvalid("share hash tree malformed -- should have an even multiple of %d bytes -- not %d" % (2+HASH_SIZE, sharehashtree_size))
|
|
|
|
self._share_hashes = []
|
|
|
|
for i in range(0, sharehashtree_size, 2+HASH_SIZE):
|
|
|
|
hashnum = struct.unpack(">H", data[i:i+2])[0]
|
|
|
|
hashvalue = data[i+2:i+2+HASH_SIZE]
|
|
|
|
self._share_hashes.append( (hashnum, hashvalue) )
|
|
|
|
|
|
|
|
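        # the length-prefixed UEB begins immediately after the share hash
        # tree within the over-read data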
i = self._offsets['uri_extension']-self._offsets['share_hashes']
|
|
|
|
if len(data) < i+self._fieldsize:
|
|
|
|
raise LayoutInvalid("not enough bytes to encode URI length -- should be at least %d bytes long, not %d " % (i+self._fieldsize, len(data),))
|
|
|
|
length = struct.unpack(self._fieldstruct, data[i:i+self._fieldsize])[0]
|
|
|
|
self._ueb_data = data[i+self._fieldsize:i+self._fieldsize+length]
|
|
|
|
|
|
|
|
def _get_block_data(self, unused, blocknum, blocksize, thisblocksize):
|
|
|
|
offset = self._offsets['data'] + blocknum * blocksize
|
|
|
|
return self._read(offset, thisblocksize)
|
|
|
|
|
|
|
|
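    # blocksize is the nominal block size, used only to compute the offset;
    # thisblocksize is how many bytes to actually read, since the last block
    # of a share may be shorter than the others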
def get_block_data(self, blocknum, blocksize, thisblocksize):
|
|
|
|
d = self._start_if_needed()
|
|
|
|
d.addCallback(self._get_block_data, blocknum, blocksize, thisblocksize)
|
|
|
|
return d
|
2008-10-09 17:08:00 -07:00
|
|
|
|
|
|
|
def _str2l(self, s):
|
|
|
|
""" split string (pulled from storage) into a list of blockids """
|
|
|
|
return [ s[i:i+HASH_SIZE]
|
|
|
|
for i in range(0, len(s), HASH_SIZE) ]
|
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
def _get_crypttext_hashes(self, unused=None):
|
2008-10-09 17:08:00 -07:00
|
|
|
offset = self._offsets['crypttext_hash_tree']
|
|
|
|
size = self._offsets['block_hashes'] - offset
|
|
|
|
d = self._read(offset, size)
|
|
|
|
d.addCallback(self._str2l)
|
|
|
|
return d
|
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
def get_crypttext_hashes(self):
|
|
|
|
d = self._start_if_needed()
|
|
|
|
d.addCallback(self._get_crypttext_hashes)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def _get_block_hashes(self, unused=None, at_least_these=()):
|
|
|
|
# TODO: fetch only at_least_these instead of all of them.
|
2008-10-09 17:08:00 -07:00
|
|
|
offset = self._offsets['block_hashes']
|
|
|
|
size = self._offsets['share_hashes'] - offset
|
|
|
|
d = self._read(offset, size)
|
|
|
|
d.addCallback(self._str2l)
|
|
|
|
return d
|
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
def get_block_hashes(self, at_least_these=()):
|
|
|
|
if at_least_these:
|
|
|
|
d = self._start_if_needed()
|
|
|
|
d.addCallback(self._get_block_hashes, at_least_these)
|
|
|
|
return d
|
|
|
|
else:
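            # no specific hashes were requested, so skip the round trip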
|
|
|
|
return defer.succeed([])
|
|
|
|
|
|
|
|
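    # self._share_hashes only exists if _parse_sharehashtree_and_ueb ran,
    # i.e. if the faster >= v1.3.0 fetch path in _start() was enabled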
def _get_share_hashes(self, unused=None):
|
2009-01-05 13:40:57 -07:00
|
|
|
if hasattr(self, '_share_hashes'):
|
|
|
|
return self._share_hashes
|
2018-04-26 15:09:01 -04:00
|
|
|
return self._get_share_hashes_the_old_way()
|
2009-01-05 09:51:45 -07:00
|
|
|
|
2008-10-09 17:08:00 -07:00
|
|
|
def get_share_hashes(self):
|
2009-01-05 09:51:45 -07:00
|
|
|
d = self._start_if_needed()
|
|
|
|
d.addCallback(self._get_share_hashes)
|
2008-10-09 17:08:00 -07:00
|
|
|
return d
|
|
|
|
|
2009-01-05 13:40:57 -07:00
|
|
|
def _get_share_hashes_the_old_way(self):
|
2009-02-23 18:58:37 -07:00
|
|
|
""" Tahoe storage servers < v1.3.0 would return an error if you tried
|
|
|
|
to read past the end of the share, so we need to use the offset and
|
|
|
|
read just that much."""
|
2009-01-05 13:40:57 -07:00
|
|
|
offset = self._offsets['share_hashes']
|
|
|
|
size = self._offsets['uri_extension'] - offset
|
2009-01-05 14:01:14 -07:00
|
|
|
if size % (2+HASH_SIZE) != 0:
|
|
|
|
raise LayoutInvalid("share hash tree corrupted -- should occupy a multiple of %d bytes, not %d bytes" % ((2+HASH_SIZE), size))
|
2009-01-05 13:40:57 -07:00
|
|
|
d = self._read(offset, size)
|
|
|
|
def _unpack_share_hashes(data):
|
2009-01-05 18:28:18 -07:00
|
|
|
if len(data) != size:
|
|
|
|
raise LayoutInvalid("share hash tree corrupted -- got a short read of the share data -- should have gotten %d, not %d bytes" % (size, len(data)))
|
2009-01-05 13:40:57 -07:00
|
|
|
hashes = []
|
|
|
|
for i in range(0, size, 2+HASH_SIZE):
|
|
|
|
hashnum = struct.unpack(">H", data[i:i+2])[0]
|
|
|
|
hashvalue = data[i+2:i+2+HASH_SIZE]
|
|
|
|
hashes.append( (hashnum, hashvalue) )
|
|
|
|
return hashes
|
|
|
|
d.addCallback(_unpack_share_hashes)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def _get_uri_extension_the_old_way(self, unused=None):
|
2009-02-23 18:58:37 -07:00
|
|
|
""" Tahoe storage servers < v1.3.0 would return an error if you tried
|
|
|
|
to read past the end of the share, so we need to fetch the UEB size
|
|
|
|
and then read just that much."""
|
2009-01-05 13:40:57 -07:00
|
|
|
offset = self._offsets['uri_extension']
|
|
|
|
d = self._read(offset, self._fieldsize)
|
|
|
|
def _got_length(data):
|
|
|
|
if len(data) != self._fieldsize:
|
|
|
|
raise LayoutInvalid("not enough bytes to encode URI length -- should be %d bytes long, not %d " % (self._fieldsize, len(data),))
|
|
|
|
length = struct.unpack(self._fieldstruct, data)[0]
|
|
|
|
if length >= 2**31:
|
2009-02-23 18:58:37 -07:00
|
|
|
# URI extension blocks are around 419 bytes long, so this
|
|
|
|
# must be corrupted. Anyway, the foolscap interface schema
|
|
|
|
# for "read" will not allow >= 2**31 bytes length.
|
2009-01-05 13:40:57 -07:00
|
|
|
raise RidiculouslyLargeURIExtensionBlock(length)
|
|
|
|
|
|
|
|
return self._read(offset+self._fieldsize, length)
|
|
|
|
d.addCallback(_got_length)
|
|
|
|
return d
|
|
|
|
|
2009-01-05 09:51:45 -07:00
|
|
|
def _get_uri_extension(self, unused=None):
|
2009-01-05 13:40:57 -07:00
|
|
|
if hasattr(self, '_ueb_data'):
|
|
|
|
return self._ueb_data
|
|
|
|
else:
|
|
|
|
return self._get_uri_extension_the_old_way()
|
2009-01-05 09:51:45 -07:00
|
|
|
|
2008-10-09 17:08:00 -07:00
|
|
|
def get_uri_extension(self):
|
2009-01-05 09:51:45 -07:00
|
|
|
d = self._start_if_needed()
|
|
|
|
d.addCallback(self._get_uri_extension)
|
2008-10-09 17:08:00 -07:00
|
|
|
return d
|
|
|
|
|
|
|
|
def _read(self, offset, length):
|
|
|
|
return self._rref.callRemote("read", offset, length)
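
# A minimal usage sketch (hypothetical, for illustration only -- `rref`,
# `server`, and `si` are assumed to come from the storage client layer):
#
#   rbp = ReadBucketProxy(rref, server, si)
#   d = rbp.get_uri_extension()
#   d.addCallback(lambda ueb_data: ...)   # raw UEB bytes, validated elsewhere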
|