storage: we must truncate short segments. Now most tests pass (except uri_extension)

Brian Warner 2007-07-13 16:38:25 -07:00
parent 1f8e407d9c
commit 7589a8ee82
5 changed files with 83 additions and 51 deletions
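
The bug this commit fixes is purely arithmetic: when data_size is not an exact multiple of segment_size, the final segment is shorter than the rest, and the reader must truncate instead of reading a full segment's worth of bytes. A minimal sketch of that computation (illustrative names, not Tahoe code), using the sizes the updated test exercises:

    def block_size(blocknum, data_size, segment_size):
        # number of segments is the ceiling of data_size / segment_size
        num_segments = -(-data_size // segment_size)
        if blocknum < num_segments - 1:
            return segment_size  # every block but the last is full-size
        tail = data_size % segment_size
        return tail if tail else segment_size  # exact multiple: full-size tail

    # data_size=95, segment_size=25: three full blocks plus a 20-byte tail,
    # which must be read back as 20 bytes, not 25
    assert [block_size(i, 95, 25) for i in range(4)] == [25, 25, 25, 20]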


@@ -119,7 +119,7 @@ class ValidatedBucket:
if not self._share_hash:
d1 = self.bucket.get_share_hashes()
else:
d1 = defer.succeed(None)
d1 = defer.succeed([])
# we might need to grab some elements of our block hash tree, to
# validate the requested block up to the share hash
@@ -149,9 +149,12 @@ class ValidatedBucket:
sht.set_hashes(sh)
self._share_hash = sht.get_leaf(self.sharenum)
#log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d" %
# (self.sharenum, blocknum, len(blockdata)))
blockhash = hashutil.block_hash(blockdata)
#log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d "
# "%r .. %r: %s" %
# (self.sharenum, blocknum, len(blockdata),
# blockdata[:50], blockdata[-50:], idlib.b2a(blockhash)))
# we always validate the blockhash
bh = dict(enumerate(blockhashes))
# replace blockhash root with validated value
@@ -163,20 +166,33 @@ class ValidatedBucket:
# likely a programming error
log.msg("hash failure in block=%d, shnum=%d on %s" %
(blocknum, self.sharenum, self.bucket))
#log.msg(" block length: %d" % len(blockdata))
#log.msg(" block hash: %s" % idlib.b2a_or_none(blockhash)) # not safe
#log.msg(" block data: %r" % (blockdata,))
#log.msg(" root hash: %s" % idlib.b2a(self._roothash))
#log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
#log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
#lines = []
#for i,h in sorted(sharehashes):
# lines.append("%3d: %s" % (i, idlib.b2a_or_none(h)))
#log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
#lines = []
#for i,h in enumerate(blockhashes):
# lines.append("%3d: %s" % (i, idlib.b2a_or_none(h)))
#log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
if self._share_hash:
log.msg(""" failure occurred when checking the block_hash_tree.
This suggests that either the block data was bad, or that the
block hashes we received along with it were bad.""")
else:
log.msg(""" the failure probably occurred when checking the
share_hash_tree, which suggests that the share hashes we
received from the remote peer were bad.""")
log.msg(" have self._share_hash: %s" % bool(self._share_hash))
log.msg(" block length: %d" % len(blockdata))
log.msg(" block hash: %s" % idlib.b2a_or_none(blockhash)) # not safe
if len(blockdata) < 100:
log.msg(" block data: %r" % (blockdata,))
else:
log.msg(" block data start/end: %r .. %r" %
(blockdata[:50], blockdata[-50:]))
log.msg(" root hash: %s" % idlib.b2a(self._roothash))
log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
lines = []
for i,h in sorted(sharehashes):
lines.append("%3d: %s" % (i, idlib.b2a_or_none(h)))
log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
lines = []
for i,h in enumerate(blockhashes):
lines.append("%3d: %s" % (i, idlib.b2a_or_none(h)))
log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
raise
# If we made it here, the block is good. If the hash trees didn't
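
For context on the checks above: a block hash is validated by combining it with sibling hashes up a Merkle tree until it reproduces a known-good root. A minimal sketch of that walk, assuming plain SHA-256 pair hashing (an assumption: Tahoe's hashutil uses tagged hashes, so real digests differ):

    import hashlib

    def pair_hash(left, right):
        # assumption: plain SHA-256 of the concatenated children; Tahoe's
        # internal-node hash is tagged and will not match this
        return hashlib.sha256(left + right).digest()

    def validate_leaf(leaf_hash, path, roothash):
        # path is a list of (sibling_hash, sibling_is_left) pairs, ordered
        # from the leaf up to (but not including) the root
        h = leaf_hash
        for sibling, sibling_is_left in path:
            h = pair_hash(sibling, h) if sibling_is_left else pair_hash(h, sibling)
        return h == roothash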


@@ -302,6 +302,11 @@ class Encoder(object):
d = self.send_subshare(shareid, segnum, subshare)
dl.append(d)
subshare_hash = hashutil.block_hash(subshare)
#from allmydata.util import idlib
#log.msg("creating block (shareid=%d, blocknum=%d) "
# "len=%d %r .. %r: %s" %
# (shareid, segnum, len(subshare),
# subshare[:50], subshare[-50:], idlib.b2a(subshare_hash)))
self.subshare_hashes[shareid].append(subshare_hash)
dl = self._gather_responses(dl)
def _logit(res):
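
The (commented-out) encoder log line mirrors the one added on the download side, so a mismatched block can be traced from both ends. A rough sketch of the hashing step it reports, with a hypothetical tag (hashutil.block_hash is a tagged SHA-256 in Tahoe; the tag below is made up, so digests will not match Tahoe's):

    import hashlib

    def block_hash(data):
        # hypothetical tag; Tahoe's hashutil.block_hash uses its own
        return hashlib.sha256(b"example-block-tag:" + data).digest()

    # one hash list per share, mirroring subshare_hashes[shareid].append(...);
    # each list later becomes the leaves of that share's block hash tree
    num_shares, num_segments = 3, 4
    subshare_hashes = {shareid: [] for shareid in range(num_shares)}
    for segnum in range(num_segments):
        for shareid in range(num_shares):
            subshare = bytes([65 + shareid]) * 25  # placeholder block data
            subshare_hashes[shareid].append(block_hash(subshare))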


@@ -6,9 +6,8 @@ from twisted.internet import defer
from zope.interface import implements
from allmydata.interfaces import RIStorageServer, RIBucketWriter, \
RIBucketReader, IStorageBucketWriter, IStorageBucketReader
from allmydata import interfaces
from allmydata.util import fileutil, idlib
RIBucketReader, IStorageBucketWriter, IStorageBucketReader, HASH_SIZE
from allmydata.util import fileutil, idlib, mathutil
from allmydata.util.assertutil import precondition
# store/
@@ -151,22 +150,23 @@ a series of four-byte big-endian offset values, which indicate where each
section starts. Each offset is measured from the beginning of the file.
0x00: segment size
0x04: offset of data (=00 00 00 1c)
0x08: offset of plaintext_hash_tree
0x0c: offset of crypttext_hash_tree
0x10: offset of block_hashes
0x14: offset of share_hashes
0x18: offset of uri_extension_length + uri_extension
0x1c: start of data
start of plaintext_hash_tree
start of crypttext_hash_tree
start of block_hashes
start of share_hashes
0x04: data size
0x08: offset of data (=00 00 00 20)
0x0c: offset of plaintext_hash_tree
0x10: offset of crypttext_hash_tree
0x14: offset of block_hashes
0x18: offset of share_hashes
0x1c: offset of uri_extension_length + uri_extension
0x20: start of data
? : start of plaintext_hash_tree
? : start of crypttext_hash_tree
? : start of block_hashes
? : start of share_hashes
each share_hash is written as a two-byte (big-endian) hashnum
followed by the 32-byte SHA-256 hash. We only store the hashes
necessary to validate the share hash root
start of uri_extension_length (four-byte big-endian value)
start of uri_extension
? : start of uri_extension_length (four-byte big-endian value)
? : start of uri_extension
"""
def allocated_size(data_size, num_segments, num_share_hashes,
@@ -181,10 +181,10 @@ class WriteBucketProxy:
def __init__(self, rref, data_size, segment_size, num_segments,
num_share_hashes, uri_extension_size):
self._rref = rref
self._data_size = data_size
self._segment_size = segment_size
self._num_segments = num_segments
HASH_SIZE = interfaces.HASH_SIZE
self._segment_hash_size = (2*num_segments - 1) * HASH_SIZE
# how many share hashes are included in each share? This will be
# about ln2(num_shares).
@@ -193,7 +193,7 @@
self._uri_extension_size = uri_extension_size
offsets = self._offsets = {}
x = 0x1c
x = 0x20
offsets['data'] = x
x += data_size
offsets['plaintext_hash_tree'] = x
@@ -206,16 +206,17 @@
x += self._share_hash_size
offsets['uri_extension'] = x
offset_data = struct.pack(">LLLLLLL",
offset_data = struct.pack(">LLLLLLLL",
segment_size,
data_size,
offsets['data'],
offsets['plaintext_hash_tree'],
offsets['crypttext_hash_tree'],
offsets['block_hashes'],
offsets['share_hashes'],
offsets['uri_extension']
offsets['uri_extension'],
)
assert len(offset_data) == 7*4
assert len(offset_data) == 8*4
self._offset_data = offset_data
def start(self):
@@ -229,7 +230,9 @@
precondition(len(data) == self._segment_size,
len(data), self._segment_size)
else:
precondition(len(data) <= self._segment_size,
precondition(len(data) == (self._data_size -
(self._segment_size *
(self._num_segments - 1))),
len(data), self._segment_size)
return self._write(offset, data)
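
The tightened precondition pins the final block to exactly the leftover byte count instead of merely <= segment_size. With the values the updated test uses:

    # data_size=95, segment_size=25, num_segments=4: the final block must be
    # exactly 95 - 25*(4-1) == 20 bytes, so put_block(3, "d"*25) now fails
    data_size, segment_size, num_segments = 95, 25, 4
    assert data_size - segment_size * (num_segments - 1) == 20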
@@ -298,17 +301,19 @@
def start(self):
# TODO: for small shares, read the whole bucket in start()
d = self._read(0, 7*4)
d = self._read(0, 8*4)
self._offsets = {}
def _got_offsets(data):
self._segment_size = struct.unpack(">L", data[0:4])[0]
x = 4
self._data_size = struct.unpack(">L", data[4:8])[0]
x = 0x08
for field in ( 'data',
'plaintext_hash_tree',
'crypttext_hash_tree',
'block_hashes',
'share_hashes',
'uri_extension' ):
'uri_extension',
):
offset = struct.unpack(">L", data[x:x+4])[0]
x += 4
self._offsets[field] = offset
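
The reader-side inverse, sketched under the same assumptions as the writer sketch above (eight big-endian 4-byte fields); fed that 32-byte header, it recovers the same offsets dict:

    import struct

    def parse_header(header):
        segment_size, data_size = struct.unpack(">LL", header[:8])
        fields = ('data', 'plaintext_hash_tree', 'crypttext_hash_tree',
                  'block_hashes', 'share_hashes', 'uri_extension')
        offsets = dict(zip(fields, struct.unpack(">LLLLLL", header[8:32])))
        return segment_size, data_size, offsets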
@@ -316,13 +321,20 @@
return d
def get_block(self, blocknum):
num_segments = mathutil.div_ceil(self._data_size, self._segment_size)
if blocknum < num_segments-1:
size = self._segment_size
else:
size = self._data_size % self._segment_size
if size == 0:
size = self._segment_size
offset = self._offsets['data'] + blocknum * self._segment_size
return self._read(offset, self._segment_size)
return self._read(offset, size)
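
Two worked cases of the new size logic, including the exact-multiple corner that the size == 0 branch handles:

    def div_ceil(n, d):
        # equivalent of allmydata.util.mathutil.div_ceil for positive ints
        return (n + d - 1) // d

    # data_size=95: div_ceil gives 4 segments; block 3 reads 95 % 25 == 20 bytes
    assert div_ceil(95, 25) == 4 and 95 % 25 == 20
    # data_size=100: remainder is 0, so the final block stays full-size
    assert div_ceil(100, 25) == 4 and 100 % 25 == 0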
def _str2l(self, s):
""" split string (pulled from storage) into a list of blockids """
return [ s[i:i+interfaces.HASH_SIZE]
for i in range(0, len(s), interfaces.HASH_SIZE) ]
return [ s[i:i+HASH_SIZE]
for i in range(0, len(s), HASH_SIZE) ]
def get_plaintext_hashes(self):
offset = self._offsets['plaintext_hash_tree']
@@ -348,7 +360,6 @@
def get_share_hashes(self):
offset = self._offsets['share_hashes']
size = self._offsets['uri_extension'] - offset
HASH_SIZE = interfaces.HASH_SIZE
assert size % (2+HASH_SIZE) == 0
d = self._read(offset, size)
def _unpack_share_hashes(data):
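
The hunk cuts _unpack_share_hashes off; what follows is a hedged sketch of parsing those records, based only on the format stated in the layout docstring (a two-byte big-endian hashnum followed by a 32-byte hash), not on the real body:

    import struct

    HASH_SIZE = 32  # SHA-256, per the layout description

    def unpack_share_hashes(data):
        # records are (two-byte big-endian hashnum, HASH_SIZE-byte hash)
        assert len(data) % (2 + HASH_SIZE) == 0
        hashes = []
        for i in range(0, len(data), 2 + HASH_SIZE):
            (hashnum,) = struct.unpack(">H", data[i:i+2])
            hashes.append((hashnum, data[i+2:i+2+HASH_SIZE]))
        return hashes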


@@ -102,7 +102,7 @@ class BucketProxy(unittest.TestCase):
bw, rb, final = self.make_bucket("test_readwrite", 1406)
bp = WriteBucketProxy(rb,
data_size=100,
data_size=95,
segment_size=25,
num_segments=4,
num_share_hashes=3,
@@ -112,7 +112,7 @@
d.addCallback(lambda res: bp.put_block(0, "a"*25))
d.addCallback(lambda res: bp.put_block(1, "b"*25))
d.addCallback(lambda res: bp.put_block(2, "c"*25))
d.addCallback(lambda res: bp.put_block(3, "d"*25))
d.addCallback(lambda res: bp.put_block(3, "d"*20))
d.addCallback(lambda res: bp.put_plaintext_hashes(plaintext_hashes))
d.addCallback(lambda res: bp.put_crypttext_hashes(crypttext_hashes))
d.addCallback(lambda res: bp.put_block_hashes(block_hashes))
@@ -136,7 +136,7 @@
d1.addCallback(lambda res: rbp.get_block(2))
d1.addCallback(lambda res: self.failUnlessEqual(res, "c"*25))
d1.addCallback(lambda res: rbp.get_block(3))
d1.addCallback(lambda res: self.failUnlessEqual(res, "d"*25))
d1.addCallback(lambda res: self.failUnlessEqual(res, "d"*20))
d1.addCallback(lambda res: rbp.get_plaintext_hashes())
d1.addCallback(lambda res:


@@ -4,7 +4,7 @@ from twisted.python.failure import Failure
from twisted.internet import defer
from cStringIO import StringIO
from allmydata import upload, encode, storageserver
from allmydata import upload, encode
from allmydata.uri import unpack_uri, unpack_lit
from allmydata.util.assertutil import precondition
from foolscap import eventual
@@ -35,7 +35,7 @@ class FakeStorageServer:
return d
def allocate_buckets(self, crypttext_hash, sharenums,
share_size, blocksize, canary):
share_size, canary):
#print "FakeStorageServer.allocate_buckets(num=%d, size=%d)" % (len(sharenums), share_size)
if self.mode == "full":
return (set(), {},)