tahoe-lafs/src/allmydata/test/test_encode.py

393 lines
15 KiB
Python

#! /usr/bin/env python
from twisted.trial import unittest
from twisted.internet import defer
from twisted.python.failure import Failure
from foolscap import eventual
from allmydata import encode, download
from allmydata.uri import pack_uri
from cStringIO import StringIO
class FakePeer:
def __init__(self, mode="good"):
self.ss = FakeStorageServer(mode)
def callRemote(self, methname, *args, **kwargs):
def _call():
meth = getattr(self, methname)
return meth(*args, **kwargs)
return defer.maybeDeferred(_call)
def get_service(self, sname):
assert sname == "storageserver"
return self.ss
class FakeStorageServer:
def __init__(self, mode):
self.mode = mode
def callRemote(self, methname, *args, **kwargs):
def _call():
meth = getattr(self, methname)
return meth(*args, **kwargs)
d = eventual.fireEventually()
d.addCallback(lambda res: _call())
return d
def allocate_buckets(self, verifierid, sharenums, shareize, blocksize, canary):
if self.mode == "full":
return (set(), {},)
elif self.mode == "already got them":
return (set(sharenums), {},)
else:
return (set(), dict([(shnum, FakeBucketWriter(),) for shnum in sharenums]),)
class FakeBucketWriter:
# these are used for both reading and writing
def __init__(self, mode="good"):
self.mode = mode
self.blocks = {}
self.block_hashes = None
self.share_hashes = None
self.closed = False
def callRemote(self, methname, *args, **kwargs):
def _call():
meth = getattr(self, methname)
return meth(*args, **kwargs)
return defer.maybeDeferred(_call)
def put_block(self, segmentnum, data):
assert not self.closed
assert segmentnum not in self.blocks
self.blocks[segmentnum] = data
def put_block_hashes(self, blockhashes):
assert not self.closed
assert self.block_hashes is None
self.block_hashes = blockhashes
def put_share_hashes(self, sharehashes):
assert not self.closed
assert self.share_hashes is None
self.share_hashes = sharehashes
def close(self):
assert not self.closed
self.closed = True
def flip_bit(self, good):
return good[:-1] + chr(ord(good[-1]) ^ 0x01)
def get_block(self, blocknum):
assert isinstance(blocknum, int)
if self.mode == "bad block":
return self.flip_bit(self.blocks[blocknum])
return self.blocks[blocknum]
def get_block_hashes(self):
if self.mode == "bad blockhash":
hashes = self.block_hashes[:]
hashes[1] = self.flip_bit(hashes[1])
return hashes
return self.block_hashes
def get_share_hashes(self):
if self.mode == "bad sharehash":
hashes = self.share_hashes[:]
hashes[1] = (hashes[1][0], self.flip_bit(hashes[1][1]))
return hashes
if self.mode == "missing sharehash":
# one sneaky attack would be to pretend we don't know our own
# sharehash, which could manage to frame someone else.
# download.py is supposed to guard against this case.
return []
return self.share_hashes
def make_data(length):
data = "happy happy joy joy" * 100
assert length <= len(data)
return data[:length]
class Encode(unittest.TestCase):
def do_encode(self, max_segment_size, datalen, NUM_SHARES, NUM_SEGMENTS,
expected_block_hashes, expected_share_hashes):
data = make_data(datalen)
# force use of multiple segments
options = {"max_segment_size": max_segment_size}
e = encode.Encoder(options)
e.setup(StringIO(data))
assert e.num_shares == NUM_SHARES # else we'll be completely confused
e.setup_codec() # need to rebuild the codec for that change
assert (NUM_SEGMENTS-1)*e.segment_size < len(data) <= NUM_SEGMENTS*e.segment_size
shareholders = {}
all_shareholders = []
for shnum in range(NUM_SHARES):
peer = FakeBucketWriter()
shareholders[shnum] = peer
all_shareholders.append(peer)
e.set_shareholders(shareholders)
d = e.start()
def _check(roothash):
self.failUnless(isinstance(roothash, str))
self.failUnlessEqual(len(roothash), 32)
for i,peer in enumerate(all_shareholders):
self.failUnless(peer.closed)
self.failUnlessEqual(len(peer.blocks), NUM_SEGMENTS)
# each peer gets a full tree of block hashes. For 3 or 4
# segments, that's 7 hashes. For 5 segments it's 15 hashes.
self.failUnlessEqual(len(peer.block_hashes),
expected_block_hashes)
for h in peer.block_hashes:
self.failUnlessEqual(len(h), 32)
# each peer also gets their necessary chain of share hashes.
# For 100 shares (rounded up to 128 leaves), that's 8 hashes
self.failUnlessEqual(len(peer.share_hashes),
expected_share_hashes)
for (hashnum, h) in peer.share_hashes:
self.failUnless(isinstance(hashnum, int))
self.failUnlessEqual(len(h), 32)
d.addCallback(_check)
return d
# a series of 3*3 tests to check out edge conditions. One axis is how the
# plaintext is divided into segments: kn+(-1,0,1). Another way to express
# that is that n%k == -1 or 0 or 1. For example, for 25-byte segments, we
# might test 74 bytes, 75 bytes, and 76 bytes.
# on the other axis is how many leaves in the block hash tree we wind up
# with, relative to a power of 2, so 2^a+(-1,0,1). Each segment turns
# into a single leaf. So we'd like to check out, e.g., 3 segments, 4
# segments, and 5 segments.
# that results in the following series of data lengths:
# 3 segs: 74, 75, 51
# 4 segs: 99, 100, 76
# 5 segs: 124, 125, 101
# all tests encode to 100 shares, which means the share hash tree will
# have 128 leaves, which means that buckets will be given an 8-long share
# hash chain
# all 3-segment files will have a 4-leaf blockhashtree, and thus expect
# to get 7 blockhashes. 4-segment files will also get 4-leaf block hash
# trees and 7 blockhashes. 5-segment files will get 8-leaf block hash
# trees, which get 15 blockhashes.
def test_send_74(self):
# 3 segments (25, 25, 24)
return self.do_encode(25, 74, 100, 3, 7, 8)
def test_send_75(self):
# 3 segments (25, 25, 25)
return self.do_encode(25, 75, 100, 3, 7, 8)
def test_send_51(self):
# 3 segments (25, 25, 1)
return self.do_encode(25, 51, 100, 3, 7, 8)
def test_send_76(self):
# encode a 76 byte file (in 4 segments: 25,25,25,1) to 100 shares
return self.do_encode(25, 76, 100, 4, 7, 8)
def test_send_99(self):
# 4 segments: 25,25,25,24
return self.do_encode(25, 99, 100, 4, 7, 8)
def test_send_100(self):
# 4 segments: 25,25,25,25
return self.do_encode(25, 100, 100, 4, 7, 8)
def test_send_101(self):
# encode a 101 byte file (in 5 segments: 25,25,25,25,1) to 100 shares
return self.do_encode(25, self.make_data(101), 100, 5, 15, 8)
def test_send_124(self):
# 5 segments: 25, 25, 25, 25, 24
return self.do_encode(25, 124, 100, 5, 15, 8)
def test_send_125(self):
# 5 segments: 25, 25, 25, 25, 25
return self.do_encode(25, 125, 100, 5, 15, 8)
def test_send_101(self):
# 5 segments: 25, 25, 25, 25, 1
return self.do_encode(25, 101, 100, 5, 15, 8)
class Roundtrip(unittest.TestCase):
def send_and_recover(self, k_and_n=(25,100),
AVAILABLE_SHARES=None,
datalen=76,
max_segment_size=25,
bucket_modes={}):
NUM_SHARES = k_and_n[1]
if AVAILABLE_SHARES is None:
AVAILABLE_SHARES = NUM_SHARES
data = make_data(datalen)
# force use of multiple segments
options = {"max_segment_size": max_segment_size,
"needed_and_total_shares": k_and_n}
e = encode.Encoder(options)
e.setup(StringIO(data))
assert e.num_shares == NUM_SHARES # else we'll be completely confused
e.setup_codec() # need to rebuild the codec for that change
shareholders = {}
all_shareholders = []
all_peers = []
for shnum in range(NUM_SHARES):
mode = bucket_modes.get(shnum, "good")
peer = FakeBucketWriter(mode)
shareholders[shnum] = peer
all_shareholders.append(peer)
e.set_shareholders(shareholders)
d = e.start()
def _uploaded(roothash):
URI = pack_uri(e._codec.get_encoder_type(),
e._codec.get_serialized_params(),
e._tail_codec.get_serialized_params(),
"V" * 20,
roothash,
e.required_shares,
e.num_shares,
e.file_size,
e.segment_size)
client = None
target = download.Data()
fd = download.FileDownloader(client, URI, target)
for shnum in range(AVAILABLE_SHARES):
bucket = all_shareholders[shnum]
fd.add_share_bucket(shnum, bucket)
fd._got_all_shareholders(None)
d2 = fd._download_all_segments(None)
d2.addCallback(fd._done)
return d2
d.addCallback(_uploaded)
def _downloaded(newdata):
self.failUnless(newdata == data)
d.addCallback(_downloaded)
return d
def test_not_enough_shares(self):
d = self.send_and_recover((4,10), AVAILABLE_SHARES=2)
def _done(res):
self.failUnless(isinstance(res, Failure))
self.failUnless(res.check(download.NotEnoughPeersError))
d.addBoth(_done)
return d
def test_one_share_per_peer(self):
return self.send_and_recover()
def test_74(self):
return self.send_and_recover(datalen=74)
def test_75(self):
return self.send_and_recover(datalen=75)
def test_51(self):
return self.send_and_recover(datalen=51)
def test_99(self):
return self.send_and_recover(datalen=99)
def test_100(self):
return self.send_and_recover(datalen=100)
def test_76(self):
return self.send_and_recover(datalen=76)
def test_124(self):
return self.send_and_recover(datalen=124)
def test_125(self):
return self.send_and_recover(datalen=125)
def test_101(self):
return self.send_and_recover(datalen=101)
# the following tests all use 4-out-of-10 encoding
def test_bad_blocks(self):
# the first 6 servers have bad blocks, which will be caught by the
# blockhashes
modemap = dict([(i, "bad block")
for i in range(6)]
+ [(i, "good")
for i in range(6, 10)])
return self.send_and_recover((4,10), bucket_modes=modemap)
def test_bad_blocks_failure(self):
# the first 7 servers have bad blocks, which will be caught by the
# blockhashes, and the download will fail
modemap = dict([(i, "bad block")
for i in range(7)]
+ [(i, "good")
for i in range(7, 10)])
d = self.send_and_recover((4,10), bucket_modes=modemap)
def _done(res):
self.failUnless(isinstance(res, Failure))
self.failUnless(res.check(download.NotEnoughPeersError))
d.addBoth(_done)
return d
def test_bad_blockhashes(self):
# the first 6 servers have bad block hashes, so the blockhash tree
# will not validate
modemap = dict([(i, "bad blockhash")
for i in range(6)]
+ [(i, "good")
for i in range(6, 10)])
return self.send_and_recover((4,10), bucket_modes=modemap)
def test_bad_blockhashes_failure(self):
# the first 7 servers have bad block hashes, so the blockhash tree
# will not validate, and the download will fail
modemap = dict([(i, "bad blockhash")
for i in range(7)]
+ [(i, "good")
for i in range(7, 10)])
d = self.send_and_recover((4,10), bucket_modes=modemap)
def _done(res):
self.failUnless(isinstance(res, Failure))
self.failUnless(res.check(download.NotEnoughPeersError))
d.addBoth(_done)
return d
def test_bad_sharehashes(self):
# the first 6 servers have bad block hashes, so the sharehash tree
# will not validate
modemap = dict([(i, "bad sharehash")
for i in range(6)]
+ [(i, "good")
for i in range(6, 10)])
return self.send_and_recover((4,10), bucket_modes=modemap)
def test_bad_sharehashes_failure(self):
# the first 7 servers have bad block hashes, so the sharehash tree
# will not validate, and the download will fail
modemap = dict([(i, "bad sharehash")
for i in range(7)]
+ [(i, "good")
for i in range(7, 10)])
d = self.send_and_recover((4,10), bucket_modes=modemap)
def _done(res):
self.failUnless(isinstance(res, Failure))
self.failUnless(res.check(download.NotEnoughPeersError))
d.addBoth(_done)
return d
def test_missing_sharehashes(self):
# the first 6 servers are missing their sharehashes, so the
# sharehash tree will not validate
modemap = dict([(i, "missing sharehash")
for i in range(6)]
+ [(i, "good")
for i in range(6, 10)])
return self.send_and_recover((4,10), bucket_modes=modemap)
def test_missing_sharehashes_failure(self):
# the first 7 servers are missing their sharehashes, so the
# sharehash tree will not validate, and the download will fail
modemap = dict([(i, "missing sharehash")
for i in range(7)]
+ [(i, "good")
for i in range(7, 10)])
d = self.send_and_recover((4,10), bucket_modes=modemap)
def _done(res):
self.failUnless(isinstance(res, Failure))
self.failUnless(res.check(download.NotEnoughPeersError))
d.addBoth(_done)
return d