# -*- test-case-name: allmydata.test.test_hashtree -*-
from twisted.trial import unittest
from allmydata.util.hashutil import tagged_hash
from allmydata import hashtree
def make_tree(numleaves):
leaves = ["%d" % i for i in range(numleaves)]
leaf_hashes = [tagged_hash("tag", leaf) for leaf in leaves]
ht = hashtree.HashTree(leaf_hashes)
return ht
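
# The expected node numbers in the tests below follow the usual array
# layout for a complete binary tree: the root is node 0, node i's
# children are nodes 2*i+1 and 2*i+2, and the leaves are padded out to
# the next power of two. A minimal illustrative helper (hypothetical,
# not part of the hashtree API), assuming that layout:

def leaf_node_index(leaf, padded_numleaves):
    """Return the node number of `leaf` in a tree padded to
    `padded_numleaves` leaves (a power of two)."""
    # e.g. leaf 0 of an 8-leaf tree is node 0 + 8 - 1 = 7, matching
    # get_leaf_index(0) == 7 in Complete.test_create below.
    return leaf + padded_numleaves - 1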
class Complete(unittest.TestCase):
def test_create(self):
# try out various sizes, since we pad to a power of two
ht = make_tree(6)
ht = make_tree(9)
ht = make_tree(8)
root = ht[0]
self.failUnlessEqual(len(root), 32)
self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0"))
self.failUnlessRaises(IndexError, ht.get_leaf, 8)
self.failUnlessEqual(ht.get_leaf_index(0), 7)
self.failUnlessRaises(IndexError, ht.parent, 0)
self.failUnlessRaises(IndexError, ht.needed_for, -1)
def test_needed_hashes(self):
ht = make_tree(8)
self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
def test_dump(self):
ht = make_tree(6)
expected = [(0,0),
(1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3),
(2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3),
]
self.failUnlessEqual(list(ht.depth_first()), expected)
d = "\n" + ht.dump()
#print d
self.failUnless("\n 0:" in d)
self.failUnless("\n 1:" in d)
self.failUnless("\n 3:" in d)
self.failUnless("\n 7:" in d)
self.failUnless("\n 8:" in d)
self.failUnless("\n 4:" in d)
class Incomplete(unittest.TestCase):
def test_create(self):
ht = hashtree.IncompleteHashTree(6)
ht = hashtree.IncompleteHashTree(9)
ht = hashtree.IncompleteHashTree(8)
self.failUnlessEqual(ht[0], None)
self.failUnlessEqual(ht.get_leaf(0), None)
self.failUnlessRaises(IndexError, ht.get_leaf, 8)
self.failUnlessEqual(ht.get_leaf_index(0), 7)
def test_needed_hashes(self):
ht = hashtree.IncompleteHashTree(8)
self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
ht = hashtree.IncompleteHashTree(1)
self.failUnlessEqual(ht.needed_hashes(0), set([]))
ht = hashtree.IncompleteHashTree(6)
self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1]))
self.failUnlessEqual(ht.needed_hashes(5, False), set([11, 6, 1]))
self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1]))
def test_depth_of(self):
ht = hashtree.IncompleteHashTree(8)
self.failUnlessEqual(hashtree.depth_of(0), 0)
for i in [1,2]:
self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i)
for i in [3,4,5,6]:
self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i)
for i in [7,8,9,10,11,12,13,14]:
self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i)
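
    def test_depth_of_formula(self):
        # Cross-check (sketch, assuming int.bit_length from Python
        # 2.7+): in this layout, depth_of(i) is floor(log2(i+1)),
        # i.e. the bit length of i+1, minus one. The range covers the
        # same node numbers 0..14 asserted individually above.
        for i in range(15):
            self.failUnlessEqual(hashtree.depth_of(i),
                                 (i + 1).bit_length() - 1, "i=%d" % i)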
def test_large(self):
# IncompleteHashTree.set_hashes() used to take O(N**2). This test is
# meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish
# there were a good way to assert this (like counting VM operations
# or something): the problem was inside list.sort(), so there's no
# good way to instrument set_hashes() to count what we care about. On
# my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s
        # in the old broken version. An 80k-leaf test (corresponding to a
        # 10GB file with a 128KiB segsize) takes 10s in the fixed version, and
# several hours in the broken version, but 10s on my laptop (plus the
# 20s of setup code) probably means 200s on our dapper buildslave,
# which is painfully long for a unit test.
self.do_test_speed(10000)
def do_test_speed(self, SIZE):
# on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB
# segsize) takes:
# 7s to build the (complete) HashTree
# 13s to set up the dictionary
# 10s to run set_hashes()
ht = make_tree(SIZE)
iht = hashtree.IncompleteHashTree(SIZE)
needed = set()
for i in range(SIZE):
needed.update(ht.needed_hashes(i, True))
        hashes = dict([(i, ht[i]) for i in needed])
        iht.set_hashes(hashes=hashes)
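
    def _time_set_hashes(self, SIZE):
        # Rough timing harness (sketch only; not run by the suite and
        # nothing is asserted): times set_hashes() alone, to eyeball
        # the roughly-linear scaling described in test_large above.
        import time
        ht = make_tree(SIZE)
        needed = set()
        for i in range(SIZE):
            needed.update(ht.needed_hashes(i, True))
        hashes = dict([(i, ht[i]) for i in needed])
        iht = hashtree.IncompleteHashTree(SIZE)
        start = time.time()
        iht.set_hashes(hashes=hashes)
        return time.time() - start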
def test_check(self):
# first create a complete hash tree
ht = make_tree(6)
# then create a corresponding incomplete tree
iht = hashtree.IncompleteHashTree(6)
# suppose we wanted to validate leaf[0]
# leaf[0] is the same as node[7]
self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2]))
self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
iht[0] = ht[0] # set the root
self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
iht[5] = ht[5]
self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
# reset
iht = hashtree.IncompleteHashTree(6)
current_hashes = list(iht)
# this should fail because there aren't enough hashes known
try:
iht.set_hashes(leaves={0: tagged_hash("tag", "0")})
except hashtree.NotEnoughHashesError:
pass
else:
self.fail("didn't catch not enough hashes")
# and the set of hashes stored in the tree should still be the same
self.failUnlessEqual(list(iht), current_hashes)
# and we should still need the same
self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]}
# this should fail because the leaf hash is just plain wrong
try:
iht.set_hashes(chain, leaves={0: tagged_hash("bad tag", "0")})
except hashtree.BadHashError:
pass
else:
self.fail("didn't catch bad hash")
# this should fail because we give it conflicting hashes: one as an
# internal node, another as a leaf
try:
iht.set_hashes(chain, leaves={1: tagged_hash("bad tag", "1")})
except hashtree.BadHashError:
pass
else:
self.fail("didn't catch bad hash")
bad_chain = chain.copy()
bad_chain[2] = ht[2] + "BOGUS"
# this should fail because the internal hash is wrong
try:
iht.set_hashes(bad_chain, leaves={0: tagged_hash("tag", "0")})
except hashtree.BadHashError:
pass
else:
self.fail("didn't catch bad hash")
# this should succeed
try:
iht.set_hashes(chain, leaves={0: tagged_hash("tag", "0")})
        except hashtree.BadHashError as e:
self.fail("bad hash: %s" % e)
self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0"))
self.failUnlessRaises(IndexError, ht.get_leaf, 8)
# this should succeed too
try:
iht.set_hashes(leaves={1: tagged_hash("tag", "1")})
except hashtree.BadHashError:
self.fail("bad hash")
# this should fail because we give it hashes that conflict with some
# that we added successfully before
try:
iht.set_hashes(leaves={1: tagged_hash("bad tag", "1")})
except hashtree.BadHashError:
pass
else:
self.fail("didn't catch bad hash")
# now that leaves 0 and 1 are known, some of the internal nodes are
# known
self.failUnlessEqual(iht.needed_hashes(4), set([12, 6]))
chain = {6: ht[6], 12: ht[12]}
# this should succeed
try:
iht.set_hashes(chain, leaves={4: tagged_hash("tag", "4")})
        except hashtree.BadHashError as e:
self.fail("bad hash: %s" % e)