mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-20 13:33:09 +00:00
immutable: more detailed tests for checker/verifier/repairer
There are a lot of different ways that a share could be corrupted, or that attempting to download it might fail. These tests attempt to exercise many of those ways and require the checker/verifier/repairer to handle each kind of failure well.
This commit is contained in:
parent
00b2fbfd00
commit
3a47031a51
@ -1634,11 +1634,17 @@ class ICheckerResults(Interface):
|
|||||||
that was found to be corrupt. Each share
|
that was found to be corrupt. Each share
|
||||||
locator is a list of (serverid, storage_index,
|
locator is a list of (serverid, storage_index,
|
||||||
sharenum).
|
sharenum).
|
||||||
|
count-incompatible-shares: the number of shares which are of a share format unknown to
|
||||||
|
this checker
|
||||||
|
list-incompatible-shares: a list of 'share locators', one for each share that was found
|
||||||
|
to be of an unknown format. Each share locator is a list of
|
||||||
|
(serverid, storage_index, sharenum).
|
||||||
servers-responding: list of (binary) storage server identifiers,
|
servers-responding: list of (binary) storage server identifiers,
|
||||||
one for each server which responded to the share
|
one for each server which responded to the share
|
||||||
query (even if they said they didn't have shares,
|
query (even if they said they didn't have shares,
|
||||||
and even if they said they did have shares but then
|
and even if they said they did have shares but then
|
||||||
refused to send them when asked, and even if they
|
didn't send them when asked, or dropped the
|
||||||
|
connection, or returned a Failure, and even if they
|
||||||
said they did have shares and sent incorrect ones
|
said they did have shares and sent incorrect ones
|
||||||
when asked)
|
when asked)
|
||||||
sharemap: dict mapping share identifier to list of serverids
|
sharemap: dict mapping share identifier to list of serverids
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from allmydata.immutable import upload
|
|
||||||
from allmydata.test.common import SystemTestMixin, ShareManglingMixin
|
from allmydata.test.common import SystemTestMixin, ShareManglingMixin
|
||||||
from allmydata.monitor import Monitor
|
from allmydata.monitor import Monitor
|
||||||
from allmydata.interfaces import IURI, NotEnoughSharesError
|
from allmydata.interfaces import IURI, NotEnoughSharesError
|
||||||
|
from allmydata.immutable import upload
|
||||||
|
from allmydata.util import log
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
from twisted.trial import unittest
|
from twisted.trial import unittest
|
||||||
import random, struct
|
import random, struct
|
||||||
@ -9,11 +11,17 @@ import common_util as testutil
|
|||||||
|
|
||||||
TEST_DATA="\x02"*(upload.Uploader.URI_LIT_SIZE_THRESHOLD+1)
|
TEST_DATA="\x02"*(upload.Uploader.URI_LIT_SIZE_THRESHOLD+1)
|
||||||
|
|
||||||
def corrupt_field(data, offset, size):
|
def corrupt_field(data, offset, size, debug=False):
|
||||||
if random.random() < 0.5:
|
if random.random() < 0.5:
|
||||||
return testutil.flip_one_bit(data, offset, size)
|
newdata = testutil.flip_one_bit(data, offset, size)
|
||||||
|
if debug:
|
||||||
|
log.msg("testing: corrupting offset %d, size %d flipping one bit orig: %r, newdata: %r" % (offset, size, data[offset:offset+size], newdata[offset:offset+size]))
|
||||||
|
return newdata
|
||||||
else:
|
else:
|
||||||
return data[:offset]+testutil.insecurerandstr(size)+data[offset+size:]
|
newval = testutil.insecurerandstr(size)
|
||||||
|
if debug:
|
||||||
|
log.msg("testing: corrupting offset %d, size %d randomizing field, orig: %r, newval: %r" % (offset, size, data[offset:offset+size], newval))
|
||||||
|
return data[:offset]+newval+data[offset+size:]
|
||||||
|
|
||||||
def _corrupt_file_version_number(data):
|
def _corrupt_file_version_number(data):
|
||||||
""" Scramble the file data -- the share file version number have one bit flipped or else
|
""" Scramble the file data -- the share file version number have one bit flipped or else
|
||||||
@ -21,14 +29,33 @@ def _corrupt_file_version_number(data):
|
|||||||
return corrupt_field(data, 0x00, 4)
|
return corrupt_field(data, 0x00, 4)
|
||||||
|
|
||||||
def _corrupt_size_of_file_data(data):
|
def _corrupt_size_of_file_data(data):
|
||||||
""" Scramble the file data -- the field showing the size of the share data within the
|
""" Scramble the file data -- the field showing the size of the share data within the file
|
||||||
file will have one bit flipped or else will be changed to a random value. """
|
will have one bit flipped or else will be changed to a random value. """
|
||||||
return corrupt_field(data, 0x04, 4)
|
return corrupt_field(data, 0x04, 4)
|
||||||
|
|
||||||
def _corrupt_sharedata_version_number(data):
|
def _corrupt_sharedata_version_number(data):
|
||||||
""" Scramble the file data -- the share data version number will have one bit flipped or
|
""" Scramble the file data -- the share data version number will have one bit flipped or
|
||||||
else will be changed to a random value."""
|
else will be changed to a random value, but not 1 or 2."""
|
||||||
return corrupt_field(data, 0x0c, 4)
|
return corrupt_field(data, 0x0c, 4)
|
||||||
|
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
|
||||||
|
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
|
||||||
|
newsharevernum = sharevernum
|
||||||
|
while newsharevernum in (1, 2):
|
||||||
|
newsharevernum = random.randrange(0, 2**32)
|
||||||
|
newsharevernumbytes = struct.pack(">l", newsharevernum)
|
||||||
|
return data[:0x0c] + newsharevernumbytes + data[0x0c+4:]
|
||||||
|
|
||||||
|
def _corrupt_sharedata_version_number_to_known_version(data):
|
||||||
|
""" Scramble the file data -- the share data version number will
|
||||||
|
be changed to 2 if it is 1 or else to 1 if it is 2."""
|
||||||
|
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
|
||||||
|
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
|
||||||
|
if sharevernum == 1:
|
||||||
|
newsharevernum = 2
|
||||||
|
else:
|
||||||
|
newsharevernum = 1
|
||||||
|
newsharevernumbytes = struct.pack(">l", newsharevernum)
|
||||||
|
return data[:0x0c] + newsharevernumbytes + data[0x0c+4:]
|
||||||
|
|
||||||
def _corrupt_segment_size(data):
|
def _corrupt_segment_size(data):
|
||||||
""" Scramble the file data -- the field showing the size of the segment will have one
|
""" Scramble the file data -- the field showing the size of the segment will have one
|
||||||
@ -36,9 +63,9 @@ def _corrupt_segment_size(data):
|
|||||||
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
|
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
|
||||||
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
|
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
|
||||||
if sharevernum == 1:
|
if sharevernum == 1:
|
||||||
return corrupt_field(data, 0x0c+0x04, 4)
|
return corrupt_field(data, 0x0c+0x04, 4, debug=True)
|
||||||
else:
|
else:
|
||||||
return corrupt_field(data, 0x0c+0x04, 8)
|
return corrupt_field(data, 0x0c+0x04, 8, debug=True)
|
||||||
|
|
||||||
def _corrupt_size_of_sharedata(data):
|
def _corrupt_size_of_sharedata(data):
|
||||||
""" Scramble the file data -- the field showing the size of the data within the share
|
""" Scramble the file data -- the field showing the size of the data within the share
|
||||||
@ -67,9 +94,9 @@ def _corrupt_offset_of_ciphertext_hash_tree(data):
|
|||||||
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
|
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
|
||||||
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
|
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
|
||||||
if sharevernum == 1:
|
if sharevernum == 1:
|
||||||
return corrupt_field(data, 0x0c+0x14, 4)
|
return corrupt_field(data, 0x0c+0x14, 4, debug=True)
|
||||||
else:
|
else:
|
||||||
return corrupt_field(data, 0x0c+0x24, 8)
|
return corrupt_field(data, 0x0c+0x24, 8, debug=True)
|
||||||
|
|
||||||
def _corrupt_offset_of_block_hashes(data):
|
def _corrupt_offset_of_block_hashes(data):
|
||||||
""" Scramble the file data -- the field showing the offset of the block hash tree within
|
""" Scramble the file data -- the field showing the offset of the block hash tree within
|
||||||
@ -354,11 +381,10 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
""" Check says the file is healthy when none of the shares have been touched. It says
|
""" Check says the file is healthy when none of the shares have been touched. It says
|
||||||
that the file is unhealthy if any field of any share has been corrupted. It doesn't use
|
that the file is unhealthy if any field of any share has been corrupted. It doesn't use
|
||||||
more than twice as many reads as it needs. """
|
more than seven times as many reads as it needs. """
|
||||||
# N == 10. 2 is the "efficiency leeway" -- we'll allow you to pass this test even if
|
LEEWAY = 7 # We'll allow you to pass this test even if you trigger seven times as many disk reads and block sends as would be optimal.
|
||||||
# you trigger twice as many disk reads and blocks sends as would be optimal.
|
DELTA_READS = 10 * LEEWAY # N = 10
|
||||||
DELTA_READS = 10 * 2
|
|
||||||
d = defer.succeed(self.filenode)
|
d = defer.succeed(self.filenode)
|
||||||
def _check1(filenode):
|
def _check_pristine(filenode):
|
||||||
before_check_reads = self._count_reads()
|
before_check_reads = self._count_reads()
|
||||||
|
|
||||||
d2 = filenode.check(Monitor(), verify=True)
|
d2 = filenode.check(Monitor(), verify=True)
|
||||||
@ -369,7 +395,7 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
|
|
||||||
d2.addCallback(_after_check)
|
d2.addCallback(_after_check)
|
||||||
return d2
|
return d2
|
||||||
d.addCallback(_check1)
|
d.addCallback(_check_pristine)
|
||||||
|
|
||||||
d.addCallback(self.find_shares)
|
d.addCallback(self.find_shares)
|
||||||
stash = [None]
|
stash = [None]
|
||||||
@ -378,14 +404,23 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
return res
|
return res
|
||||||
d.addCallback(_stash_it)
|
d.addCallback(_stash_it)
|
||||||
|
|
||||||
def _check2(ignored):
|
def _check_after_feckless_corruption(ignored, corruptor_func):
|
||||||
|
# Corruption which has no effect -- bits of the share file that are unused.
|
||||||
before_check_reads = self._count_reads()
|
before_check_reads = self._count_reads()
|
||||||
d2 = self.filenode.check(Monitor(), verify=True)
|
d2 = self.filenode.check(Monitor(), verify=True)
|
||||||
|
|
||||||
def _after_check(checkresults):
|
def _after_check(checkresults):
|
||||||
after_check_reads = self._count_reads()
|
after_check_reads = self._count_reads()
|
||||||
self.failIf(after_check_reads - before_check_reads > DELTA_READS)
|
self.failIf(after_check_reads - before_check_reads > DELTA_READS)
|
||||||
self.failIf(checkresults.is_healthy())
|
self.failUnless(checkresults.is_healthy(), (checkresults, checkresults.is_healthy(), checkresults.get_data(), corruptor_func))
|
||||||
|
data = checkresults.get_data()
|
||||||
|
self.failUnless(data['count-shares-good'] == 10, data)
|
||||||
|
self.failUnless(len(data['sharemap']) == 10, data)
|
||||||
|
self.failUnless(data['count-shares-needed'] == 3, data)
|
||||||
|
self.failUnless(data['count-shares-expected'] == 10, data)
|
||||||
|
self.failUnless(data['count-good-share-hosts'] == 5, data)
|
||||||
|
self.failUnless(len(data['servers-responding']) == 5, data)
|
||||||
|
self.failUnless(len(data['list-corrupt-shares']) == 0, data)
|
||||||
|
|
||||||
d2.addCallback(_after_check)
|
d2.addCallback(_after_check)
|
||||||
return d2
|
return d2
|
||||||
@ -395,11 +430,111 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
return ignored
|
return ignored
|
||||||
|
|
||||||
for corruptor_func in (
|
for corruptor_func in (
|
||||||
_corrupt_file_version_number,
|
|
||||||
_corrupt_size_of_file_data,
|
_corrupt_size_of_file_data,
|
||||||
_corrupt_sharedata_version_number,
|
|
||||||
_corrupt_segment_size,
|
|
||||||
_corrupt_size_of_sharedata,
|
_corrupt_size_of_sharedata,
|
||||||
|
_corrupt_segment_size,
|
||||||
|
):
|
||||||
|
d.addCallback(self._corrupt_a_random_share, corruptor_func)
|
||||||
|
d.addCallback(_check_after_feckless_corruption, corruptor_func=corruptor_func)
|
||||||
|
d.addCallback(_put_it_all_back)
|
||||||
|
|
||||||
|
def _check_after_server_visible_corruption(ignored, corruptor_func):
|
||||||
|
# Corruption which is detected by the server means that the server will send you
|
||||||
|
# back a Failure in response to get_bucket instead of giving you the share data.
|
||||||
|
before_check_reads = self._count_reads()
|
||||||
|
d2 = self.filenode.check(Monitor(), verify=True)
|
||||||
|
|
||||||
|
def _after_check(checkresults):
|
||||||
|
after_check_reads = self._count_reads()
|
||||||
|
self.failIf(after_check_reads - before_check_reads > DELTA_READS)
|
||||||
|
self.failIf(checkresults.is_healthy(), (checkresults, checkresults.is_healthy(), checkresults.get_data(), corruptor_func))
|
||||||
|
data = checkresults.get_data()
|
||||||
|
# The server might fail to serve up its other share as well as the corrupted
|
||||||
|
# one, so count-shares-good could be 8 or 9.
|
||||||
|
self.failUnless(data['count-shares-good'] in (8, 9), data)
|
||||||
|
self.failUnless(len(data['sharemap']) in (8, 9,), data)
|
||||||
|
self.failUnless(data['count-shares-needed'] == 3, data)
|
||||||
|
self.failUnless(data['count-shares-expected'] == 10, data)
|
||||||
|
# The server may have served up the non-corrupted share, or it may not have, so
|
||||||
|
# the checker could have detected either 4 or 5 good servers.
|
||||||
|
self.failUnless(data['count-good-share-hosts'] in (4, 5), data)
|
||||||
|
self.failUnless(len(data['servers-responding']) in (4, 5), data)
|
||||||
|
# If the server served up the other share, then the checker should consider it good, else it should
|
||||||
|
# not.
|
||||||
|
self.failUnless((data['count-shares-good'] == 9) == (data['count-good-share-hosts'] == 5), data)
|
||||||
|
self.failUnless(len(data['list-corrupt-shares']) == 0, data)
|
||||||
|
|
||||||
|
d2.addCallback(_after_check)
|
||||||
|
return d2
|
||||||
|
|
||||||
|
for corruptor_func in (
|
||||||
|
_corrupt_file_version_number,
|
||||||
|
):
|
||||||
|
d.addCallback(self._corrupt_a_random_share, corruptor_func)
|
||||||
|
d.addCallback(_check_after_server_visible_corruption, corruptor_func=corruptor_func)
|
||||||
|
d.addCallback(_put_it_all_back)
|
||||||
|
|
||||||
|
def _check_after_share_incompatibility(ignored, corruptor_func):
|
||||||
|
# Corruption which means the share is indistinguishable from a share of an
|
||||||
|
# incompatible version.
|
||||||
|
before_check_reads = self._count_reads()
|
||||||
|
d2 = self.filenode.check(Monitor(), verify=True)
|
||||||
|
|
||||||
|
def _after_check(checkresults):
|
||||||
|
after_check_reads = self._count_reads()
|
||||||
|
self.failIf(after_check_reads - before_check_reads > DELTA_READS)
|
||||||
|
self.failIf(checkresults.is_healthy(), (checkresults, checkresults.is_healthy(), checkresults.get_data(), corruptor_func))
|
||||||
|
data = checkresults.get_data()
|
||||||
|
self.failUnless(data['count-shares-good'] == 9, data)
|
||||||
|
self.failUnless(len(data['sharemap']) == 9, data)
|
||||||
|
self.failUnless(data['count-shares-needed'] == 3, data)
|
||||||
|
self.failUnless(data['count-shares-expected'] == 10, data)
|
||||||
|
self.failUnless(data['count-good-share-hosts'] == 5, data)
|
||||||
|
self.failUnless(len(data['servers-responding']) == 5, data)
|
||||||
|
self.failUnless(len(data['list-corrupt-shares']) == 0, data)
|
||||||
|
self.failUnless(len(data['list-corrupt-shares']) == data['count-corrupt-shares'], data)
|
||||||
|
self.failUnless(len(data['list-incompatible-shares']) == data['count-incompatible-shares'], data)
|
||||||
|
self.failUnless(len(data['list-incompatible-shares']) == 1, data)
|
||||||
|
|
||||||
|
d2.addCallback(_after_check)
|
||||||
|
return d2
|
||||||
|
|
||||||
|
for corruptor_func in (
|
||||||
|
_corrupt_sharedata_version_number,
|
||||||
|
):
|
||||||
|
d.addCallback(self._corrupt_a_random_share, corruptor_func)
|
||||||
|
d.addCallback(_check_after_share_incompatibility, corruptor_func=corruptor_func)
|
||||||
|
d.addCallback(_put_it_all_back)
|
||||||
|
|
||||||
|
def _check_after_server_invisible_corruption(ignored, corruptor_func):
|
||||||
|
# Corruption which is not detected by the server means that the server will send you
|
||||||
|
# back the share data, but you will detect that it is wrong.
|
||||||
|
before_check_reads = self._count_reads()
|
||||||
|
d2 = self.filenode.check(Monitor(), verify=True)
|
||||||
|
|
||||||
|
def _after_check(checkresults):
|
||||||
|
after_check_reads = self._count_reads()
|
||||||
|
# print "delta was ", after_check_reads - before_check_reads
|
||||||
|
self.failIf(after_check_reads - before_check_reads > DELTA_READS)
|
||||||
|
self.failIf(checkresults.is_healthy(), (checkresults, checkresults.is_healthy(), checkresults.get_data(), corruptor_func))
|
||||||
|
data = checkresults.get_data()
|
||||||
|
self.failUnless(data['count-shares-good'] == 9, data)
|
||||||
|
self.failUnless(data['count-shares-needed'] == 3, data)
|
||||||
|
self.failUnless(data['count-shares-expected'] == 10, data)
|
||||||
|
self.failUnless(data['count-good-share-hosts'] == 5, data)
|
||||||
|
self.failUnless(data['count-corrupt-shares'] == 1, (data, corruptor_func))
|
||||||
|
self.failUnless(len(data['list-corrupt-shares']) == 1, data)
|
||||||
|
self.failUnless(len(data['list-corrupt-shares']) == data['count-corrupt-shares'], data)
|
||||||
|
self.failUnless(len(data['list-incompatible-shares']) == data['count-incompatible-shares'], data)
|
||||||
|
self.failUnless(len(data['list-incompatible-shares']) == 0, data)
|
||||||
|
self.failUnless(len(data['servers-responding']) == 5, data)
|
||||||
|
self.failUnless(len(data['sharemap']) == 9, data)
|
||||||
|
|
||||||
|
d2.addCallback(_after_check)
|
||||||
|
return d2
|
||||||
|
|
||||||
|
for corruptor_func in (
|
||||||
|
_corrupt_sharedata_version_number_to_known_version,
|
||||||
_corrupt_offset_of_sharedata,
|
_corrupt_offset_of_sharedata,
|
||||||
_corrupt_offset_of_ciphertext_hash_tree,
|
_corrupt_offset_of_ciphertext_hash_tree,
|
||||||
_corrupt_offset_of_block_hashes,
|
_corrupt_offset_of_block_hashes,
|
||||||
@ -413,16 +548,16 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
_corrupt_uri_extension,
|
_corrupt_uri_extension,
|
||||||
):
|
):
|
||||||
d.addCallback(self._corrupt_a_random_share, corruptor_func)
|
d.addCallback(self._corrupt_a_random_share, corruptor_func)
|
||||||
d.addCallback(_check2)
|
d.addCallback(_check_after_server_invisible_corruption, corruptor_func=corruptor_func)
|
||||||
d.addCallback(_put_it_all_back)
|
d.addCallback(_put_it_all_back)
|
||||||
return d
|
return d
|
||||||
test_check_with_verify.todo = "We haven't implemented a verifier this thorough yet."
|
test_check_with_verify.todo = "We haven't implemented a verifier this thorough yet."
|
||||||
|
|
||||||
def test_repair(self):
|
def test_repair(self):
|
||||||
""" Repair replaces a share that got deleted. """
|
""" Repair replaces a share that got deleted. """
|
||||||
# N == 10. 2 is the "efficiency leeway" -- we'll allow you to pass this test even if
|
# N == 10. 7 is the "efficiency leeway" -- we'll allow you to pass this test even if
|
||||||
# you trigger twice as many disk reads and blocks sends as would be optimal.
|
# you trigger seven times as many disk reads and block sends as would be optimal.
|
||||||
DELTA_READS = 10 * 2
|
DELTA_READS = 10 * 7
|
||||||
# We'll allow you to pass this test only if you repair the missing share using only a
|
# We'll allow you to pass this test only if you repair the missing share using only a
|
||||||
# single allocate.
|
# single allocate.
|
||||||
DELTA_ALLOCATES = 1
|
DELTA_ALLOCATES = 1
|
||||||
@ -515,10 +650,8 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
|
|
||||||
for corruptor_func in (
|
for corruptor_func in (
|
||||||
_corrupt_file_version_number,
|
_corrupt_file_version_number,
|
||||||
_corrupt_size_of_file_data,
|
|
||||||
_corrupt_sharedata_version_number,
|
_corrupt_sharedata_version_number,
|
||||||
_corrupt_segment_size,
|
_corrupt_sharedata_version_number_to_known_version,
|
||||||
_corrupt_size_of_sharedata,
|
|
||||||
_corrupt_offset_of_sharedata,
|
_corrupt_offset_of_sharedata,
|
||||||
_corrupt_offset_of_ciphertext_hash_tree,
|
_corrupt_offset_of_ciphertext_hash_tree,
|
||||||
_corrupt_offset_of_block_hashes,
|
_corrupt_offset_of_block_hashes,
|
||||||
@ -538,3 +671,10 @@ class Test(ShareManglingMixin, unittest.TestCase):
|
|||||||
|
|
||||||
return d
|
return d
|
||||||
test_repair.todo = "We haven't implemented a repairer yet."
|
test_repair.todo = "We haven't implemented a repairer yet."
|
||||||
|
|
||||||
|
|
||||||
|
# XXX extend these tests to show that the checker detects which specific share on which specific server is broken -- this is necessary so that the checker results can be passed to the repairer and the repairer can go ahead and upload fixes without first doing what is effectively a check (/verify) run
|
||||||
|
|
||||||
|
# XXX extend these tests to show bad behavior of various kinds from servers: raising exception from each remove_foo() method, for example
|
||||||
|
|
||||||
|
# XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit
|
||||||
|
Loading…
Reference in New Issue
Block a user