repairer: test all different kinds of corruption that can happen to share files on disk

Zooko O'Whielacronx 2008-10-14 16:09:20 -07:00
parent b0a00e0f33
commit 8a6d1e5da6
5 changed files with 268 additions and 60 deletions

View File

@ -9,9 +9,9 @@ from allmydata.util.assertutil import _assert, precondition
"""
Share data is written into a single file. At the start of the file, there is
a series of four-byte big-endian offset values, which indicate where each
section starts. Each offset is measured from the beginning of the file.
Share data is written in a file. At the start of the file, there is a series of four-byte
big-endian offset values, which indicate where each section starts. Each offset is measured from
the beginning of the share data.
0x00: version number (=00 00 00 01)
0x04: segment size
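As an aside (not part of the commit), here is a minimal sketch of reading the first two fields of the share-data layout described above; the helper name and the returned dict are illustrative assumptions only.

import struct

def parse_share_data_header(sharedata):
    # Illustrative sketch only: decode the leading four-byte big-endian fields
    # of the v1 share-data layout described in the docstring above.
    version, segment_size = struct.unpack(">LL", sharedata[0:8])  # fields at 0x00 and 0x04
    assert version == 1, "this sketch only handles the v1 layout"
    return {"version": version, "segment_size": segment_size}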

View File

@ -34,6 +34,7 @@ class RIStubClient(RemoteInterface):
RemoteInterface for the StubClient."""
class RIBucketWriter(RemoteInterface):
""" Objects of this kind live on the server side. """
def write(offset=Offset, data=ShareData):
return None
@ -230,6 +231,9 @@ class RIStorageServer(RemoteInterface):
return TupleOf(bool, DictOf(int, ReadData))
class IStorageBucketWriter(Interface):
"""
Objects of this kind live on the client side.
"""
def put_block(segmentnum=int, data=ShareData):
"""@param data: For most segments, this data will be 'blocksize'
bytes in length. The last segment might be shorter.

View File

@ -353,8 +353,8 @@ class BucketReader(Referenceable):
# 9 ?? n*92 extra leases
assert struct.calcsize("L"), 4
assert struct.calcsize("Q"), 8
assert struct.calcsize("L"), 4 # The struct module doc says that L's are 4 bytes in size.
assert struct.calcsize("Q"), 8 # The struct module doc says that Q's are 8 bytes in size (at least with big-endian ordering).
class MutableShareFile:
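A quick illustration (not part of the commit) of the point the new comments make about struct sizes: with an explicit byte-order prefix struct uses fixed "standard" sizes, while the bare native codes can vary by platform.

import struct

# Standard-size mode (any byte-order prefix such as ">") guarantees the sizes:
assert struct.calcsize(">L") == 4
assert struct.calcsize(">Q") == 8
# Native mode ("L", "Q" with no prefix) uses the platform's own sizes and alignment,
# so calcsize("L") can be 8 on some 64-bit platforms even though calcsize(">L") is 4.
native_L, standard_L = struct.calcsize("L"), struct.calcsize(">L")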
@ -1205,6 +1205,3 @@ class StorageServer(service.MultiService, Referenceable):
facility="tahoe.storage", level=log.NOISY, parent=lp)
self.add_latency("readv", time.time() - start)
return datavs
# the code before here runs on the storage server, not the client

View File

@ -8,6 +8,183 @@ import random, struct
TEST_DATA="\x02"*(upload.Uploader.URI_LIT_SIZE_THRESHOLD+1)
def corrupt_field(data, offset, size):
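""" Return a copy of data in which the field of the given size at the given offset has either
had one random bit flipped or (with equal probability) been replaced by random bytes. """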
if random.random() < 0.5:
return testutil.flip_one_bit(data, offset, size)
else:
return data[:offset]+testutil.insecurerandstr(size)+data[offset+size:]
def _corrupt_file_version_number(data):
""" Scramble the file data -- the share file version number have one bit flipped or else
will be changed to a random value."""
return corrupt_field(data, 0x00, 4)
def _corrupt_size_of_file_data(data):
""" Scramble the file data -- the field showing the size of the share data within the
file will have one bit flipped or else will be changed to a random value. """
return corrupt_field(data, 0x04, 4)
def _corrupt_sharedata_version_number(data):
""" Scramble the file data -- the share data version number will have one bit flipped or
else will be changed to a random value."""
return corrupt_field(data, 0x0c, 4)
def _corrupt_segment_size(data):
""" Scramble the file data -- the field showing the size of the segment will have one
bit flipped or else be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x04, 4)
else:
return corrupt_field(data, 0x0c+0x04, 8)
def _corrupt_size_of_sharedata(data):
""" Scramble the file data -- the field showing the size of the data within the share
data will have one bit flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x08, 4)
else:
return corrupt_field(data, 0x0c+0x0c, 8)
def _corrupt_offset_of_sharedata(data):
""" Scramble the file data -- the field showing the offset of the data within the share
data will have one bit flipped or else be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x0c, 4)
else:
return corrupt_field(data, 0x0c+0x14, 8)
def _corrupt_offset_of_ciphertext_hash_tree(data):
""" Scramble the file data -- the field showing the offset of the ciphertext hash tree
within the share data will have one bit flipped or else be changed to a random value.
"""
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x14, 4)
else:
return corrupt_field(data, 0x0c+0x24, 8)
def _corrupt_offset_of_block_hashes(data):
""" Scramble the file data -- the field showing the offset of the block hash tree within
the share data will have one bit flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x18, 4)
else:
return corrupt_field(data, 0x0c+0x2c, 8)
def _corrupt_offset_of_share_hashes(data):
""" Scramble the file data -- the field showing the offset of the share hash tree within
the share data will have one bit flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x1c, 4)
else:
return corrupt_field(data, 0x0c+0x34, 8)
def _corrupt_offset_of_uri_extension(data):
""" Scramble the file data -- the field showing the offset of the uri extension will
have one bit flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
return corrupt_field(data, 0x0c+0x20, 4)
else:
return corrupt_field(data, 0x0c+0x3c, 8)
def _corrupt_share_data(data):
""" Scramble the file data -- the field containing the share data itself will have one
bit flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
sharedatasize = struct.unpack(">L", data[0x0c+0x08:0x0c+0x08+4])[0]
return corrupt_field(data, 0x0c+0x24, sharedatasize)
else:
sharedatasize = struct.unpack(">Q", data[0x0c+0x08:0x0c+0x0c+8])[0]
return corrupt_field(data, 0x0c+0x44, sharedatasize)
def _corrupt_crypttext_hash_tree(data):
""" Scramble the file data -- the field containing the crypttext hash tree will have one
bit flipped or else will be changed to a random value.
"""
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
crypttexthashtreeoffset = struct.unpack(">L", data[0x0c+0x14:0x0c+0x14+4])[0]
blockhashesoffset = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0]
else:
crypttexthashtreeoffset = struct.unpack(">Q", data[0x0c+0x24:0x0c+0x24+8])[0]
blockhashesoffset = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0]
return corrupt_field(data, 0x0c+crypttexthashtreeoffset, blockhashesoffset-crypttexthashtreeoffset)
def _corrupt_block_hashes(data):
""" Scramble the file data -- the field containing the block hash tree will have one bit
flipped or else will be changed to a random value.
"""
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
blockhashesoffset = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0]
sharehashesoffset = struct.unpack(">L", data[0x0c+0x1c:0x0c+0x1c+4])[0]
else:
blockhashesoffset = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0]
sharehashesoffset = struct.unpack(">Q", data[0x0c+0x34:0x0c+0x34+8])[0]
return corrupt_field(data, 0x0c+blockhashesoffset, sharehashesoffset-blockhashesoffset)
def _corrupt_share_hashes(data):
""" Scramble the file data -- the field containing the share hash chain will have one
bit flipped or else will be changed to a random value.
"""
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
sharehashesoffset = struct.unpack(">L", data[0x0c+0x1c:0x0c+0x1c+4])[0]
uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0]
else:
sharehashesoffset = struct.unpack(">Q", data[0x0c+0x34:0x0c+0x34+8])[0]
uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0]
return corrupt_field(data, 0x0c+sharehashesoffset, uriextoffset-sharehashesoffset)
def _corrupt_length_of_uri_extension(data):
""" Scramble the file data -- the field showing the length of the uri extension will
have one bit flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0]
return corrupt_field(data, 0x0c+uriextoffset, 4)
else:
uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0]
return corrupt_field(data, 0x0c+uriextoffset, 8)
def _corrupt_uri_extension(data):
""" Scramble the file data -- the field containing the uri extension will have one bit
flipped or else will be changed to a random value. """
sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0]
assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways."
if sharevernum == 1:
uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0]
uriextlen = struct.unpack(">L", data[0x0c+uriextoffset:0x0c+uriextoffset+4])[0]
else:
uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0]
uriextlen = struct.unpack(">Q", data[0x0c+uriextoffset:0x0c+uriextoffset+8])[0]
return corrupt_field(data, 0x0c+uriextoffset, uriextlen)
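All of the corruptor functions above take the raw bytes of a share file and return a mangled copy. A minimal sketch (hypothetical helper, not part of the commit; the tests below instead go through find_shares()/replace_shares()) of applying one of them to a share file on disk:

def corrupt_share_file_on_disk(path, corruptor_func):
    # Hypothetical helper: read a share file, mangle it with one of the
    # corruptor functions above, and write the damaged bytes back in place.
    f = open(path, "rb")
    data = f.read()
    f.close()
    f = open(path, "wb")
    f.write(corruptor_func(data))
    f.close()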
class Test(ShareManglingMixin, unittest.TestCase):
def setUp(self):
# Set self.basedir to a temp dir which has the name of the current test method in its
@ -46,7 +223,7 @@ class Test(ShareManglingMixin, unittest.TestCase):
shares = self.find_shares()
ks = shares.keys()
if sharenum is not None:
k = [ key for key in shares.keys() if key[1] == sharenum][0]
k = [ key for key in shares.keys() if key[1] == sharenum ][0]
else:
k = random.choice(ks)
del shares[k]
@ -54,26 +231,6 @@ class Test(ShareManglingMixin, unittest.TestCase):
return unused
def _corrupt_a_share(self, unused=None):
""" Exactly one bit of exactly one share on disk will be flipped (randomly selected from
among the bits of the 'share data' -- the verifiable bits)."""
shares = self.find_shares()
ks = shares.keys()
k = random.choice(ks)
data = shares[k]
(version, size, num_leases) = struct.unpack(">LLL", data[:0xc])
sharedata = data[0xc:0xc+size]
corruptedsharedata = testutil.flip_one_bit(sharedata)
corrupteddata = data[:0xc]+corruptedsharedata+data[0xc+size:]
shares[k] = corrupteddata
self.replace_shares(shares, storage_index=self.uri.storage_index)
return unused
def test_test_code(self):
# The following process of stashing the shares, running
# replace_shares, and asserting that the new set of shares equals the
@ -84,7 +241,6 @@ class Test(ShareManglingMixin, unittest.TestCase):
def _stash_it(res):
stash[0] = res
return res
d.addCallback(_stash_it)
d.addCallback(self.replace_shares, storage_index=self.uri.storage_index)
@ -149,11 +305,20 @@ class Test(ShareManglingMixin, unittest.TestCase):
sum_of_allocate_counts += counters.get('storage_server.allocate', 0)
return sum_of_allocate_counts
def _corrupt_a_random_share(self, unused, corruptor_func):
""" Exactly one share on disk will be corrupted by corruptor_func. """
shares = self.find_shares()
ks = shares.keys()
k = random.choice(ks)
shares[k] = corruptor_func(shares[k])
self.replace_shares(shares, storage_index=self.uri.storage_index)
def test_check_without_verify(self):
""" Check says the file is healthy when none of the shares have been
touched. It says that the file is unhealthy when all of them have
been removed. It says that the file is healthy if one bit of one share
has been flipped."""
""" Check says the file is healthy when none of the shares have been touched. It says
that the file is unhealthy when all of them have been removed. It doesn't use any reads.
"""
d = defer.succeed(self.filenode)
def _check1(filenode):
before_check_reads = self._count_reads()
@ -168,22 +333,8 @@ class Test(ShareManglingMixin, unittest.TestCase):
return d2
d.addCallback(_check1)
d.addCallback(self._corrupt_a_share)
def _check2(ignored):
before_check_reads = self._count_reads()
d2 = self.filenode.check(verify=False)
def _after_check(checkresults):
after_check_reads = self._count_reads()
self.failIf(after_check_reads - before_check_reads > 0, after_check_reads - before_check_reads)
d2.addCallback(_after_check)
return d2
d.addCallback(_check2)
return d
d.addCallback(lambda ignore: self.replace_shares({}, storage_index=self.uri.storage_index))
def _check3(ignored):
def _check2(ignored):
before_check_reads = self._count_reads()
d2 = self.filenode.check(verify=False)
@ -194,13 +345,14 @@ class Test(ShareManglingMixin, unittest.TestCase):
d2.addCallback(_after_check)
return d2
d.addCallback(_check3)
d.addCallback(_check2)
return d
def test_check_with_verify(self):
""" Check says the file is healthy when none of the shares have been touched. It says
that the file is unhealthy if one bit of one share has been flipped."""
that the file is unhealthy if any field of any share has been corrupted. It doesn't use
more than twice as many reads as it needs. """
# N == 10. 2 is the "efficiency leeway" -- we'll allow you to pass this test even if
# you trigger twice as many disk reads and block sends as would be optimal.
DELTA_READS = 10 * 2
@ -219,7 +371,13 @@ class Test(ShareManglingMixin, unittest.TestCase):
return d2
d.addCallback(_check1)
d.addCallback(self._corrupt_a_share)
d.addCallback(self.find_shares)
stash = [None]
def _stash_it(res):
stash[0] = res
return res
d.addCallback(_stash_it)
def _check2(ignored):
before_check_reads = self._count_reads()
d2 = self.filenode.check(verify=True)
@ -232,7 +390,32 @@ class Test(ShareManglingMixin, unittest.TestCase):
d2.addCallback(_after_check)
return d2
d.addCallback(_check2)
def _put_it_all_back(ignored):
self.replace_shares(stash[0], storage_index=self.uri.storage_index)
return ignored
for corruptor_func in (
_corrupt_file_version_number,
_corrupt_size_of_file_data,
_corrupt_sharedata_version_number,
_corrupt_segment_size,
_corrupt_size_of_sharedata,
_corrupt_offset_of_sharedata,
_corrupt_offset_of_ciphertext_hash_tree,
_corrupt_offset_of_block_hashes,
_corrupt_offset_of_share_hashes,
_corrupt_offset_of_uri_extension,
_corrupt_share_data,
_corrupt_crypttext_hash_tree,
_corrupt_block_hashes,
_corrupt_share_hashes,
_corrupt_length_of_uri_extension,
_corrupt_uri_extension,
):
d.addCallback(self._corrupt_a_random_share, corruptor_func)
d.addCallback(_check2)
d.addCallback(_put_it_all_back)
return d
test_check_with_verify.todo = "We haven't implemented a verifier this thorough yet."
@ -309,9 +492,6 @@ class Test(ShareManglingMixin, unittest.TestCase):
return d2
d.addCallback(_repair_from_deletion_of_7)
# Now we corrupt a share...
d.addCallback(self._corrupt_a_share)
def _repair_from_corruption(filenode):
before_repair_reads = self._count_reads()
before_repair_allocates = self._count_allocates()
@ -333,7 +513,29 @@ class Test(ShareManglingMixin, unittest.TestCase):
d2.addCallback(_after_repair)
return d2
d.addCallback(_repair_from_corruption)
for corruptor_func in (
_corrupt_file_version_number,
_corrupt_size_of_file_data,
_corrupt_sharedata_version_number,
_corrupt_segment_size,
_corrupt_size_of_sharedata,
_corrupt_offset_of_sharedata,
_corrupt_offset_of_ciphertext_hash_tree,
_corrupt_offset_of_block_hashes,
_corrupt_offset_of_share_hashes,
_corrupt_offset_of_uri_extension,
_corrupt_share_data,
_corrupt_crypttext_hash_tree,
_corrupt_block_hashes,
_corrupt_share_hashes,
_corrupt_length_of_uri_extension,
_corrupt_uri_extension,
):
# Now we corrupt a share...
d.addCallback(self._corrupt_a_random_share, corruptor_func)
# And repair...
d.addCallback(_repair_from_corruption)
return d
test_repair.todo = "We haven't implemented a repairer yet."

View File

@ -4,6 +4,8 @@ from random import randrange
from twisted.internet import reactor, defer, task
from twisted.python import failure
def insecurerandstr(n):
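# return a string of n uniformly pseudo-random bytes (not cryptographically strong -- hence the name)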
return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
def flip_bit(good, which):
# flip the low-order bit of good[which]
@ -13,9 +15,12 @@ def flip_bit(good, which):
pieces = good[:which], good[which:which+1], good[which+1:]
return pieces[0] + chr(ord(pieces[1]) ^ 0x01) + pieces[2]
def flip_one_bit(s):
# flip one random bit of the string s
i = randrange(0, len(s))
def flip_one_bit(s, offset=0, size=None):
""" Flip one random bit of the string s, at a byte index greater than or equal to offset and
less than offset+size (size defaults to the rest of the string). """
if size is None:
size = len(s) - offset
i = randrange(offset, offset+size)
result = s[:i] + chr(ord(s[i])^(0x01<<randrange(0, 8))) + s[i+1:]
assert result != s, "Internal error -- flip_one_bit() produced the same string as its input: %s == %s" % (result, s)
return result
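A small usage sketch (not part of the commit; the byte values are arbitrary) showing how the new offset/size parameters confine the flipped bit to a window of the string:

s = "\x00" * 16
mangled = flip_one_bit(s, offset=4, size=4)            # flips one bit somewhere in bytes 4..7
assert len(mangled) == len(s)
assert mangled[:4] == s[:4] and mangled[8:] == s[8:]   # bytes outside the window untouched
assert mangled[4:8] != s[4:8]                          # exactly one bit inside it changed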