rename thingA to 'uri extension'

This commit is contained in:
Brian Warner 2007-06-08 15:59:16 -07:00
parent 72bc8627de
commit c9ef291c02
9 changed files with 128 additions and 126 deletions

View File

@ -17,7 +17,7 @@ class HaveAllPeersError(Exception):
# we use this to jump out of the loop
pass
class BadThingAHashValue(Exception):
class BadURIExtensionHashValue(Exception):
pass
class BadPlaintextHashValue(Exception):
pass
@ -260,7 +260,7 @@ class FileDownloader:
d = unpack_uri(uri)
self._storage_index = d['storage_index']
self._thingA_hash = d['thingA_hash']
self._uri_extension_hash = d['uri_extension_hash']
self._total_shares = d['total_shares']
self._size = d['size']
self._num_needed_shares = d['needed_shares']
@ -270,11 +270,11 @@ class FileDownloader:
self.active_buckets = {} # k: shnum, v: bucket
self._share_buckets = [] # list of (sharenum, bucket) tuples
self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
self._thingA_sources = []
self._uri_extension_sources = []
self._thingA_data = None
self._uri_extension_data = None
self._fetch_failures = {"thingA": 0,
self._fetch_failures = {"uri_extension": 0,
"plaintext_hashroot": 0,
"plaintext_hashtree": 0,
"crypttext_hashroot": 0,
@ -287,9 +287,9 @@ class FileDownloader:
# first step: who should we download from?
d = defer.maybeDeferred(self._get_all_shareholders)
d.addCallback(self._got_all_shareholders)
# now get the thingA block from somebody and validate it
d.addCallback(self._obtain_thingA)
d.addCallback(self._got_thingA)
# now get the uri_extension block from somebody and validate it
d.addCallback(self._obtain_uri_extension)
d.addCallback(self._got_uri_extension)
d.addCallback(self._get_hashtrees)
d.addCallback(self._create_validated_buckets)
# once we know that, we can download blocks from everybody
@ -312,7 +312,7 @@ class FileDownloader:
_assert(isinstance(buckets, dict), buckets) # soon foolscap will check this for us with its DictOf schema constraint
for sharenum, bucket in buckets.iteritems():
self.add_share_bucket(sharenum, bucket)
self._thingA_sources.append(bucket)
self._uri_extension_sources.append(bucket)
def add_share_bucket(self, sharenum, bucket):
# this is split out for the benefit of test_encode.py
@ -341,23 +341,23 @@ class FileDownloader:
# assert isinstance(vb, ValidatedBucket), \
# "vb is %s but should be a ValidatedBucket" % (vb,)
def _obtain_thingA(self, ignored):
# all shareholders are supposed to have a copy of thingA, and all are
# supposed to be identical. We compute the hash of the data that
# comes back, and compare it against the version in our URI. If they
# don't match, ignore their data and try someone else.
def _obtain_uri_extension(self, ignored):
# all shareholders are supposed to have a copy of uri_extension, and
# all are supposed to be identical. We compute the hash of the data
# that comes back, and compare it against the version in our URI. If
# they don't match, ignore their data and try someone else.
def _validate(proposal, bucket):
h = hashutil.thingA_hash(proposal)
if h != self._thingA_hash:
self._fetch_failures["thingA"] += 1
msg = ("The copy of thingA we received from %s was bad" %
bucket)
raise BadThingAHashValue(msg)
h = hashutil.uri_extension_hash(proposal)
if h != self._uri_extension_hash:
self._fetch_failures["uri_extension"] += 1
msg = ("The copy of uri_extension we received from "
"%s was bad" % bucket)
raise BadURIExtensionHashValue(msg)
return bencode.bdecode(proposal)
return self._obtain_validated_thing(None,
self._thingA_sources,
"thingA",
"get_thingA", (), _validate)
self._uri_extension_sources,
"uri_extension",
"get_uri_extension", (), _validate)
def _obtain_validated_thing(self, ignored, sources, name, methname, args,
validatorfunc):
@ -379,8 +379,8 @@ class FileDownloader:
d.addErrback(_bad)
return d
def _got_thingA(self, thingA_data):
d = self._thingA_data = thingA_data
def _got_uri_extension(self, uri_extension_data):
d = self._uri_extension_data = uri_extension_data
self._codec = codec.get_decoder_by_name(d['codec_name'])
self._codec.set_serialized_params(d['codec_params'])
@ -409,7 +409,7 @@ class FileDownloader:
def _get_plaintext_hashtrees(self):
def _validate_plaintext_hashtree(proposal, bucket):
if proposal[0] != self._thingA_data['plaintext_root_hash']:
if proposal[0] != self._uri_extension_data['plaintext_root_hash']:
self._fetch_failures["plaintext_hashroot"] += 1
msg = ("The copy of the plaintext_root_hash we received from"
" %s was bad" % bucket)
@ -420,12 +420,13 @@ class FileDownloader:
pt_hashtree.set_hashes(pt_hashes)
except hashtree.BadHashError:
# the hashes they gave us were not self-consistent, even
# though the root matched what we saw in the thingA block
# though the root matched what we saw in the uri_extension
# block
self._fetch_failures["plaintext_hashtree"] += 1
raise
self._plaintext_hashtree = pt_hashtree
d = self._obtain_validated_thing(None,
self._thingA_sources,
self._uri_extension_sources,
"plaintext_hashes",
"get_plaintext_hashes", (),
_validate_plaintext_hashtree)
@ -433,7 +434,7 @@ class FileDownloader:
def _get_crypttext_hashtrees(self, res):
def _validate_crypttext_hashtree(proposal, bucket):
if proposal[0] != self._thingA_data['crypttext_root_hash']:
if proposal[0] != self._uri_extension_data['crypttext_root_hash']:
self._fetch_failures["crypttext_hashroot"] += 1
msg = ("The copy of the crypttext_root_hash we received from"
" %s was bad" % bucket)
@ -448,7 +449,7 @@ class FileDownloader:
ct_hashtree.set_hashes(ct_hashes)
self._crypttext_hashtree = ct_hashtree
d = self._obtain_validated_thing(None,
self._thingA_sources,
self._uri_extension_sources,
"crypttext_hashes",
"get_crypttext_hashes", (),
_validate_crypttext_hashtree)

View File

@ -85,7 +85,7 @@ class Encoder(object):
self.NEEDED_SHARES = k
self.SHARES_OF_HAPPINESS = happy
self.TOTAL_SHARES = n
self.thingA_data = {}
self.uri_extension_data = {}
def setup(self, infile, encryption_key):
self.infile = infile
@ -112,7 +112,7 @@ class Encoder(object):
self._codec.set_params(self.segment_size,
self.required_shares, self.num_shares)
data = self.thingA_data
data = self.uri_extension_data
data['codec_name'] = self._codec.get_encoder_type()
data['codec_params'] = self._codec.get_serialized_params()
@ -140,8 +140,8 @@ class Encoder(object):
self.required_shares, self.num_shares)
data['tail_codec_params'] = self._tail_codec.get_serialized_params()
def set_thingA_data(self, thingA_data):
self.thingA_data.update(thingA_data)
def set_uri_extension_data(self, uri_extension_data):
self.uri_extension_data.update(uri_extension_data)
def get_share_size(self):
share_size = mathutil.div_ceil(self.file_size, self.required_shares)
@ -186,7 +186,7 @@ class Encoder(object):
self.send_crypttext_hash_tree_to_all_shareholders())
d.addCallback(lambda res: self.send_all_subshare_hash_trees())
d.addCallback(lambda res: self.send_all_share_hash_trees())
d.addCallback(lambda res: self.send_thingA_to_all_shareholders())
d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
d.addCallback(lambda res: self.close_all_shareholders())
d.addCallbacks(lambda res: self.done(), self.err)
return d
@ -345,7 +345,7 @@ class Encoder(object):
log.msg("%s sending plaintext hash tree" % self)
t = HashTree(self._plaintext_hashes)
all_hashes = list(t)
self.thingA_data["plaintext_root_hash"] = t[0]
self.uri_extension_data["plaintext_root_hash"] = t[0]
dl = []
for shareid in self.landlords.keys():
dl.append(self.send_plaintext_hash_tree(shareid, all_hashes))
@ -363,7 +363,7 @@ class Encoder(object):
log.msg("%s sending crypttext hash tree" % self)
t = HashTree(self._crypttext_hashes)
all_hashes = list(t)
self.thingA_data["crypttext_root_hash"] = t[0]
self.uri_extension_data["crypttext_root_hash"] = t[0]
dl = []
for shareid in self.landlords.keys():
dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
@ -412,7 +412,7 @@ class Encoder(object):
# create the share hash tree
t = HashTree(self.share_root_hashes)
# the root of this hash tree goes into our URI
self.thingA_data['share_root_hash'] = t[0]
self.uri_extension_data['share_root_hash'] = t[0]
# now send just the necessary pieces out to each shareholder
for i in range(self.num_shares):
# the HashTree is given a list of leaves: 0,1,2,3..n .
@ -430,19 +430,19 @@ class Encoder(object):
d.addErrback(self._remove_shareholder, shareid, "put_share_hashes")
return d
def send_thingA_to_all_shareholders(self):
log.msg("%s: sending thingA" % self)
thingA = bencode.bencode(self.thingA_data)
self.thingA_hash = hashutil.thingA_hash(thingA)
def send_uri_extension_to_all_shareholders(self):
log.msg("%s: sending uri_extension" % self)
uri_extension = bencode.bencode(self.uri_extension_data)
self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension)
dl = []
for shareid in self.landlords.keys():
dl.append(self.send_thingA(shareid, thingA))
dl.append(self.send_uri_extension(shareid, uri_extension))
return self._gather_responses(dl)
def send_thingA(self, shareid, thingA):
def send_uri_extension(self, shareid, uri_extension):
sh = self.landlords[shareid]
d = sh.callRemote("put_thingA", thingA)
d.addErrback(self._remove_shareholder, shareid, "put_thingA")
d = sh.callRemote("put_uri_extension", uri_extension)
d.addErrback(self._remove_shareholder, shareid, "put_uri_extension")
return d
def close_all_shareholders(self):
@ -456,7 +456,7 @@ class Encoder(object):
def done(self):
log.msg("%s: upload done" % self)
return self.thingA_hash
return self.uri_extension_hash
def err(self, f):
log.msg("%s: upload failed: %s" % (self, f)) # UNUSUAL

View File

@ -15,7 +15,7 @@ StorageIndex = StringConstraint(32)
URI = StringConstraint(300) # kind of arbitrary
MAX_BUCKETS = 200 # per peer
ShareData = StringConstraint(100000) # 2MB segment / k=25
ThingAData = StringConstraint(1000)
URIExtensionData = StringConstraint(1000)
class RIIntroducerClient(RemoteInterface):
def new_peers(furls=SetOf(FURL)):
@ -62,13 +62,13 @@ class RIBucketWriter(RemoteInterface):
def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash), maxLength=2**20)):
return None
def put_thingA(data=ThingAData):
"""This as-yet-unnamed block of data contains integrity-checking
information (hashes of plaintext, crypttext, and shares), as well as
encoding parameters that are necessary to recover the data. This is a
bencoded dict mapping strings to other strings. The hash of this data
is kept in the URI and verified before any of the data is used. All
buckets for a given file contain identical copies of this data.
def put_uri_extension(data=URIExtensionData):
"""This block of data contains integrity-checking information (hashes
of plaintext, crypttext, and shares), as well as encoding parameters
that are necessary to recover the data. This is a serialized dict
mapping strings to other strings. The hash of this data is kept in
the URI and verified before any of the data is used. All buckets for
a given file contain identical copies of this data.
"""
return None
@ -96,8 +96,8 @@ class RIBucketReader(RemoteInterface):
return ListOf(Hash, maxLength=2**20)
def get_share_hashes():
return ListOf(TupleOf(int, Hash), maxLength=2**20)
def get_thingA():
return ThingAData
def get_uri_extension():
return URIExtensionData
class RIStorageServer(RemoteInterface):
@ -402,7 +402,8 @@ class IEncoder(Interface):
input file, encrypting it, encoding the pieces, uploading the shares
to the shareholders, then sending the hash trees.
I return a Deferred that fires with the hash of the thingA data block.
I return a Deferred that fires with the hash of the uri_extension
data block.
"""
class IDecoder(Interface):

View File

@ -71,9 +71,9 @@ class BucketWriter(Referenceable):
precondition(not self.closed)
self._write_file('sharehashes', bencode.bencode(sharehashes))
def remote_put_thingA(self, data):
def remote_put_uri_extension(self, data):
precondition(not self.closed)
self._write_file('thingA', data)
self._write_file('uri_extension', data)
def remote_close(self):
precondition(not self.closed)
@ -121,8 +121,8 @@ class BucketReader(Referenceable):
# schema
return [tuple(i) for i in hashes]
def remote_get_thingA(self):
return self._read_file('thingA')
def remote_get_uri_extension(self):
return self._read_file('uri_extension')
class StorageServer(service.MultiService, Referenceable):
implements(RIStorageServer)

View File

@ -91,9 +91,9 @@ class FakeBucketWriter:
assert self.share_hashes is None
self.share_hashes = sharehashes
def put_thingA(self, thingA):
def put_uri_extension(self, uri_extension):
assert not self.closed
self.thingA = thingA
self.uri_extension = uri_extension
def close(self):
assert not self.closed
@ -139,10 +139,10 @@ class FakeBucketWriter:
return []
return self.share_hashes
def get_thingA(self):
if self.mode == "bad thingA":
return flip_bit(self.thingA)
return self.thingA
def get_uri_extension(self):
if self.mode == "bad uri_extension":
return flip_bit(self.uri_extension)
return self.uri_extension
def make_data(length):
@ -265,7 +265,7 @@ class Roundtrip(unittest.TestCase):
data = make_data(datalen)
d = self.send(k_and_happy_and_n, AVAILABLE_SHARES,
max_segment_size, bucket_modes, data)
# that fires with (thingA_hash, e, shareholders)
# that fires with (uri_extension_hash, e, shareholders)
d.addCallback(self.recover, AVAILABLE_SHARES, recover_mode)
# that fires with newdata
def _downloaded((newdata, fd)):
@ -303,16 +303,16 @@ class Roundtrip(unittest.TestCase):
verifierid_hasher = hashutil.verifierid_hasher()
verifierid_hasher.update(cryptor.encrypt(data))
e.set_thingA_data({'verifierid': verifierid_hasher.digest(),
'fileid': fileid_hasher.digest(),
})
e.set_uri_extension_data({'verifierid': verifierid_hasher.digest(),
'fileid': fileid_hasher.digest(),
})
d = e.start()
def _sent(thingA_hash):
return (thingA_hash, e, shareholders)
def _sent(uri_extension_hash):
return (uri_extension_hash, e, shareholders)
d.addCallback(_sent)
return d
def recover(self, (thingA_hash, e, shareholders), AVAILABLE_SHARES,
def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES,
recover_mode):
key = e.key
if "corrupt_key" in recover_mode:
@ -320,7 +320,7 @@ class Roundtrip(unittest.TestCase):
URI = pack_uri(storage_index="S" * 32,
key=key,
thingA_hash=thingA_hash,
uri_extension_hash=uri_extension_hash,
needed_shares=e.required_shares,
total_shares=e.num_shares,
size=e.file_size)
@ -338,35 +338,35 @@ class Roundtrip(unittest.TestCase):
fd.add_share_bucket(shnum, bucket)
fd._got_all_shareholders(None)
# Make it possible to obtain thingA from the shareholders. Arrange
# for shareholders[0] to be the first, so we can selectively corrupt
# the data it returns.
fd._thingA_sources = shareholders.values()
fd._thingA_sources.remove(shareholders[0])
fd._thingA_sources.insert(0, shareholders[0])
# Make it possible to obtain uri_extension from the shareholders.
# Arrange for shareholders[0] to be the first, so we can selectively
# corrupt the data it returns.
fd._uri_extension_sources = shareholders.values()
fd._uri_extension_sources.remove(shareholders[0])
fd._uri_extension_sources.insert(0, shareholders[0])
d = defer.succeed(None)
# have the FileDownloader retrieve a copy of thingA itself
d.addCallback(fd._obtain_thingA)
# have the FileDownloader retrieve a copy of uri_extension itself
d.addCallback(fd._obtain_uri_extension)
if "corrupt_crypttext_hashes" in recover_mode:
# replace everybody's crypttext hash trees with a different one
# (computed over a different file), then modify our thingA to
# reflect the new crypttext hash tree root
def _corrupt_crypttext_hashes(thingA):
assert isinstance(thingA, dict)
assert 'crypttext_root_hash' in thingA
# (computed over a different file), then modify our uri_extension
# to reflect the new crypttext hash tree root
def _corrupt_crypttext_hashes(uri_extension):
assert isinstance(uri_extension, dict)
assert 'crypttext_root_hash' in uri_extension
badhash = hashutil.tagged_hash("bogus", "data")
bad_crypttext_hashes = [badhash] * thingA['num_segments']
bad_crypttext_hashes = [badhash] * uri_extension['num_segments']
badtree = hashtree.HashTree(bad_crypttext_hashes)
for bucket in shareholders.values():
bucket.crypttext_hashes = list(badtree)
thingA['crypttext_root_hash'] = badtree[0]
return thingA
uri_extension['crypttext_root_hash'] = badtree[0]
return uri_extension
d.addCallback(_corrupt_crypttext_hashes)
d.addCallback(fd._got_thingA)
d.addCallback(fd._got_uri_extension)
# also have the FileDownloader ask for hash trees
d.addCallback(fd._get_hashtrees)
@ -469,7 +469,7 @@ class Roundtrip(unittest.TestCase):
return self.send_and_recover((4,8,10), bucket_modes=modemap)
def assertFetchFailureIn(self, fd, where):
expected = {"thingA": 0,
expected = {"uri_extension": 0,
"plaintext_hashroot": 0,
"plaintext_hashtree": 0,
"crypttext_hashroot": 0,
@ -487,13 +487,13 @@ class Roundtrip(unittest.TestCase):
d.addCallback(self.assertFetchFailureIn, None)
return d
def test_bad_thingA(self):
# the first server has a bad thingA block, so we will fail over to a
# different server.
modemap = dict([(i, "bad thingA") for i in range(1)] +
def test_bad_uri_extension(self):
# the first server has a bad uri_extension block, so we will fail
# over to a different server.
modemap = dict([(i, "bad uri_extension") for i in range(1)] +
[(i, "good") for i in range(1, 10)])
d = self.send_and_recover((4,8,10), bucket_modes=modemap)
d.addCallback(self.assertFetchFailureIn, "thingA")
d.addCallback(self.assertFetchFailureIn, "uri_extension")
return d
def test_bad_plaintext_hashroot(self):
@ -536,10 +536,10 @@ class Roundtrip(unittest.TestCase):
# to test that the crypttext merkle tree is really being applied, we
# sneak into the download process and corrupt two things: we replace
# everybody's crypttext hashtree with a bad version (computed over
# bogus data), and we modify the supposedly-validated thingA block to
# match the new crypttext hashtree root. The download process should
# notice that the crypttext coming out of FEC doesn't match the tree,
# and fail.
# bogus data), and we modify the supposedly-validated uri_extension
# block to match the new crypttext hashtree root. The download
# process should notice that the crypttext coming out of FEC doesn't
# match the tree, and fail.
modemap = dict([(i, "good") for i in range(0, 10)])
d = self.send_and_recover((4,8,10), bucket_modes=modemap,

View File

@ -220,13 +220,13 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
d['storage_index'] = self.flip_bit(d['storage_index'])
return uri.pack_uri(**d)
# TODO: add a test which mangles the thingA_hash instead, and should fail
# due to not being able to get a valid thingA block. Also a test which
# sneakily mangles the thingA block to change some of the validation
# data, so it will fail in the post-download phase when the file's
# crypttext integrity check fails. Do the same thing for the key, which
# should cause the download to fail the post-download plaintext
# verifierid check.
# TODO: add a test which mangles the uri_extension_hash instead, and
# should fail due to not being able to get a valid uri_extension block.
# Also a test which sneakily mangles the uri_extension block to change
# some of the validation data, so it will fail in the post-download phase
# when the file's crypttext integrity check fails. Do the same thing for
# the key, which should cause the download to fail the post-download
# plaintext verifierid check.
def test_vdrive(self):
self.basedir = "test_system/SystemTest/test_vdrive"

View File

@ -235,16 +235,16 @@ class FileUploader:
assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
self._encoder.set_shareholders(buckets)
thingA_data = {}
thingA_data['verifierid'] = self._verifierid
thingA_data['fileid'] = self._fileid
self._encoder.set_thingA_data(thingA_data)
uri_extension_data = {}
uri_extension_data['verifierid'] = self._verifierid
uri_extension_data['fileid'] = self._fileid
self._encoder.set_uri_extension_data(uri_extension_data)
return self._encoder.start()
def _compute_uri(self, thingA_hash):
def _compute_uri(self, uri_extension_hash):
return pack_uri(storage_index=self._verifierid,
key=self._encryption_key,
thingA_hash=thingA_hash,
uri_extension_hash=uri_extension_hash,
needed_shares=self.needed_shares,
total_shares=self.total_shares,
size=self._size,

View File

@ -5,14 +5,14 @@ from allmydata.util import idlib
# enough information to retrieve and validate the contents. It shall be
# expressed in a limited character set (namely [TODO]).
def pack_uri(storage_index, key, thingA_hash,
def pack_uri(storage_index, key, uri_extension_hash,
needed_shares, total_shares, size):
# applications should pass keyword parameters into this
assert isinstance(storage_index, str)
assert len(storage_index) == 32 # sha256 hash
assert isinstance(thingA_hash, str)
assert len(thingA_hash) == 32 # sha56 hash
assert isinstance(uri_extension_hash, str)
assert len(uri_extension_hash) == 32 # sha256 hash
assert isinstance(key, str)
assert len(key) == 16 # AES-128
@ -21,7 +21,7 @@ def pack_uri(storage_index, key, thingA_hash,
assert isinstance(size, (int,long))
return "URI:%s:%s:%s:%d:%d:%d" % (idlib.b2a(storage_index), idlib.b2a(key),
idlib.b2a(thingA_hash),
idlib.b2a(uri_extension_hash),
needed_shares, total_shares, size)
@ -29,12 +29,12 @@ def unpack_uri(uri):
assert uri.startswith("URI:")
d = {}
(header,
storage_index_s, key_s, thingA_hash_s,
storage_index_s, key_s, uri_extension_hash_s,
needed_shares_s, total_shares_s, size_s) = uri.split(":")
assert header == "URI"
d['storage_index'] = idlib.a2b(storage_index_s)
d['key'] = idlib.a2b(key_s)
d['thingA_hash'] = idlib.a2b(thingA_hash_s)
d['uri_extension_hash'] = idlib.a2b(uri_extension_hash_s)
d['needed_shares'] = int(needed_shares_s)
d['total_shares'] = int(total_shares_s)
d['size'] = int(size_s)

View File

@ -26,10 +26,10 @@ def block_hash(data):
def block_hasher():
return tagged_hasher("allmydata_encoded_subshare_v1")
def thingA_hash(data):
return tagged_hash("thingA", data)
def thingA_hasher():
return tagged_hasher("thingA")
def uri_extension_hash(data):
return tagged_hash("allmydata_uri_extension_v1", data)
def uri_extension_hasher():
return tagged_hasher("allmydata_uri_extension_v1")
def fileid_hash(data):
return tagged_hash("allmydata_fileid_v1", data)