CHK: remove the storage index from the URI, deriving it from the key instead

This commit is contained in:
Brian Warner
2007-07-21 18:23:15 -07:00
parent 2bc3c163b6
commit 81a9904455
8 changed files with 61 additions and 58 deletions

View File

@ -48,29 +48,25 @@ property), and encrypted using a "read key". A secure hash of the data is
computed to help validate the data afterwards (providing the "identification" computed to help validate the data afterwards (providing the "identification"
property). All of these pieces, plus information about the file's size and property). All of these pieces, plus information about the file's size and
the number of shares into which it has been distributed, are put into the the number of shares into which it has been distributed, are put into the
"CHK" uri. "CHK" uri. The storage index is derived by hashing the read key, so it does
not need to be physically present in the URI.
In the current release, these URIs contain both the storage index and the
read key. The plan is to derive the storage index from the read key, so only
the latter will appear in the URI.
The current format for CHK URIs is the concatenation of the following The current format for CHK URIs is the concatenation of the following
strings: strings:
URI:CHK:(storage-index):(key):(hash):(needed-shares):(total-shares):(size) URI:CHK:(key):(hash):(needed-shares):(total-shares):(size)
Where (storage-index) is the base32 encoding of the (binary) storage index Where (key) is the base32 encoding of the 16-byte AES read key, (hash) is the
(which itself is a SHA-256 hash), (key) is the base32 encoding of the 16-byte base32 encoding of the SHA-256 hash of the URI Extension Block,
AES read key, (hash) is the base32 encoding of the SHA-256 hash of the URI (needed-shares) is an ascii decimal representation of the number of shares
Extension Block, (needed-shares) is an ascii decimal representation of the required to reconstruct this file, (total-shares) is the same representation
number of shares required to reconstruct this file, (total-shares) is the of the total number of shares created, and (size) is an ascii decimal
same representation of the total number of shares created, and (size) is an representation of the size of the data represented by this URI.
ascii decimal representation of the size of the data represented by this URI.
For example, the following is a CHK URI, generated from the contents of the For example, the following is a CHK URI, generated from the contents of the
architecture.txt document that lives next to this one in the source tree: architecture.txt document that lives next to this one in the source tree:
URI:CHK:khzth3n672elnovimdpaczwuyukwm42vkdzenbqoaj6sqebk3zjq====:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733 URI:CHK:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
=== LIT URIs === === LIT URIs ===

View File

@ -173,14 +173,12 @@ class Test(unittest.TestCase):
self.failUnlessEqual(res, {}) self.failUnlessEqual(res, {})
d.addCallback(_listed) d.addCallback(_listed)
file1 = uri.CHKFileURI(storage_index="11" + " "*30, file1 = uri.CHKFileURI(key="k"*15+"1",
key="k"*16,
uri_extension_hash="e"*32, uri_extension_hash="e"*32,
needed_shares=25, needed_shares=25,
total_shares=100, total_shares=100,
size=12345).to_string() size=12345).to_string()
file2 = uri.CHKFileURI(storage_index="2i" + " "*30, file2 = uri.CHKFileURI(key="k"*15+"2",
key="k"*16,
uri_extension_hash="e"*32, uri_extension_hash="e"*32,
needed_shares=25, needed_shares=25,
total_shares=100, total_shares=100,

View File

@ -302,15 +302,21 @@ class Roundtrip(unittest.TestCase):
def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES, def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES,
recover_mode): recover_mode):
key = e.key key = e.key
if "corrupt_key" in recover_mode:
key = flip_bit(key)
URI = uri.CHKFileURI(storage_index="S" * 32, if "corrupt_key" in recover_mode:
key=key, # we corrupt the key, so that the decrypted data is corrupted and
# will fail the plaintext hash check. Since we're manually
# attaching shareholders, the fact that the storage index is also
# corrupted doesn't matter.
key = flip_bit(e.key)
u = uri.CHKFileURI(key=key,
uri_extension_hash=uri_extension_hash, uri_extension_hash=uri_extension_hash,
needed_shares=e.required_shares, needed_shares=e.required_shares,
total_shares=e.num_shares, total_shares=e.num_shares,
size=e.file_size).to_string() size=e.file_size)
URI = u.to_string()
client = None client = None
target = download.Data() target = download.Data()
fd = download.FileDownloader(client, URI, target) fd = download.FileDownloader(client, URI, target)

View File

@ -222,11 +222,10 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
return good[:-1] + chr(ord(good[-1]) ^ 0x01) return good[:-1] + chr(ord(good[-1]) ^ 0x01)
def mangle_uri(self, gooduri): def mangle_uri(self, gooduri):
# change the storage index, which means we'll be asking about the # change the key, which changes the storage index, which means we'll
# wrong file, so nobody will have any shares # be asking about the wrong file, so nobody will have any shares
u = IFileURI(gooduri) u = IFileURI(gooduri)
u2 = uri.CHKFileURI(storage_index=self.flip_bit(u.storage_index), u2 = uri.CHKFileURI(key=self.flip_bit(u.key),
key=u.key,
uri_extension_hash=u.uri_extension_hash, uri_extension_hash=u.uri_extension_hash,
needed_shares=u.needed_shares, needed_shares=u.needed_shares,
total_shares=u.total_shares, total_shares=u.total_shares,

View File

@ -47,14 +47,13 @@ class Literal(unittest.TestCase):
class CHKFile(unittest.TestCase): class CHKFile(unittest.TestCase):
def test_pack(self): def test_pack(self):
storage_index = hashutil.tagged_hash("foo", "bar") key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
key = "\x00" * 16 storage_index = hashutil.storage_index_chk_hash(key)
uri_extension_hash = hashutil.uri_extension_hash("stuff") uri_extension_hash = hashutil.uri_extension_hash("stuff")
needed_shares = 25 needed_shares = 25
total_shares = 100 total_shares = 100
size = 1234 size = 1234
u = uri.CHKFileURI(storage_index=storage_index, u = uri.CHKFileURI(key=key,
key=key,
uri_extension_hash=uri_extension_hash, uri_extension_hash=uri_extension_hash,
needed_shares=needed_shares, needed_shares=needed_shares,
total_shares=total_shares, total_shares=total_shares,

View File

@ -51,8 +51,7 @@ uri_counter = itertools.count()
def make_newuri(data): def make_newuri(data):
n = uri_counter.next() n = uri_counter.next()
assert len(str(n)) < 5 assert len(str(n)) < 5
newuri = uri.CHKFileURI(storage_index="SI%05d" % n + "i"*25, newuri = uri.CHKFileURI(key="K%05d" % n + "k"*10,
key="K"*16,
uri_extension_hash="EH" + "h"*30, uri_extension_hash="EH" + "h"*30,
needed_shares=25, needed_shares=25,
total_shares=100, total_shares=100,
@ -220,8 +219,7 @@ class Web(unittest.TestCase):
def makefile(self, number): def makefile(self, number):
n = str(number) n = str(number)
assert len(n) == 1 assert len(n) == 1
newuri = uri.CHKFileURI(storage_index="SI" + n*30, newuri = uri.CHKFileURI(key="K" + n*15,
key="K" + n*15,
uri_extension_hash="EH" + n*30, uri_extension_hash="EH" + n*30,
needed_shares=25, needed_shares=25,
total_shares=100, total_shares=100,

View File

@ -292,7 +292,7 @@ class CHKUploader:
# There's certainly no extra entropy to be had.. # There's certainly no extra entropy to be had..
assert len(storage_index) == 32 # SHA-256 assert len(storage_index) == 32 # SHA-256
self._storage_index = storage_index self._storage_index = storage_index
log.msg(" upload SI is [%s]" % (idlib.b2a(storage_index,))) log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,)))
def locate_all_shareholders(self, ignored=None): def locate_all_shareholders(self, ignored=None):
@ -320,13 +320,13 @@ class CHKUploader:
self._encoder.set_shareholders(buckets) self._encoder.set_shareholders(buckets)
def _compute_uri(self, uri_extension_hash): def _compute_uri(self, uri_extension_hash):
u = uri.CHKFileURI(storage_index=self._storage_index, u = uri.CHKFileURI(key=self._encryption_key,
key=self._encryption_key,
uri_extension_hash=uri_extension_hash, uri_extension_hash=uri_extension_hash,
needed_shares=self.needed_shares, needed_shares=self.needed_shares,
total_shares=self.total_shares, total_shares=self.total_shares,
size=self._size, size=self._size,
) )
assert u.storage_index == self._storage_index
return u.to_string() return u.to_string()
def read_this_many_bytes(uploadable, size, prepend_data=[]): def read_this_many_bytes(uploadable, size, prepend_data=[]):

View File

@ -27,43 +27,50 @@ class CHKFileURI(_BaseURI):
# construct me with kwargs, since there are so many of them # construct me with kwargs, since there are so many of them
if not kwargs: if not kwargs:
return return
for name in ("storage_index", "key", "uri_extension_hash", keys = ("key", "uri_extension_hash",
"needed_shares", "total_shares", "size"): "needed_shares", "total_shares", "size")
for name in kwargs:
if name in keys:
value = kwargs[name] value = kwargs[name]
setattr(self, name, value) setattr(self, name, value)
else:
raise TypeError("CHKFileURI does not accept '%s=' argument"
% name)
self.storage_index = hashutil.storage_index_chk_hash(self.key)
def init_from_string(self, uri): def init_from_string(self, uri):
assert uri.startswith("URI:CHK:"), uri assert uri.startswith("URI:CHK:"), uri
d = {} d = {}
(header_uri, header_chk, (header_uri, header_chk,
storage_index_s, key_s, uri_extension_hash_s, key_s, uri_extension_hash_s,
needed_shares_s, total_shares_s, size_s) = uri.split(":") needed_shares_s, total_shares_s, size_s) = uri.split(":")
assert header_uri == "URI" assert header_uri == "URI"
assert header_chk == "CHK" assert header_chk == "CHK"
self.storage_index = idlib.a2b(storage_index_s)
self.key = idlib.a2b(key_s) self.key = idlib.a2b(key_s)
assert isinstance(self.key, str)
assert len(self.key) == 16 # AES-128
self.storage_index = hashutil.storage_index_chk_hash(self.key)
assert isinstance(self.storage_index, str)
assert len(self.storage_index) == 32 # sha256 hash
self.uri_extension_hash = idlib.a2b(uri_extension_hash_s) self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
assert isinstance(self.uri_extension_hash, str)
assert len(self.uri_extension_hash) == 32 # sha256 hash
self.needed_shares = int(needed_shares_s) self.needed_shares = int(needed_shares_s)
self.total_shares = int(total_shares_s) self.total_shares = int(total_shares_s)
self.size = int(size_s) self.size = int(size_s)
return self return self
def to_string(self): def to_string(self):
assert isinstance(self.storage_index, str)
assert len(self.storage_index) == 32 # sha256 hash
assert isinstance(self.uri_extension_hash, str)
assert len(self.uri_extension_hash) == 32 # sha256 hash
assert isinstance(self.key, str)
assert len(self.key) == 16 # AES-128
assert isinstance(self.needed_shares, int) assert isinstance(self.needed_shares, int)
assert isinstance(self.total_shares, int) assert isinstance(self.total_shares, int)
assert isinstance(self.size, (int,long)) assert isinstance(self.size, (int,long))
return ("URI:CHK:%s:%s:%s:%d:%d:%d" % return ("URI:CHK:%s:%s:%d:%d:%d" %
(idlib.b2a(self.storage_index), (idlib.b2a(self.key),
idlib.b2a(self.key),
idlib.b2a(self.uri_extension_hash), idlib.b2a(self.uri_extension_hash),
self.needed_shares, self.needed_shares,
self.total_shares, self.total_shares,