CHK: remove the storage index from the URI, deriving it from the key instead

This commit is contained in:
Brian Warner 2007-07-21 18:23:15 -07:00
parent 2bc3c163b6
commit 81a9904455
8 changed files with 61 additions and 58 deletions

View File

@ -48,29 +48,25 @@ property), and encrypted using a "read key". A secure hash of the data is
computed to help validate the data afterwards (providing the "identification"
property). All of these pieces, plus information about the file's size and
the number of shares into which it has been distributed, are put into the
"CHK" uri.
In the current release, these URIs contain both the storage index and the
read key. The plan is to derive the storage index from the read key, so only
the latter will appear in the URI.
"CHK" uri. The storage index is derived by hashing the read key, so it does
not need to be physically present in the URI.
The current format for CHK URIs is the concatenation of the following
strings:
URI:CHK:(storage-index):(key):(hash):(needed-shares):(total-shares):(size)
URI:CHK:(key):(hash):(needed-shares):(total-shares):(size)
Where (storage-index) is the base32 encoding of the (binary) storage index
(which itself is a SHA-256 hash), (key) is the base32 encoding of the 16-byte
AES read key, (hash) is the base32 encoding of the SHA-256 hash of the URI
Extension Block, (needed-shares) is an ascii decimal representation of the
number of shares required to reconstruct this file, (total-shares) is the
same representation of the total number of shares created, and (size) is an
ascii decimal representation of the size of the data represented by this URI.
Where (key) is the base32 encoding of the 16-byte AES read key, (hash) is the
base32 encoding of the SHA-256 hash of the URI Extension Block,
(needed-shares) is an ASCII decimal representation of the number of shares
required to reconstruct this file, (total-shares) is the same representation
of the total number of shares created, and (size) is an ASCII decimal
representation of the size of the data represented by this URI.
For example, the following is a CHK URI, generated from the contents of the
architecture.txt document that lives next to this one in the source tree:
URI:CHK:khzth3n672elnovimdpaczwuyukwm42vkdzenbqoaj6sqebk3zjq====:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
URI:CHK:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
=== LIT URIs ===

View File

@ -173,14 +173,12 @@ class Test(unittest.TestCase):
self.failUnlessEqual(res, {})
d.addCallback(_listed)
file1 = uri.CHKFileURI(storage_index="11" + " "*30,
key="k"*16,
file1 = uri.CHKFileURI(key="k"*15+"1",
uri_extension_hash="e"*32,
needed_shares=25,
total_shares=100,
size=12345).to_string()
file2 = uri.CHKFileURI(storage_index="2i" + " "*30,
key="k"*16,
file2 = uri.CHKFileURI(key="k"*15+"2",
uri_extension_hash="e"*32,
needed_shares=25,
total_shares=100,

View File

@ -302,15 +302,21 @@ class Roundtrip(unittest.TestCase):
def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES,
recover_mode):
key = e.key
if "corrupt_key" in recover_mode:
key = flip_bit(key)
URI = uri.CHKFileURI(storage_index="S" * 32,
key=key,
uri_extension_hash=uri_extension_hash,
needed_shares=e.required_shares,
total_shares=e.num_shares,
size=e.file_size).to_string()
if "corrupt_key" in recover_mode:
# we corrupt the key, so that the decrypted data is corrupted and
# will fail the plaintext hash check. Since we're manually
# attaching shareholders, the fact that the storage index is also
# corrupted doesn't matter.
key = flip_bit(e.key)
u = uri.CHKFileURI(key=key,
uri_extension_hash=uri_extension_hash,
needed_shares=e.required_shares,
total_shares=e.num_shares,
size=e.file_size)
URI = u.to_string()
client = None
target = download.Data()
fd = download.FileDownloader(client, URI, target)

View File

@ -222,11 +222,10 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
return good[:-1] + chr(ord(good[-1]) ^ 0x01)
def mangle_uri(self, gooduri):
# change the storage index, which means we'll be asking about the
# wrong file, so nobody will have any shares
# change the key, which changes the storage index, which means we'll
# be asking about the wrong file, so nobody will have any shares
u = IFileURI(gooduri)
u2 = uri.CHKFileURI(storage_index=self.flip_bit(u.storage_index),
key=u.key,
u2 = uri.CHKFileURI(key=self.flip_bit(u.key),
uri_extension_hash=u.uri_extension_hash,
needed_shares=u.needed_shares,
total_shares=u.total_shares,

View File

@ -47,14 +47,13 @@ class Literal(unittest.TestCase):
class CHKFile(unittest.TestCase):
def test_pack(self):
storage_index = hashutil.tagged_hash("foo", "bar")
key = "\x00" * 16
key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
storage_index = hashutil.storage_index_chk_hash(key)
uri_extension_hash = hashutil.uri_extension_hash("stuff")
needed_shares = 25
total_shares = 100
size = 1234
u = uri.CHKFileURI(storage_index=storage_index,
key=key,
u = uri.CHKFileURI(key=key,
uri_extension_hash=uri_extension_hash,
needed_shares=needed_shares,
total_shares=total_shares,

View File

@ -51,8 +51,7 @@ uri_counter = itertools.count()
def make_newuri(data):
n = uri_counter.next()
assert len(str(n)) < 5
newuri = uri.CHKFileURI(storage_index="SI%05d" % n + "i"*25,
key="K"*16,
newuri = uri.CHKFileURI(key="K%05d" % n + "k"*10,
uri_extension_hash="EH" + "h"*30,
needed_shares=25,
total_shares=100,
@ -220,8 +219,7 @@ class Web(unittest.TestCase):
def makefile(self, number):
n = str(number)
assert len(n) == 1
newuri = uri.CHKFileURI(storage_index="SI" + n*30,
key="K" + n*15,
newuri = uri.CHKFileURI(key="K" + n*15,
uri_extension_hash="EH" + n*30,
needed_shares=25,
total_shares=100,

View File

@ -292,7 +292,7 @@ class CHKUploader:
# There's certainly no extra entropy to be had..
assert len(storage_index) == 32 # SHA-256
self._storage_index = storage_index
log.msg(" upload SI is [%s]" % (idlib.b2a(storage_index,)))
log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,)))
def locate_all_shareholders(self, ignored=None):
@ -320,13 +320,13 @@ class CHKUploader:
self._encoder.set_shareholders(buckets)
def _compute_uri(self, uri_extension_hash):
u = uri.CHKFileURI(storage_index=self._storage_index,
key=self._encryption_key,
u = uri.CHKFileURI(key=self._encryption_key,
uri_extension_hash=uri_extension_hash,
needed_shares=self.needed_shares,
total_shares=self.total_shares,
size=self._size,
)
assert u.storage_index == self._storage_index
return u.to_string()
def read_this_many_bytes(uploadable, size, prepend_data=[]):

View File

@ -27,43 +27,50 @@ class CHKFileURI(_BaseURI):
# construct me with kwargs, since there are so many of them
if not kwargs:
return
for name in ("storage_index", "key", "uri_extension_hash",
"needed_shares", "total_shares", "size"):
value = kwargs[name]
setattr(self, name, value)
keys = ("key", "uri_extension_hash",
"needed_shares", "total_shares", "size")
for name in kwargs:
if name in keys:
value = kwargs[name]
setattr(self, name, value)
else:
raise TypeError("CHKFileURI does not accept '%s=' argument"
% name)
self.storage_index = hashutil.storage_index_chk_hash(self.key)
def init_from_string(self, uri):
assert uri.startswith("URI:CHK:"), uri
d = {}
(header_uri, header_chk,
storage_index_s, key_s, uri_extension_hash_s,
key_s, uri_extension_hash_s,
needed_shares_s, total_shares_s, size_s) = uri.split(":")
assert header_uri == "URI"
assert header_chk == "CHK"
self.storage_index = idlib.a2b(storage_index_s)
self.key = idlib.a2b(key_s)
assert isinstance(self.key, str)
assert len(self.key) == 16 # AES-128
self.storage_index = hashutil.storage_index_chk_hash(self.key)
assert isinstance(self.storage_index, str)
assert len(self.storage_index) == 32 # sha256 hash
self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
assert isinstance(self.uri_extension_hash, str)
assert len(self.uri_extension_hash) == 32 # sha256 hash
self.needed_shares = int(needed_shares_s)
self.total_shares = int(total_shares_s)
self.size = int(size_s)
return self
def to_string(self):
assert isinstance(self.storage_index, str)
assert len(self.storage_index) == 32 # sha256 hash
assert isinstance(self.uri_extension_hash, str)
assert len(self.uri_extension_hash) == 32 # sha256 hash
assert isinstance(self.key, str)
assert len(self.key) == 16 # AES-128
assert isinstance(self.needed_shares, int)
assert isinstance(self.total_shares, int)
assert isinstance(self.size, (int,long))
return ("URI:CHK:%s:%s:%s:%d:%d:%d" %
(idlib.b2a(self.storage_index),
idlib.b2a(self.key),
return ("URI:CHK:%s:%s:%d:%d:%d" %
(idlib.b2a(self.key),
idlib.b2a(self.uri_extension_hash),
self.needed_shares,
self.total_shares,