Change the encryption-key hash to include the encoding parameters. This is a minor compatibility break: CHK files encoded (with convergence) before and after this change will have different keys and ciphertexts. Also switch to SHA-256d for both the data-to-key hash and the key-to-storage-index hash.

2025-03-21 03:15:16 +00:00 · 2008-02-06 19:50:47 -07:00 · 2008-02-06 19:50:47 -07:00 · da9610e90a
commit da9610e90a
parent 9f9a458249
4 changed files with 47 additions and 20 deletions
--- a/src/allmydata/test/test_helper.py
+++ b/src/allmydata/test/test_helper.py
@ -7,7 +7,7 @@ from foolscap import Tub, eventual
 from foolscap.logging import log

 from allmydata import upload, offloaded
-from allmydata.util import hashutil, fileutil, idlib
+from allmydata.util import hashutil, fileutil, idlib, mathutil
 from pycryptopp.cipher.aes import AES

 MiB = 1024*1024
@ -139,8 +139,18 @@ class AssistedUpload(unittest.TestCase):
        # we want to make sure that an upload which fails (leaving the
        # ciphertext in the CHK_encoding/ directory) does not prevent a later
        # attempt to upload that file from working. We simulate this by
-        # populating the directory manually.
-        key = hashutil.key_hash(DATA)[:16]
+        # populating the directory manually. The hardest part is guessing the
+        # storage index.
+
+        k = FakeClient.DEFAULT_ENCODING_PARAMETERS["k"]
+        n = FakeClient.DEFAULT_ENCODING_PARAMETERS["n"]
+        max_segsize = FakeClient.DEFAULT_ENCODING_PARAMETERS["max_segment_size"]
+        segsize = min(max_segsize, len(DATA))
+        # this must be a multiple of 'required_shares'==k
+        segsize = mathutil.next_multiple(segsize, k)
+
+        key = hashutil.content_hash_key_hash(k, n, segsize, DATA)
+        assert len(key) == 16
        encryptor = AES(key)
        SI = hashutil.storage_index_hash(key)
        SI_s = idlib.b2a(SI)
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@ -443,3 +443,10 @@ class HashUtilTests(unittest.TestCase):
        self.failUnlessEqual(len(h1), 16)
        self.failUnlessEqual(len(h2), 16)
        self.failUnlessEqual(h1, h2)
+
+    def test_chk(self):
+        h1 = hashutil.content_hash_key_hash(3, 10, 1000, "data")
+        h2 = hashutil.content_hash_key_hasher(3, 10, 1000)
+        h2.update("data")
+        h2 = h2.digest()
+        self.failUnlessEqual(h1, h2)
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@ -11,7 +11,7 @@ from foolscap.logging import log
 from allmydata.util.hashutil import file_renewal_secret_hash, \
     file_cancel_secret_hash, bucket_renewal_secret_hash, \
     bucket_cancel_secret_hash, plaintext_hasher, \
-     storage_index_hash, plaintext_segment_hasher, key_hasher
+     storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher
 from allmydata import encode, storage, hashtree, uri
 from allmydata.util import idlib, mathutil
 from allmydata.util.assertutil import precondition
@ -945,10 +945,14 @@ class FileHandle(BaseUploadable):
        self._contenthashkey = contenthashkey

    def _get_encryption_key_content_hash(self):
-        if self._key is None:
+        if self._key is not None:
+            return defer.succeed(self._key)
+
+        d = self.get_all_encoding_parameters()
+        def _got(params):
+            k, happy, n, segsize = params
            f = self._filehandle
-            enckey_hasher = key_hasher()
-            #enckey_hasher.update(encoding_parameters) # TODO
+            enckey_hasher = content_hash_key_hasher(k, n, segsize)
            f.seek(0)
            BLOCKSIZE = 64*1024
            while True:
@ -957,9 +961,11 @@ class FileHandle(BaseUploadable):
                    break
                enckey_hasher.update(data)
            f.seek(0)
-            self._key = enckey_hasher.digest()[:16]
-
-        return defer.succeed(self._key)
+            self._key = enckey_hasher.digest()
+            assert len(self._key) == 16
+            return self._key
+        d.addCallback(_got)
+        return d

    def _get_encryption_key_random(self):
        if self._key is None:
--- a/src/allmydata/util/hashutil.py
+++ b/src/allmydata/util/hashutil.py
@ -66,12 +66,10 @@ def tagged_hasher(tag):
 def storage_index_hash(key):
    # storage index is truncated to 128 bits (16 bytes). We're only hashing a
    # 16-byte value to get it, so there's no point in using a larger value.
-    # TODO: remove the word "CHK" from this tag since we use this same tagged
-    # hash for random-keyed immutable files, mutable files, content-hash-keyed
-    # immutabie files.  Or, define two other tagged hashes, one for each kind.
-    # (Either way is fine -- we can never have collisions of storage indexes
-    # anyway, since we can't have collisions of keys.)
-    return tagged_hash("allmydata_CHK_storage_index_v1", key)[:16]
+    # We use this same tagged hash to go from encryption key to storage index
+    # for random-keyed immutable files and content-hash-keyed immutable
+    # files. Mutable files use ssk_storage_index_hash().
+    return tagged_hash_256d("allmydata_immutable_storage_index_v2", key, 16)

 def block_hash(data):
    return tagged_hash("allmydata_encoded_subshare_v1", data)
@ -103,10 +101,16 @@ def plaintext_segment_hash(data):
 def plaintext_segment_hasher():
    return tagged_hasher("allmydata_plaintext_segment_v1")

-def key_hash(data):
-    return tagged_hash("allmydata_encryption_key_v1", data)
-def key_hasher():
-    return tagged_hasher("allmydata_encryption_key_v1")
+def content_hash_key_hash(k, n, segsize, data):
+    # this is defined to return a 16-byte AES key. We use SHA-256d here;
+    # we'd like to use it everywhere, but we're only switching algorithms
+    # when we can hide the compatibility breaks in other necessary changes.
+    param_tag = netstring("%d,%d,%d" % (k, n, segsize))
+    h = tagged_hash_256d("allmydata_encryption_key_v2+" + param_tag, data, 16)
+    return h
+def content_hash_key_hasher(k, n, segsize):
+    param_tag = netstring("%d,%d,%d" % (k, n, segsize))
+    return tagged_hasher_256d("allmydata_encryption_key_v2+" + param_tag, 16)

 KEYLEN = 16
 def random_key():