diff --git a/src/allmydata/client.py b/src/allmydata/client.py index ba857d771..94880ec22 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -34,6 +34,9 @@ PiB=1024*TiB class StubClient(Referenceable): implements(RIStubClient) +def _make_secret(): + return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" + class Client(node.Node, testutil.PollMixin): PORTNUMFILE = "client.port" STOREDIR = 'storage' @@ -103,9 +106,7 @@ class Client(node.Node, testutil.PollMixin): self.stats_provider = None def init_lease_secret(self): - def make_secret(): - return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" - secret_s = self.get_or_create_private_config("secret", make_secret) + secret_s = self.get_or_create_private_config("secret", _make_secret) self._lease_secret = base32.a2b(secret_s) def init_storage(self): @@ -151,6 +152,8 @@ class Client(node.Node, testutil.PollMixin): def init_client(self): helper_furl = self.get_config("helper.furl") + convergence_s = self.get_or_create_private_config('convergence', _make_secret) + self.convergence = base32.a2b(convergence_s) self.add_service(Uploader(helper_furl)) self.add_service(Downloader()) self.add_service(Checker()) diff --git a/src/allmydata/control.py b/src/allmydata/control.py index cd9d3cee1..99a3139b1 100644 --- a/src/allmydata/control.py +++ b/src/allmydata/control.py @@ -42,9 +42,9 @@ class ControlServer(Referenceable, service.Service, testutil.PollMixin): def remote_wait_for_client_connections(self, num_clients): return self.parent.debug_wait_for_client_connections(num_clients) - def remote_upload_from_file_to_uri(self, filename): + def remote_upload_from_file_to_uri(self, filename, convergence): uploader = self.parent.getServiceNamed("uploader") - u = upload.FileName(filename) + u = upload.FileName(filename, convergence=convergence) d = uploader.upload(u) d.addCallback(lambda results: results.uri) return d @@ -161,7 +161,7 @@ class SpeedTest: d1 = self._n.overwrite(data) d1.addCallback(lambda res: self._n.get_uri()) else: - up = upload.FileName(fn) + up = upload.FileName(fn, convergence=None) d1 = self.parent.upload(up) d1.addCallback(lambda results: results.uri) d1.addCallback(_record_uri, i) diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index d438de7b5..190288a0c 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -1578,11 +1578,14 @@ class RIControlClient(RemoteInterface): storage servers. """ - def upload_from_file_to_uri(filename=str): + def upload_from_file_to_uri(filename=str, convergence=ChoiceOf(None, StringConstraint(2**20))): """Upload a file to the grid. This accepts a filename (which must be - absolute) that points to a file on the node's local disk. The node - will read the contents of this file, upload it to the grid, then - return the URI at which it was uploaded. + absolute) that points to a file on the node's local disk. The node will + read the contents of this file, upload it to the grid, then return the + URI at which it was uploaded. If convergence is None then a random + encryption key will be used, else the plaintext will be hashed, then + that hash will be mixed together with the "convergence" string to form + the encryption key. """ return URI diff --git a/src/allmydata/test/check_memory.py b/src/allmydata/test/check_memory.py index de65de510..c01769e78 100644 --- a/src/allmydata/test/check_memory.py +++ b/src/allmydata/test/check_memory.py @@ -367,7 +367,7 @@ this file are ignored. 
if self.mode in ("upload", "upload-self"): files[name] = self.create_data(name, size) d = self.control_rref.callRemote("upload_from_file_to_uri", - files[name]) + files[name], convergence=None) def _done(uri): os.remove(files[name]) del files[name] diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 9d053dc89..dafc67af2 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -120,7 +120,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): def test_readonly(self): fileuri = make_chk_file_uri(1234) filenode = self.client.create_node_from_uri(fileuri) - uploadable = upload.Data("some data") + uploadable = upload.Data("some data", convergence="some convergence string") d = self.client.create_empty_dirnode() def _created(rw_dn): @@ -338,7 +338,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): # hundrededths of a second. d.addCallback(self.stall, 0.1) d.addCallback(lambda res: n.add_file(u"timestamps", - upload.Data("stamp me"))) + upload.Data("stamp me", convergence="some convergence string"))) d.addCallback(self.stall, 0.1) def _stop(res): self._stop_timestamp = time.time() @@ -393,7 +393,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): self.failUnlessEqual(sorted(children.keys()), sorted([u"child"]))) - uploadable = upload.Data("some data") + uploadable = upload.Data("some data", convergence="some convergence string") d.addCallback(lambda res: n.add_file(u"newfile", uploadable)) d.addCallback(lambda newnode: self.failUnless(IFileNode.providedBy(newnode))) @@ -406,7 +406,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): self.failUnlessEqual(sorted(metadata.keys()), ["ctime", "mtime"])) - uploadable = upload.Data("some data") + uploadable = upload.Data("some data", convergence="some convergence string") d.addCallback(lambda res: n.add_file(u"newfile-metadata", uploadable, {"key": "value"})) diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 4b729456e..fea99e5a9 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -168,7 +168,7 @@ class Encode(unittest.TestCase): data = make_data(datalen) # force use of multiple segments e = encode.Encoder() - u = upload.Data(data) + u = upload.Data(data, convergence="some convergence string") u.max_segment_size = max_segment_size u.encoding_param_k = 25 u.encoding_param_happy = 75 @@ -303,7 +303,7 @@ class Roundtrip(unittest.TestCase): if AVAILABLE_SHARES is None: AVAILABLE_SHARES = NUM_SHARES e = encode.Encoder() - u = upload.Data(data) + u = upload.Data(data, convergence="some convergence string") # force use of multiple segments by using a low max_segment_size u.max_segment_size = max_segment_size u.encoding_param_k = k diff --git a/src/allmydata/test/test_helper.py b/src/allmydata/test/test_helper.py index 9b0d0b0ce..9470a0dc7 100644 --- a/src/allmydata/test/test_helper.py +++ b/src/allmydata/test/test_helper.py @@ -72,8 +72,8 @@ def flush_but_dont_ignore(res): d.addCallback(_done) return d -def upload_data(uploader, data): - u = upload.Data(data) +def upload_data(uploader, data, convergence): + u = upload.Data(data, convergence=convergence) return uploader.upload(u) class AssistedUpload(unittest.TestCase): @@ -116,7 +116,7 @@ class AssistedUpload(unittest.TestCase): def _ready(res): assert u._helper - return upload_data(u, DATA) + return upload_data(u, DATA, convergence="some convergence string") d.addCallback(_ready) def _uploaded(results): uri = 
results.uri @@ -149,7 +149,7 @@ class AssistedUpload(unittest.TestCase): # this must be a multiple of 'required_shares'==k segsize = mathutil.next_multiple(segsize, k) - key = hashutil.content_hash_key_hash(k, n, segsize, DATA) + key = hashutil.convergence_hash(k, n, segsize, DATA, "test convergence string") assert len(key) == 16 encryptor = AES(key) SI = hashutil.storage_index_hash(key) @@ -169,7 +169,7 @@ class AssistedUpload(unittest.TestCase): def _ready(res): assert u._helper - return upload_data(u, DATA) + return upload_data(u, DATA, convergence="test convergence string") d.addCallback(_ready) def _uploaded(results): uri = results.uri @@ -200,7 +200,7 @@ class AssistedUpload(unittest.TestCase): def _ready(res): assert u._helper - return upload_data(u, DATA) + return upload_data(u, DATA, convergence="some convergence string") d.addCallback(_ready) def _uploaded(results): uri = results.uri diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 33a4bc060..77092d802 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -256,15 +256,15 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): def test_upload_and_download_random_key(self): self.basedir = "system/SystemTest/test_upload_and_download_random_key" - return self._test_upload_and_download(False) + return self._test_upload_and_download(convergence=None) test_upload_and_download_random_key.timeout = 4800 - def test_upload_and_download_content_hash_key(self): - self.basedir = "system/SystemTest/test_upload_and_download_CHK" - return self._test_upload_and_download(True) - test_upload_and_download_content_hash_key.timeout = 4800 + def test_upload_and_download_convergent(self): + self.basedir = "system/SystemTest/test_upload_and_download_convergent" + return self._test_upload_and_download(convergence="some convergence string") + test_upload_and_download_convergent.timeout = 4800 - def _test_upload_and_download(self, contenthashkey): + def _test_upload_and_download(self, convergence): # we use 4000 bytes of data, which will result in about 400k written # to disk among all our simulated nodes DATA = "Some data to upload\n" * 200 @@ -287,7 +287,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): # tail segment is not the same length as the others. This actualy # gets rounded up to 1025 to be a multiple of the number of # required shares (since we use 25 out of 100 FEC). - up = upload.Data(DATA, contenthashkey=contenthashkey) + up = upload.Data(DATA, convergence=convergence) up.max_segment_size = 1024 d1 = u.upload(up) return d1 @@ -301,12 +301,12 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d.addCallback(_upload_done) def _upload_again(res): - # Upload again. If contenthashkey then this ought to be + # Upload again. If using convergent encryption then this ought to be # short-circuited, however with the way we currently generate URIs # (i.e. because they include the roothash), we have to do all of the # encoding work, and only get to save on the upload part. 
log.msg("UPLOADING AGAIN") - up = upload.Data(DATA, contenthashkey=contenthashkey) + up = upload.Data(DATA, convergence=convergence) up.max_segment_size = 1024 d1 = self.uploader.upload(up) d.addCallback(_upload_again) @@ -372,7 +372,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): HELPER_DATA = "Data that needs help to upload" * 1000 def _upload_with_helper(res): - u = upload.Data(HELPER_DATA, contenthashkey=contenthashkey) + u = upload.Data(HELPER_DATA, convergence=convergence) d = self.extra_node.upload(u) def _uploaded(results): uri = results.uri @@ -385,7 +385,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d.addCallback(_upload_with_helper) def _upload_duplicate_with_helper(res): - u = upload.Data(HELPER_DATA, contenthashkey=contenthashkey) + u = upload.Data(HELPER_DATA, convergence=convergence) u.debug_stash_RemoteEncryptedUploadable = True d = self.extra_node.upload(u) def _uploaded(results): @@ -398,13 +398,13 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): "uploadable started uploading, should have been avoided") d.addCallback(_check) return d - if contenthashkey: + if convergence is not None: d.addCallback(_upload_duplicate_with_helper) def _upload_resumable(res): DATA = "Data that needs help to upload and gets interrupted" * 1000 - u1 = CountingDataUploadable(DATA, contenthashkey=contenthashkey) - u2 = CountingDataUploadable(DATA, contenthashkey=contenthashkey) + u1 = CountingDataUploadable(DATA, convergence=convergence) + u2 = CountingDataUploadable(DATA, convergence=convergence) # we interrupt the connection after about 5kB by shutting down # the helper, then restartingit. @@ -490,7 +490,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): # to store the key locally and re-use it on the next upload of # this file, which isn't a bad thing to do, but we currently # don't do it.) - if contenthashkey: + if convergence is not None: # Make sure we did not have to read the whole file the # second time around . self.failUnless(bytes_sent < len(DATA), @@ -510,9 +510,9 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): def _check(newdata): self.failUnlessEqual(newdata, DATA) - # If using a content hash key, then also check that the helper - # has removed the temp file from its directories. - if contenthashkey: + # If using convergent encryption, then also check that the + # helper has removed the temp file from its directories. 
+ if convergence is not None: basedir = os.path.join(self.getdir("client0"), "helper") files = os.listdir(os.path.join(basedir, "CHK_encoding")) self.failUnlessEqual(files, []) @@ -890,7 +890,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): return d def _do_publish1(self, res): - ut = upload.Data(self.data) + ut = upload.Data(self.data, convergence=None) c0 = self.clients[0] d = c0.create_empty_dirnode() def _made_root(new_dirnode): @@ -910,7 +910,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): return d def _do_publish2(self, res): - ut = upload.Data(self.data) + ut = upload.Data(self.data, convergence=None) d = self._subdir1_node.create_empty_directory(u"subdir2") d.addCallback(lambda subdir2: subdir2.add_file(u"mydata992", ut)) return d @@ -927,7 +927,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): def _do_publish_private(self, res): self.smalldata = "sssh, very secret stuff" - ut = upload.Data(self.smalldata) + ut = upload.Data(self.smalldata, convergence=None) d = self.clients[0].create_empty_dirnode() d.addCallback(self.log, "GOT private directory") def _got_new_dir(privnode): @@ -1009,7 +1009,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d1.addCallback(lambda res: self.shouldFail2(NotMutableError, "mkdir(nope)", None, dirnode.create_empty_directory, u"nope")) d1.addCallback(self.log, "doing add_file(ro)") - ut = upload.Data("I will disappear, unrecorded and unobserved. The tragedy of my demise is made more poignant by its silence, but this beauty is not for you to ever know.") + ut = upload.Data("I will disappear, unrecorded and unobserved. The tragedy of my demise is made more poignant by its silence, but this beauty is not for you to ever know.", convergence="99i-p1x4-xd4-18yc-ywt-87uu-msu-zo -- completely and totally unguessable string (unless you read this)") d1.addCallback(lambda res: self.shouldFail2(NotMutableError, "add_file(nope)", None, dirnode.add_file, u"hope", ut)) d1.addCallback(self.log, "doing get(ro)") @@ -1345,7 +1345,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d.addCallback(self._test_control2, control_furl_file) return d def _test_control2(self, rref, filename): - d = rref.callRemote("upload_from_file_to_uri", filename) + d = rref.callRemote("upload_from_file_to_uri", filename, convergence=None) downfile = os.path.join(self.basedir, "control.downfile") d.addCallback(lambda uri: rref.callRemote("download_from_uri_to_file", diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 91f0e5a5d..e15731a32 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -25,14 +25,14 @@ class Uploadable(unittest.TestCase): self.failUnlessEqual(s, expected) def test_filehandle_random_key(self): - return self._test_filehandle(True) + return self._test_filehandle(convergence=None) - def test_filehandle_content_hash_key(self): - return self._test_filehandle(False) + def test_filehandle_convergent_encryption(self): + return self._test_filehandle(convergence="some convergence string") - def _test_filehandle(self, randomkey): + def _test_filehandle(self, convergence): s = StringIO("a"*41) - u = upload.FileHandle(s, randomkey) + u = upload.FileHandle(s, convergence=convergence) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) @@ -50,7 +50,7 @@ class Uploadable(unittest.TestCase): f = open(fn, "w") 
f.write("a"*41) f.close() - u = upload.FileName(fn) + u = upload.FileName(fn, convergence=None) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) @@ -62,7 +62,7 @@ class Uploadable(unittest.TestCase): def test_data(self): s = "a"*41 - u = upload.Data(s) + u = upload.Data(s, convergence=None) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) @@ -169,13 +169,13 @@ SIZE_SMALL = 16 SIZE_LARGE = len(DATA) def upload_data(uploader, data): - u = upload.Data(data) + u = upload.Data(data, convergence=None) return uploader.upload(u) def upload_filename(uploader, filename): - u = upload.FileName(filename) + u = upload.FileName(filename, convergence=None) return uploader.upload(u) def upload_filehandle(uploader, fh): - u = upload.FileHandle(fh) + u = upload.FileHandle(fh, convergence=None) return uploader.upload(u) class GoodServer(unittest.TestCase): @@ -444,38 +444,57 @@ class PeerSelection(unittest.TestCase): class StorageIndex(unittest.TestCase): def test_params_must_matter(self): DATA = "I am some data" - u = upload.Data(DATA) + u = upload.Data(DATA, convergence="") eu = upload.EncryptAnUploadable(u) d1 = eu.get_storage_index() # CHK means the same data should encrypt the same way - u = upload.Data(DATA) + u = upload.Data(DATA, convergence="") eu = upload.EncryptAnUploadable(u) d1a = eu.get_storage_index() - # but if we change the encoding parameters, it should be different - u = upload.Data(DATA) + # but if we use a different convergence string it should be different + u = upload.Data(DATA, convergence="wheee!") + eu = upload.EncryptAnUploadable(u) + d1salt1 = eu.get_storage_index() + + # and if we add yet a different convergence it should be different again + u = upload.Data(DATA, convergence="NOT wheee!") + eu = upload.EncryptAnUploadable(u) + d1salt2 = eu.get_storage_index() + + # and if we use the first string again it should be the same as last time + u = upload.Data(DATA, convergence="wheee!") + eu = upload.EncryptAnUploadable(u) + d1salt1a = eu.get_storage_index() + + # and if we change the encoding parameters, it should be different (from the same convergence string with different encoding parameters) + u = upload.Data(DATA, convergence="") u.encoding_param_k = u.default_encoding_param_k + 1 eu = upload.EncryptAnUploadable(u) d2 = eu.get_storage_index() # and if we use a random key, it should be different than the CHK - u = upload.Data(DATA, contenthashkey=False) + u = upload.Data(DATA, convergence=None) eu = upload.EncryptAnUploadable(u) d3 = eu.get_storage_index() # and different from another instance - u = upload.Data(DATA, contenthashkey=False) + u = upload.Data(DATA, convergence=None) eu = upload.EncryptAnUploadable(u) d4 = eu.get_storage_index() - d = DeferredListShouldSucceed([d1,d1a,d2,d3,d4]) + d = DeferredListShouldSucceed([d1,d1a,d1salt1,d1salt2,d1salt1a,d2,d3,d4]) def _done(res): - si1, si1a, si2, si3, si4 = res + si1, si1a, si1salt1, si1salt2, si1salt1a, si2, si3, si4 = res self.failUnlessEqual(si1, si1a) self.failIfEqual(si1, si2) self.failIfEqual(si1, si3) self.failIfEqual(si1, si4) self.failIfEqual(si3, si4) + self.failIfEqual(si1salt1, si1) + self.failIfEqual(si1salt1, si1salt2) + self.failIfEqual(si1salt2, si1) + self.failUnlessEqual(si1salt1, si1salt1a) d.addCallback(_done) return d diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index d80f09034..98d6da71b 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -408,8 
+408,8 @@ class HashUtilTests(unittest.TestCase): self.failUnlessEqual(h1, h2) def test_chk(self): - h1 = hashutil.content_hash_key_hash(3, 10, 1000, "data") - h2 = hashutil.content_hash_key_hasher(3, 10, 1000) + h1 = hashutil.convergence_hash(3, 10, 1000, "data", "secret") + h2 = hashutil.convergence_hasher(3, 10, 1000, "secret") h2.update("data") h2 = h2.digest() self.failUnlessEqual(h1, h2) diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 5711af925..a55b9efb7 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -35,6 +35,7 @@ class FakeClient(service.MultiService): introducer_client = FakeIntroducerClient() _all_upload_status = [upload.UploadStatus()] _all_download_status = [download.DownloadStatus()] + convergence = "some random string" def connected_to_introducer(self): return False diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index a44ecdb87..384aea2b0 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -11,7 +11,7 @@ from foolscap.logging import log from allmydata.util.hashutil import file_renewal_secret_hash, \ file_cancel_secret_hash, bucket_renewal_secret_hash, \ bucket_cancel_secret_hash, plaintext_hasher, \ - storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher + storage_index_hash, plaintext_segment_hasher, convergence_hasher from allmydata import encode, storage, hashtree, uri from allmydata.util import base32, idlib, mathutil from allmydata.util.assertutil import precondition @@ -1084,13 +1084,20 @@ class BaseUploadable: class FileHandle(BaseUploadable): implements(IUploadable) - def __init__(self, filehandle, contenthashkey=True): + def __init__(self, filehandle, convergence): + """ + Upload the data from the filehandle. If convergence is None then a + random encryption key will be used, else the plaintext will be hashed, + then the hash will be hashed together with the string in the + "convergence" argument to form the encryption key. + """ + assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) self._filehandle = filehandle self._key = None - self._contenthashkey = contenthashkey + self.convergence = convergence self._size = None - def _get_encryption_key_content_hash(self): + def _get_encryption_key_convergent(self): if self._key is not None: return defer.succeed(self._key) @@ -1100,7 +1107,7 @@ class FileHandle(BaseUploadable): def _got(params): k, happy, n, segsize = params f = self._filehandle - enckey_hasher = content_hash_key_hasher(k, n, segsize) + enckey_hasher = convergence_hasher(k, n, segsize, self.convergence) f.seek(0) BLOCKSIZE = 64*1024 bytes_read = 0 @@ -1131,8 +1138,8 @@ class FileHandle(BaseUploadable): return defer.succeed(self._key) def get_encryption_key(self): - if self._contenthashkey: - return self._get_encryption_key_content_hash() + if self.convergence is not None: + return self._get_encryption_key_convergent() else: return self._get_encryption_key_random() @@ -1153,15 +1160,29 @@ class FileHandle(BaseUploadable): pass class FileName(FileHandle): - def __init__(self, filename, contenthashkey=True): - FileHandle.__init__(self, open(filename, "rb"), contenthashkey=contenthashkey) + def __init__(self, filename, convergence): + """ + Upload the data from the filename. If convergence is None then a + random encryption key will be used, else the plaintext will be hashed, + then the hash will be hashed together with the string in the + "convergence" argument to form the encryption key.
+ """ + assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) + FileHandle.__init__(self, open(filename, "rb"), convergence=convergence) def close(self): FileHandle.close(self) self._filehandle.close() class Data(FileHandle): - def __init__(self, data, contenthashkey=True): - FileHandle.__init__(self, StringIO(data), contenthashkey=contenthashkey) + def __init__(self, data, convergence): + """ + Upload the data from the data argument. If convergence is None then a + random encryption key will be used, else the plaintext will be hashed, + then the hash will be hashed together with the string in the + "convergence" argument to form the encryption key." + """ + assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) + FileHandle.__init__(self, StringIO(data), convergence=convergence) class Uploader(service.MultiService): """I am a service that allows file uploading. I am a service-child of the diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index add0a0f18..4e762ecf3 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -68,7 +68,7 @@ PLAINTEXT_TAG = "allmydata_plaintext_v1" CIPHERTEXT_TAG = "allmydata_crypttext_v1" CIPHERTEXT_SEGMENT_TAG = "allmydata_crypttext_segment_v1" PLAINTEXT_SEGMENT_TAG = "allmydata_plaintext_segment_v1" -CONTENT_HASH_KEY_TAG = "allmydata_immutable_content_to_key_v1+" +CONVERGENT_ENCRYPTION_TAG = "allmydata_immutable_content_to_key_with_added_secret_v1+" CLIENT_RENEWAL_TAG = "allmydata_client_renewal_secret_v1" CLIENT_CANCEL_TAG = "allmydata_client_cancel_secret_v1" @@ -91,9 +91,9 @@ DIRNODE_CHILD_WRITECAP_TAG = "allmydata_mutable_writekey_and_salt_to_dirnode_chi def storage_index_hash(key): # storage index is truncated to 128 bits (16 bytes). We're only hashing a - # 16-byte value to get it, so there's no point in using a larger value. - # We use this same tagged hash to go from encryption key to storage index - # for random-keyed immutable files and content-hash-keyed immutabie + # 16-byte value to get it, so there's no point in using a larger value. We + # use this same tagged hash to go from encryption key to storage index for + # random-keyed immutable files and convergent-encryption immutabie # files. Mutable files use ssk_storage_index_hash(). return tagged_hash(STORAGE_INDEX_TAG, key, 16) @@ -129,15 +129,14 @@ def plaintext_segment_hasher(): KEYLEN = 16 -def content_hash_key_hash(k, n, segsize, data): - # This is defined to return a 16-byte AES key. +def convergence_hash(k, n, segsize, data, convergence): + h = convergence_hasher(k, n, segsize, convergence) + h.update(data) + return h.digest() +def convergence_hasher(k, n, segsize, convergence): + assert isinstance(convergence, str) param_tag = netstring("%d,%d,%d" % (k, n, segsize)) - tag = CONTENT_HASH_KEY_TAG + param_tag - h = tagged_hash(tag, data, KEYLEN) - return h -def content_hash_key_hasher(k, n, segsize): - param_tag = netstring("%d,%d,%d" % (k, n, segsize)) - tag = CONTENT_HASH_KEY_TAG + param_tag + tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag return tagged_hasher(tag, KEYLEN) def random_key(): diff --git a/src/allmydata/web/unlinked.py b/src/allmydata/web/unlinked.py index 696ee8471..7e4ae6550 100644 --- a/src/allmydata/web/unlinked.py +++ b/src/allmydata/web/unlinked.py @@ -14,8 +14,10 @@ class UnlinkedPUTCHKUploader(rend.Page): # "PUT /uri", to create an unlinked file. This is like PUT but # without the associated set_uri. 
- uploadable = FileHandle(req.content) - d = IClient(ctx).upload(uploadable) + client = IClient(ctx) + + uploadable = FileHandle(req.content, client.convergence) + d = client.upload(uploadable) d.addCallback(lambda results: results.uri) # that fires with the URI of the new file return d @@ -52,7 +54,7 @@ class UnlinkedPOSTCHKUploader(status.UploadResultsRendererMixin, rend.Page): assert req.method == "POST" self._done = observer.OneShotObserverList() fileobj = req.fields["file"].file - uploadable = FileHandle(fileobj) + uploadable = FileHandle(fileobj, client.convergence) d = client.upload(uploadable) d.addBoth(self._done.fire) diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 35834a29a..3b6485d12 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -867,7 +867,7 @@ class POSTHandler(rend.Page): return d2 d.addCallback(_checked) else: - uploadable = FileHandle(contents.file) + uploadable = FileHandle(contents.file, convergence=client.convergence) d = self._check_replacement(name) d.addCallback(lambda res: self._node.add_file(name, uploadable)) def _done(newnode): @@ -1047,6 +1047,7 @@ class PUTHandler(rend.Page): self._replace = replace def renderHTTP(self, ctx): + client = IClient(ctx) req = inevow.IRequest(ctx) t = self._t localfile = self._localfile @@ -1063,18 +1064,18 @@ class PUTHandler(rend.Page): d.addCallback(self._check_replacement, name, self._replace) if t == "upload": if localfile: - d.addCallback(self._upload_localfile, localfile, name) + d.addCallback(self._upload_localfile, localfile, name, convergence=client.convergence) else: # localdir # take the last step d.addCallback(self._get_or_create_directories, self._path[-1:]) - d.addCallback(self._upload_localdir, localdir) + d.addCallback(self._upload_localdir, localdir, convergence=client.convergence) elif t == "uri": d.addCallback(self._attach_uri, req.content, name) elif t == "mkdir": d.addCallback(self._mkdir, name) else: - d.addCallback(self._upload_file, req.content, name) + d.addCallback(self._upload_file, req.content, name, convergence=client.convergence) def _transform_error(f): errors = {BlockingFileError: http.BAD_REQUEST, @@ -1126,8 +1127,8 @@ class PUTHandler(rend.Page): d.addCallback(_done) return d - def _upload_file(self, node, contents, name): - uploadable = FileHandle(contents) + def _upload_file(self, node, contents, name, convergence): + uploadable = FileHandle(contents, convergence=convergence) d = node.add_file(name, uploadable) def _done(filenode): log.msg("webish upload complete", @@ -1136,8 +1137,8 @@ class PUTHandler(rend.Page): d.addCallback(_done) return d - def _upload_localfile(self, node, localfile, name): - uploadable = FileName(localfile) + def _upload_localfile(self, node, localfile, name, convergence): + uploadable = FileName(localfile, convergence=convergence) d = node.add_file(name, uploadable) d.addCallback(lambda filenode: filenode.get_uri()) return d @@ -1150,7 +1151,7 @@ class PUTHandler(rend.Page): d.addCallback(_done) return d - def _upload_localdir(self, node, localdir): + def _upload_localdir(self, node, localdir, convergence): # build up a list of files to upload. TODO: for now, these files and # directories must have UTF-8 encoded filenames: anything else will # cause the upload to break. 
@@ -1179,7 +1180,7 @@ class PUTHandler(rend.Page): if dir: d.addCallback(self._makedir, node, dir) for f in all_files: - d.addCallback(self._upload_one_file, node, localdir, f) + d.addCallback(self._upload_one_file, node, localdir, f, convergence=convergence) return d def _makedir(self, res, node, dir): @@ -1191,12 +1192,12 @@ class PUTHandler(rend.Page): d.addCallback(lambda parent: parent.create_empty_directory(dir[-1])) return d - def _upload_one_file(self, res, node, localdir, f): + def _upload_one_file(self, res, node, localdir, f, convergence): # get the parent. We can be sure this exists because we already # went through and created all the directories we require. localfile = os.path.join(localdir, *f) d = node.get_child_at_path(f[:-1]) - d.addCallback(self._upload_localfile, localfile, f[-1]) + d.addCallback(self._upload_localfile, localfile, f[-1], convergence=convergence) return d
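
The net effect of this patch on key derivation, for review purposes: the AES key for an immutable file is no longer a function of the plaintext and encoding parameters alone; a per-node "convergence" secret (created as private/convergence by init_client and threaded through FileHandle/FileName/Data) is mixed into the hash tag as well. Below is a minimal, self-contained sketch of that derivation, written in the codebase's Python 2 string conventions. The tag layout (CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + netstring("k,n,segsize")) is taken directly from convergence_hasher above; the netstring framing of the outer tag and the double-SHA-256 truncated to 16 bytes are assumptions about tagged_hasher's internals, and demo_convergence_key plus the __main__ check are illustrative only, not part of the patch.

import hashlib

KEYLEN = 16  # 16-byte AES key, matching hashutil.KEYLEN
CONVERGENT_ENCRYPTION_TAG = "allmydata_immutable_content_to_key_with_added_secret_v1+"

def netstring(s):
    # "<length>:<bytes>," framing, as used for the tag pieces in hashutil
    return "%d:%s," % (len(s), s)

def demo_convergence_key(k, n, segsize, data, convergence):
    # The tag commits to the per-node convergence secret *and* the encoding
    # parameters, so changing either one changes the derived key (and hence
    # the storage index); the same (secret, parameters, plaintext) triple
    # always yields the same key.
    param_tag = netstring("%d,%d,%d" % (k, n, segsize))
    tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag
    # assumed tagged-hash details: double SHA-256 over netstring(tag) + data,
    # truncated to KEYLEN bytes
    inner = hashlib.sha256(netstring(tag) + data).digest()
    return hashlib.sha256(inner).digest()[:KEYLEN]

if __name__ == "__main__":
    data = "Some data to upload\n" * 200
    k1 = demo_convergence_key(25, 100, 1024, data, "some convergence string")
    k2 = demo_convergence_key(25, 100, 1024, data, "a different secret")
    assert len(k1) == KEYLEN
    assert k1 != k2  # different secrets give different keys and storage indexes
    assert k1 == demo_convergence_key(25, 100, 1024, data, "some convergence string")

This is why test_upload.StorageIndex now checks that the same convergence string reproduces the same storage index while a different string (or convergence=None) does not: a node still deduplicates its own repeated uploads, and anyone sharing its convergence secret deduplicates against it, but a party that does not know the secret can no longer confirm a guessed plaintext from the storage index alone.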